gcc/tree-data-ref.c

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003-2019 Free Software Foundation, Inc.
   3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass walks a given loop structure searching for array
  22    references.  The information about the array accesses is recorded
  23    in DATA_REFERENCE structures.
  24
  25    The basic test for determining the dependences is:
   given two access functions chrec1 and chrec2 to the same array, and
  27    x and y two vectors from the iteration domain, the same element of
  28    the array is accessed twice at iterations x and y if and only if:
  29    |             chrec1 (x) == chrec2 (y).
  30
  31    The goals of this analysis are:
  32
  33    - to determine the independence: the relation between two
  34      independent accesses is qualified with the chrec_known (this
  35      information allows a loop parallelization),
  36
  37    - when two data references access the same data, to qualify the
  38      dependence relation with classic dependence representations:
  39
  40        - distance vectors
  41        - direction vectors
  42        - loop carried level dependence
  43        - polyhedron dependence
  44      or with the chains of recurrences based representation,
  45
  46    - to define a knowledge base for storing the data dependence
  47      information,
  48
  49    - to define an interface to access this data.
  50
  51
  52    Definitions:
  53
  54    - subscript: given two array accesses a subscript is the tuple
  55    composed of the access functions for a given dimension.  Example:
  56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  57    (f1, g1), (f2, g2), (f3, g3).
  58
  59    - Diophantine equation: an equation whose coefficients and
  60    solutions are integer constants, for example the equation
  61    |   3*x + 2*y = 1
  62    has an integer solution x = 1 and y = -1.
  63
  64    References:
  65
  66    - "Advanced Compilation for High Performance Computing" by Randy
  67    Allen and Ken Kennedy.
  68    http://citeseer.ist.psu.edu/goff91practical.html
  69
  70    - "Loop Transformations for Restructuring Compilers - The Foundations"
  71    by Utpal Banerjee.
  72
  73
  74 */
  75
  76 #include "config.h"
  77 #include "system.h"
  78 #include "coretypes.h"
  79 #include "backend.h"
  80 #include "rtl.h"
  81 #include "tree.h"
  82 #include "gimple.h"
  83 #include "gimple-pretty-print.h"
  84 #include "alias.h"
  85 #include "fold-const.h"
  86 #include "expr.h"
  87 #include "gimple-iterator.h"
  88 #include "tree-ssa-loop-niter.h"
  89 #include "tree-ssa-loop.h"
  90 #include "tree-ssa.h"
  91 #include "cfgloop.h"
  92 #include "tree-data-ref.h"
  93 #include "tree-scalar-evolution.h"
  94 #include "dumpfile.h"
  95 #include "tree-affine.h"
  96 #include "params.h"
  97 #include "builtins.h"
  98 #include "tree-eh.h"
  99 #include "ssa.h"
 100
/* File-local tallies for the dependence analysis.  As a static object
   the counters start out at zero.  Judging by the field names they
   count tests and their outcomes per dependence relation, per
   subscript, and per ZIV/SIV/MIV test kind; the code that updates and
   reports them lies outside this part of the file.  */
static struct datadep_stats
{
  /* Counters for whole dependence relations.  */
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  /* Counters for individual subscripts.  */
  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  /* Counters for ZIV tests.  */
  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  /* Counters for SIV tests.  */
  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  /* Counters for MIV tests.  */
  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
 127
 128 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
 129                                            unsigned int, unsigned int,
 130                                            struct loop *);
 131 /* Returns true iff A divides B.  */
 132
 133 static inline bool
 134 tree_fold_divides_p (const_tree a, const_tree b)
 135 {
 136   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 137   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 138   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 139 }
 140
 141 /* Returns true iff A divides B.  */
 142
 143 static inline bool
 144 int_divides_p (int a, int b)
 145 {
 146   return ((b % a) == 0);
 147 }
 148
 149 /* Return true if reference REF contains a union access.  */
 150
 151 static bool
 152 ref_contains_union_access_p (tree ref)
 153 {
 154   while (handled_component_p (ref))
 155     {
 156       ref = TREE_OPERAND (ref, 0);
 157       if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
 158           || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
 159         return true;
 160     }
 161   return false;
 162 }
 163
 164 \f
 165
 166 /* Dump into FILE all the data references from DATAREFS.  */
 167
 168 static void
 169 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
 170 {
 171   unsigned int i;
 172   struct data_reference *dr;
 173
 174   FOR_EACH_VEC_ELT (datarefs, i, dr)
 175     dump_data_reference (file, dr);
 176 }
 177
/* Unified debug entry point: dump to stderr all the data references
   in REF, by way of dump_data_references.  */

DEBUG_FUNCTION void
debug (vec<data_reference_p> &ref)
{
  dump_data_references (stderr, ref);
}
 185
 186 DEBUG_FUNCTION void
 187 debug (vec<data_reference_p> *ptr)
 188 {
 189   if (ptr)
 190     debug (*ptr);
 191   else
 192     fprintf (stderr, "<nil>\n");
 193 }
 194
 195
/* Dump into STDERR all the data references from DATAREFS.  This simply
   forwards to dump_data_references with stderr as the output file.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}
 203
/* Print to STDERR the data_reference DR.  This simply forwards to
   dump_data_reference with stderr as the output file.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}
 211
 212 /* Dump function for a DATA_REFERENCE structure.  */
 213
 214 void
 215 dump_data_reference (FILE *outf,
 216                      struct data_reference *dr)
 217 {
 218   unsigned int i;
 219
 220   fprintf (outf, "#(Data Ref: \n");
 221   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 222   fprintf (outf, "#  stmt: ");
 223   print_gimple_stmt (outf, DR_STMT (dr), 0);
 224   fprintf (outf, "#  ref: ");
 225   print_generic_stmt (outf, DR_REF (dr));
 226   fprintf (outf, "#  base_object: ");
 227   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
 228
 229   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 230     {
 231       fprintf (outf, "#  Access function %d: ", i);
 232       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
 233     }
 234   fprintf (outf, "#)\n");
 235 }
 236
/* Unified dump function for a DATA_REFERENCE structure: dump REF to
   stderr by way of dump_data_reference.  */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}
 244
 245 DEBUG_FUNCTION void
 246 debug (data_reference *ptr)
 247 {
 248   if (ptr)
 249     debug (*ptr);
 250   else
 251     fprintf (stderr, "<nil>\n");
 252 }
 253
 254
 255 /* Dumps the affine function described by FN to the file OUTF.  */
 256
 257 DEBUG_FUNCTION void
 258 dump_affine_function (FILE *outf, affine_fn fn)
 259 {
 260   unsigned i;
 261   tree coef;
 262
 263   print_generic_expr (outf, fn[0], TDF_SLIM);
 264   for (i = 1; fn.iterate (i, &coef); i++)
 265     {
 266       fprintf (outf, " + ");
 267       print_generic_expr (outf, coef, TDF_SLIM);
 268       fprintf (outf, " * x_%u", i);
 269     }
 270 }
 271
 272 /* Dumps the conflict function CF to the file OUTF.  */
 273
 274 DEBUG_FUNCTION void
 275 dump_conflict_function (FILE *outf, conflict_function *cf)
 276 {
 277   unsigned i;
 278
 279   if (cf->n == NO_DEPENDENCE)
 280     fprintf (outf, "no dependence");
 281   else if (cf->n == NOT_KNOWN)
 282     fprintf (outf, "not known");
 283   else
 284     {
 285       for (i = 0; i < cf->n; i++)
 286         {
 287           if (i != 0)
 288             fprintf (outf, " ");
 289           fprintf (outf, "[");
 290           dump_affine_function (outf, cf->fns[i]);
 291           fprintf (outf, "]");
 292         }
 293     }
 294 }
 295
 296 /* Dump function for a SUBSCRIPT structure.  */
 297
 298 DEBUG_FUNCTION void
 299 dump_subscript (FILE *outf, struct subscript *subscript)
 300 {
 301   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 302
 303   fprintf (outf, "\n (subscript \n");
 304   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 305   dump_conflict_function (outf, cf);
 306   if (CF_NONTRIVIAL_P (cf))
 307     {
 308       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 309       fprintf (outf, "\n  last_conflict: ");
 310       print_generic_expr (outf, last_iteration);
 311     }
 312
 313   cf = SUB_CONFLICTS_IN_B (subscript);
 314   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
 315   dump_conflict_function (outf, cf);
 316   if (CF_NONTRIVIAL_P (cf))
 317     {
 318       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 319       fprintf (outf, "\n  last_conflict: ");
 320       print_generic_expr (outf, last_iteration);
 321     }
 322
 323   fprintf (outf, "\n  (Subscript distance: ");
 324   print_generic_expr (outf, SUB_DISTANCE (subscript));
 325   fprintf (outf, " ))\n");
 326 }
 327
 328 /* Print the classic direction vector DIRV to OUTF.  */
 329
 330 DEBUG_FUNCTION void
 331 print_direction_vector (FILE *outf,
 332                         lambda_vector dirv,
 333                         int length)
 334 {
 335   int eq;
 336
 337   for (eq = 0; eq < length; eq++)
 338     {
 339       enum data_dependence_direction dir = ((enum data_dependence_direction)
 340                                             dirv[eq]);
 341
 342       switch (dir)
 343         {
 344         case dir_positive:
 345           fprintf (outf, "    +");
 346           break;
 347         case dir_negative:
 348           fprintf (outf, "    -");
 349           break;
 350         case dir_equal:
 351           fprintf (outf, "    =");
 352           break;
 353         case dir_positive_or_equal:
 354           fprintf (outf, "   +=");
 355           break;
 356         case dir_positive_or_negative:
 357           fprintf (outf, "   +-");
 358           break;
 359         case dir_negative_or_equal:
 360           fprintf (outf, "   -=");
 361           break;
 362         case dir_star:
 363           fprintf (outf, "    *");
 364           break;
 365         default:
 366           fprintf (outf, "indep");
 367           break;
 368         }
 369     }
 370   fprintf (outf, "\n");
 371 }
 372
 373 /* Print a vector of direction vectors.  */
 374
 375 DEBUG_FUNCTION void
 376 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
 377                    int length)
 378 {
 379   unsigned j;
 380   lambda_vector v;
 381
 382   FOR_EACH_VEC_ELT (dir_vects, j, v)
 383     print_direction_vector (outf, v, length);
 384 }
 385
 386 /* Print out a vector VEC of length N to OUTFILE.  */
 387
 388 DEBUG_FUNCTION void
 389 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 390 {
 391   int i;
 392
 393   for (i = 0; i < n; i++)
 394     fprintf (outfile, "%3d ", (int)vector[i]);
 395   fprintf (outfile, "\n");
 396 }
 397
 398 /* Print a vector of distance vectors.  */
 399
 400 DEBUG_FUNCTION void
 401 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
 402                     int length)
 403 {
 404   unsigned j;
 405   lambda_vector v;
 406
 407   FOR_EACH_VEC_ELT (dist_vects, j, v)
 408     print_lambda_vector (outf, v, length);
 409 }
 410
 411 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 412
 413 DEBUG_FUNCTION void
 414 dump_data_dependence_relation (FILE *outf,
 415                                struct data_dependence_relation *ddr)
 416 {
 417   struct data_reference *dra, *drb;
 418
 419   fprintf (outf, "(Data Dep: \n");
 420
 421   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 422     {
 423       if (ddr)
 424         {
 425           dra = DDR_A (ddr);
 426           drb = DDR_B (ddr);
 427           if (dra)
 428             dump_data_reference (outf, dra);
 429           else
 430             fprintf (outf, "    (nil)\n");
 431           if (drb)
 432             dump_data_reference (outf, drb);
 433           else
 434             fprintf (outf, "    (nil)\n");
 435         }
 436       fprintf (outf, "    (don't know)\n)\n");
 437       return;
 438     }
 439
 440   dra = DDR_A (ddr);
 441   drb = DDR_B (ddr);
 442   dump_data_reference (outf, dra);
 443   dump_data_reference (outf, drb);
 444
 445   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 446     fprintf (outf, "    (no dependence)\n");
 447
 448   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 449     {
 450       unsigned int i;
 451       struct loop *loopi;
 452
 453       subscript *sub;
 454       FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
 455         {
 456           fprintf (outf, "  access_fn_A: ");
 457           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
 458           fprintf (outf, "  access_fn_B: ");
 459           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
 460           dump_subscript (outf, sub);
 461         }
 462
 463       fprintf (outf, "  inner loop index: %d\n", DDR_INNER_LOOP (ddr));
 464       fprintf (outf, "  loop nest: (");
 465       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
 466         fprintf (outf, "%d ", loopi->num);
 467       fprintf (outf, ")\n");
 468
 469       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 470         {
 471           fprintf (outf, "  distance_vector: ");
 472           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 473                                DDR_NB_LOOPS (ddr));
 474         }
 475
 476       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 477         {
 478           fprintf (outf, "  direction_vector: ");
 479           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 480                                   DDR_NB_LOOPS (ddr));
 481         }
 482     }
 483
 484   fprintf (outf, ")\n");
 485 }
 486
/* Debug version: dump the data dependence relation DDR to stderr by way
   of dump_data_dependence_relation.  */

DEBUG_FUNCTION void
debug_data_dependence_relation (struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}
 494
 495 /* Dump into FILE all the dependence relations from DDRS.  */
 496
 497 DEBUG_FUNCTION void
 498 dump_data_dependence_relations (FILE *file,
 499                                 vec<ddr_p> ddrs)
 500 {
 501   unsigned int i;
 502   struct data_dependence_relation *ddr;
 503
 504   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 505     dump_data_dependence_relation (file, ddr);
 506 }
 507
/* Unified debug entry point: dump to stderr all the dependence
   relations in REF, by way of dump_data_dependence_relations.  */
DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}
 513
 514 DEBUG_FUNCTION void
 515 debug (vec<ddr_p> *ptr)
 516 {
 517   if (ptr)
 518     debug (*ptr);
 519   else
 520     fprintf (stderr, "<nil>\n");
 521 }
 522
 523
/* Dump to STDERR all the dependence relations from DDRS.  This simply
   forwards to dump_data_dependence_relations with stderr as the output
   file.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}
 531
 532 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 533    the dependence relations, and VECT_SIZE is the size of the
 534    dependence vectors, or in other words the number of loops in the
 535    considered nest.  */
 536
 537 DEBUG_FUNCTION void
 538 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
 539 {
 540   unsigned int i, j;
 541   struct data_dependence_relation *ddr;
 542   lambda_vector v;
 543
 544   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 545     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 546       {
 547         FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
 548           {
 549             fprintf (file, "DISTANCE_V (");
 550             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 551             fprintf (file, ")\n");
 552           }
 553
 554         FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
 555           {
 556             fprintf (file, "DIRECTION_V (");
 557             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 558             fprintf (file, ")\n");
 559           }
 560       }
 561
 562   fprintf (file, "\n\n");
 563 }
 564
 565 /* Dumps the data dependence relations DDRS in FILE.  */
 566
 567 DEBUG_FUNCTION void
 568 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
 569 {
 570   unsigned int i;
 571   struct data_dependence_relation *ddr;
 572
 573   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 574     dump_data_dependence_relation (file, ddr);
 575
 576   fprintf (file, "\n\n");
 577 }
 578
/* Dump the data dependence relations DDRS to stderr by way of
   dump_ddrs.  */
DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}
 584
 585 static void
 586 split_constant_offset (tree exp, tree *var, tree *off,
 587                        hash_map<tree, std::pair<tree, tree> > &cache);
 588
/* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
   (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
   constant of type ssizetype, and returns true.  If we cannot do this
   with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
   is returned.

   CACHE maps SSA names to the (VAR, OFF) pair computed for them; it is
   only consulted for names with more than one use.

   NOTE(review): some cases (e.g. INTEGER_CST, PLUS_EXPR) return true
   without checking that the resulting OFF is nonzero.  */

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
                         tree *var, tree *off,
                         hash_map<tree, std::pair<tree, tree> > &cache)
{
  tree var0, var1;
  tree off0, off1;
  /* Code used to combine the offsets; differs from CODE only for
     POINTER_PLUS_EXPR.  */
  enum tree_code ocode = code;

  /* Start from the failure state; every "return false" below leaves
     these untouched.  */
  *var = NULL_TREE;
  *off = NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      /* A constant is all offset: the variable part is zero of TYPE.  */
      *var = build_int_cst (type, 0);
      *off = fold_convert (ssizetype, op0);
      return true;

    case POINTER_PLUS_EXPR:
      /* The offsets are ssizetype values, so they are added with a plain
         PLUS_EXPR; VAR is still rebuilt with the original CODE.  */
      ocode = PLUS_EXPR;
      /* FALLTHROUGH */
    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Split both operands and recombine the variable parts and the
         constant parts separately.  */
      split_constant_offset (op0, &var0, &off0, cache);
      split_constant_offset (op1, &var1, &off1, cache);
      *var = fold_build2 (code, type, var0, var1);
      *off = size_binop (ocode, off0, off1);
      return true;

    case MULT_EXPR:
      /* Only multiplication by an INTEGER_CST is handled; the constant
         factor scales both the variable part and the offset.  */
      if (TREE_CODE (op1) != INTEGER_CST)
        return false;

      split_constant_offset (op0, &var0, &off0, cache);
      *var = fold_build2 (MULT_EXPR, type, var0, op1);
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      return true;

    case ADDR_EXPR:
      {
        tree base, poffset;
        poly_int64 pbitsize, pbitpos, pbytepos;
        machine_mode pmode;
        int punsignedp, preversep, pvolatilep;

        /* Decompose the object whose address is taken into a base, a
           constant bit position and an optional variable offset.  */
        op0 = TREE_OPERAND (op0, 0);
        base
          = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
                                 &punsignedp, &preversep, &pvolatilep);

        /* Give up unless the bit position is a whole number of bytes.  */
        if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
          return false;
        base = build_fold_addr_expr (base);
        off0 = ssize_int (pbytepos);

        if (poffset)
          {
            /* Move the constant part of the variable offset into OFF0 and
               add what remains to the base address.  */
            split_constant_offset (poffset, &poffset, &off1, cache);
            off0 = size_binop (PLUS_EXPR, off0, off1);
            if (POINTER_TYPE_P (TREE_TYPE (base)))
              base = fold_build_pointer_plus (base, poffset);
            else
              base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
                                  fold_convert (TREE_TYPE (base), poffset));
          }

        var0 = fold_convert (type, base);

        /* If variable length types are involved, punt, otherwise casts
           might be converted into ARRAY_REFs in gimplify_conversion.
           To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
           possibly no longer appears in current GIMPLE, might resurface.
           This perhaps could run
           if (CONVERT_EXPR_P (var0))
             {
               gimplify_conversion (&var0);
               // Attempt to fill in any within var0 found ARRAY_REF's
               // element size from corresponding op embedded ARRAY_REF,
               // if unsuccessful, just punt.
             }  */
        /* Strip all pointer levels from TYPE and reject the result when
           int_size_in_bytes reports a negative size for it.  */
        while (POINTER_TYPE_P (type))
          type = TREE_TYPE (type);
        if (int_size_in_bytes (type) < 0)
          return false;

        *var = var0;
        *off = off0;
        return true;
      }

    case SSA_NAME:
      {
        if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
          return false;

        /* Look through the statement defining OP0; only assignments are
           handled.  */
        gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
        enum tree_code subcode;

        if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
          return false;

        subcode = gimple_assign_rhs_code (def_stmt);

        /* We are using a cache to avoid un-CSEing large amounts of code.  */
        bool use_cache = false;
        /* Only names with more than one use, defined by one of the codes
           this function handles, go through the cache.  */
        if (!has_single_use (op0)
            && (subcode == POINTER_PLUS_EXPR
                || subcode == PLUS_EXPR
                || subcode == MINUS_EXPR
                || subcode == MULT_EXPR
                || subcode == ADDR_EXPR
                || CONVERT_EXPR_CODE_P (subcode)))
          {
            use_cache = true;
            bool existed;
            std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
            if (existed)
              {
                /* An entry with a zero offset counts as a failure.  That
                   is what the placeholder stored below looks like, both
                   while OP0 is still being processed and after processing
                   it failed.  */
                if (integer_zerop (e.second))
                  return false;
                *var = e.first;
                *off = e.second;
                return true;
              }
            /* Placeholder (OP0, 0); overwritten below on success.  */
            e = std::make_pair (op0, ssize_int (0));
          }

        var0 = gimple_assign_rhs1 (def_stmt);
        var1 = gimple_assign_rhs2 (def_stmt);

        /* Recurse on the right-hand side of the definition, keeping
           TYPE.  */
        bool res = split_constant_offset_1 (type, var0, subcode, var1,
                                            var, off, cache);
        /* The slot is looked up again instead of reusing E; presumably
           the recursive call may have added entries to the map - TODO
           confirm.  */
        if (res && use_cache)
          *cache.get (op0) = std::make_pair (*var, *off);
        return res;
      }
    CASE_CONVERT:
      {
        /* We must not introduce undefined overflow, and we must not change
           the value.  Hence we're okay if the inner type doesn't overflow
           to start with (pointer or signed), the outer type also is an
           integer or pointer and the outer precision is at least as large
           as the inner.  */
        tree itype = TREE_TYPE (op0);
        if ((POINTER_TYPE_P (itype)
             || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
            && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
            && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
          {
            if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
              {
                /* Split the unconverted operand and try to prove that
                   wrapping isn't a problem.  */
                tree tmp_var, tmp_off;
                split_constant_offset (op0, &tmp_var, &tmp_off, cache);

                /* See whether we have an SSA_NAME whose range is known
                   to be [A, B].  */
                if (TREE_CODE (tmp_var) != SSA_NAME)
                  return false;
                wide_int var_min, var_max;
                value_range_kind vr_type = get_range_info (tmp_var, &var_min,
                                                           &var_max);
                wide_int var_nonzero = get_nonzero_bits (tmp_var);
                signop sgn = TYPE_SIGN (itype);
                if (intersect_range_with_nonzero_bits (vr_type, &var_min,
                                                       &var_max, var_nonzero,
                                                       sgn) != VR_RANGE)
                  return false;

                /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
                   is known to be [A + TMP_OFF, B + TMP_OFF], with all
                   operations done in ITYPE.  The addition must overflow
                   at both ends of the range or at neither.  */
                wi::overflow_type overflow[2];
                unsigned int prec = TYPE_PRECISION (itype);
                wide_int woff = wi::to_wide (tmp_off, prec);
                wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
                /* Only the overflow flag of the upper bound is needed.  */
                wi::add (var_max, woff, sgn, &overflow[1]);
                if ((overflow[0] != wi::OVF_NONE) != (overflow[1] != wi::OVF_NONE))
                  return false;

                /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR.  */
                widest_int diff = (widest_int::from (op0_min, sgn)
                                   - widest_int::from (var_min, sgn));
                var0 = tmp_var;
                *off = wide_int_to_tree (ssizetype, diff);
              }
            else
              /* No wrapping inner type: split the operand directly.  */
              split_constant_offset (op0, &var0, off, cache);
            *var = fold_convert (type, var0);
            return true;
          }
        return false;
      }

    default:
      return false;
    }
}
 796
 797 /* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
 798    will be ssizetype.  */
 799
 800 static void
 801 split_constant_offset (tree exp, tree *var, tree *off,
 802                        hash_map<tree, std::pair<tree, tree> > &cache)
 803 {
 804   tree type = TREE_TYPE (exp), op0, op1, e, o;
 805   enum tree_code code;
 806
 807   *var = exp;
 808   *off = ssize_int (0);
 809
 810   if (tree_is_chrec (exp)
 811       || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
 812     return;
 813
 814   code = TREE_CODE (exp);
 815   extract_ops_from_tree (exp, &code, &op0, &op1);
 816   if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache))
 817     {
 818       *var = e;
 819       *off = o;
 820     }
 821 }
 822
 823 void
 824 split_constant_offset (tree exp, tree *var, tree *off)
 825 {
 826   static hash_map<tree, std::pair<tree, tree> > *cache;
 827   if (!cache)
 828     cache = new hash_map<tree, std::pair<tree, tree> > (37);
 829   split_constant_offset (exp, var, off, *cache);
 830   cache->empty ();
 831 }
 832
 833 /* Returns the address ADDR of an object in a canonical shape (without nop
 834    casts, and with type of pointer to the object).  */
 835
 836 static tree
 837 canonicalize_base_object_address (tree addr)
 838 {
 839   tree orig = addr;
 840
 841   STRIP_NOPS (addr);
 842
 843   /* The base address may be obtained by casting from integer, in that case
 844      keep the cast.  */
 845   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
 846     return orig;
 847
 848   if (TREE_CODE (addr) != ADDR_EXPR)
 849     return addr;
 850
 851   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
 852 }
 853
 854 /* Analyze the behavior of memory reference REF within STMT.
 855    There are two modes:
 856
 857    - BB analysis.  In this case we simply split the address into base,
 858      init and offset components, without reference to any containing loop.
 859      The resulting base and offset are general expressions and they can
 860      vary arbitrarily from one iteration of the containing loop to the next.
 861      The step is always zero.
 862
 863    - loop analysis.  In this case we analyze the reference both wrt LOOP
 864      and on the basis that the reference occurs (is "used") in LOOP;
 865      see the comment above analyze_scalar_evolution_in_loop for more
 866      information about this distinction.  The base, init, offset and
 867      step fields are all invariant in LOOP.
 868
 869    Perform BB analysis if LOOP is null, or if LOOP is the function's
 870    dummy outermost loop.  In other cases perform loop analysis.
 871
 872    Return true if the analysis succeeded and store the results in DRB if so.
 873    BB analysis can only fail for bitfield or reversed-storage accesses.  */
 874
 875 opt_result
 876 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
 877                       struct loop *loop, const gimple *stmt)
 878 {
 879   poly_int64 pbitsize, pbitpos;
 880   tree base, poffset;
 881   machine_mode pmode;
 882   int punsignedp, preversep, pvolatilep;
 883   affine_iv base_iv, offset_iv;
 884   tree init, dinit, step;
 885   bool in_loop = (loop && loop->num);
 886
 887   if (dump_file && (dump_flags & TDF_DETAILS))
 888     fprintf (dump_file, "analyze_innermost: ");
 889
 890   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
 891                               &punsignedp, &preversep, &pvolatilep);
 892   gcc_assert (base != NULL_TREE);
 893
 894   poly_int64 pbytepos;
 895   if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
 896     return opt_result::failure_at (stmt,
 897                                    "failed: bit offset alignment.\n");
 898
 899   if (preversep)
 900     return opt_result::failure_at (stmt,
 901                                    "failed: reverse storage order.\n");
 902
 903   /* Calculate the alignment and misalignment for the inner reference.  */
 904   unsigned int HOST_WIDE_INT bit_base_misalignment;
 905   unsigned int bit_base_alignment;
 906   get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
 907
 908   /* There are no bitfield references remaining in BASE, so the values
 909      we got back must be whole bytes.  */
 910   gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
 911               && bit_base_misalignment % BITS_PER_UNIT == 0);
 912   unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
 913   poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
 914
 915   if (TREE_CODE (base) == MEM_REF)
 916     {
 917       if (!integer_zerop (TREE_OPERAND (base, 1)))
 918         {
 919           /* Subtract MOFF from the base and add it to POFFSET instead.
 920              Adjust the misalignment to reflect the amount we subtracted.  */
 921           poly_offset_int moff = mem_ref_offset (base);
 922           base_misalignment -= moff.force_shwi ();
 923           tree mofft = wide_int_to_tree (sizetype, moff);
 924           if (!poffset)
 925             poffset = mofft;
 926           else
 927             poffset = size_binop (PLUS_EXPR, poffset, mofft);
 928         }
 929       base = TREE_OPERAND (base, 0);
 930     }
 931   else
 932     base = build_fold_addr_expr (base);
 933
 934   if (in_loop)
 935     {
 936       if (!simple_iv (loop, loop, base, &base_iv, true))
 937         return opt_result::failure_at
 938           (stmt, "failed: evolution of base is not affine.\n");
 939     }
 940   else
 941     {
 942       base_iv.base = base;
 943       base_iv.step = ssize_int (0);
 944       base_iv.no_overflow = true;
 945     }
 946
 947   if (!poffset)
 948     {
 949       offset_iv.base = ssize_int (0);
 950       offset_iv.step = ssize_int (0);
 951     }
 952   else
 953     {
 954       if (!in_loop)
 955         {
 956           offset_iv.base = poffset;
 957           offset_iv.step = ssize_int (0);
 958         }
 959       else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
 960         return opt_result::failure_at
 961           (stmt, "failed: evolution of offset is not affine.\n");
 962     }
 963
 964   init = ssize_int (pbytepos);
 965
 966   /* Subtract any constant component from the base and add it to INIT instead.
 967      Adjust the misalignment to reflect the amount we subtracted.  */
 968   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
 969   init = size_binop (PLUS_EXPR, init, dinit);
 970   base_misalignment -= TREE_INT_CST_LOW (dinit);
 971
 972   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
 973   init = size_binop (PLUS_EXPR, init, dinit);
 974
 975   step = size_binop (PLUS_EXPR,
 976                      fold_convert (ssizetype, base_iv.step),
 977                      fold_convert (ssizetype, offset_iv.step));
 978
 979   base = canonicalize_base_object_address (base_iv.base);
 980
 981   /* See if get_pointer_alignment can guarantee a higher alignment than
 982      the one we calculated above.  */
 983   unsigned int HOST_WIDE_INT alt_misalignment;
 984   unsigned int alt_alignment;
 985   get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
 986
 987   /* As above, these values must be whole bytes.  */
 988   gcc_assert (alt_alignment % BITS_PER_UNIT == 0
 989               && alt_misalignment % BITS_PER_UNIT == 0);
 990   alt_alignment /= BITS_PER_UNIT;
 991   alt_misalignment /= BITS_PER_UNIT;
 992
 993   if (base_alignment < alt_alignment)
 994     {
 995       base_alignment = alt_alignment;
 996       base_misalignment = alt_misalignment;
 997     }
 998
 999   drb->base_address = base;
1000   drb->offset = fold_convert (ssizetype, offset_iv.base);
1001   drb->init = init;
1002   drb->step = step;
1003   if (known_misalignment (base_misalignment, base_alignment,
1004                           &drb->base_misalignment))
1005     drb->base_alignment = base_alignment;
1006   else
1007     {
1008       drb->base_alignment = known_alignment (base_misalignment);
1009       drb->base_misalignment = 0;
1010     }
1011   drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1012   drb->step_alignment = highest_pow2_factor (step);
1013
1014   if (dump_file && (dump_flags & TDF_DETAILS))
1015     fprintf (dump_file, "success.\n");
1016
1017   return opt_result::success ();
1018 }
1019
1020 /* Tell whether OP is a component reference that may form one dimension
1021    of a DR access function.  Only a subset of the codes accepted by
1022    handled_component_p qualifies.  */
1023
1024 static bool
1025 access_fn_component_p (tree op)
1026 {
1027   const enum tree_code code = TREE_CODE (op);
1028
1029   /* Complex parts and array elements always qualify.  */
1030   if (code == REALPART_EXPR || code == IMAGPART_EXPR || code == ARRAY_REF)
1031     return true;
1032
1033   /* A field reference qualifies only when the object it selects from
1034      is a record.  */
1035   if (code == COMPONENT_REF)
1036     return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1037
1038   return false;
1039 }
1040
1041 /* Determines the base object and the list of indices of memory reference
1042    DR, analyzed in LOOP and instantiated before NEST.

        The results are stored in DR_BASE_OBJECT (DR) and DR_ACCESS_FNS (DR).
        When NEST is null the reference itself becomes the base object and
        the vector of access functions is created empty.  */
1043
1044 static void
1045 dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
1046 {
1047   vec<tree> access_fns = vNULL;
1048   tree ref, op;
1049   tree base, off, access_fn;
1050
1051   /* If analyzing a basic-block there are no indices to analyze
1052      and thus no access functions.  */
1053   if (!nest)
1054     {
1055       DR_BASE_OBJECT (dr) = DR_REF (dr);
1056       DR_ACCESS_FNS (dr).create (0);
1057       return;
1058     }
1059
1060   ref = DR_REF (dr);
1061
1062   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1063      into a two element array with a constant index.  The base is
1064      then just the immediate underlying object.  */
1065   if (TREE_CODE (ref) == REALPART_EXPR)
1066     {
1067       ref = TREE_OPERAND (ref, 0);
1068       access_fns.safe_push (integer_zero_node);
1069     }
1070   else if (TREE_CODE (ref) == IMAGPART_EXPR)
1071     {
1072       ref = TREE_OPERAND (ref, 0);
1073       access_fns.safe_push (integer_one_node);
1074     }
1075
1076   /* Analyze access functions of dimensions we know to be independent.
1077      The list of component references handled here should be kept in
1078      sync with access_fn_component_p.  Each iteration peels one
          component off REF, so the access functions are pushed starting
          from the full reference and moving towards its base.  */
1079   while (handled_component_p (ref))
1080     {
1081       if (TREE_CODE (ref) == ARRAY_REF)
1082         {
1083           op = TREE_OPERAND (ref, 1);
1084           access_fn = analyze_scalar_evolution (loop, op);
1085           access_fn = instantiate_scev (nest, loop, access_fn);
1086           access_fns.safe_push (access_fn);
1087         }
1088       else if (TREE_CODE (ref) == COMPONENT_REF
1089                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1090         {
1091           /* For COMPONENT_REFs of records (but not unions!) use the
1092              FIELD_DECL offset as constant access function so we can
1093              disambiguate a[i].f1 and a[i].f2.  The value pushed is the
                  field offset scaled by BITS_PER_UNIT plus the field's
                  DECL_FIELD_BIT_OFFSET.  Note that this OFF shadows the
                  function-level OFF.  */
1094           tree off = component_ref_field_offset (ref);
1095           off = size_binop (PLUS_EXPR,
1096                             size_binop (MULT_EXPR,
1097                                         fold_convert (bitsizetype, off),
1098                                         bitsize_int (BITS_PER_UNIT)),
1099                             DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1100           access_fns.safe_push (off);
1101         }
1102       else
1103         /* If we have an unhandled component we could not translate
1104            to an access function stop analyzing.  We have determined
1105            our base object in this case.  */
1106         break;
1107
1108       ref = TREE_OPERAND (ref, 0);
1109     }
1110
1111   /* If the address operand of a MEM_REF base has an evolution in the
1112      analyzed nest, add it as an additional independent access-function.  */
1113   if (TREE_CODE (ref) == MEM_REF)
1114     {
1115       op = TREE_OPERAND (ref, 0);
1116       access_fn = analyze_scalar_evolution (loop, op);
1117       access_fn = instantiate_scev (nest, loop, access_fn);
1118       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1119         {
1120           tree orig_type;
1121           tree memoff = TREE_OPERAND (ref, 1);
1122           base = initial_condition (access_fn);
1123           orig_type = TREE_TYPE (base);
1124           STRIP_USELESS_TYPE_CONVERSION (base);
1125           split_constant_offset (base, &base, &off);
1126           STRIP_USELESS_TYPE_CONVERSION (base);
1127           /* Fold the MEM_REF offset into the evolutions initial
1128              value to make more bases comparable.  */
1129           if (!integer_zerop (memoff))
1130             {
1131               off = size_binop (PLUS_EXPR, off,
1132                                 fold_convert (ssizetype, memoff));
1133               memoff = build_int_cst (TREE_TYPE (memoff), 0);
1134             }
1135           /* Adjust the offset so it is a multiple of the access type
1136              size and thus we separate bases that can possibly be used
1137              to produce partial overlaps (which the access_fn machinery
1138              cannot handle).  */
1139           wide_int rem;
1140           if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1141               && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1142               && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1143             rem = wi::mod_trunc
1144               (wi::to_wide (off),
1145                wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1146                SIGNED);
1147           else
1148             /* If we can't compute the remainder simply force the initial
1149                condition to zero.  */
1150             rem = wi::to_wide (off);
               /* Keep OFF minus the remainder in OFF and move the remainder
                  itself to MEMOFF.  */
1151           off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1152           memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1153           /* And finally replace the initial condition.  */
1154           access_fn = chrec_replace_initial_condition
1155               (access_fn, fold_convert (orig_type, off));
1156           /* ???  This is still not a suitable base object for
1157              dr_may_alias_p - the base object needs to be an
1158              access that covers the object as whole.  With
1159              an evolution in the pointer this cannot be
1160              guaranteed.
1161              As a band-aid, mark the access so we can special-case
1162              it in dr_may_alias_p.  */
1163           tree old = ref;
1164           ref = fold_build2_loc (EXPR_LOCATION (ref),
1165                                  MEM_REF, TREE_TYPE (ref),
1166                                  base, memoff);
1167           MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1168           MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1169           DR_UNCONSTRAINED_BASE (dr) = true;
1170           access_fns.safe_push (access_fn);
1171         }
1172     }
1173   else if (DECL_P (ref))
1174     {
1175       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1176       ref = build2 (MEM_REF, TREE_TYPE (ref),
1177                     build_fold_addr_expr (ref),
1178                     build_int_cst (reference_alias_ptr_type (ref), 0));
1179     }
1180
       /* Publish the results in DR.  The vector built above is handed over
          to DR as is; it is not released here.  */
1181   DR_BASE_OBJECT (dr) = ref;
1182   DR_ACCESS_FNS (dr) = access_fns;
1183 }
1184
1185 /* Copies into DR_PTR_INFO (DR) the SSA_NAME_PTR_INFO of the address that
1186    DR's reference dereferences, if that address is an SSA name.  */
1187
1188 static void
1189 dr_analyze_alias (struct data_reference *dr)
1190 {
1191   tree base = get_base_address (DR_REF (dr));
1192
1193   /* Only a dereference has an address operand to inspect.  */
1194   if (!INDIRECT_REF_P (base) && TREE_CODE (base) != MEM_REF)
1195     return;
1196
1197   tree addr = TREE_OPERAND (base, 0);
1198   if (TREE_CODE (addr) == SSA_NAME)
1199     DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1200 }
1201
1202 /* Frees data reference DR together with its vector of access functions.  */
1203
1204 void
1205 free_data_ref (data_reference_p dr)
1206 {
1207   DR_ACCESS_FNS (dr).release ();
1208   free (dr);
1209 }
1210
1211 /* Build the data_reference for MEMREF, a memory reference accessed in
1212    STMT.  IS_READ is true for a read and false for a write.
1213    IS_CONDITIONAL_IN_STMT is true if the reference is conditional
1214    within STMT, i.e. if it might not occur even when STMT is executed
1215    and runs to completion.
1216
1217    NEST is the outermost loop in which the reference should be
1218    instantiated and LOOP is the loop in which it should be analyzed.
1219    Return the newly allocated description; free_data_ref releases it.  */
1220
1221 struct data_reference *
1222 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1223                  bool is_read, bool is_conditional_in_stmt)
1224 {
1225   struct data_reference *dr;
1226
1227   if (dump_file && (dump_flags & TDF_DETAILS))
1228     {
1229       fprintf (dump_file, "Creating dr for ");
1230       print_generic_expr (dump_file, memref, TDF_SLIM);
1231       fprintf (dump_file, "\n");
1232     }
1233
1234   dr = XCNEW (struct data_reference);
1235   DR_STMT (dr) = stmt;
1236   DR_REF (dr) = memref;
1237   DR_IS_READ (dr) = is_read;
1238   DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1239
1240   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1241                         nest != NULL ? loop : NULL, stmt);
1242   dr_analyze_indices (dr, nest, loop);
1243   dr_analyze_alias (dr);
1244
1245   if (dump_file && (dump_flags & TDF_DETAILS))
1246     {
1247       /* The alignments and the dimension index are unsigned: use %u.  */
1248       fprintf (dump_file, "\tbase_address: ");
1249       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1250       fprintf (dump_file, "\n\toffset from base address: ");
1251       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1252       fprintf (dump_file, "\n\tconstant offset from base address: ");
1253       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1254       fprintf (dump_file, "\n\tstep: ");
1255       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1256       fprintf (dump_file, "\n\tbase alignment: %u", DR_BASE_ALIGNMENT (dr));
1257       fprintf (dump_file, "\n\tbase misalignment: %u",
1258                DR_BASE_MISALIGNMENT (dr));
1259       fprintf (dump_file, "\n\toffset alignment: %u",
1260                DR_OFFSET_ALIGNMENT (dr));
1261       fprintf (dump_file, "\n\tstep alignment: %u", DR_STEP_ALIGNMENT (dr));
1262       fprintf (dump_file, "\n\tbase_object: ");
1263       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1264       fprintf (dump_file, "\n");
1265       for (unsigned i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1266         {
1267           fprintf (dump_file, "\tAccess function %u: ", i);
1268           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1269         }
1270     }
1271
1272   return dr;
1273 }
1274
1275 /* Helper that computes an order between two tree expressions T1 and T2,
1276    for use by comparison functions that sort objects by tree expression.
1277    Returns -1, 0 or 1; a null tree sorts before any non-null tree.  */
1278
1279 int
1280 data_ref_compare_tree (tree t1, tree t2)
1281 {
1282   int i, cmp;
1283   char tclass;
1284
1285   if (t1 == t2)
1286     return 0;
1287   if (t1 == NULL)
1288     return -1;
1289   if (t2 == NULL)
1290     return 1;
1291
1292   STRIP_USELESS_TYPE_CONVERSION (t1);
1293   STRIP_USELESS_TYPE_CONVERSION (t2);
1294   if (t1 == t2)
1295     return 0;
1296
1297   if (TREE_CODE (t1) != TREE_CODE (t2)
1298       && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1299     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1300
1301   enum tree_code code = TREE_CODE (t1);
1302   switch (code)
1303     {
1304     case INTEGER_CST:
1305       return tree_int_cst_compare (t1, t2);
1306
1307     case STRING_CST:
1308       if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1309         return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1310       /* memcmp only guarantees the sign of its result; normalize it.  */
1311       cmp = memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1312                     TREE_STRING_LENGTH (t1));
1313       return (cmp > 0) - (cmp < 0);
1314
1315     case SSA_NAME:
1316       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1317         return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1318       break;
1319
1320     default:
1321       if (POLY_INT_CST_P (t1))
1322         return compare_sizes_for_sort (wi::to_poly_widest (t1),
1323                                        wi::to_poly_widest (t2));
1324
1325       tclass = TREE_CODE_CLASS (code);
1326       /* For decls, compare their UIDs.  */
1327       if (tclass == tcc_declaration)
1328         {
1329           if (DECL_UID (t1) != DECL_UID (t2))
1330             return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1331           break;
1332         }
1333       /* For expressions, compare their operands recursively.  */
1334       else if (IS_EXPR_CODE_CLASS (tclass))
1335         {
1336           for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1337             {
1338               cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1339                                            TREE_OPERAND (t2, i));
1340               if (cmp != 0)
1341                 return cmp;
1342             }
1343         }
1344       else
1345         gcc_unreachable ();
1346     }
1347
1348   return 0;
1349 }
1350
1351 /* Decide whether data dependence DDR can be resolved by a run-time alias
1352    check.  The result is a success, or a failure that says why not.  */
1353
1354 opt_result
1355 runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
1356 {
1357   if (dump_enabled_p ())
1358     dump_printf (MSG_NOTE,
1359                  "consider run-time aliasing test between %T and %T\n",
1360                  DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1361
1362   if (speed_p)
1363     {
1364       /* FORNOW: versioning with an outer loop is supported neither by
1365          vectorization nor by loop distribution.  */
1366       if (loop == NULL || loop->inner == NULL)
1367         return opt_result::success ();
1368       return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1369                                      "runtime alias check not supported for"
1370                                      " outer loop.\n");
1371     }
1372   return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1373                                  "runtime alias check not supported when"
1374                                  " optimizing for size.\n");
1375 }
1376
1377 /* Equality of two dr_with_seg_len objects.
1378
1379    Two objects are equal when their data references have the same base
1380    address, offset and init, and the objects have the same segment
1381    length, access size and alignment.  This is used to recognize pairs
1382    of data references whose aliasing checks may be combined.  */
1383
1384 static bool
1385 operator == (const dr_with_seg_len& d1,
1386              const dr_with_seg_len& d2)
1387 {
1388   if (!operand_equal_p (DR_BASE_ADDRESS (d1.dr), DR_BASE_ADDRESS (d2.dr), 0)
1389       || data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) != 0
1390       || data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) != 0
1391       || data_ref_compare_tree (d1.seg_len, d2.seg_len) != 0)
1392     return false;
1393
1394   return known_eq (d1.access_size, d2.access_size) && d1.align == d2.align;
1395 }
1396
1397 /* qsort callback for dr_with_seg_len_pair_t objects.  The resulting
1398    order lets aliasing checks be combined in a single scan.  */
1399
1400 static int
1401 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1402 {
1403   const dr_with_seg_len_pair_t *lhs = (const dr_with_seg_len_pair_t *) pa_;
1404   const dr_with_seg_len_pair_t *rhs = (const dr_with_seg_len_pair_t *) pb_;
1405   data_reference_p lhs_dr1 = lhs->first.dr;
1406   data_reference_p lhs_dr2 = lhs->second.dr;
1407   data_reference_p rhs_dr1 = rhs->first.dr;
1408   data_reference_p rhs_dr2 = rhs->second.dr;
1409
1410   /* For DR pairs (a, b) and (c, d), merging the alias checks is only
1411      considered if a and c have the same base address and step, and b
1412      and d have the same base address and step.  So whenever a&c or b&d
1413      differ in address or step, the relative order of the two pairs
1414      after sorting is irrelevant.  The keys below are listed from most
1415      to least significant.  */
1416   const tree keys[][2] =
1417     {
1418       /* Base addresses and steps of both members come first...  */
1419       { DR_BASE_ADDRESS (lhs_dr1), DR_BASE_ADDRESS (rhs_dr1) },
1420       { DR_BASE_ADDRESS (lhs_dr2), DR_BASE_ADDRESS (rhs_dr2) },
1421       { DR_STEP (lhs_dr1), DR_STEP (rhs_dr1) },
1422       { DR_STEP (lhs_dr2), DR_STEP (rhs_dr2) },
1423
1424       /* ...followed by the offset and init of each member in turn.  */
1425       { DR_OFFSET (lhs_dr1), DR_OFFSET (rhs_dr1) },
1426       { DR_INIT (lhs_dr1), DR_INIT (rhs_dr1) },
1427       { DR_OFFSET (lhs_dr2), DR_OFFSET (rhs_dr2) },
1428       { DR_INIT (lhs_dr2), DR_INIT (rhs_dr2) },
1429     };
1430
1431   /* The first key that differs decides the order.  */
1432   for (unsigned int k = 0; k < sizeof (keys) / sizeof (keys[0]); ++k)
1433     {
1434       int comp_res = data_ref_compare_tree (keys[k][0], keys[k][1]);
1435       if (comp_res != 0)
1436         return comp_res;
1437     }
1438
1439   return 0;
1440 }
1441
1442 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1443    The second parameter is unnamed and therefore unused here; it was
        documented as FACTOR, the number of iterations that each data
        reference is accessed.
1444
1445    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1446    we create an expression:
1447
1448    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0
1449     || (load_ptr_0 + load_segment_length_0) <= store_ptr_0)
1450
1451    for aliasing checks.  However, in some cases we can decrease the number
1452    of checks by combining two checks into one.  For example, suppose we have
1453    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1454    condition is satisfied:
1455
1456    load_ptr_0 < load_ptr_1  &&
1457    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1458
1459    (this condition means, in each iteration of vectorized loop, the accessed
1460    memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1461    load_ptr_1.)
1462
1463    we then can use only the following expression to finish the aliasing checks
1464    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1465
1466    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0
1467     || (load_ptr_1 + load_segment_length_1) <= store_ptr_0)
1468
1469    Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1470    basic address.  */
1471
1472 void
1473 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1474                                poly_uint64)
1475 {
1476   /* Sort the collected data ref pairs so that we can scan them once to
1477      combine all possible aliasing checks.  */
1478   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1479
1480   /* Scan the sorted dr pairs and check if we can combine alias checks
1481      of two neighboring dr pairs.  */
1482   for (size_t i = 1; i < alias_pairs->length (); ++i)
1483     {
1484       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1485       dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
1486                       *dr_b1 = &(*alias_pairs)[i-1].second,
1487                       *dr_a2 = &(*alias_pairs)[i].first,
1488                       *dr_b2 = &(*alias_pairs)[i].second;
1489
1490       /* Remove duplicate data ref pairs.  */
1491       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1492         {
1493           if (dump_enabled_p ())
1494             dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1495                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1496                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
               /* Step I back so that the next iteration compares pair I-1
                  with whatever pair now sits at index I.  */
1497           alias_pairs->ordered_remove (i--);
1498           continue;
1499         }
1500
1501       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1502         {
1503           /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1504              and DR_A1 and DR_A2 are two consecutive memrefs.  If it is
                  the A side that matches, exchange the roles of A and B; only
                  the local pointers are swapped, not the vector elements.  */
1505           if (*dr_a1 == *dr_a2)
1506             {
1507               std::swap (dr_a1, dr_b1);
1508               std::swap (dr_a2, dr_b2);
1509             }
1510
1511           poly_int64 init_a1, init_a2;
1512           /* Only consider cases in which the distance between the initial
1513              DR_A1 and the initial DR_A2 is known at compile time.  */
1514           if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1515                                 DR_BASE_ADDRESS (dr_a2->dr), 0)
1516               || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1517                                    DR_OFFSET (dr_a2->dr), 0)
1518               || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1519               || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1520             continue;
1521
1522           /* Don't combine if we can't tell which one comes first.  */
1523           if (!ordered_p (init_a1, init_a2))
1524             continue;
1525
1526           /* Make sure dr_a1 starts left of dr_a2.  This swaps the objects
                  themselves, so that the earlier pair ends up holding the
                  reference with the smaller init.  */
1527           if (maybe_gt (init_a1, init_a2))
1528             {
1529               std::swap (*dr_a1, *dr_a2);
1530               std::swap (init_a1, init_a2);
1531             }
1532
1533           /* Work out what the segment length would be if we did combine
1534              DR_A1 and DR_A2:
1535
1536              - If DR_A1 and DR_A2 have equal lengths, that length is
1537                also the combined length.
1538
1539              - If DR_A1 and DR_A2 both have negative "lengths", the combined
1540                length is the lower bound on those lengths.
1541
1542              - If DR_A1 and DR_A2 both have positive lengths, the combined
1543                length is the upper bound on those lengths.
1544
1545              Other cases are unlikely to give a useful combination.
1546
1547              The lengths both have sizetype, so the sign is taken from
1548              the step instead.  */
1549           if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0))
1550             {
1551               poly_uint64 seg_len_a1, seg_len_a2;
1552               if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1553                   || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1554                 continue;
1555
1556               tree indicator_a = dr_direction_indicator (dr_a1->dr);
1557               if (TREE_CODE (indicator_a) != INTEGER_CST)
1558                 continue;
1559
1560               tree indicator_b = dr_direction_indicator (dr_a2->dr);
1561               if (TREE_CODE (indicator_b) != INTEGER_CST)
1562                 continue;
1563
1564               int sign_a = tree_int_cst_sgn (indicator_a);
1565               int sign_b = tree_int_cst_sgn (indicator_b);
1566
1567               poly_uint64 new_seg_len;
1568               if (sign_a <= 0 && sign_b <= 0)
1569                 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1570               else if (sign_a >= 0 && sign_b >= 0)
1571                 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1572               else
1573                 continue;
1574
1575               dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1576                                               new_seg_len);
1577               dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1578             }
1579
1580           /* This is always positive due to the swap above.  */
1581           poly_uint64 diff = init_a2 - init_a1;
1582
1583           /* The new check will start at DR_A1.  Make sure that its access
1584              size encompasses the initial DR_A2.  */
1585           if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1586             {
1587               dr_a1->access_size = upper_bound (dr_a1->access_size,
1588                                                 diff + dr_a2->access_size);
1589               unsigned int new_align = known_alignment (dr_a1->access_size);
1590               dr_a1->align = MIN (dr_a1->align, new_align);
1591             }
1592           if (dump_enabled_p ())
1593             dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1594                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1595                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
               /* The combined check is held by the earlier pair (through
                  DR_A1), so drop the later pair and revisit this index.  */
1596           alias_pairs->ordered_remove (i);
1597           i--;
1598         }
1599     }
1600 }
1601
1602 /* Given LOOP's two data references and segment lengths described by DR_A
1603    and DR_B, create expression checking if the two addresses ranges intersect
1604    with each other based on index of the two addresses.  This can only be
1605    done if DR_A and DR_B referring to the same (array) object and the index
1606    is the only difference.  For example:
1607
1608                        DR_A                           DR_B
1609       data-ref         arr[i]                         arr[j]
1610       base_object      arr                            arr
1611       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
1612
1613    The addresses and their index are like:
1614
1615         |<- ADDR_A    ->|          |<- ADDR_B    ->|
1616      ------------------------------------------------------->
1617         |   |   |   |   |          |   |   |   |   |
1618      ------------------------------------------------------->
1619         i_0 ...         i_0+4      j_0 ...         j_0+4
1620
1621    We can create expression based on index rather than address:
1622
1623      (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1624
1625    Note evolution step of index needs to be considered in comparison.  */
1626
1627 static bool
1628 create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
1629                                      const dr_with_seg_len& dr_a,
1630                                      const dr_with_seg_len& dr_b)
1631 {
1632   if (integer_zerop (DR_STEP (dr_a.dr))
1633       || integer_zerop (DR_STEP (dr_b.dr))
1634       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1635     return false;
1636
1637   poly_uint64 seg_len1, seg_len2;
1638   if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
1639       || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
1640     return false;
1641
1642   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1643     return false;
1644
1645   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1646     return false;
1647
1648   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1649     return false;
1650
1651   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1652
1653   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1654   unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
1655   if (neg_step)
1656     {
1657       abs_step = -abs_step;
1658       seg_len1 = -seg_len1;
1659       seg_len2 = -seg_len2;
1660     }
1661   else
1662     {
1663       /* Include the access size in the length, so that we only have one
1664          tree addition below.  */
1665       seg_len1 += dr_a.access_size;
1666       seg_len2 += dr_b.access_size;
1667     }
1668
1669   /* Infer the number of iterations with which the memory segment is accessed
1670      by DR.  In other words, alias is checked if memory segment accessed by
1671      DR_A in some iterations intersect with memory segment accessed by DR_B
1672      in the same amount iterations.
1673      Note segment length is a linear function of number of iterations with
1674      DR_STEP as the coefficient.  */
1675   poly_uint64 niter_len1, niter_len2;
1676   if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
1677       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
1678     return false;
1679
1680   poly_uint64 niter_access1 = 0, niter_access2 = 0;
1681   if (neg_step)
1682     {
1683       /* Divide each access size by the byte step, rounding up.  */
1684       if (!can_div_trunc_p (dr_a.access_size - abs_step - 1,
1685                             abs_step, &niter_access1)
1686           || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
1687                                abs_step, &niter_access2))
1688         return false;
1689     }
1690
1691   unsigned int i;
1692   for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1693     {
1694       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1695       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1696       /* Two indices must be the same if they are not scev, or not scev wrt
1697          the current loop being vectorized.  */
1698       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1699           || TREE_CODE (access2) != POLYNOMIAL_CHREC
1700           || CHREC_VARIABLE (access1) != (unsigned)loop->num
1701           || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1702         {
1703           if (operand_equal_p (access1, access2, 0))
1704             continue;
1705
1706           return false;
1707         }
1708       /* The two indices must have the same step.  */
1709       if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1710         return false;
1711
1712       tree idx_step = CHREC_RIGHT (access1);
1713       /* Index must have const step, otherwise DR_STEP won't be constant.  */
1714       gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1715       /* Index must evaluate in the same direction as DR.  */
1716       gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1717
1718       tree min1 = CHREC_LEFT (access1);
1719       tree min2 = CHREC_LEFT (access2);
1720       if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1721         return false;
1722
1723       /* Ideally, alias can be checked against loop's control IV, but we
1724          need to prove linear mapping between control IV and reference
1725          index.  Although that should be true, we check against (array)
1726          index of data reference.  Like segment length, index length is
1727          linear function of the number of iterations with index_step as
1728          the coefficient, i.e, niter_len * idx_step.  */
1729       tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1730                                    build_int_cst (TREE_TYPE (min1),
1731                                                   niter_len1));
1732       tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1733                                    build_int_cst (TREE_TYPE (min2),
1734                                                   niter_len2));
1735       tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1736       tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1737       /* Adjust ranges for negative step.  */
1738       if (neg_step)
1739         {
1740           /* IDX_LEN1 and IDX_LEN2 are negative in this case.  */
1741           std::swap (min1, max1);
1742           std::swap (min2, max2);
1743
1744           /* As with the lengths just calculated, we've measured the access
1745              sizes in iterations, so multiply them by the index step.  */
1746           tree idx_access1
1747             = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1748                            build_int_cst (TREE_TYPE (min1), niter_access1));
1749           tree idx_access2
1750             = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1751                            build_int_cst (TREE_TYPE (min2), niter_access2));
1752
1753           /* MINUS_EXPR because the above values are negative.  */
1754           max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
1755           max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
1756         }
1757       tree part_cond_expr
1758         = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1759             fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1760             fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1761       if (*cond_expr)
1762         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1763                                   *cond_expr, part_cond_expr);
1764       else
1765         *cond_expr = part_cond_expr;
1766     }
1767   return true;
1768 }
1769
1770 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
1771    every address ADDR accessed by D:
1772
1773      *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
1774
1775    In this case, every element accessed by D is aligned to at least
1776    ALIGN bytes.
1777
1778    If ALIGN is zero then instead set *SEG_MAX_OUT so that:
1779
1780      *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT.  */
1781
1782 static void
1783 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
1784                      tree *seg_max_out, HOST_WIDE_INT align)
1785 {
1786   /* Each access has the following pattern:
1787
1788           <- |seg_len| ->
1789           <--- A: -ve step --->
1790           +-----+-------+-----+-------+-----+
1791           | n-1 | ,.... |  0  | ..... | n-1 |
1792           +-----+-------+-----+-------+-----+
1793                         <--- B: +ve step --->
1794                         <- |seg_len| ->
1795                         |
1796                    base address
1797
1798      where "n" is the number of scalar iterations covered by the segment.
1799      (This should be VF for a particular pair if we know that both steps
1800      are the same, otherwise it will be the full number of scalar loop
1801      iterations.)
1802
1803      A is the range of bytes accessed when the step is negative,
1804      B is the range when the step is positive.
1805
1806      If the access size is "access_size" bytes, the lowest addressed byte is:
1807
1808          base + (step < 0 ? seg_len : 0)   [LB]
1809
1810      and the highest addressed byte is always below:
1811
1812          base + (step < 0 ? 0 : seg_len) + access_size   [UB]
1813
1814      Thus:
1815
1816          LB <= ADDR < UB
1817
1818      If ALIGN is nonzero, all three values are aligned to at least ALIGN
1819      bytes, so:
1820
1821          LB <= ADDR <= UB - ALIGN
1822
1823      where "- ALIGN" folds naturally with the "+ access_size" and often
1824      cancels it out.
1825
1826      We don't try to simplify LB and UB beyond this (e.g. by using
1827      MIN and MAX based on whether seg_len rather than the stride is
1828      negative) because it is possible for the absolute size of the
1829      segment to overflow the range of a ssize_t.
1830
1831      Keeping the pointer_plus outside of the cond_expr should allow
1832      the cond_exprs to be shared with other alias checks.  */
1833   tree indicator = dr_direction_indicator (d.dr);
1834   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
1835                                fold_convert (ssizetype, indicator),
1836                                ssize_int (0));
1837   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
1838                                             DR_OFFSET (d.dr));
1839   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
1840   tree seg_len
1841     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
1842
1843   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1844                                 seg_len, size_zero_node);
1845   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1846                                 size_zero_node, seg_len);
1847   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
1848                            size_int (d.access_size - align));
1849
1850   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
1851   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
1852 }
1853
1854 /* Given two data references and segment lengths described by DR_A and DR_B,
1855    create expression checking if the two addresses ranges intersect with
1856    each other:
1857
1858      ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1859      || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0))  */
1860
1861 static void
1862 create_intersect_range_checks (struct loop *loop, tree *cond_expr,
1863                                const dr_with_seg_len& dr_a,
1864                                const dr_with_seg_len& dr_b)
1865 {
1866   *cond_expr = NULL_TREE;
1867   if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1868     return;
1869
1870   unsigned HOST_WIDE_INT min_align;
1871   tree_code cmp_code;
1872   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
1873       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
1874     {
1875       /* In this case adding access_size to seg_len is likely to give
1876          a simple X * step, where X is either the number of scalar
1877          iterations or the vectorization factor.  We're better off
1878          keeping that, rather than subtracting an alignment from it.
1879
1880          In this case the maximum values are exclusive and so there is
1881          no alias if the maximum of one segment equals the minimum
1882          of another.  */
1883       min_align = 0;
1884       cmp_code = LE_EXPR;
1885     }
1886   else
1887     {
1888       /* Calculate the minimum alignment shared by all four pointers,
1889          then arrange for this alignment to be subtracted from the
1890          exclusive maximum values to get inclusive maximum values.
1891          This "- min_align" is cumulative with a "+ access_size"
1892          in the calculation of the maximum values.  In the best
1893          (and common) case, the two cancel each other out, leaving
1894          us with an inclusive bound based only on seg_len.  In the
1895          worst case we're simply adding a smaller number than before.
1896
1897          Because the maximum values are inclusive, there is an alias
1898          if the maximum value of one segment is equal to the minimum
1899          value of the other.  */
1900       min_align = MIN (dr_a.align, dr_b.align);
1901       cmp_code = LT_EXPR;
1902     }
1903
1904   tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
1905   get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
1906   get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
1907
1908   *cond_expr
1909     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1910         fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
1911         fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
1912 }
1913
1914 /* Create a conditional expression that represents the run-time checks for
1915    overlapping of address ranges represented by a list of data references
1916    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
1917    COND_EXPR is the conditional expression to be used in the if statement
1918    that controls which version of the loop gets executed at runtime.  */
1919
1920 void
1921 create_runtime_alias_checks (struct loop *loop,
1922                              vec<dr_with_seg_len_pair_t> *alias_pairs,
1923                              tree * cond_expr)
1924 {
1925   tree part_cond_expr;
1926
1927   fold_defer_overflow_warnings ();
1928   for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1929     {
1930       const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1931       const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1932
1933       if (dump_enabled_p ())
1934         dump_printf (MSG_NOTE,
1935                      "create runtime check for data references %T and %T\n",
1936                      DR_REF (dr_a.dr), DR_REF (dr_b.dr));
1937
1938       /* Create condition expression for each pair data references.  */
1939       create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1940       if (*cond_expr)
1941         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1942                                   *cond_expr, part_cond_expr);
1943       else
1944         *cond_expr = part_cond_expr;
1945     }
1946   fold_undefer_and_ignore_overflow_warnings ();
1947 }
1948
1949 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1950    expressions.  */
1951 static bool
1952 dr_equal_offsets_p1 (tree offset1, tree offset2)
1953 {
1954   bool res;
1955
1956   STRIP_NOPS (offset1);
1957   STRIP_NOPS (offset2);
1958
1959   if (offset1 == offset2)
1960     return true;
1961
1962   if (TREE_CODE (offset1) != TREE_CODE (offset2)
1963       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1964     return false;
1965
1966   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1967                              TREE_OPERAND (offset2, 0));
1968
1969   if (!res || !BINARY_CLASS_P (offset1))
1970     return res;
1971
1972   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1973                              TREE_OPERAND (offset2, 1));
1974
1975   return res;
1976 }
1977
1978 /* Check if DRA and DRB have equal offsets.  */
1979 bool
1980 dr_equal_offsets_p (struct data_reference *dra,
1981                     struct data_reference *drb)
1982 {
1983   tree offset1, offset2;
1984
1985   offset1 = DR_OFFSET (dra);
1986   offset2 = DR_OFFSET (drb);
1987
1988   return dr_equal_offsets_p1 (offset1, offset2);
1989 }
1990
1991 /* Returns true if FNA == FNB.  */
1992
1993 static bool
1994 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1995 {
1996   unsigned i, n = fna.length ();
1997
1998   if (n != fnb.length ())
1999     return false;
2000
2001   for (i = 0; i < n; i++)
2002     if (!operand_equal_p (fna[i], fnb[i], 0))
2003       return false;
2004
2005   return true;
2006 }
2007
2008 /* If all the functions in CF are the same, returns one of them,
2009    otherwise returns NULL.  */
2010
2011 static affine_fn
2012 common_affine_function (conflict_function *cf)
2013 {
2014   unsigned i;
2015   affine_fn comm;
2016
2017   if (!CF_NONTRIVIAL_P (cf))
2018     return affine_fn ();
2019
2020   comm = cf->fns[0];
2021
2022   for (i = 1; i < cf->n; i++)
2023     if (!affine_function_equal_p (comm, cf->fns[i]))
2024       return affine_fn ();
2025
2026   return comm;
2027 }
2028
2029 /* Returns the base of the affine function FN.  */
2030
2031 static tree
2032 affine_function_base (affine_fn fn)
2033 {
2034   return fn[0];
2035 }
2036
2037 /* Returns true if FN is a constant.  */
2038
2039 static bool
2040 affine_function_constant_p (affine_fn fn)
2041 {
2042   unsigned i;
2043   tree coef;
2044
2045   for (i = 1; fn.iterate (i, &coef); i++)
2046     if (!integer_zerop (coef))
2047       return false;
2048
2049   return true;
2050 }
2051
2052 /* Returns true if FN is the zero constant function.  */
2053
2054 static bool
2055 affine_function_zero_p (affine_fn fn)
2056 {
2057   return (integer_zerop (affine_function_base (fn))
2058           && affine_function_constant_p (fn));
2059 }
2060
2061 /* Returns a signed integer type with the largest precision from TA
2062    and TB.  */
2063
2064 static tree
2065 signed_type_for_types (tree ta, tree tb)
2066 {
2067   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2068     return signed_type_for (ta);
2069   else
2070     return signed_type_for (tb);
2071 }
2072
2073 /* Applies operation OP on affine functions FNA and FNB, and returns the
2074    result.  */
2075
2076 static affine_fn
2077 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2078 {
2079   unsigned i, n, m;
2080   affine_fn ret;
2081   tree coef;
2082
2083   if (fnb.length () > fna.length ())
2084     {
2085       n = fna.length ();
2086       m = fnb.length ();
2087     }
2088   else
2089     {
2090       n = fnb.length ();
2091       m = fna.length ();
2092     }
2093
2094   ret.create (m);
2095   for (i = 0; i < n; i++)
2096     {
2097       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2098                                          TREE_TYPE (fnb[i]));
2099       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2100     }
2101
2102   for (; fna.iterate (i, &coef); i++)
2103     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2104                                  coef, integer_zero_node));
2105   for (; fnb.iterate (i, &coef); i++)
2106     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2107                                  integer_zero_node, coef));
2108
2109   return ret;
2110 }
2111
2112 /* Returns the sum of affine functions FNA and FNB.  */
2113
2114 static affine_fn
2115 affine_fn_plus (affine_fn fna, affine_fn fnb)
2116 {
2117   return affine_fn_op (PLUS_EXPR, fna, fnb);
2118 }
2119
2120 /* Returns the difference of affine functions FNA and FNB.  */
2121
2122 static affine_fn
2123 affine_fn_minus (affine_fn fna, affine_fn fnb)
2124 {
2125   return affine_fn_op (MINUS_EXPR, fna, fnb);
2126 }
2127
2128 /* Frees affine function FN.  */
2129
2130 static void
2131 affine_fn_free (affine_fn fn)
2132 {
2133   fn.release ();
2134 }
2135
2136 /* Determine for each subscript in the data dependence relation DDR
2137    the distance.  */
2138
2139 static void
2140 compute_subscript_distance (struct data_dependence_relation *ddr)
2141 {
2142   conflict_function *cf_a, *cf_b;
2143   affine_fn fn_a, fn_b, diff;
2144
2145   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2146     {
2147       unsigned int i;
2148
2149       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2150         {
2151           struct subscript *subscript;
2152
2153           subscript = DDR_SUBSCRIPT (ddr, i);
2154           cf_a = SUB_CONFLICTS_IN_A (subscript);
2155           cf_b = SUB_CONFLICTS_IN_B (subscript);
2156
2157           fn_a = common_affine_function (cf_a);
2158           fn_b = common_affine_function (cf_b);
2159           if (!fn_a.exists () || !fn_b.exists ())
2160             {
2161               SUB_DISTANCE (subscript) = chrec_dont_know;
2162               return;
2163             }
2164           diff = affine_fn_minus (fn_a, fn_b);
2165
2166           if (affine_function_constant_p (diff))
2167             SUB_DISTANCE (subscript) = affine_function_base (diff);
2168           else
2169             SUB_DISTANCE (subscript) = chrec_dont_know;
2170
2171           affine_fn_free (diff);
2172         }
2173     }
2174 }
2175
2176 /* Returns the conflict function for "unknown".  */
2177
2178 static conflict_function *
2179 conflict_fn_not_known (void)
2180 {
2181   conflict_function *fn = XCNEW (conflict_function);
2182   fn->n = NOT_KNOWN;
2183
2184   return fn;
2185 }
2186
2187 /* Returns the conflict function for "independent".  */
2188
2189 static conflict_function *
2190 conflict_fn_no_dependence (void)
2191 {
2192   conflict_function *fn = XCNEW (conflict_function);
2193   fn->n = NO_DEPENDENCE;
2194
2195   return fn;
2196 }
2197
2198 /* Returns true if the address of OBJ is invariant in LOOP.  */
2199
2200 static bool
2201 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
2202 {
2203   while (handled_component_p (obj))
2204     {
2205       if (TREE_CODE (obj) == ARRAY_REF)
2206         {
2207           for (int i = 1; i < 4; ++i)
2208             if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2209                                                         loop->num))
2210               return false;
2211         }
2212       else if (TREE_CODE (obj) == COMPONENT_REF)
2213         {
2214           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2215                                                       loop->num))
2216             return false;
2217         }
2218       obj = TREE_OPERAND (obj, 0);
2219     }
2220
2221   if (!INDIRECT_REF_P (obj)
2222       && TREE_CODE (obj) != MEM_REF)
2223     return true;
2224
2225   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2226                                                   loop->num);
2227 }
2228
2229 /* Returns false if we can prove that data references A and B do not alias,
2230    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
2231    considered.  */
2232
2233 bool
2234 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2235                 bool loop_nest)
2236 {
2237   tree addr_a = DR_BASE_OBJECT (a);
2238   tree addr_b = DR_BASE_OBJECT (b);
2239
2240   /* If we are not processing a loop nest but scalar code we
2241      do not need to care about possible cross-iteration dependences
2242      and thus can process the full original reference.  Do so,
2243      similar to how loop invariant motion applies extra offset-based
2244      disambiguation.  */
2245   if (!loop_nest)
2246     {
2247       aff_tree off1, off2;
2248       poly_widest_int size1, size2;
2249       get_inner_reference_aff (DR_REF (a), &off1, &size1);
2250       get_inner_reference_aff (DR_REF (b), &off2, &size2);
2251       aff_combination_scale (&off1, -1);
2252       aff_combination_add (&off2, &off1);
2253       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2254         return false;
2255     }
2256
2257   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2258       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2259       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2260       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2261     return false;
2262
2263   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2264      do not know the size of the base-object.  So we cannot do any
2265      offset/overlap based analysis but have to rely on points-to
2266      information only.  */
2267   if (TREE_CODE (addr_a) == MEM_REF
2268       && (DR_UNCONSTRAINED_BASE (a)
2269           || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2270     {
2271       /* For true dependences we can apply TBAA.  */
2272       if (flag_strict_aliasing
2273           && DR_IS_WRITE (a) && DR_IS_READ (b)
2274           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2275                                      get_alias_set (DR_REF (b))))
2276         return false;
2277       if (TREE_CODE (addr_b) == MEM_REF)
2278         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2279                                        TREE_OPERAND (addr_b, 0));
2280       else
2281         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2282                                        build_fold_addr_expr (addr_b));
2283     }
2284   else if (TREE_CODE (addr_b) == MEM_REF
2285            && (DR_UNCONSTRAINED_BASE (b)
2286                || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
2287     {
2288       /* For true dependences we can apply TBAA.  */
2289       if (flag_strict_aliasing
2290           && DR_IS_WRITE (a) && DR_IS_READ (b)
2291           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2292                                      get_alias_set (DR_REF (b))))
2293         return false;
2294       if (TREE_CODE (addr_a) == MEM_REF)
2295         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2296                                        TREE_OPERAND (addr_b, 0));
2297       else
2298         return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
2299                                        TREE_OPERAND (addr_b, 0));
2300     }
2301
2302   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2303      that is being subsetted in the loop nest.  */
2304   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
2305     return refs_output_dependent_p (addr_a, addr_b);
2306   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
2307     return refs_anti_dependent_p (addr_a, addr_b);
2308   return refs_may_alias_p (addr_a, addr_b);
2309 }
2310
2311 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
2312    if it is meaningful to compare their associated access functions
2313    when checking for dependencies.  */
2314
2315 static bool
2316 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2317 {
2318   /* Allow pairs of component refs from the following sets:
2319
2320        { REALPART_EXPR, IMAGPART_EXPR }
2321        { COMPONENT_REF }
2322        { ARRAY_REF }.  */
2323   tree_code code_a = TREE_CODE (ref_a);
2324   tree_code code_b = TREE_CODE (ref_b);
2325   if (code_a == IMAGPART_EXPR)
2326     code_a = REALPART_EXPR;
2327   if (code_b == IMAGPART_EXPR)
2328     code_b = REALPART_EXPR;
2329   if (code_a != code_b)
2330     return false;
2331
2332   if (TREE_CODE (ref_a) == COMPONENT_REF)
2333     /* ??? We cannot simply use the type of operand #0 of the refs here as
2334        the Fortran compiler smuggles type punning into COMPONENT_REFs.
2335        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
2336     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2337             == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2338
2339   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2340                              TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2341 }
2342
2343 /* Initialize a data dependence relation between data accesses A and
2344    B.  NB_LOOPS is the number of loops surrounding the references: the
2345    size of the classic distance/direction vectors.  */
2346
2347 struct data_dependence_relation *
2348 initialize_data_dependence_relation (struct data_reference *a,
2349                                      struct data_reference *b,
2350                                      vec<loop_p> loop_nest)
2351 {
2352   struct data_dependence_relation *res;
2353   unsigned int i;
2354
2355   res = XCNEW (struct data_dependence_relation);
2356   DDR_A (res) = a;
2357   DDR_B (res) = b;
2358   DDR_LOOP_NEST (res).create (0);
2359   DDR_SUBSCRIPTS (res).create (0);
2360   DDR_DIR_VECTS (res).create (0);
2361   DDR_DIST_VECTS (res).create (0);
2362
2363   if (a == NULL || b == NULL)
2364     {
2365       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2366       return res;
2367     }
2368
2369   /* If the data references do not alias, then they are independent.  */
2370   if (!dr_may_alias_p (a, b, loop_nest.exists ()))
2371     {
2372       DDR_ARE_DEPENDENT (res) = chrec_known;
2373       return res;
2374     }
2375
2376   unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
2377   unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
2378   if (num_dimensions_a == 0 || num_dimensions_b == 0)
2379     {
2380       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2381       return res;
2382     }
2383
2384   /* For unconstrained bases, the root (highest-indexed) subscript
2385      describes a variation in the base of the original DR_REF rather
2386      than a component access.  We have no type that accurately describes
2387      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
2388      applying this subscript) so limit the search to the last real
2389      component access.
2390
2391      E.g. for:
2392
2393         void
2394         f (int a[][8], int b[][8])
2395         {
2396           for (int i = 0; i < 8; ++i)
2397             a[i * 2][0] = b[i][0];
2398         }
2399
2400      the a and b accesses have a single ARRAY_REF component reference [0]
2401      but have two subscripts.  */
2402   if (DR_UNCONSTRAINED_BASE (a))
2403     num_dimensions_a -= 1;
2404   if (DR_UNCONSTRAINED_BASE (b))
2405     num_dimensions_b -= 1;
2406
2407   /* These structures describe sequences of component references in
2408      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
2409      specific access function.  */
2410   struct {
2411     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
2412        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
2413        indices.  In C notation, these are the indices of the rightmost
2414        component references; e.g. for a sequence .b.c.d, the start
2415        index is for .d.  */
2416     unsigned int start_a;
2417     unsigned int start_b;
2418
2419     /* The sequence contains LENGTH consecutive access functions from
2420        each DR.  */
2421     unsigned int length;
2422
2423     /* The enclosing objects for the A and B sequences respectively,
2424        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
2425        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
2426     tree object_a;
2427     tree object_b;
2428   } full_seq = {}, struct_seq = {};
2429
2430   /* Before each iteration of the loop:
2431
2432      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
2433      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
2434   unsigned int index_a = 0;
2435   unsigned int index_b = 0;
2436   tree ref_a = DR_REF (a);
2437   tree ref_b = DR_REF (b);
2438
2439   /* Now walk the component references from the final DR_REFs back up to
2440      the enclosing base objects.  Each component reference corresponds
2441      to one access function in the DR, with access function 0 being for
2442      the final DR_REF and the highest-indexed access function being the
2443      one that is applied to the base of the DR.
2444
2445      Look for a sequence of component references whose access functions
2446      are comparable (see access_fn_components_comparable_p).  If more
2447      than one such sequence exists, pick the one nearest the base
2448      (which is the leftmost sequence in C notation).  Store this sequence
2449      in FULL_SEQ.
2450
2451      For example, if we have:
2452
2453         struct foo { struct bar s; ... } (*a)[10], (*b)[10];
2454
2455         A: a[0][i].s.c.d
2456         B: __real b[0][i].s.e[i].f
2457
2458      (where d is the same type as the real component of f) then the access
2459      functions would be:
2460
2461                          0   1   2   3
2462         A:              .d  .c  .s [i]
2463
2464                  0   1   2   3   4   5
2465         B:  __real  .f [i]  .e  .s [i]
2466
2467      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
2468      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
2469      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
2470      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
2471      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
2472      index foo[10] arrays, so is again comparable.  The sequence is
2473      therefore:
2474
2475         A: [1, 3]  (i.e. [i].s.c)
2476         B: [3, 5]  (i.e. [i].s.e)
2477
2478      Also look for sequences of component references whose access
2479      functions are comparable and whose enclosing objects have the same
2480      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
2481      example, STRUCT_SEQ would be:
2482
2483         A: [1, 2]  (i.e. s.c)
2484         B: [3, 4]  (i.e. s.e)  */
2485   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
2486     {
2487       /* REF_A and REF_B must be one of the component access types
2488          allowed by dr_analyze_indices.  */
2489       gcc_checking_assert (access_fn_component_p (ref_a));
2490       gcc_checking_assert (access_fn_component_p (ref_b));
2491
2492       /* Get the immediately-enclosing objects for REF_A and REF_B,
2493          i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
2494          and DR_ACCESS_FN (B, INDEX_B).  */
2495       tree object_a = TREE_OPERAND (ref_a, 0);
2496       tree object_b = TREE_OPERAND (ref_b, 0);
2497
2498       tree type_a = TREE_TYPE (object_a);
2499       tree type_b = TREE_TYPE (object_b);
2500       if (access_fn_components_comparable_p (ref_a, ref_b))
2501         {
2502           /* This pair of component accesses is comparable for dependence
2503              analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
2504              DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
2505           if (full_seq.start_a + full_seq.length != index_a
2506               || full_seq.start_b + full_seq.length != index_b)
2507             {
2508               /* The accesses don't extend the current sequence,
2509                  so start a new one here.  */
2510               full_seq.start_a = index_a;
2511               full_seq.start_b = index_b;
2512               full_seq.length = 0;
2513             }
2514
2515           /* Add this pair of references to the sequence.  */
2516           full_seq.length += 1;
2517           full_seq.object_a = object_a;
2518           full_seq.object_b = object_b;
2519
2520           /* If the enclosing objects are structures (and thus have the
2521              same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
2522           if (TREE_CODE (type_a) == RECORD_TYPE)
2523             struct_seq = full_seq;
2524
2525           /* Move to the next containing reference for both A and B.  */
2526           ref_a = object_a;
2527           ref_b = object_b;
2528           index_a += 1;
2529           index_b += 1;
2530           continue;
2531         }
2532
2533       /* Try to approach equal type sizes.  */
2534       if (!COMPLETE_TYPE_P (type_a)
2535           || !COMPLETE_TYPE_P (type_b)
2536           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
2537           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
2538         break;
2539
2540       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
2541       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
2542       if (size_a <= size_b)
2543         {
2544           index_a += 1;
2545           ref_a = object_a;
2546         }
2547       if (size_b <= size_a)
2548         {
2549           index_b += 1;
2550           ref_b = object_b;
2551         }
2552     }
2553
2554   /* See whether FULL_SEQ ends at the base and whether the two bases
2555      are equal.  We do not care about TBAA or alignment info so we can
2556      use OEP_ADDRESS_OF to avoid false negatives.  */
2557   tree base_a = DR_BASE_OBJECT (a);
2558   tree base_b = DR_BASE_OBJECT (b);
2559   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
2560                       && full_seq.start_b + full_seq.length == num_dimensions_b
2561                       && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
2562                       && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
2563                       && types_compatible_p (TREE_TYPE (base_a),
2564                                              TREE_TYPE (base_b))
2565                       && (!loop_nest.exists ()
2566                           || (object_address_invariant_in_loop_p
2567                               (loop_nest[0], base_a))));
2568
2569   /* If the bases are the same, we can include the base variation too.
2570      E.g. the b accesses in:
2571
2572        for (int i = 0; i < n; ++i)
2573          b[i + 4][0] = b[i][0];
2574
2575      have a definite dependence distance of 4, while for:
2576
2577        for (int i = 0; i < n; ++i)
2578          a[i + 4][0] = b[i][0];
2579
2580      the dependence distance depends on the gap between a and b.
2581
2582      If the bases are different then we can only rely on the sequence
2583      rooted at a structure access, since arrays are allowed to overlap
2584      arbitrarily and change shape arbitrarily.  E.g. we treat this as
2585      valid code:
2586
2587        int a[256];
2588        ...
2589        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
2590
2591      where two lvalues with the same int[4][3] type overlap, and where
2592      both lvalues are distinct from the object's declared type.  */
2593   if (same_base_p)
2594     {
2595       if (DR_UNCONSTRAINED_BASE (a))
2596         full_seq.length += 1;
2597     }
2598   else
2599     full_seq = struct_seq;
2600
2601   /* Punt if we didn't find a suitable sequence.  */
2602   if (full_seq.length == 0)
2603     {
2604       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2605       return res;
2606     }
2607
2608   if (!same_base_p)
2609     {
2610       /* Partial overlap is possible for different bases when strict aliasing
2611          is not in effect.  It's also possible if either base involves a union
2612          access; e.g. for:
2613
2614            struct s1 { int a[2]; };
2615            struct s2 { struct s1 b; int c; };
2616            struct s3 { int d; struct s1 e; };
2617            union u { struct s2 f; struct s3 g; } *p, *q;
2618
2619          the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
2620          "p->g.e" (base "p->g") and might partially overlap the s1 at
2621          "q->g.e" (base "q->g").  */
2622       if (!flag_strict_aliasing
2623           || ref_contains_union_access_p (full_seq.object_a)
2624           || ref_contains_union_access_p (full_seq.object_b))
2625         {
2626           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2627           return res;
2628         }
2629
2630       DDR_COULD_BE_INDEPENDENT_P (res) = true;
2631       if (!loop_nest.exists ()
2632           || (object_address_invariant_in_loop_p (loop_nest[0],
2633                                                   full_seq.object_a)
2634               && object_address_invariant_in_loop_p (loop_nest[0],
2635                                                      full_seq.object_b)))
2636         {
2637           DDR_OBJECT_A (res) = full_seq.object_a;
2638           DDR_OBJECT_B (res) = full_seq.object_b;
2639         }
2640     }
2641
2642   DDR_AFFINE_P (res) = true;
2643   DDR_ARE_DEPENDENT (res) = NULL_TREE;
2644   DDR_SUBSCRIPTS (res).create (full_seq.length);
2645   DDR_LOOP_NEST (res) = loop_nest;
2646   DDR_INNER_LOOP (res) = 0;
2647   DDR_SELF_REFERENCE (res) = false;
2648
2649   for (i = 0; i < full_seq.length; ++i)
2650     {
2651       struct subscript *subscript;
2652
2653       subscript = XNEW (struct subscript);
2654       SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
2655       SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
2656       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
2657       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
2658       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
2659       SUB_DISTANCE (subscript) = chrec_dont_know;
2660       DDR_SUBSCRIPTS (res).safe_push (subscript);
2661     }
2662
2663   return res;
2664 }
2665
2666 /* Frees memory used by the conflict function F.  */
2667
2668 static void
2669 free_conflict_function (conflict_function *f)
2670 {
2671   unsigned i;
2672
2673   if (CF_NONTRIVIAL_P (f))
2674     {
2675       for (i = 0; i < f->n; i++)
2676         affine_fn_free (f->fns[i]);
2677     }
2678   free (f);
2679 }
2680
2681 /* Frees memory used by SUBSCRIPTS.  */
2682
2683 static void
2684 free_subscripts (vec<subscript_p> subscripts)
2685 {
2686   unsigned i;
2687   subscript_p s;
2688
2689   FOR_EACH_VEC_ELT (subscripts, i, s)
2690     {
2691       free_conflict_function (s->conflicting_iterations_in_a);
2692       free_conflict_function (s->conflicting_iterations_in_b);
2693       free (s);
2694     }
2695   subscripts.release ();
2696 }
2697
2698 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
2699    description.  */
2700
2701 static inline void
2702 finalize_ddr_dependent (struct data_dependence_relation *ddr,
2703                         tree chrec)
2704 {
2705   DDR_ARE_DEPENDENT (ddr) = chrec;
2706   free_subscripts (DDR_SUBSCRIPTS (ddr));
2707   DDR_SUBSCRIPTS (ddr).create (0);
2708 }
2709
2710 /* The dependence relation DDR cannot be represented by a distance
2711    vector.  */
2712
2713 static inline void
2714 non_affine_dependence_relation (struct data_dependence_relation *ddr)
2715 {
2716   if (dump_file && (dump_flags & TDF_DETAILS))
2717     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
2718
2719   DDR_AFFINE_P (ddr) = false;
2720 }
2721
2722 \f
2723
2724 /* This section contains the classic Banerjee tests.  */
2725
2726 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2727    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
2728
2729 static inline bool
2730 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2731 {
2732   return (evolution_function_is_constant_p (chrec_a)
2733           && evolution_function_is_constant_p (chrec_b));
2734 }
2735
2736 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2737    variable, i.e., if the SIV (Single Index Variable) test is true.  */
2738
2739 static bool
2740 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2741 {
2742   if ((evolution_function_is_constant_p (chrec_a)
2743        && evolution_function_is_univariate_p (chrec_b))
2744       || (evolution_function_is_constant_p (chrec_b)
2745           && evolution_function_is_univariate_p (chrec_a)))
2746     return true;
2747
2748   if (evolution_function_is_univariate_p (chrec_a)
2749       && evolution_function_is_univariate_p (chrec_b))
2750     {
2751       switch (TREE_CODE (chrec_a))
2752         {
2753         case POLYNOMIAL_CHREC:
2754           switch (TREE_CODE (chrec_b))
2755             {
2756             case POLYNOMIAL_CHREC:
2757               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2758                 return false;
2759               /* FALLTHRU */
2760
2761             default:
2762               return true;
2763             }
2764
2765         default:
2766           return true;
2767         }
2768     }
2769
2770   return false;
2771 }
2772
2773 /* Creates a conflict function with N dimensions.  The affine functions
2774    in each dimension follow.  */
2775
2776 static conflict_function *
2777 conflict_fn (unsigned n, ...)
2778 {
2779   unsigned i;
2780   conflict_function *ret = XCNEW (conflict_function);
2781   va_list ap;
2782
2783   gcc_assert (n > 0 && n <= MAX_DIM);
2784   va_start (ap, n);
2785
2786   ret->n = n;
2787   for (i = 0; i < n; i++)
2788     ret->fns[i] = va_arg (ap, affine_fn);
2789   va_end (ap);
2790
2791   return ret;
2792 }
2793
2794 /* Returns constant affine function with value CST.  */
2795
2796 static affine_fn
2797 affine_fn_cst (tree cst)
2798 {
2799   affine_fn fn;
2800   fn.create (1);
2801   fn.quick_push (cst);
2802   return fn;
2803 }
2804
2805 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
2806
2807 static affine_fn
2808 affine_fn_univar (tree cst, unsigned dim, tree coef)
2809 {
2810   affine_fn fn;
2811   fn.create (dim + 1);
2812   unsigned i;
2813
2814   gcc_assert (dim > 0);
2815   fn.quick_push (cst);
2816   for (i = 1; i < dim; i++)
2817     fn.quick_push (integer_zero_node);
2818   fn.quick_push (coef);
2819   return fn;
2820 }
2821
2822 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
2823    *OVERLAPS_B are initialized to the functions that describe the
2824    relation between the elements accessed twice by CHREC_A and
2825    CHREC_B.  For k >= 0, the following property is verified:
2826
2827    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2828
2829 static void
2830 analyze_ziv_subscript (tree chrec_a,
2831                        tree chrec_b,
2832                        conflict_function **overlaps_a,
2833                        conflict_function **overlaps_b,
2834                        tree *last_conflicts)
2835 {
2836   tree type, difference;
2837   dependence_stats.num_ziv++;
2838
2839   if (dump_file && (dump_flags & TDF_DETAILS))
2840     fprintf (dump_file, "(analyze_ziv_subscript \n");
2841
2842   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2843   chrec_a = chrec_convert (type, chrec_a, NULL);
2844   chrec_b = chrec_convert (type, chrec_b, NULL);
2845   difference = chrec_fold_minus (type, chrec_a, chrec_b);
2846
2847   switch (TREE_CODE (difference))
2848     {
2849     case INTEGER_CST:
2850       if (integer_zerop (difference))
2851         {
2852           /* The difference is equal to zero: the accessed index
2853              overlaps for each iteration in the loop.  */
2854           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2855           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2856           *last_conflicts = chrec_dont_know;
2857           dependence_stats.num_ziv_dependent++;
2858         }
2859       else
2860         {
2861           /* The accesses do not overlap.  */
2862           *overlaps_a = conflict_fn_no_dependence ();
2863           *overlaps_b = conflict_fn_no_dependence ();
2864           *last_conflicts = integer_zero_node;
2865           dependence_stats.num_ziv_independent++;
2866         }
2867       break;
2868
2869     default:
2870       /* We're not sure whether the indexes overlap.  For the moment,
2871          conservatively answer "don't know".  */
2872       if (dump_file && (dump_flags & TDF_DETAILS))
2873         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
2874
2875       *overlaps_a = conflict_fn_not_known ();
2876       *overlaps_b = conflict_fn_not_known ();
2877       *last_conflicts = chrec_dont_know;
2878       dependence_stats.num_ziv_unimplemented++;
2879       break;
2880     }
2881
2882   if (dump_file && (dump_flags & TDF_DETAILS))
2883     fprintf (dump_file, ")\n");
2884 }
2885
2886 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2887    and only if it fits to the int type.  If this is not the case, or the
2888    bound  on the number of iterations of LOOP could not be derived, returns
2889    chrec_dont_know.  */
2890
2891 static tree
2892 max_stmt_executions_tree (struct loop *loop)
2893 {
2894   widest_int nit;
2895
2896   if (!max_stmt_executions (loop, &nit))
2897     return chrec_dont_know;
2898
2899   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2900     return chrec_dont_know;
2901
2902   return wide_int_to_tree (unsigned_type_node, nit);
2903 }
2904
2905 /* Determine whether the CHREC is always positive/negative.  If the expression
2906    cannot be statically analyzed, return false, otherwise set the answer into
2907    VALUE.  */
2908
2909 static bool
2910 chrec_is_positive (tree chrec, bool *value)
2911 {
2912   bool value0, value1, value2;
2913   tree end_value, nb_iter;
2914
2915   switch (TREE_CODE (chrec))
2916     {
2917     case POLYNOMIAL_CHREC:
2918       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2919           || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2920         return false;
2921
2922       /* FIXME -- overflows.  */
2923       if (value0 == value1)
2924         {
2925           *value = value0;
2926           return true;
2927         }
2928
2929       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2930          and the proof consists in showing that the sign never
2931          changes during the execution of the loop, from 0 to
2932          loop->nb_iterations.  */
2933       if (!evolution_function_is_affine_p (chrec))
2934         return false;
2935
2936       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2937       if (chrec_contains_undetermined (nb_iter))
2938         return false;
2939
2940 #if 0
2941       /* TODO -- If the test is after the exit, we may decrease the number of
2942          iterations by one.  */
2943       if (after_exit)
2944         nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2945 #endif
2946
2947       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2948
2949       if (!chrec_is_positive (end_value, &value2))
2950         return false;
2951
2952       *value = value0;
2953       return value0 == value1;
2954
2955     case INTEGER_CST:
2956       switch (tree_int_cst_sgn (chrec))
2957         {
2958         case -1:
2959           *value = false;
2960           break;
2961         case 1:
2962           *value = true;
2963           break;
2964         default:
2965           return false;
2966         }
2967       return true;
2968
2969     default:
2970       return false;
2971     }
2972 }
2973
2974
2975 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
2976    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
2977    *OVERLAPS_B are initialized to the functions that describe the
2978    relation between the elements accessed twice by CHREC_A and
2979    CHREC_B.  For k >= 0, the following property is verified:
2980
2981    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2982
2983 static void
2984 analyze_siv_subscript_cst_affine (tree chrec_a,
2985                                   tree chrec_b,
2986                                   conflict_function **overlaps_a,
2987                                   conflict_function **overlaps_b,
2988                                   tree *last_conflicts)
2989 {
2990   bool value0, value1, value2;
2991   tree type, difference, tmp;
2992
2993   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2994   chrec_a = chrec_convert (type, chrec_a, NULL);
2995   chrec_b = chrec_convert (type, chrec_b, NULL);
2996   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
2997
2998   /* Special case overlap in the first iteration.  */
2999   if (integer_zerop (difference))
3000     {
3001       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3002       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3003       *last_conflicts = integer_one_node;
3004       return;
3005     }
3006
3007   if (!chrec_is_positive (initial_condition (difference), &value0))
3008     {
3009       if (dump_file && (dump_flags & TDF_DETAILS))
3010         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3011
3012       dependence_stats.num_siv_unimplemented++;
3013       *overlaps_a = conflict_fn_not_known ();
3014       *overlaps_b = conflict_fn_not_known ();
3015       *last_conflicts = chrec_dont_know;
3016       return;
3017     }
3018   else
3019     {
3020       if (value0 == false)
3021         {
3022           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3023               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3024             {
3025               if (dump_file && (dump_flags & TDF_DETAILS))
3026                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3027
3028               *overlaps_a = conflict_fn_not_known ();
3029               *overlaps_b = conflict_fn_not_known ();
3030               *last_conflicts = chrec_dont_know;
3031               dependence_stats.num_siv_unimplemented++;
3032               return;
3033             }
3034           else
3035             {
3036               if (value1 == true)
3037                 {
3038                   /* Example:
3039                      chrec_a = 12
3040                      chrec_b = {10, +, 1}
3041                   */
3042
3043                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3044                     {
3045                       HOST_WIDE_INT numiter;
3046                       struct loop *loop = get_chrec_loop (chrec_b);
3047
3048                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3049                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
3050                                          fold_build1 (ABS_EXPR, type, difference),
3051                                          CHREC_RIGHT (chrec_b));
3052                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3053                       *last_conflicts = integer_one_node;
3054
3055
3056                       /* Perform weak-zero siv test to see if overlap is
3057                          outside the loop bounds.  */
3058                       numiter = max_stmt_executions_int (loop);
3059
3060                       if (numiter >= 0
3061                           && compare_tree_int (tmp, numiter) > 0)
3062                         {
3063                           free_conflict_function (*overlaps_a);
3064                           free_conflict_function (*overlaps_b);
3065                           *overlaps_a = conflict_fn_no_dependence ();
3066                           *overlaps_b = conflict_fn_no_dependence ();
3067                           *last_conflicts = integer_zero_node;
3068                           dependence_stats.num_siv_independent++;
3069                           return;
3070                         }
3071                       dependence_stats.num_siv_dependent++;
3072                       return;
3073                     }
3074
3075                   /* When the step does not divide the difference, there are
3076                      no overlaps.  */
3077                   else
3078                     {
3079                       *overlaps_a = conflict_fn_no_dependence ();
3080                       *overlaps_b = conflict_fn_no_dependence ();
3081                       *last_conflicts = integer_zero_node;
3082                       dependence_stats.num_siv_independent++;
3083                       return;
3084                     }
3085                 }
3086
3087               else
3088                 {
3089                   /* Example:
3090                      chrec_a = 12
3091                      chrec_b = {10, +, -1}
3092
3093                      In this case, chrec_a will not overlap with chrec_b.  */
3094                   *overlaps_a = conflict_fn_no_dependence ();
3095                   *overlaps_b = conflict_fn_no_dependence ();
3096                   *last_conflicts = integer_zero_node;
3097                   dependence_stats.num_siv_independent++;
3098                   return;
3099                 }
3100             }
3101         }
3102       else
3103         {
3104           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3105               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3106             {
3107               if (dump_file && (dump_flags & TDF_DETAILS))
3108                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3109
3110               *overlaps_a = conflict_fn_not_known ();
3111               *overlaps_b = conflict_fn_not_known ();
3112               *last_conflicts = chrec_dont_know;
3113               dependence_stats.num_siv_unimplemented++;
3114               return;
3115             }
3116           else
3117             {
3118               if (value2 == false)
3119                 {
3120                   /* Example:
3121                      chrec_a = 3
3122                      chrec_b = {10, +, -1}
3123                   */
3124                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3125                     {
3126                       HOST_WIDE_INT numiter;
3127                       struct loop *loop = get_chrec_loop (chrec_b);
3128
3129                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3130                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3131                                          CHREC_RIGHT (chrec_b));
3132                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3133                       *last_conflicts = integer_one_node;
3134
3135                       /* Perform weak-zero siv test to see if overlap is
3136                          outside the loop bounds.  */
3137                       numiter = max_stmt_executions_int (loop);
3138
3139                       if (numiter >= 0
3140                           && compare_tree_int (tmp, numiter) > 0)
3141                         {
3142                           free_conflict_function (*overlaps_a);
3143                           free_conflict_function (*overlaps_b);
3144                           *overlaps_a = conflict_fn_no_dependence ();
3145                           *overlaps_b = conflict_fn_no_dependence ();
3146                           *last_conflicts = integer_zero_node;
3147                           dependence_stats.num_siv_independent++;
3148                           return;
3149                         }
3150                       dependence_stats.num_siv_dependent++;
3151                       return;
3152                     }
3153
3154                   /* When the step does not divide the difference, there
3155                      are no overlaps.  */
3156                   else
3157                     {
3158                       *overlaps_a = conflict_fn_no_dependence ();
3159                       *overlaps_b = conflict_fn_no_dependence ();
3160                       *last_conflicts = integer_zero_node;
3161                       dependence_stats.num_siv_independent++;
3162                       return;
3163                     }
3164                 }
3165               else
3166                 {
3167                   /* Example:
3168                      chrec_a = 3
3169                      chrec_b = {4, +, 1}
3170
3171                      In this case, chrec_a will not overlap with chrec_b.  */
3172                   *overlaps_a = conflict_fn_no_dependence ();
3173                   *overlaps_b = conflict_fn_no_dependence ();
3174                   *last_conflicts = integer_zero_node;
3175                   dependence_stats.num_siv_independent++;
3176                   return;
3177                 }
3178             }
3179         }
3180     }
3181 }
3182
3183 /* Helper recursive function for initializing the matrix A.  Returns
3184    the initial value of CHREC.  */
3185
3186 static tree
3187 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3188 {
3189   gcc_assert (chrec);
3190
3191   switch (TREE_CODE (chrec))
3192     {
3193     case POLYNOMIAL_CHREC:
3194       if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
3195         return chrec_dont_know;
3196       A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
3197       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3198
3199     case PLUS_EXPR:
3200     case MULT_EXPR:
3201     case MINUS_EXPR:
3202       {
3203         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3204         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3205
3206         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3207       }
3208
3209     CASE_CONVERT:
3210       {
3211         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3212         return chrec_convert (chrec_type (chrec), op, NULL);
3213       }
3214
3215     case BIT_NOT_EXPR:
3216       {
3217         /* Handle ~X as -1 - X.  */
3218         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3219         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3220                               build_int_cst (TREE_TYPE (chrec), -1), op);
3221       }
3222
3223     case INTEGER_CST:
3224       return chrec;
3225
3226     default:
3227       gcc_unreachable ();
3228       return NULL_TREE;
3229     }
3230 }
3231
/* Quotient of NUM by DEN using the built-in division operator.  Despite
   the name, for integer operands this truncates toward zero rather than
   rounding toward negative infinity; the two agree when both operands
   have the same sign.  */
#define FLOOR_DIV(num, den) ((num) / (den))
3233
3234 /* Solves the special case of the Diophantine equation:
3235    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3236
3237    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
3238    number of iterations that loops X and Y run.  The overlaps will be
3239    constructed as evolutions in dimension DIM.  */
3240
3241 static void
3242 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3243                                          HOST_WIDE_INT step_a,
3244                                          HOST_WIDE_INT step_b,
3245                                          affine_fn *overlaps_a,
3246                                          affine_fn *overlaps_b,
3247                                          tree *last_conflicts, int dim)
3248 {
3249   if (((step_a > 0 && step_b > 0)
3250        || (step_a < 0 && step_b < 0)))
3251     {
3252       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3253       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3254
3255       gcd_steps_a_b = gcd (step_a, step_b);
3256       step_overlaps_a = step_b / gcd_steps_a_b;
3257       step_overlaps_b = step_a / gcd_steps_a_b;
3258
3259       if (niter > 0)
3260         {
3261           tau2 = FLOOR_DIV (niter, step_overlaps_a);
3262           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3263           last_conflict = tau2;
3264           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3265         }
3266       else
3267         *last_conflicts = chrec_dont_know;
3268
3269       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3270                                       build_int_cst (NULL_TREE,
3271                                                      step_overlaps_a));
3272       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3273                                       build_int_cst (NULL_TREE,
3274                                                      step_overlaps_b));
3275     }
3276
3277   else
3278     {
3279       *overlaps_a = affine_fn_cst (integer_zero_node);
3280       *overlaps_b = affine_fn_cst (integer_zero_node);
3281       *last_conflicts = integer_zero_node;
3282     }
3283 }
3284
3285 /* Solves the special case of a Diophantine equation where CHREC_A is
3286    an affine bivariate function, and CHREC_B is an affine univariate
3287    function.  For example,
3288
3289    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
3290
3291    has the following overlapping functions:
3292
3293    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
3294    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
3295    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
3296
3297    FORNOW: This is a specialized implementation for a case occurring in
3298    a common benchmark.  Implement the general algorithm.  */
3299
3300 static void
3301 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
3302                                       conflict_function **overlaps_a,
3303                                       conflict_function **overlaps_b,
3304                                       tree *last_conflicts)
3305 {
3306   bool xz_p, yz_p, xyz_p;
3307   HOST_WIDE_INT step_x, step_y, step_z;
3308   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
3309   affine_fn overlaps_a_xz, overlaps_b_xz;
3310   affine_fn overlaps_a_yz, overlaps_b_yz;
3311   affine_fn overlaps_a_xyz, overlaps_b_xyz;
3312   affine_fn ova1, ova2, ovb;
3313   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
3314
3315   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
3316   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
3317   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
3318
3319   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
3320   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
3321   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
3322
3323   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
3324     {
3325       if (dump_file && (dump_flags & TDF_DETAILS))
3326         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
3327
3328       *overlaps_a = conflict_fn_not_known ();
3329       *overlaps_b = conflict_fn_not_known ();
3330       *last_conflicts = chrec_dont_know;
3331       return;
3332     }
3333
3334   niter = MIN (niter_x, niter_z);
3335   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
3336                                            &overlaps_a_xz,
3337                                            &overlaps_b_xz,
3338                                            &last_conflicts_xz, 1);
3339   niter = MIN (niter_y, niter_z);
3340   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
3341                                            &overlaps_a_yz,
3342                                            &overlaps_b_yz,
3343                                            &last_conflicts_yz, 2);
3344   niter = MIN (niter_x, niter_z);
3345   niter = MIN (niter_y, niter);
3346   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
3347                                            &overlaps_a_xyz,
3348                                            &overlaps_b_xyz,
3349                                            &last_conflicts_xyz, 3);
3350
3351   xz_p = !integer_zerop (last_conflicts_xz);
3352   yz_p = !integer_zerop (last_conflicts_yz);
3353   xyz_p = !integer_zerop (last_conflicts_xyz);
3354
3355   if (xz_p || yz_p || xyz_p)
3356     {
3357       ova1 = affine_fn_cst (integer_zero_node);
3358       ova2 = affine_fn_cst (integer_zero_node);
3359       ovb = affine_fn_cst (integer_zero_node);
3360       if (xz_p)
3361         {
3362           affine_fn t0 = ova1;
3363           affine_fn t2 = ovb;
3364
3365           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
3366           ovb = affine_fn_plus (ovb, overlaps_b_xz);
3367           affine_fn_free (t0);
3368           affine_fn_free (t2);
3369           *last_conflicts = last_conflicts_xz;
3370         }
3371       if (yz_p)
3372         {
3373           affine_fn t0 = ova2;
3374           affine_fn t2 = ovb;
3375
3376           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
3377           ovb = affine_fn_plus (ovb, overlaps_b_yz);
3378           affine_fn_free (t0);
3379           affine_fn_free (t2);
3380           *last_conflicts = last_conflicts_yz;
3381         }
3382       if (xyz_p)
3383         {
3384           affine_fn t0 = ova1;
3385           affine_fn t2 = ova2;
3386           affine_fn t4 = ovb;
3387
3388           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
3389           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
3390           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
3391           affine_fn_free (t0);
3392           affine_fn_free (t2);
3393           affine_fn_free (t4);
3394           *last_conflicts = last_conflicts_xyz;
3395         }
3396       *overlaps_a = conflict_fn (2, ova1, ova2);
3397       *overlaps_b = conflict_fn (1, ovb);
3398     }
3399   else
3400     {
3401       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3402       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3403       *last_conflicts = integer_zero_node;
3404     }
3405
3406   affine_fn_free (overlaps_a_xz);
3407   affine_fn_free (overlaps_b_xz);
3408   affine_fn_free (overlaps_a_yz);
3409   affine_fn_free (overlaps_b_yz);
3410   affine_fn_free (overlaps_a_xyz);
3411   affine_fn_free (overlaps_b_xyz);
3412 }
3413
3414 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
3415
3416 static void
3417 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3418                     int size)
3419 {
3420   memcpy (vec2, vec1, size * sizeof (*vec1));
3421 }
3422
3423 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
3424
3425 static void
3426 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3427                     int m, int n)
3428 {
3429   int i;
3430
3431   for (i = 0; i < m; i++)
3432     lambda_vector_copy (mat1[i], mat2[i], n);
3433 }
3434
3435 /* Store the N x N identity matrix in MAT.  */
3436
3437 static void
3438 lambda_matrix_id (lambda_matrix mat, int size)
3439 {
3440   int i, j;
3441
3442   for (i = 0; i < size; i++)
3443     for (j = 0; j < size; j++)
3444       mat[i][j] = (i == j) ? 1 : 0;
3445 }
3446
3447 /* Return the index of the first nonzero element of vector VEC1 between
3448    START and N.  We must have START <= N.
3449    Returns N if VEC1 is the zero vector.  */
3450
3451 static int
3452 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3453 {
3454   int j = start;
3455   while (j < n && vec1[j] == 0)
3456     j++;
3457   return j;
3458 }
3459
3460 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3461    R2 = R2 + CONST1 * R1.  */
3462
3463 static void
3464 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
3465                        lambda_int const1)
3466 {
3467   int i;
3468
3469   if (const1 == 0)
3470     return;
3471
3472   for (i = 0; i < n; i++)
3473     mat[r2][i] += const1 * mat[r1][i];
3474 }
3475
3476 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3477    and store the result in VEC2.  */
3478
3479 static void
3480 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3481                           int size, lambda_int const1)
3482 {
3483   int i;
3484
3485   if (const1 == 0)
3486     lambda_vector_clear (vec2, size);
3487   else
3488     for (i = 0; i < size; i++)
3489       vec2[i] = const1 * vec1[i];
3490 }
3491
3492 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
3493
3494 static void
3495 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3496                       int size)
3497 {
3498   lambda_vector_mult_const (vec1, vec2, size, -1);
3499 }
3500
3501 /* Negate row R1 of matrix MAT which has N columns.  */
3502
3503 static void
3504 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3505 {
3506   lambda_vector_negate (mat[r1], mat[r1], n);
3507 }
3508
3509 /* Return true if two vectors are equal.  */
3510
3511 static bool
3512 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3513 {
3514   int i;
3515   for (i = 0; i < size; i++)
3516     if (vec1[i] != vec2[i])
3517       return false;
3518   return true;
3519 }
3520
3521 /* Given an M x N integer matrix A, this function determines an M x
3522    M unimodular matrix U, and an M x N echelon matrix S such that
3523    "U.A = S".  This decomposition is also known as "right Hermite".
3524
3525    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
3526    Restructuring Compilers" Utpal Banerjee.  */
3527
3528 static void
3529 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
3530                              lambda_matrix S, lambda_matrix U)
3531 {
3532   int i, j, i0 = 0;
3533
3534   lambda_matrix_copy (A, S, m, n);
3535   lambda_matrix_id (U, m);
3536
3537   for (j = 0; j < n; j++)
3538     {
3539       if (lambda_vector_first_nz (S[j], m, i0) < m)
3540         {
3541           ++i0;
3542           for (i = m - 1; i >= i0; i--)
3543             {
3544               while (S[i][j] != 0)
3545                 {
3546                   lambda_int sigma, factor, a, b;
3547
3548                   a = S[i-1][j];
3549                   b = S[i][j];
3550                   sigma = (a * b < 0) ? -1: 1;
3551                   a = abs_hwi (a);
3552                   b = abs_hwi (b);
3553                   factor = sigma * (a / b);
3554
3555                   lambda_matrix_row_add (S, n, i, i-1, -factor);
3556                   std::swap (S[i], S[i-1]);
3557
3558                   lambda_matrix_row_add (U, m, i, i-1, -factor);
3559                   std::swap (U[i], U[i-1]);
3560                 }
3561             }
3562         }
3563     }
3564 }
3565
3566 /* Determines the overlapping elements due to accesses CHREC_A and
3567    CHREC_B, that are affine functions.  This function cannot handle
3568    symbolic evolution functions, ie. when initial conditions are
3569    parameters, because it uses lambda matrices of integers.  */
3570
3571 static void
3572 analyze_subscript_affine_affine (tree chrec_a,
3573                                  tree chrec_b,
3574                                  conflict_function **overlaps_a,
3575                                  conflict_function **overlaps_b,
3576                                  tree *last_conflicts)
3577 {
3578   unsigned nb_vars_a, nb_vars_b, dim;
3579   HOST_WIDE_INT gamma, gcd_alpha_beta;
3580   lambda_matrix A, U, S;
3581   struct obstack scratch_obstack;
3582
3583   if (eq_evolutions_p (chrec_a, chrec_b))
3584     {
3585       /* The accessed index overlaps for each iteration in the
3586          loop.  */
3587       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3588       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3589       *last_conflicts = chrec_dont_know;
3590       return;
3591     }
3592   if (dump_file && (dump_flags & TDF_DETAILS))
3593     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
3594
3595   /* For determining the initial intersection, we have to solve a
3596      Diophantine equation.  This is the most time consuming part.
3597
3598      For answering to the question: "Is there a dependence?" we have
3599      to prove that there exists a solution to the Diophantine
3600      equation, and that the solution is in the iteration domain,
3601      i.e. the solution is positive or zero, and that the solution
3602      happens before the upper bound loop.nb_iterations.  Otherwise
3603      there is no dependence.  This function outputs a description of
3604      the iterations that hold the intersections.  */
3605
3606   nb_vars_a = nb_vars_in_chrec (chrec_a);
3607   nb_vars_b = nb_vars_in_chrec (chrec_b);
3608
3609   gcc_obstack_init (&scratch_obstack);
3610
3611   dim = nb_vars_a + nb_vars_b;
3612   U = lambda_matrix_new (dim, dim, &scratch_obstack);
3613   A = lambda_matrix_new (dim, 1, &scratch_obstack);
3614   S = lambda_matrix_new (dim, 1, &scratch_obstack);
3615
3616   tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
3617   tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
3618   if (init_a == chrec_dont_know
3619       || init_b == chrec_dont_know)
3620     {
3621       if (dump_file && (dump_flags & TDF_DETAILS))
3622         fprintf (dump_file, "affine-affine test failed: "
3623                  "representation issue.\n");
3624       *overlaps_a = conflict_fn_not_known ();
3625       *overlaps_b = conflict_fn_not_known ();
3626       *last_conflicts = chrec_dont_know;
3627       goto end_analyze_subs_aa;
3628     }
3629   gamma = int_cst_value (init_b) - int_cst_value (init_a);
3630
3631   /* Don't do all the hard work of solving the Diophantine equation
3632      when we already know the solution: for example,
3633      | {3, +, 1}_1
3634      | {3, +, 4}_2
3635      | gamma = 3 - 3 = 0.
3636      Then the first overlap occurs during the first iterations:
3637      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
3638   */
3639   if (gamma == 0)
3640     {
3641       if (nb_vars_a == 1 && nb_vars_b == 1)
3642         {
3643           HOST_WIDE_INT step_a, step_b;
3644           HOST_WIDE_INT niter, niter_a, niter_b;
3645           affine_fn ova, ovb;
3646
3647           niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
3648           niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
3649           niter = MIN (niter_a, niter_b);
3650           step_a = int_cst_value (CHREC_RIGHT (chrec_a));
3651           step_b = int_cst_value (CHREC_RIGHT (chrec_b));
3652
3653           compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
3654                                                    &ova, &ovb,
3655                                                    last_conflicts, 1);
3656           *overlaps_a = conflict_fn (1, ova);
3657           *overlaps_b = conflict_fn (1, ovb);
3658         }
3659
3660       else if (nb_vars_a == 2 && nb_vars_b == 1)
3661         compute_overlap_steps_for_affine_1_2
3662           (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
3663
3664       else if (nb_vars_a == 1 && nb_vars_b == 2)
3665         compute_overlap_steps_for_affine_1_2
3666           (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
3667
3668       else
3669         {
3670           if (dump_file && (dump_flags & TDF_DETAILS))
3671             fprintf (dump_file, "affine-affine test failed: too many variables.\n");
3672           *overlaps_a = conflict_fn_not_known ();
3673           *overlaps_b = conflict_fn_not_known ();
3674           *last_conflicts = chrec_dont_know;
3675         }
3676       goto end_analyze_subs_aa;
3677     }
3678
3679   /* U.A = S */
3680   lambda_matrix_right_hermite (A, dim, 1, S, U);
3681
3682   if (S[0][0] < 0)
3683     {
3684       S[0][0] *= -1;
3685       lambda_matrix_row_negate (U, dim, 0);
3686     }
3687   gcd_alpha_beta = S[0][0];
3688
3689   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
3690      but that is a quite strange case.  Instead of ICEing, answer
3691      don't know.  */
3692   if (gcd_alpha_beta == 0)
3693     {
3694       *overlaps_a = conflict_fn_not_known ();
3695       *overlaps_b = conflict_fn_not_known ();
3696       *last_conflicts = chrec_dont_know;
3697       goto end_analyze_subs_aa;
3698     }
3699
3700   /* The classic "gcd-test".  */
3701   if (!int_divides_p (gcd_alpha_beta, gamma))
3702     {
3703       /* The "gcd-test" has determined that there is no integer
3704          solution, i.e. there is no dependence.  */
3705       *overlaps_a = conflict_fn_no_dependence ();
3706       *overlaps_b = conflict_fn_no_dependence ();
3707       *last_conflicts = integer_zero_node;
3708     }
3709
3710   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
3711   else if (nb_vars_a == 1 && nb_vars_b == 1)
3712     {
3713       /* Both functions should have the same evolution sign.  */
3714       if (((A[0][0] > 0 && -A[1][0] > 0)
3715            || (A[0][0] < 0 && -A[1][0] < 0)))
3716         {
3717           /* The solutions are given by:
3718              |
3719              | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
3720              |                           [u21 u22]    [y0]
3721
3722              For a given integer t.  Using the following variables,
3723
3724              | i0 = u11 * gamma / gcd_alpha_beta
3725              | j0 = u12 * gamma / gcd_alpha_beta
3726              | i1 = u21
3727              | j1 = u22
3728
3729              the solutions are:
3730
3731              | x0 = i0 + i1 * t,
3732              | y0 = j0 + j1 * t.  */
3733           HOST_WIDE_INT i0, j0, i1, j1;
3734
3735           i0 = U[0][0] * gamma / gcd_alpha_beta;
3736           j0 = U[0][1] * gamma / gcd_alpha_beta;
3737           i1 = U[1][0];
3738           j1 = U[1][1];
3739
3740           if ((i1 == 0 && i0 < 0)
3741               || (j1 == 0 && j0 < 0))
3742             {
3743               /* There is no solution.
3744                  FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
3745                  falls in here, but for the moment we don't look at the
3746                  upper bound of the iteration domain.  */
3747               *overlaps_a = conflict_fn_no_dependence ();
3748               *overlaps_b = conflict_fn_no_dependence ();
3749               *last_conflicts = integer_zero_node;
3750               goto end_analyze_subs_aa;
3751             }
3752
3753           if (i1 > 0 && j1 > 0)
3754             {
3755               HOST_WIDE_INT niter_a
3756                 = max_stmt_executions_int (get_chrec_loop (chrec_a));
3757               HOST_WIDE_INT niter_b
3758                 = max_stmt_executions_int (get_chrec_loop (chrec_b));
3759               HOST_WIDE_INT niter = MIN (niter_a, niter_b);
3760
3761               /* (X0, Y0) is a solution of the Diophantine equation:
3762                  "chrec_a (X0) = chrec_b (Y0)".  */
3763               HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
3764                                         CEIL (-j0, j1));
3765               HOST_WIDE_INT x0 = i1 * tau1 + i0;
3766               HOST_WIDE_INT y0 = j1 * tau1 + j0;
3767
3768               /* (X1, Y1) is the smallest positive solution of the eq
3769                  "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
3770                  first conflict occurs.  */
3771               HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
3772               HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
3773               HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
3774
3775               if (niter > 0)
3776                 {
3777                   /* If the overlap occurs outside of the bounds of the
3778                      loop, there is no dependence.  */
3779                   if (x1 >= niter_a || y1 >= niter_b)
3780                     {
3781                       *overlaps_a = conflict_fn_no_dependence ();
3782                       *overlaps_b = conflict_fn_no_dependence ();
3783                       *last_conflicts = integer_zero_node;
3784                       goto end_analyze_subs_aa;
3785                     }
3786
3787                   /* max stmt executions can get quite large, avoid
3788                      overflows by using wide ints here.  */
3789                   widest_int tau2
3790                     = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
3791                                 wi::sdiv_floor (wi::sub (niter_b, j0), j1));
3792                   widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
3793                   if (wi::min_precision (last_conflict, SIGNED)
3794                       <= TYPE_PRECISION (integer_type_node))
3795                     *last_conflicts
3796                        = build_int_cst (integer_type_node,
3797                                         last_conflict.to_shwi ());
3798                   else
3799                     *last_conflicts = chrec_dont_know;
3800                 }
3801               else
3802                 *last_conflicts = chrec_dont_know;
3803
3804               *overlaps_a
3805                 = conflict_fn (1,
3806                                affine_fn_univar (build_int_cst (NULL_TREE, x1),
3807                                                  1,
3808                                                  build_int_cst (NULL_TREE, i1)));
3809               *overlaps_b
3810                 = conflict_fn (1,
3811                                affine_fn_univar (build_int_cst (NULL_TREE, y1),
3812                                                  1,
3813                                                  build_int_cst (NULL_TREE, j1)));
3814             }
3815           else
3816             {
3817               /* FIXME: For the moment, the upper bound of the
3818                  iteration domain for i and j is not checked.  */
3819               if (dump_file && (dump_flags & TDF_DETAILS))
3820                 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3821               *overlaps_a = conflict_fn_not_known ();
3822               *overlaps_b = conflict_fn_not_known ();
3823               *last_conflicts = chrec_dont_know;
3824             }
3825         }
3826       else
3827         {
3828           if (dump_file && (dump_flags & TDF_DETAILS))
3829             fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3830           *overlaps_a = conflict_fn_not_known ();
3831           *overlaps_b = conflict_fn_not_known ();
3832           *last_conflicts = chrec_dont_know;
3833         }
3834     }
3835   else
3836     {
3837       if (dump_file && (dump_flags & TDF_DETAILS))
3838         fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3839       *overlaps_a = conflict_fn_not_known ();
3840       *overlaps_b = conflict_fn_not_known ();
3841       *last_conflicts = chrec_dont_know;
3842     }
3843
3844 end_analyze_subs_aa:
3845   obstack_free (&scratch_obstack, NULL);
3846   if (dump_file && (dump_flags & TDF_DETAILS))
3847     {
3848       fprintf (dump_file, "  (overlaps_a = ");
3849       dump_conflict_function (dump_file, *overlaps_a);
3850       fprintf (dump_file, ")\n  (overlaps_b = ");
3851       dump_conflict_function (dump_file, *overlaps_b);
3852       fprintf (dump_file, "))\n");
3853     }
3854 }
3855
3856 /* Returns true when analyze_subscript_affine_affine can be used for
3857    determining the dependence relation between chrec_a and chrec_b,
3858    that contain symbols.  This function modifies chrec_a and chrec_b
3859    such that the analysis result is the same, and such that they don't
3860    contain symbols, and then can safely be passed to the analyzer.
3861
3862    Example: The analysis of the following tuples of evolutions produce
3863    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3864    vs. {0, +, 1}_1
3865
3866    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3867    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3868 */
3869
3870 static bool
3871 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
3872 {
3873   tree diff, type, left_a, left_b, right_b;
3874
3875   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
3876       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
3877     /* FIXME: For the moment not handled.  Might be refined later.  */
3878     return false;
3879
3880   type = chrec_type (*chrec_a);
3881   left_a = CHREC_LEFT (*chrec_a);
3882   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
3883   diff = chrec_fold_minus (type, left_a, left_b);
3884
3885   if (!evolution_function_is_constant_p (diff))
3886     return false;
3887
3888   if (dump_file && (dump_flags & TDF_DETAILS))
3889     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
3890
3891   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
3892                                      diff, CHREC_RIGHT (*chrec_a));
3893   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
3894   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
3895                                      build_int_cst (type, 0),
3896                                      right_b);
3897   return true;
3898 }
3899
3900 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
3901    *OVERLAPS_B are initialized to the functions that describe the
3902    relation between the elements accessed twice by CHREC_A and
3903    CHREC_B.  For k >= 0, the following property is verified:
3904
3905    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3906
3907 static void
3908 analyze_siv_subscript (tree chrec_a,
3909                        tree chrec_b,
3910                        conflict_function **overlaps_a,
3911                        conflict_function **overlaps_b,
3912                        tree *last_conflicts,
3913                        int loop_nest_num)
3914 {
3915   dependence_stats.num_siv++;
3916
3917   if (dump_file && (dump_flags & TDF_DETAILS))
3918     fprintf (dump_file, "(analyze_siv_subscript \n");
3919
3920   if (evolution_function_is_constant_p (chrec_a)
3921       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3922     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
3923                                       overlaps_a, overlaps_b, last_conflicts);
3924
3925   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3926            && evolution_function_is_constant_p (chrec_b))
3927     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
3928                                       overlaps_b, overlaps_a, last_conflicts);
3929
3930   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3931            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3932     {
3933       if (!chrec_contains_symbols (chrec_a)
3934           && !chrec_contains_symbols (chrec_b))
3935         {
3936           analyze_subscript_affine_affine (chrec_a, chrec_b,
3937                                            overlaps_a, overlaps_b,
3938                                            last_conflicts);
3939
3940           if (CF_NOT_KNOWN_P (*overlaps_a)
3941               || CF_NOT_KNOWN_P (*overlaps_b))
3942             dependence_stats.num_siv_unimplemented++;
3943           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3944                    || CF_NO_DEPENDENCE_P (*overlaps_b))
3945             dependence_stats.num_siv_independent++;
3946           else
3947             dependence_stats.num_siv_dependent++;
3948         }
3949       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
3950                                                         &chrec_b))
3951         {
3952           analyze_subscript_affine_affine (chrec_a, chrec_b,
3953                                            overlaps_a, overlaps_b,
3954                                            last_conflicts);
3955
3956           if (CF_NOT_KNOWN_P (*overlaps_a)
3957               || CF_NOT_KNOWN_P (*overlaps_b))
3958             dependence_stats.num_siv_unimplemented++;
3959           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3960                    || CF_NO_DEPENDENCE_P (*overlaps_b))
3961             dependence_stats.num_siv_independent++;
3962           else
3963             dependence_stats.num_siv_dependent++;
3964         }
3965       else
3966         goto siv_subscript_dontknow;
3967     }
3968
3969   else
3970     {
3971     siv_subscript_dontknow:;
3972       if (dump_file && (dump_flags & TDF_DETAILS))
3973         fprintf (dump_file, "  siv test failed: unimplemented");
3974       *overlaps_a = conflict_fn_not_known ();
3975       *overlaps_b = conflict_fn_not_known ();
3976       *last_conflicts = chrec_dont_know;
3977       dependence_stats.num_siv_unimplemented++;
3978     }
3979
3980   if (dump_file && (dump_flags & TDF_DETAILS))
3981     fprintf (dump_file, ")\n");
3982 }
3983
3984 /* Returns false if we can prove that the greatest common divisor of the steps
3985    of CHREC does not divide CST, false otherwise.  */
3986
3987 static bool
3988 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
3989 {
3990   HOST_WIDE_INT cd = 0, val;
3991   tree step;
3992
3993   if (!tree_fits_shwi_p (cst))
3994     return true;
3995   val = tree_to_shwi (cst);
3996
3997   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
3998     {
3999       step = CHREC_RIGHT (chrec);
4000       if (!tree_fits_shwi_p (step))
4001         return true;
4002       cd = gcd (cd, tree_to_shwi (step));
4003       chrec = CHREC_LEFT (chrec);
4004     }
4005
4006   return val % cd == 0;
4007 }
4008
4009 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4010    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
4011    functions that describe the relation between the elements accessed
4012    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
4013    is verified:
4014
4015    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4016
4017 static void
4018 analyze_miv_subscript (tree chrec_a,
4019                        tree chrec_b,
4020                        conflict_function **overlaps_a,
4021                        conflict_function **overlaps_b,
4022                        tree *last_conflicts,
4023                        struct loop *loop_nest)
4024 {
4025   tree type, difference;
4026
4027   dependence_stats.num_miv++;
4028   if (dump_file && (dump_flags & TDF_DETAILS))
4029     fprintf (dump_file, "(analyze_miv_subscript \n");
4030
4031   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4032   chrec_a = chrec_convert (type, chrec_a, NULL);
4033   chrec_b = chrec_convert (type, chrec_b, NULL);
4034   difference = chrec_fold_minus (type, chrec_a, chrec_b);
4035
4036   if (eq_evolutions_p (chrec_a, chrec_b))
4037     {
4038       /* Access functions are the same: all the elements are accessed
4039          in the same order.  */
4040       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4041       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4042       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4043       dependence_stats.num_miv_dependent++;
4044     }
4045
4046   else if (evolution_function_is_constant_p (difference)
4047            && evolution_function_is_affine_multivariate_p (chrec_a,
4048                                                            loop_nest->num)
4049            && !gcd_of_steps_may_divide_p (chrec_a, difference))
4050     {
4051       /* testsuite/.../ssa-chrec-33.c
4052          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
4053
4054          The difference is 1, and all the evolution steps are multiples
4055          of 2, consequently there are no overlapping elements.  */
4056       *overlaps_a = conflict_fn_no_dependence ();
4057       *overlaps_b = conflict_fn_no_dependence ();
4058       *last_conflicts = integer_zero_node;
4059       dependence_stats.num_miv_independent++;
4060     }
4061
4062   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4063            && !chrec_contains_symbols (chrec_a)
4064            && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4065            && !chrec_contains_symbols (chrec_b))
4066     {
4067       /* testsuite/.../ssa-chrec-35.c
4068          {0, +, 1}_2  vs.  {0, +, 1}_3
4069          the overlapping elements are respectively located at iterations:
4070          {0, +, 1}_x and {0, +, 1}_x,
4071          in other words, we have the equality:
4072          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4073
4074          Other examples:
4075          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4076          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4077
4078          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4079          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4080       */
4081       analyze_subscript_affine_affine (chrec_a, chrec_b,
4082                                        overlaps_a, overlaps_b, last_conflicts);
4083
4084       if (CF_NOT_KNOWN_P (*overlaps_a)
4085           || CF_NOT_KNOWN_P (*overlaps_b))
4086         dependence_stats.num_miv_unimplemented++;
4087       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4088                || CF_NO_DEPENDENCE_P (*overlaps_b))
4089         dependence_stats.num_miv_independent++;
4090       else
4091         dependence_stats.num_miv_dependent++;
4092     }
4093
4094   else
4095     {
4096       /* When the analysis is too difficult, answer "don't know".  */
4097       if (dump_file && (dump_flags & TDF_DETAILS))
4098         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4099
4100       *overlaps_a = conflict_fn_not_known ();
4101       *overlaps_b = conflict_fn_not_known ();
4102       *last_conflicts = chrec_dont_know;
4103       dependence_stats.num_miv_unimplemented++;
4104     }
4105
4106   if (dump_file && (dump_flags & TDF_DETAILS))
4107     fprintf (dump_file, ")\n");
4108 }
4109
4110 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4111    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4112    OVERLAP_ITERATIONS_B are initialized with two functions that
4113    describe the iterations that contain conflicting elements.
4114
4115    Remark: For an integer k >= 0, the following equality is true:
4116
4117    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4118 */
4119
4120 static void
4121 analyze_overlapping_iterations (tree chrec_a,
4122                                 tree chrec_b,
4123                                 conflict_function **overlap_iterations_a,
4124                                 conflict_function **overlap_iterations_b,
4125                                 tree *last_conflicts, struct loop *loop_nest)
4126 {
4127   unsigned int lnn = loop_nest->num;
4128
4129   dependence_stats.num_subscript_tests++;
4130
4131   if (dump_file && (dump_flags & TDF_DETAILS))
4132     {
4133       fprintf (dump_file, "(analyze_overlapping_iterations \n");
4134       fprintf (dump_file, "  (chrec_a = ");
4135       print_generic_expr (dump_file, chrec_a);
4136       fprintf (dump_file, ")\n  (chrec_b = ");
4137       print_generic_expr (dump_file, chrec_b);
4138       fprintf (dump_file, ")\n");
4139     }
4140
4141   if (chrec_a == NULL_TREE
4142       || chrec_b == NULL_TREE
4143       || chrec_contains_undetermined (chrec_a)
4144       || chrec_contains_undetermined (chrec_b))
4145     {
4146       dependence_stats.num_subscript_undetermined++;
4147
4148       *overlap_iterations_a = conflict_fn_not_known ();
4149       *overlap_iterations_b = conflict_fn_not_known ();
4150     }
4151
4152   /* If they are the same chrec, and are affine, they overlap
4153      on every iteration.  */
4154   else if (eq_evolutions_p (chrec_a, chrec_b)
4155            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4156                || operand_equal_p (chrec_a, chrec_b, 0)))
4157     {
4158       dependence_stats.num_same_subscript_function++;
4159       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4160       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4161       *last_conflicts = chrec_dont_know;
4162     }
4163
4164   /* If they aren't the same, and aren't affine, we can't do anything
4165      yet.  */
4166   else if ((chrec_contains_symbols (chrec_a)
4167             || chrec_contains_symbols (chrec_b))
4168            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4169                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4170     {
4171       dependence_stats.num_subscript_undetermined++;
4172       *overlap_iterations_a = conflict_fn_not_known ();
4173       *overlap_iterations_b = conflict_fn_not_known ();
4174     }
4175
4176   else if (ziv_subscript_p (chrec_a, chrec_b))
4177     analyze_ziv_subscript (chrec_a, chrec_b,
4178                            overlap_iterations_a, overlap_iterations_b,
4179                            last_conflicts);
4180
4181   else if (siv_subscript_p (chrec_a, chrec_b))
4182     analyze_siv_subscript (chrec_a, chrec_b,
4183                            overlap_iterations_a, overlap_iterations_b,
4184                            last_conflicts, lnn);
4185
4186   else
4187     analyze_miv_subscript (chrec_a, chrec_b,
4188                            overlap_iterations_a, overlap_iterations_b,
4189                            last_conflicts, loop_nest);
4190
4191   if (dump_file && (dump_flags & TDF_DETAILS))
4192     {
4193       fprintf (dump_file, "  (overlap_iterations_a = ");
4194       dump_conflict_function (dump_file, *overlap_iterations_a);
4195       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
4196       dump_conflict_function (dump_file, *overlap_iterations_b);
4197       fprintf (dump_file, "))\n");
4198     }
4199 }
4200
4201 /* Helper function for uniquely inserting distance vectors.  */
4202
4203 static void
4204 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4205 {
4206   unsigned i;
4207   lambda_vector v;
4208
4209   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4210     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4211       return;
4212
4213   DDR_DIST_VECTS (ddr).safe_push (dist_v);
4214 }
4215
4216 /* Helper function for uniquely inserting direction vectors.  */
4217
4218 static void
4219 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4220 {
4221   unsigned i;
4222   lambda_vector v;
4223
4224   FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4225     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4226       return;
4227
4228   DDR_DIR_VECTS (ddr).safe_push (dir_v);
4229 }
4230
4231 /* Add a distance of 1 on all the loops outer than INDEX.  If we
4232    haven't yet determined a distance for this outer loop, push a new
4233    distance vector composed of the previous distance, and a distance
4234    of 1 for this outer loop.  Example:
4235
4236    | loop_1
4237    |   loop_2
4238    |     A[10]
4239    |   endloop_2
4240    | endloop_1
4241
4242    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
4243    save (0, 1), then we have to save (1, 0).  */
4244
4245 static void
4246 add_outer_distances (struct data_dependence_relation *ddr,
4247                      lambda_vector dist_v, int index)
4248 {
4249   /* For each outer loop where init_v is not set, the accesses are
4250      in dependence of distance 1 in the loop.  */
4251   while (--index >= 0)
4252     {
4253       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4254       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4255       save_v[index] = 1;
4256       save_dist_v (ddr, save_v);
4257     }
4258 }
4259
4260 /* Return false when fail to represent the data dependence as a
4261    distance vector.  A_INDEX is the index of the first reference
4262    (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4263    second reference.  INIT_B is set to true when a component has been
4264    added to the distance vector DIST_V.  INDEX_CARRY is then set to
4265    the index in DIST_V that carries the dependence.  */
4266
4267 static bool
4268 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
4269                              unsigned int a_index, unsigned int b_index,
4270                              lambda_vector dist_v, bool *init_b,
4271                              int *index_carry)
4272 {
4273   unsigned i;
4274   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4275
4276   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4277     {
4278       tree access_fn_a, access_fn_b;
4279       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
4280
4281       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4282         {
4283           non_affine_dependence_relation (ddr);
4284           return false;
4285         }
4286
4287       access_fn_a = SUB_ACCESS_FN (subscript, a_index);
4288       access_fn_b = SUB_ACCESS_FN (subscript, b_index);
4289
4290       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
4291           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
4292         {
4293           HOST_WIDE_INT dist;
4294           int index;
4295           int var_a = CHREC_VARIABLE (access_fn_a);
4296           int var_b = CHREC_VARIABLE (access_fn_b);
4297
4298           if (var_a != var_b
4299               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4300             {
4301               non_affine_dependence_relation (ddr);
4302               return false;
4303             }
4304
4305           dist = int_cst_value (SUB_DISTANCE (subscript));
4306           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
4307           *index_carry = MIN (index, *index_carry);
4308
4309           /* This is the subscript coupling test.  If we have already
4310              recorded a distance for this loop (a distance coming from
4311              another subscript), it should be the same.  For example,
4312              in the following code, there is no dependence:
4313
4314              | loop i = 0, N, 1
4315              |   T[i+1][i] = ...
4316              |   ... = T[i][i]
4317              | endloop
4318           */
4319           if (init_v[index] != 0 && dist_v[index] != dist)
4320             {
4321               finalize_ddr_dependent (ddr, chrec_known);
4322               return false;
4323             }
4324
4325           dist_v[index] = dist;
4326           init_v[index] = 1;
4327           *init_b = true;
4328         }
4329       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
4330         {
4331           /* This can be for example an affine vs. constant dependence
4332              (T[i] vs. T[3]) that is not an affine dependence and is
4333              not representable as a distance vector.  */
4334           non_affine_dependence_relation (ddr);
4335           return false;
4336         }
4337     }
4338
4339   return true;
4340 }
4341
4342 /* Return true when the DDR contains only constant access functions.  */
4343
4344 static bool
4345 constant_access_functions (const struct data_dependence_relation *ddr)
4346 {
4347   unsigned i;
4348   subscript *sub;
4349
4350   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4351     if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4352         || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4353       return false;
4354
4355   return true;
4356 }
4357
4358 /* Helper function for the case where DDR_A and DDR_B are the same
4359    multivariate access function with a constant step.  For an example
4360    see pr34635-1.c.  */
4361
4362 static void
4363 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
4364 {
4365   int x_1, x_2;
4366   tree c_1 = CHREC_LEFT (c_2);
4367   tree c_0 = CHREC_LEFT (c_1);
4368   lambda_vector dist_v;
4369   HOST_WIDE_INT v1, v2, cd;
4370
4371   /* Polynomials with more than 2 variables are not handled yet.  When
4372      the evolution steps are parameters, it is not possible to
4373      represent the dependence using classical distance vectors.  */
4374   if (TREE_CODE (c_0) != INTEGER_CST
4375       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
4376       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
4377     {
4378       DDR_AFFINE_P (ddr) = false;
4379       return;
4380     }
4381
4382   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
4383   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
4384
4385   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
4386   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4387   v1 = int_cst_value (CHREC_RIGHT (c_1));
4388   v2 = int_cst_value (CHREC_RIGHT (c_2));
4389   cd = gcd (v1, v2);
4390   v1 /= cd;
4391   v2 /= cd;
4392
4393   if (v2 < 0)
4394     {
4395       v2 = -v2;
4396       v1 = -v1;
4397     }
4398
4399   dist_v[x_1] = v2;
4400   dist_v[x_2] = -v1;
4401   save_dist_v (ddr, dist_v);
4402
4403   add_outer_distances (ddr, dist_v, x_1);
4404 }
4405
4406 /* Helper function for the case where DDR_A and DDR_B are the same
4407    access functions.  */
4408
4409 static void
4410 add_other_self_distances (struct data_dependence_relation *ddr)
4411 {
4412   lambda_vector dist_v;
4413   unsigned i;
4414   int index_carry = DDR_NB_LOOPS (ddr);
4415   subscript *sub;
4416
4417   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4418     {
4419       tree access_fun = SUB_ACCESS_FN (sub, 0);
4420
4421       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
4422         {
4423           if (!evolution_function_is_univariate_p (access_fun))
4424             {
4425               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
4426                 {
4427                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
4428                   return;
4429                 }
4430
4431               access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
4432
4433               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
4434                 add_multivariate_self_dist (ddr, access_fun);
4435               else
4436                 /* The evolution step is not constant: it varies in
4437                    the outer loop, so this cannot be represented by a
4438                    distance vector.  For example in pr34635.c the
4439                    evolution is {0, +, {0, +, 4}_1}_2.  */
4440                 DDR_AFFINE_P (ddr) = false;
4441
4442               return;
4443             }
4444
4445           index_carry = MIN (index_carry,
4446                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
4447                                                  DDR_LOOP_NEST (ddr)));
4448         }
4449     }
4450
4451   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4452   add_outer_distances (ddr, dist_v, index_carry);
4453 }
4454
4455 static void
4456 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4457 {
4458   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4459
4460   dist_v[DDR_INNER_LOOP (ddr)] = 1;
4461   save_dist_v (ddr, dist_v);
4462 }
4463
4464 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
4465    is the case for example when access functions are the same and
4466    equal to a constant, as in:
4467
4468    | loop_1
4469    |   A[3] = ...
4470    |   ... = A[3]
4471    | endloop_1
4472
4473    in which case the distance vectors are (0) and (1).  */
4474
4475 static void
4476 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4477 {
4478   unsigned i, j;
4479
4480   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4481     {
4482       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4483       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4484       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4485
4486       for (j = 0; j < ca->n; j++)
4487         if (affine_function_zero_p (ca->fns[j]))
4488           {
4489             insert_innermost_unit_dist_vector (ddr);
4490             return;
4491           }
4492
4493       for (j = 0; j < cb->n; j++)
4494         if (affine_function_zero_p (cb->fns[j]))
4495           {
4496             insert_innermost_unit_dist_vector (ddr);
4497             return;
4498           }
4499     }
4500 }
4501
4502 /* Return true when the DDR contains two data references that have the
4503    same access functions.  */
4504
4505 static inline bool
4506 same_access_functions (const struct data_dependence_relation *ddr)
4507 {
4508   unsigned i;
4509   subscript *sub;
4510
4511   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4512     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4513                           SUB_ACCESS_FN (sub, 1)))
4514       return false;
4515
4516   return true;
4517 }
4518
4519 /* Compute the classic per loop distance vector.  DDR is the data
4520    dependence relation to build a vector from.  Return false when fail
4521    to represent the data dependence as a distance vector.  */
4522
4523 static bool
4524 build_classic_dist_vector (struct data_dependence_relation *ddr,
4525                            struct loop *loop_nest)
4526 {
4527   bool init_b = false;
4528   int index_carry = DDR_NB_LOOPS (ddr);
4529   lambda_vector dist_v;
4530
4531   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4532     return false;
4533
4534   if (same_access_functions (ddr))
4535     {
4536       /* Save the 0 vector.  */
4537       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4538       save_dist_v (ddr, dist_v);
4539
4540       if (constant_access_functions (ddr))
4541         add_distance_for_zero_overlaps (ddr);
4542
4543       if (DDR_NB_LOOPS (ddr) > 1)
4544         add_other_self_distances (ddr);
4545
4546       return true;
4547     }
4548
4549   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4550   if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
4551     return false;
4552
4553   /* Save the distance vector if we initialized one.  */
4554   if (init_b)
4555     {
4556       /* Verify a basic constraint: classic distance vectors should
4557          always be lexicographically positive.
4558
4559          Data references are collected in the order of execution of
4560          the program, thus for the following loop
4561
4562          | for (i = 1; i < 100; i++)
4563          |   for (j = 1; j < 100; j++)
4564          |     {
4565          |       t = T[j+1][i-1];  // A
4566          |       T[j][i] = t + 2;  // B
4567          |     }
4568
4569          references are collected following the direction of the wind:
4570          A then B.  The data dependence tests are performed also
4571          following this order, such that we're looking at the distance
4572          separating the elements accessed by A from the elements later
4573          accessed by B.  But in this example, the distance returned by
4574          test_dep (A, B) is lexicographically negative (-1, 1), that
4575          means that the access A occurs later than B with respect to
4576          the outer loop, ie. we're actually looking upwind.  In this
4577          case we solve test_dep (B, A) looking downwind to the
4578          lexicographically positive solution, that returns the
4579          distance vector (1, -1).  */
4580       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
4581         {
4582           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4583           if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4584             return false;
4585           compute_subscript_distance (ddr);
4586           if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
4587                                             &index_carry))
4588             return false;
4589           save_dist_v (ddr, save_v);
4590           DDR_REVERSED_P (ddr) = true;
4591
4592           /* In this case there is a dependence forward for all the
4593              outer loops:
4594
4595              | for (k = 1; k < 100; k++)
4596              |  for (i = 1; i < 100; i++)
4597              |   for (j = 1; j < 100; j++)
4598              |     {
4599              |       t = T[j+1][i-1];  // A
4600              |       T[j][i] = t + 2;  // B
4601              |     }
4602
4603              the vectors are:
4604              (0,  1, -1)
4605              (1,  1, -1)
4606              (1, -1,  1)
4607           */
4608           if (DDR_NB_LOOPS (ddr) > 1)
4609             {
4610               add_outer_distances (ddr, save_v, index_carry);
4611               add_outer_distances (ddr, dist_v, index_carry);
4612             }
4613         }
4614       else
4615         {
4616           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4617           lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4618
4619           if (DDR_NB_LOOPS (ddr) > 1)
4620             {
4621               lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4622
4623               if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4624                 return false;
4625               compute_subscript_distance (ddr);
4626               if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
4627                                                 &index_carry))
4628                 return false;
4629
4630               save_dist_v (ddr, save_v);
4631               add_outer_distances (ddr, dist_v, index_carry);
4632               add_outer_distances (ddr, opposite_v, index_carry);
4633             }
4634           else
4635             save_dist_v (ddr, save_v);
4636         }
4637     }
4638   else
4639     {
4640       /* There is a distance of 1 on all the outer loops: Example:
4641          there is a dependence of distance 1 on loop_1 for the array A.
4642
4643          | loop_1
4644          |   A[5] = ...
4645          | endloop
4646       */
4647       add_outer_distances (ddr, dist_v,
4648                            lambda_vector_first_nz (dist_v,
4649                                                    DDR_NB_LOOPS (ddr), 0));
4650     }
4651
4652   if (dump_file && (dump_flags & TDF_DETAILS))
4653     {
4654       unsigned i;
4655
4656       fprintf (dump_file, "(build_classic_dist_vector\n");
4657       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4658         {
4659           fprintf (dump_file, "  dist_vector = (");
4660           print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
4661                                DDR_NB_LOOPS (ddr));
4662           fprintf (dump_file, "  )\n");
4663         }
4664       fprintf (dump_file, ")\n");
4665     }
4666
4667   return true;
4668 }
4669
4670 /* Return the direction for a given distance.
4671    FIXME: Computing dir this way is suboptimal, since dir can catch
4672    cases that dist is unable to represent.  */
4673
4674 static inline enum data_dependence_direction
4675 dir_from_dist (int dist)
4676 {
4677   if (dist > 0)
4678     return dir_positive;
4679   else if (dist < 0)
4680     return dir_negative;
4681   else
4682     return dir_equal;
4683 }
4684
4685 /* Compute the classic per loop direction vector.  DDR is the data
4686    dependence relation to build a vector from.  */
4687
4688 static void
4689 build_classic_dir_vector (struct data_dependence_relation *ddr)
4690 {
4691   unsigned i, j;
4692   lambda_vector dist_v;
4693
4694   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4695     {
4696       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4697
4698       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4699         dir_v[j] = dir_from_dist (dist_v[j]);
4700
4701       save_dir_v (ddr, dir_v);
4702     }
4703 }
4704
4705 /* Helper function.  Returns true when there is a dependence between the
4706    data references.  A_INDEX is the index of the first reference (0 for
4707    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
4708
4709 static bool
4710 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
4711                                unsigned int a_index, unsigned int b_index,
4712                                struct loop *loop_nest)
4713 {
4714   unsigned int i;
4715   tree last_conflicts;
4716   struct subscript *subscript;
4717   tree res = NULL_TREE;
4718
4719   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
4720     {
4721       conflict_function *overlaps_a, *overlaps_b;
4722
4723       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
4724                                       SUB_ACCESS_FN (subscript, b_index),
4725                                       &overlaps_a, &overlaps_b,
4726                                       &last_conflicts, loop_nest);
4727
4728       if (SUB_CONFLICTS_IN_A (subscript))
4729         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4730       if (SUB_CONFLICTS_IN_B (subscript))
4731         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4732
4733       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
4734       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
4735       SUB_LAST_CONFLICT (subscript) = last_conflicts;
4736
4737       /* If there is any undetermined conflict function we have to
4738          give a conservative answer in case we cannot prove that
4739          no dependence exists when analyzing another subscript.  */
4740       if (CF_NOT_KNOWN_P (overlaps_a)
4741           || CF_NOT_KNOWN_P (overlaps_b))
4742         {
4743           res = chrec_dont_know;
4744           continue;
4745         }
4746
4747       /* When there is a subscript with no dependence we can stop.  */
4748       else if (CF_NO_DEPENDENCE_P (overlaps_a)
4749                || CF_NO_DEPENDENCE_P (overlaps_b))
4750         {
4751           res = chrec_known;
4752           break;
4753         }
4754     }
4755
4756   if (res == NULL_TREE)
4757     return true;
4758
4759   if (res == chrec_known)
4760     dependence_stats.num_dependence_independent++;
4761   else
4762     dependence_stats.num_dependence_undetermined++;
4763   finalize_ddr_dependent (ddr, res);
4764   return false;
4765 }
4766
4767 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
4768
4769 static void
4770 subscript_dependence_tester (struct data_dependence_relation *ddr,
4771                              struct loop *loop_nest)
4772 {
4773   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
4774     dependence_stats.num_dependence_dependent++;
4775
4776   compute_subscript_distance (ddr);
4777   if (build_classic_dist_vector (ddr, loop_nest))
4778     build_classic_dir_vector (ddr);
4779 }
4780
4781 /* Returns true when all the access functions of A are affine or
4782    constant with respect to LOOP_NEST.  */
4783
4784 static bool
4785 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4786                                            const struct loop *loop_nest)
4787 {
4788   unsigned int i;
4789   vec<tree> fns = DR_ACCESS_FNS (a);
4790   tree t;
4791
4792   FOR_EACH_VEC_ELT (fns, i, t)
4793     if (!evolution_function_is_invariant_p (t, loop_nest->num)
4794         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4795       return false;
4796
4797   return true;
4798 }
4799
4800 /* This computes the affine dependence relation between A and B with
4801    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
4802    independence between two accesses, while CHREC_DONT_KNOW is used
4803    for representing the unknown relation.
4804
4805    Note that it is possible to stop the computation of the dependence
4806    relation the first time we detect a CHREC_KNOWN element for a given
4807    subscript.  */
4808
4809 void
4810 compute_affine_dependence (struct data_dependence_relation *ddr,
4811                            struct loop *loop_nest)
4812 {
4813   struct data_reference *dra = DDR_A (ddr);
4814   struct data_reference *drb = DDR_B (ddr);
4815
4816   if (dump_file && (dump_flags & TDF_DETAILS))
4817     {
4818       fprintf (dump_file, "(compute_affine_dependence\n");
4819       fprintf (dump_file, "  stmt_a: ");
4820       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4821       fprintf (dump_file, "  stmt_b: ");
4822       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4823     }
4824
4825   /* Analyze only when the dependence relation is not yet known.  */
4826   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4827     {
4828       dependence_stats.num_dependence_tests++;
4829
4830       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4831           && access_functions_are_affine_or_constant_p (drb, loop_nest))
4832         subscript_dependence_tester (ddr, loop_nest);
4833
4834       /* As a last case, if the dependence cannot be determined, or if
4835          the dependence is considered too difficult to determine, answer
4836          "don't know".  */
4837       else
4838         {
4839           dependence_stats.num_dependence_undetermined++;
4840
4841           if (dump_file && (dump_flags & TDF_DETAILS))
4842             {
4843               fprintf (dump_file, "Data ref a:\n");
4844               dump_data_reference (dump_file, dra);
4845               fprintf (dump_file, "Data ref b:\n");
4846               dump_data_reference (dump_file, drb);
4847               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4848             }
4849           finalize_ddr_dependent (ddr, chrec_dont_know);
4850         }
4851     }
4852
4853   if (dump_file && (dump_flags & TDF_DETAILS))
4854     {
4855       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4856         fprintf (dump_file, ") -> no dependence\n");
4857       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4858         fprintf (dump_file, ") -> dependence analysis failed\n");
4859       else
4860         fprintf (dump_file, ")\n");
4861     }
4862 }
4863
4864 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4865    the data references in DATAREFS, in the LOOP_NEST.  When
4866    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4867    relations.  Return true when successful, i.e. data references number
4868    is small enough to be handled.  */
4869
4870 bool
4871 compute_all_dependences (vec<data_reference_p> datarefs,
4872                          vec<ddr_p> *dependence_relations,
4873                          vec<loop_p> loop_nest,
4874                          bool compute_self_and_rr)
4875 {
4876   struct data_dependence_relation *ddr;
4877   struct data_reference *a, *b;
4878   unsigned int i, j;
4879
4880   if ((int) datarefs.length ()
4881       > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4882     {
4883       struct data_dependence_relation *ddr;
4884
4885       /* Insert a single relation into dependence_relations:
4886          chrec_dont_know.  */
4887       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4888       dependence_relations->safe_push (ddr);
4889       return false;
4890     }
4891
4892   FOR_EACH_VEC_ELT (datarefs, i, a)
4893     for (j = i + 1; datarefs.iterate (j, &b); j++)
4894       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4895         {
4896           ddr = initialize_data_dependence_relation (a, b, loop_nest);
4897           dependence_relations->safe_push (ddr);
4898           if (loop_nest.exists ())
4899             compute_affine_dependence (ddr, loop_nest[0]);
4900         }
4901
4902   if (compute_self_and_rr)
4903     FOR_EACH_VEC_ELT (datarefs, i, a)
4904       {
4905         ddr = initialize_data_dependence_relation (a, a, loop_nest);
4906         dependence_relations->safe_push (ddr);
4907         if (loop_nest.exists ())
4908           compute_affine_dependence (ddr, loop_nest[0]);
4909       }
4910
4911   return true;
4912 }
4913
4914 /* Describes a location of a memory reference.  */
4915
4916 struct data_ref_loc
4917 {
4918   /* The memory reference.  */
4919   tree ref;
4920
4921   /* True if the memory reference is read.  */
4922   bool is_read;
4923
4924   /* True if the data reference is conditional within the containing
4925      statement, i.e. if it might not occur even when the statement
4926      is executed and runs to completion.  */
4927   bool is_conditional_in_stmt;
4928 };
4929
4930
4931 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
4932    true if STMT clobbers memory, false otherwise.  */
4933
4934 static bool
4935 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
4936 {
4937   bool clobbers_memory = false;
4938   data_ref_loc ref;
4939   tree op0, op1;
4940   enum gimple_code stmt_code = gimple_code (stmt);
4941
4942   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4943      As we cannot model data-references to not spelled out
4944      accesses give up if they may occur.  */
4945   if (stmt_code == GIMPLE_CALL
4946       && !(gimple_call_flags (stmt) & ECF_CONST))
4947     {
4948       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
4949       if (gimple_call_internal_p (stmt))
4950         switch (gimple_call_internal_fn (stmt))
4951           {
4952           case IFN_GOMP_SIMD_LANE:
4953             {
4954               struct loop *loop = gimple_bb (stmt)->loop_father;
4955               tree uid = gimple_call_arg (stmt, 0);
4956               gcc_assert (TREE_CODE (uid) == SSA_NAME);
4957               if (loop == NULL
4958                   || loop->simduid != SSA_NAME_VAR (uid))
4959                 clobbers_memory = true;
4960               break;
4961             }
4962           case IFN_MASK_LOAD:
4963           case IFN_MASK_STORE:
4964             break;
4965           default:
4966             clobbers_memory = true;
4967             break;
4968           }
4969       else
4970         clobbers_memory = true;
4971     }
4972   else if (stmt_code == GIMPLE_ASM
4973            && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
4974                || gimple_vuse (stmt)))
4975     clobbers_memory = true;
4976
4977   if (!gimple_vuse (stmt))
4978     return clobbers_memory;
4979
4980   if (stmt_code == GIMPLE_ASSIGN)
4981     {
4982       tree base;
4983       op0 = gimple_assign_lhs (stmt);
4984       op1 = gimple_assign_rhs1 (stmt);
4985
4986       if (DECL_P (op1)
4987           || (REFERENCE_CLASS_P (op1)
4988               && (base = get_base_address (op1))
4989               && TREE_CODE (base) != SSA_NAME
4990               && !is_gimple_min_invariant (base)))
4991         {
4992           ref.ref = op1;
4993           ref.is_read = true;
4994           ref.is_conditional_in_stmt = false;
4995           references->safe_push (ref);
4996         }
4997     }
4998   else if (stmt_code == GIMPLE_CALL)
4999     {
5000       unsigned i, n;
5001       tree ptr, type;
5002       unsigned int align;
5003
5004       ref.is_read = false;
5005       if (gimple_call_internal_p (stmt))
5006         switch (gimple_call_internal_fn (stmt))
5007           {
5008           case IFN_MASK_LOAD:
5009             if (gimple_call_lhs (stmt) == NULL_TREE)
5010               break;
5011             ref.is_read = true;
5012             /* FALLTHRU */
5013           case IFN_MASK_STORE:
5014             ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5015             align = tree_to_shwi (gimple_call_arg (stmt, 1));
5016             if (ref.is_read)
5017               type = TREE_TYPE (gimple_call_lhs (stmt));
5018             else
5019               type = TREE_TYPE (gimple_call_arg (stmt, 3));
5020             if (TYPE_ALIGN (type) != align)
5021               type = build_aligned_type (type, align);
5022             ref.is_conditional_in_stmt = true;
5023             ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5024                                    ptr);
5025             references->safe_push (ref);
5026             return false;
5027           default:
5028             break;
5029           }
5030
5031       op0 = gimple_call_lhs (stmt);
5032       n = gimple_call_num_args (stmt);
5033       for (i = 0; i < n; i++)
5034         {
5035           op1 = gimple_call_arg (stmt, i);
5036
5037           if (DECL_P (op1)
5038               || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5039             {
5040               ref.ref = op1;
5041               ref.is_read = true;
5042               ref.is_conditional_in_stmt = false;
5043               references->safe_push (ref);
5044             }
5045         }
5046     }
5047   else
5048     return clobbers_memory;
5049
5050   if (op0
5051       && (DECL_P (op0)
5052           || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5053     {
5054       ref.ref = op0;
5055       ref.is_read = false;
5056       ref.is_conditional_in_stmt = false;
5057       references->safe_push (ref);
5058     }
5059   return clobbers_memory;
5060 }
5061
5062
5063 /* Returns true if the loop-nest has any data reference.  */
5064
5065 bool
5066 loop_nest_has_data_refs (loop_p loop)
5067 {
5068   basic_block *bbs = get_loop_body (loop);
5069   auto_vec<data_ref_loc, 3> references;
5070
5071   for (unsigned i = 0; i < loop->num_nodes; i++)
5072     {
5073       basic_block bb = bbs[i];
5074       gimple_stmt_iterator bsi;
5075
5076       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5077         {
5078           gimple *stmt = gsi_stmt (bsi);
5079           get_references_in_stmt (stmt, &references);
5080           if (references.length ())
5081             {
5082               free (bbs);
5083               return true;
5084             }
5085         }
5086     }
5087   free (bbs);
5088   return false;
5089 }
5090
5091 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
5092    reference, returns false, otherwise returns true.  NEST is the outermost
5093    loop of the loop nest in which the references should be analyzed.  */
5094
5095 opt_result
5096 find_data_references_in_stmt (struct loop *nest, gimple *stmt,
5097                               vec<data_reference_p> *datarefs)
5098 {
5099   unsigned i;
5100   auto_vec<data_ref_loc, 2> references;
5101   data_ref_loc *ref;
5102   data_reference_p dr;
5103
5104   if (get_references_in_stmt (stmt, &references))
5105     return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5106                                    stmt);
5107
5108   FOR_EACH_VEC_ELT (references, i, ref)
5109     {
5110       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5111                             loop_containing_stmt (stmt), ref->ref,
5112                             stmt, ref->is_read, ref->is_conditional_in_stmt);
5113       gcc_assert (dr != NULL);
5114       datarefs->safe_push (dr);
5115     }
5116
5117   return opt_result::success ();
5118 }
5119
5120 /* Stores the data references in STMT to DATAREFS.  If there is an
5121    unanalyzable reference, returns false, otherwise returns true.
5122    NEST is the outermost loop of the loop nest in which the references
5123    should be instantiated, LOOP is the loop in which the references
5124    should be analyzed.  */
5125
5126 bool
5127 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5128                                        vec<data_reference_p> *datarefs)
5129 {
5130   unsigned i;
5131   auto_vec<data_ref_loc, 2> references;
5132   data_ref_loc *ref;
5133   bool ret = true;
5134   data_reference_p dr;
5135
5136   if (get_references_in_stmt (stmt, &references))
5137     return false;
5138
5139   FOR_EACH_VEC_ELT (references, i, ref)
5140     {
5141       dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5142                             ref->is_conditional_in_stmt);
5143       gcc_assert (dr != NULL);
5144       datarefs->safe_push (dr);
5145     }
5146
5147   return ret;
5148 }
5149
5150 /* Search the data references in LOOP, and record the information into
5151    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5152    difficult case, returns NULL_TREE otherwise.  */
5153
5154 tree
5155 find_data_references_in_bb (struct loop *loop, basic_block bb,
5156                             vec<data_reference_p> *datarefs)
5157 {
5158   gimple_stmt_iterator bsi;
5159
5160   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5161     {
5162       gimple *stmt = gsi_stmt (bsi);
5163
5164       if (!find_data_references_in_stmt (loop, stmt, datarefs))
5165         {
5166           struct data_reference *res;
5167           res = XCNEW (struct data_reference);
5168           datarefs->safe_push (res);
5169
5170           return chrec_dont_know;
5171         }
5172     }
5173
5174   return NULL_TREE;
5175 }
5176
5177 /* Search the data references in LOOP, and record the information into
5178    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5179    difficult case, returns NULL_TREE otherwise.
5180
5181    TODO: This function should be made smarter so that it can handle address
5182    arithmetic as if they were array accesses, etc.  */
5183
5184 tree
5185 find_data_references_in_loop (struct loop *loop,
5186                               vec<data_reference_p> *datarefs)
5187 {
5188   basic_block bb, *bbs;
5189   unsigned int i;
5190
5191   bbs = get_loop_body_in_dom_order (loop);
5192
5193   for (i = 0; i < loop->num_nodes; i++)
5194     {
5195       bb = bbs[i];
5196
5197       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5198         {
5199           free (bbs);
5200           return chrec_dont_know;
5201         }
5202     }
5203   free (bbs);
5204
5205   return NULL_TREE;
5206 }
5207
5208 /* Return the alignment in bytes that DRB is guaranteed to have at all
5209    times.  */
5210
5211 unsigned int
5212 dr_alignment (innermost_loop_behavior *drb)
5213 {
5214   /* Get the alignment of BASE_ADDRESS + INIT.  */
5215   unsigned int alignment = drb->base_alignment;
5216   unsigned int misalignment = (drb->base_misalignment
5217                                + TREE_INT_CST_LOW (drb->init));
5218   if (misalignment != 0)
5219     alignment = MIN (alignment, misalignment & -misalignment);
5220
5221   /* Cap it to the alignment of OFFSET.  */
5222   if (!integer_zerop (drb->offset))
5223     alignment = MIN (alignment, drb->offset_alignment);
5224
5225   /* Cap it to the alignment of STEP.  */
5226   if (!integer_zerop (drb->step))
5227     alignment = MIN (alignment, drb->step_alignment);
5228
5229   return alignment;
5230 }
5231
5232 /* If BASE is a pointer-typed SSA name, try to find the object that it
5233    is based on.  Return this object X on success and store the alignment
5234    in bytes of BASE - &X in *ALIGNMENT_OUT.  */
5235
5236 static tree
5237 get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
5238 {
5239   if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
5240     return NULL_TREE;
5241
5242   gimple *def = SSA_NAME_DEF_STMT (base);
5243   base = analyze_scalar_evolution (loop_containing_stmt (def), base);
5244
5245   /* Peel chrecs and record the minimum alignment preserved by
5246      all steps.  */
5247   unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5248   while (TREE_CODE (base) == POLYNOMIAL_CHREC)
5249     {
5250       unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
5251       alignment = MIN (alignment, step_alignment);
5252       base = CHREC_LEFT (base);
5253     }
5254
5255   /* Punt if the expression is too complicated to handle.  */
5256   if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
5257     return NULL_TREE;
5258
5259   /* The only useful cases are those for which a dereference folds to something
5260      other than an INDIRECT_REF.  */
5261   tree ref_type = TREE_TYPE (TREE_TYPE (base));
5262   tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
5263   if (!ref)
5264     return NULL_TREE;
5265
5266   /* Analyze the base to which the steps we peeled were applied.  */
5267   poly_int64 bitsize, bitpos, bytepos;
5268   machine_mode mode;
5269   int unsignedp, reversep, volatilep;
5270   tree offset;
5271   base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
5272                               &unsignedp, &reversep, &volatilep);
5273   if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
5274     return NULL_TREE;
5275
5276   /* Restrict the alignment to that guaranteed by the offsets.  */
5277   unsigned int bytepos_alignment = known_alignment (bytepos);
5278   if (bytepos_alignment != 0)
5279     alignment = MIN (alignment, bytepos_alignment);
5280   if (offset)
5281     {
5282       unsigned int offset_alignment = highest_pow2_factor (offset);
5283       alignment = MIN (alignment, offset_alignment);
5284     }
5285
5286   *alignment_out = alignment;
5287   return base;
5288 }
5289
5290 /* Return the object whose alignment would need to be changed in order
5291    to increase the alignment of ADDR.  Store the maximum achievable
5292    alignment in *MAX_ALIGNMENT.  */
5293
5294 tree
5295 get_base_for_alignment (tree addr, unsigned int *max_alignment)
5296 {
5297   tree base = get_base_for_alignment_1 (addr, max_alignment);
5298   if (base)
5299     return base;
5300
5301   if (TREE_CODE (addr) == ADDR_EXPR)
5302     addr = TREE_OPERAND (addr, 0);
5303   *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5304   return addr;
5305 }
5306
5307 /* Recursive helper function.  */
5308
5309 static bool
5310 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
5311 {
5312   /* Inner loops of the nest should not contain siblings.  Example:
5313      when there are two consecutive loops,
5314
5315      | loop_0
5316      |   loop_1
5317      |     A[{0, +, 1}_1]
5318      |   endloop_1
5319      |   loop_2
5320      |     A[{0, +, 1}_2]
5321      |   endloop_2
5322      | endloop_0
5323
5324      the dependence relation cannot be captured by the distance
5325      abstraction.  */
5326   if (loop->next)
5327     return false;
5328
5329   loop_nest->safe_push (loop);
5330   if (loop->inner)
5331     return find_loop_nest_1 (loop->inner, loop_nest);
5332   return true;
5333 }
5334
5335 /* Return false when the LOOP is not well nested.  Otherwise return
5336    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
5337    contain the loops from the outermost to the innermost, as they will
5338    appear in the classic distance vector.  */
5339
5340 bool
5341 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
5342 {
5343   loop_nest->safe_push (loop);
5344   if (loop->inner)
5345     return find_loop_nest_1 (loop->inner, loop_nest);
5346   return true;
5347 }
5348
5349 /* Returns true when the data dependences have been computed, false otherwise.
5350    Given a loop nest LOOP, the following vectors are returned:
5351    DATAREFS is initialized to all the array elements contained in this loop,
5352    DEPENDENCE_RELATIONS contains the relations between the data references.
5353    Compute read-read and self relations if
5354    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
5355
5356 bool
5357 compute_data_dependences_for_loop (struct loop *loop,
5358                                    bool compute_self_and_read_read_dependences,
5359                                    vec<loop_p> *loop_nest,
5360                                    vec<data_reference_p> *datarefs,
5361                                    vec<ddr_p> *dependence_relations)
5362 {
5363   bool res = true;
5364
5365   memset (&dependence_stats, 0, sizeof (dependence_stats));
5366
5367   /* If the loop nest is not well formed, or one of the data references
5368      is not computable, give up without spending time to compute other
5369      dependences.  */
5370   if (!loop
5371       || !find_loop_nest (loop, loop_nest)
5372       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5373       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5374                                    compute_self_and_read_read_dependences))
5375     res = false;
5376
5377   if (dump_file && (dump_flags & TDF_STATS))
5378     {
5379       fprintf (dump_file, "Dependence tester statistics:\n");
5380
5381       fprintf (dump_file, "Number of dependence tests: %d\n",
5382                dependence_stats.num_dependence_tests);
5383       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5384                dependence_stats.num_dependence_dependent);
5385       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5386                dependence_stats.num_dependence_independent);
5387       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5388                dependence_stats.num_dependence_undetermined);
5389
5390       fprintf (dump_file, "Number of subscript tests: %d\n",
5391                dependence_stats.num_subscript_tests);
5392       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5393                dependence_stats.num_subscript_undetermined);
5394       fprintf (dump_file, "Number of same subscript function: %d\n",
5395                dependence_stats.num_same_subscript_function);
5396
5397       fprintf (dump_file, "Number of ziv tests: %d\n",
5398                dependence_stats.num_ziv);
5399       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5400                dependence_stats.num_ziv_dependent);
5401       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5402                dependence_stats.num_ziv_independent);
5403       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5404                dependence_stats.num_ziv_unimplemented);
5405
5406       fprintf (dump_file, "Number of siv tests: %d\n",
5407                dependence_stats.num_siv);
5408       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5409                dependence_stats.num_siv_dependent);
5410       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5411                dependence_stats.num_siv_independent);
5412       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5413                dependence_stats.num_siv_unimplemented);
5414
5415       fprintf (dump_file, "Number of miv tests: %d\n",
5416                dependence_stats.num_miv);
5417       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5418                dependence_stats.num_miv_dependent);
5419       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5420                dependence_stats.num_miv_independent);
5421       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5422                dependence_stats.num_miv_unimplemented);
5423     }
5424
5425   return res;
5426 }
5427
5428 /* Free the memory used by a data dependence relation DDR.  */
5429
5430 void
5431 free_dependence_relation (struct data_dependence_relation *ddr)
5432 {
5433   if (ddr == NULL)
5434     return;
5435
5436   if (DDR_SUBSCRIPTS (ddr).exists ())
5437     free_subscripts (DDR_SUBSCRIPTS (ddr));
5438   DDR_DIST_VECTS (ddr).release ();
5439   DDR_DIR_VECTS (ddr).release ();
5440
5441   free (ddr);
5442 }
5443
5444 /* Free the memory used by the data dependence relations from
5445    DEPENDENCE_RELATIONS.  */
5446
5447 void
5448 free_dependence_relations (vec<ddr_p> dependence_relations)
5449 {
5450   unsigned int i;
5451   struct data_dependence_relation *ddr;
5452
5453   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5454     if (ddr)
5455       free_dependence_relation (ddr);
5456
5457   dependence_relations.release ();
5458 }
5459
5460 /* Free the memory used by the data references from DATAREFS.  */
5461
5462 void
5463 free_data_refs (vec<data_reference_p> datarefs)
5464 {
5465   unsigned int i;
5466   struct data_reference *dr;
5467
5468   FOR_EACH_VEC_ELT (datarefs, i, dr)
5469     free_data_ref (dr);
5470   datarefs.release ();
5471 }
5472
5473 /* Common routine implementing both dr_direction_indicator and
5474    dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
5475    to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
5476    Return the step as the indicator otherwise.  */
5477
5478 static tree
5479 dr_step_indicator (struct data_reference *dr, int useful_min)
5480 {
5481   tree step = DR_STEP (dr);
5482   if (!step)
5483     return NULL_TREE;
5484   STRIP_NOPS (step);
5485   /* Look for cases where the step is scaled by a positive constant
5486      integer, which will often be the access size.  If the multiplication
5487      doesn't change the sign (due to overflow effects) then we can
5488      test the unscaled value instead.  */
5489   if (TREE_CODE (step) == MULT_EXPR
5490       && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
5491       && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
5492     {
5493       tree factor = TREE_OPERAND (step, 1);
5494       step = TREE_OPERAND (step, 0);
5495
5496       /* Strip widening and truncating conversions as well as nops.  */
5497       if (CONVERT_EXPR_P (step)
5498           && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
5499         step = TREE_OPERAND (step, 0);
5500       tree type = TREE_TYPE (step);
5501
5502       /* Get the range of step values that would not cause overflow.  */
5503       widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
5504                          / wi::to_widest (factor));
5505       widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
5506                          / wi::to_widest (factor));
5507
5508       /* Get the range of values that the unconverted step actually has.  */
5509       wide_int step_min, step_max;
5510       if (TREE_CODE (step) != SSA_NAME
5511           || get_range_info (step, &step_min, &step_max) != VR_RANGE)
5512         {
5513           step_min = wi::to_wide (TYPE_MIN_VALUE (type));
5514           step_max = wi::to_wide (TYPE_MAX_VALUE (type));
5515         }
5516
5517       /* Check whether the unconverted step has an acceptable range.  */
5518       signop sgn = TYPE_SIGN (type);
5519       if (wi::les_p (minv, widest_int::from (step_min, sgn))
5520           && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
5521         {
5522           if (wi::ge_p (step_min, useful_min, sgn))
5523             return ssize_int (useful_min);
5524           else if (wi::lt_p (step_max, 0, sgn))
5525             return ssize_int (-1);
5526           else
5527             return fold_convert (ssizetype, step);
5528         }
5529     }
5530   return DR_STEP (dr);
5531 }
5532
5533 /* Return a value that is negative iff DR has a negative step.  */
5534
5535 tree
5536 dr_direction_indicator (struct data_reference *dr)
5537 {
5538   return dr_step_indicator (dr, 0);
5539 }
5540
5541 /* Return a value that is zero iff DR has a zero step.  */
5542
5543 tree
5544 dr_zero_step_indicator (struct data_reference *dr)
5545 {
5546   return dr_step_indicator (dr, 1);
5547 }
5548
5549 /* Return true if DR is known to have a nonnegative (but possibly zero)
5550    step.  */
5551
5552 bool
5553 dr_known_forward_stride_p (struct data_reference *dr)
5554 {
5555   tree indicator = dr_direction_indicator (dr);
5556   tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
5557                                    fold_convert (ssizetype, indicator),
5558                                    ssize_int (0));
5559   return neg_step_val && integer_zerop (neg_step_val);
5560 }