]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-data-ref.c
Apply mechanical replacement (generated patch).
[thirdparty/gcc.git] / gcc / tree-data-ref.c
1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
24
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
30
31 The goals of this analysis are:
32
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
36
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
39
40 - distance vectors
41 - direction vectors
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
45
46 - to define a knowledge base for storing the data dependence
47 information,
48
49 - to define an interface to access this data.
50
51
52 Definitions:
53
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
58
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
61 | 3*x + 2*y = 1
62 has an integer solution x = 1 and y = -1.
63
64 References:
65
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
69
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
71 by Utpal Banerjee.
72
73
74 */
75
76 #include "config.h"
77 #include "system.h"
78 #include "coretypes.h"
79 #include "backend.h"
80 #include "rtl.h"
81 #include "tree.h"
82 #include "gimple.h"
83 #include "gimple-pretty-print.h"
84 #include "alias.h"
85 #include "fold-const.h"
86 #include "expr.h"
87 #include "gimple-iterator.h"
88 #include "tree-ssa-loop-niter.h"
89 #include "tree-ssa-loop.h"
90 #include "tree-ssa.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "dumpfile.h"
95 #include "tree-affine.h"
96 #include "params.h"
97 #include "builtins.h"
98 #include "tree-eh.h"
99 #include "ssa.h"
100
/* Counters accumulated by the dependence analyzer for statistics dumps.
   A single file-local instance, DEPENDENCE_STATS, is updated as tests
   are performed.  */

static struct datadep_stats
{
  /* Overall outcomes of whole dependence-relation tests.  */
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  /* Per-subscript test outcomes.  */
  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  /* ZIV (zero index variable) subscript tests.  */
  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  /* SIV (single index variable) subscript tests.  */
  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  /* MIV (multiple index variables) subscript tests.  */
  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
127
/* Forward declaration; the definition appears later in this file.  */
static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
					   unsigned int, unsigned int,
					   class loop *);
131 /* Returns true iff A divides B. */
132
133 static inline bool
134 tree_fold_divides_p (const_tree a, const_tree b)
135 {
136 gcc_assert (TREE_CODE (a) == INTEGER_CST);
137 gcc_assert (TREE_CODE (b) == INTEGER_CST);
138 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
139 }
140
/* Return true iff A evenly divides B, i.e. B modulo A is zero.  */

static inline bool
int_divides_p (int a, int b)
{
  const int remainder = b % a;
  return remainder == 0;
}
148
149 /* Return true if reference REF contains a union access. */
150
151 static bool
152 ref_contains_union_access_p (tree ref)
153 {
154 while (handled_component_p (ref))
155 {
156 ref = TREE_OPERAND (ref, 0);
157 if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
158 || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
159 return true;
160 }
161 return false;
162 }
163
164 \f
165
166 /* Dump into FILE all the data references from DATAREFS. */
167
168 static void
169 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
170 {
171 unsigned int i;
172 struct data_reference *dr;
173
174 FOR_EACH_VEC_ELT (datarefs, i, dr)
175 dump_data_reference (file, dr);
176 }
177
/* Unified dump of all the data references in REF to stderr.  */

DEBUG_FUNCTION void
debug (vec<data_reference_p> &ref)
{
  dump_data_references (stderr, ref);
}
185
186 DEBUG_FUNCTION void
187 debug (vec<data_reference_p> *ptr)
188 {
189 if (ptr)
190 debug (*ptr);
191 else
192 fprintf (stderr, "<nil>\n");
193 }
194
195
/* Dump to stderr all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}
203
/* Print to stderr the data_reference DR.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}
211
212 /* Dump function for a DATA_REFERENCE structure. */
213
214 void
215 dump_data_reference (FILE *outf,
216 struct data_reference *dr)
217 {
218 unsigned int i;
219
220 fprintf (outf, "#(Data Ref: \n");
221 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
222 fprintf (outf, "# stmt: ");
223 print_gimple_stmt (outf, DR_STMT (dr), 0);
224 fprintf (outf, "# ref: ");
225 print_generic_stmt (outf, DR_REF (dr));
226 fprintf (outf, "# base_object: ");
227 print_generic_stmt (outf, DR_BASE_OBJECT (dr));
228
229 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
230 {
231 fprintf (outf, "# Access function %d: ", i);
232 print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
233 }
234 fprintf (outf, "#)\n");
235 }
236
/* Unified dump of the data_reference REF to stderr.  */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}
244
245 DEBUG_FUNCTION void
246 debug (data_reference *ptr)
247 {
248 if (ptr)
249 debug (*ptr);
250 else
251 fprintf (stderr, "<nil>\n");
252 }
253
254
255 /* Dumps the affine function described by FN to the file OUTF. */
256
257 DEBUG_FUNCTION void
258 dump_affine_function (FILE *outf, affine_fn fn)
259 {
260 unsigned i;
261 tree coef;
262
263 print_generic_expr (outf, fn[0], TDF_SLIM);
264 for (i = 1; fn.iterate (i, &coef); i++)
265 {
266 fprintf (outf, " + ");
267 print_generic_expr (outf, coef, TDF_SLIM);
268 fprintf (outf, " * x_%u", i);
269 }
270 }
271
272 /* Dumps the conflict function CF to the file OUTF. */
273
274 DEBUG_FUNCTION void
275 dump_conflict_function (FILE *outf, conflict_function *cf)
276 {
277 unsigned i;
278
279 if (cf->n == NO_DEPENDENCE)
280 fprintf (outf, "no dependence");
281 else if (cf->n == NOT_KNOWN)
282 fprintf (outf, "not known");
283 else
284 {
285 for (i = 0; i < cf->n; i++)
286 {
287 if (i != 0)
288 fprintf (outf, " ");
289 fprintf (outf, "[");
290 dump_affine_function (outf, cf->fns[i]);
291 fprintf (outf, "]");
292 }
293 }
294 }
295
296 /* Dump function for a SUBSCRIPT structure. */
297
298 DEBUG_FUNCTION void
299 dump_subscript (FILE *outf, struct subscript *subscript)
300 {
301 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
302
303 fprintf (outf, "\n (subscript \n");
304 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
305 dump_conflict_function (outf, cf);
306 if (CF_NONTRIVIAL_P (cf))
307 {
308 tree last_iteration = SUB_LAST_CONFLICT (subscript);
309 fprintf (outf, "\n last_conflict: ");
310 print_generic_expr (outf, last_iteration);
311 }
312
313 cf = SUB_CONFLICTS_IN_B (subscript);
314 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: ");
315 dump_conflict_function (outf, cf);
316 if (CF_NONTRIVIAL_P (cf))
317 {
318 tree last_iteration = SUB_LAST_CONFLICT (subscript);
319 fprintf (outf, "\n last_conflict: ");
320 print_generic_expr (outf, last_iteration);
321 }
322
323 fprintf (outf, "\n (Subscript distance: ");
324 print_generic_expr (outf, SUB_DISTANCE (subscript));
325 fprintf (outf, " ))\n");
326 }
327
328 /* Print the classic direction vector DIRV to OUTF. */
329
330 DEBUG_FUNCTION void
331 print_direction_vector (FILE *outf,
332 lambda_vector dirv,
333 int length)
334 {
335 int eq;
336
337 for (eq = 0; eq < length; eq++)
338 {
339 enum data_dependence_direction dir = ((enum data_dependence_direction)
340 dirv[eq]);
341
342 switch (dir)
343 {
344 case dir_positive:
345 fprintf (outf, " +");
346 break;
347 case dir_negative:
348 fprintf (outf, " -");
349 break;
350 case dir_equal:
351 fprintf (outf, " =");
352 break;
353 case dir_positive_or_equal:
354 fprintf (outf, " +=");
355 break;
356 case dir_positive_or_negative:
357 fprintf (outf, " +-");
358 break;
359 case dir_negative_or_equal:
360 fprintf (outf, " -=");
361 break;
362 case dir_star:
363 fprintf (outf, " *");
364 break;
365 default:
366 fprintf (outf, "indep");
367 break;
368 }
369 }
370 fprintf (outf, "\n");
371 }
372
373 /* Print a vector of direction vectors. */
374
375 DEBUG_FUNCTION void
376 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
377 int length)
378 {
379 unsigned j;
380 lambda_vector v;
381
382 FOR_EACH_VEC_ELT (dir_vects, j, v)
383 print_direction_vector (outf, v, length);
384 }
385
386 /* Print out a vector VEC of length N to OUTFILE. */
387
388 DEBUG_FUNCTION void
389 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
390 {
391 int i;
392
393 for (i = 0; i < n; i++)
394 fprintf (outfile, "%3d ", (int)vector[i]);
395 fprintf (outfile, "\n");
396 }
397
398 /* Print a vector of distance vectors. */
399
400 DEBUG_FUNCTION void
401 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
402 int length)
403 {
404 unsigned j;
405 lambda_vector v;
406
407 FOR_EACH_VEC_ELT (dist_vects, j, v)
408 print_lambda_vector (outf, v, length);
409 }
410
/* Dump function for a DATA_DEPENDENCE_RELATION structure.  Prints the
   two data references, then either "don't know" / "no dependence", or
   (for a known dependence) the per-subscript information, the loop
   nest, and the distance and direction vectors.  */

DEBUG_FUNCTION void
dump_data_dependence_relation (FILE *outf,
			       struct data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  /* A null DDR or chrec_dont_know means the analyzer gave up.  */
  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (ddr)
	{
	  dra = DDR_A (ddr);
	  drb = DDR_B (ddr);
	  if (dra)
	    dump_data_reference (outf, dra);
	  else
	    fprintf (outf, "    (nil)\n");
	  if (drb)
	    dump_data_reference (outf, drb);
	  else
	    fprintf (outf, "    (nil)\n");
	}
      fprintf (outf, "    (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  /* chrec_known means proven independence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    fprintf (outf, "    (no dependence)\n");

  /* NULL_TREE means there is a dependence; dump its details.  */
  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;
      class loop *loopi;

      subscript *sub;
      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
	{
	  fprintf (outf, "  access_fn_A: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
	  fprintf (outf, "  access_fn_B: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
	  dump_subscript (outf, sub);
	}

      fprintf (outf, "  loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
	fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (outf, "  distance_vector: ");
	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	}

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
	{
	  fprintf (outf, "  direction_vector: ");
	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
				  DDR_NB_LOOPS (ddr));
	}
    }

  fprintf (outf, ")\n");
}
485
/* Debug version: dump the dependence relation DDR to stderr.  */

DEBUG_FUNCTION void
debug_data_dependence_relation (struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}
493
494 /* Dump into FILE all the dependence relations from DDRS. */
495
496 DEBUG_FUNCTION void
497 dump_data_dependence_relations (FILE *file,
498 vec<ddr_p> ddrs)
499 {
500 unsigned int i;
501 struct data_dependence_relation *ddr;
502
503 FOR_EACH_VEC_ELT (ddrs, i, ddr)
504 dump_data_dependence_relation (file, ddr);
505 }
506
/* Unified dump of all the dependence relations in REF to stderr.  */

DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}
512
513 DEBUG_FUNCTION void
514 debug (vec<ddr_p> *ptr)
515 {
516 if (ptr)
517 debug (*ptr);
518 else
519 fprintf (stderr, "<nil>\n");
520 }
521
522
/* Dump to STDERR all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}
530
/* Dumps the distance and direction vectors in FILE.  DDRS contains
   the dependence relations; each printed vector has DDR_NB_LOOPS
   elements, i.e. one per loop in the considered nest.  */
535
536 DEBUG_FUNCTION void
537 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
538 {
539 unsigned int i, j;
540 struct data_dependence_relation *ddr;
541 lambda_vector v;
542
543 FOR_EACH_VEC_ELT (ddrs, i, ddr)
544 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
545 {
546 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
547 {
548 fprintf (file, "DISTANCE_V (");
549 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
550 fprintf (file, ")\n");
551 }
552
553 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
554 {
555 fprintf (file, "DIRECTION_V (");
556 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
557 fprintf (file, ")\n");
558 }
559 }
560
561 fprintf (file, "\n\n");
562 }
563
564 /* Dumps the data dependence relations DDRS in FILE. */
565
566 DEBUG_FUNCTION void
567 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
568 {
569 unsigned int i;
570 struct data_dependence_relation *ddr;
571
572 FOR_EACH_VEC_ELT (ddrs, i, ddr)
573 dump_data_dependence_relation (file, ddr);
574
575 fprintf (file, "\n\n");
576 }
577
/* Dump the data dependence relations DDRS to stderr.  */

DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}
583
/* Forward declaration of the recursive worker; see the definition
   below for the contract.  */
static void
split_constant_offset (tree exp, tree *var, tree *off,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit);
588
/* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
   (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
   constant of type ssizetype, and returns true.  If we cannot do this
   with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
   is returned.

   CACHE memoizes results for multi-use SSA names to avoid un-CSEing
   large amounts of code; LIMIT bounds the depth of the SSA def-chain
   walk and is decremented as definitions are followed.  */

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
			 tree *var, tree *off,
			 hash_map<tree, std::pair<tree, tree> > &cache,
			 unsigned *limit)
{
  tree var0, var1;
  tree off0, off1;
  enum tree_code ocode = code;

  *var = NULL_TREE;
  *off = NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      /* A bare constant splits as 0 + C.  */
      *var = build_int_cst (type, 0);
      *off = fold_convert (ssizetype, op0);
      return true;

    case POINTER_PLUS_EXPR:
      ocode = PLUS_EXPR;
      /* FALLTHROUGH */
    case PLUS_EXPR:
    case MINUS_EXPR:
      if (TREE_CODE (op1) == INTEGER_CST)
	{
	  /* VAR +- C: fold C into the offset of the split of VAR.  */
	  split_constant_offset (op0, &var0, &off0, cache, limit);
	  *var = var0;
	  *off = size_binop (ocode, off0, fold_convert (ssizetype, op1));
	  return true;
	}
      /* Split both operands and recombine variable parts and offsets
	 separately.  */
      split_constant_offset (op0, &var0, &off0, cache, limit);
      split_constant_offset (op1, &var1, &off1, cache, limit);
      *var = fold_build2 (code, type, var0, var1);
      *off = size_binop (ocode, off0, off1);
      return true;

    case MULT_EXPR:
      /* Only multiplications by a constant scale can be distributed
	 over the split.  */
      if (TREE_CODE (op1) != INTEGER_CST)
	return false;

      split_constant_offset (op0, &var0, &off0, cache, limit);
      *var = fold_build2 (MULT_EXPR, type, var0, op1);
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      return true;

    case ADDR_EXPR:
      {
	tree base, poffset;
	poly_int64 pbitsize, pbitpos, pbytepos;
	machine_mode pmode;
	int punsignedp, preversep, pvolatilep;

	op0 = TREE_OPERAND (op0, 0);
	base
	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
				 &punsignedp, &preversep, &pvolatilep);

	/* Punt on bit offsets that are not a whole number of bytes.  */
	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
	  return false;
	base = build_fold_addr_expr (base);
	off0 = ssize_int (pbytepos);

	if (poffset)
	  {
	    /* Split the variable offset returned by get_inner_reference
	       and fold its constant part into OFF0.  */
	    split_constant_offset (poffset, &poffset, &off1, cache, limit);
	    off0 = size_binop (PLUS_EXPR, off0, off1);
	    if (POINTER_TYPE_P (TREE_TYPE (base)))
	      base = fold_build_pointer_plus (base, poffset);
	    else
	      base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
				  fold_convert (TREE_TYPE (base), poffset));
	  }

	var0 = fold_convert (type, base);

	/* If variable length types are involved, punt, otherwise casts
	   might be converted into ARRAY_REFs in gimplify_conversion.
	   To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
	   possibly no longer appears in current GIMPLE, might resurface.
	   This perhaps could run
	   if (CONVERT_EXPR_P (var0))
	     {
	       gimplify_conversion (&var0);
	       // Attempt to fill in any within var0 found ARRAY_REF's
	       // element size from corresponding op embedded ARRAY_REF,
	       // if unsuccessful, just punt.
	     }  */
	while (POINTER_TYPE_P (type))
	  type = TREE_TYPE (type);
	if (int_size_in_bytes (type) < 0)
	  return false;

	*var = var0;
	*off = off0;
	return true;
      }

    case SSA_NAME:
      {
	if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
	  return false;

	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
	enum tree_code subcode;

	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	  return false;

	subcode = gimple_assign_rhs_code (def_stmt);

	/* We are using a cache to avoid un-CSEing large amounts of code.  */
	bool use_cache = false;
	if (!has_single_use (op0)
	    && (subcode == POINTER_PLUS_EXPR
		|| subcode == PLUS_EXPR
		|| subcode == MINUS_EXPR
		|| subcode == MULT_EXPR
		|| subcode == ADDR_EXPR
		|| CONVERT_EXPR_CODE_P (subcode)))
	  {
	    use_cache = true;
	    bool existed;
	    std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
	    if (existed)
	      {
		/* A cached zero offset records a previous failure (or a
		   cycle guard); anything else is a previous success.  */
		if (integer_zerop (e.second))
		  return false;
		*var = e.first;
		*off = e.second;
		return true;
	      }
	    /* Pre-seed with (op0, 0) so a recursive visit of OP0 fails
	       instead of looping.  */
	    e = std::make_pair (op0, ssize_int (0));
	  }

	/* Bound the length of the def-chain walk.  */
	if (*limit == 0)
	  return false;
	--*limit;

	var0 = gimple_assign_rhs1 (def_stmt);
	var1 = gimple_assign_rhs2 (def_stmt);

	bool res = split_constant_offset_1 (type, var0, subcode, var1,
					    var, off, cache, limit);
	if (res && use_cache)
	  *cache.get (op0) = std::make_pair (*var, *off);
	return res;
      }
    CASE_CONVERT:
      {
	/* We must not introduce undefined overflow, and we must not change
	   the value.  Hence we're okay if the inner type doesn't overflow
	   to start with (pointer or signed), the outer type also is an
	   integer or pointer and the outer precision is at least as large
	   as the inner.  */
	tree itype = TREE_TYPE (op0);
	if ((POINTER_TYPE_P (itype)
	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
	    && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
	    && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
	  {
	    if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
	      {
		/* Split the unconverted operand and try to prove that
		   wrapping isn't a problem.  */
		tree tmp_var, tmp_off;
		split_constant_offset (op0, &tmp_var, &tmp_off, cache, limit);

		/* See whether we have an SSA_NAME whose range is known
		   to be [A, B].  */
		if (TREE_CODE (tmp_var) != SSA_NAME)
		  return false;
		wide_int var_min, var_max;
		value_range_kind vr_type = get_range_info (tmp_var, &var_min,
							   &var_max);
		wide_int var_nonzero = get_nonzero_bits (tmp_var);
		signop sgn = TYPE_SIGN (itype);
		if (intersect_range_with_nonzero_bits (vr_type, &var_min,
						       &var_max, var_nonzero,
						       sgn) != VR_RANGE)
		  return false;

		/* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
		   is known to be [A + TMP_OFF, B + TMP_OFF], with all
		   operations done in ITYPE.  The addition must overflow
		   at both ends of the range or at neither.  */
		wi::overflow_type overflow[2];
		unsigned int prec = TYPE_PRECISION (itype);
		wide_int woff = wi::to_wide (tmp_off, prec);
		wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
		wi::add (var_max, woff, sgn, &overflow[1]);
		if ((overflow[0] != wi::OVF_NONE) != (overflow[1] != wi::OVF_NONE))
		  return false;

		/* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR.  */
		widest_int diff = (widest_int::from (op0_min, sgn)
				   - widest_int::from (var_min, sgn));
		var0 = tmp_var;
		*off = wide_int_to_tree (ssizetype, diff);
	      }
	    else
	      split_constant_offset (op0, &var0, off, cache, limit);
	    *var = fold_convert (type, var0);
	    return true;
	  }
	return false;
      }

    default:
      return false;
    }
}
808
809 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
810 will be ssizetype. */
811
812 static void
813 split_constant_offset (tree exp, tree *var, tree *off,
814 hash_map<tree, std::pair<tree, tree> > &cache,
815 unsigned *limit)
816 {
817 tree type = TREE_TYPE (exp), op0, op1, e, o;
818 enum tree_code code;
819
820 *var = exp;
821 *off = ssize_int (0);
822
823 if (tree_is_chrec (exp)
824 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
825 return;
826
827 code = TREE_CODE (exp);
828 extract_ops_from_tree (exp, &code, &op0, &op1);
829 if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache, limit))
830 {
831 *var = e;
832 *off = o;
833 }
834 }
835
/* Public entry point: express EXP as VAR + OFF, with OFF a constant of
   type ssizetype.  The SSA def-chain walk is bounded by the
   ssa-name-def-chain-limit parameter.  */

void
split_constant_offset (tree exp, tree *var, tree *off)
{
  unsigned limit = param_ssa_name_def_chain_limit;
  /* Lazily allocated and reused across calls; emptied (but not freed)
     after each top-level query.  */
  static hash_map<tree, std::pair<tree, tree> > *cache;
  if (!cache)
    cache = new hash_map<tree, std::pair<tree, tree> > (37);
  split_constant_offset (exp, var, off, *cache, &limit);
  cache->empty ();
}
846
847 /* Returns the address ADDR of an object in a canonical shape (without nop
848 casts, and with type of pointer to the object). */
849
850 static tree
851 canonicalize_base_object_address (tree addr)
852 {
853 tree orig = addr;
854
855 STRIP_NOPS (addr);
856
857 /* The base address may be obtained by casting from integer, in that case
858 keep the cast. */
859 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
860 return orig;
861
862 if (TREE_CODE (addr) != ADDR_EXPR)
863 return addr;
864
865 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
866 }
867
868 /* Analyze the behavior of memory reference REF within STMT.
869 There are two modes:
870
871 - BB analysis. In this case we simply split the address into base,
872 init and offset components, without reference to any containing loop.
873 The resulting base and offset are general expressions and they can
874 vary arbitrarily from one iteration of the containing loop to the next.
875 The step is always zero.
876
877 - loop analysis. In this case we analyze the reference both wrt LOOP
878 and on the basis that the reference occurs (is "used") in LOOP;
879 see the comment above analyze_scalar_evolution_in_loop for more
880 information about this distinction. The base, init, offset and
881 step fields are all invariant in LOOP.
882
883 Perform BB analysis if LOOP is null, or if LOOP is the function's
884 dummy outermost loop. In other cases perform loop analysis.
885
886 Return true if the analysis succeeded and store the results in DRB if so.
887 BB analysis can only fail for bitfield or reversed-storage accesses. */
888
889 opt_result
890 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
891 class loop *loop, const gimple *stmt)
892 {
893 poly_int64 pbitsize, pbitpos;
894 tree base, poffset;
895 machine_mode pmode;
896 int punsignedp, preversep, pvolatilep;
897 affine_iv base_iv, offset_iv;
898 tree init, dinit, step;
899 bool in_loop = (loop && loop->num);
900
901 if (dump_file && (dump_flags & TDF_DETAILS))
902 fprintf (dump_file, "analyze_innermost: ");
903
904 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
905 &punsignedp, &preversep, &pvolatilep);
906 gcc_assert (base != NULL_TREE);
907
908 poly_int64 pbytepos;
909 if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
910 return opt_result::failure_at (stmt,
911 "failed: bit offset alignment.\n");
912
913 if (preversep)
914 return opt_result::failure_at (stmt,
915 "failed: reverse storage order.\n");
916
917 /* Calculate the alignment and misalignment for the inner reference. */
918 unsigned int HOST_WIDE_INT bit_base_misalignment;
919 unsigned int bit_base_alignment;
920 get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
921
922 /* There are no bitfield references remaining in BASE, so the values
923 we got back must be whole bytes. */
924 gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
925 && bit_base_misalignment % BITS_PER_UNIT == 0);
926 unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
927 poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
928
929 if (TREE_CODE (base) == MEM_REF)
930 {
931 if (!integer_zerop (TREE_OPERAND (base, 1)))
932 {
933 /* Subtract MOFF from the base and add it to POFFSET instead.
934 Adjust the misalignment to reflect the amount we subtracted. */
935 poly_offset_int moff = mem_ref_offset (base);
936 base_misalignment -= moff.force_shwi ();
937 tree mofft = wide_int_to_tree (sizetype, moff);
938 if (!poffset)
939 poffset = mofft;
940 else
941 poffset = size_binop (PLUS_EXPR, poffset, mofft);
942 }
943 base = TREE_OPERAND (base, 0);
944 }
945 else
946 base = build_fold_addr_expr (base);
947
948 if (in_loop)
949 {
950 if (!simple_iv (loop, loop, base, &base_iv, true))
951 return opt_result::failure_at
952 (stmt, "failed: evolution of base is not affine.\n");
953 }
954 else
955 {
956 base_iv.base = base;
957 base_iv.step = ssize_int (0);
958 base_iv.no_overflow = true;
959 }
960
961 if (!poffset)
962 {
963 offset_iv.base = ssize_int (0);
964 offset_iv.step = ssize_int (0);
965 }
966 else
967 {
968 if (!in_loop)
969 {
970 offset_iv.base = poffset;
971 offset_iv.step = ssize_int (0);
972 }
973 else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
974 return opt_result::failure_at
975 (stmt, "failed: evolution of offset is not affine.\n");
976 }
977
978 init = ssize_int (pbytepos);
979
980 /* Subtract any constant component from the base and add it to INIT instead.
981 Adjust the misalignment to reflect the amount we subtracted. */
982 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
983 init = size_binop (PLUS_EXPR, init, dinit);
984 base_misalignment -= TREE_INT_CST_LOW (dinit);
985
986 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
987 init = size_binop (PLUS_EXPR, init, dinit);
988
989 step = size_binop (PLUS_EXPR,
990 fold_convert (ssizetype, base_iv.step),
991 fold_convert (ssizetype, offset_iv.step));
992
993 base = canonicalize_base_object_address (base_iv.base);
994
995 /* See if get_pointer_alignment can guarantee a higher alignment than
996 the one we calculated above. */
997 unsigned int HOST_WIDE_INT alt_misalignment;
998 unsigned int alt_alignment;
999 get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1000
1001 /* As above, these values must be whole bytes. */
1002 gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1003 && alt_misalignment % BITS_PER_UNIT == 0);
1004 alt_alignment /= BITS_PER_UNIT;
1005 alt_misalignment /= BITS_PER_UNIT;
1006
1007 if (base_alignment < alt_alignment)
1008 {
1009 base_alignment = alt_alignment;
1010 base_misalignment = alt_misalignment;
1011 }
1012
1013 drb->base_address = base;
1014 drb->offset = fold_convert (ssizetype, offset_iv.base);
1015 drb->init = init;
1016 drb->step = step;
1017 if (known_misalignment (base_misalignment, base_alignment,
1018 &drb->base_misalignment))
1019 drb->base_alignment = base_alignment;
1020 else
1021 {
1022 drb->base_alignment = known_alignment (base_misalignment);
1023 drb->base_misalignment = 0;
1024 }
1025 drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1026 drb->step_alignment = highest_pow2_factor (step);
1027
1028 if (dump_file && (dump_flags & TDF_DETAILS))
1029 fprintf (dump_file, "success.\n");
1030
1031 return opt_result::success ();
1032 }
1033
1034 /* Return true if OP is a valid component reference for a DR access
1035 function. This accepts a subset of what handled_component_p accepts. */
1036
1037 static bool
1038 access_fn_component_p (tree op)
1039 {
1040 switch (TREE_CODE (op))
1041 {
1042 case REALPART_EXPR:
1043 case IMAGPART_EXPR:
1044 case ARRAY_REF:
1045 return true;
1046
1047 case COMPONENT_REF:
1048 return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1049
1050 default:
1051 return false;
1052 }
1053 }
1054
/* Determines the base object and the list of indices of memory reference
   DR, analyzed in LOOP and instantiated before NEST.

   On return DR_BASE_OBJECT and DR_ACCESS_FNS of DR are filled in.  Access
   functions are pushed outermost-dimension-last (the loop below walks the
   reference from the outside in).  */

static void
dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
{
  vec<tree> access_fns = vNULL;
  tree ref, op;
  tree base, off, access_fn;

  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      DR_BASE_OBJECT (dr) = DR_REF (dr);
      DR_ACCESS_FNS (dr).create (0);
      return;
    }

  ref = DR_REF (dr);

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p.  */
  while (handled_component_p (ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
	{
	  /* The array index becomes an access function after analysis
	     and instantiation in the nest.  */
	  op = TREE_OPERAND (ref, 1);
	  access_fn = analyze_scalar_evolution (loop, op);
	  access_fn = instantiate_scev (nest, loop, access_fn);
	  access_fns.safe_push (access_fn);
	}
      else if (TREE_CODE (ref) == COMPONENT_REF
	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
	{
	  /* For COMPONENT_REFs of records (but not unions!) use the
	     FIELD_DECL offset as constant access function so we can
	     disambiguate a[i].f1 and a[i].f2.  */
	  /* NOTE: this OFF shadows the function-scope OFF; the constant
	     pushed here is the field's bit offset within the record.  */
	  tree off = component_ref_field_offset (ref);
	  off = size_binop (PLUS_EXPR,
			    size_binop (MULT_EXPR,
					fold_convert (bitsizetype, off),
					bitsize_int (BITS_PER_UNIT)),
			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
	  access_fns.safe_push (off);
	}
      else
	/* If we have an unhandled component we could not translate
	   to an access function stop analyzing.  We have determined
	   our base object in this case.  */
	break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      op = TREE_OPERAND (ref, 0);
      access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (nest, loop, access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
	{
	  tree orig_type;
	  tree memoff = TREE_OPERAND (ref, 1);
	  base = initial_condition (access_fn);
	  orig_type = TREE_TYPE (base);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  split_constant_offset (base, &base, &off);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  /* Fold the MEM_REF offset into the evolutions initial
	     value to make more bases comparable.  */
	  if (!integer_zerop (memoff))
	    {
	      off = size_binop (PLUS_EXPR, off,
				fold_convert (ssizetype, memoff));
	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
	    }
	  /* Adjust the offset so it is a multiple of the access type
	     size and thus we separate bases that can possibly be used
	     to produce partial overlaps (which the access_fn machinery
	     cannot handle).  */
	  wide_int rem;
	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
	    rem = wi::mod_trunc
	      (wi::to_wide (off),
	       wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
	       SIGNED);
	  else
	    /* If we can't compute the remainder simply force the initial
	       condition to zero.  */
	    rem = wi::to_wide (off);
	  /* Split OFF into an aligned part (kept in the initial
	     condition) and the remainder (kept in the MEM_REF offset).  */
	  off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
	  memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
	  /* And finally replace the initial condition.  */
	  access_fn = chrec_replace_initial_condition
	      (access_fn, fold_convert (orig_type, off));
	  /* ???  This is still not a suitable base object for
	     dr_may_alias_p - the base object needs to be an
	     access that covers the object as whole.  With
	     an evolution in the pointer this cannot be
	     guaranteed.
	     As a band-aid, mark the access so we can special-case
	     it in dr_may_alias_p.  */
	  tree old = ref;
	  ref = fold_build2_loc (EXPR_LOCATION (ref),
				 MEM_REF, TREE_TYPE (ref),
				 base, memoff);
	  /* Preserve the dependence clique/base info of the original
	     MEM_REF on the rebuilt one.  */
	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
	  DR_UNCONSTRAINED_BASE (dr) = true;
	  access_fns.safe_push (access_fn);
	}
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
		    build_fold_addr_expr (ref),
		    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  DR_BASE_OBJECT (dr) = ref;
  DR_ACCESS_FNS (dr) = access_fns;
}
1198
1199 /* Extracts the alias analysis information from the memory reference DR. */
1200
1201 static void
1202 dr_analyze_alias (struct data_reference *dr)
1203 {
1204 tree ref = DR_REF (dr);
1205 tree base = get_base_address (ref), addr;
1206
1207 if (INDIRECT_REF_P (base)
1208 || TREE_CODE (base) == MEM_REF)
1209 {
1210 addr = TREE_OPERAND (base, 0);
1211 if (TREE_CODE (addr) == SSA_NAME)
1212 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1213 }
1214 }
1215
/* Frees data reference DR: releases the access-function vector and then
   the structure itself (pairs with the XCNEW in create_data_ref).  */

void
free_data_ref (data_reference_p dr)
{
  DR_ACCESS_FNS (dr).release ();
  free (dr);
}
1224
/* Analyze memory reference MEMREF, which is accessed in STMT.
   The reference is a read if IS_READ is true, otherwise it is a write.
   IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
   within STMT, i.e. that it might not occur even if STMT is executed
   and runs to completion.

   Return the data_reference description of MEMREF.  NEST is the outermost
   loop in which the reference should be instantiated, LOOP is the loop
   in which the data reference should be analyzed.  The caller owns the
   returned structure and releases it with free_data_ref.  */

struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
		 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  /* XCNEW zero-initializes, so fields not set below start out NULL/0.  */
  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  /* Fill in the base/offset/init/step decomposition, the access
     functions and the alias info in turn.  Pass a NULL loop when not
     analyzing within a nest so dr_analyze_innermost stays basic-block
     local.  */
  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
			nest != NULL ? loop : NULL, stmt);
  dr_analyze_indices (dr, nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase misalignment: %d",
	       DR_BASE_MISALIGNMENT (dr));
      fprintf (dump_file, "\n\toffset alignment: %d",
	       DR_OFFSET_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
	{
	  fprintf (dump_file, "\tAccess function %d: ", i);
	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
	}
    }

  return dr;
}
1288
/* A helper function computes order between two tree expressions T1 and T2.
   This is used in comparator functions sorting objects based on the order
   of tree expressions.  The function returns -1, 0, or 1.  The order is
   arbitrary but total and deterministic within a compilation, which is
   all qsort-style callers need.  */

int
data_ref_compare_tree (tree t1, tree t2)
{
  int i, cmp;
  enum tree_code code;
  char tclass;

  /* Identity (including both NULL) compares equal; a lone NULL sorts
     first.  */
  if (t1 == t2)
    return 0;
  if (t1 == NULL)
    return -1;
  if (t2 == NULL)
    return 1;

  STRIP_USELESS_TYPE_CONVERSION (t1);
  STRIP_USELESS_TYPE_CONVERSION (t2);
  if (t1 == t2)
    return 0;

  /* Different codes order by code, except that all conversions are
     treated as one equivalence class.  */
  if (TREE_CODE (t1) != TREE_CODE (t2)
      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;

  code = TREE_CODE (t1);
  switch (code)
    {
    case INTEGER_CST:
      return tree_int_cst_compare (t1, t2);

    case STRING_CST:
      /* Shorter strings first, then lexicographic by contents.  */
      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
	return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
		     TREE_STRING_LENGTH (t1));

    case SSA_NAME:
      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
	return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
      break;

    default:
      if (POLY_INT_CST_P (t1))
	return compare_sizes_for_sort (wi::to_poly_widest (t1),
				       wi::to_poly_widest (t2));

      tclass = TREE_CODE_CLASS (code);

      /* For decls, compare their UIDs.  */
      if (tclass == tcc_declaration)
	{
	  if (DECL_UID (t1) != DECL_UID (t2))
	    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
	  break;
	}
      /* For expressions, compare their operands recursively.  Operands
	 are walked from last to first; any nonzero result decides.  */
      else if (IS_EXPR_CODE_CLASS (tclass))
	{
	  for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
	    {
	      cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
					   TREE_OPERAND (t2, i));
	      if (cmp != 0)
		return cmp;
	    }
	}
      else
	gcc_unreachable ();
    }

  /* Falling out of the switch means the trees compare equal.  */
  return 0;
}
1364
1365 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1366 check. */
1367
1368 opt_result
1369 runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1370 {
1371 if (dump_enabled_p ())
1372 dump_printf (MSG_NOTE,
1373 "consider run-time aliasing test between %T and %T\n",
1374 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1375
1376 if (!speed_p)
1377 return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1378 "runtime alias check not supported when"
1379 " optimizing for size.\n");
1380
1381 /* FORNOW: We don't support versioning with outer-loop in either
1382 vectorization or loop distribution. */
1383 if (loop != NULL && loop->inner != NULL)
1384 return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1385 "runtime alias check not supported for"
1386 " outer loop.\n");
1387
1388 return opt_result::success ();
1389 }
1390
1391 /* Operator == between two dr_with_seg_len objects.
1392
1393 This equality operator is used to make sure two data refs
1394 are the same one so that we will consider to combine the
1395 aliasing checks of those two pairs of data dependent data
1396 refs. */
1397
1398 static bool
1399 operator == (const dr_with_seg_len& d1,
1400 const dr_with_seg_len& d2)
1401 {
1402 return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1403 DR_BASE_ADDRESS (d2.dr), 0)
1404 && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1405 && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1406 && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1407 && known_eq (d1.access_size, d2.access_size)
1408 && d1.align == d2.align);
1409 }
1410
1411 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1412 so that we can combine aliasing checks in one scan. */
1413
1414 static int
1415 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1416 {
1417 const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1418 const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1419 const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1420 const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1421
1422 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1423 if a and c have the same basic address snd step, and b and d have the same
1424 address and step. Therefore, if any a&c or b&d don't have the same address
1425 and step, we don't care the order of those two pairs after sorting. */
1426 int comp_res;
1427
1428 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1429 DR_BASE_ADDRESS (b1.dr))) != 0)
1430 return comp_res;
1431 if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1432 DR_BASE_ADDRESS (b2.dr))) != 0)
1433 return comp_res;
1434 if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1435 DR_STEP (b1.dr))) != 0)
1436 return comp_res;
1437 if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1438 DR_STEP (b2.dr))) != 0)
1439 return comp_res;
1440 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1441 DR_OFFSET (b1.dr))) != 0)
1442 return comp_res;
1443 if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1444 DR_INIT (b1.dr))) != 0)
1445 return comp_res;
1446 if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1447 DR_OFFSET (b2.dr))) != 0)
1448 return comp_res;
1449 if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1450 DR_INIT (b2.dr))) != 0)
1451 return comp_res;
1452
1453 return 0;
1454 }
1455
/* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
   FACTOR is number of iterations that each data reference is accessed.

   Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
   we create an expression:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))

   for aliasing checks.  However, in some cases we can decrease the number
   of checks by combining two checks into one.  For example, suppose we have
   another pair of data refs store_ptr_0 & load_ptr_1, and if the following
   condition is satisfied:

   load_ptr_0 < load_ptr_1  &&
   load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0

   (this condition means, in each iteration of vectorized loop, the accessed
   memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
   load_ptr_1.)

   we then can use only the following expression to finish the alising checks
   between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))

   Note that we only consider that load_ptr_0 and load_ptr_1 have the same
   basic address.  */

void
prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
			       poly_uint64)
{
  /* Sort the collected data ref pairs so that we can scan them once to
     combine all possible aliasing checks.  */
  alias_pairs->qsort (comp_dr_with_seg_len_pair);

  /* Scan the sorted dr pairs and check if we can combine alias checks
     of two neighboring dr pairs.  */
  for (size_t i = 1; i < alias_pairs->length (); ++i)
    {
      /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
      dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
		      *dr_b1 = &(*alias_pairs)[i-1].second,
		      *dr_a2 = &(*alias_pairs)[i].first,
		      *dr_b2 = &(*alias_pairs)[i].second;

      /* Remove duplicate data ref pairs.  */
      if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
	{
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
	  /* ordered_remove shifts the tail down, so step back to revisit
	     the new element at index I.  */
	  alias_pairs->ordered_remove (i--);
	  continue;
	}

      if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
	{
	  /* We consider the case that DR_B1 and DR_B2 are same memrefs,
	     and DR_A1 and DR_A2 are two consecutive memrefs.  */
	  if (*dr_a1 == *dr_a2)
	    {
	      /* Swap the pointers so that below DR_A1/DR_A2 always name
		 the differing side of the pairs.  */
	      std::swap (dr_a1, dr_b1);
	      std::swap (dr_a2, dr_b2);
	    }

	  poly_int64 init_a1, init_a2;
	  /* Only consider cases in which the distance between the initial
	     DR_A1 and the initial DR_A2 is known at compile time.  */
	  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
				DR_BASE_ADDRESS (dr_a2->dr), 0)
	      || !operand_equal_p (DR_OFFSET (dr_a1->dr),
				   DR_OFFSET (dr_a2->dr), 0)
	      || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
	      || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
	    continue;

	  /* Don't combine if we can't tell which one comes first.  */
	  if (!ordered_p (init_a1, init_a2))
	    continue;

	  /* Make sure dr_a1 starts left of dr_a2.  */
	  if (maybe_gt (init_a1, init_a2))
	    {
	      /* Note this swaps the stored objects, not just the local
		 pointers, so the vector entries themselves change.  */
	      std::swap (*dr_a1, *dr_a2);
	      std::swap (init_a1, init_a2);
	    }

	  /* Work out what the segment length would be if we did combine
	     DR_A1 and DR_A2:

	     - If DR_A1 and DR_A2 have equal lengths, that length is
	       also the combined length.

	     - If DR_A1 and DR_A2 both have negative "lengths", the combined
	       length is the lower bound on those lengths.

	     - If DR_A1 and DR_A2 both have positive lengths, the combined
	       length is the upper bound on those lengths.

	     Other cases are unlikely to give a useful combination.

	     The lengths both have sizetype, so the sign is taken from
	     the step instead.  */
	  if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0))
	    {
	      poly_uint64 seg_len_a1, seg_len_a2;
	      if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
		  || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
		continue;

	      tree indicator_a = dr_direction_indicator (dr_a1->dr);
	      if (TREE_CODE (indicator_a) != INTEGER_CST)
		continue;

	      tree indicator_b = dr_direction_indicator (dr_a2->dr);
	      if (TREE_CODE (indicator_b) != INTEGER_CST)
		continue;

	      int sign_a = tree_int_cst_sgn (indicator_a);
	      int sign_b = tree_int_cst_sgn (indicator_b);

	      poly_uint64 new_seg_len;
	      if (sign_a <= 0 && sign_b <= 0)
		new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
	      else if (sign_a >= 0 && sign_b >= 0)
		new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
	      else
		continue;

	      dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
					      new_seg_len);
	      dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
	    }

	  /* This is always positive due to the swap above.  */
	  poly_uint64 diff = init_a2 - init_a1;

	  /* The new check will start at DR_A1.  Make sure that its access
	     size encompasses the initial DR_A2.  */
	  if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
	    {
	      dr_a1->access_size = upper_bound (dr_a1->access_size,
						diff + dr_a2->access_size);
	      unsigned int new_align = known_alignment (dr_a1->access_size);
	      dr_a1->align = MIN (dr_a1->align, new_align);
	    }
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
	  /* DR_A2's range is now covered by DR_A1's; drop the second pair
	     and revisit the element shifted into its slot.  */
	  alias_pairs->ordered_remove (i);
	  i--;
	}
    }
}
1615
1616 /* Given LOOP's two data references and segment lengths described by DR_A
1617 and DR_B, create expression checking if the two addresses ranges intersect
1618 with each other based on index of the two addresses. This can only be
1619 done if DR_A and DR_B referring to the same (array) object and the index
1620 is the only difference. For example:
1621
1622 DR_A DR_B
1623 data-ref arr[i] arr[j]
1624 base_object arr arr
1625 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1626
1627 The addresses and their index are like:
1628
1629 |<- ADDR_A ->| |<- ADDR_B ->|
1630 ------------------------------------------------------->
1631 | | | | | | | | | |
1632 ------------------------------------------------------->
1633 i_0 ... i_0+4 j_0 ... j_0+4
1634
1635 We can create expression based on index rather than address:
1636
1637 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1638
1639 Note evolution step of index needs to be considered in comparison. */
1640
1641 static bool
1642 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
1643 const dr_with_seg_len& dr_a,
1644 const dr_with_seg_len& dr_b)
1645 {
1646 if (integer_zerop (DR_STEP (dr_a.dr))
1647 || integer_zerop (DR_STEP (dr_b.dr))
1648 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1649 return false;
1650
1651 poly_uint64 seg_len1, seg_len2;
1652 if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
1653 || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
1654 return false;
1655
1656 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1657 return false;
1658
1659 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1660 return false;
1661
1662 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1663 return false;
1664
1665 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1666
1667 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1668 unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
1669 if (neg_step)
1670 {
1671 abs_step = -abs_step;
1672 seg_len1 = -seg_len1;
1673 seg_len2 = -seg_len2;
1674 }
1675 else
1676 {
1677 /* Include the access size in the length, so that we only have one
1678 tree addition below. */
1679 seg_len1 += dr_a.access_size;
1680 seg_len2 += dr_b.access_size;
1681 }
1682
1683 /* Infer the number of iterations with which the memory segment is accessed
1684 by DR. In other words, alias is checked if memory segment accessed by
1685 DR_A in some iterations intersect with memory segment accessed by DR_B
1686 in the same amount iterations.
1687 Note segnment length is a linear function of number of iterations with
1688 DR_STEP as the coefficient. */
1689 poly_uint64 niter_len1, niter_len2;
1690 if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
1691 || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
1692 return false;
1693
1694 poly_uint64 niter_access1 = 0, niter_access2 = 0;
1695 if (neg_step)
1696 {
1697 /* Divide each access size by the byte step, rounding up. */
1698 if (!can_div_trunc_p (dr_a.access_size - abs_step - 1,
1699 abs_step, &niter_access1)
1700 || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
1701 abs_step, &niter_access2))
1702 return false;
1703 }
1704
1705 unsigned int i;
1706 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1707 {
1708 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1709 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1710 /* Two indices must be the same if they are not scev, or not scev wrto
1711 current loop being vecorized. */
1712 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1713 || TREE_CODE (access2) != POLYNOMIAL_CHREC
1714 || CHREC_VARIABLE (access1) != (unsigned)loop->num
1715 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1716 {
1717 if (operand_equal_p (access1, access2, 0))
1718 continue;
1719
1720 return false;
1721 }
1722 /* The two indices must have the same step. */
1723 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1724 return false;
1725
1726 tree idx_step = CHREC_RIGHT (access1);
1727 /* Index must have const step, otherwise DR_STEP won't be constant. */
1728 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1729 /* Index must evaluate in the same direction as DR. */
1730 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1731
1732 tree min1 = CHREC_LEFT (access1);
1733 tree min2 = CHREC_LEFT (access2);
1734 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1735 return false;
1736
1737 /* Ideally, alias can be checked against loop's control IV, but we
1738 need to prove linear mapping between control IV and reference
1739 index. Although that should be true, we check against (array)
1740 index of data reference. Like segment length, index length is
1741 linear function of the number of iterations with index_step as
1742 the coefficient, i.e, niter_len * idx_step. */
1743 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1744 build_int_cst (TREE_TYPE (min1),
1745 niter_len1));
1746 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1747 build_int_cst (TREE_TYPE (min2),
1748 niter_len2));
1749 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1750 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1751 /* Adjust ranges for negative step. */
1752 if (neg_step)
1753 {
1754 /* IDX_LEN1 and IDX_LEN2 are negative in this case. */
1755 std::swap (min1, max1);
1756 std::swap (min2, max2);
1757
1758 /* As with the lengths just calculated, we've measured the access
1759 sizes in iterations, so multiply them by the index step. */
1760 tree idx_access1
1761 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1762 build_int_cst (TREE_TYPE (min1), niter_access1));
1763 tree idx_access2
1764 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1765 build_int_cst (TREE_TYPE (min2), niter_access2));
1766
1767 /* MINUS_EXPR because the above values are negative. */
1768 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
1769 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
1770 }
1771 tree part_cond_expr
1772 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1773 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1774 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1775 if (*cond_expr)
1776 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1777 *cond_expr, part_cond_expr);
1778 else
1779 *cond_expr = part_cond_expr;
1780 }
1781 return true;
1782 }
1783
/* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
   every address ADDR accessed by D:

     *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT

   In this case, every element accessed by D is aligned to at least
   ALIGN bytes.

   If ALIGN is zero then instead set *SEG_MAX_OUT so that:

     *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT.  */

static void
get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
		     tree *seg_max_out, HOST_WIDE_INT align)
{
  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ,.... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<- |seg_len| ->
			|
			base address

     where "n" is the number of scalar iterations covered by the segment.
     (This should be VF for a particular pair if we know that both steps
     are the same, otherwise it will be the full number of scalar loop
     iterations.)

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     If the access size is "access_size" bytes, the lowest addressed byte is:

	 base + (step < 0 ? seg_len : 0)   [LB]

     and the highest addressed byte is always below:

	 base + (step < 0 ? 0 : seg_len) + access_size   [UB]

     Thus:

	 LB <= ADDR < UB

     If ALIGN is nonzero, all three values are aligned to at least ALIGN
     bytes, so:

	 LB <= ADDR <= UB - ALIGN

     where "- ALIGN" folds naturally with the "+ access_size" and often
     cancels it out.

     We don't try to simplify LB and UB beyond this (e.g. by using
     MIN and MAX based on whether seg_len rather than the stride is
     negative) because it is possible for the absolute size of the
     segment to overflow the range of a ssize_t.

     Keeping the pointer_plus outside of the cond_expr should allow
     the cond_exprs to be shared with other alias checks.  */
  /* NEG_STEP is a runtime tree predicate: true when D walks downwards.  */
  tree indicator = dr_direction_indicator (d.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));
  /* ADDR_BASE = base + offset + init, the address of the first access.  */
  tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
					    DR_OFFSET (d.dr));
  addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
  tree seg_len
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));

  /* Select LB/UB reach depending on the runtime step direction; this
     mirrors the "step < 0 ? ... : ..." selections in the comment above.  */
  tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				seg_len, size_zero_node);
  tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				size_zero_node, seg_len);
  max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
			   size_int (d.access_size - align));

  *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
  *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
}
1867
1868 /* Given two data references and segment lengths described by DR_A and DR_B,
1869 create expression checking if the two addresses ranges intersect with
1870 each other:
1871
1872 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1873 || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0)) */
1874
1875 static void
1876 create_intersect_range_checks (class loop *loop, tree *cond_expr,
1877 const dr_with_seg_len& dr_a,
1878 const dr_with_seg_len& dr_b)
1879 {
1880 *cond_expr = NULL_TREE;
1881 if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1882 return;
1883
1884 unsigned HOST_WIDE_INT min_align;
1885 tree_code cmp_code;
1886 if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
1887 && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
1888 {
1889 /* In this case adding access_size to seg_len is likely to give
1890 a simple X * step, where X is either the number of scalar
1891 iterations or the vectorization factor. We're better off
1892 keeping that, rather than subtracting an alignment from it.
1893
1894 In this case the maximum values are exclusive and so there is
1895 no alias if the maximum of one segment equals the minimum
1896 of another. */
1897 min_align = 0;
1898 cmp_code = LE_EXPR;
1899 }
1900 else
1901 {
1902 /* Calculate the minimum alignment shared by all four pointers,
1903 then arrange for this alignment to be subtracted from the
1904 exclusive maximum values to get inclusive maximum values.
1905 This "- min_align" is cumulative with a "+ access_size"
1906 in the calculation of the maximum values. In the best
1907 (and common) case, the two cancel each other out, leaving
1908 us with an inclusive bound based only on seg_len. In the
1909 worst case we're simply adding a smaller number than before.
1910
1911 Because the maximum values are inclusive, there is an alias
1912 if the maximum value of one segment is equal to the minimum
1913 value of the other. */
1914 min_align = MIN (dr_a.align, dr_b.align);
1915 cmp_code = LT_EXPR;
1916 }
1917
1918 tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
1919 get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
1920 get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
1921
1922 *cond_expr
1923 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1924 fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
1925 fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
1926 }
1927
1928 /* Create a conditional expression that represents the run-time checks for
1929 overlapping of address ranges represented by a list of data references
1930 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
1931 COND_EXPR is the conditional expression to be used in the if statement
1932 that controls which version of the loop gets executed at runtime. */
1933
1934 void
1935 create_runtime_alias_checks (class loop *loop,
1936 vec<dr_with_seg_len_pair_t> *alias_pairs,
1937 tree * cond_expr)
1938 {
1939 tree part_cond_expr;
1940
1941 fold_defer_overflow_warnings ();
1942 for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1943 {
1944 const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1945 const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1946
1947 if (dump_enabled_p ())
1948 dump_printf (MSG_NOTE,
1949 "create runtime check for data references %T and %T\n",
1950 DR_REF (dr_a.dr), DR_REF (dr_b.dr));
1951
1952 /* Create condition expression for each pair data references. */
1953 create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1954 if (*cond_expr)
1955 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1956 *cond_expr, part_cond_expr);
1957 else
1958 *cond_expr = part_cond_expr;
1959 }
1960 fold_undefer_and_ignore_overflow_warnings ();
1961 }
1962
1963 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1964 expressions. */
1965 static bool
1966 dr_equal_offsets_p1 (tree offset1, tree offset2)
1967 {
1968 bool res;
1969
1970 STRIP_NOPS (offset1);
1971 STRIP_NOPS (offset2);
1972
1973 if (offset1 == offset2)
1974 return true;
1975
1976 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1977 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1978 return false;
1979
1980 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1981 TREE_OPERAND (offset2, 0));
1982
1983 if (!res || !BINARY_CLASS_P (offset1))
1984 return res;
1985
1986 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1987 TREE_OPERAND (offset2, 1));
1988
1989 return res;
1990 }
1991
1992 /* Check if DRA and DRB have equal offsets. */
1993 bool
1994 dr_equal_offsets_p (struct data_reference *dra,
1995 struct data_reference *drb)
1996 {
1997 tree offset1, offset2;
1998
1999 offset1 = DR_OFFSET (dra);
2000 offset2 = DR_OFFSET (drb);
2001
2002 return dr_equal_offsets_p1 (offset1, offset2);
2003 }
2004
2005 /* Returns true if FNA == FNB. */
2006
2007 static bool
2008 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2009 {
2010 unsigned i, n = fna.length ();
2011
2012 if (n != fnb.length ())
2013 return false;
2014
2015 for (i = 0; i < n; i++)
2016 if (!operand_equal_p (fna[i], fnb[i], 0))
2017 return false;
2018
2019 return true;
2020 }
2021
2022 /* If all the functions in CF are the same, returns one of them,
2023 otherwise returns NULL. */
2024
2025 static affine_fn
2026 common_affine_function (conflict_function *cf)
2027 {
2028 unsigned i;
2029 affine_fn comm;
2030
2031 if (!CF_NONTRIVIAL_P (cf))
2032 return affine_fn ();
2033
2034 comm = cf->fns[0];
2035
2036 for (i = 1; i < cf->n; i++)
2037 if (!affine_function_equal_p (comm, cf->fns[i]))
2038 return affine_fn ();
2039
2040 return comm;
2041 }
2042
/* Returns the base of the affine function FN, i.e. its constant term
   (element 0 of the coefficient vector).  */

static tree
affine_function_base (affine_fn fn)
{
  return fn[0];
}
2050
2051 /* Returns true if FN is a constant. */
2052
2053 static bool
2054 affine_function_constant_p (affine_fn fn)
2055 {
2056 unsigned i;
2057 tree coef;
2058
2059 for (i = 1; fn.iterate (i, &coef); i++)
2060 if (!integer_zerop (coef))
2061 return false;
2062
2063 return true;
2064 }
2065
2066 /* Returns true if FN is the zero constant function. */
2067
2068 static bool
2069 affine_function_zero_p (affine_fn fn)
2070 {
2071 return (integer_zerop (affine_function_base (fn))
2072 && affine_function_constant_p (fn));
2073 }
2074
2075 /* Returns a signed integer type with the largest precision from TA
2076 and TB. */
2077
2078 static tree
2079 signed_type_for_types (tree ta, tree tb)
2080 {
2081 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2082 return signed_type_for (ta);
2083 else
2084 return signed_type_for (tb);
2085 }
2086
2087 /* Applies operation OP on affine functions FNA and FNB, and returns the
2088 result. */
2089
2090 static affine_fn
2091 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2092 {
2093 unsigned i, n, m;
2094 affine_fn ret;
2095 tree coef;
2096
2097 if (fnb.length () > fna.length ())
2098 {
2099 n = fna.length ();
2100 m = fnb.length ();
2101 }
2102 else
2103 {
2104 n = fnb.length ();
2105 m = fna.length ();
2106 }
2107
2108 ret.create (m);
2109 for (i = 0; i < n; i++)
2110 {
2111 tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2112 TREE_TYPE (fnb[i]));
2113 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2114 }
2115
2116 for (; fna.iterate (i, &coef); i++)
2117 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2118 coef, integer_zero_node));
2119 for (; fnb.iterate (i, &coef); i++)
2120 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2121 integer_zero_node, coef));
2122
2123 return ret;
2124 }
2125
/* Returns the sum of affine functions FNA and FNB.  The result is a
   fresh vector; the caller must release it with affine_fn_free.  */

static affine_fn
affine_fn_plus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (PLUS_EXPR, fna, fnb);
}
2133
/* Returns the difference of affine functions FNA and FNB (FNA - FNB).
   The result is a fresh vector; the caller must release it with
   affine_fn_free.  */

static affine_fn
affine_fn_minus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (MINUS_EXPR, fna, fnb);
}
2141
/* Frees affine function FN by releasing its underlying coefficient
   vector.  */

static void
affine_fn_free (affine_fn fn)
{
  fn.release ();
}
2149
/* Determine for each subscript in the data dependence relation DDR
   the distance, i.e. the constant difference between the conflicting
   iterations of the two references.  Only meaningful when DDR has not
   already been resolved (DDR_ARE_DEPENDENT is NULL_TREE); otherwise
   this is a no-op.  */

static void
compute_subscript_distance (struct data_dependence_relation *ddr)
{
  conflict_function *cf_a, *cf_b;
  affine_fn fn_a, fn_b, diff;

  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;

      for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
	{
	  struct subscript *subscript;

	  subscript = DDR_SUBSCRIPT (ddr, i);
	  cf_a = SUB_CONFLICTS_IN_A (subscript);
	  cf_b = SUB_CONFLICTS_IN_B (subscript);

	  fn_a = common_affine_function (cf_a);
	  fn_b = common_affine_function (cf_b);
	  if (!fn_a.exists () || !fn_b.exists ())
	    {
	      /* No single affine function describes the conflicts:
		 record "don't know" and give up on the remaining
		 subscripts as well (note: return, not continue).  */
	      SUB_DISTANCE (subscript) = chrec_dont_know;
	      return;
	    }
	  /* The distance is the (constant) difference of the two
	     conflict functions.  FN_A and FN_B alias entries of the
	     conflict functions and must not be freed here; DIFF is a
	     fresh vector and is.  */
	  diff = affine_fn_minus (fn_a, fn_b);

	  if (affine_function_constant_p (diff))
	    SUB_DISTANCE (subscript) = affine_function_base (diff);
	  else
	    SUB_DISTANCE (subscript) = chrec_dont_know;

	  affine_fn_free (diff);
	}
    }
}
2189
2190 /* Returns the conflict function for "unknown". */
2191
2192 static conflict_function *
2193 conflict_fn_not_known (void)
2194 {
2195 conflict_function *fn = XCNEW (conflict_function);
2196 fn->n = NOT_KNOWN;
2197
2198 return fn;
2199 }
2200
2201 /* Returns the conflict function for "independent". */
2202
2203 static conflict_function *
2204 conflict_fn_no_dependence (void)
2205 {
2206 conflict_function *fn = XCNEW (conflict_function);
2207 fn->n = NO_DEPENDENCE;
2208
2209 return fn;
2210 }
2211
2212 /* Returns true if the address of OBJ is invariant in LOOP. */
2213
2214 static bool
2215 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2216 {
2217 while (handled_component_p (obj))
2218 {
2219 if (TREE_CODE (obj) == ARRAY_REF)
2220 {
2221 for (int i = 1; i < 4; ++i)
2222 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2223 loop->num))
2224 return false;
2225 }
2226 else if (TREE_CODE (obj) == COMPONENT_REF)
2227 {
2228 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2229 loop->num))
2230 return false;
2231 }
2232 obj = TREE_OPERAND (obj, 0);
2233 }
2234
2235 if (!INDIRECT_REF_P (obj)
2236 && TREE_CODE (obj) != MEM_REF)
2237 return true;
2238
2239 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2240 loop->num);
2241 }
2242
/* Returns false if we can prove that data references A and B do not alias,
   true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
   considered.  */

bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
		class loop *loop_nest)
{
  tree addr_a = DR_BASE_OBJECT (a);
  tree addr_b = DR_BASE_OBJECT (b);

  /* If we are not processing a loop nest but scalar code we
     do not need to care about possible cross-iteration dependences
     and thus can process the full original reference.  Do so,
     similar to how loop invariant motion applies extra offset-based
     disambiguation.  */
  if (!loop_nest)
    {
      aff_tree off1, off2;
      poly_widest_int size1, size2;
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
      /* Compute off2 - off1 and test whether the ranges
	 [off2 - off1, size2) and [0, size1) can overlap.  */
      aff_combination_scale (&off1, -1);
      aff_combination_add (&off2, &off1);
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
	return false;
    }

  /* Restrict-qualified clique/base disambiguation: two MEM_REFs in the
     same clique with different bases do not alias.  */
  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
      /* For cross-iteration dependences the cliques must be valid for the
	 whole loop, not just individual iterations.  */
      && (!loop_nest
	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
    return false;

  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
     do not know the size of the base-object.  So we cannot do any
     offset/overlap based analysis but have to rely on points-to
     information only.  */
  if (TREE_CODE (addr_a) == MEM_REF
      && (DR_UNCONSTRAINED_BASE (a)
	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
    }
  else if (TREE_CODE (addr_b) == MEM_REF
	   && (DR_UNCONSTRAINED_BASE (b)
	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
    }

  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
     that is being subsetted in the loop nest.  Dispatch on the
     read/write kinds so the oracle can use the appropriate (output,
     anti or flow/input) disambiguation.  */
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
    return refs_output_dependent_p (addr_a, addr_b);
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
    return refs_anti_dependent_p (addr_a, addr_b);
  return refs_may_alias_p (addr_a, addr_b);
}
2329
2330 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
2331 if it is meaningful to compare their associated access functions
2332 when checking for dependencies. */
2333
2334 static bool
2335 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2336 {
2337 /* Allow pairs of component refs from the following sets:
2338
2339 { REALPART_EXPR, IMAGPART_EXPR }
2340 { COMPONENT_REF }
2341 { ARRAY_REF }. */
2342 tree_code code_a = TREE_CODE (ref_a);
2343 tree_code code_b = TREE_CODE (ref_b);
2344 if (code_a == IMAGPART_EXPR)
2345 code_a = REALPART_EXPR;
2346 if (code_b == IMAGPART_EXPR)
2347 code_b = REALPART_EXPR;
2348 if (code_a != code_b)
2349 return false;
2350
2351 if (TREE_CODE (ref_a) == COMPONENT_REF)
2352 /* ??? We cannot simply use the type of operand #0 of the refs here as
2353 the Fortran compiler smuggles type punning into COMPONENT_REFs.
2354 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
2355 return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2356 == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2357
2358 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2359 TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2360 }
2361
/* Initialize a data dependence relation between data accesses A and
   B.  NB_LOOPS is the number of loops surrounding the references: the
   size of the classic distance/direction vectors.  Returns a freshly
   allocated relation; when dependence can be decided up front the
   result has DDR_ARE_DEPENDENT set to chrec_known (independent) or
   chrec_dont_know (assume dependence), otherwise subscripts are set
   up for the later dependence tests.  */

struct data_dependence_relation *
initialize_data_dependence_relation (struct data_reference *a,
				     struct data_reference *b,
				     vec<loop_p> loop_nest)
{
  struct data_dependence_relation *res;
  unsigned int i;

  res = XCNEW (struct data_dependence_relation);
  DDR_A (res) = a;
  DDR_B (res) = b;
  DDR_LOOP_NEST (res).create (0);
  DDR_SUBSCRIPTS (res).create (0);
  DDR_DIR_VECTS (res).create (0);
  DDR_DIST_VECTS (res).create (0);

  /* Without both references we must conservatively assume dependence.  */
  if (a == NULL || b == NULL)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* If the data references do not alias, then they are independent.  */
  if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
    {
      DDR_ARE_DEPENDENT (res) = chrec_known;
      return res;
    }

  unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
  unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
  if (num_dimensions_a == 0 || num_dimensions_b == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* For unconstrained bases, the root (highest-indexed) subscript
     describes a variation in the base of the original DR_REF rather
     than a component access.  We have no type that accurately describes
     the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
     applying this subscript) so limit the search to the last real
     component access.

     E.g. for:

       void
       f (int a[][8], int b[][8])
       {
	 for (int i = 0; i < 8; ++i)
	   a[i * 2][0] = b[i][0];
       }

     the a and b accesses have a single ARRAY_REF component reference [0]
     but have two subscripts.  */
  if (DR_UNCONSTRAINED_BASE (a))
    num_dimensions_a -= 1;
  if (DR_UNCONSTRAINED_BASE (b))
    num_dimensions_b -= 1;

  /* These structures describe sequences of component references in
     DR_REF (A) and DR_REF (B).  Each component reference is tied to a
     specific access function.  */
  struct {
    /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
       DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
       indices.  In C notation, these are the indices of the rightmost
       component references; e.g. for a sequence .b.c.d, the start
       index is for .d.  */
    unsigned int start_a;
    unsigned int start_b;

    /* The sequence contains LENGTH consecutive access functions from
       each DR.  */
    unsigned int length;

    /* The enclosing objects for the A and B sequences respectively,
       i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
       and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
    tree object_a;
    tree object_b;
  } full_seq = {}, struct_seq = {};

  /* Before each iteration of the loop:

     - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
     - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
  unsigned int index_a = 0;
  unsigned int index_b = 0;
  tree ref_a = DR_REF (a);
  tree ref_b = DR_REF (b);

  /* Now walk the component references from the final DR_REFs back up to
     the enclosing base objects.  Each component reference corresponds
     to one access function in the DR, with access function 0 being for
     the final DR_REF and the highest-indexed access function being the
     one that is applied to the base of the DR.

     Look for a sequence of component references whose access functions
     are comparable (see access_fn_components_comparable_p).  If more
     than one such sequence exists, pick the one nearest the base
     (which is the leftmost sequence in C notation).  Store this sequence
     in FULL_SEQ.

     For example, if we have:

	struct foo { struct bar s; ... } (*a)[10], (*b)[10];

	A: a[0][i].s.c.d
	B: __real b[0][i].s.e[i].f

     (where d is the same type as the real component of f) then the access
     functions would be:

	 0   1   2   3
	A: .d  .c  .s [i]

	 0   1   2   3   4   5
	B: __real .f [i] .e  .s [i]

     The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
     and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
     COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
     the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
     so is comparable.  The A3/B5 column contains two ARRAY_REFs that
     index foo[10] arrays, so is again comparable.  The sequence is
     therefore:

	A: [1, 3]  (i.e. [i].s.c)
	B: [3, 5]  (i.e. [i].s.e)

     Also look for sequences of component references whose access
     functions are comparable and whose enclosing objects have the same
     RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
     example, STRUCT_SEQ would be:

	A: [1, 2]  (i.e. s.c)
	B: [3, 4]  (i.e. s.e)  */
  while (index_a < num_dimensions_a && index_b < num_dimensions_b)
    {
      /* REF_A and REF_B must be one of the component access types
	 allowed by dr_analyze_indices.  */
      gcc_checking_assert (access_fn_component_p (ref_a));
      gcc_checking_assert (access_fn_component_p (ref_b));

      /* Get the immediately-enclosing objects for REF_A and REF_B,
	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
	 and DR_ACCESS_FN (B, INDEX_B).  */
      tree object_a = TREE_OPERAND (ref_a, 0);
      tree object_b = TREE_OPERAND (ref_b, 0);

      tree type_a = TREE_TYPE (object_a);
      tree type_b = TREE_TYPE (object_b);
      if (access_fn_components_comparable_p (ref_a, ref_b))
	{
	  /* This pair of component accesses is comparable for dependence
	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
	  if (full_seq.start_a + full_seq.length != index_a
	      || full_seq.start_b + full_seq.length != index_b)
	    {
	      /* The accesses don't extend the current sequence,
		 so start a new one here.  */
	      full_seq.start_a = index_a;
	      full_seq.start_b = index_b;
	      full_seq.length = 0;
	    }

	  /* Add this pair of references to the sequence.  */
	  full_seq.length += 1;
	  full_seq.object_a = object_a;
	  full_seq.object_b = object_b;

	  /* If the enclosing objects are structures (and thus have the
	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
	  if (TREE_CODE (type_a) == RECORD_TYPE)
	    struct_seq = full_seq;

	  /* Move to the next containing reference for both A and B.  */
	  ref_a = object_a;
	  ref_b = object_b;
	  index_a += 1;
	  index_b += 1;
	  continue;
	}

      /* Try to approach equal type sizes.  */
      if (!COMPLETE_TYPE_P (type_a)
	  || !COMPLETE_TYPE_P (type_b)
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
	break;

      /* Step past the larger enclosing object (or both on a tie), so
	 the two walks stay roughly aligned in terms of object size.  */
      unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
      unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
      if (size_a <= size_b)
	{
	  index_a += 1;
	  ref_a = object_a;
	}
      if (size_b <= size_a)
	{
	  index_b += 1;
	  ref_b = object_b;
	}
    }

  /* See whether FULL_SEQ ends at the base and whether the two bases
     are equal.  We do not care about TBAA or alignment info so we can
     use OEP_ADDRESS_OF to avoid false negatives.  */
  tree base_a = DR_BASE_OBJECT (a);
  tree base_b = DR_BASE_OBJECT (b);
  bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
		      && full_seq.start_b + full_seq.length == num_dimensions_b
		      && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
		      && types_compatible_p (TREE_TYPE (base_a),
					     TREE_TYPE (base_b))
		      && (!loop_nest.exists ()
			  || (object_address_invariant_in_loop_p
			      (loop_nest[0], base_a))));

  /* If the bases are the same, we can include the base variation too.
     E.g. the b accesses in:

       for (int i = 0; i < n; ++i)
	 b[i + 4][0] = b[i][0];

     have a definite dependence distance of 4, while for:

       for (int i = 0; i < n; ++i)
	 a[i + 4][0] = b[i][0];

     the dependence distance depends on the gap between a and b.

     If the bases are different then we can only rely on the sequence
     rooted at a structure access, since arrays are allowed to overlap
     arbitrarily and change shape arbitrarily.  E.g. we treat this as
     valid code:

       int a[256];
       ...
       ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];

     where two lvalues with the same int[4][3] type overlap, and where
     both lvalues are distinct from the object's declared type.  */
  if (same_base_p)
    {
      if (DR_UNCONSTRAINED_BASE (a))
	full_seq.length += 1;
    }
  else
    full_seq = struct_seq;

  /* Punt if we didn't find a suitable sequence.  */
  if (full_seq.length == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  if (!same_base_p)
    {
      /* Partial overlap is possible for different bases when strict aliasing
	 is not in effect.  It's also possible if either base involves a union
	 access; e.g. for:

	   struct s1 { int a[2]; };
	   struct s2 { struct s1 b; int c; };
	   struct s3 { int d; struct s1 e; };
	   union u { struct s2 f; struct s3 g; } *p, *q;

	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
	 "p->g.e" (base "p->g") and might partially overlap the s1 at
	 "q->g.e" (base "q->g").  */
      if (!flag_strict_aliasing
	  || ref_contains_union_access_p (full_seq.object_a)
	  || ref_contains_union_access_p (full_seq.object_b))
	{
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}

      DDR_COULD_BE_INDEPENDENT_P (res) = true;
      if (!loop_nest.exists ()
	  || (object_address_invariant_in_loop_p (loop_nest[0],
						  full_seq.object_a)
	      && object_address_invariant_in_loop_p (loop_nest[0],
						     full_seq.object_b)))
	{
	  DDR_OBJECT_A (res) = full_seq.object_a;
	  DDR_OBJECT_B (res) = full_seq.object_b;
	}
    }

  DDR_AFFINE_P (res) = true;
  DDR_ARE_DEPENDENT (res) = NULL_TREE;
  DDR_SUBSCRIPTS (res).create (full_seq.length);
  DDR_LOOP_NEST (res) = loop_nest;
  DDR_SELF_REFERENCE (res) = false;

  /* Create one subscript per matched access-function pair; conflict
     information starts out as "not known" and is refined by the
     dependence tests.  */
  for (i = 0; i < full_seq.length; ++i)
    {
      struct subscript *subscript;

      subscript = XNEW (struct subscript);
      SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
      SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
      SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
      SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
      SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
      SUB_DISTANCE (subscript) = chrec_dont_know;
      DDR_SUBSCRIPTS (res).safe_push (subscript);
    }

  return res;
}
2683
2684 /* Frees memory used by the conflict function F. */
2685
2686 static void
2687 free_conflict_function (conflict_function *f)
2688 {
2689 unsigned i;
2690
2691 if (CF_NONTRIVIAL_P (f))
2692 {
2693 for (i = 0; i < f->n; i++)
2694 affine_fn_free (f->fns[i]);
2695 }
2696 free (f);
2697 }
2698
2699 /* Frees memory used by SUBSCRIPTS. */
2700
2701 static void
2702 free_subscripts (vec<subscript_p> subscripts)
2703 {
2704 unsigned i;
2705 subscript_p s;
2706
2707 FOR_EACH_VEC_ELT (subscripts, i, s)
2708 {
2709 free_conflict_function (s->conflicting_iterations_in_a);
2710 free_conflict_function (s->conflicting_iterations_in_b);
2711 free (s);
2712 }
2713 subscripts.release ();
2714 }
2715
/* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
   description.  Once the final answer CHREC is recorded, the
   per-subscript conflict information is no longer needed and is
   freed.  */

static inline void
finalize_ddr_dependent (struct data_dependence_relation *ddr,
			tree chrec)
{
  DDR_ARE_DEPENDENT (ddr) = chrec;
  free_subscripts (DDR_SUBSCRIPTS (ddr));
  /* Leave an empty but valid vector behind so later users of
     DDR_SUBSCRIPTS do not touch released storage.  */
  DDR_SUBSCRIPTS (ddr).create (0);
}
2727
2728 /* The dependence relation DDR cannot be represented by a distance
2729 vector. */
2730
2731 static inline void
2732 non_affine_dependence_relation (struct data_dependence_relation *ddr)
2733 {
2734 if (dump_file && (dump_flags & TDF_DETAILS))
2735 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
2736
2737 DDR_AFFINE_P (ddr) = false;
2738 }
2739
2740 \f
2741
2742 /* This section contains the classic Banerjee tests. */
2743
2744 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2745 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
2746
2747 static inline bool
2748 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2749 {
2750 return (evolution_function_is_constant_p (chrec_a)
2751 && evolution_function_is_constant_p (chrec_b));
2752 }
2753
2754 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2755 variable, i.e., if the SIV (Single Index Variable) test is true. */
2756
2757 static bool
2758 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2759 {
2760 if ((evolution_function_is_constant_p (chrec_a)
2761 && evolution_function_is_univariate_p (chrec_b))
2762 || (evolution_function_is_constant_p (chrec_b)
2763 && evolution_function_is_univariate_p (chrec_a)))
2764 return true;
2765
2766 if (evolution_function_is_univariate_p (chrec_a)
2767 && evolution_function_is_univariate_p (chrec_b))
2768 {
2769 switch (TREE_CODE (chrec_a))
2770 {
2771 case POLYNOMIAL_CHREC:
2772 switch (TREE_CODE (chrec_b))
2773 {
2774 case POLYNOMIAL_CHREC:
2775 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2776 return false;
2777 /* FALLTHRU */
2778
2779 default:
2780 return true;
2781 }
2782
2783 default:
2784 return true;
2785 }
2786 }
2787
2788 return false;
2789 }
2790
/* Creates a conflict function with N dimensions.  The N affine
   functions, one per dimension, follow as variadic arguments.  The
   result is heap-allocated; release it with free_conflict_function.  */

static conflict_function *
conflict_fn (unsigned n, ...)
{
  unsigned i;
  conflict_function *ret = XCNEW (conflict_function);
  va_list ap;

  /* MAX_DIM bounds the fixed-size fns[] array inside
     conflict_function.  */
  gcc_assert (n > 0 && n <= MAX_DIM);
  va_start (ap, n);

  ret->n = n;
  for (i = 0; i < n; i++)
    ret->fns[i] = va_arg (ap, affine_fn);
  va_end (ap);

  return ret;
}
2811
2812 /* Returns constant affine function with value CST. */
2813
2814 static affine_fn
2815 affine_fn_cst (tree cst)
2816 {
2817 affine_fn fn;
2818 fn.create (1);
2819 fn.quick_push (cst);
2820 return fn;
2821 }
2822
2823 /* Returns affine function with single variable, CST + COEF * x_DIM. */
2824
2825 static affine_fn
2826 affine_fn_univar (tree cst, unsigned dim, tree coef)
2827 {
2828 affine_fn fn;
2829 fn.create (dim + 1);
2830 unsigned i;
2831
2832 gcc_assert (dim > 0);
2833 fn.quick_push (cst);
2834 for (i = 1; i < dim; i++)
2835 fn.quick_push (integer_zero_node);
2836 fn.quick_push (coef);
2837 return fn;
2838 }
2839
/* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).

   *LAST_CONFLICTS is set to the number of conflicting iterations
   (chrec_dont_know for "all", integer_zero_node for "none").  */

static void
analyze_ziv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts)
{
  tree type, difference;
  dependence_stats.num_ziv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_ziv_subscript \n");

  /* Fold both chrecs into a common signed type so the difference is
     well defined.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  switch (TREE_CODE (difference))
    {
    case INTEGER_CST:
      if (integer_zerop (difference))
	{
	  /* The difference is equal to zero: the accessed index
	     overlaps for each iteration in the loop.  */
	  *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
	  *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
	  *last_conflicts = chrec_dont_know;
	  dependence_stats.num_ziv_dependent++;
	}
      else
	{
	  /* The accesses do not overlap.  */
	  *overlaps_a = conflict_fn_no_dependence ();
	  *overlaps_b = conflict_fn_no_dependence ();
	  *last_conflicts = integer_zero_node;
	  dependence_stats.num_ziv_independent++;
	}
      break;

    default:
      /* We're not sure whether the indexes overlap.  For the moment,
	 conservatively answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "ziv test failed: difference is non-integer.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_ziv_unimplemented++;
      break;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
2903
2904 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2905 and only if it fits to the int type. If this is not the case, or the
2906 bound on the number of iterations of LOOP could not be derived, returns
2907 chrec_dont_know. */
2908
2909 static tree
2910 max_stmt_executions_tree (class loop *loop)
2911 {
2912 widest_int nit;
2913
2914 if (!max_stmt_executions (loop, &nit))
2915 return chrec_dont_know;
2916
2917 if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2918 return chrec_dont_know;
2919
2920 return wide_int_to_tree (unsigned_type_node, nit);
2921 }
2922
2923 /* Determine whether the CHREC is always positive/negative. If the expression
2924 cannot be statically analyzed, return false, otherwise set the answer into
2925 VALUE. */
2926
2927 static bool
2928 chrec_is_positive (tree chrec, bool *value)
2929 {
2930 bool value0, value1, value2;
2931 tree end_value, nb_iter;
2932
2933 switch (TREE_CODE (chrec))
2934 {
2935 case POLYNOMIAL_CHREC:
2936 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2937 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2938 return false;
2939
2940 /* FIXME -- overflows. */
2941 if (value0 == value1)
2942 {
2943 *value = value0;
2944 return true;
2945 }
2946
2947 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2948 and the proof consists in showing that the sign never
2949 changes during the execution of the loop, from 0 to
2950 loop->nb_iterations. */
2951 if (!evolution_function_is_affine_p (chrec))
2952 return false;
2953
2954 nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2955 if (chrec_contains_undetermined (nb_iter))
2956 return false;
2957
2958 #if 0
2959 /* TODO -- If the test is after the exit, we may decrease the number of
2960 iterations by one. */
2961 if (after_exit)
2962 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2963 #endif
2964
2965 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2966
2967 if (!chrec_is_positive (end_value, &value2))
2968 return false;
2969
2970 *value = value0;
2971 return value0 == value1;
2972
2973 case INTEGER_CST:
2974 switch (tree_int_cst_sgn (chrec))
2975 {
2976 case -1:
2977 *value = false;
2978 break;
2979 case 1:
2980 *value = true;
2981 break;
2982 default:
2983 return false;
2984 }
2985 return true;
2986
2987 default:
2988 return false;
2989 }
2990 }
2991
2992
/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
   constant, and CHREC_B is an affine function.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript_cst_affine (tree chrec_a,
				  tree chrec_b,
				  conflict_function **overlaps_a,
				  conflict_function **overlaps_b,
				  tree *last_conflicts)
{
  /* VALUE0: sign of initial (CHREC_B) - CHREC_A; VALUE1/VALUE2: sign of
     CHREC_B's step in the respective branches below, all as computed by
     chrec_is_positive.  */
  bool value0, value1, value2;
  tree type, difference, tmp;

  /* Compute both access functions in a common signed type so the
     subtraction below is meaningful.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  /* DIFFERENCE = initial condition of CHREC_B - CHREC_A.  */
  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);

  /* Special case overlap in the first iteration.  */
  if (integer_zerop (difference))
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_one_node;
      return;
    }

  if (!chrec_is_positive (initial_condition (difference), &value0))
    {
      /* The sign of the difference is unknown: answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "siv test failed: chrec is not positive.\n");

      dependence_stats.num_siv_unimplemented++;
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }
  else
    {
      if (value0 == false)
	{
	  /* CHREC_B starts below the constant CHREC_A; an overlap needs
	     CHREC_B's step to be positive and to divide the gap.  */
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value1 == true)
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, 1}
		  */

		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      /* CHREC_A is hit in its only "iteration" 0; CHREC_B
			 reaches it after |difference| / step iterations.  */
		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
					 fold_build1 (ABS_EXPR, type, difference),
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;


		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there are
		     no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}

	      else
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, -1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
      else
	{
	  /* Mirror case: CHREC_B starts above CHREC_A, so an overlap needs
	     a negative step.  */
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value2 == false)
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {10, +, -1}
		  */
		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there
		     are no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}
	      else
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {4, +, 1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
    }
}
3200
3201 /* Helper recursive function for initializing the matrix A. Returns
3202 the initial value of CHREC. */
3203
3204 static tree
3205 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3206 {
3207 gcc_assert (chrec);
3208
3209 switch (TREE_CODE (chrec))
3210 {
3211 case POLYNOMIAL_CHREC:
3212 if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
3213 return chrec_dont_know;
3214 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
3215 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3216
3217 case PLUS_EXPR:
3218 case MULT_EXPR:
3219 case MINUS_EXPR:
3220 {
3221 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3222 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3223
3224 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3225 }
3226
3227 CASE_CONVERT:
3228 {
3229 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3230 return chrec_convert (chrec_type (chrec), op, NULL);
3231 }
3232
3233 case BIT_NOT_EXPR:
3234 {
3235 /* Handle ~X as -1 - X. */
3236 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3237 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3238 build_int_cst (TREE_TYPE (chrec), -1), op);
3239 }
3240
3241 case INTEGER_CST:
3242 return chrec;
3243
3244 default:
3245 gcc_unreachable ();
3246 return NULL_TREE;
3247 }
3248 }
3249
/* NOTE(review): despite the name this is truncating division, which
   equals floor only when the quotient is non-negative — confirm that
   all uses satisfy this before relying on floor semantics.  */
#define FLOOR_DIV(x,y) ((x) / (y))
3251
3252 /* Solves the special case of the Diophantine equation:
3253 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3254
3255 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
3256 number of iterations that loops X and Y run. The overlaps will be
3257 constructed as evolutions in dimension DIM. */
3258
3259 static void
3260 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3261 HOST_WIDE_INT step_a,
3262 HOST_WIDE_INT step_b,
3263 affine_fn *overlaps_a,
3264 affine_fn *overlaps_b,
3265 tree *last_conflicts, int dim)
3266 {
3267 if (((step_a > 0 && step_b > 0)
3268 || (step_a < 0 && step_b < 0)))
3269 {
3270 HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3271 HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3272
3273 gcd_steps_a_b = gcd (step_a, step_b);
3274 step_overlaps_a = step_b / gcd_steps_a_b;
3275 step_overlaps_b = step_a / gcd_steps_a_b;
3276
3277 if (niter > 0)
3278 {
3279 tau2 = FLOOR_DIV (niter, step_overlaps_a);
3280 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3281 last_conflict = tau2;
3282 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3283 }
3284 else
3285 *last_conflicts = chrec_dont_know;
3286
3287 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3288 build_int_cst (NULL_TREE,
3289 step_overlaps_a));
3290 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3291 build_int_cst (NULL_TREE,
3292 step_overlaps_b));
3293 }
3294
3295 else
3296 {
3297 *overlaps_a = affine_fn_cst (integer_zero_node);
3298 *overlaps_b = affine_fn_cst (integer_zero_node);
3299 *last_conflicts = integer_zero_node;
3300 }
3301 }
3302
/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */

static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  /* X is the loop of CHREC_A's outer evolution, Y the loop of its
     inner evolution, Z the loop of the univariate CHREC_B.  */
  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));

  /* All three iteration counts must be known.  */
  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  /* Solve three univariate sub-problems: X vs. Z, Y vs. Z, and
     (X+Y) vs. Z, each in its own overlap dimension.  */
  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
					   &overlaps_a_xz,
					   &overlaps_b_xz,
					   &last_conflicts_xz, 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
					   &overlaps_a_yz,
					   &overlaps_b_yz,
					   &last_conflicts_yz, 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
					   &overlaps_a_xyz,
					   &overlaps_b_xyz,
					   &last_conflicts_xyz, 3);

  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      /* Sum the conflicting sub-solutions into OVA1/OVA2/OVB.  Note
	 that when several cases apply the later assignments to
	 *LAST_CONFLICTS win, so the XYZ case takes precedence.  */
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  affine_fn_free (t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      *overlaps_a = conflict_fn (2, ova1, ova2);
      *overlaps_b = conflict_fn (1, ovb);
    }
  else
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  /* Release the temporary sub-solutions.  */
  affine_fn_free (overlaps_a_xz);
  affine_fn_free (overlaps_b_xz);
  affine_fn_free (overlaps_a_yz);
  affine_fn_free (overlaps_b_yz);
  affine_fn_free (overlaps_a_xyz);
  affine_fn_free (overlaps_b_xyz);
}
3431
3432 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
3433
3434 static void
3435 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3436 int size)
3437 {
3438 memcpy (vec2, vec1, size * sizeof (*vec1));
3439 }
3440
3441 /* Copy the elements of M x N matrix MAT1 to MAT2. */
3442
3443 static void
3444 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3445 int m, int n)
3446 {
3447 int i;
3448
3449 for (i = 0; i < m; i++)
3450 lambda_vector_copy (mat1[i], mat2[i], n);
3451 }
3452
3453 /* Store the N x N identity matrix in MAT. */
3454
3455 static void
3456 lambda_matrix_id (lambda_matrix mat, int size)
3457 {
3458 int i, j;
3459
3460 for (i = 0; i < size; i++)
3461 for (j = 0; j < size; j++)
3462 mat[i][j] = (i == j) ? 1 : 0;
3463 }
3464
3465 /* Return the index of the first nonzero element of vector VEC1 between
3466 START and N. We must have START <= N.
3467 Returns N if VEC1 is the zero vector. */
3468
3469 static int
3470 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3471 {
3472 int j = start;
3473 while (j < n && vec1[j] == 0)
3474 j++;
3475 return j;
3476 }
3477
3478 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3479 R2 = R2 + CONST1 * R1. */
3480
3481 static void
3482 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
3483 lambda_int const1)
3484 {
3485 int i;
3486
3487 if (const1 == 0)
3488 return;
3489
3490 for (i = 0; i < n; i++)
3491 mat[r2][i] += const1 * mat[r1][i];
3492 }
3493
3494 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3495 and store the result in VEC2. */
3496
3497 static void
3498 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3499 int size, lambda_int const1)
3500 {
3501 int i;
3502
3503 if (const1 == 0)
3504 lambda_vector_clear (vec2, size);
3505 else
3506 for (i = 0; i < size; i++)
3507 vec2[i] = const1 * vec1[i];
3508 }
3509
3510 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
3511
3512 static void
3513 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3514 int size)
3515 {
3516 lambda_vector_mult_const (vec1, vec2, size, -1);
3517 }
3518
3519 /* Negate row R1 of matrix MAT which has N columns. */
3520
3521 static void
3522 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3523 {
3524 lambda_vector_negate (mat[r1], mat[r1], n);
3525 }
3526
3527 /* Return true if two vectors are equal. */
3528
3529 static bool
3530 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3531 {
3532 int i;
3533 for (i = 0; i < size; i++)
3534 if (vec1[i] != vec2[i])
3535 return false;
3536 return true;
3537 }
3538
/* Given an M x N integer matrix A, this function determines an M x
   M unimodular matrix U, and an M x N echelon matrix S such that
   "U.A = S".  This decomposition is also known as "right Hermite".

   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
   Restructuring Compilers" Utpal Banerjee.  */

static void
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
			     lambda_matrix S, lambda_matrix U)
{
  int i, j, i0 = 0;

  /* Start with S = A and U = identity; every row operation below is
     applied to both, which maintains the invariant U.A = S.  */
  lambda_matrix_copy (A, S, m, n);
  lambda_matrix_id (U, m);

  for (j = 0; j < n; j++)
    {
      /* NOTE(review): S[j] is row J but is scanned over M entries;
	 this appears to rely on the rows of S being allocated
	 contiguously — confirm against lambda_matrix_new before
	 changing.  */
      if (lambda_vector_first_nz (S[j], m, i0) < m)
	{
	  ++i0;
	  /* Eliminate the remaining entries of the current column with
	     a Euclidean-style sequence of row combinations and swaps:
	     each step replaces the pair (a, b) by (b, a mod b).  */
	  for (i = m - 1; i >= i0; i--)
	    {
	      while (S[i][j] != 0)
		{
		  lambda_int sigma, factor, a, b;

		  a = S[i-1][j];
		  b = S[i][j];
		  /* FACTOR = trunc (a / b), computed on magnitudes with
		     the sign carried separately in SIGMA.  */
		  sigma = (a * b < 0) ? -1: 1;
		  a = abs_hwi (a);
		  b = abs_hwi (b);
		  factor = sigma * (a / b);

		  lambda_matrix_row_add (S, n, i, i-1, -factor);
		  std::swap (S[i], S[i-1]);

		  /* Mirror the same operation on U to keep U.A = S.  */
		  lambda_matrix_row_add (U, m, i, i-1, -factor);
		  std::swap (U[i], U[i-1]);
		}
	    }
	}
    }
}
3583
/* Determines the overlapping elements due to accesses CHREC_A and
   CHREC_B, that are affine functions.  This function cannot handle
   symbolic evolution functions, ie. when initial conditions are
   parameters, because it uses lambda matrices of integers.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
				 tree chrec_b,
				 conflict_function **overlaps_a,
				 conflict_function **overlaps_b,
				 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  HOST_WIDE_INT gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
	 loop.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     For answering to the question: "Is there a dependence?" we have
     to prove that there exists a solution to the Diophantine
     equation, and that the solution is in the iteration domain,
     i.e. the solution is positive or zero, and that the solution
     happens before the upper bound loop.nb_iterations.  Otherwise
     there is no dependence.  This function outputs a description of
     the iterations that hold the intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  gcc_obstack_init (&scratch_obstack);

  /* All scratch matrices live on this obstack, freed on every exit
     path at END_ANALYZE_SUBS_AA.  */
  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
  S = lambda_matrix_new (dim, 1, &scratch_obstack);

  /* A receives the steps of CHREC_A (coefficient 1) followed by the
     steps of CHREC_B (negated); the returned values are the initial
     conditions.  */
  tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
  tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
  if (init_a == chrec_dont_know
      || init_b == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: "
		 "representation issue.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }
  /* GAMMA is the constant difference of the initial conditions.  */
  gamma = int_cst_value (init_b) - int_cst_value (init_a);

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
	{
	  HOST_WIDE_INT step_a, step_b;
	  HOST_WIDE_INT niter, niter_a, niter_b;
	  affine_fn ova, ovb;

	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
	  niter = MIN (niter_a, niter_b);
	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));

	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
						   &ova, &ovb,
						   last_conflicts, 1);
	  *overlaps_a = conflict_fn (1, ova);
	  *overlaps_b = conflict_fn (1, ovb);
	}

      else if (nb_vars_a == 2 && nb_vars_b == 1)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      else if (nb_vars_a == 1 && nb_vars_b == 2)
	compute_overlap_steps_for_affine_1_2
	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  lambda_matrix_right_hermite (A, dim, 1, S, U);

  /* Normalize so that the leading entry of S is positive.  */
  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (U, dim, 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (gcd_alpha_beta, gamma))
    {
      /* The "gcd-test" has determined that there is no integer
	 solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
	   || (A[0][0] < 0 && -A[1][0] < 0)))
	{
	  /* The solutions are given by:
	     |
	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
	     |                           [u21 u22]    [y0]

	     For a given integer t.  Using the following variables,

	     | i0 = u11 * gamma / gcd_alpha_beta
	     | j0 = u12 * gamma / gcd_alpha_beta
	     | i1 = u21
	     | j1 = u22

	     the solutions are:

	     | x0 = i0 + i1 * t,
	     | y0 = j0 + j1 * t.  */
	  HOST_WIDE_INT i0, j0, i1, j1;

	  i0 = U[0][0] * gamma / gcd_alpha_beta;
	  j0 = U[0][1] * gamma / gcd_alpha_beta;
	  i1 = U[1][0];
	  j1 = U[1][1];

	  if ((i1 == 0 && i0 < 0)
	      || (j1 == 0 && j0 < 0))
	    {
	      /* There is no solution.
		 FIXME:  The case "i0 > nb_iterations, j0 > nb_iterations"
		 falls in here, but for the moment we don't look at the
		 upper bound of the iteration domain.  */
	      *overlaps_a = conflict_fn_no_dependence ();
	      *overlaps_b = conflict_fn_no_dependence ();
	      *last_conflicts = integer_zero_node;
	      goto end_analyze_subs_aa;
	    }

	  if (i1 > 0 && j1 > 0)
	    {
	      HOST_WIDE_INT niter_a
		= max_stmt_executions_int (get_chrec_loop (chrec_a));
	      HOST_WIDE_INT niter_b
		= max_stmt_executions_int (get_chrec_loop (chrec_b));
	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);

	      /* (X0, Y0) is a solution of the Diophantine equation:
		 "chrec_a (X0) = chrec_b (Y0)".  */
	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
					CEIL (-j0, j1));
	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
	      HOST_WIDE_INT y0 = j1 * tau1 + j0;

	      /* (X1, Y1) is the smallest positive solution of the eq
		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
		 first conflict occurs.  */
	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

	      if (niter > 0)
		{
		  /* If the overlap occurs outside of the bounds of the
		     loop, there is no dependence.  */
		  if (x1 >= niter_a || y1 >= niter_b)
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      goto end_analyze_subs_aa;
		    }

		  /* max stmt executions can get quite large, avoid
		     overflows by using wide ints here.  */
		  widest_int tau2
		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
		  /* Only emit a constant when it fits the node type;
		     otherwise fall back to "don't know".  */
		  if (wi::min_precision (last_conflict, SIGNED)
		      <= TYPE_PRECISION (integer_type_node))
		    *last_conflicts
		       = build_int_cst (integer_type_node,
					last_conflict.to_shwi ());
		  else
		    *last_conflicts = chrec_dont_know;
		}
	      else
		*last_conflicts = chrec_dont_know;

	      *overlaps_a
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
						 1,
						 build_int_cst (NULL_TREE, i1)));
	      *overlaps_b
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
						 1,
						 build_int_cst (NULL_TREE, j1)));
	    }
	  else
	    {
	      /* FIXME: For the moment, the upper bound of the
		 iteration domain for i and j is not checked.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	    }
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

 end_analyze_subs_aa:
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlaps_a = ");
      dump_conflict_function (dump_file, *overlaps_a);
      fprintf (dump_file, ")\n  (overlaps_b = ");
      dump_conflict_function (dump_file, *overlaps_b);
      fprintf (dump_file, "))\n");
    }
}
3873
3874 /* Returns true when analyze_subscript_affine_affine can be used for
3875 determining the dependence relation between chrec_a and chrec_b,
3876 that contain symbols. This function modifies chrec_a and chrec_b
3877 such that the analysis result is the same, and such that they don't
3878 contain symbols, and then can safely be passed to the analyzer.
3879
3880 Example: The analysis of the following tuples of evolutions produce
3881 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3882 vs. {0, +, 1}_1
3883
3884 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3885 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3886 */
3887
3888 static bool
3889 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
3890 {
3891 tree diff, type, left_a, left_b, right_b;
3892
3893 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
3894 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
3895 /* FIXME: For the moment not handled. Might be refined later. */
3896 return false;
3897
3898 type = chrec_type (*chrec_a);
3899 left_a = CHREC_LEFT (*chrec_a);
3900 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
3901 diff = chrec_fold_minus (type, left_a, left_b);
3902
3903 if (!evolution_function_is_constant_p (diff))
3904 return false;
3905
3906 if (dump_file && (dump_flags & TDF_DETAILS))
3907 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
3908
3909 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
3910 diff, CHREC_RIGHT (*chrec_a));
3911 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
3912 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
3913 build_int_cst (type, 0),
3914 right_b);
3915 return true;
3916 }
3917
/* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       int loop_nest_num)
{
  dependence_stats.num_siv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_siv_subscript \n");

  /* Constant vs. affine: delegate to the cst-affine analyzer, in
     either argument order.  */
  if (evolution_function_is_constant_p (chrec_a)
      && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
				      overlaps_a, overlaps_b, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_constant_p (chrec_b))
    analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
				      overlaps_b, overlaps_a, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    {
      if (!chrec_contains_symbols (chrec_a)
	  && !chrec_contains_symbols (chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      /* Symbolic chrecs: try rewriting them into symbol-free
	 equivalents; on success CHREC_A and CHREC_B have been
	 modified in place.  */
      else if (can_use_analyze_subscript_affine_affine (&chrec_a,
							&chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else
	goto siv_subscript_dontknow;
    }

  else
    {
      /* Note: the label below is also the target of the GOTO above,
	 jumping here from inside the affine-affine arm.  */
    siv_subscript_dontknow:;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  siv test failed: unimplemented");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_siv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
4001
4002 /* Returns false if we can prove that the greatest common divisor of the steps
4003 of CHREC does not divide CST, false otherwise. */
4004
4005 static bool
4006 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4007 {
4008 HOST_WIDE_INT cd = 0, val;
4009 tree step;
4010
4011 if (!tree_fits_shwi_p (cst))
4012 return true;
4013 val = tree_to_shwi (cst);
4014
4015 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4016 {
4017 step = CHREC_RIGHT (chrec);
4018 if (!tree_fits_shwi_p (step))
4019 return true;
4020 cd = gcd (cd, tree_to_shwi (step));
4021 chrec = CHREC_LEFT (chrec);
4022 }
4023
4024 return val % cd == 0;
4025 }
4026
/* Analyze a MIV (Multiple Index Variable) subscript with respect to
   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
   functions that describe the relation between the elements accessed
   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
   is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).

   *LAST_CONFLICTS receives a count of conflicting iterations (or
   chrec_dont_know / zero in the unknown / independent cases).  */

static void
analyze_miv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       class loop *loop_nest)
{
  tree type, difference;

  dependence_stats.num_miv++;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_miv_subscript \n");

  /* Convert both chrecs to a common signed type so that the
     subtraction below is well defined.  */
  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* Access functions are the same: all the elements are accessed
	 in the same order.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
      dependence_stats.num_miv_dependent++;
    }

  else if (evolution_function_is_constant_p (difference)
	   && evolution_function_is_affine_multivariate_p (chrec_a,
							   loop_nest->num)
	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
    {
      /* testsuite/.../ssa-chrec-33.c
	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2

	 The difference is 1, and all the evolution steps are multiples
	 of 2, consequently there are no overlapping elements.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
      dependence_stats.num_miv_independent++;
    }

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
	   && !chrec_contains_symbols (chrec_a, loop_nest)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
	   && !chrec_contains_symbols (chrec_b, loop_nest))
    {
      /* testsuite/.../ssa-chrec-35.c
	 {0, +, 1}_2  vs.  {0, +, 1}_3
	 the overlapping elements are respectively located at iterations:
	 {0, +, 1}_x and {0, +, 1}_x,
	 in other words, we have the equality:
	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)

	 Other examples:
	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)

	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
      */
      analyze_subscript_affine_affine (chrec_a, chrec_b,
				       overlaps_a, overlaps_b, last_conflicts);

      /* Classify the result for the statistics only; the conflict
	 functions themselves were filled in by the call above.  */
      if (CF_NOT_KNOWN_P (*overlaps_a)
 	  || CF_NOT_KNOWN_P (*overlaps_b))
	dependence_stats.num_miv_unimplemented++;
      else if (CF_NO_DEPENDENCE_P (*overlaps_a)
	       || CF_NO_DEPENDENCE_P (*overlaps_b))
	dependence_stats.num_miv_independent++;
      else
	dependence_stats.num_miv_dependent++;
    }

  else
    {
      /* When the analysis is too difficult, answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_miv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
4127
/* Determines the iterations for which CHREC_A is equal to CHREC_B in
   with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
   OVERLAP_ITERATIONS_B are initialized with two functions that
   describe the iterations that contain conflicting elements.

   Remark: For an integer k >= 0, the following equality is true:

   CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).

   Dispatches on the shape of the two access functions to the ZIV, SIV
   or MIV analyzer; the earlier branches handle the degenerate cases
   (undetermined inputs, identical affine functions, non-affine
   symbolic functions).  */

static void
analyze_overlapping_iterations (tree chrec_a,
				tree chrec_b,
				conflict_function **overlap_iterations_a,
				conflict_function **overlap_iterations_b,
				tree *last_conflicts, class loop *loop_nest)
{
  unsigned int lnn = loop_nest->num;

  dependence_stats.num_subscript_tests++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(analyze_overlapping_iterations \n");
      fprintf (dump_file, "  (chrec_a = ");
      print_generic_expr (dump_file, chrec_a);
      fprintf (dump_file, ")\n  (chrec_b = ");
      print_generic_expr (dump_file, chrec_b);
      fprintf (dump_file, ")\n");
    }

  /* Missing or undetermined access functions: answer "don't know".  */
  if (chrec_a == NULL_TREE
      || chrec_b == NULL_TREE
      || chrec_contains_undetermined (chrec_a)
      || chrec_contains_undetermined (chrec_b))
    {
      dependence_stats.num_subscript_undetermined++;

      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* If they are the same chrec, and are affine, they overlap
     on every iteration.  */
  else if (eq_evolutions_p (chrec_a, chrec_b)
	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || operand_equal_p (chrec_a, chrec_b, 0)))
    {
      dependence_stats.num_same_subscript_function++;
      *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
    }

  /* If they aren't the same, and aren't affine, we can't do anything
     yet.  */
  else if ((chrec_contains_symbols (chrec_a)
	    || chrec_contains_symbols (chrec_b))
	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
    {
      dependence_stats.num_subscript_undetermined++;
      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* Zero Index Variable: neither function varies in the nest.  */
  else if (ziv_subscript_p (chrec_a, chrec_b))
    analyze_ziv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts);

  /* Single Index Variable.  */
  else if (siv_subscript_p (chrec_a, chrec_b))
    analyze_siv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, lnn);

  /* Multiple Index Variables.  */
  else
    analyze_miv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, loop_nest);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlap_iterations_a = ");
      dump_conflict_function (dump_file, *overlap_iterations_a);
      fprintf (dump_file, ")\n  (overlap_iterations_b = ");
      dump_conflict_function (dump_file, *overlap_iterations_b);
      fprintf (dump_file, "))\n");
    }
}
4218
4219 /* Helper function for uniquely inserting distance vectors. */
4220
4221 static void
4222 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4223 {
4224 unsigned i;
4225 lambda_vector v;
4226
4227 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4228 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4229 return;
4230
4231 DDR_DIST_VECTS (ddr).safe_push (dist_v);
4232 }
4233
4234 /* Helper function for uniquely inserting direction vectors. */
4235
4236 static void
4237 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4238 {
4239 unsigned i;
4240 lambda_vector v;
4241
4242 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4243 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4244 return;
4245
4246 DDR_DIR_VECTS (ddr).safe_push (dir_v);
4247 }
4248
4249 /* Add a distance of 1 on all the loops outer than INDEX. If we
4250 haven't yet determined a distance for this outer loop, push a new
4251 distance vector composed of the previous distance, and a distance
4252 of 1 for this outer loop. Example:
4253
4254 | loop_1
4255 | loop_2
4256 | A[10]
4257 | endloop_2
4258 | endloop_1
4259
4260 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
4261 save (0, 1), then we have to save (1, 0). */
4262
4263 static void
4264 add_outer_distances (struct data_dependence_relation *ddr,
4265 lambda_vector dist_v, int index)
4266 {
4267 /* For each outer loop where init_v is not set, the accesses are
4268 in dependence of distance 1 in the loop. */
4269 while (--index >= 0)
4270 {
4271 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4272 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4273 save_v[index] = 1;
4274 save_dist_v (ddr, save_v);
4275 }
4276 }
4277
/* Return false when fail to represent the data dependence as a
   distance vector.  A_INDEX is the index of the first reference
   (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
   second reference.  INIT_B is set to true when a component has been
   added to the distance vector DIST_V.  INDEX_CARRY is then set to
   the index in DIST_V that carries the dependence.  */

static bool
build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
			     unsigned int a_index, unsigned int b_index,
			     lambda_vector dist_v, bool *init_b,
			     int *index_carry)
{
  unsigned i;
  /* INIT_V[k] records whether DIST_V[k] has been set by some
     subscript, so a later subscript can be checked against it.  */
  lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      tree access_fn_a, access_fn_b;
      struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);

      /* A symbolic (non-integer) distance cannot be represented in a
	 classic distance vector.  */
      if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	{
	  non_affine_dependence_relation (ddr);
	  return false;
	}

      access_fn_a = SUB_ACCESS_FN (subscript, a_index);
      access_fn_b = SUB_ACCESS_FN (subscript, b_index);

      if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
	{
	  HOST_WIDE_INT dist;
	  int index;
	  int var_a = CHREC_VARIABLE (access_fn_a);
	  int var_b = CHREC_VARIABLE (access_fn_b);

	  /* Both access functions must evolve in the same loop.  */
	  if (var_a != var_b
	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	    {
	      non_affine_dependence_relation (ddr);
	      return false;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     would have more number of access functions than number of
	     loops.  Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
	    continue;

	  dist = int_cst_value (SUB_DISTANCE (subscript));
	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
	  *index_carry = MIN (index, *index_carry);

	  /* This is the subscript coupling test.  If we have already
	     recorded a distance for this loop (a distance coming from
	     another subscript), it should be the same.  For example,
	     in the following code, there is no dependence:

	     | loop i = 0, N, 1
	     |   T[i+1][i] = ...
	     |   ... = T[i][i]
	     | endloop
	  */
	  if (init_v[index] != 0 && dist_v[index] != dist)
	    {
	      finalize_ddr_dependent (ddr, chrec_known);
	      return false;
	    }

	  dist_v[index] = dist;
	  init_v[index] = 1;
	  *init_b = true;
	}
      else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
	{
	  /* This can be for example an affine vs. constant dependence
	     (T[i] vs. T[3]) that is not an affine dependence and is
	     not representable as a distance vector.  */
	  non_affine_dependence_relation (ddr);
	  return false;
	}
    }

  return true;
}
4369
4370 /* Return true when the DDR contains only constant access functions. */
4371
4372 static bool
4373 constant_access_functions (const struct data_dependence_relation *ddr)
4374 {
4375 unsigned i;
4376 subscript *sub;
4377
4378 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4379 if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4380 || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4381 return false;
4382
4383 return true;
4384 }
4385
/* Helper function for the case where DDR_A and DDR_B are the same
   multivariate access function with a constant step.  For an example
   see pr34635-1.c.  C_2 is a two-level polynomial chrec
   {{c_0, +, step_1}_x1, +, step_2}_x2.  */

static void
add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
{
  int x_1, x_2;
  tree c_1 = CHREC_LEFT (c_2);
  tree c_0 = CHREC_LEFT (c_1);
  lambda_vector dist_v;
  HOST_WIDE_INT v1, v2, cd;

  /* Polynomials with more than 2 variables are not handled yet.  When
     the evolution steps are parameters, it is not possible to
     represent the dependence using classical distance vectors.  */
  if (TREE_CODE (c_0) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
    {
      DDR_AFFINE_P (ddr) = false;
      return;
    }

  /* Positions of the two loop variables in the loop nest.  */
  x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
  x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));

  /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  v1 = int_cst_value (CHREC_RIGHT (c_1));
  v2 = int_cst_value (CHREC_RIGHT (c_2));
  cd = gcd (v1, v2);
  /* Reduce to the smallest integer solution.  */
  v1 /= cd;
  v2 /= cd;

  /* Normalize the sign so that the component in the outer loop X_1 is
     positive.  */
  if (v2 < 0)
    {
      v2 = -v2;
      v1 = -v1;
    }

  dist_v[x_1] = v2;
  dist_v[x_2] = -v1;
  save_dist_v (ddr, dist_v);

  add_outer_distances (ddr, dist_v, x_1);
}
4433
/* Helper function for the case where DDR_A and DDR_B are the same
   access functions.  Records the self-distance vectors for DDR.  */

static void
add_other_self_distances (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v;
  unsigned i;
  /* Innermost loop index that carries the dependence; starts past the
     last loop and is lowered as subscripts are examined.  */
  int index_carry = DDR_NB_LOOPS (ddr);
  subscript *sub;
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    {
      /* The access functions are the same on both sides, so looking
	 at side 0 is enough.  */
      tree access_fun = SUB_ACCESS_FN (sub, 0);

      if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
	{
	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
	    {
	      /* Multivariate chrecs are only handled for the single
		 subscript case.  */
	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
		{
		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
		  return;
		}

	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);

	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
		add_multivariate_self_dist (ddr, access_fun);
	      else
		/* The evolution step is not constant: it varies in
		   the outer loop, so this cannot be represented by a
		   distance vector.  For example in pr34635.c the
		   evolution is {0, +, {0, +, 4}_1}_2.  */
		DDR_AFFINE_P (ddr) = false;

	      return;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     would have more number of access functions than number of
	     loops.  Skip access functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
				  loop))
	    continue;

	  index_carry = MIN (index_carry,
			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
						 DDR_LOOP_NEST (ddr)));
	}
    }

  /* Record a zero vector, with a distance of 1 in every loop outer
     than INDEX_CARRY.  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  add_outer_distances (ddr, dist_v, index_carry);
}
4493
4494 static void
4495 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4496 {
4497 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4498
4499 dist_v[0] = 1;
4500 save_dist_v (ddr, dist_v);
4501 }
4502
4503 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
4504 is the case for example when access functions are the same and
4505 equal to a constant, as in:
4506
4507 | loop_1
4508 | A[3] = ...
4509 | ... = A[3]
4510 | endloop_1
4511
4512 in which case the distance vectors are (0) and (1). */
4513
4514 static void
4515 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4516 {
4517 unsigned i, j;
4518
4519 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4520 {
4521 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4522 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4523 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4524
4525 for (j = 0; j < ca->n; j++)
4526 if (affine_function_zero_p (ca->fns[j]))
4527 {
4528 insert_innermost_unit_dist_vector (ddr);
4529 return;
4530 }
4531
4532 for (j = 0; j < cb->n; j++)
4533 if (affine_function_zero_p (cb->fns[j]))
4534 {
4535 insert_innermost_unit_dist_vector (ddr);
4536 return;
4537 }
4538 }
4539 }
4540
4541 /* Return true when the DDR contains two data references that have the
4542 same access functions. */
4543
4544 static inline bool
4545 same_access_functions (const struct data_dependence_relation *ddr)
4546 {
4547 unsigned i;
4548 subscript *sub;
4549
4550 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4551 if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4552 SUB_ACCESS_FN (sub, 1)))
4553 return false;
4554
4555 return true;
4556 }
4557
/* Compute the classic per loop distance vector.  DDR is the data
   dependence relation to build a vector from.  Return false when fail
   to represent the data dependence as a distance vector.  */

static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  /* Nothing to do when the relation has already been finalized
     (chrec_known or chrec_dont_know).  */
  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (constant_access_functions (ddr))
	add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
	add_other_self_distances (ddr);

      return true;
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
	 always be lexicographically positive.

	 Data references are collected in the order of execution of
	 the program, thus for the following loop

	 | for (i = 1; i < 100; i++)
	 |   for (j = 1; j < 100; j++)
	 |     {
	 |       t = T[j+1][i-1];  // A
	 |       T[j][i] = t + 2;  // B
	 |     }

	 references are collected following the direction of the wind:
	 A then B.  The data dependence tests are performed also
	 following this order, such that we're looking at the distance
	 separating the elements accessed by A from the elements later
	 accessed by B.  But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), that
	 means that the access A occurs later than B with respect to
	 the outer loop, ie. we're actually looking upwind.  In this
	 case we solve test_dep (B, A) looking downwind to the
	 lexicographically positive solution, that returns the
	 distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
	{
	  /* Re-run the subscript tests with the references swapped
	     and record the reversed vector instead.  */
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
	    return false;
	  compute_subscript_distance (ddr);
	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
					    &index_carry))
	    return false;
	  save_dist_v (ddr, save_v);
	  DDR_REVERSED_P (ddr) = true;

	  /* In this case there is a dependence forward for all the
	     outer loops:

	     | for (k = 1; k < 100; k++)
	     |  for (i = 1; i < 100; i++)
	     |   for (j = 1; j < 100; j++)
	     |     {
	     |       t = T[j+1][i-1];  // A
	     |       T[j][i] = t + 2;  // B
	     |     }

	     the vectors are:
	     (0,  1, -1)
	     (1,  1, -1)
	     (1, -1,  1)
	  */
	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
 	      add_outer_distances (ddr, save_v, index_carry);
	      add_outer_distances (ddr, dist_v, index_carry);
	    }
	}
      else
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));

	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      /* Also record the vector for the opposite ordering of
		 the two references, and the distances for the outer
		 loops.  */
	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
		return false;
	      compute_subscript_distance (ddr);
	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
						&index_carry))
		return false;

	      save_dist_v (ddr, save_v);
	      add_outer_distances (ddr, dist_v, index_carry);
	      add_outer_distances (ddr, opposite_v, index_carry);
	    }
	  else
	    save_dist_v (ddr, save_v);
	}
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
	 there is a dependence of distance 1 on loop_1 for the array A.

	 | loop_1
	 |   A[5] = ...
	 | endloop
      */
      add_outer_distances (ddr, dist_v,
			   lambda_vector_first_nz (dist_v,
						   DDR_NB_LOOPS (ddr), 0));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "(build_classic_dist_vector\n");
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (dump_file, "  dist_vector = (");
	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	  fprintf (dump_file, "  )\n");
	}
      fprintf (dump_file, ")\n");
    }

  return true;
}
4708
4709 /* Return the direction for a given distance.
4710 FIXME: Computing dir this way is suboptimal, since dir can catch
4711 cases that dist is unable to represent. */
4712
4713 static inline enum data_dependence_direction
4714 dir_from_dist (int dist)
4715 {
4716 if (dist > 0)
4717 return dir_positive;
4718 else if (dist < 0)
4719 return dir_negative;
4720 else
4721 return dir_equal;
4722 }
4723
4724 /* Compute the classic per loop direction vector. DDR is the data
4725 dependence relation to build a vector from. */
4726
4727 static void
4728 build_classic_dir_vector (struct data_dependence_relation *ddr)
4729 {
4730 unsigned i, j;
4731 lambda_vector dist_v;
4732
4733 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4734 {
4735 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4736
4737 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4738 dir_v[j] = dir_from_dist (dist_v[j]);
4739
4740 save_dir_v (ddr, dir_v);
4741 }
4742 }
4743
/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.
   As a side effect, fills in the conflict functions and last-conflict
   count of every subscript of DDR, and finalizes DDR when independence
   or an undetermined result was established.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  /* NULL_TREE while no subscript has forced a conclusion; becomes
     chrec_dont_know or chrec_known otherwise.  */
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      /* Release any conflict functions from a previous run before
	 storing the new ones.  */
      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  /* All subscripts may conflict: report a dependence.  */
  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}
4805
/* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */

static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
			     class loop *loop_nest)
{
  /* subscript_dependence_tester_1 returns true when a dependence
     between DDR_A and DDR_B could not be ruled out.  */
  if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
    dependence_stats.num_dependence_dependent++;

  /* Summarize the per-subscript conflicts as classic distance and
     direction vectors when that representation is possible.  */
  compute_subscript_distance (ddr);
  if (build_classic_dist_vector (ddr, loop_nest))
    build_classic_dir_vector (ddr);
}
4819
4820 /* Returns true when all the access functions of A are affine or
4821 constant with respect to LOOP_NEST. */
4822
4823 static bool
4824 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4825 const class loop *loop_nest)
4826 {
4827 unsigned int i;
4828 vec<tree> fns = DR_ACCESS_FNS (a);
4829 tree t;
4830
4831 FOR_EACH_VEC_ELT (fns, i, t)
4832 if (!evolution_function_is_invariant_p (t, loop_nest->num)
4833 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4834 return false;
4835
4836 return true;
4837 }
4838
/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      /* The subscript tester only handles affine or constant access
	 functions.  */
      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}
4902
4903 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4904 the data references in DATAREFS, in the LOOP_NEST. When
4905 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4906 relations. Return true when successful, i.e. data references number
4907 is small enough to be handled. */
4908
4909 bool
4910 compute_all_dependences (vec<data_reference_p> datarefs,
4911 vec<ddr_p> *dependence_relations,
4912 vec<loop_p> loop_nest,
4913 bool compute_self_and_rr)
4914 {
4915 struct data_dependence_relation *ddr;
4916 struct data_reference *a, *b;
4917 unsigned int i, j;
4918
4919 if ((int) datarefs.length ()
4920 > param_loop_max_datarefs_for_datadeps)
4921 {
4922 struct data_dependence_relation *ddr;
4923
4924 /* Insert a single relation into dependence_relations:
4925 chrec_dont_know. */
4926 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4927 dependence_relations->safe_push (ddr);
4928 return false;
4929 }
4930
4931 FOR_EACH_VEC_ELT (datarefs, i, a)
4932 for (j = i + 1; datarefs.iterate (j, &b); j++)
4933 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4934 {
4935 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4936 dependence_relations->safe_push (ddr);
4937 if (loop_nest.exists ())
4938 compute_affine_dependence (ddr, loop_nest[0]);
4939 }
4940
4941 if (compute_self_and_rr)
4942 FOR_EACH_VEC_ELT (datarefs, i, a)
4943 {
4944 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4945 dependence_relations->safe_push (ddr);
4946 if (loop_nest.exists ())
4947 compute_affine_dependence (ddr, loop_nest[0]);
4948 }
4949
4950 return true;
4951 }
4952
/* Describes a location of a memory reference.  */

struct data_ref_loc
{
  /* The memory reference tree (e.g. a DECL or a REFERENCE_CLASS_P
     expression found in a statement).  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};
4968
/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      class loop *loop = gimple_bb (stmt)->loop_father;
	      tree uid = gimple_call_arg (stmt, 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      /* Only the simd loop the lane call belongs to is
		 exempt; elsewhere treat it as a clobber.  */
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    /* Handled as explicit references further below.  */
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
	       || gimple_vuse (stmt)))
    clobbers_memory = true;

  /* Statements without a virtual use do not reference memory.  */
  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      /* Record the RHS as a read when it is a declaration or a
	 reference with an analyzable (non-SSA, non-invariant) base.  */
      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    /* Reconstruct a MEM_REF for the masked access from the
	       call's pointer argument and the encoded alignment.  */
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    /* The access only happens for active mask lanes.  */
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (ref);
	    return false;
	  default:
	    break;
	  }

      /* Record each memory-referencing argument as a read.  */
      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
	{
	  op1 = gimple_call_arg (stmt, i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (ref);
	    }
	}
    }
  else
    return clobbers_memory;

  /* Record the LHS (of an assignment or a call) as a write.  */
  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
5100
5101
5102 /* Returns true if the loop-nest has any data reference. */
5103
5104 bool
5105 loop_nest_has_data_refs (loop_p loop)
5106 {
5107 basic_block *bbs = get_loop_body (loop);
5108 auto_vec<data_ref_loc, 3> references;
5109
5110 for (unsigned i = 0; i < loop->num_nodes; i++)
5111 {
5112 basic_block bb = bbs[i];
5113 gimple_stmt_iterator bsi;
5114
5115 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5116 {
5117 gimple *stmt = gsi_stmt (bsi);
5118 get_references_in_stmt (stmt, &references);
5119 if (references.length ())
5120 {
5121 free (bbs);
5122 return true;
5123 }
5124 }
5125 }
5126 free (bbs);
5127 return false;
5128 }
5129
5130 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
5131 reference, returns false, otherwise returns true. NEST is the outermost
5132 loop of the loop nest in which the references should be analyzed. */
5133
5134 opt_result
5135 find_data_references_in_stmt (class loop *nest, gimple *stmt,
5136 vec<data_reference_p> *datarefs)
5137 {
5138 unsigned i;
5139 auto_vec<data_ref_loc, 2> references;
5140 data_ref_loc *ref;
5141 data_reference_p dr;
5142
5143 if (get_references_in_stmt (stmt, &references))
5144 return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5145 stmt);
5146
5147 FOR_EACH_VEC_ELT (references, i, ref)
5148 {
5149 dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5150 loop_containing_stmt (stmt), ref->ref,
5151 stmt, ref->is_read, ref->is_conditional_in_stmt);
5152 gcc_assert (dr != NULL);
5153 datarefs->safe_push (dr);
5154 }
5155
5156 return opt_result::success ();
5157 }
5158
5159 /* Stores the data references in STMT to DATAREFS. If there is an
5160 unanalyzable reference, returns false, otherwise returns true.
5161 NEST is the outermost loop of the loop nest in which the references
5162 should be instantiated, LOOP is the loop in which the references
5163 should be analyzed. */
5164
5165 bool
5166 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5167 vec<data_reference_p> *datarefs)
5168 {
5169 unsigned i;
5170 auto_vec<data_ref_loc, 2> references;
5171 data_ref_loc *ref;
5172 bool ret = true;
5173 data_reference_p dr;
5174
5175 if (get_references_in_stmt (stmt, &references))
5176 return false;
5177
5178 FOR_EACH_VEC_ELT (references, i, ref)
5179 {
5180 dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5181 ref->is_conditional_in_stmt);
5182 gcc_assert (dr != NULL);
5183 datarefs->safe_push (dr);
5184 }
5185
5186 return ret;
5187 }
5188
5189 /* Search the data references in LOOP, and record the information into
5190 DATAREFS. Returns chrec_dont_know when failing to analyze a
5191 difficult case, returns NULL_TREE otherwise. */
5192
5193 tree
5194 find_data_references_in_bb (class loop *loop, basic_block bb,
5195 vec<data_reference_p> *datarefs)
5196 {
5197 gimple_stmt_iterator bsi;
5198
5199 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5200 {
5201 gimple *stmt = gsi_stmt (bsi);
5202
5203 if (!find_data_references_in_stmt (loop, stmt, datarefs))
5204 {
5205 struct data_reference *res;
5206 res = XCNEW (struct data_reference);
5207 datarefs->safe_push (res);
5208
5209 return chrec_dont_know;
5210 }
5211 }
5212
5213 return NULL_TREE;
5214 }
5215
5216 /* Search the data references in LOOP, and record the information into
5217 DATAREFS. Returns chrec_dont_know when failing to analyze a
5218 difficult case, returns NULL_TREE otherwise.
5219
5220 TODO: This function should be made smarter so that it can handle address
5221 arithmetic as if they were array accesses, etc. */
5222
5223 tree
5224 find_data_references_in_loop (class loop *loop,
5225 vec<data_reference_p> *datarefs)
5226 {
5227 basic_block bb, *bbs;
5228 unsigned int i;
5229
5230 bbs = get_loop_body_in_dom_order (loop);
5231
5232 for (i = 0; i < loop->num_nodes; i++)
5233 {
5234 bb = bbs[i];
5235
5236 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5237 {
5238 free (bbs);
5239 return chrec_dont_know;
5240 }
5241 }
5242 free (bbs);
5243
5244 return NULL_TREE;
5245 }
5246
5247 /* Return the alignment in bytes that DRB is guaranteed to have at all
5248 times. */
5249
5250 unsigned int
5251 dr_alignment (innermost_loop_behavior *drb)
5252 {
5253 /* Get the alignment of BASE_ADDRESS + INIT. */
5254 unsigned int alignment = drb->base_alignment;
5255 unsigned int misalignment = (drb->base_misalignment
5256 + TREE_INT_CST_LOW (drb->init));
5257 if (misalignment != 0)
5258 alignment = MIN (alignment, misalignment & -misalignment);
5259
5260 /* Cap it to the alignment of OFFSET. */
5261 if (!integer_zerop (drb->offset))
5262 alignment = MIN (alignment, drb->offset_alignment);
5263
5264 /* Cap it to the alignment of STEP. */
5265 if (!integer_zerop (drb->step))
5266 alignment = MIN (alignment, drb->step_alignment);
5267
5268 return alignment;
5269 }
5270
/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  Return NULL_TREE when no
   such object can be determined.  */

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  /* Only pointer-typed SSA names are handled.  */
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  Start from the maximum alignment any object file
     can provide.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      /* Each step can only preserve the alignment of its largest
	 power-of-two factor.  */
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &reversep, &volatilep);
  /* Give up when there is no base or when the bit position is not a
     known multiple of a byte.  */
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      /* A variable offset contributes the alignment of its largest
	 power-of-two factor.  */
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}
5328
5329 /* Return the object whose alignment would need to be changed in order
5330 to increase the alignment of ADDR. Store the maximum achievable
5331 alignment in *MAX_ALIGNMENT. */
5332
5333 tree
5334 get_base_for_alignment (tree addr, unsigned int *max_alignment)
5335 {
5336 tree base = get_base_for_alignment_1 (addr, max_alignment);
5337 if (base)
5338 return base;
5339
5340 if (TREE_CODE (addr) == ADDR_EXPR)
5341 addr = TREE_OPERAND (addr, 0);
5342 *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5343 return addr;
5344 }
5345
5346 /* Recursive helper function. */
5347
5348 static bool
5349 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
5350 {
5351 /* Inner loops of the nest should not contain siblings. Example:
5352 when there are two consecutive loops,
5353
5354 | loop_0
5355 | loop_1
5356 | A[{0, +, 1}_1]
5357 | endloop_1
5358 | loop_2
5359 | A[{0, +, 1}_2]
5360 | endloop_2
5361 | endloop_0
5362
5363 the dependence relation cannot be captured by the distance
5364 abstraction. */
5365 if (loop->next)
5366 return false;
5367
5368 loop_nest->safe_push (loop);
5369 if (loop->inner)
5370 return find_loop_nest_1 (loop->inner, loop_nest);
5371 return true;
5372 }
5373
5374 /* Return false when the LOOP is not well nested. Otherwise return
5375 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
5376 contain the loops from the outermost to the innermost, as they will
5377 appear in the classic distance vector. */
5378
5379 bool
5380 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
5381 {
5382 loop_nest->safe_push (loop);
5383 if (loop->inner)
5384 return find_loop_nest_1 (loop->inner, loop_nest);
5385 return true;
5386 }
5387
5388 /* Returns true when the data dependences have been computed, false otherwise.
5389 Given a loop nest LOOP, the following vectors are returned:
5390 DATAREFS is initialized to all the array elements contained in this loop,
5391 DEPENDENCE_RELATIONS contains the relations between the data references.
5392 Compute read-read and self relations if
5393 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
5394
5395 bool
5396 compute_data_dependences_for_loop (class loop *loop,
5397 bool compute_self_and_read_read_dependences,
5398 vec<loop_p> *loop_nest,
5399 vec<data_reference_p> *datarefs,
5400 vec<ddr_p> *dependence_relations)
5401 {
5402 bool res = true;
5403
5404 memset (&dependence_stats, 0, sizeof (dependence_stats));
5405
5406 /* If the loop nest is not well formed, or one of the data references
5407 is not computable, give up without spending time to compute other
5408 dependences. */
5409 if (!loop
5410 || !find_loop_nest (loop, loop_nest)
5411 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5412 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5413 compute_self_and_read_read_dependences))
5414 res = false;
5415
5416 if (dump_file && (dump_flags & TDF_STATS))
5417 {
5418 fprintf (dump_file, "Dependence tester statistics:\n");
5419
5420 fprintf (dump_file, "Number of dependence tests: %d\n",
5421 dependence_stats.num_dependence_tests);
5422 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5423 dependence_stats.num_dependence_dependent);
5424 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5425 dependence_stats.num_dependence_independent);
5426 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5427 dependence_stats.num_dependence_undetermined);
5428
5429 fprintf (dump_file, "Number of subscript tests: %d\n",
5430 dependence_stats.num_subscript_tests);
5431 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5432 dependence_stats.num_subscript_undetermined);
5433 fprintf (dump_file, "Number of same subscript function: %d\n",
5434 dependence_stats.num_same_subscript_function);
5435
5436 fprintf (dump_file, "Number of ziv tests: %d\n",
5437 dependence_stats.num_ziv);
5438 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5439 dependence_stats.num_ziv_dependent);
5440 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5441 dependence_stats.num_ziv_independent);
5442 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5443 dependence_stats.num_ziv_unimplemented);
5444
5445 fprintf (dump_file, "Number of siv tests: %d\n",
5446 dependence_stats.num_siv);
5447 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5448 dependence_stats.num_siv_dependent);
5449 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5450 dependence_stats.num_siv_independent);
5451 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5452 dependence_stats.num_siv_unimplemented);
5453
5454 fprintf (dump_file, "Number of miv tests: %d\n",
5455 dependence_stats.num_miv);
5456 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5457 dependence_stats.num_miv_dependent);
5458 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5459 dependence_stats.num_miv_independent);
5460 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5461 dependence_stats.num_miv_unimplemented);
5462 }
5463
5464 return res;
5465 }
5466
5467 /* Free the memory used by a data dependence relation DDR. */
5468
5469 void
5470 free_dependence_relation (struct data_dependence_relation *ddr)
5471 {
5472 if (ddr == NULL)
5473 return;
5474
5475 if (DDR_SUBSCRIPTS (ddr).exists ())
5476 free_subscripts (DDR_SUBSCRIPTS (ddr));
5477 DDR_DIST_VECTS (ddr).release ();
5478 DDR_DIR_VECTS (ddr).release ();
5479
5480 free (ddr);
5481 }
5482
5483 /* Free the memory used by the data dependence relations from
5484 DEPENDENCE_RELATIONS. */
5485
5486 void
5487 free_dependence_relations (vec<ddr_p> dependence_relations)
5488 {
5489 unsigned int i;
5490 struct data_dependence_relation *ddr;
5491
5492 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5493 if (ddr)
5494 free_dependence_relation (ddr);
5495
5496 dependence_relations.release ();
5497 }
5498
5499 /* Free the memory used by the data references from DATAREFS. */
5500
5501 void
5502 free_data_refs (vec<data_reference_p> datarefs)
5503 {
5504 unsigned int i;
5505 struct data_reference *dr;
5506
5507 FOR_EACH_VEC_ELT (datarefs, i, dr)
5508 free_data_ref (dr);
5509 datarefs.release ();
5510 }
5511
/* Common routine implementing both dr_direction_indicator and
   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
   Return the step as the indicator otherwise.  */

static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  if (!step)
    return NULL_TREE;
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
	step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.
	 [MINV, MAXV] is the range of unscaled values whose product
	 with FACTOR still fits in ssizetype.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
			 / wi::to_widest (factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
			 / wi::to_widest (factor));

      /* Get the range of values that the unconverted step actually has.
	 Fall back to the full range of its type when no range
	 information is available.  */
      wide_int step_min, step_max;
      if (TREE_CODE (step) != SSA_NAME
	  || get_range_info (step, &step_min, &step_max) != VR_RANGE)
	{
	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
	}

      /* Check whether the unconverted step has an acceptable range.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (minv, widest_int::from (step_min, sgn))
	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
	{
	  /* Scaling by FACTOR cannot overflow, so the sign of the
	     scaled step matches the sign of the unscaled one.  */
	  if (wi::ge_p (step_min, useful_min, sgn))
	    return ssize_int (useful_min);
	  else if (wi::lt_p (step_max, 0, sgn))
	    return ssize_int (-1);
	  else
	    return fold_convert (ssizetype, step);
	}
    }
  /* No simplification applies; return the original step.  */
  return DR_STEP (dr);
}
5571
5572 /* Return a value that is negative iff DR has a negative step. */
5573
5574 tree
5575 dr_direction_indicator (struct data_reference *dr)
5576 {
5577 return dr_step_indicator (dr, 0);
5578 }
5579
5580 /* Return a value that is zero iff DR has a zero step. */
5581
5582 tree
5583 dr_zero_step_indicator (struct data_reference *dr)
5584 {
5585 return dr_step_indicator (dr, 1);
5586 }
5587
5588 /* Return true if DR is known to have a nonnegative (but possibly zero)
5589 step. */
5590
5591 bool
5592 dr_known_forward_stride_p (struct data_reference *dr)
5593 {
5594 tree indicator = dr_direction_indicator (dr);
5595 tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
5596 fold_convert (ssizetype, indicator),
5597 ssize_int (0));
5598 return neg_step_val && integer_zerop (neg_step_val);
5599 }