gcc/tree-data-ref.c

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003-2021 Free Software Foundation, Inc.
   3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass walks a given loop structure searching for array
  22    references.  The information about the array accesses is recorded
  23    in DATA_REFERENCE structures.
  24
  25    The basic test for determining the dependences is:
   26    given two access functions chrec1 and chrec2 to the same array, and
  27    x and y two vectors from the iteration domain, the same element of
  28    the array is accessed twice at iterations x and y if and only if:
  29    |             chrec1 (x) == chrec2 (y).
  30
  31    The goals of this analysis are:
  32
  33    - to determine the independence: the relation between two
  34      independent accesses is qualified with the chrec_known (this
  35      information allows a loop parallelization),
  36
  37    - when two data references access the same data, to qualify the
  38      dependence relation with classic dependence representations:
  39
  40        - distance vectors
  41        - direction vectors
  42        - loop carried level dependence
  43        - polyhedron dependence
  44      or with the chains of recurrences based representation,
  45
  46    - to define a knowledge base for storing the data dependence
  47      information,
  48
  49    - to define an interface to access this data.
  50
  51
  52    Definitions:
  53
  54    - subscript: given two array accesses a subscript is the tuple
  55    composed of the access functions for a given dimension.  Example:
  56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  57    (f1, g1), (f2, g2), (f3, g3).
  58
  59    - Diophantine equation: an equation whose coefficients and
  60    solutions are integer constants, for example the equation
  61    |   3*x + 2*y = 1
  62    has an integer solution x = 1 and y = -1.
  63
  64    References:
  65
  66    - "Advanced Compilation for High Performance Computing" by Randy
  67    Allen and Ken Kennedy.
  68    http://citeseer.ist.psu.edu/goff91practical.html
  69
  70    - "Loop Transformations for Restructuring Compilers - The Foundations"
  71    by Utpal Banerjee.
  72
  73
  74 */
  75
  76 #include "config.h"
  77 #include "system.h"
  78 #include "coretypes.h"
  79 #include "backend.h"
  80 #include "rtl.h"
  81 #include "tree.h"
  82 #include "gimple.h"
  83 #include "gimple-pretty-print.h"
  84 #include "alias.h"
  85 #include "fold-const.h"
  86 #include "expr.h"
  87 #include "gimple-iterator.h"
  88 #include "tree-ssa-loop-niter.h"
  89 #include "tree-ssa-loop.h"
  90 #include "tree-ssa.h"
  91 #include "cfgloop.h"
  92 #include "tree-data-ref.h"
  93 #include "tree-scalar-evolution.h"
  94 #include "dumpfile.h"
  95 #include "tree-affine.h"
  96 #include "builtins.h"
  97 #include "tree-eh.h"
  98 #include "ssa.h"
  99 #include "internal-fn.h"
 100 #include "vr-values.h"
 101 #include "range-op.h"
 102
 103 static struct datadep_stats
 104 {
 105   int num_dependence_tests;
 106   int num_dependence_dependent;
 107   int num_dependence_independent;
 108   int num_dependence_undetermined;
 109
 110   int num_subscript_tests;
 111   int num_subscript_undetermined;
 112   int num_same_subscript_function;
 113
 114   int num_ziv;
 115   int num_ziv_independent;
 116   int num_ziv_dependent;
 117   int num_ziv_unimplemented;
 118
 119   int num_siv;
 120   int num_siv_independent;
 121   int num_siv_dependent;
 122   int num_siv_unimplemented;
 123
 124   int num_miv;
 125   int num_miv_independent;
 126   int num_miv_dependent;
 127   int num_miv_unimplemented;
 128 } dependence_stats;
 129
 130 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
 131                                            unsigned int, unsigned int,
 132                                            class loop *);
 133 /* Returns true iff A divides B.  */
 134
 135 static inline bool
 136 tree_fold_divides_p (const_tree a, const_tree b)
 137 {
 138   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 139   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 140   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 141 }
 142
 143 /* Returns true iff A divides B.  */
 144
 145 static inline bool
 146 int_divides_p (lambda_int a, lambda_int b)
 147 {
 148   return ((b % a) == 0);
 149 }
 150
 151 /* Return true if reference REF contains a union access.  */
 152
 153 static bool
 154 ref_contains_union_access_p (tree ref)
 155 {
 156   while (handled_component_p (ref))
 157     {
 158       ref = TREE_OPERAND (ref, 0);
 159       if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
 160           || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
 161         return true;
 162     }
 163   return false;
 164 }
 165
 166 \f
 167
 168 /* Dump into FILE all the data references from DATAREFS.  */
 169
 170 static void
 171 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
 172 {
 173   for (data_reference *dr : datarefs)
 174     dump_data_reference (file, dr);
 175 }
 176
 177 /* Unified dump into FILE all the data references from DATAREFS.  */
 178
 179 DEBUG_FUNCTION void
 180 debug (vec<data_reference_p> &ref)
 181 {
 182   dump_data_references (stderr, ref);
 183 }
 184
 185 DEBUG_FUNCTION void
 186 debug (vec<data_reference_p> *ptr)
 187 {
 188   if (ptr)
 189     debug (*ptr);
 190   else
 191     fprintf (stderr, "<nil>\n");
 192 }
 193
 194
 195 /* Dump into STDERR all the data references from DATAREFS.  */
 196
 197 DEBUG_FUNCTION void
 198 debug_data_references (vec<data_reference_p> datarefs)
 199 {
 200   dump_data_references (stderr, datarefs);
 201 }
 202
 203 /* Print to STDERR the data_reference DR.  */
 204
 205 DEBUG_FUNCTION void
 206 debug_data_reference (struct data_reference *dr)
 207 {
 208   dump_data_reference (stderr, dr);
 209 }
 210
 211 /* Dump function for a DATA_REFERENCE structure.  */
 212
 213 void
 214 dump_data_reference (FILE *outf,
 215                      struct data_reference *dr)
 216 {
 217   unsigned int i;
 218
 219   fprintf (outf, "#(Data Ref: \n");
 220   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 221   fprintf (outf, "#  stmt: ");
 222   print_gimple_stmt (outf, DR_STMT (dr), 0);
 223   fprintf (outf, "#  ref: ");
 224   print_generic_stmt (outf, DR_REF (dr));
 225   fprintf (outf, "#  base_object: ");
 226   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
 227
 228   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 229     {
 230       fprintf (outf, "#  Access function %d: ", i);
 231       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
 232     }
 233   fprintf (outf, "#)\n");
 234 }
 235
 236 /* Unified dump function for a DATA_REFERENCE structure.  */
 237
 238 DEBUG_FUNCTION void
 239 debug (data_reference &ref)
 240 {
 241   dump_data_reference (stderr, &ref);
 242 }
 243
 244 DEBUG_FUNCTION void
 245 debug (data_reference *ptr)
 246 {
 247   if (ptr)
 248     debug (*ptr);
 249   else
 250     fprintf (stderr, "<nil>\n");
 251 }
 252
 253
 254 /* Dumps the affine function described by FN to the file OUTF.  */
 255
 256 DEBUG_FUNCTION void
 257 dump_affine_function (FILE *outf, affine_fn fn)
 258 {
 259   unsigned i;
 260   tree coef;
 261
 262   print_generic_expr (outf, fn[0], TDF_SLIM);
 263   for (i = 1; fn.iterate (i, &coef); i++)
 264     {
 265       fprintf (outf, " + ");
 266       print_generic_expr (outf, coef, TDF_SLIM);
 267       fprintf (outf, " * x_%u", i);
 268     }
 269 }
 270
 271 /* Dumps the conflict function CF to the file OUTF.  */
 272
 273 DEBUG_FUNCTION void
 274 dump_conflict_function (FILE *outf, conflict_function *cf)
 275 {
 276   unsigned i;
 277
 278   if (cf->n == NO_DEPENDENCE)
 279     fprintf (outf, "no dependence");
 280   else if (cf->n == NOT_KNOWN)
 281     fprintf (outf, "not known");
 282   else
 283     {
 284       for (i = 0; i < cf->n; i++)
 285         {
 286           if (i != 0)
 287             fprintf (outf, " ");
 288           fprintf (outf, "[");
 289           dump_affine_function (outf, cf->fns[i]);
 290           fprintf (outf, "]");
 291         }
 292     }
 293 }
 294
 295 /* Dump function for a SUBSCRIPT structure.  */
 296
 297 DEBUG_FUNCTION void
 298 dump_subscript (FILE *outf, struct subscript *subscript)
 299 {
 300   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 301
 302   fprintf (outf, "\n (subscript \n");
 303   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 304   dump_conflict_function (outf, cf);
 305   if (CF_NONTRIVIAL_P (cf))
 306     {
 307       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 308       fprintf (outf, "\n  last_conflict: ");
 309       print_generic_expr (outf, last_iteration);
 310     }
 311
 312   cf = SUB_CONFLICTS_IN_B (subscript);
 313   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
 314   dump_conflict_function (outf, cf);
 315   if (CF_NONTRIVIAL_P (cf))
 316     {
 317       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 318       fprintf (outf, "\n  last_conflict: ");
 319       print_generic_expr (outf, last_iteration);
 320     }
 321
 322   fprintf (outf, "\n  (Subscript distance: ");
 323   print_generic_expr (outf, SUB_DISTANCE (subscript));
 324   fprintf (outf, " ))\n");
 325 }
 326
 327 /* Print the classic direction vector DIRV to OUTF.  */
 328
 329 DEBUG_FUNCTION void
 330 print_direction_vector (FILE *outf,
 331                         lambda_vector dirv,
 332                         int length)
 333 {
 334   int eq;
 335
 336   for (eq = 0; eq < length; eq++)
 337     {
 338       enum data_dependence_direction dir = ((enum data_dependence_direction)
 339                                             dirv[eq]);
 340
 341       switch (dir)
 342         {
 343         case dir_positive:
 344           fprintf (outf, "    +");
 345           break;
 346         case dir_negative:
 347           fprintf (outf, "    -");
 348           break;
 349         case dir_equal:
 350           fprintf (outf, "    =");
 351           break;
 352         case dir_positive_or_equal:
 353           fprintf (outf, "   +=");
 354           break;
 355         case dir_positive_or_negative:
 356           fprintf (outf, "   +-");
 357           break;
 358         case dir_negative_or_equal:
 359           fprintf (outf, "   -=");
 360           break;
 361         case dir_star:
 362           fprintf (outf, "    *");
 363           break;
 364         default:
 365           fprintf (outf, "indep");
 366           break;
 367         }
 368     }
 369   fprintf (outf, "\n");
 370 }
 371
 372 /* Print a vector of direction vectors.  */
 373
 374 DEBUG_FUNCTION void
 375 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
 376                    int length)
 377 {
 378   for (lambda_vector v : dir_vects)
 379     print_direction_vector (outf, v, length);
 380 }
 381
 382 /* Print out a vector VEC of length N to OUTFILE.  */
 383
 384 DEBUG_FUNCTION void
 385 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 386 {
 387   int i;
 388
 389   for (i = 0; i < n; i++)
 390     fprintf (outfile, "%3d ", (int)vector[i]);
 391   fprintf (outfile, "\n");
 392 }
 393
 394 /* Print a vector of distance vectors.  */
 395
 396 DEBUG_FUNCTION void
 397 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
 398                     int length)
 399 {
 400   for (lambda_vector v : dist_vects)
 401     print_lambda_vector (outf, v, length);
 402 }
 403
 404 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 405
 406 DEBUG_FUNCTION void
 407 dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
 408 {
 409   struct data_reference *dra, *drb;
 410
 411   fprintf (outf, "(Data Dep: \n");
 412
 413   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 414     {
 415       if (ddr)
 416         {
 417           dra = DDR_A (ddr);
 418           drb = DDR_B (ddr);
 419           if (dra)
 420             dump_data_reference (outf, dra);
 421           else
 422             fprintf (outf, "    (nil)\n");
 423           if (drb)
 424             dump_data_reference (outf, drb);
 425           else
 426             fprintf (outf, "    (nil)\n");
 427         }
 428       fprintf (outf, "    (don't know)\n)\n");
 429       return;
 430     }
 431
 432   dra = DDR_A (ddr);
 433   drb = DDR_B (ddr);
 434   dump_data_reference (outf, dra);
 435   dump_data_reference (outf, drb);
 436
 437   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 438     fprintf (outf, "    (no dependence)\n");
 439
 440   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 441     {
 442       unsigned int i;
 443       class loop *loopi;
 444
 445       subscript *sub;
 446       FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
 447         {
 448           fprintf (outf, "  access_fn_A: ");
 449           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
 450           fprintf (outf, "  access_fn_B: ");
 451           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
 452           dump_subscript (outf, sub);
 453         }
 454
 455       fprintf (outf, "  loop nest: (");
 456       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
 457         fprintf (outf, "%d ", loopi->num);
 458       fprintf (outf, ")\n");
 459
 460       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 461         {
 462           fprintf (outf, "  distance_vector: ");
 463           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 464                                DDR_NB_LOOPS (ddr));
 465         }
 466
 467       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 468         {
 469           fprintf (outf, "  direction_vector: ");
 470           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 471                                   DDR_NB_LOOPS (ddr));
 472         }
 473     }
 474
 475   fprintf (outf, ")\n");
 476 }
 477
 478 /* Debug version.  */
 479
 480 DEBUG_FUNCTION void
 481 debug_data_dependence_relation (const struct data_dependence_relation *ddr)
 482 {
 483   dump_data_dependence_relation (stderr, ddr);
 484 }
 485
 486 /* Dump into FILE all the dependence relations from DDRS.  */
 487
 488 DEBUG_FUNCTION void
 489 dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
 490 {
 491   for (auto ddr : ddrs)
 492     dump_data_dependence_relation (file, ddr);
 493 }
 494
 495 DEBUG_FUNCTION void
 496 debug (vec<ddr_p> &ref)
 497 {
 498   dump_data_dependence_relations (stderr, ref);
 499 }
 500
 501 DEBUG_FUNCTION void
 502 debug (vec<ddr_p> *ptr)
 503 {
 504   if (ptr)
 505     debug (*ptr);
 506   else
 507     fprintf (stderr, "<nil>\n");
 508 }
 509
 510
 511 /* Dump to STDERR all the dependence relations from DDRS.  */
 512
 513 DEBUG_FUNCTION void
 514 debug_data_dependence_relations (vec<ddr_p> ddrs)
 515 {
 516   dump_data_dependence_relations (stderr, ddrs);
 517 }
 518
 519 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 520    the dependence relations, and VECT_SIZE is the size of the
 521    dependence vectors, or in other words the number of loops in the
 522    considered nest.  */
 523
 524 DEBUG_FUNCTION void
 525 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
 526 {
 527   for (data_dependence_relation *ddr : ddrs)
 528     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 529       {
 530         for (lambda_vector v : DDR_DIST_VECTS (ddr))
 531           {
 532             fprintf (file, "DISTANCE_V (");
 533             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 534             fprintf (file, ")\n");
 535           }
 536
 537         for (lambda_vector v : DDR_DIR_VECTS (ddr))
 538           {
 539             fprintf (file, "DIRECTION_V (");
 540             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 541             fprintf (file, ")\n");
 542           }
 543       }
 544
 545   fprintf (file, "\n\n");
 546 }
 547
 548 /* Dumps the data dependence relations DDRS in FILE.  */
 549
 550 DEBUG_FUNCTION void
 551 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
 552 {
 553   for (data_dependence_relation *ddr : ddrs)
 554     dump_data_dependence_relation (file, ddr);
 555
 556   fprintf (file, "\n\n");
 557 }
 558
 559 DEBUG_FUNCTION void
 560 debug_ddrs (vec<ddr_p> ddrs)
 561 {
 562   dump_ddrs (stderr, ddrs);
 563 }
 564
 565 /* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
 566    OP0 CODE OP1, where:
 567
 568    - OP0 CODE OP1 has integral type TYPE
 569    - the range of OP0 is given by OP0_RANGE and
 570    - the range of OP1 is given by OP1_RANGE.
 571
 572    Independently of RESULT_RANGE, try to compute:
 573
 574      DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
 575              - (sizetype) (OP0 CODE OP1)
 576
 577    as a constant and subtract DELTA from the ssizetype constant in *OFF.
 578    Return true on success, or false if DELTA is not known at compile time.
 579
 580    Truncation and sign changes are known to distribute over CODE, i.e.
 581
 582      (itype) (A CODE B) == (itype) A CODE (itype) B
 583
 584    for any integral type ITYPE whose precision is no greater than the
 585    precision of A and B.  */
 586
 587 static bool
 588 compute_distributive_range (tree type, value_range &op0_range,
 589                             tree_code code, value_range &op1_range,
 590                             tree *off, value_range *result_range)
 591 {
 592   gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
 593   if (result_range)
 594     {
 595       range_operator *op = range_op_handler (code, type);
 596       op->fold_range (*result_range, type, op0_range, op1_range);
 597     }
 598
 599   /* The distributive property guarantees that if TYPE is no narrower
 600      than SIZETYPE,
 601
 602        (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1
 603
 604      and so we can treat DELTA as zero.  */
 605   if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
 606     return true;
 607
 608   /* If overflow is undefined, we can assume that:
 609
 610        X == (ssizetype) OP0 CODE (ssizetype) OP1
 611
 612      is within the range of TYPE, i.e.:
 613
 614        X == (ssizetype) (TYPE) X
 615
 616      Distributing the (TYPE) truncation over X gives:
 617
 618        X == (ssizetype) (OP0 CODE OP1)
 619
 620      Casting both sides to sizetype and distributing the sizetype cast
 621      over X gives:
 622
 623        (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)
 624
 625      and so we can treat DELTA as zero.  */
 626   if (TYPE_OVERFLOW_UNDEFINED (type))
 627     return true;
 628
 629   /* Compute the range of:
 630
 631        (ssizetype) OP0 CODE (ssizetype) OP1
 632
 633      The distributive property guarantees that this has the same bitpattern as:
 634
 635        (sizetype) OP0 CODE (sizetype) OP1
 636
 637      but its range is more conducive to analysis.  */
 638   range_cast (op0_range, ssizetype);
 639   range_cast (op1_range, ssizetype);
 640   value_range wide_range;
 641   range_operator *op = range_op_handler (code, ssizetype);
 642   bool saved_flag_wrapv = flag_wrapv;
 643   flag_wrapv = 1;
 644   op->fold_range (wide_range, ssizetype, op0_range, op1_range);
 645   flag_wrapv = saved_flag_wrapv;
 646   if (wide_range.num_pairs () != 1 || !range_int_cst_p (&wide_range))
 647     return false;
 648
 649   wide_int lb = wide_range.lower_bound ();
 650   wide_int ub = wide_range.upper_bound ();
 651
 652   /* Calculate the number of times that each end of the range overflows or
 653      underflows TYPE.  We can only calculate DELTA if the numbers match.  */
 654   unsigned int precision = TYPE_PRECISION (type);
 655   if (!TYPE_UNSIGNED (type))
 656     {
 657       wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
 658       lb -= type_min;
 659       ub -= type_min;
 660     }
 661   wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
 662   lb &= upper_bits;
 663   ub &= upper_bits;
 664   if (lb != ub)
 665     return false;
 666
 667   /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
 668      negative values indicating underflow.  The low PRECISION bits of LB
 669      are clear, so DELTA is therefore LB (== UB).  */
 670   *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
 671   return true;
 672 }
 673
 674 /* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
 675    given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
 676    FROM_TYPE are integral types.  */
 677
 678 static bool
 679 nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
 680 {
 681   gcc_assert (INTEGRAL_TYPE_P (to_type)
 682               && INTEGRAL_TYPE_P (from_type)
 683               && !TYPE_OVERFLOW_TRAPS (to_type)
 684               && !TYPE_OVERFLOW_TRAPS (from_type));
 685
 686   /* Converting to something no narrower than sizetype and then to sizetype
 687      is equivalent to converting directly to sizetype.  */
 688   if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
 689     return true;
 690
 691   /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
 692   if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
 693       && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
 694     return true;
 695
 696   /* For narrowing conversions, we could in principle test whether
 697      the bits in FROM_TYPE but not in TO_TYPE have a fixed value
 698      and apply a constant adjustment.
 699
 700      For other conversions (which involve a sign change) we could
 701      check that the signs are always equal, and apply a constant
 702      adjustment if the signs are negative.
 703
 704      However, both cases should be rare.  */
 705   return range_fits_type_p (&range, TYPE_PRECISION (to_type),
 706                             TYPE_SIGN (to_type));
 707 }
 708
 709 static void
 710 split_constant_offset (tree type, tree *var, tree *off,
 711                        value_range *result_range,
 712                        hash_map<tree, std::pair<tree, tree> > &cache,
 713                        unsigned *limit);
 714
 715 /* Helper function for split_constant_offset.  If TYPE is a pointer type,
 716    try to express OP0 CODE OP1 as:
 717
 718      POINTER_PLUS <*VAR, (sizetype) *OFF>
 719
 720    where:
 721
 722    - *VAR has type TYPE
 723    - *OFF is a constant of type ssizetype.
 724
 725    If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:
 726
 727      *VAR + (sizetype) *OFF
 728
 729    where:
 730
 731    - *VAR has type sizetype
 732    - *OFF is a constant of type ssizetype.
 733
 734    In both cases, OP0 CODE OP1 has type TYPE.
 735
 736    Return true on success.  A false return value indicates that we can't
 737    do better than set *OFF to zero.
 738
 739    When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
 740    if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.
 741
 742    CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
 743    visited.  LIMIT counts down the number of SSA names that we are
 744    allowed to process before giving up.  */
 745
  746 static bool
  747 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
  748                          tree *var, tree *off, value_range *result_range,
  749                          hash_map<tree, std::pair<tree, tree> > &cache,
  750                          unsigned *limit)
  751 {
  752   tree var0, var1;
  753   tree off0, off1;
  754   value_range op0_range, op1_range;
  755
        /* Clear both outputs up front.  The paths below that return true
           are expected to overwrite them, either directly or through a
           recursive call.  */
  756   *var = NULL_TREE;
  757   *off = NULL_TREE;
  758
  759   switch (code)
  760     {
  761     case INTEGER_CST:
            /* A constant is pure offset: the variable part is zero and
               the range is the single value OP0.  */
  762       *var = size_int (0);
  763       *off = fold_convert (ssizetype, op0);
  764       if (result_range)
  765         result_range->set (op0, op0);
  766       return true;
  767
  768     case POINTER_PLUS_EXPR:
            /* Split both operands, then recombine the variable parts as a
               pointer addition and sum the constant parts.  No value
               ranges are requested (nullptr is passed).  */
  769       split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
  770       split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
  771       *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
  772       *off = size_binop (PLUS_EXPR, off0, off1);
  773       return true;
  774
  775     case PLUS_EXPR:
  776     case MINUS_EXPR:
            /* Split both operands, asking for their ranges, and combine
               the constant parts with CODE.  compute_distributive_range
               may then adjust *OFF and decides whether the split can be
               used for TYPE.  */
  777       split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
  778       split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
  779       *off = size_binop (code, off0, off1);
  780       if (!compute_distributive_range (type, op0_range, code, op1_range,
  781                                        off, result_range))
  782         return false;
  783       *var = fold_build2 (code, sizetype, var0, var1);
  784       return true;
  785
  786     case MULT_EXPR:
            /* Only a multiplication by an INTEGER_CST is handled.  */
  787       if (TREE_CODE (op1) != INTEGER_CST)
  788         return false;
  789
            /* The range of the constant operand is the single value OP1.  */
  790       split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
  791       op1_range.set (op1, op1);
  792       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
  793       if (!compute_distributive_range (type, op0_range, code, op1_range,
  794                                        off, result_range))
  795         return false;
  796       *var = fold_build2 (MULT_EXPR, sizetype, var0,
  797                           fold_convert (sizetype, op1));
  798       return true;
  799
  800     case ADDR_EXPR:
  801       {
  802         tree base, poffset;
  803         poly_int64 pbitsize, pbitpos, pbytepos;
  804         machine_mode pmode;
  805         int punsignedp, preversep, pvolatilep;
  806
              /* Look at the object whose address is taken and decompose
                 it into a base plus bit position and optional variable
                 offset.  */
  807         op0 = TREE_OPERAND (op0, 0);
  808         base
  809           = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
  810                                  &punsignedp, &preversep, &pvolatilep);
  811
              /* Give up unless the bit position is a multiple of
                 BITS_PER_UNIT; PBYTEPOS is used as the byte offset
                 below.  */
  812         if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
  813           return false;
  814         base = build_fold_addr_expr (base);
  815         off0 = ssize_int (pbytepos);
  816
              /* A variable offset is split in turn: its constant part is
                 added to OFF0 and its variable part is added to the base
                 address.  */
  817         if (poffset)
  818           {
  819             split_constant_offset (poffset, &poffset, &off1, nullptr,
  820                                    cache, limit);
  821             off0 = size_binop (PLUS_EXPR, off0, off1);
  822             base = fold_build_pointer_plus (base, poffset);
  823           }
  824
  825         var0 = fold_convert (type, base);
  826
  827         /* If variable length types are involved, punt, otherwise casts
  828            might be converted into ARRAY_REFs in gimplify_conversion.
  829            To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
  830            possibly no longer appears in current GIMPLE, might resurface.
  831            This perhaps could run
  832            if (CONVERT_EXPR_P (var0))
  833              {
  834                gimplify_conversion (&var0);
  835                // Attempt to fill in any within var0 found ARRAY_REF's
  836                // element size from corresponding op embedded ARRAY_REF,
  837                // if unsuccessful, just punt.
  838              }  */
  839         while (POINTER_TYPE_P (type))
  840           type = TREE_TYPE (type);
  841         if (int_size_in_bytes (type) < 0)
  842           return false;
  843
  844         *var = var0;
  845         *off = off0;
  846         return true;
  847       }
  848
  849     case SSA_NAME:
  850       {
  851         if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
  852           return false;
  853
  854         gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
  855         enum tree_code subcode;
  856
              /* Only names defined by an assignment are looked through.  */
  857         if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
  858           return false;
  859
  860         subcode = gimple_assign_rhs_code (def_stmt);
  861
  862         /* We are using a cache to avoid un-CSEing large amounts of code.  */
  863         bool use_cache = false;
  864         if (!has_single_use (op0)
  865             && (subcode == POINTER_PLUS_EXPR
  866                 || subcode == PLUS_EXPR
  867                 || subcode == MINUS_EXPR
  868                 || subcode == MULT_EXPR
  869                 || subcode == ADDR_EXPR
  870                 || CONVERT_EXPR_CODE_P (subcode)))
  871           {
  872             use_cache = true;
  873             bool existed;
  874             std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
  875             if (existed)
  876               {
                      /* A recorded zero offset, which includes the
                         placeholder stored below, is treated as a
                         failure to split this name.  */
  877                 if (integer_zerop (e.second))
  878                   return false;
  879                 *var = e.first;
  880                 *off = e.second;
  881                 /* The caller sets the range in this case.  */
  882                 return true;
  883               }
                  /* First visit: record OP0 itself with a zero offset.  The
                     entry is overwritten below once the definition has
                     been split successfully.  */
  884             e = std::make_pair (op0, ssize_int (0));
  885           }
  886
              /* Stop once the budget of SSA names that may be processed is
                 used up; otherwise consume one unit of it.  */
  887         if (*limit == 0)
  888           return false;
  889         --*limit;
  890
  891         var0 = gimple_assign_rhs1 (def_stmt);
  892         var1 = gimple_assign_rhs2 (def_stmt);
  893
              /* Recurse on the defining expression, without requesting a
                 range.  */
  894         bool res = split_constant_offset_1 (type, var0, subcode, var1,
  895                                             var, off, nullptr, cache, limit);
              /* E from the lookup above is out of scope here, so the cache
                 entry is fetched again before being updated.  */
  896         if (res && use_cache)
  897           *cache.get (op0) = std::make_pair (*var, *off);
  898         /* The caller sets the range in this case.  */
  899         return res;
  900       }
  901     CASE_CONVERT:
  902       {
  903         /* We can only handle the following conversions:
  904
  905            - Conversions from one pointer type to another pointer type.
  906
  907            - Conversions from one non-trapping integral type to another
  908              non-trapping integral type.  In this case, the recursive
  909              call makes sure that:
  910
  911                (sizetype) OP0
  912
  913              can be expressed as a sizetype operation involving VAR and OFF,
  914              and all we need to do is check whether:
  915
  916                (sizetype) OP0 == (sizetype) (TYPE) OP0
  917
  918            - Conversions from a non-trapping sizetype-size integral type to
  919              a like-sized pointer type.  In this case, the recursive call
  920              makes sure that:
  921
  922                (sizetype) OP0 == *VAR + (sizetype) *OFF
  923
  924              and we can convert that to:
  925
  926                POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>
  927
  928            - Conversions from a sizetype-sized pointer type to a like-sized
  929              non-trapping integral type.  In this case, the recursive call
  930              makes sure that:
  931
  932                OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>
  933
  934              where the POINTER_PLUS and *VAR have the same precision as
  935              TYPE (and the same precision as sizetype).  Then:
  936
  937                (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
  938         tree itype = TREE_TYPE (op0);
  939         if ((POINTER_TYPE_P (itype)
  940              || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
  941             && (POINTER_TYPE_P (type)
  942                 || (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
  943             && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
  944                 || (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
  945                     && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
  946           {
  947             if (POINTER_TYPE_P (type))
  948               {
                      /* The result is a pointer: split OP0 and give the
                         variable part the pointer type TYPE.  */
  949                 split_constant_offset (op0, var, off, nullptr, cache, limit);
  950                 *var = fold_convert (type, *var);
  951               }
  952             else if (POINTER_TYPE_P (itype))
  953               {
                      /* A pointer converted to an integer: split OP0 and
                         convert the variable part to sizetype.  */
  954                 split_constant_offset (op0, var, off, nullptr, cache, limit);
  955                 *var = fold_convert (sizetype, *var);
  956               }
  957             else
  958               {
                      /* Integer to integer: the split of OP0 is only kept
                         if nop_conversion_for_offset_p accepts the
                         conversion to TYPE for OP0's range.  */
  959                 split_constant_offset (op0, var, off, &op0_range,
  960                                        cache, limit);
  961                 if (!nop_conversion_for_offset_p (type, itype, op0_range))
  962                   return false;
  963                 if (result_range)
  964                   {
  965                     *result_range = op0_range;
  966                     range_cast (*result_range, type);
  967                   }
  968               }
  969             return true;
  970           }
  971         return false;
  972       }
  973
  974     default:
  975       return false;
  976     }
  977 }
 978
 979 /* If EXP has pointer type, try to express it as:
 980
 981      POINTER_PLUS <*VAR, (sizetype) *OFF>
 982
 983    where:
 984
 985    - *VAR has the same type as EXP
 986    - *OFF is a constant of type ssizetype.
 987
 988    If EXP has an integral type, try to express (sizetype) EXP as:
 989
 990      *VAR + (sizetype) *OFF
 991
 992    where:
 993
 994    - *VAR has type sizetype
 995    - *OFF is a constant of type ssizetype.
 996
        If no constant offset can be split off, *VAR is EXP itself (converted
        to sizetype when EXP is integral) and *OFF is zero.

 997    If EXP_RANGE is nonnull, set it to the range of EXP.
 998
 999    CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
1000    visited.  LIMIT counts down the number of SSA names that we are
1001    allowed to process before giving up.  */
1002
1003 static void
1004 split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
1005                        hash_map<tree, std::pair<tree, tree> > &cache,
1006                        unsigned *limit)
1007 {
1008   tree type = TREE_TYPE (exp), op0, op1;
1009   enum tree_code code;
1010
1011   code = TREE_CODE (exp);
1012   if (exp_range)
1013     {
           /* NOTE(review): assigning TYPE presumably gives the full range
              of TYPE as the default value -- confirm against value_range.  */
1014       *exp_range = type;
1015       if (code == SSA_NAME)
1016         {
1017           value_range vr;
1018           get_range_query (cfun)->range_of_expr (vr, exp);
1019           if (vr.undefined_p ())
1020             vr.set_varying (TREE_TYPE (exp));
1021           wide_int var_min = wi::to_wide (vr.min ());
1022           wide_int var_max = wi::to_wide (vr.max ());
1023           value_range_kind vr_kind = vr.kind ();
1024           wide_int var_nonzero = get_nonzero_bits (exp);
1025           vr_kind = intersect_range_with_nonzero_bits (vr_kind,
1026                                                        &var_min, &var_max,
1027                                                        var_nonzero,
1028                                                        TYPE_SIGN (type));
1029           /* This check for VR_VARYING is here because the old code
1030              using get_range_info would return VR_RANGE for the entire
1031              domain, instead of VR_VARYING.  The new code normalizes
1032              full-domain ranges to VR_VARYING.  */
1033           if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
1034             *exp_range = value_range (type, var_min, var_max);
1035         }
1036     }
1037
       /* Try to decompose EXP according to its operation.  When the helper
          succeeds it has already filled in *VAR and *OFF, so we are done.  */
1038   if (!tree_is_chrec (exp)
1039       && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
1040     {
1041       extract_ops_from_tree (exp, &code, &op0, &op1);
1042       if (split_constant_offset_1 (type, op0, code, op1, var, off,
1043                                    exp_range, cache, limit))
1044         return;
1045     }
1046
       /* Fallback: the whole of EXP is the variable part and the constant
          offset is zero.  */
1047   *var = exp;
1048   if (INTEGRAL_TYPE_P (type))
1049     *var = fold_convert (sizetype, *var);
1050   *off = ssize_int (0);
1051
       /* The range of an SSA_NAME was already computed at the top of the
          function.  For any other expression query it now and keep the
          default range if the query fails or yields an undefined range.  */
1052   value_range r;
1053   if (exp_range && code != SSA_NAME
1054       && get_range_query (cfun)->range_of_expr (r, exp)
1055       && !r.undefined_p ())
1056     *exp_range = r;
1057 }
1058
1059 /* Split EXP into *VAR + *OFF, with *OFF a constant of type ssizetype and
1060    *VAR converted back to the type of EXP.  */
1061
1062 void
1063 split_constant_offset (tree exp, tree *var, tree *off)
1064 {
1065   static hash_map<tree, std::pair<tree, tree> > *memo;
1066   if (memo == nullptr)
1067     memo = new hash_map<tree, std::pair<tree, tree> > (37);
1068   unsigned budget = param_ssa_name_def_chain_limit;
1069   split_constant_offset (exp, var, off, nullptr, *memo, &budget);
1070   *var = fold_convert (TREE_TYPE (exp), *var);
1071   memo->empty ();
1072 }
1073
1074 /* Return ADDR, the address of an object, in canonical shape: nop casts are
1075    removed and the result has the type of a pointer to the object.  */
1076
1077 static tree
1078 canonicalize_base_object_address (tree addr)
1079 {
1080   tree stripped = addr;
1081   STRIP_NOPS (stripped);
1082
1083   /* If stripping exposed a non-pointer (the base address may have been
1084      obtained by a cast from an integer), the cast must stay, so hand back
1085      ADDR untouched.  */
1086   if (!POINTER_TYPE_P (TREE_TYPE (stripped)))
1087     return addr;
1088
1089   /* Only an ADDR_EXPR is rebuilt; any other pointer is returned as is.  */
1090   if (TREE_CODE (stripped) == ADDR_EXPR)
1091     return build_fold_addr_expr (TREE_OPERAND (stripped, 0));
1092   return stripped;
1093 }
1094
1095 /* Analyze the behavior of memory reference REF within STMT.
1096    There are two modes:
1097
1098    - BB analysis.  In this case we simply split the address into base,
1099      init and offset components, without reference to any containing loop.
1100      The resulting base and offset are general expressions and they can
1101      vary arbitrarily from one iteration of the containing loop to the next.
1102      The step is always zero.
1103
1104    - loop analysis.  In this case we analyze the reference both wrt LOOP
1105      and on the basis that the reference occurs (is "used") in LOOP;
1106      see the comment above analyze_scalar_evolution_in_loop for more
1107      information about this distinction.  The base, init, offset and
1108      step fields are all invariant in LOOP.
1109
1110    Perform BB analysis if LOOP is null, or if LOOP is the function's
1111    dummy outermost loop.  In other cases perform loop analysis.
1112
1113    Return true if the analysis succeeded and store the results in DRB if so.
1114    BB analysis can only fail for bitfield or reversed-storage accesses.

        STMT is only used as the location of failure messages.  DRB is
        written only on the success path, at the very end of the function.  */
1115
1116 opt_result
1117 dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
1118                       class loop *loop, const gimple *stmt)
1119 {
1120   poly_int64 pbitsize, pbitpos;
1121   tree base, poffset;
1122   machine_mode pmode;
1123   int punsignedp, preversep, pvolatilep;
1124   affine_iv base_iv, offset_iv;
1125   tree init, dinit, step;
       /* A null LOOP or a loop numbered 0 selects BB analysis.  */
1126   bool in_loop = (loop && loop->num);
1127
1128   if (dump_file && (dump_flags & TDF_DETAILS))
1129     fprintf (dump_file, "analyze_innermost: ");
1130
1131   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
1132                               &punsignedp, &preversep, &pvolatilep);
1133   gcc_assert (base != NULL_TREE);
1134
       /* Reject accesses that do not start on a byte boundary; otherwise
          PBYTEPOS receives the position in bytes.  */
1135   poly_int64 pbytepos;
1136   if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
1137     return opt_result::failure_at (stmt,
1138                                    "failed: bit offset alignment.\n");
1139
1140   if (preversep)
1141     return opt_result::failure_at (stmt,
1142                                    "failed: reverse storage order.\n");
1143
1144   /* Calculate the alignment and misalignment for the inner reference.  */
1145   unsigned int HOST_WIDE_INT bit_base_misalignment;
1146   unsigned int bit_base_alignment;
1147   get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
1148
1149   /* There are no bitfield references remaining in BASE, so the values
1150      we got back must be whole bytes.  */
1151   gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
1152               && bit_base_misalignment % BITS_PER_UNIT == 0);
1153   unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
1154   poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
1155
       /* Turn BASE from an object into an address: for a MEM_REF use its
          pointer operand, otherwise take the address of the object.  */
1156   if (TREE_CODE (base) == MEM_REF)
1157     {
1158       if (!integer_zerop (TREE_OPERAND (base, 1)))
1159         {
1160           /* Subtract MOFF from the base and add it to POFFSET instead.
1161              Adjust the misalignment to reflect the amount we subtracted.  */
1162           poly_offset_int moff = mem_ref_offset (base);
1163           base_misalignment -= moff.force_shwi ();
1164           tree mofft = wide_int_to_tree (sizetype, moff);
1165           if (!poffset)
1166             poffset = mofft;
1167           else
1168             poffset = size_binop (PLUS_EXPR, poffset, mofft);
1169         }
1170       base = TREE_OPERAND (base, 0);
1171     }
1172   else
1173     base = build_fold_addr_expr (base);
1174
       /* In loop analysis the base address must be a simple induction
          variable of LOOP.  In BB analysis it is used unchanged, with a
          zero step.  */
1175   if (in_loop)
1176     {
1177       if (!simple_iv (loop, loop, base, &base_iv, true))
1178         return opt_result::failure_at
1179           (stmt, "failed: evolution of base is not affine.\n");
1180     }
1181   else
1182     {
1183       base_iv.base = base;
1184       base_iv.step = ssize_int (0);
1185       base_iv.no_overflow = true;
1186     }
1187
       /* Same treatment for the variable offset, which is zero when
          get_inner_reference (and the MEM_REF handling above) found none.  */
1188   if (!poffset)
1189     {
1190       offset_iv.base = ssize_int (0);
1191       offset_iv.step = ssize_int (0);
1192     }
1193   else
1194     {
1195       if (!in_loop)
1196         {
1197           offset_iv.base = poffset;
1198           offset_iv.step = ssize_int (0);
1199         }
1200       else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
1201         return opt_result::failure_at
1202           (stmt, "failed: evolution of offset is not affine.\n");
1203     }
1204
       /* INIT accumulates every constant byte offset, starting with the
          constant position reported by get_inner_reference.  */
1205   init = ssize_int (pbytepos);
1206
1207   /* Subtract any constant component from the base and add it to INIT instead.
1208      Adjust the misalignment to reflect the amount we subtracted.  */
1209   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1210   init = size_binop (PLUS_EXPR, init, dinit);
1211   base_misalignment -= TREE_INT_CST_LOW (dinit);
1212
       /* Likewise move the constant part of the offset into INIT.  The base
          itself is unchanged here, so the misalignment needs no update.  */
1213   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1214   init = size_binop (PLUS_EXPR, init, dinit);
1215
       /* The step of the access is the sum of the base and offset steps.  */
1216   step = size_binop (PLUS_EXPR,
1217                      fold_convert (ssizetype, base_iv.step),
1218                      fold_convert (ssizetype, offset_iv.step));
1219
1220   base = canonicalize_base_object_address (base_iv.base);
1221
1222   /* See if get_pointer_alignment can guarantee a higher alignment than
1223      the one we calculated above.  */
1224   unsigned int HOST_WIDE_INT alt_misalignment;
1225   unsigned int alt_alignment;
1226   get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1227
1228   /* As above, these values must be whole bytes.  */
1229   gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1230               && alt_misalignment % BITS_PER_UNIT == 0);
1231   alt_alignment /= BITS_PER_UNIT;
1232   alt_misalignment /= BITS_PER_UNIT;
1233
1234   if (base_alignment < alt_alignment)
1235     {
1236       base_alignment = alt_alignment;
1237       base_misalignment = alt_misalignment;
1238     }
1239
1240   drb->base_address = base;
1241   drb->offset = fold_convert (ssizetype, offset_iv.base);
1242   drb->init = init;
1243   drb->step = step;
       /* NOTE(review): presumably known_misalignment succeeds when the
          misalignment is a compile-time constant relative to
          BASE_ALIGNMENT; otherwise only the alignment implied by the
          misalignment value itself is recorded -- confirm.  */
1244   if (known_misalignment (base_misalignment, base_alignment,
1245                           &drb->base_misalignment))
1246     drb->base_alignment = base_alignment;
1247   else
1248     {
1249       drb->base_alignment = known_alignment (base_misalignment);
1250       drb->base_misalignment = 0;
1251     }
1252   drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1253   drb->step_alignment = highest_pow2_factor (step);
1254
       /* Completes the "analyze_innermost: " dump line started above.  */
1255   if (dump_file && (dump_flags & TDF_DETAILS))
1256     fprintf (dump_file, "success.\n");
1257
1258   return opt_result::success ();
1259 }
1260
1261 /* Decide whether OP is a component reference that may appear in a DR
1262    access function.  This is narrower than handled_component_p.  */
1263
1264 static bool
1265 access_fn_component_p (tree op)
1266 {
1267   const enum tree_code op_code = TREE_CODE (op);
1268
1269   /* Complex parts and array elements always qualify.  */
1270   if (op_code == REALPART_EXPR
1271       || op_code == IMAGPART_EXPR
1272       || op_code == ARRAY_REF)
1273     return true;
1274
1275   /* A field access qualifies only when its container is a RECORD_TYPE.  */
1276   if (op_code == COMPONENT_REF)
1277     return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1278
1279   return false;
1280 }
1281
1282 /* Return true if an access_fn_component_p reference can have BASE as
1283    its base, judging by the type of BASE.  */
1284
1285 static bool
1286 base_supports_access_fn_components_p (tree base)
1287 {
1288   const enum tree_code base_type_code = TREE_CODE (TREE_TYPE (base));
1289
1290   /* Complex values, arrays and records are the container types that
1291      correspond to the REALPART_EXPR/IMAGPART_EXPR, ARRAY_REF and
1292      COMPONENT_REF cases accepted by access_fn_component_p.  */
1293   bool supported = (base_type_code == COMPLEX_TYPE
1294                     || base_type_code == ARRAY_TYPE
1295                     || base_type_code == RECORD_TYPE);
1296   return supported;
1297 }
1298
1299 /* Determines the base object and the list of indices of memory reference
1300    DR, analyzed in LOOP and instantiated before NEST.  The results are
        stored in DR_BASE_OBJECT and DR_ACCESS_FNS of DR.  When NEST is null
        the base object is the reference itself and there are no access
        functions.  */
1301
1302 static void
1303 dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
1304 {
1305   vec<tree> access_fns = vNULL;
1306   tree ref, op;
1307   tree base, off, access_fn;
1308
1309   /* If analyzing a basic-block there are no indices to analyze
1310      and thus no access functions.  */
1311   if (!nest)
1312     {
1313       DR_BASE_OBJECT (dr) = DR_REF (dr);
1314       DR_ACCESS_FNS (dr).create (0);
1315       return;
1316     }
1317
1318   ref = DR_REF (dr);
1319
1320   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1321      into a two element array with a constant index.  The base is
1322      then just the immediate underlying object.  */
1323   if (TREE_CODE (ref) == REALPART_EXPR)
1324     {
1325       ref = TREE_OPERAND (ref, 0);
1326       access_fns.safe_push (integer_zero_node);
1327     }
1328   else if (TREE_CODE (ref) == IMAGPART_EXPR)
1329     {
1330       ref = TREE_OPERAND (ref, 0);
1331       access_fns.safe_push (integer_one_node);
1332     }
1333
1334   /* Analyze access functions of dimensions we know to be independent.
1335      The list of component references handled here should be kept in
1336      sync with access_fn_component_p.  */
1337   while (handled_component_p (ref))
1338     {
1339       if (TREE_CODE (ref) == ARRAY_REF)
1340         {
               /* The access function is the evolution of the array index
                  in LOOP, instantiated with respect to NEST.  */
1341           op = TREE_OPERAND (ref, 1);
1342           access_fn = analyze_scalar_evolution (loop, op);
1343           access_fn = instantiate_scev (nest, loop, access_fn);
1344           access_fns.safe_push (access_fn);
1345         }
1346       else if (TREE_CODE (ref) == COMPONENT_REF
1347                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1348         {
1349           /* For COMPONENT_REFs of records (but not unions!) use the
1350              FIELD_DECL offset as constant access function so we can
1351              disambiguate a[i].f1 and a[i].f2.  */
               /* Note that this OFF shadows the function-level OFF, which is
                  only used in the MEM_REF handling further down.  The value
                  pushed is the field position in bits.  */
1352           tree off = component_ref_field_offset (ref);
1353           off = size_binop (PLUS_EXPR,
1354                             size_binop (MULT_EXPR,
1355                                         fold_convert (bitsizetype, off),
1356                                         bitsize_int (BITS_PER_UNIT)),
1357                             DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1358           access_fns.safe_push (off);
1359         }
1360       else
1361         /* If we have an unhandled component we could not translate
1362            to an access function stop analyzing.  We have determined
1363            our base object in this case.  */
1364         break;
1365
           /* Step to the object containing the component just handled.  */
1366       ref = TREE_OPERAND (ref, 0);
1367     }
1368
1369   /* If the address operand of a MEM_REF base has an evolution in the
1370      analyzed nest, add it as an additional independent access-function.  */
1371   if (TREE_CODE (ref) == MEM_REF)
1372     {
1373       op = TREE_OPERAND (ref, 0);
1374       access_fn = analyze_scalar_evolution (loop, op);
1375       access_fn = instantiate_scev (nest, loop, access_fn);
1376       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1377         {
1378           tree orig_type;
1379           tree memoff = TREE_OPERAND (ref, 1);
               /* Split the initial value of the pointer evolution into a
                  variable BASE and a constant OFF.  */
1380           base = initial_condition (access_fn);
1381           orig_type = TREE_TYPE (base);
1382           STRIP_USELESS_TYPE_CONVERSION (base);
1383           split_constant_offset (base, &base, &off);
1384           STRIP_USELESS_TYPE_CONVERSION (base);
1385           /* Fold the MEM_REF offset into the evolutions initial
1386              value to make more bases comparable.  */
1387           if (!integer_zerop (memoff))
1388             {
1389               off = size_binop (PLUS_EXPR, off,
1390                                 fold_convert (ssizetype, memoff));
1391               memoff = build_int_cst (TREE_TYPE (memoff), 0);
1392             }
1393           /* Adjust the offset so it is a multiple of the access type
1394              size and thus we separate bases that can possibly be used
1395              to produce partial overlaps (which the access_fn machinery
1396              cannot handle).  */
1397           wide_int rem;
1398           if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1399               && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1400               && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1401             rem = wi::mod_trunc
1402               (wi::to_wide (off),
1403                wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1404                SIGNED);
1405           else
1406             /* If we can't compute the remainder simply force the initial
1407                condition to zero.  */
1408             rem = wi::to_wide (off);
               /* The multiple of the access size stays in the access
                  function; the remainder becomes the new MEM_REF offset.  */
1409           off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1410           memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1411           /* And finally replace the initial condition.  */
1412           access_fn = chrec_replace_initial_condition
1413               (access_fn, fold_convert (orig_type, off));
1414           /* ???  This is still not a suitable base object for
1415              dr_may_alias_p - the base object needs to be an
1416              access that covers the object as whole.  With
1417              an evolution in the pointer this cannot be
1418              guaranteed.
1419              As a band-aid, mark the access so we can special-case
1420              it in dr_may_alias_p.  */
1421           tree old = ref;
1422           ref = fold_build2_loc (EXPR_LOCATION (ref),
1423                                  MEM_REF, TREE_TYPE (ref),
1424                                  base, memoff);
               /* Carry the dependence clique and base of the original
                  MEM_REF over to the rebuilt one.  */
1425           MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1426           MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1427           DR_UNCONSTRAINED_BASE (dr) = true;
1428           access_fns.safe_push (access_fn);
1429         }
1430     }
1431   else if (DECL_P (ref))
1432     {
1433       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1434       ref = build2 (MEM_REF, TREE_TYPE (ref),
1435                     build_fold_addr_expr (ref),
1436                     build_int_cst (reference_alias_ptr_type (ref), 0));
1437     }
1438
       /* Whatever is left of REF after peeling is the base object.  */
1439   DR_BASE_OBJECT (dr) = ref;
1440   DR_ACCESS_FNS (dr) = access_fns;
1441 }
1442
1443 /* Record in DR the SSA_NAME_PTR_INFO of the pointer that DR dereferences.  */
1444
1445 static void
1446 dr_analyze_alias (struct data_reference *dr)
1447 {
1448   tree base_obj = get_base_address (DR_REF (dr));
1449
1450   /* Only dereferences carry a pointer operand to look at.  */
1451   if (!INDIRECT_REF_P (base_obj) && TREE_CODE (base_obj) != MEM_REF)
1452     return;
1453
1454   /* The pointer info is taken from SSA names only.  */
1455   tree ptr = TREE_OPERAND (base_obj, 0);
1456   if (TREE_CODE (ptr) == SSA_NAME)
1457     DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (ptr);
1458 }
1459
1460 /* Frees data reference DR: first releases the vector of access
        functions recorded in DR, then the DR structure itself (which
        create_data_ref allocates with XCNEW).  */
1461
1462 void
1463 free_data_ref (data_reference_p dr)
1464 {
1465   DR_ACCESS_FNS (dr).release ();
1466   free (dr);
1467 }
1468
1469 /* Analyze memory reference MEMREF, which is accessed in STMT.
1470    The reference is a read if IS_READ is true, otherwise it is a write.
1471    IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1472    within STMT, i.e. that it might not occur even if STMT is executed
1473    and runs to completion.
1474
1475    Return the data_reference description of MEMREF.  NEST is the outermost
1476    loop in which the reference should be instantiated, LOOP is the loop
1477    in which the data reference should be analyzed.

        The result is allocated with XCNEW; free_data_ref releases it.  */
1478
1479 struct data_reference *
1480 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1481                  bool is_read, bool is_conditional_in_stmt)
1482 {
1483   struct data_reference *dr;
1484
1485   if (dump_file && (dump_flags & TDF_DETAILS))
1486     {
1487       fprintf (dump_file, "Creating dr for ");
1488       print_generic_expr (dump_file, memref, TDF_SLIM);
1489       fprintf (dump_file, "\n");
1490     }
1491
1492   dr = XCNEW (struct data_reference);
1493   DR_STMT (dr) = stmt;
1494   DR_REF (dr) = memref;
1495   DR_IS_READ (dr) = is_read;
1496   DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1497
       /* The result of dr_analyze_innermost is not checked here.  That
          function only writes its output on success, so after a failure
          the innermost fields keep the contents XCNEW gave them.
          NOTE(review): presumably callers detect failure through a null
          DR_BASE_ADDRESS -- confirm.  A null NEST requests BB analysis.  */
1498   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1499                         nest != NULL ? loop : NULL, stmt);
1500   dr_analyze_indices (dr, nest, loop);
1501   dr_analyze_alias (dr);
1502
1503   if (dump_file && (dump_flags & TDF_DETAILS))
1504     {
1505       unsigned i;
1506       fprintf (dump_file, "\tbase_address: ");
1507       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1508       fprintf (dump_file, "\n\toffset from base address: ");
1509       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1510       fprintf (dump_file, "\n\tconstant offset from base address: ");
1511       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1512       fprintf (dump_file, "\n\tstep: ");
1513       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
           /* The alignment values and the loop counter are unsigned, so
              they are printed with %u.  */
1514       fprintf (dump_file, "\n\tbase alignment: %u", DR_BASE_ALIGNMENT (dr));
1515       fprintf (dump_file, "\n\tbase misalignment: %u",
1516                DR_BASE_MISALIGNMENT (dr));
1517       fprintf (dump_file, "\n\toffset alignment: %u",
1518                DR_OFFSET_ALIGNMENT (dr));
1519       fprintf (dump_file, "\n\tstep alignment: %u", DR_STEP_ALIGNMENT (dr));
1520       fprintf (dump_file, "\n\tbase_object: ");
1521       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1522       fprintf (dump_file, "\n");
1523       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1524         {
1525           fprintf (dump_file, "\tAccess function %u: ", i);
1526           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1527         }
1528     }
1529
1530   return dr;
1531 }
1532
1533 /*  A helper function computes order between two tree expressions T1 and T2.
1534     This is used in comparator functions sorting objects based on the order
1535     of tree expressions.  The function returns -1, 0, or 1.

         A null tree sorts before any non-null tree.  Trees with different
         codes are ordered by tree code, except that two conversions are
         compared through their operands.  */
1536
1537 int
1538 data_ref_compare_tree (tree t1, tree t2)
1539 {
1540   int i, cmp;
1541   enum tree_code code;
1542   char tclass;
1543
1544   if (t1 == t2)
1545     return 0;
1546   if (t1 == NULL)
1547     return -1;
1548   if (t2 == NULL)
1549     return 1;
1550
       /* Stripping useless conversions can expose identical trees.  */
1551   STRIP_USELESS_TYPE_CONVERSION (t1);
1552   STRIP_USELESS_TYPE_CONVERSION (t2);
1553   if (t1 == t2)
1554     return 0;
1555
1556   if (TREE_CODE (t1) != TREE_CODE (t2)
1557       && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1558     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1559
1560   code = TREE_CODE (t1);
1561   switch (code)
1562     {
1563     case INTEGER_CST:
1564       return tree_int_cst_compare (t1, t2);
1565
1566     case STRING_CST:
           /* Shorter strings sort first.  Strings of equal length are
              ordered by content.  memcmp only guarantees the sign of its
              result, so normalize it to the documented -1, 0 or 1.  */
1567       if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1568         return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1569       cmp = memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1570                     TREE_STRING_LENGTH (t1));
           return cmp < 0 ? -1 : (cmp > 0 ? 1 : 0);
1571
1572     case SSA_NAME:
1573       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1574         return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
           /* Equal versions: fall out of the switch and return 0.  */
1575       break;
1576
1577     default:
1578       if (POLY_INT_CST_P (t1))
1579         return compare_sizes_for_sort (wi::to_poly_widest (t1),
1580                                        wi::to_poly_widest (t2));
1581
1582       tclass = TREE_CODE_CLASS (code);
1583
1584       /* For decls, compare their UIDs.  */
1585       if (tclass == tcc_declaration)
1586         {
1587           if (DECL_UID (t1) != DECL_UID (t2))
1588             return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1589           break;
1590         }
1591       /* For expressions, compare their operands recursively.  */
1592       else if (IS_EXPR_CODE_CLASS (tclass))
1593         {
               /* Operands are compared from the last one to the first; the
                  first difference decides.  */
1594           for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1595             {
1596               cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1597                                            TREE_OPERAND (t2, i));
1598               if (cmp != 0)
1599                 return cmp;
1600             }
1601         }
1602       else
1603         gcc_unreachable ();
1604     }
1605
1606   return 0;
1607 }
1608
1609 /* Return success if data dependence DDR can be resolved by a run-time alias
1610    check, and a failure explaining why not otherwise.  */
1611
1612 opt_result
1613 runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1614 {
1615   if (dump_enabled_p ())
1616     dump_printf (MSG_NOTE,
1617                  "consider run-time aliasing test between %T and %T\n",
1618                  DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1619
1620   if (speed_p)
1621     {
1622       /* FORNOW: versioning with an outer loop is supported neither by
1623          vectorization nor by loop distribution.  */
1624       if (loop == NULL || loop->inner == NULL)
1625         return opt_result::success ();
1626       return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1627                                      "runtime alias check not supported for"
1628                                      " outer loop.\n");
1629     }
1630   return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1631                                  "runtime alias check not supported when"
1632                                  " optimizing for size.\n");
1633 }
1634
1635 /* Equality between two dr_with_seg_len objects.
1636
1637    Two objects compare equal when they describe the same data
1638    reference (base address, offset and init) with the same segment
1639    length, access size and alignment.  It is used to decide whether the
1640    aliasing checks of two pairs of data refs may be combined.  */
1641
1642 static bool
1643 operator == (const dr_with_seg_len& d1,
1644              const dr_with_seg_len& d2)
1645 {
1646   if (!operand_equal_p (DR_BASE_ADDRESS (d1.dr), DR_BASE_ADDRESS (d2.dr), 0))
1647     return false;
1648   if (data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) != 0
1649       || data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) != 0
1650       || data_ref_compare_tree (d1.seg_len, d2.seg_len) != 0)
1651     return false;
1652   return known_eq (d1.access_size, d2.access_size) && d1.align == d2.align;
1653 }
1654
1655 /* Comparison function for sorting dr_with_seg_len_pair_t objects, so
1656    that aliasing checks can be combined in a single scan.  */
1657
1658 static int
1659 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1660 {
1661   const dr_with_seg_len_pair_t *lhs = (const dr_with_seg_len_pair_t *) pa_;
1662   const dr_with_seg_len_pair_t *rhs = (const dr_with_seg_len_pair_t *) pb_;
1663   data_reference_p l1 = lhs->first.dr;
1664   data_reference_p l2 = lhs->second.dr;
1665   data_reference_p r1 = rhs->first.dr;
1666   data_reference_p r2 = rhs->second.dr;
1667
1668   /* For DR pairs (a, b) and (c, d), we only consider merging the alias
1669      checks if a and c have the same base address and step, and b and d
1670      have the same base address and step.  Therefore, if either a&c or b&d
1671      differ in address or step, the relative order of the two pairs after
1672      sorting does not matter.  The keys are listed from most to least
1673      significant; each row holds the value for PA_ and the value for PB_.  */
1674   tree sort_keys[][2] =
1675     {
1676       { DR_BASE_ADDRESS (l1), DR_BASE_ADDRESS (r1) },
1677       { DR_BASE_ADDRESS (l2), DR_BASE_ADDRESS (r2) },
1678       { DR_STEP (l1), DR_STEP (r1) },
1679       { DR_STEP (l2), DR_STEP (r2) },
1680       { DR_OFFSET (l1), DR_OFFSET (r1) },
1681       { DR_INIT (l1), DR_INIT (r1) },
1682       { DR_OFFSET (l2), DR_OFFSET (r2) },
1683       { DR_INIT (l2), DR_INIT (r2) },
1684     };
1685   const unsigned int num_keys = sizeof (sort_keys) / sizeof (sort_keys[0]);
1686
1687   /* The first key that differs decides the order.  */
1688   for (unsigned int k = 0; k < num_keys; ++k)
1689     {
1690       int comp_res = data_ref_compare_tree (sort_keys[k][0],
1691                                             sort_keys[k][1]);
1692       if (comp_res != 0)
1693         return comp_res;
1694     }
1695
1696   /* All keys compare equal.  */
1697   return 0;
1698 }
1699
1700 /* Print a description of ALIAS_PAIR, starting every line with INDENT.  */
1701
1702 static void
1703 dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1704 {
1705   const dr_with_seg_len &lhs = alias_pair->first;
1706   const dr_with_seg_len &rhs = alias_pair->second;
1707
1708   dump_printf (MSG_NOTE, "%sreference:      %T vs. %T\n", indent,
1709                DR_REF (lhs.dr), DR_REF (rhs.dr));
1710
1711   /* Below, the second value is printed only when it differs.  */
1712   dump_printf (MSG_NOTE, "%ssegment length: %T", indent, lhs.seg_len);
1713   if (!operand_equal_p (lhs.seg_len, rhs.seg_len, 0))
1714     dump_printf (MSG_NOTE, " vs. %T", rhs.seg_len);
1715
1716   dump_printf (MSG_NOTE, "\n%saccess size:    ", indent);
1717   dump_dec (MSG_NOTE, lhs.access_size);
1718   if (maybe_ne (lhs.access_size, rhs.access_size))
1719     {
1720       dump_printf (MSG_NOTE, " vs. ");
1721       dump_dec (MSG_NOTE, rhs.access_size);
1722     }
1723
1724   dump_printf (MSG_NOTE, "\n%salignment:      %d", indent, lhs.align);
1725   if (lhs.align != rhs.align)
1726     dump_printf (MSG_NOTE, " vs. %d", rhs.align);
1727
1728   dump_printf (MSG_NOTE, "\n%sflags:         ", indent);
1729   if (alias_pair->flags & DR_ALIAS_RAW)
1730     dump_printf (MSG_NOTE, " RAW");
1731   if (alias_pair->flags & DR_ALIAS_WAR)
1732     dump_printf (MSG_NOTE, " WAR");
1733   if (alias_pair->flags & DR_ALIAS_WAW)
1734     dump_printf (MSG_NOTE, " WAW");
1735   if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1736     dump_printf (MSG_NOTE, " ARBITRARY");
1737   if (alias_pair->flags & DR_ALIAS_SWAPPED)
1738     dump_printf (MSG_NOTE, " SWAPPED");
1739   if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1740     dump_printf (MSG_NOTE, " UNSWAPPED");
1741   if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1742     dump_printf (MSG_NOTE, " MIXED_STEPS");
1743   if (alias_pair->flags == 0)
1744     dump_printf (MSG_NOTE, " <none>");
1745   dump_printf (MSG_NOTE, "\n");
1746 }
1747
1748 /* Merge alias checks recorded in ALIAS_PAIRS in place and remove redundant ones.
1749    FACTOR is the number of iterations for which each data reference is accessed.
1750
1751    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1752    we create an expression:
1753
1754    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1755    || ((load_ptr_0 + load_segment_length_0) <= store_ptr_0)
1756
1757    for aliasing checks.  However, in some cases we can decrease the number
1758    of checks by combining two checks into one.  For example, suppose we have
1759    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1760    condition is satisfied:
1761
1762    load_ptr_0 < load_ptr_1  &&
1763    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1764
1765    (this condition means, in each iteration of vectorized loop, the accessed
1766    memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1767    load_ptr_1.)
1768
1769    we then can use only the following expression to finish the aliasing checks
1770    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1771
1772    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1773    || ((load_ptr_1 + load_segment_length_1) <= store_ptr_0)
1774
1775    Note that we only consider the case in which load_ptr_0 and load_ptr_1 have
1776    the same base address.  */
1777
1778 void
1779 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1780                                poly_uint64 /* Unnamed and unused.  */)
1781 {
1782   if (alias_pairs->is_empty ())
1783     return;
1784
1785   /* Canonicalize each pair so that the base components are ordered wrt
1786      data_ref_compare_tree.  This allows the loop below to merge more
1787      cases.  Record in the flags whether the pair was swapped.  */
1788   unsigned int i;
1789   dr_with_seg_len_pair_t *alias_pair;
1790   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1791     {
1792       data_reference_p dr_a = alias_pair->first.dr;
1793       data_reference_p dr_b = alias_pair->second.dr;
1794       int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1795                                             DR_BASE_ADDRESS (dr_b));
1796       if (comp_res == 0)
1797         comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1798       if (comp_res == 0)
1799         comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1800       if (comp_res > 0)
1801         {
1802           std::swap (alias_pair->first, alias_pair->second);
1803           alias_pair->flags |= DR_ALIAS_SWAPPED;
1804         }
1805       else
1806         alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1807     }
1808
1809   /* Sort the collected data ref pairs so that we can scan them once to
1810      combine all possible aliasing checks.  */
1811   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1812
1813   /* Scan the sorted dr pairs and try to combine the alias checks of neighboring
1814      pairs.  Kept pairs are compacted into elements [0, LAST] as we go.  */
1815   unsigned int last = 0;
1816   for (i = 1; i < alias_pairs->length (); ++i)
1817     {
1818       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1819       dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1820       dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1821
1822       dr_with_seg_len *dr_a1 = &alias_pair1->first;
1823       dr_with_seg_len *dr_b1 = &alias_pair1->second;
1824       dr_with_seg_len *dr_a2 = &alias_pair2->first;
1825       dr_with_seg_len *dr_b2 = &alias_pair2->second;
1826
1827       /* Remove duplicate data ref pairs, keeping the union of their flags.  */
1828       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1829         {
1830           if (dump_enabled_p ())
1831             dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1832                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1833                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1834           alias_pair1->flags |= alias_pair2->flags;
1835           continue;
1836         }
1837
1838       /* Assume that we won't be able to merge the pairs, then correct
1839          (by decrementing LAST again) if we do.  */
1840       last += 1;
1841       if (last != i)
1842         (*alias_pairs)[last] = (*alias_pairs)[i];
1843
1844       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1845         {
1846           /* Arrange for DR_B1 and DR_B2 to be the equal memrefs, so that
1847              DR_A1 and DR_A2 are the two memrefs we try to merge.  */
1848           if (*dr_a1 == *dr_a2)
1849             {
1850               std::swap (dr_a1, dr_b1);  /* Swaps the local pointers only.  */
1851               std::swap (dr_a2, dr_b2);
1852             }
1853
1854           poly_int64 init_a1, init_a2;
1855           /* Only consider cases in which the distance between the initial
1856              DR_A1 and the initial DR_A2 is known at compile time.  */
1857           if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1858                                 DR_BASE_ADDRESS (dr_a2->dr), 0)
1859               || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1860                                    DR_OFFSET (dr_a2->dr), 0)
1861               || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1862               || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1863             continue;
1864
1865           /* Don't combine if we can't tell which one comes first.  */
1866           if (!ordered_p (init_a1, init_a2))
1867             continue;
1868
1869           /* Work out what the segment length would be if we did combine
1870              DR_A1 and DR_A2:
1871
1872              - If DR_A1 and DR_A2 have equal lengths, that length is
1873                also the combined length.
1874
1875              - If DR_A1 and DR_A2 both have negative "lengths", the combined
1876                length is the lower bound on those lengths.
1877
1878              - If DR_A1 and DR_A2 both have positive lengths, the combined
1879                length is the upper bound on those lengths.
1880
1881              Other cases are unlikely to give a useful combination.
1882
1883              The lengths both have sizetype, so the sign is taken from
1884              the step instead.  */
1885           poly_uint64 new_seg_len = 0;
1886           bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1887                                                  dr_a2->seg_len, 0);
1888           if (new_seg_len_p)
1889             {
1890               poly_uint64 seg_len_a1, seg_len_a2;
1891               if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1892                   || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1893                 continue;
1894
1895               tree indicator_a = dr_direction_indicator (dr_a1->dr);
1896               if (TREE_CODE (indicator_a) != INTEGER_CST)
1897                 continue;
1898
1899               tree indicator_b = dr_direction_indicator (dr_a2->dr);
1900               if (TREE_CODE (indicator_b) != INTEGER_CST)
1901                 continue;
1902
1903               int sign_a = tree_int_cst_sgn (indicator_a);
1904               int sign_b = tree_int_cst_sgn (indicator_b);
1905
1906               if (sign_a <= 0 && sign_b <= 0)
1907                 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1908               else if (sign_a >= 0 && sign_b >= 0)
1909                 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1910               else
1911                 continue;
1912             }
1913           /* At this point we're committed to merging the refs.  */
1914
1915           /* Make sure dr_a1 starts left of dr_a2.  */
1916           if (maybe_gt (init_a1, init_a2))
1917             {
1918               std::swap (*dr_a1, *dr_a2);  /* Swaps the pointed-to objects.  */
1919               std::swap (init_a1, init_a2);
1920             }
1921
1922           /* The DR_Bs are equal, so only the DR_As can introduce
1923              mixed steps.  */
1924           if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1925             alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1926
1927           if (new_seg_len_p)
1928             {
1929               dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1930                                               new_seg_len);
1931               dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1932             }
1933
1934           /* This is always positive due to the swap above.  */
1935           poly_uint64 diff = init_a2 - init_a1;
1936
1937           /* The new check will start at DR_A1.  Make sure that its access
1938              size encompasses the initial DR_A2.  */
1939           if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1940             {
1941               dr_a1->access_size = upper_bound (dr_a1->access_size,
1942                                                 diff + dr_a2->access_size);
1943               unsigned int new_align = known_alignment (dr_a1->access_size);
1944               dr_a1->align = MIN (dr_a1->align, new_align);
1945             }
1946           if (dump_enabled_p ())
1947             dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1948                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1949                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1950           alias_pair1->flags |= alias_pair2->flags;
1951           last -= 1;  /* Undo the speculative increment: the pair was merged.  */
1952         }
1953     }
1954   alias_pairs->truncate (last + 1);  /* Keep only elements [0, LAST].  */
1955
1956   /* Try to restore the original dr_with_seg_len order within each
1957      dr_with_seg_len_pair_t.  If we ended up combining swapped and
1958      unswapped pairs into the same check, we have to invalidate any
1959      RAW, WAR and WAW information for it.  */
1960   if (dump_enabled_p ())
1961     dump_printf (MSG_NOTE, "merged alias checks:\n");
1962   FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1963     {
1964       unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1965       unsigned int swapped = (alias_pair->flags & swap_mask);
1966       if (swapped == DR_ALIAS_SWAPPED)
1967         std::swap (alias_pair->first, alias_pair->second);
1968       else if (swapped != DR_ALIAS_UNSWAPPED)
1969         alias_pair->flags |= DR_ALIAS_ARBITRARY;
1970       alias_pair->flags &= ~swap_mask;  /* The swap flags are internal to this function.  */
1971       if (dump_enabled_p ())
1972         dump_alias_pair (alias_pair, "  ");
1973     }
1974 }
1975
1976 /* A subroutine of create_intersect_range_checks, with a subset of the
1977    same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1978    to optimize cases in which the references form a simple RAW, WAR or
1979    WAW dependence.  Return true on success, storing the test in *COND_EXPR.  */
1980
1981 static bool
1982 create_ifn_alias_checks (tree *cond_expr,
1983                          const dr_with_seg_len_pair_t &alias_pair)
1984 {
1985   const dr_with_seg_len& dr_a = alias_pair.first;
1986   const dr_with_seg_len& dr_b = alias_pair.second;
1987
1988   /* Check for cases in which:
1989
1990      (a) we have a known RAW, WAR or WAW dependence
1991      (b) the accesses are well-ordered in both the original and new code
1992          (see the comment above the DR_ALIAS_* flags for details); and
1993      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
1994   if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
1995     return false;
1996
1997   /* Make sure that both DRs access the same pattern of bytes, with a constant
1998      length and a constant step that fits in an unsigned HOST_WIDE_INT.  */
1999   poly_uint64 seg_len;
2000   if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2001       || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2002       || maybe_ne (dr_a.access_size, dr_b.access_size)
2003       || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2004       || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2005     return false;
2006
2007   unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2008   tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2009   tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2010
2011   /* See whether the target supports what we want to do.  WAW checks are
2012      equivalent to WAR checks here.  */
2013   internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2014                      ? IFN_CHECK_RAW_PTRS
2015                      : IFN_CHECK_WAR_PTRS);
2016   unsigned int align = MIN (dr_a.align, dr_b.align);
2017   poly_uint64 full_length = seg_len + bytes;  /* First try SEG_LEN + step.  */
2018   if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2019                                            full_length, align))
2020     {
2021       full_length = seg_len + dr_a.access_size;  /* Then SEG_LEN + access size.  */
2022       if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2023                                                full_length, align))
2024         return false;
2025     }
2026
2027   /* Commit to using this form of test.  Each address is base + offset + init.  */
2028   addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2029   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2030
2031   addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2032   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2033
2034   *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2035                                              ifn, boolean_type_node,
2036                                              4, addr_a, addr_b,
2037                                              size_int (full_length),
2038                                              size_int (align));
2039
2040   if (dump_enabled_p ())
2041     {
2042       if (ifn == IFN_CHECK_RAW_PTRS)
2043         dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2044       else
2045         dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2046     }
2047   return true;
2048 }
2049
2050 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
2051    free of aliases, using a condition based on index values instead
2052    of a condition based on addresses.  Return true on success,
2053    storing the condition in *COND_EXPR (ANDed with any condition already there).
2054
2055    This can only be done if the two data references in ALIAS_PAIR access the
2056    same array object and a single index, evolving in LOOP, is the only
2057    difference.  For example, if the two data references are DR_A and DR_B:
2058
2059                        DR_A                           DR_B
2060       data-ref         arr[i]                         arr[j]
2061       base_object      arr                            arr
2062       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
2063
2064    The addresses and their index are like:
2065
2066         |<- ADDR_A    ->|          |<- ADDR_B    ->|
2067      ------------------------------------------------------->
2068         |   |   |   |   |          |   |   |   |   |
2069      ------------------------------------------------------->
2070         i_0 ...         i_0+4      j_0 ...         j_0+4
2071
2072    We can create expression based on index rather than address:
2073
2074      (unsigned) (i_0 - j_0 + 3) <= 6
2075
2076    i.e. the indices are less than 4 apart.
2077
2078    Note that the evolution step of the index must be considered in the comparison.  */
2079
2080 static bool
2081 create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2082                                      const dr_with_seg_len_pair_t &alias_pair)
2083 {
2084   const dr_with_seg_len &dr_a = alias_pair.first;
2085   const dr_with_seg_len &dr_b = alias_pair.second;
2086   if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2087       || integer_zerop (DR_STEP (dr_a.dr))
2088       || integer_zerop (DR_STEP (dr_b.dr))
2089       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2090     return false;
2091
2092   poly_uint64 seg_len1, seg_len2;
2093   if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2094       || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2095     return false;
2096
2097   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2098     return false;
2099
2100   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2101     return false;
2102
2103   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2104     return false;
2105
2106   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2107
2108   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2109   unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2110   if (neg_step)
2111     {
2112       abs_step = -abs_step;
2113       seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2114       seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2115     }
2116
2117   /* Infer the number of iterations over which each DR accesses its memory
2118      segment.  In other words, the alias check tests whether the memory segment
2119      accessed by DR_A in some number of iterations intersects the memory segment
2120      accessed by DR_B in the same number of iterations.
2121      Note that the segment length is a linear function of the number of iterations,
2122      with DR_STEP as the coefficient; the division below rounds up.  */
2123   poly_uint64 niter_len1, niter_len2;
2124   if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2125       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2126     return false;
2127
2128   /* Divide each access size by the byte step, rounding up.  */
2129   poly_uint64 niter_access1, niter_access2;
2130   if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2131                         abs_step, &niter_access1)
2132       || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2133                            abs_step, &niter_access2))
2134     return false;
2135
2136   bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2137
2138   int found = -1;
2139   for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2140     {
2141       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2142       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2143       /* Two indices must be the same if they are not scev, or not scev wrt
2144          the current loop being vectorized.  */
2145       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2146           || TREE_CODE (access2) != POLYNOMIAL_CHREC
2147           || CHREC_VARIABLE (access1) != (unsigned)loop->num
2148           || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2149         {
2150           if (operand_equal_p (access1, access2, 0))
2151             continue;
2152
2153           return false;
2154         }
2155       if (found >= 0)
2156         return false;  /* More than one index evolves in LOOP.  */
2157       found = i;
2158     }
2159
2160   /* Ought not to happen in practice, since if all accesses are equal then the
2161      alias should be decidable at compile time.  */
2162   if (found < 0)
2163     return false;
2164
2165   /* The two indices must have the same step.  */
2166   tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2167   tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2168   if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2169     return false;
2170
2171   tree idx_step = CHREC_RIGHT (access1);
2172   /* Index must have const step, otherwise DR_STEP won't be constant.  */
2173   gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2174   /* Index must evaluate in the same direction as DR.  */
2175   gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2176
2177   tree min1 = CHREC_LEFT (access1);
2178   tree min2 = CHREC_LEFT (access2);
2179   if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2180     return false;
2181
2182   /* Ideally, alias can be checked against loop's control IV, but we
2183      need to prove linear mapping between control IV and reference
2184      index.  Although that should be true, we check against (array)
2185      index of data reference.  Like segment length, index length is
2186      linear function of the number of iterations with index_step as
2187      the coefficient, i.e, niter_len * idx_step.  */
2188   offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2189                                               SIGNED);
2190   if (neg_step)
2191     abs_idx_step = -abs_idx_step;
2192   poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2193   poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2194   poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2195   poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2196
2197   gcc_assert (known_ge (idx_len1, 0)
2198               && known_ge (idx_len2, 0)
2199               && known_ge (idx_access1, 0)
2200               && known_ge (idx_access2, 0));
2201
2202   /* Each access has the following pattern, with lengths measured
2203      in units of INDEX:
2204
2205           <-- idx_len -->
2206           <--- A: -ve step --->
2207           +-----+-------+-----+-------+-----+
2208           | n-1 | ..... |  0  | ..... | n-1 |
2209           +-----+-------+-----+-------+-----+
2210                         <--- B: +ve step --->
2211                         <-- idx_len -->
2212                         |
2213                        min
2214
2215      where "n" is the number of scalar iterations covered by the segment
2216      and where each access spans idx_access units.
2217
2218      A is the range of bytes accessed when the step is negative,
2219      B is the range when the step is positive.
2220
2221      When checking for general overlap, we need to test whether
2222      the range:
2223
2224        [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2225
2226      overlaps:
2227
2228        [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2229
2230      where:
2231
2232         low_offsetN = +ve step ? 0 : -idx_lenN;
2233        high_offsetN = +ve step ? idx_lenN : 0;
2234
2235      This is equivalent to testing whether:
2236
2237        min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2238        && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2239
2240      Converting this into a single test, there is an overlap if:
2241
2242        0 <= min2 - min1 + bias <= limit
2243
2244      where  bias = high_offset2 + idx_access2 - 1 - low_offset1
2245            limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2246                  + (high_offset2 - low_offset2 + idx_access2 - 1)
2247       i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2248
2249      Combining the tests requires limit to be computable in an unsigned
2250      form of the index type; if it isn't, we fall back to the usual
2251      pointer-based checks.
2252
2253      We can do better if DR_B is a write and if DR_A and DR_B are
2254      well-ordered in both the original and the new code (see the
2255      comment above the DR_ALIAS_* flags for details).  In this case
2256      we know that for each i in [0, n-1], the write performed by
2257      access i of DR_B occurs after access numbers j<=i of DR_A in
2258      both the original and the new code.  Any write or anti
2259      dependencies wrt those DR_A accesses are therefore maintained.
2260
2261      We just need to make sure that each individual write in DR_B does not
2262      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2263      after the DR_B access in the original code but happen before it in
2264      the new code.
2265
2266      We know the steps for both accesses are equal, so by induction, we
2267      just need to test whether the first write of DR_B overlaps a later
2268      access of DR_A.  In other words, we need to move min1 along by
2269      one iteration:
2270
2271        min1' = min1 + idx_step
2272
2273      and use the ranges:
2274
2275        [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2276
2277      and:
2278
2279        [min2, min2 + idx_access2 - 1]
2280
2281      where:
2282
2283         low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2284        high_offset1' = +ve step ? idx_len1 - |idx_step| : 0.  */
2285   if (waw_or_war_p)
2286     idx_len1 -= abs_idx_step;
2287
2288   poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2289   if (!waw_or_war_p)
2290     limit += idx_len2;
2291
2292   tree utype = unsigned_type_for (TREE_TYPE (min1));
2293   if (!wi::fits_to_tree_p (limit, utype))
2294     return false;
2295
2296   poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2297   poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2298   poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2299   /* Equivalent to adding IDX_STEP to MIN1.  */
2300   if (waw_or_war_p)
2301     bias -= wi::to_offset (idx_step);
2302
2303   tree subject = fold_build2 (MINUS_EXPR, utype,
2304                               fold_convert (utype, min2),
2305                               fold_convert (utype, min1));
2306   subject = fold_build2 (PLUS_EXPR, utype, subject,
2307                          wide_int_to_tree (utype, bias));
2308   tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2309                                      wide_int_to_tree (utype, limit));  /* No overlap.  */
2310   if (*cond_expr)
2311     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2312                               *cond_expr, part_cond_expr);
2313   else
2314     *cond_expr = part_cond_expr;
2315   if (dump_enabled_p ())
2316     {
2317       if (waw_or_war_p)
2318         dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2319       else
2320         dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2321     }
2322   return true;
2323 }
2324
2325 /* A subroutine of create_intersect_range_checks, with a subset of the same
2326    arguments.  Try to optimize cases in which the second access is a write and
2327    in which some overlap is valid.  On success, set *COND_EXPR and return true.  */
2328
2329 static bool
2330 create_waw_or_war_checks (tree *cond_expr,
2331                           const dr_with_seg_len_pair_t &alias_pair)
2332 {
2333   const dr_with_seg_len& dr_a = alias_pair.first;
2334   const dr_with_seg_len& dr_b = alias_pair.second;
2335
2336   /* Check for cases in which:
2337
2338      (a) DR_B is always a write;
2339      (b) the accesses are well-ordered in both the original and new code
2340          (see the comment above the DR_ALIAS_* flags for details); and
2341      (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
2342   if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2343     return false;
2344
2345   /* Check for equal (but possibly variable) steps.  */
2346   tree step = DR_STEP (dr_a.dr);
2347   if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2348     return false;
2349
2350   /* Make sure that we can operate on sizetype without loss of precision.  */
2351   tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2352   if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2353     return false;
2354
2355   /* All addresses involved are known to have a common alignment ALIGN.
2356      We can therefore subtract ALIGN from an exclusive endpoint to get
2357      an inclusive endpoint.  In the best (and common) case, ALIGN is the
2358      same as the access sizes of both DRs, and so subtracting ALIGN
2359      cancels out the addition of an access size.  */
2360   unsigned int align = MIN (dr_a.align, dr_b.align);
2361   poly_uint64 last_chunk_a = dr_a.access_size - align;
2362   poly_uint64 last_chunk_b = dr_b.access_size - align;
2363
2364   /* Get a boolean expression that is true when the step is negative.  */
2365   tree indicator = dr_direction_indicator (dr_a.dr);
2366   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2367                                fold_convert (ssizetype, indicator),
2368                                ssize_int (0));
2369
2370   /* Get lengths in sizetype.  */
2371   tree seg_len_a
2372     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2373   step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2374
2375   /* Each access has the following pattern:
2376
2377           <- |seg_len| ->
2378           <--- A: -ve step --->
2379           +-----+-------+-----+-------+-----+
2380           | n-1 | ..... |  0  | ..... | n-1 |
2381           +-----+-------+-----+-------+-----+
2382                         <--- B: +ve step --->
2383                         <- |seg_len| ->
2384                         |
2385                    base address
2386
2387      where "n" is the number of scalar iterations covered by the segment.
2388
2389      A is the range of bytes accessed when the step is negative,
2390      B is the range when the step is positive.
2391
2392      We know that DR_B is a write.  We also know (from checking that
2393      DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2394      the write performed by access i of DR_B occurs after access numbers
2395      j<=i of DR_A in both the original and the new code.  Any write or
2396      anti dependencies wrt those DR_A accesses are therefore maintained.
2397
2398      We just need to make sure that each individual write in DR_B does not
2399      overlap any higher-indexed access in DR_A; such DR_A accesses happen
2400      after the DR_B access in the original code but happen before it in
2401      the new code.
2402
2403      We know the steps for both accesses are equal, so by induction, we
2404      just need to test whether the first write of DR_B overlaps a later
2405      access of DR_A.  In other words, we need to move addr_a along by
2406      one iteration:
2407
2408        addr_a' = addr_a + step
2409
2410      and check whether:
2411
2412        [addr_b, addr_b + last_chunk_b]
2413
2414      overlaps:
2415
2416        [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2417
2418      where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:
2419
2420         low_offset_a = +ve step ? 0 : seg_len_a - step
2421        high_offset_a = +ve step ? seg_len_a - step : 0
2422
2423      This is equivalent to testing whether:
2424
2425        addr_a' + low_offset_a <= addr_b + last_chunk_b
2426        && addr_b <= addr_a' + high_offset_a + last_chunk_a
2427
2428      Converting this into a single test, there is an overlap if:
2429
2430        0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2431
2432      where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2433
2434      If DR_A is performed, limit + |step| - last_chunk_b is known to be
2435      less than the size of the object underlying DR_A.  We also know
2436      that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2437      guaranteed at compile time.  There can therefore be no overflow if
2438      "limit" is calculated in an unsigned type with pointer precision.  */
2439   tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2440                                          DR_OFFSET (dr_a.dr));
2441   addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2442
2443   tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2444                                          DR_OFFSET (dr_b.dr));
2445   addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2446
2447   /* Advance ADDR_A by one iteration and adjust the length to compensate.
          The difference is used twice below, hence the SAVE_EXPR wrapper.  */
2448   addr_a = fold_build_pointer_plus (addr_a, step);
2449   tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2450                                            seg_len_a, step);
2451   if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2452     seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2453
2454   tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2455                                    seg_len_a_minus_step, size_zero_node);
2456   if (!CONSTANT_CLASS_P (low_offset_a))
2457     low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);  /* Reused below.  */
2458
2459   /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2460      but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
2461   tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2462                                     low_offset_a);
2463
2464   /* The amount added to addr_b - addr_a'.  */
2465   tree bias = fold_build2 (MINUS_EXPR, sizetype,
2466                            size_int (last_chunk_b), low_offset_a);
2467
2468   tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2469   limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2470                        size_int (last_chunk_a + last_chunk_b));
2471
2472   tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2473   subject = fold_build2 (PLUS_EXPR, sizetype,
2474                          fold_convert (sizetype, subject), bias);
2475
2476   *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);  /* No overlap.  */
2477   if (dump_enabled_p ())
2478     dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2479   return true;
2480 }
2481
2482 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
2483    every address ADDR accessed by D:
2484
2485      *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
2486
2487    In this case, every element accessed by D is aligned to at least
2488    ALIGN bytes.
2489
2490    If ALIGN is zero then instead set *SEG_MAX_OUT so that:
2491
2492      *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT.  */
2493
2494 static void
2495 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2496                      tree *seg_max_out, HOST_WIDE_INT align)
2497 {
2498   /* Each access has the following pattern:
2499
2500           <- |seg_len| ->
2501           <--- A: -ve step --->
2502           +-----+-------+-----+-------+-----+
2503           | n-1 | ,.... |  0  | ..... | n-1 |
2504           +-----+-------+-----+-------+-----+
2505                         <--- B: +ve step --->
2506                         <- |seg_len| ->
2507                         |
2508                    base address
2509
2510      where "n" is the number of scalar iterations covered by the segment.
2511      (This should be VF for a particular pair if we know that both steps
2512      are the same, otherwise it will be the full number of scalar loop
2513      iterations.)
2514
2515      A is the range of bytes accessed when the step is negative,
2516      B is the range when the step is positive.
2517
2518      If the access size is "access_size" bytes, the lowest addressed byte is:
2519
2520          base + (step < 0 ? seg_len : 0)   [LB]
2521
2522      and the highest addressed byte is always below:
2523
2524          base + (step < 0 ? 0 : seg_len) + access_size   [UB]
2525
2526      Thus:
2527
2528          LB <= ADDR < UB
2529
2530      If ALIGN is nonzero, all three values are aligned to at least ALIGN
2531      bytes, so:
2532
2533          LB <= ADDR <= UB - ALIGN
2534
2535      where "- ALIGN" folds naturally with the "+ access_size" and often
2536      cancels it out.
2537
2538      We don't try to simplify LB and UB beyond this (e.g. by using
2539      MIN and MAX based on whether seg_len rather than the stride is
2540      negative) because it is possible for the absolute size of the
2541      segment to overflow the range of a ssize_t.
2542
2543      Keeping the pointer_plus outside of the cond_expr should allow
2544      the cond_exprs to be shared with other alias checks.  */
2545   tree indicator = dr_direction_indicator (d.dr);
2546   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2547                                fold_convert (ssizetype, indicator),
2548                                ssize_int (0));
2549   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2550                                             DR_OFFSET (d.dr));
2551   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2552   tree seg_len
2553     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2554
2555   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2556                                 seg_len, size_zero_node);
2557   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2558                                 size_zero_node, seg_len);
2559   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2560                            size_int (d.access_size - align));
2561
2562   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2563   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2564 }
2565
2566 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2567    storing the condition in *COND_EXPR.  The fallback is to generate a
2568    a test that the two accesses do not overlap:
2569
2570      end_a <= start_b || end_b <= start_a.  */
2571
2572 static void
2573 create_intersect_range_checks (class loop *loop, tree *cond_expr,
2574                                const dr_with_seg_len_pair_t &alias_pair)
2575 {
2576   const dr_with_seg_len& dr_a = alias_pair.first;
2577   const dr_with_seg_len& dr_b = alias_pair.second;
2578   *cond_expr = NULL_TREE;
2579   if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2580     return;
2581
2582   if (create_ifn_alias_checks (cond_expr, alias_pair))
2583     return;
2584
2585   if (create_waw_or_war_checks (cond_expr, alias_pair))
2586     return;
2587
2588   unsigned HOST_WIDE_INT min_align;
2589   tree_code cmp_code;
2590   /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2591      are equivalent.  This is just an optimization heuristic.  */
2592   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2593       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2594     {
2595       /* In this case adding access_size to seg_len is likely to give
2596          a simple X * step, where X is either the number of scalar
2597          iterations or the vectorization factor.  We're better off
2598          keeping that, rather than subtracting an alignment from it.
2599
2600          In this case the maximum values are exclusive and so there is
2601          no alias if the maximum of one segment equals the minimum
2602          of another.  */
2603       min_align = 0;
2604       cmp_code = LE_EXPR;
2605     }
2606   else
2607     {
2608       /* Calculate the minimum alignment shared by all four pointers,
2609          then arrange for this alignment to be subtracted from the
2610          exclusive maximum values to get inclusive maximum values.
2611          This "- min_align" is cumulative with a "+ access_size"
2612          in the calculation of the maximum values.  In the best
2613          (and common) case, the two cancel each other out, leaving
2614          us with an inclusive bound based only on seg_len.  In the
2615          worst case we're simply adding a smaller number than before.
2616
2617          Because the maximum values are inclusive, there is an alias
2618          if the maximum value of one segment is equal to the minimum
2619          value of the other.  */
2620       min_align = MIN (dr_a.align, dr_b.align);
2621       cmp_code = LT_EXPR;
2622     }
2623
2624   tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2625   get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2626   get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2627
2628   *cond_expr
2629     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2630         fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2631         fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2632   if (dump_enabled_p ())
2633     dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2634 }
2635
2636 /* Create a conditional expression that represents the run-time checks for
2637    overlapping of address ranges represented by a list of data references
2638    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
2639    COND_EXPR is the conditional expression to be used in the if statement
2640    that controls which version of the loop gets executed at runtime.  */
2641
2642 void
2643 create_runtime_alias_checks (class loop *loop,
2644                              const vec<dr_with_seg_len_pair_t> *alias_pairs,
2645                              tree * cond_expr)
2646 {
2647   tree part_cond_expr;
2648
2649   fold_defer_overflow_warnings ();
2650   for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2651     {
2652       gcc_assert (alias_pair.flags);
2653       if (dump_enabled_p ())
2654         dump_printf (MSG_NOTE,
2655                      "create runtime check for data references %T and %T\n",
2656                      DR_REF (alias_pair.first.dr),
2657                      DR_REF (alias_pair.second.dr));
2658
2659       /* Create condition expression for each pair data references.  */
2660       create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2661       if (*cond_expr)
2662         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2663                                   *cond_expr, part_cond_expr);
2664       else
2665         *cond_expr = part_cond_expr;
2666     }
2667   fold_undefer_and_ignore_overflow_warnings ();
2668 }
2669
2670 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2671    expressions.  */
2672 static bool
2673 dr_equal_offsets_p1 (tree offset1, tree offset2)
2674 {
2675   bool res;
2676
2677   STRIP_NOPS (offset1);
2678   STRIP_NOPS (offset2);
2679
2680   if (offset1 == offset2)
2681     return true;
2682
2683   if (TREE_CODE (offset1) != TREE_CODE (offset2)
2684       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2685     return false;
2686
2687   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2688                              TREE_OPERAND (offset2, 0));
2689
2690   if (!res || !BINARY_CLASS_P (offset1))
2691     return res;
2692
2693   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2694                              TREE_OPERAND (offset2, 1));
2695
2696   return res;
2697 }
2698
2699 /* Check if DRA and DRB have equal offsets.  */
2700 bool
2701 dr_equal_offsets_p (struct data_reference *dra,
2702                     struct data_reference *drb)
2703 {
2704   tree offset1, offset2;
2705
2706   offset1 = DR_OFFSET (dra);
2707   offset2 = DR_OFFSET (drb);
2708
2709   return dr_equal_offsets_p1 (offset1, offset2);
2710 }
2711
2712 /* Returns true if FNA == FNB.  */
2713
2714 static bool
2715 affine_function_equal_p (affine_fn fna, affine_fn fnb)
2716 {
2717   unsigned i, n = fna.length ();
2718
2719   if (n != fnb.length ())
2720     return false;
2721
2722   for (i = 0; i < n; i++)
2723     if (!operand_equal_p (fna[i], fnb[i], 0))
2724       return false;
2725
2726   return true;
2727 }
2728
2729 /* If all the functions in CF are the same, returns one of them,
2730    otherwise returns NULL.  */
2731
2732 static affine_fn
2733 common_affine_function (conflict_function *cf)
2734 {
2735   unsigned i;
2736   affine_fn comm;
2737
2738   if (!CF_NONTRIVIAL_P (cf))
2739     return affine_fn ();
2740
2741   comm = cf->fns[0];
2742
2743   for (i = 1; i < cf->n; i++)
2744     if (!affine_function_equal_p (comm, cf->fns[i]))
2745       return affine_fn ();
2746
2747   return comm;
2748 }
2749
2750 /* Returns the base of the affine function FN.  */
2751
2752 static tree
2753 affine_function_base (affine_fn fn)
2754 {
2755   return fn[0];
2756 }
2757
2758 /* Returns true if FN is a constant.  */
2759
2760 static bool
2761 affine_function_constant_p (affine_fn fn)
2762 {
2763   unsigned i;
2764   tree coef;
2765
2766   for (i = 1; fn.iterate (i, &coef); i++)
2767     if (!integer_zerop (coef))
2768       return false;
2769
2770   return true;
2771 }
2772
2773 /* Returns true if FN is the zero constant function.  */
2774
2775 static bool
2776 affine_function_zero_p (affine_fn fn)
2777 {
2778   return (integer_zerop (affine_function_base (fn))
2779           && affine_function_constant_p (fn));
2780 }
2781
2782 /* Returns a signed integer type with the largest precision from TA
2783    and TB.  */
2784
2785 static tree
2786 signed_type_for_types (tree ta, tree tb)
2787 {
2788   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2789     return signed_type_for (ta);
2790   else
2791     return signed_type_for (tb);
2792 }
2793
2794 /* Applies operation OP on affine functions FNA and FNB, and returns the
2795    result.  */
2796
2797 static affine_fn
2798 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2799 {
2800   unsigned i, n, m;
2801   affine_fn ret;
2802   tree coef;
2803
2804   if (fnb.length () > fna.length ())
2805     {
2806       n = fna.length ();
2807       m = fnb.length ();
2808     }
2809   else
2810     {
2811       n = fnb.length ();
2812       m = fna.length ();
2813     }
2814
2815   ret.create (m);
2816   for (i = 0; i < n; i++)
2817     {
2818       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2819                                          TREE_TYPE (fnb[i]));
2820       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2821     }
2822
2823   for (; fna.iterate (i, &coef); i++)
2824     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2825                                  coef, integer_zero_node));
2826   for (; fnb.iterate (i, &coef); i++)
2827     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2828                                  integer_zero_node, coef));
2829
2830   return ret;
2831 }
2832
2833 /* Returns the sum of affine functions FNA and FNB.  */
2834
2835 static affine_fn
2836 affine_fn_plus (affine_fn fna, affine_fn fnb)
2837 {
2838   return affine_fn_op (PLUS_EXPR, fna, fnb);
2839 }
2840
2841 /* Returns the difference of affine functions FNA and FNB.  */
2842
2843 static affine_fn
2844 affine_fn_minus (affine_fn fna, affine_fn fnb)
2845 {
2846   return affine_fn_op (MINUS_EXPR, fna, fnb);
2847 }
2848
2849 /* Frees affine function FN.  */
2850
2851 static void
2852 affine_fn_free (affine_fn fn)
2853 {
2854   fn.release ();
2855 }
2856
2857 /* Determine for each subscript in the data dependence relation DDR
2858    the distance.  */
2859
2860 static void
2861 compute_subscript_distance (struct data_dependence_relation *ddr)
2862 {
2863   conflict_function *cf_a, *cf_b;
2864   affine_fn fn_a, fn_b, diff;
2865
2866   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2867     {
2868       unsigned int i;
2869
2870       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2871         {
2872           struct subscript *subscript;
2873
2874           subscript = DDR_SUBSCRIPT (ddr, i);
2875           cf_a = SUB_CONFLICTS_IN_A (subscript);
2876           cf_b = SUB_CONFLICTS_IN_B (subscript);
2877
2878           fn_a = common_affine_function (cf_a);
2879           fn_b = common_affine_function (cf_b);
2880           if (!fn_a.exists () || !fn_b.exists ())
2881             {
2882               SUB_DISTANCE (subscript) = chrec_dont_know;
2883               return;
2884             }
2885           diff = affine_fn_minus (fn_a, fn_b);
2886
2887           if (affine_function_constant_p (diff))
2888             SUB_DISTANCE (subscript) = affine_function_base (diff);
2889           else
2890             SUB_DISTANCE (subscript) = chrec_dont_know;
2891
2892           affine_fn_free (diff);
2893         }
2894     }
2895 }
2896
2897 /* Returns the conflict function for "unknown".  */
2898
2899 static conflict_function *
2900 conflict_fn_not_known (void)
2901 {
2902   conflict_function *fn = XCNEW (conflict_function);
2903   fn->n = NOT_KNOWN;
2904
2905   return fn;
2906 }
2907
2908 /* Returns the conflict function for "independent".  */
2909
2910 static conflict_function *
2911 conflict_fn_no_dependence (void)
2912 {
2913   conflict_function *fn = XCNEW (conflict_function);
2914   fn->n = NO_DEPENDENCE;
2915
2916   return fn;
2917 }
2918
2919 /* Returns true if the address of OBJ is invariant in LOOP.  */
2920
2921 static bool
2922 object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2923 {
2924   while (handled_component_p (obj))
2925     {
2926       if (TREE_CODE (obj) == ARRAY_REF)
2927         {
2928           for (int i = 1; i < 4; ++i)
2929             if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2930                                                         loop->num))
2931               return false;
2932         }
2933       else if (TREE_CODE (obj) == COMPONENT_REF)
2934         {
2935           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2936                                                       loop->num))
2937             return false;
2938         }
2939       obj = TREE_OPERAND (obj, 0);
2940     }
2941
2942   if (!INDIRECT_REF_P (obj)
2943       && TREE_CODE (obj) != MEM_REF)
2944     return true;
2945
2946   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2947                                                   loop->num);
2948 }
2949
2950 /* Returns false if we can prove that data references A and B do not alias,
2951    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
2952    considered.  */
2953
2954 bool
2955 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2956                 class loop *loop_nest)
2957 {
2958   tree addr_a = DR_BASE_OBJECT (a);
2959   tree addr_b = DR_BASE_OBJECT (b);
2960
2961   /* If we are not processing a loop nest but scalar code we
2962      do not need to care about possible cross-iteration dependences
2963      and thus can process the full original reference.  Do so,
2964      similar to how loop invariant motion applies extra offset-based
2965      disambiguation.  */
2966   if (!loop_nest)
2967     {
2968       aff_tree off1, off2;
2969       poly_widest_int size1, size2;
2970       get_inner_reference_aff (DR_REF (a), &off1, &size1);
2971       get_inner_reference_aff (DR_REF (b), &off2, &size2);
2972       aff_combination_scale (&off1, -1);
2973       aff_combination_add (&off2, &off1);
2974       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2975         return false;
2976     }
2977
2978   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2979       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2980       /* For cross-iteration dependences the cliques must be valid for the
2981          whole loop, not just individual iterations.  */
2982       && (!loop_nest
2983           || MR_DEPENDENCE_CLIQUE (addr_a) == 1
2984           || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
2985       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2986       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2987     return false;
2988
2989   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2990      do not know the size of the base-object.  So we cannot do any
2991      offset/overlap based analysis but have to rely on points-to
2992      information only.  */
2993   if (TREE_CODE (addr_a) == MEM_REF
2994       && (DR_UNCONSTRAINED_BASE (a)
2995           || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2996     {
2997       /* For true dependences we can apply TBAA.  */
2998       if (flag_strict_aliasing
2999           && DR_IS_WRITE (a) && DR_IS_READ (b)
3000           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3001                                      get_alias_set (DR_REF (b))))
3002         return false;
3003       if (TREE_CODE (addr_b) == MEM_REF)
3004         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3005                                        TREE_OPERAND (addr_b, 0));
3006       else
3007         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3008                                        build_fold_addr_expr (addr_b));
3009     }
3010   else if (TREE_CODE (addr_b) == MEM_REF
3011            && (DR_UNCONSTRAINED_BASE (b)
3012                || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3013     {
3014       /* For true dependences we can apply TBAA.  */
3015       if (flag_strict_aliasing
3016           && DR_IS_WRITE (a) && DR_IS_READ (b)
3017           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3018                                      get_alias_set (DR_REF (b))))
3019         return false;
3020       if (TREE_CODE (addr_a) == MEM_REF)
3021         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3022                                        TREE_OPERAND (addr_b, 0));
3023       else
3024         return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3025                                        TREE_OPERAND (addr_b, 0));
3026     }
3027
3028   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3029      that is being subsetted in the loop nest.  */
3030   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3031     return refs_output_dependent_p (addr_a, addr_b);
3032   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3033     return refs_anti_dependent_p (addr_a, addr_b);
3034   return refs_may_alias_p (addr_a, addr_b);
3035 }
3036
3037 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
3038    if it is meaningful to compare their associated access functions
3039    when checking for dependencies.  */
3040
3041 static bool
3042 access_fn_components_comparable_p (tree ref_a, tree ref_b)
3043 {
3044   /* Allow pairs of component refs from the following sets:
3045
3046        { REALPART_EXPR, IMAGPART_EXPR }
3047        { COMPONENT_REF }
3048        { ARRAY_REF }.  */
3049   tree_code code_a = TREE_CODE (ref_a);
3050   tree_code code_b = TREE_CODE (ref_b);
3051   if (code_a == IMAGPART_EXPR)
3052     code_a = REALPART_EXPR;
3053   if (code_b == IMAGPART_EXPR)
3054     code_b = REALPART_EXPR;
3055   if (code_a != code_b)
3056     return false;
3057
3058   if (TREE_CODE (ref_a) == COMPONENT_REF)
3059     /* ??? We cannot simply use the type of operand #0 of the refs here as
3060        the Fortran compiler smuggles type punning into COMPONENT_REFs.
3061        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
3062     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3063             == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3064
3065   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3066                              TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3067 }
3068
3069 /* Initialize a data dependence relation between data accesses A and
3070    B.  NB_LOOPS is the number of loops surrounding the references: the
3071    size of the classic distance/direction vectors.  */
3072
3073 struct data_dependence_relation *
3074 initialize_data_dependence_relation (struct data_reference *a,
3075                                      struct data_reference *b,
3076                                      vec<loop_p> loop_nest)
3077 {
3078   struct data_dependence_relation *res;
3079   unsigned int i;
3080
3081   res = XCNEW (struct data_dependence_relation);
3082   DDR_A (res) = a;
3083   DDR_B (res) = b;
3084   DDR_LOOP_NEST (res).create (0);
3085   DDR_SUBSCRIPTS (res).create (0);
3086   DDR_DIR_VECTS (res).create (0);
3087   DDR_DIST_VECTS (res).create (0);
3088
3089   if (a == NULL || b == NULL)
3090     {
3091       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3092       return res;
3093     }
3094
3095   /* If the data references do not alias, then they are independent.  */
3096   if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3097     {
3098       DDR_ARE_DEPENDENT (res) = chrec_known;
3099       return res;
3100     }
3101
3102   unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
3103   unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
3104   if (num_dimensions_a == 0 || num_dimensions_b == 0)
3105     {
3106       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3107       return res;
3108     }
3109
3110   /* For unconstrained bases, the root (highest-indexed) subscript
3111      describes a variation in the base of the original DR_REF rather
3112      than a component access.  We have no type that accurately describes
3113      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3114      applying this subscript) so limit the search to the last real
3115      component access.
3116
3117      E.g. for:
3118
3119         void
3120         f (int a[][8], int b[][8])
3121         {
3122           for (int i = 0; i < 8; ++i)
3123             a[i * 2][0] = b[i][0];
3124         }
3125
3126      the a and b accesses have a single ARRAY_REF component reference [0]
3127      but have two subscripts.  */
3128   if (DR_UNCONSTRAINED_BASE (a))
3129     num_dimensions_a -= 1;
3130   if (DR_UNCONSTRAINED_BASE (b))
3131     num_dimensions_b -= 1;
3132
3133   /* These structures describe sequences of component references in
3134      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
3135      specific access function.  */
3136   struct {
3137     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3138        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3139        indices.  In C notation, these are the indices of the rightmost
3140        component references; e.g. for a sequence .b.c.d, the start
3141        index is for .d.  */
3142     unsigned int start_a;
3143     unsigned int start_b;
3144
3145     /* The sequence contains LENGTH consecutive access functions from
3146        each DR.  */
3147     unsigned int length;
3148
3149     /* The enclosing objects for the A and B sequences respectively,
3150        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3151        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
3152     tree object_a;
3153     tree object_b;
3154   } full_seq = {}, struct_seq = {};
3155
3156   /* Before each iteration of the loop:
3157
3158      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3159      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
3160   unsigned int index_a = 0;
3161   unsigned int index_b = 0;
3162   tree ref_a = DR_REF (a);
3163   tree ref_b = DR_REF (b);
3164
3165   /* Now walk the component references from the final DR_REFs back up to
3166      the enclosing base objects.  Each component reference corresponds
3167      to one access function in the DR, with access function 0 being for
3168      the final DR_REF and the highest-indexed access function being the
3169      one that is applied to the base of the DR.
3170
3171      Look for a sequence of component references whose access functions
3172      are comparable (see access_fn_components_comparable_p).  If more
3173      than one such sequence exists, pick the one nearest the base
3174      (which is the leftmost sequence in C notation).  Store this sequence
3175      in FULL_SEQ.
3176
3177      For example, if we have:
3178
3179         struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3180
3181         A: a[0][i].s.c.d
3182         B: __real b[0][i].s.e[i].f
3183
3184      (where d is the same type as the real component of f) then the access
3185      functions would be:
3186
3187                          0   1   2   3
3188         A:              .d  .c  .s [i]
3189
3190                  0   1   2   3   4   5
3191         B:  __real  .f [i]  .e  .s [i]
3192
3193      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3194      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
3195      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
3196      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3197      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
3198      index foo[10] arrays, so is again comparable.  The sequence is
3199      therefore:
3200
3201         A: [1, 3]  (i.e. [i].s.c)
3202         B: [3, 5]  (i.e. [i].s.e)
3203
3204      Also look for sequences of component references whose access
3205      functions are comparable and whose enclosing objects have the same
3206      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
3207      example, STRUCT_SEQ would be:
3208
3209         A: [1, 2]  (i.e. s.c)
3210         B: [3, 4]  (i.e. s.e)  */
3211   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3212     {
3213       /* REF_A and REF_B must be one of the component access types
3214          allowed by dr_analyze_indices.  */
3215       gcc_checking_assert (access_fn_component_p (ref_a));
3216       gcc_checking_assert (access_fn_component_p (ref_b));
3217
3218       /* Get the immediately-enclosing objects for REF_A and REF_B,
3219          i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3220          and DR_ACCESS_FN (B, INDEX_B).  */
3221       tree object_a = TREE_OPERAND (ref_a, 0);
3222       tree object_b = TREE_OPERAND (ref_b, 0);
3223
3224       tree type_a = TREE_TYPE (object_a);
3225       tree type_b = TREE_TYPE (object_b);
3226       if (access_fn_components_comparable_p (ref_a, ref_b))
3227         {
3228           /* This pair of component accesses is comparable for dependence
3229              analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3230              DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
3231           if (full_seq.start_a + full_seq.length != index_a
3232               || full_seq.start_b + full_seq.length != index_b)
3233             {
3234               /* The accesses don't extend the current sequence,
3235                  so start a new one here.  */
3236               full_seq.start_a = index_a;
3237               full_seq.start_b = index_b;
3238               full_seq.length = 0;
3239             }
3240
3241           /* Add this pair of references to the sequence.  */
3242           full_seq.length += 1;
3243           full_seq.object_a = object_a;
3244           full_seq.object_b = object_b;
3245
3246           /* If the enclosing objects are structures (and thus have the
3247              same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
3248           if (TREE_CODE (type_a) == RECORD_TYPE)
3249             struct_seq = full_seq;
3250
3251           /* Move to the next containing reference for both A and B.  */
3252           ref_a = object_a;
3253           ref_b = object_b;
3254           index_a += 1;
3255           index_b += 1;
3256           continue;
3257         }
3258
3259       /* Try to approach equal type sizes.  */
3260       if (!COMPLETE_TYPE_P (type_a)
3261           || !COMPLETE_TYPE_P (type_b)
3262           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3263           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3264         break;
3265
3266       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3267       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3268       if (size_a <= size_b)
3269         {
3270           index_a += 1;
3271           ref_a = object_a;
3272         }
3273       if (size_b <= size_a)
3274         {
3275           index_b += 1;
3276           ref_b = object_b;
3277         }
3278     }
3279
3280   /* See whether FULL_SEQ ends at the base and whether the two bases
3281      are equal.  We do not care about TBAA or alignment info so we can
3282      use OEP_ADDRESS_OF to avoid false negatives.  */
3283   tree base_a = DR_BASE_OBJECT (a);
3284   tree base_b = DR_BASE_OBJECT (b);
3285   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3286                       && full_seq.start_b + full_seq.length == num_dimensions_b
3287                       && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
3288                       && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3289                       && (types_compatible_p (TREE_TYPE (base_a),
3290                                               TREE_TYPE (base_b))
3291                           || (!base_supports_access_fn_components_p (base_a)
3292                               && !base_supports_access_fn_components_p (base_b)
3293                               && operand_equal_p
3294                                    (TYPE_SIZE (TREE_TYPE (base_a)),
3295                                     TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3296                       && (!loop_nest.exists ()
3297                           || (object_address_invariant_in_loop_p
3298                               (loop_nest[0], base_a))));
3299
3300   /* If the bases are the same, we can include the base variation too.
3301      E.g. the b accesses in:
3302
3303        for (int i = 0; i < n; ++i)
3304          b[i + 4][0] = b[i][0];
3305
3306      have a definite dependence distance of 4, while for:
3307
3308        for (int i = 0; i < n; ++i)
3309          a[i + 4][0] = b[i][0];
3310
3311      the dependence distance depends on the gap between a and b.
3312
3313      If the bases are different then we can only rely on the sequence
3314      rooted at a structure access, since arrays are allowed to overlap
3315      arbitrarily and change shape arbitrarily.  E.g. we treat this as
3316      valid code:
3317
3318        int a[256];
3319        ...
3320        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3321
3322      where two lvalues with the same int[4][3] type overlap, and where
3323      both lvalues are distinct from the object's declared type.  */
3324   if (same_base_p)
3325     {
3326       if (DR_UNCONSTRAINED_BASE (a))
3327         full_seq.length += 1;
3328     }
3329   else
3330     full_seq = struct_seq;
3331
3332   /* Punt if we didn't find a suitable sequence.  */
3333   if (full_seq.length == 0)
3334     {
3335       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3336       return res;
3337     }
3338
3339   if (!same_base_p)
3340     {
3341       /* Partial overlap is possible for different bases when strict aliasing
3342          is not in effect.  It's also possible if either base involves a union
3343          access; e.g. for:
3344
3345            struct s1 { int a[2]; };
3346            struct s2 { struct s1 b; int c; };
3347            struct s3 { int d; struct s1 e; };
3348            union u { struct s2 f; struct s3 g; } *p, *q;
3349
3350          the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3351          "p->g.e" (base "p->g") and might partially overlap the s1 at
3352          "q->g.e" (base "q->g").  */
3353       if (!flag_strict_aliasing
3354           || ref_contains_union_access_p (full_seq.object_a)
3355           || ref_contains_union_access_p (full_seq.object_b))
3356         {
3357           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3358           return res;
3359         }
3360
3361       DDR_COULD_BE_INDEPENDENT_P (res) = true;
3362       if (!loop_nest.exists ()
3363           || (object_address_invariant_in_loop_p (loop_nest[0],
3364                                                   full_seq.object_a)
3365               && object_address_invariant_in_loop_p (loop_nest[0],
3366                                                      full_seq.object_b)))
3367         {
3368           DDR_OBJECT_A (res) = full_seq.object_a;
3369           DDR_OBJECT_B (res) = full_seq.object_b;
3370         }
3371     }
3372
3373   DDR_AFFINE_P (res) = true;
3374   DDR_ARE_DEPENDENT (res) = NULL_TREE;
3375   DDR_SUBSCRIPTS (res).create (full_seq.length);
3376   DDR_LOOP_NEST (res) = loop_nest;
3377   DDR_SELF_REFERENCE (res) = false;
3378
3379   for (i = 0; i < full_seq.length; ++i)
3380     {
3381       struct subscript *subscript;
3382
3383       subscript = XNEW (struct subscript);
3384       SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
3385       SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
3386       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3387       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3388       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3389       SUB_DISTANCE (subscript) = chrec_dont_know;
3390       DDR_SUBSCRIPTS (res).safe_push (subscript);
3391     }
3392
3393   return res;
3394 }
3395
3396 /* Frees memory used by the conflict function F.  */
3397
3398 static void
3399 free_conflict_function (conflict_function *f)
3400 {
3401   unsigned i;
3402
3403   if (CF_NONTRIVIAL_P (f))
3404     {
3405       for (i = 0; i < f->n; i++)
3406         affine_fn_free (f->fns[i]);
3407     }
3408   free (f);
3409 }
3410
3411 /* Frees memory used by SUBSCRIPTS.  */
3412
3413 static void
3414 free_subscripts (vec<subscript_p> subscripts)
3415 {
3416   for (subscript_p s : subscripts)
3417     {
3418       free_conflict_function (s->conflicting_iterations_in_a);
3419       free_conflict_function (s->conflicting_iterations_in_b);
3420       free (s);
3421     }
3422   subscripts.release ();
3423 }
3424
3425 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3426    description.  */
3427
3428 static inline void
3429 finalize_ddr_dependent (struct data_dependence_relation *ddr,
3430                         tree chrec)
3431 {
3432   DDR_ARE_DEPENDENT (ddr) = chrec;
3433   free_subscripts (DDR_SUBSCRIPTS (ddr));
3434   DDR_SUBSCRIPTS (ddr).create (0);
3435 }
3436
3437 /* The dependence relation DDR cannot be represented by a distance
3438    vector.  */
3439
3440 static inline void
3441 non_affine_dependence_relation (struct data_dependence_relation *ddr)
3442 {
3443   if (dump_file && (dump_flags & TDF_DETAILS))
3444     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3445
3446   DDR_AFFINE_P (ddr) = false;
3447 }
3448
3449 \f
3450
3451 /* This section contains the classic Banerjee tests.  */
3452
3453 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3454    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
3455
3456 static inline bool
3457 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3458 {
3459   return (evolution_function_is_constant_p (chrec_a)
3460           && evolution_function_is_constant_p (chrec_b));
3461 }
3462
3463 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
3464    variable, i.e., if the SIV (Single Index Variable) test is true.  */
3465
3466 static bool
3467 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3468 {
3469   if ((evolution_function_is_constant_p (chrec_a)
3470        && evolution_function_is_univariate_p (chrec_b))
3471       || (evolution_function_is_constant_p (chrec_b)
3472           && evolution_function_is_univariate_p (chrec_a)))
3473     return true;
3474
3475   if (evolution_function_is_univariate_p (chrec_a)
3476       && evolution_function_is_univariate_p (chrec_b))
3477     {
3478       switch (TREE_CODE (chrec_a))
3479         {
3480         case POLYNOMIAL_CHREC:
3481           switch (TREE_CODE (chrec_b))
3482             {
3483             case POLYNOMIAL_CHREC:
3484               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3485                 return false;
3486               /* FALLTHRU */
3487
3488             default:
3489               return true;
3490             }
3491
3492         default:
3493           return true;
3494         }
3495     }
3496
3497   return false;
3498 }
3499
3500 /* Creates a conflict function with N dimensions.  The affine functions
3501    in each dimension follow.  */
3502
3503 static conflict_function *
3504 conflict_fn (unsigned n, ...)
3505 {
3506   unsigned i;
3507   conflict_function *ret = XCNEW (conflict_function);
3508   va_list ap;
3509
3510   gcc_assert (n > 0 && n <= MAX_DIM);
3511   va_start (ap, n);
3512
3513   ret->n = n;
3514   for (i = 0; i < n; i++)
3515     ret->fns[i] = va_arg (ap, affine_fn);
3516   va_end (ap);
3517
3518   return ret;
3519 }
3520
3521 /* Returns constant affine function with value CST.  */
3522
3523 static affine_fn
3524 affine_fn_cst (tree cst)
3525 {
3526   affine_fn fn;
3527   fn.create (1);
3528   fn.quick_push (cst);
3529   return fn;
3530 }
3531
3532 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
3533
3534 static affine_fn
3535 affine_fn_univar (tree cst, unsigned dim, tree coef)
3536 {
3537   affine_fn fn;
3538   fn.create (dim + 1);
3539   unsigned i;
3540
3541   gcc_assert (dim > 0);
3542   fn.quick_push (cst);
3543   for (i = 1; i < dim; i++)
3544     fn.quick_push (integer_zero_node);
3545   fn.quick_push (coef);
3546   return fn;
3547 }
3548
3549 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
3550    *OVERLAPS_B are initialized to the functions that describe the
3551    relation between the elements accessed twice by CHREC_A and
3552    CHREC_B.  For k >= 0, the following property is verified:
3553
3554    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3555
3556 static void
3557 analyze_ziv_subscript (tree chrec_a,
3558                        tree chrec_b,
3559                        conflict_function **overlaps_a,
3560                        conflict_function **overlaps_b,
3561                        tree *last_conflicts)
3562 {
3563   tree type, difference;
3564   dependence_stats.num_ziv++;
3565
3566   if (dump_file && (dump_flags & TDF_DETAILS))
3567     fprintf (dump_file, "(analyze_ziv_subscript \n");
3568
3569   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3570   chrec_a = chrec_convert (type, chrec_a, NULL);
3571   chrec_b = chrec_convert (type, chrec_b, NULL);
3572   difference = chrec_fold_minus (type, chrec_a, chrec_b);
3573
3574   switch (TREE_CODE (difference))
3575     {
3576     case INTEGER_CST:
3577       if (integer_zerop (difference))
3578         {
3579           /* The difference is equal to zero: the accessed index
3580              overlaps for each iteration in the loop.  */
3581           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3582           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3583           *last_conflicts = chrec_dont_know;
3584           dependence_stats.num_ziv_dependent++;
3585         }
3586       else
3587         {
3588           /* The accesses do not overlap.  */
3589           *overlaps_a = conflict_fn_no_dependence ();
3590           *overlaps_b = conflict_fn_no_dependence ();
3591           *last_conflicts = integer_zero_node;
3592           dependence_stats.num_ziv_independent++;
3593         }
3594       break;
3595
3596     default:
3597       /* We're not sure whether the indexes overlap.  For the moment,
3598          conservatively answer "don't know".  */
3599       if (dump_file && (dump_flags & TDF_DETAILS))
3600         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3601
3602       *overlaps_a = conflict_fn_not_known ();
3603       *overlaps_b = conflict_fn_not_known ();
3604       *last_conflicts = chrec_dont_know;
3605       dependence_stats.num_ziv_unimplemented++;
3606       break;
3607     }
3608
3609   if (dump_file && (dump_flags & TDF_DETAILS))
3610     fprintf (dump_file, ")\n");
3611 }
3612
3613 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3614    and only if it fits to the int type.  If this is not the case, or the
3615    bound  on the number of iterations of LOOP could not be derived, returns
3616    chrec_dont_know.  */
3617
3618 static tree
3619 max_stmt_executions_tree (class loop *loop)
3620 {
3621   widest_int nit;
3622
3623   if (!max_stmt_executions (loop, &nit))
3624     return chrec_dont_know;
3625
3626   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3627     return chrec_dont_know;
3628
3629   return wide_int_to_tree (unsigned_type_node, nit);
3630 }
3631
3632 /* Determine whether the CHREC is always positive/negative.  If the expression
3633    cannot be statically analyzed, return false, otherwise set the answer into
3634    VALUE.  */
3635
3636 static bool
3637 chrec_is_positive (tree chrec, bool *value)
3638 {
3639   bool value0, value1, value2;
3640   tree end_value, nb_iter;
3641
3642   switch (TREE_CODE (chrec))
3643     {
3644     case POLYNOMIAL_CHREC:
3645       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3646           || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3647         return false;
3648
3649       /* FIXME -- overflows.  */
3650       if (value0 == value1)
3651         {
3652           *value = value0;
3653           return true;
3654         }
3655
3656       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3657          and the proof consists in showing that the sign never
3658          changes during the execution of the loop, from 0 to
3659          loop->nb_iterations.  */
3660       if (!evolution_function_is_affine_p (chrec))
3661         return false;
3662
3663       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3664       if (chrec_contains_undetermined (nb_iter))
3665         return false;
3666
3667 #if 0
3668       /* TODO -- If the test is after the exit, we may decrease the number of
3669          iterations by one.  */
3670       if (after_exit)
3671         nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3672 #endif
3673
3674       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3675
3676       if (!chrec_is_positive (end_value, &value2))
3677         return false;
3678
3679       *value = value0;
3680       return value0 == value1;
3681
3682     case INTEGER_CST:
3683       switch (tree_int_cst_sgn (chrec))
3684         {
3685         case -1:
3686           *value = false;
3687           break;
3688         case 1:
3689           *value = true;
3690           break;
3691         default:
3692           return false;
3693         }
3694       return true;
3695
3696     default:
3697       return false;
3698     }
3699 }
3700
3701
3702 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3703    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
3704    *OVERLAPS_B are initialized to the functions that describe the
3705    relation between the elements accessed twice by CHREC_A and
3706    CHREC_B.  For k >= 0, the following property is verified:
3707
3708    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3709
3710 static void
3711 analyze_siv_subscript_cst_affine (tree chrec_a,
3712                                   tree chrec_b,
3713                                   conflict_function **overlaps_a,
3714                                   conflict_function **overlaps_b,
3715                                   tree *last_conflicts)
3716 {
3717   bool value0, value1, value2;
3718   tree type, difference, tmp;
3719
3720   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3721   chrec_a = chrec_convert (type, chrec_a, NULL);
3722   chrec_b = chrec_convert (type, chrec_b, NULL);
3723   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3724
3725   /* Special case overlap in the first iteration.  */
3726   if (integer_zerop (difference))
3727     {
3728       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3729       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3730       *last_conflicts = integer_one_node;
3731       return;
3732     }
3733
3734   if (!chrec_is_positive (initial_condition (difference), &value0))
3735     {
3736       if (dump_file && (dump_flags & TDF_DETAILS))
3737         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3738
3739       dependence_stats.num_siv_unimplemented++;
3740       *overlaps_a = conflict_fn_not_known ();
3741       *overlaps_b = conflict_fn_not_known ();
3742       *last_conflicts = chrec_dont_know;
3743       return;
3744     }
3745   else
3746     {
3747       if (value0 == false)
3748         {
3749           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3750               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3751             {
3752               if (dump_file && (dump_flags & TDF_DETAILS))
3753                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3754
3755               *overlaps_a = conflict_fn_not_known ();
3756               *overlaps_b = conflict_fn_not_known ();
3757               *last_conflicts = chrec_dont_know;
3758               dependence_stats.num_siv_unimplemented++;
3759               return;
3760             }
3761           else
3762             {
3763               if (value1 == true)
3764                 {
3765                   /* Example:
3766                      chrec_a = 12
3767                      chrec_b = {10, +, 1}
3768                   */
3769
3770                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3771                     {
3772                       HOST_WIDE_INT numiter;
3773                       class loop *loop = get_chrec_loop (chrec_b);
3774
3775                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3776                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
3777                                          fold_build1 (ABS_EXPR, type, difference),
3778                                          CHREC_RIGHT (chrec_b));
3779                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3780                       *last_conflicts = integer_one_node;
3781
3782
3783                       /* Perform weak-zero siv test to see if overlap is
3784                          outside the loop bounds.  */
3785                       numiter = max_stmt_executions_int (loop);
3786
3787                       if (numiter >= 0
3788                           && compare_tree_int (tmp, numiter) > 0)
3789                         {
3790                           free_conflict_function (*overlaps_a);
3791                           free_conflict_function (*overlaps_b);
3792                           *overlaps_a = conflict_fn_no_dependence ();
3793                           *overlaps_b = conflict_fn_no_dependence ();
3794                           *last_conflicts = integer_zero_node;
3795                           dependence_stats.num_siv_independent++;
3796                           return;
3797                         }
3798                       dependence_stats.num_siv_dependent++;
3799                       return;
3800                     }
3801
3802                   /* When the step does not divide the difference, there are
3803                      no overlaps.  */
3804                   else
3805                     {
3806                       *overlaps_a = conflict_fn_no_dependence ();
3807                       *overlaps_b = conflict_fn_no_dependence ();
3808                       *last_conflicts = integer_zero_node;
3809                       dependence_stats.num_siv_independent++;
3810                       return;
3811                     }
3812                 }
3813
3814               else
3815                 {
3816                   /* Example:
3817                      chrec_a = 12
3818                      chrec_b = {10, +, -1}
3819
3820                      In this case, chrec_a will not overlap with chrec_b.  */
3821                   *overlaps_a = conflict_fn_no_dependence ();
3822                   *overlaps_b = conflict_fn_no_dependence ();
3823                   *last_conflicts = integer_zero_node;
3824                   dependence_stats.num_siv_independent++;
3825                   return;
3826                 }
3827             }
3828         }
3829       else
3830         {
3831           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3832               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3833             {
3834               if (dump_file && (dump_flags & TDF_DETAILS))
3835                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3836
3837               *overlaps_a = conflict_fn_not_known ();
3838               *overlaps_b = conflict_fn_not_known ();
3839               *last_conflicts = chrec_dont_know;
3840               dependence_stats.num_siv_unimplemented++;
3841               return;
3842             }
3843           else
3844             {
3845               if (value2 == false)
3846                 {
3847                   /* Example:
3848                      chrec_a = 3
3849                      chrec_b = {10, +, -1}
3850                   */
3851                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3852                     {
3853                       HOST_WIDE_INT numiter;
3854                       class loop *loop = get_chrec_loop (chrec_b);
3855
3856                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3857                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3858                                          CHREC_RIGHT (chrec_b));
3859                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3860                       *last_conflicts = integer_one_node;
3861
3862                       /* Perform weak-zero siv test to see if overlap is
3863                          outside the loop bounds.  */
3864                       numiter = max_stmt_executions_int (loop);
3865
3866                       if (numiter >= 0
3867                           && compare_tree_int (tmp, numiter) > 0)
3868                         {
3869                           free_conflict_function (*overlaps_a);
3870                           free_conflict_function (*overlaps_b);
3871                           *overlaps_a = conflict_fn_no_dependence ();
3872                           *overlaps_b = conflict_fn_no_dependence ();
3873                           *last_conflicts = integer_zero_node;
3874                           dependence_stats.num_siv_independent++;
3875                           return;
3876                         }
3877                       dependence_stats.num_siv_dependent++;
3878                       return;
3879                     }
3880
3881                   /* When the step does not divide the difference, there
3882                      are no overlaps.  */
3883                   else
3884                     {
3885                       *overlaps_a = conflict_fn_no_dependence ();
3886                       *overlaps_b = conflict_fn_no_dependence ();
3887                       *last_conflicts = integer_zero_node;
3888                       dependence_stats.num_siv_independent++;
3889                       return;
3890                     }
3891                 }
3892               else
3893                 {
3894                   /* Example:
3895                      chrec_a = 3
3896                      chrec_b = {4, +, 1}
3897
3898                      In this case, chrec_a will not overlap with chrec_b.  */
3899                   *overlaps_a = conflict_fn_no_dependence ();
3900                   *overlaps_b = conflict_fn_no_dependence ();
3901                   *last_conflicts = integer_zero_node;
3902                   dependence_stats.num_siv_independent++;
3903                   return;
3904                 }
3905             }
3906         }
3907     }
3908 }
3909
3910 /* Helper recursive function for initializing the matrix A.  Returns
3911    the initial value of CHREC.  */
3912
3913 static tree
3914 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3915 {
3916   gcc_assert (chrec);
3917
3918   switch (TREE_CODE (chrec))
3919     {
3920     case POLYNOMIAL_CHREC:
3921       HOST_WIDE_INT chrec_right;
3922       if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
3923         return chrec_dont_know;
3924       chrec_right = int_cst_value (CHREC_RIGHT (chrec));
3925       /* We want to be able to negate without overflow.  */
3926       if (chrec_right == HOST_WIDE_INT_MIN)
3927         return chrec_dont_know;
3928       A[index][0] = mult * chrec_right;
3929       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3930
3931     case PLUS_EXPR:
3932     case MULT_EXPR:
3933     case MINUS_EXPR:
3934       {
3935         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3936         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3937
3938         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3939       }
3940
3941     CASE_CONVERT:
3942       {
3943         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3944         return chrec_convert (chrec_type (chrec), op, NULL);
3945       }
3946
3947     case BIT_NOT_EXPR:
3948       {
3949         /* Handle ~X as -1 - X.  */
3950         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3951         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3952                               build_int_cst (TREE_TYPE (chrec), -1), op);
3953       }
3954
3955     case INTEGER_CST:
3956       return chrec;
3957
3958     default:
3959       gcc_unreachable ();
3960       return NULL_TREE;
3961     }
3962 }
3963
3964 #define FLOOR_DIV(x,y) ((x) / (y))
3965
3966 /* Solves the special case of the Diophantine equation:
3967    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3968
3969    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
3970    number of iterations that loops X and Y run.  The overlaps will be
3971    constructed as evolutions in dimension DIM.  */
3972
3973 static void
3974 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3975                                          HOST_WIDE_INT step_a,
3976                                          HOST_WIDE_INT step_b,
3977                                          affine_fn *overlaps_a,
3978                                          affine_fn *overlaps_b,
3979                                          tree *last_conflicts, int dim)
3980 {
3981   if (((step_a > 0 && step_b > 0)
3982        || (step_a < 0 && step_b < 0)))
3983     {
3984       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3985       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3986
3987       gcd_steps_a_b = gcd (step_a, step_b);
3988       step_overlaps_a = step_b / gcd_steps_a_b;
3989       step_overlaps_b = step_a / gcd_steps_a_b;
3990
3991       if (niter > 0)
3992         {
3993           tau2 = FLOOR_DIV (niter, step_overlaps_a);
3994           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3995           last_conflict = tau2;
3996           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3997         }
3998       else
3999         *last_conflicts = chrec_dont_know;
4000
4001       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4002                                       build_int_cst (NULL_TREE,
4003                                                      step_overlaps_a));
4004       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4005                                       build_int_cst (NULL_TREE,
4006                                                      step_overlaps_b));
4007     }
4008
4009   else
4010     {
4011       *overlaps_a = affine_fn_cst (integer_zero_node);
4012       *overlaps_b = affine_fn_cst (integer_zero_node);
4013       *last_conflicts = integer_zero_node;
4014     }
4015 }
4016
4017 /* Solves the special case of a Diophantine equation where CHREC_A is
4018    an affine bivariate function, and CHREC_B is an affine univariate
4019    function.  For example,
4020
4021    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4022
4023    has the following overlapping functions:
4024
4025    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4026    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4027    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4028
4029    FORNOW: This is a specialized implementation for a case occurring in
4030    a common benchmark.  Implement the general algorithm.  */
4031
4032 static void
4033 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4034                                       conflict_function **overlaps_a,
4035                                       conflict_function **overlaps_b,
4036                                       tree *last_conflicts)
4037 {
4038   bool xz_p, yz_p, xyz_p;
4039   HOST_WIDE_INT step_x, step_y, step_z;
4040   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4041   affine_fn overlaps_a_xz, overlaps_b_xz;
4042   affine_fn overlaps_a_yz, overlaps_b_yz;
4043   affine_fn overlaps_a_xyz, overlaps_b_xyz;
4044   affine_fn ova1, ova2, ovb;
4045   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4046
4047   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
4048   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
4049   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
4050
4051   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4052   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4053   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4054
4055   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
4056     {
4057       if (dump_file && (dump_flags & TDF_DETAILS))
4058         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4059
4060       *overlaps_a = conflict_fn_not_known ();
4061       *overlaps_b = conflict_fn_not_known ();
4062       *last_conflicts = chrec_dont_know;
4063       return;
4064     }
4065
4066   niter = MIN (niter_x, niter_z);
4067   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4068                                            &overlaps_a_xz,
4069                                            &overlaps_b_xz,
4070                                            &last_conflicts_xz, 1);
4071   niter = MIN (niter_y, niter_z);
4072   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4073                                            &overlaps_a_yz,
4074                                            &overlaps_b_yz,
4075                                            &last_conflicts_yz, 2);
4076   niter = MIN (niter_x, niter_z);
4077   niter = MIN (niter_y, niter);
4078   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4079                                            &overlaps_a_xyz,
4080                                            &overlaps_b_xyz,
4081                                            &last_conflicts_xyz, 3);
4082
4083   xz_p = !integer_zerop (last_conflicts_xz);
4084   yz_p = !integer_zerop (last_conflicts_yz);
4085   xyz_p = !integer_zerop (last_conflicts_xyz);
4086
4087   if (xz_p || yz_p || xyz_p)
4088     {
4089       ova1 = affine_fn_cst (integer_zero_node);
4090       ova2 = affine_fn_cst (integer_zero_node);
4091       ovb = affine_fn_cst (integer_zero_node);
4092       if (xz_p)
4093         {
4094           affine_fn t0 = ova1;
4095           affine_fn t2 = ovb;
4096
4097           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4098           ovb = affine_fn_plus (ovb, overlaps_b_xz);
4099           affine_fn_free (t0);
4100           affine_fn_free (t2);
4101           *last_conflicts = last_conflicts_xz;
4102         }
4103       if (yz_p)
4104         {
4105           affine_fn t0 = ova2;
4106           affine_fn t2 = ovb;
4107
4108           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4109           ovb = affine_fn_plus (ovb, overlaps_b_yz);
4110           affine_fn_free (t0);
4111           affine_fn_free (t2);
4112           *last_conflicts = last_conflicts_yz;
4113         }
4114       if (xyz_p)
4115         {
4116           affine_fn t0 = ova1;
4117           affine_fn t2 = ova2;
4118           affine_fn t4 = ovb;
4119
4120           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
4121           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4122           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4123           affine_fn_free (t0);
4124           affine_fn_free (t2);
4125           affine_fn_free (t4);
4126           *last_conflicts = last_conflicts_xyz;
4127         }
4128       *overlaps_a = conflict_fn (2, ova1, ova2);
4129       *overlaps_b = conflict_fn (1, ovb);
4130     }
4131   else
4132     {
4133       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4134       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4135       *last_conflicts = integer_zero_node;
4136     }
4137
4138   affine_fn_free (overlaps_a_xz);
4139   affine_fn_free (overlaps_b_xz);
4140   affine_fn_free (overlaps_a_yz);
4141   affine_fn_free (overlaps_b_yz);
4142   affine_fn_free (overlaps_a_xyz);
4143   affine_fn_free (overlaps_b_xyz);
4144 }
4145
4146 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
4147
4148 static void
4149 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4150                     int size)
4151 {
4152   memcpy (vec2, vec1, size * sizeof (*vec1));
4153 }
4154
4155 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
4156
4157 static void
4158 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4159                     int m, int n)
4160 {
4161   int i;
4162
4163   for (i = 0; i < m; i++)
4164     lambda_vector_copy (mat1[i], mat2[i], n);
4165 }
4166
4167 /* Store the N x N identity matrix in MAT.  */
4168
4169 static void
4170 lambda_matrix_id (lambda_matrix mat, int size)
4171 {
4172   int i, j;
4173
4174   for (i = 0; i < size; i++)
4175     for (j = 0; j < size; j++)
4176       mat[i][j] = (i == j) ? 1 : 0;
4177 }
4178
4179 /* Return the index of the first nonzero element of vector VEC1 between
4180    START and N.  We must have START <= N.
4181    Returns N if VEC1 is the zero vector.  */
4182
4183 static int
4184 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4185 {
4186   int j = start;
4187   while (j < n && vec1[j] == 0)
4188     j++;
4189   return j;
4190 }
4191
4192 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4193    R2 = R2 + CONST1 * R1.  */
4194
4195 static bool
4196 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4197                        lambda_int const1)
4198 {
4199   int i;
4200
4201   if (const1 == 0)
4202     return true;
4203
4204   for (i = 0; i < n; i++)
4205     {
4206       bool ovf;
4207       lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4208       if (ovf)
4209         return false;
4210       lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4211       if (ovf || tem2 == HOST_WIDE_INT_MIN)
4212         return false;
4213       mat[r2][i] = tem2;
4214     }
4215
4216   return true;
4217 }
4218
4219 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
4220    and store the result in VEC2.  */
4221
4222 static void
4223 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4224                           int size, lambda_int const1)
4225 {
4226   int i;
4227
4228   if (const1 == 0)
4229     lambda_vector_clear (vec2, size);
4230   else
4231     for (i = 0; i < size; i++)
4232       vec2[i] = const1 * vec1[i];
4233 }
4234
4235 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
4236
4237 static void
4238 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4239                       int size)
4240 {
4241   lambda_vector_mult_const (vec1, vec2, size, -1);
4242 }
4243
4244 /* Negate row R1 of matrix MAT which has N columns.  */
4245
4246 static void
4247 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4248 {
4249   lambda_vector_negate (mat[r1], mat[r1], n);
4250 }
4251
4252 /* Return true if two vectors are equal.  */
4253
4254 static bool
4255 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4256 {
4257   int i;
4258   for (i = 0; i < size; i++)
4259     if (vec1[i] != vec2[i])
4260       return false;
4261   return true;
4262 }
4263
4264 /* Given an M x N integer matrix A, this function determines an M x
4265    M unimodular matrix U, and an M x N echelon matrix S such that
4266    "U.A = S".  This decomposition is also known as "right Hermite".
4267
4268    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4269    Restructuring Compilers" Utpal Banerjee.  */
4270
4271 static bool
4272 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4273                              lambda_matrix S, lambda_matrix U)
4274 {
4275   int i, j, i0 = 0;
4276
4277   lambda_matrix_copy (A, S, m, n);
4278   lambda_matrix_id (U, m);
4279
4280   for (j = 0; j < n; j++)
4281     {
4282       if (lambda_vector_first_nz (S[j], m, i0) < m)
4283         {
4284           ++i0;
4285           for (i = m - 1; i >= i0; i--)
4286             {
4287               while (S[i][j] != 0)
4288                 {
4289                   lambda_int factor, a, b;
4290
4291                   a = S[i-1][j];
4292                   b = S[i][j];
4293                   gcc_assert (a != HOST_WIDE_INT_MIN);
4294                   factor = a / b;
4295
4296                   if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
4297                     return false;
4298                   std::swap (S[i], S[i-1]);
4299
4300                   if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
4301                     return false;
4302                   std::swap (U[i], U[i-1]);
4303                 }
4304             }
4305         }
4306     }
4307
4308   return true;
4309 }
4310
4311 /* Determines the overlapping elements due to accesses CHREC_A and
4312    CHREC_B, that are affine functions.  This function cannot handle
4313    symbolic evolution functions, ie. when initial conditions are
4314    parameters, because it uses lambda matrices of integers.  */
4315
4316 static void
4317 analyze_subscript_affine_affine (tree chrec_a,
4318                                  tree chrec_b,
4319                                  conflict_function **overlaps_a,
4320                                  conflict_function **overlaps_b,
4321                                  tree *last_conflicts)
4322 {
4323   unsigned nb_vars_a, nb_vars_b, dim;
4324   lambda_int gamma, gcd_alpha_beta;
4325   lambda_matrix A, U, S;
4326   struct obstack scratch_obstack;
4327
4328   if (eq_evolutions_p (chrec_a, chrec_b))
4329     {
4330       /* The accessed index overlaps for each iteration in the
4331          loop.  */
4332       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4333       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4334       *last_conflicts = chrec_dont_know;
4335       return;
4336     }
4337   if (dump_file && (dump_flags & TDF_DETAILS))
4338     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4339
4340   /* For determining the initial intersection, we have to solve a
4341      Diophantine equation.  This is the most time consuming part.
4342
4343      For answering to the question: "Is there a dependence?" we have
4344      to prove that there exists a solution to the Diophantine
4345      equation, and that the solution is in the iteration domain,
4346      i.e. the solution is positive or zero, and that the solution
4347      happens before the upper bound loop.nb_iterations.  Otherwise
4348      there is no dependence.  This function outputs a description of
4349      the iterations that hold the intersections.  */
4350
4351   nb_vars_a = nb_vars_in_chrec (chrec_a);
4352   nb_vars_b = nb_vars_in_chrec (chrec_b);
4353
4354   gcc_obstack_init (&scratch_obstack);
4355
4356   dim = nb_vars_a + nb_vars_b;
4357   U = lambda_matrix_new (dim, dim, &scratch_obstack);
4358   A = lambda_matrix_new (dim, 1, &scratch_obstack);
4359   S = lambda_matrix_new (dim, 1, &scratch_obstack);
4360
4361   tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4362   tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4363   if (init_a == chrec_dont_know
4364       || init_b == chrec_dont_know)
4365     {
4366       if (dump_file && (dump_flags & TDF_DETAILS))
4367         fprintf (dump_file, "affine-affine test failed: "
4368                  "representation issue.\n");
4369       *overlaps_a = conflict_fn_not_known ();
4370       *overlaps_b = conflict_fn_not_known ();
4371       *last_conflicts = chrec_dont_know;
4372       goto end_analyze_subs_aa;
4373     }
4374   gamma = int_cst_value (init_b) - int_cst_value (init_a);
4375
4376   /* Don't do all the hard work of solving the Diophantine equation
4377      when we already know the solution: for example,
4378      | {3, +, 1}_1
4379      | {3, +, 4}_2
4380      | gamma = 3 - 3 = 0.
4381      Then the first overlap occurs during the first iterations:
4382      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4383   */
4384   if (gamma == 0)
4385     {
4386       if (nb_vars_a == 1 && nb_vars_b == 1)
4387         {
4388           HOST_WIDE_INT step_a, step_b;
4389           HOST_WIDE_INT niter, niter_a, niter_b;
4390           affine_fn ova, ovb;
4391
4392           niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4393           niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4394           niter = MIN (niter_a, niter_b);
4395           step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4396           step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4397
4398           compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4399                                                    &ova, &ovb,
4400                                                    last_conflicts, 1);
4401           *overlaps_a = conflict_fn (1, ova);
4402           *overlaps_b = conflict_fn (1, ovb);
4403         }
4404
4405       else if (nb_vars_a == 2 && nb_vars_b == 1)
4406         compute_overlap_steps_for_affine_1_2
4407           (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4408
4409       else if (nb_vars_a == 1 && nb_vars_b == 2)
4410         compute_overlap_steps_for_affine_1_2
4411           (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4412
4413       else
4414         {
4415           if (dump_file && (dump_flags & TDF_DETAILS))
4416             fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4417           *overlaps_a = conflict_fn_not_known ();
4418           *overlaps_b = conflict_fn_not_known ();
4419           *last_conflicts = chrec_dont_know;
4420         }
4421       goto end_analyze_subs_aa;
4422     }
4423
4424   /* U.A = S */
4425   if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
4426     {
4427       *overlaps_a = conflict_fn_not_known ();
4428       *overlaps_b = conflict_fn_not_known ();
4429       *last_conflicts = chrec_dont_know;
4430       goto end_analyze_subs_aa;
4431     }
4432
4433   if (S[0][0] < 0)
4434     {
4435       S[0][0] *= -1;
4436       lambda_matrix_row_negate (U, dim, 0);
4437     }
4438   gcd_alpha_beta = S[0][0];
4439
4440   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4441      but that is a quite strange case.  Instead of ICEing, answer
4442      don't know.  */
4443   if (gcd_alpha_beta == 0)
4444     {
4445       *overlaps_a = conflict_fn_not_known ();
4446       *overlaps_b = conflict_fn_not_known ();
4447       *last_conflicts = chrec_dont_know;
4448       goto end_analyze_subs_aa;
4449     }
4450
4451   /* The classic "gcd-test".  */
4452   if (!int_divides_p (gcd_alpha_beta, gamma))
4453     {
4454       /* The "gcd-test" has determined that there is no integer
4455          solution, i.e. there is no dependence.  */
4456       *overlaps_a = conflict_fn_no_dependence ();
4457       *overlaps_b = conflict_fn_no_dependence ();
4458       *last_conflicts = integer_zero_node;
4459     }
4460
4461   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
4462   else if (nb_vars_a == 1 && nb_vars_b == 1)
4463     {
4464       /* Both functions should have the same evolution sign.  */
4465       if (((A[0][0] > 0 && -A[1][0] > 0)
4466            || (A[0][0] < 0 && -A[1][0] < 0)))
4467         {
4468           /* The solutions are given by:
4469              |
4470              | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
4471              |                           [u21 u22]    [y0]
4472
4473              For a given integer t.  Using the following variables,
4474
4475              | i0 = u11 * gamma / gcd_alpha_beta
4476              | j0 = u12 * gamma / gcd_alpha_beta
4477              | i1 = u21
4478              | j1 = u22
4479
4480              the solutions are:
4481
4482              | x0 = i0 + i1 * t,
4483              | y0 = j0 + j1 * t.  */
4484           HOST_WIDE_INT i0, j0, i1, j1;
4485
4486           i0 = U[0][0] * gamma / gcd_alpha_beta;
4487           j0 = U[0][1] * gamma / gcd_alpha_beta;
4488           i1 = U[1][0];
4489           j1 = U[1][1];
4490
4491           if ((i1 == 0 && i0 < 0)
4492               || (j1 == 0 && j0 < 0))
4493             {
4494               /* There is no solution.
4495                  FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4496                  falls in here, but for the moment we don't look at the
4497                  upper bound of the iteration domain.  */
4498               *overlaps_a = conflict_fn_no_dependence ();
4499               *overlaps_b = conflict_fn_no_dependence ();
4500               *last_conflicts = integer_zero_node;
4501               goto end_analyze_subs_aa;
4502             }
4503
4504           if (i1 > 0 && j1 > 0)
4505             {
4506               HOST_WIDE_INT niter_a
4507                 = max_stmt_executions_int (get_chrec_loop (chrec_a));
4508               HOST_WIDE_INT niter_b
4509                 = max_stmt_executions_int (get_chrec_loop (chrec_b));
4510               HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4511
4512               /* (X0, Y0) is a solution of the Diophantine equation:
4513                  "chrec_a (X0) = chrec_b (Y0)".  */
4514               HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4515                                         CEIL (-j0, j1));
4516               HOST_WIDE_INT x0 = i1 * tau1 + i0;
4517               HOST_WIDE_INT y0 = j1 * tau1 + j0;
4518
4519               /* (X1, Y1) is the smallest positive solution of the eq
4520                  "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4521                  first conflict occurs.  */
4522               HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4523               HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4524               HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4525
4526               if (niter > 0)
4527                 {
4528                   /* If the overlap occurs outside of the bounds of the
4529                      loop, there is no dependence.  */
4530                   if (x1 >= niter_a || y1 >= niter_b)
4531                     {
4532                       *overlaps_a = conflict_fn_no_dependence ();
4533                       *overlaps_b = conflict_fn_no_dependence ();
4534                       *last_conflicts = integer_zero_node;
4535                       goto end_analyze_subs_aa;
4536                     }
4537
4538                   /* max stmt executions can get quite large, avoid
4539                      overflows by using wide ints here.  */
4540                   widest_int tau2
4541                     = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4542                                 wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4543                   widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4544                   if (wi::min_precision (last_conflict, SIGNED)
4545                       <= TYPE_PRECISION (integer_type_node))
4546                     *last_conflicts
4547                        = build_int_cst (integer_type_node,
4548                                         last_conflict.to_shwi ());
4549                   else
4550                     *last_conflicts = chrec_dont_know;
4551                 }
4552               else
4553                 *last_conflicts = chrec_dont_know;
4554
4555               *overlaps_a
4556                 = conflict_fn (1,
4557                                affine_fn_univar (build_int_cst (NULL_TREE, x1),
4558                                                  1,
4559                                                  build_int_cst (NULL_TREE, i1)));
4560               *overlaps_b
4561                 = conflict_fn (1,
4562                                affine_fn_univar (build_int_cst (NULL_TREE, y1),
4563                                                  1,
4564                                                  build_int_cst (NULL_TREE, j1)));
4565             }
4566           else
4567             {
4568               /* FIXME: For the moment, the upper bound of the
4569                  iteration domain for i and j is not checked.  */
4570               if (dump_file && (dump_flags & TDF_DETAILS))
4571                 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4572               *overlaps_a = conflict_fn_not_known ();
4573               *overlaps_b = conflict_fn_not_known ();
4574               *last_conflicts = chrec_dont_know;
4575             }
4576         }
4577       else
4578         {
4579           if (dump_file && (dump_flags & TDF_DETAILS))
4580             fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4581           *overlaps_a = conflict_fn_not_known ();
4582           *overlaps_b = conflict_fn_not_known ();
4583           *last_conflicts = chrec_dont_know;
4584         }
4585     }
4586   else
4587     {
4588       if (dump_file && (dump_flags & TDF_DETAILS))
4589         fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4590       *overlaps_a = conflict_fn_not_known ();
4591       *overlaps_b = conflict_fn_not_known ();
4592       *last_conflicts = chrec_dont_know;
4593     }
4594
4595 end_analyze_subs_aa:
4596   obstack_free (&scratch_obstack, NULL);
4597   if (dump_file && (dump_flags & TDF_DETAILS))
4598     {
4599       fprintf (dump_file, "  (overlaps_a = ");
4600       dump_conflict_function (dump_file, *overlaps_a);
4601       fprintf (dump_file, ")\n  (overlaps_b = ");
4602       dump_conflict_function (dump_file, *overlaps_b);
4603       fprintf (dump_file, "))\n");
4604     }
4605 }
4606
4607 /* Returns true when analyze_subscript_affine_affine can be used for
4608    determining the dependence relation between chrec_a and chrec_b,
4609    that contain symbols.  This function modifies chrec_a and chrec_b
4610    such that the analysis result is the same, and such that they don't
4611    contain symbols, and then can safely be passed to the analyzer.
4612
4613    Example: The analysis of the following tuples of evolutions produce
4614    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4615    vs. {0, +, 1}_1
4616
4617    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4618    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4619 */
4620
4621 static bool
4622 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4623 {
4624   tree diff, type, left_a, left_b, right_b;
4625
4626   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4627       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4628     /* FIXME: For the moment not handled.  Might be refined later.  */
4629     return false;
4630
4631   type = chrec_type (*chrec_a);
4632   left_a = CHREC_LEFT (*chrec_a);
4633   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4634   diff = chrec_fold_minus (type, left_a, left_b);
4635
4636   if (!evolution_function_is_constant_p (diff))
4637     return false;
4638
4639   if (dump_file && (dump_flags & TDF_DETAILS))
4640     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4641
4642   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4643                                      diff, CHREC_RIGHT (*chrec_a));
4644   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4645   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4646                                      build_int_cst (type, 0),
4647                                      right_b);
4648   return true;
4649 }
4650
4651 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
4652    *OVERLAPS_B are initialized to the functions that describe the
4653    relation between the elements accessed twice by CHREC_A and
4654    CHREC_B.  For k >= 0, the following property is verified:
4655
4656    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4657
4658 static void
4659 analyze_siv_subscript (tree chrec_a,
4660                        tree chrec_b,
4661                        conflict_function **overlaps_a,
4662                        conflict_function **overlaps_b,
4663                        tree *last_conflicts,
4664                        int loop_nest_num)
4665 {
4666   dependence_stats.num_siv++;
4667
4668   if (dump_file && (dump_flags & TDF_DETAILS))
4669     fprintf (dump_file, "(analyze_siv_subscript \n");
4670
4671   if (evolution_function_is_constant_p (chrec_a)
4672       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4673     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4674                                       overlaps_a, overlaps_b, last_conflicts);
4675
4676   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4677            && evolution_function_is_constant_p (chrec_b))
4678     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4679                                       overlaps_b, overlaps_a, last_conflicts);
4680
4681   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4682            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4683     {
4684       if (!chrec_contains_symbols (chrec_a)
4685           && !chrec_contains_symbols (chrec_b))
4686         {
4687           analyze_subscript_affine_affine (chrec_a, chrec_b,
4688                                            overlaps_a, overlaps_b,
4689                                            last_conflicts);
4690
4691           if (CF_NOT_KNOWN_P (*overlaps_a)
4692               || CF_NOT_KNOWN_P (*overlaps_b))
4693             dependence_stats.num_siv_unimplemented++;
4694           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4695                    || CF_NO_DEPENDENCE_P (*overlaps_b))
4696             dependence_stats.num_siv_independent++;
4697           else
4698             dependence_stats.num_siv_dependent++;
4699         }
4700       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4701                                                         &chrec_b))
4702         {
4703           analyze_subscript_affine_affine (chrec_a, chrec_b,
4704                                            overlaps_a, overlaps_b,
4705                                            last_conflicts);
4706
4707           if (CF_NOT_KNOWN_P (*overlaps_a)
4708               || CF_NOT_KNOWN_P (*overlaps_b))
4709             dependence_stats.num_siv_unimplemented++;
4710           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4711                    || CF_NO_DEPENDENCE_P (*overlaps_b))
4712             dependence_stats.num_siv_independent++;
4713           else
4714             dependence_stats.num_siv_dependent++;
4715         }
4716       else
4717         goto siv_subscript_dontknow;
4718     }
4719
4720   else
4721     {
4722     siv_subscript_dontknow:;
4723       if (dump_file && (dump_flags & TDF_DETAILS))
4724         fprintf (dump_file, "  siv test failed: unimplemented");
4725       *overlaps_a = conflict_fn_not_known ();
4726       *overlaps_b = conflict_fn_not_known ();
4727       *last_conflicts = chrec_dont_know;
4728       dependence_stats.num_siv_unimplemented++;
4729     }
4730
4731   if (dump_file && (dump_flags & TDF_DETAILS))
4732     fprintf (dump_file, ")\n");
4733 }
4734
4735 /* Returns false if we can prove that the greatest common divisor of the steps
4736    of CHREC does not divide CST, false otherwise.  */
4737
4738 static bool
4739 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4740 {
4741   HOST_WIDE_INT cd = 0, val;
4742   tree step;
4743
4744   if (!tree_fits_shwi_p (cst))
4745     return true;
4746   val = tree_to_shwi (cst);
4747
4748   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4749     {
4750       step = CHREC_RIGHT (chrec);
4751       if (!tree_fits_shwi_p (step))
4752         return true;
4753       cd = gcd (cd, tree_to_shwi (step));
4754       chrec = CHREC_LEFT (chrec);
4755     }
4756
4757   return val % cd == 0;
4758 }
4759
4760 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4761    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
4762    functions that describe the relation between the elements accessed
4763    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
4764    is verified:
4765
4766    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4767
4768 static void
4769 analyze_miv_subscript (tree chrec_a,
4770                        tree chrec_b,
4771                        conflict_function **overlaps_a,
4772                        conflict_function **overlaps_b,
4773                        tree *last_conflicts,
4774                        class loop *loop_nest)
4775 {
4776   tree type, difference;
4777
4778   dependence_stats.num_miv++;
4779   if (dump_file && (dump_flags & TDF_DETAILS))
4780     fprintf (dump_file, "(analyze_miv_subscript \n");
4781
4782   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4783   chrec_a = chrec_convert (type, chrec_a, NULL);
4784   chrec_b = chrec_convert (type, chrec_b, NULL);
4785   difference = chrec_fold_minus (type, chrec_a, chrec_b);
4786
4787   if (eq_evolutions_p (chrec_a, chrec_b))
4788     {
4789       /* Access functions are the same: all the elements are accessed
4790          in the same order.  */
4791       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4792       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4793       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4794       dependence_stats.num_miv_dependent++;
4795     }
4796
4797   else if (evolution_function_is_constant_p (difference)
4798            && evolution_function_is_affine_multivariate_p (chrec_a,
4799                                                            loop_nest->num)
4800            && !gcd_of_steps_may_divide_p (chrec_a, difference))
4801     {
4802       /* testsuite/.../ssa-chrec-33.c
4803          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
4804
4805          The difference is 1, and all the evolution steps are multiples
4806          of 2, consequently there are no overlapping elements.  */
4807       *overlaps_a = conflict_fn_no_dependence ();
4808       *overlaps_b = conflict_fn_no_dependence ();
4809       *last_conflicts = integer_zero_node;
4810       dependence_stats.num_miv_independent++;
4811     }
4812
4813   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4814            && !chrec_contains_symbols (chrec_a, loop_nest)
4815            && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4816            && !chrec_contains_symbols (chrec_b, loop_nest))
4817     {
4818       /* testsuite/.../ssa-chrec-35.c
4819          {0, +, 1}_2  vs.  {0, +, 1}_3
4820          the overlapping elements are respectively located at iterations:
4821          {0, +, 1}_x and {0, +, 1}_x,
4822          in other words, we have the equality:
4823          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4824
4825          Other examples:
4826          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4827          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4828
4829          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4830          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4831       */
4832       analyze_subscript_affine_affine (chrec_a, chrec_b,
4833                                        overlaps_a, overlaps_b, last_conflicts);
4834
4835       if (CF_NOT_KNOWN_P (*overlaps_a)
4836           || CF_NOT_KNOWN_P (*overlaps_b))
4837         dependence_stats.num_miv_unimplemented++;
4838       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4839                || CF_NO_DEPENDENCE_P (*overlaps_b))
4840         dependence_stats.num_miv_independent++;
4841       else
4842         dependence_stats.num_miv_dependent++;
4843     }
4844
4845   else
4846     {
4847       /* When the analysis is too difficult, answer "don't know".  */
4848       if (dump_file && (dump_flags & TDF_DETAILS))
4849         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4850
4851       *overlaps_a = conflict_fn_not_known ();
4852       *overlaps_b = conflict_fn_not_known ();
4853       *last_conflicts = chrec_dont_know;
4854       dependence_stats.num_miv_unimplemented++;
4855     }
4856
4857   if (dump_file && (dump_flags & TDF_DETAILS))
4858     fprintf (dump_file, ")\n");
4859 }
4860
4861 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4862    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4863    OVERLAP_ITERATIONS_B are initialized with two functions that
4864    describe the iterations that contain conflicting elements.
4865
4866    Remark: For an integer k >= 0, the following equality is true:
4867
4868    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4869 */
4870
4871 static void
4872 analyze_overlapping_iterations (tree chrec_a,
4873                                 tree chrec_b,
4874                                 conflict_function **overlap_iterations_a,
4875                                 conflict_function **overlap_iterations_b,
4876                                 tree *last_conflicts, class loop *loop_nest)
4877 {
4878   unsigned int lnn = loop_nest->num;
4879
4880   dependence_stats.num_subscript_tests++;
4881
4882   if (dump_file && (dump_flags & TDF_DETAILS))
4883     {
4884       fprintf (dump_file, "(analyze_overlapping_iterations \n");
4885       fprintf (dump_file, "  (chrec_a = ");
4886       print_generic_expr (dump_file, chrec_a);
4887       fprintf (dump_file, ")\n  (chrec_b = ");
4888       print_generic_expr (dump_file, chrec_b);
4889       fprintf (dump_file, ")\n");
4890     }
4891
4892   if (chrec_a == NULL_TREE
4893       || chrec_b == NULL_TREE
4894       || chrec_contains_undetermined (chrec_a)
4895       || chrec_contains_undetermined (chrec_b))
4896     {
4897       dependence_stats.num_subscript_undetermined++;
4898
4899       *overlap_iterations_a = conflict_fn_not_known ();
4900       *overlap_iterations_b = conflict_fn_not_known ();
4901     }
4902
4903   /* If they are the same chrec, and are affine, they overlap
4904      on every iteration.  */
4905   else if (eq_evolutions_p (chrec_a, chrec_b)
4906            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4907                || operand_equal_p (chrec_a, chrec_b, 0)))
4908     {
4909       dependence_stats.num_same_subscript_function++;
4910       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4911       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4912       *last_conflicts = chrec_dont_know;
4913     }
4914
4915   /* If they aren't the same, and aren't affine, we can't do anything
4916      yet.  */
4917   else if ((chrec_contains_symbols (chrec_a)
4918             || chrec_contains_symbols (chrec_b))
4919            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4920                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4921     {
4922       dependence_stats.num_subscript_undetermined++;
4923       *overlap_iterations_a = conflict_fn_not_known ();
4924       *overlap_iterations_b = conflict_fn_not_known ();
4925     }
4926
4927   else if (ziv_subscript_p (chrec_a, chrec_b))
4928     analyze_ziv_subscript (chrec_a, chrec_b,
4929                            overlap_iterations_a, overlap_iterations_b,
4930                            last_conflicts);
4931
4932   else if (siv_subscript_p (chrec_a, chrec_b))
4933     analyze_siv_subscript (chrec_a, chrec_b,
4934                            overlap_iterations_a, overlap_iterations_b,
4935                            last_conflicts, lnn);
4936
4937   else
4938     analyze_miv_subscript (chrec_a, chrec_b,
4939                            overlap_iterations_a, overlap_iterations_b,
4940                            last_conflicts, loop_nest);
4941
4942   if (dump_file && (dump_flags & TDF_DETAILS))
4943     {
4944       fprintf (dump_file, "  (overlap_iterations_a = ");
4945       dump_conflict_function (dump_file, *overlap_iterations_a);
4946       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
4947       dump_conflict_function (dump_file, *overlap_iterations_b);
4948       fprintf (dump_file, "))\n");
4949     }
4950 }
4951
4952 /* Helper function for uniquely inserting distance vectors.  */
4953
4954 static void
4955 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4956 {
4957   for (lambda_vector v : DDR_DIST_VECTS (ddr))
4958     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4959       return;
4960
4961   DDR_DIST_VECTS (ddr).safe_push (dist_v);
4962 }
4963
4964 /* Helper function for uniquely inserting direction vectors.  */
4965
4966 static void
4967 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4968 {
4969   for (lambda_vector v : DDR_DIR_VECTS (ddr))
4970     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4971       return;
4972
4973   DDR_DIR_VECTS (ddr).safe_push (dir_v);
4974 }
4975
4976 /* Add a distance of 1 on all the loops outer than INDEX.  If we
4977    haven't yet determined a distance for this outer loop, push a new
4978    distance vector composed of the previous distance, and a distance
4979    of 1 for this outer loop.  Example:
4980
4981    | loop_1
4982    |   loop_2
4983    |     A[10]
4984    |   endloop_2
4985    | endloop_1
4986
4987    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
4988    save (0, 1), then we have to save (1, 0).  */
4989
4990 static void
4991 add_outer_distances (struct data_dependence_relation *ddr,
4992                      lambda_vector dist_v, int index)
4993 {
4994   /* For each outer loop where init_v is not set, the accesses are
4995      in dependence of distance 1 in the loop.  */
4996   while (--index >= 0)
4997     {
4998       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4999       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5000       save_v[index] = 1;
5001       save_dist_v (ddr, save_v);
5002     }
5003 }
5004
5005 /* Return false when fail to represent the data dependence as a
5006    distance vector.  A_INDEX is the index of the first reference
5007    (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5008    second reference.  INIT_B is set to true when a component has been
5009    added to the distance vector DIST_V.  INDEX_CARRY is then set to
5010    the index in DIST_V that carries the dependence.  */
5011
5012 static bool
5013 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5014                              unsigned int a_index, unsigned int b_index,
5015                              lambda_vector dist_v, bool *init_b,
5016                              int *index_carry)
5017 {
5018   unsigned i;
5019   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5020   class loop *loop = DDR_LOOP_NEST (ddr)[0];
5021
5022   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5023     {
5024       tree access_fn_a, access_fn_b;
5025       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5026
5027       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5028         {
5029           non_affine_dependence_relation (ddr);
5030           return false;
5031         }
5032
5033       access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5034       access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5035
5036       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5037           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5038         {
5039           HOST_WIDE_INT dist;
5040           int index;
5041           int var_a = CHREC_VARIABLE (access_fn_a);
5042           int var_b = CHREC_VARIABLE (access_fn_b);
5043
5044           if (var_a != var_b
5045               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5046             {
5047               non_affine_dependence_relation (ddr);
5048               return false;
5049             }
5050
5051           /* When data references are collected in a loop while data
5052              dependences are analyzed in loop nest nested in the loop, we
5053              would have more number of access functions than number of
5054              loops.  Skip access functions of loops not in the loop nest.
5055
5056              See PR89725 for more information.  */
5057           if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5058             continue;
5059
5060           dist = int_cst_value (SUB_DISTANCE (subscript));
5061           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5062           *index_carry = MIN (index, *index_carry);
5063
5064           /* This is the subscript coupling test.  If we have already
5065              recorded a distance for this loop (a distance coming from
5066              another subscript), it should be the same.  For example,
5067              in the following code, there is no dependence:
5068
5069              | loop i = 0, N, 1
5070              |   T[i+1][i] = ...
5071              |   ... = T[i][i]
5072              | endloop
5073           */
5074           if (init_v[index] != 0 && dist_v[index] != dist)
5075             {
5076               finalize_ddr_dependent (ddr, chrec_known);
5077               return false;
5078             }
5079
5080           dist_v[index] = dist;
5081           init_v[index] = 1;
5082           *init_b = true;
5083         }
5084       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5085         {
5086           /* This can be for example an affine vs. constant dependence
5087              (T[i] vs. T[3]) that is not an affine dependence and is
5088              not representable as a distance vector.  */
5089           non_affine_dependence_relation (ddr);
5090           return false;
5091         }
5092       else
5093         *init_b = true;
5094     }
5095
5096   return true;
5097 }
5098
5099 /* Return true when the DDR contains only invariant access functions wrto. loop
5100    number LNUM.  */
5101
5102 static bool
5103 invariant_access_functions (const struct data_dependence_relation *ddr,
5104                             int lnum)
5105 {
5106   for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5107     if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5108         || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5109       return false;
5110
5111   return true;
5112 }
5113
5114 /* Helper function for the case where DDR_A and DDR_B are the same
5115    multivariate access function with a constant step.  For an example
5116    see pr34635-1.c.  */
5117
5118 static void
5119 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5120 {
5121   int x_1, x_2;
5122   tree c_1 = CHREC_LEFT (c_2);
5123   tree c_0 = CHREC_LEFT (c_1);
5124   lambda_vector dist_v;
5125   HOST_WIDE_INT v1, v2, cd;
5126
5127   /* Polynomials with more than 2 variables are not handled yet.  When
5128      the evolution steps are parameters, it is not possible to
5129      represent the dependence using classical distance vectors.  */
5130   if (TREE_CODE (c_0) != INTEGER_CST
5131       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5132       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5133     {
5134       DDR_AFFINE_P (ddr) = false;
5135       return;
5136     }
5137
5138   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5139   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5140
5141   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
5142   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5143   v1 = int_cst_value (CHREC_RIGHT (c_1));
5144   v2 = int_cst_value (CHREC_RIGHT (c_2));
5145   cd = gcd (v1, v2);
5146   v1 /= cd;
5147   v2 /= cd;
5148
5149   if (v2 < 0)
5150     {
5151       v2 = -v2;
5152       v1 = -v1;
5153     }
5154
5155   dist_v[x_1] = v2;
5156   dist_v[x_2] = -v1;
5157   save_dist_v (ddr, dist_v);
5158
5159   add_outer_distances (ddr, dist_v, x_1);
5160 }
5161
5162 /* Helper function for the case where DDR_A and DDR_B are the same
5163    access functions.  */
5164
5165 static void
5166 add_other_self_distances (struct data_dependence_relation *ddr)
5167 {
5168   lambda_vector dist_v;
5169   unsigned i;
5170   int index_carry = DDR_NB_LOOPS (ddr);
5171   subscript *sub;
5172   class loop *loop = DDR_LOOP_NEST (ddr)[0];
5173
5174   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5175     {
5176       tree access_fun = SUB_ACCESS_FN (sub, 0);
5177
5178       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5179         {
5180           if (!evolution_function_is_univariate_p (access_fun, loop->num))
5181             {
5182               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5183                 {
5184                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5185                   return;
5186                 }
5187
5188               access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5189
5190               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5191                 add_multivariate_self_dist (ddr, access_fun);
5192               else
5193                 /* The evolution step is not constant: it varies in
5194                    the outer loop, so this cannot be represented by a
5195                    distance vector.  For example in pr34635.c the
5196                    evolution is {0, +, {0, +, 4}_1}_2.  */
5197                 DDR_AFFINE_P (ddr) = false;
5198
5199               return;
5200             }
5201
5202           /* When data references are collected in a loop while data
5203              dependences are analyzed in loop nest nested in the loop, we
5204              would have more number of access functions than number of
5205              loops.  Skip access functions of loops not in the loop nest.
5206
5207              See PR89725 for more information.  */
5208           if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5209                                   loop))
5210             continue;
5211
5212           index_carry = MIN (index_carry,
5213                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
5214                                                  DDR_LOOP_NEST (ddr)));
5215         }
5216     }
5217
5218   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5219   add_outer_distances (ddr, dist_v, index_carry);
5220 }
5221
5222 static void
5223 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5224 {
5225   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5226
5227   dist_v[0] = 1;
5228   save_dist_v (ddr, dist_v);
5229 }
5230
5231 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
5232    is the case for example when access functions are the same and
5233    equal to a constant, as in:
5234
5235    | loop_1
5236    |   A[3] = ...
5237    |   ... = A[3]
5238    | endloop_1
5239
5240    in which case the distance vectors are (0) and (1).  */
5241
5242 static void
5243 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5244 {
5245   unsigned i, j;
5246
5247   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5248     {
5249       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5250       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5251       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5252
5253       for (j = 0; j < ca->n; j++)
5254         if (affine_function_zero_p (ca->fns[j]))
5255           {
5256             insert_innermost_unit_dist_vector (ddr);
5257             return;
5258           }
5259
5260       for (j = 0; j < cb->n; j++)
5261         if (affine_function_zero_p (cb->fns[j]))
5262           {
5263             insert_innermost_unit_dist_vector (ddr);
5264             return;
5265           }
5266     }
5267 }
5268
5269 /* Return true when the DDR contains two data references that have the
5270    same access functions.  */
5271
5272 static inline bool
5273 same_access_functions (const struct data_dependence_relation *ddr)
5274 {
5275   for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5276     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5277                           SUB_ACCESS_FN (sub, 1)))
5278       return false;
5279
5280   return true;
5281 }
5282
5283 /* Compute the classic per loop distance vector.  DDR is the data
5284    dependence relation to build a vector from.  Return false when fail
5285    to represent the data dependence as a distance vector.  */
5286
5287 static bool
5288 build_classic_dist_vector (struct data_dependence_relation *ddr,
5289                            class loop *loop_nest)
5290 {
5291   bool init_b = false;
5292   int index_carry = DDR_NB_LOOPS (ddr);
5293   lambda_vector dist_v;
5294
5295   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5296     return false;
5297
5298   if (same_access_functions (ddr))
5299     {
5300       /* Save the 0 vector.  */
5301       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5302       save_dist_v (ddr, dist_v);
5303
5304       if (invariant_access_functions (ddr, loop_nest->num))
5305         add_distance_for_zero_overlaps (ddr);
5306
5307       if (DDR_NB_LOOPS (ddr) > 1)
5308         add_other_self_distances (ddr);
5309
5310       return true;
5311     }
5312
5313   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5314   if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5315     return false;
5316
5317   /* Save the distance vector if we initialized one.  */
5318   if (init_b)
5319     {
5320       /* Verify a basic constraint: classic distance vectors should
5321          always be lexicographically positive.
5322
5323          Data references are collected in the order of execution of
5324          the program, thus for the following loop
5325
5326          | for (i = 1; i < 100; i++)
5327          |   for (j = 1; j < 100; j++)
5328          |     {
5329          |       t = T[j+1][i-1];  // A
5330          |       T[j][i] = t + 2;  // B
5331          |     }
5332
5333          references are collected following the direction of the wind:
5334          A then B.  The data dependence tests are performed also
5335          following this order, such that we're looking at the distance
5336          separating the elements accessed by A from the elements later
5337          accessed by B.  But in this example, the distance returned by
5338          test_dep (A, B) is lexicographically negative (-1, 1), that
5339          means that the access A occurs later than B with respect to
5340          the outer loop, ie. we're actually looking upwind.  In this
5341          case we solve test_dep (B, A) looking downwind to the
5342          lexicographically positive solution, that returns the
5343          distance vector (1, -1).  */
5344       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5345         {
5346           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5347           if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5348             return false;
5349           compute_subscript_distance (ddr);
5350           if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5351                                             &index_carry))
5352             return false;
5353           save_dist_v (ddr, save_v);
5354           DDR_REVERSED_P (ddr) = true;
5355
5356           /* In this case there is a dependence forward for all the
5357              outer loops:
5358
5359              | for (k = 1; k < 100; k++)
5360              |  for (i = 1; i < 100; i++)
5361              |   for (j = 1; j < 100; j++)
5362              |     {
5363              |       t = T[j+1][i-1];  // A
5364              |       T[j][i] = t + 2;  // B
5365              |     }
5366
5367              the vectors are:
5368              (0,  1, -1)
5369              (1,  1, -1)
5370              (1, -1,  1)
5371           */
5372           if (DDR_NB_LOOPS (ddr) > 1)
5373             {
5374               add_outer_distances (ddr, save_v, index_carry);
5375               add_outer_distances (ddr, dist_v, index_carry);
5376             }
5377         }
5378       else
5379         {
5380           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5381           lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5382
5383           if (DDR_NB_LOOPS (ddr) > 1)
5384             {
5385               lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5386
5387               if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5388                 return false;
5389               compute_subscript_distance (ddr);
5390               if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5391                                                 &index_carry))
5392                 return false;
5393
5394               save_dist_v (ddr, save_v);
5395               add_outer_distances (ddr, dist_v, index_carry);
5396               add_outer_distances (ddr, opposite_v, index_carry);
5397             }
5398           else
5399             save_dist_v (ddr, save_v);
5400         }
5401     }
5402   else
5403     {
5404       /* There is a distance of 1 on all the outer loops: Example:
5405          there is a dependence of distance 1 on loop_1 for the array A.
5406
5407          | loop_1
5408          |   A[5] = ...
5409          | endloop
5410       */
5411       add_outer_distances (ddr, dist_v,
5412                            lambda_vector_first_nz (dist_v,
5413                                                    DDR_NB_LOOPS (ddr), 0));
5414     }
5415
5416   if (dump_file && (dump_flags & TDF_DETAILS))
5417     {
5418       unsigned i;
5419
5420       fprintf (dump_file, "(build_classic_dist_vector\n");
5421       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5422         {
5423           fprintf (dump_file, "  dist_vector = (");
5424           print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5425                                DDR_NB_LOOPS (ddr));
5426           fprintf (dump_file, "  )\n");
5427         }
5428       fprintf (dump_file, ")\n");
5429     }
5430
5431   return true;
5432 }
5433
5434 /* Return the direction for a given distance.
5435    FIXME: Computing dir this way is suboptimal, since dir can catch
5436    cases that dist is unable to represent.  */
5437
5438 static inline enum data_dependence_direction
5439 dir_from_dist (int dist)
5440 {
5441   if (dist > 0)
5442     return dir_positive;
5443   else if (dist < 0)
5444     return dir_negative;
5445   else
5446     return dir_equal;
5447 }
5448
5449 /* Compute the classic per loop direction vector.  DDR is the data
5450    dependence relation to build a vector from.  */
5451
5452 static void
5453 build_classic_dir_vector (struct data_dependence_relation *ddr)
5454 {
5455   unsigned i, j;
5456   lambda_vector dist_v;
5457
5458   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5459     {
5460       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5461
5462       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5463         dir_v[j] = dir_from_dist (dist_v[j]);
5464
5465       save_dir_v (ddr, dir_v);
5466     }
5467 }
5468
5469 /* Helper function.  Returns true when there is a dependence between the
5470    data references.  A_INDEX is the index of the first reference (0 for
5471    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
5472
5473 static bool
5474 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
5475                                unsigned int a_index, unsigned int b_index,
5476                                class loop *loop_nest)
5477 {
5478   unsigned int i;
5479   tree last_conflicts;
5480   struct subscript *subscript;
5481   tree res = NULL_TREE;
5482
5483   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
5484     {
5485       conflict_function *overlaps_a, *overlaps_b;
5486
5487       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
5488                                       SUB_ACCESS_FN (subscript, b_index),
5489                                       &overlaps_a, &overlaps_b,
5490                                       &last_conflicts, loop_nest);
5491
5492       if (SUB_CONFLICTS_IN_A (subscript))
5493         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
5494       if (SUB_CONFLICTS_IN_B (subscript))
5495         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
5496
5497       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
5498       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
5499       SUB_LAST_CONFLICT (subscript) = last_conflicts;
5500
5501       /* If there is any undetermined conflict function we have to
5502          give a conservative answer in case we cannot prove that
5503          no dependence exists when analyzing another subscript.  */
5504       if (CF_NOT_KNOWN_P (overlaps_a)
5505           || CF_NOT_KNOWN_P (overlaps_b))
5506         {
5507           res = chrec_dont_know;
5508           continue;
5509         }
5510
5511       /* When there is a subscript with no dependence we can stop.  */
5512       else if (CF_NO_DEPENDENCE_P (overlaps_a)
5513                || CF_NO_DEPENDENCE_P (overlaps_b))
5514         {
5515           res = chrec_known;
5516           break;
5517         }
5518     }
5519
5520   if (res == NULL_TREE)
5521     return true;
5522
5523   if (res == chrec_known)
5524     dependence_stats.num_dependence_independent++;
5525   else
5526     dependence_stats.num_dependence_undetermined++;
5527   finalize_ddr_dependent (ddr, res);
5528   return false;
5529 }
5530
5531 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
5532
5533 static void
5534 subscript_dependence_tester (struct data_dependence_relation *ddr,
5535                              class loop *loop_nest)
5536 {
5537   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5538     dependence_stats.num_dependence_dependent++;
5539
5540   compute_subscript_distance (ddr);
5541   if (build_classic_dist_vector (ddr, loop_nest))
5542     build_classic_dir_vector (ddr);
5543 }
5544
5545 /* Returns true when all the access functions of A are affine or
5546    constant with respect to LOOP_NEST.  */
5547
5548 static bool
5549 access_functions_are_affine_or_constant_p (const struct data_reference *a,
5550                                            const class loop *loop_nest)
5551 {
5552   vec<tree> fns = DR_ACCESS_FNS (a);
5553   for (tree t : fns)
5554     if (!evolution_function_is_invariant_p (t, loop_nest->num)
5555         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5556       return false;
5557
5558   return true;
5559 }
5560
5561 /* This computes the affine dependence relation between A and B with
5562    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
5563    independence between two accesses, while CHREC_DONT_KNOW is used
5564    for representing the unknown relation.
5565
5566    Note that it is possible to stop the computation of the dependence
5567    relation the first time we detect a CHREC_KNOWN element for a given
5568    subscript.  */
5569
5570 void
5571 compute_affine_dependence (struct data_dependence_relation *ddr,
5572                            class loop *loop_nest)
5573 {
5574   struct data_reference *dra = DDR_A (ddr);
5575   struct data_reference *drb = DDR_B (ddr);
5576
5577   if (dump_file && (dump_flags & TDF_DETAILS))
5578     {
5579       fprintf (dump_file, "(compute_affine_dependence\n");
5580       fprintf (dump_file, "  ref_a: ");
5581       print_generic_expr (dump_file, DR_REF (dra));
5582       fprintf (dump_file, ", stmt_a: ");
5583       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
5584       fprintf (dump_file, "  ref_b: ");
5585       print_generic_expr (dump_file, DR_REF (drb));
5586       fprintf (dump_file, ", stmt_b: ");
5587       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
5588     }
5589
5590   /* Analyze only when the dependence relation is not yet known.  */
5591   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5592     {
5593       dependence_stats.num_dependence_tests++;
5594
5595       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
5596           && access_functions_are_affine_or_constant_p (drb, loop_nest))
5597         subscript_dependence_tester (ddr, loop_nest);
5598
5599       /* As a last case, if the dependence cannot be determined, or if
5600          the dependence is considered too difficult to determine, answer
5601          "don't know".  */
5602       else
5603         {
5604           dependence_stats.num_dependence_undetermined++;
5605
5606           if (dump_file && (dump_flags & TDF_DETAILS))
5607             {
5608               fprintf (dump_file, "Data ref a:\n");
5609               dump_data_reference (dump_file, dra);
5610               fprintf (dump_file, "Data ref b:\n");
5611               dump_data_reference (dump_file, drb);
5612               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
5613             }
5614           finalize_ddr_dependent (ddr, chrec_dont_know);
5615         }
5616     }
5617
5618   if (dump_file && (dump_flags & TDF_DETAILS))
5619     {
5620       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5621         fprintf (dump_file, ") -> no dependence\n");
5622       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5623         fprintf (dump_file, ") -> dependence analysis failed\n");
5624       else
5625         fprintf (dump_file, ")\n");
5626     }
5627 }
5628
5629 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5630    the data references in DATAREFS, in the LOOP_NEST.  When
5631    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5632    relations.  Return true when successful, i.e. data references number
5633    is small enough to be handled.  */
5634
5635 bool
5636 compute_all_dependences (const vec<data_reference_p> &datarefs,
5637                          vec<ddr_p> *dependence_relations,
5638                          const vec<loop_p> &loop_nest,
5639                          bool compute_self_and_rr)
5640 {
5641   struct data_dependence_relation *ddr;
5642   struct data_reference *a, *b;
5643   unsigned int i, j;
5644
5645   if ((int) datarefs.length ()
5646       > param_loop_max_datarefs_for_datadeps)
5647     {
5648       struct data_dependence_relation *ddr;
5649
5650       /* Insert a single relation into dependence_relations:
5651          chrec_dont_know.  */
5652       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5653       dependence_relations->safe_push (ddr);
5654       return false;
5655     }
5656
5657   FOR_EACH_VEC_ELT (datarefs, i, a)
5658     for (j = i + 1; datarefs.iterate (j, &b); j++)
5659       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5660         {
5661           ddr = initialize_data_dependence_relation (a, b, loop_nest);
5662           dependence_relations->safe_push (ddr);
5663           if (loop_nest.exists ())
5664             compute_affine_dependence (ddr, loop_nest[0]);
5665         }
5666
5667   if (compute_self_and_rr)
5668     FOR_EACH_VEC_ELT (datarefs, i, a)
5669       {
5670         ddr = initialize_data_dependence_relation (a, a, loop_nest);
5671         dependence_relations->safe_push (ddr);
5672         if (loop_nest.exists ())
5673           compute_affine_dependence (ddr, loop_nest[0]);
5674       }
5675
5676   return true;
5677 }
5678
5679 /* Describes a location of a memory reference.  */
5680
5681 struct data_ref_loc
5682 {
5683   /* The memory reference.  */
5684   tree ref;
5685
5686   /* True if the memory reference is read.  */
5687   bool is_read;
5688
5689   /* True if the data reference is conditional within the containing
5690      statement, i.e. if it might not occur even when the statement
5691      is executed and runs to completion.  */
5692   bool is_conditional_in_stmt;
5693 };
5694
5695
5696 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
5697    true if STMT clobbers memory, false otherwise.  */
5698
5699 static bool
5700 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
5701 {
5702   bool clobbers_memory = false;
5703   data_ref_loc ref;
5704   tree op0, op1;
5705   enum gimple_code stmt_code = gimple_code (stmt);
5706
5707   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5708      As we cannot model data-references to not spelled out
5709      accesses give up if they may occur.  */
5710   if (stmt_code == GIMPLE_CALL
5711       && !(gimple_call_flags (stmt) & ECF_CONST))
5712     {
5713       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
5714       if (gimple_call_internal_p (stmt))
5715         switch (gimple_call_internal_fn (stmt))
5716           {
5717           case IFN_GOMP_SIMD_LANE:
5718             {
5719               class loop *loop = gimple_bb (stmt)->loop_father;
5720               tree uid = gimple_call_arg (stmt, 0);
5721               gcc_assert (TREE_CODE (uid) == SSA_NAME);
5722               if (loop == NULL
5723                   || loop->simduid != SSA_NAME_VAR (uid))
5724                 clobbers_memory = true;
5725               break;
5726             }
5727           case IFN_MASK_LOAD:
5728           case IFN_MASK_STORE:
5729             break;
5730           default:
5731             clobbers_memory = true;
5732             break;
5733           }
5734       else
5735         clobbers_memory = true;
5736     }
5737   else if (stmt_code == GIMPLE_ASM
5738            && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
5739                || gimple_vuse (stmt)))
5740     clobbers_memory = true;
5741
5742   if (!gimple_vuse (stmt))
5743     return clobbers_memory;
5744
5745   if (stmt_code == GIMPLE_ASSIGN)
5746     {
5747       tree base;
5748       op0 = gimple_assign_lhs (stmt);
5749       op1 = gimple_assign_rhs1 (stmt);
5750
5751       if (DECL_P (op1)
5752           || (REFERENCE_CLASS_P (op1)
5753               && (base = get_base_address (op1))
5754               && TREE_CODE (base) != SSA_NAME
5755               && !is_gimple_min_invariant (base)))
5756         {
5757           ref.ref = op1;
5758           ref.is_read = true;
5759           ref.is_conditional_in_stmt = false;
5760           references->safe_push (ref);
5761         }
5762     }
5763   else if (stmt_code == GIMPLE_CALL)
5764     {
5765       unsigned i, n;
5766       tree ptr, type;
5767       unsigned int align;
5768
5769       ref.is_read = false;
5770       if (gimple_call_internal_p (stmt))
5771         switch (gimple_call_internal_fn (stmt))
5772           {
5773           case IFN_MASK_LOAD:
5774             if (gimple_call_lhs (stmt) == NULL_TREE)
5775               break;
5776             ref.is_read = true;
5777             /* FALLTHRU */
5778           case IFN_MASK_STORE:
5779             ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5780             align = tree_to_shwi (gimple_call_arg (stmt, 1));
5781             if (ref.is_read)
5782               type = TREE_TYPE (gimple_call_lhs (stmt));
5783             else
5784               type = TREE_TYPE (gimple_call_arg (stmt, 3));
5785             if (TYPE_ALIGN (type) != align)
5786               type = build_aligned_type (type, align);
5787             ref.is_conditional_in_stmt = true;
5788             ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5789                                    ptr);
5790             references->safe_push (ref);
5791             return false;
5792           default:
5793             break;
5794           }
5795
5796       op0 = gimple_call_lhs (stmt);
5797       n = gimple_call_num_args (stmt);
5798       for (i = 0; i < n; i++)
5799         {
5800           op1 = gimple_call_arg (stmt, i);
5801
5802           if (DECL_P (op1)
5803               || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5804             {
5805               ref.ref = op1;
5806               ref.is_read = true;
5807               ref.is_conditional_in_stmt = false;
5808               references->safe_push (ref);
5809             }
5810         }
5811     }
5812   else
5813     return clobbers_memory;
5814
5815   if (op0
5816       && (DECL_P (op0)
5817           || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5818     {
5819       ref.ref = op0;
5820       ref.is_read = false;
5821       ref.is_conditional_in_stmt = false;
5822       references->safe_push (ref);
5823     }
5824   return clobbers_memory;
5825 }
5826
5827
5828 /* Returns true if the loop-nest has any data reference.  */
5829
5830 bool
5831 loop_nest_has_data_refs (loop_p loop)
5832 {
5833   basic_block *bbs = get_loop_body (loop);
5834   auto_vec<data_ref_loc, 3> references;
5835
5836   for (unsigned i = 0; i < loop->num_nodes; i++)
5837     {
5838       basic_block bb = bbs[i];
5839       gimple_stmt_iterator bsi;
5840
5841       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5842         {
5843           gimple *stmt = gsi_stmt (bsi);
5844           get_references_in_stmt (stmt, &references);
5845           if (references.length ())
5846             {
5847               free (bbs);
5848               return true;
5849             }
5850         }
5851     }
5852   free (bbs);
5853   return false;
5854 }
5855
5856 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
5857    reference, returns false, otherwise returns true.  NEST is the outermost
5858    loop of the loop nest in which the references should be analyzed.  */
5859
5860 opt_result
5861 find_data_references_in_stmt (class loop *nest, gimple *stmt,
5862                               vec<data_reference_p> *datarefs)
5863 {
5864   auto_vec<data_ref_loc, 2> references;
5865   data_reference_p dr;
5866
5867   if (get_references_in_stmt (stmt, &references))
5868     return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5869                                    stmt);
5870
5871   for (const data_ref_loc &ref : references)
5872     {
5873       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5874                             loop_containing_stmt (stmt), ref.ref,
5875                             stmt, ref.is_read, ref.is_conditional_in_stmt);
5876       gcc_assert (dr != NULL);
5877       datarefs->safe_push (dr);
5878     }
5879
5880   return opt_result::success ();
5881 }
5882
5883 /* Stores the data references in STMT to DATAREFS.  If there is an
5884    unanalyzable reference, returns false, otherwise returns true.
5885    NEST is the outermost loop of the loop nest in which the references
5886    should be instantiated, LOOP is the loop in which the references
5887    should be analyzed.  */
5888
5889 bool
5890 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5891                                        vec<data_reference_p> *datarefs)
5892 {
5893   auto_vec<data_ref_loc, 2> references;
5894   bool ret = true;
5895   data_reference_p dr;
5896
5897   if (get_references_in_stmt (stmt, &references))
5898     return false;
5899
5900   for (const data_ref_loc &ref : references)
5901     {
5902       dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
5903                             ref.is_conditional_in_stmt);
5904       gcc_assert (dr != NULL);
5905       datarefs->safe_push (dr);
5906     }
5907
5908   return ret;
5909 }
5910
5911 /* Search the data references in LOOP, and record the information into
5912    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5913    difficult case, returns NULL_TREE otherwise.  */
5914
5915 tree
5916 find_data_references_in_bb (class loop *loop, basic_block bb,
5917                             vec<data_reference_p> *datarefs)
5918 {
5919   gimple_stmt_iterator bsi;
5920
5921   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5922     {
5923       gimple *stmt = gsi_stmt (bsi);
5924
5925       if (!find_data_references_in_stmt (loop, stmt, datarefs))
5926         {
5927           struct data_reference *res;
5928           res = XCNEW (struct data_reference);
5929           datarefs->safe_push (res);
5930
5931           return chrec_dont_know;
5932         }
5933     }
5934
5935   return NULL_TREE;
5936 }
5937
5938 /* Search the data references in LOOP, and record the information into
5939    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5940    difficult case, returns NULL_TREE otherwise.
5941
5942    TODO: This function should be made smarter so that it can handle address
5943    arithmetic as if they were array accesses, etc.  */
5944
5945 tree
5946 find_data_references_in_loop (class loop *loop,
5947                               vec<data_reference_p> *datarefs)
5948 {
5949   basic_block bb, *bbs;
5950   unsigned int i;
5951
5952   bbs = get_loop_body_in_dom_order (loop);
5953
5954   for (i = 0; i < loop->num_nodes; i++)
5955     {
5956       bb = bbs[i];
5957
5958       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5959         {
5960           free (bbs);
5961           return chrec_dont_know;
5962         }
5963     }
5964   free (bbs);
5965
5966   return NULL_TREE;
5967 }
5968
5969 /* Return the alignment in bytes that DRB is guaranteed to have at all
5970    times.  */
5971
5972 unsigned int
5973 dr_alignment (innermost_loop_behavior *drb)
5974 {
5975   /* Get the alignment of BASE_ADDRESS + INIT.  */
5976   unsigned int alignment = drb->base_alignment;
5977   unsigned int misalignment = (drb->base_misalignment
5978                                + TREE_INT_CST_LOW (drb->init));
5979   if (misalignment != 0)
5980     alignment = MIN (alignment, misalignment & -misalignment);
5981
5982   /* Cap it to the alignment of OFFSET.  */
5983   if (!integer_zerop (drb->offset))
5984     alignment = MIN (alignment, drb->offset_alignment);
5985
5986   /* Cap it to the alignment of STEP.  */
5987   if (!integer_zerop (drb->step))
5988     alignment = MIN (alignment, drb->step_alignment);
5989
5990   return alignment;
5991 }
5992
5993 /* If BASE is a pointer-typed SSA name, try to find the object that it
5994    is based on.  Return this object X on success and store the alignment
5995    in bytes of BASE - &X in *ALIGNMENT_OUT.  */
5996
5997 static tree
5998 get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
5999 {
6000   if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
6001     return NULL_TREE;
6002
6003   gimple *def = SSA_NAME_DEF_STMT (base);
6004   base = analyze_scalar_evolution (loop_containing_stmt (def), base);
6005
6006   /* Peel chrecs and record the minimum alignment preserved by
6007      all steps.  */
6008   unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6009   while (TREE_CODE (base) == POLYNOMIAL_CHREC)
6010     {
6011       unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
6012       alignment = MIN (alignment, step_alignment);
6013       base = CHREC_LEFT (base);
6014     }
6015
6016   /* Punt if the expression is too complicated to handle.  */
6017   if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
6018     return NULL_TREE;
6019
6020   /* The only useful cases are those for which a dereference folds to something
6021      other than an INDIRECT_REF.  */
6022   tree ref_type = TREE_TYPE (TREE_TYPE (base));
6023   tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
6024   if (!ref)
6025     return NULL_TREE;
6026
6027   /* Analyze the base to which the steps we peeled were applied.  */
6028   poly_int64 bitsize, bitpos, bytepos;
6029   machine_mode mode;
6030   int unsignedp, reversep, volatilep;
6031   tree offset;
6032   base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
6033                               &unsignedp, &reversep, &volatilep);
6034   if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
6035     return NULL_TREE;
6036
6037   /* Restrict the alignment to that guaranteed by the offsets.  */
6038   unsigned int bytepos_alignment = known_alignment (bytepos);
6039   if (bytepos_alignment != 0)
6040     alignment = MIN (alignment, bytepos_alignment);
6041   if (offset)
6042     {
6043       unsigned int offset_alignment = highest_pow2_factor (offset);
6044       alignment = MIN (alignment, offset_alignment);
6045     }
6046
6047   *alignment_out = alignment;
6048   return base;
6049 }
6050
6051 /* Return the object whose alignment would need to be changed in order
6052    to increase the alignment of ADDR.  Store the maximum achievable
6053    alignment in *MAX_ALIGNMENT.  */
6054
6055 tree
6056 get_base_for_alignment (tree addr, unsigned int *max_alignment)
6057 {
6058   tree base = get_base_for_alignment_1 (addr, max_alignment);
6059   if (base)
6060     return base;
6061
6062   if (TREE_CODE (addr) == ADDR_EXPR)
6063     addr = TREE_OPERAND (addr, 0);
6064   *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6065   return addr;
6066 }
6067
6068 /* Recursive helper function.  */
6069
6070 static bool
6071 find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
6072 {
6073   /* Inner loops of the nest should not contain siblings.  Example:
6074      when there are two consecutive loops,
6075
6076      | loop_0
6077      |   loop_1
6078      |     A[{0, +, 1}_1]
6079      |   endloop_1
6080      |   loop_2
6081      |     A[{0, +, 1}_2]
6082      |   endloop_2
6083      | endloop_0
6084
6085      the dependence relation cannot be captured by the distance
6086      abstraction.  */
6087   if (loop->next)
6088     return false;
6089
6090   loop_nest->safe_push (loop);
6091   if (loop->inner)
6092     return find_loop_nest_1 (loop->inner, loop_nest);
6093   return true;
6094 }
6095
6096 /* Return false when the LOOP is not well nested.  Otherwise return
6097    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
6098    contain the loops from the outermost to the innermost, as they will
6099    appear in the classic distance vector.  */
6100
6101 bool
6102 find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
6103 {
6104   loop_nest->safe_push (loop);
6105   if (loop->inner)
6106     return find_loop_nest_1 (loop->inner, loop_nest);
6107   return true;
6108 }
6109
6110 /* Returns true when the data dependences have been computed, false otherwise.
6111    Given a loop nest LOOP, the following vectors are returned:
6112    DATAREFS is initialized to all the array elements contained in this loop,
6113    DEPENDENCE_RELATIONS contains the relations between the data references.
6114    Compute read-read and self relations if
6115    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
6116
6117 bool
6118 compute_data_dependences_for_loop (class loop *loop,
6119                                    bool compute_self_and_read_read_dependences,
6120                                    vec<loop_p> *loop_nest,
6121                                    vec<data_reference_p> *datarefs,
6122                                    vec<ddr_p> *dependence_relations)
6123 {
6124   bool res = true;
6125
6126   memset (&dependence_stats, 0, sizeof (dependence_stats));
6127
6128   /* If the loop nest is not well formed, or one of the data references
6129      is not computable, give up without spending time to compute other
6130      dependences.  */
6131   if (!loop
6132       || !find_loop_nest (loop, loop_nest)
6133       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
6134       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
6135                                    compute_self_and_read_read_dependences))
6136     res = false;
6137
6138   if (dump_file && (dump_flags & TDF_STATS))
6139     {
6140       fprintf (dump_file, "Dependence tester statistics:\n");
6141
6142       fprintf (dump_file, "Number of dependence tests: %d\n",
6143                dependence_stats.num_dependence_tests);
6144       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
6145                dependence_stats.num_dependence_dependent);
6146       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
6147                dependence_stats.num_dependence_independent);
6148       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
6149                dependence_stats.num_dependence_undetermined);
6150
6151       fprintf (dump_file, "Number of subscript tests: %d\n",
6152                dependence_stats.num_subscript_tests);
6153       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
6154                dependence_stats.num_subscript_undetermined);
6155       fprintf (dump_file, "Number of same subscript function: %d\n",
6156                dependence_stats.num_same_subscript_function);
6157
6158       fprintf (dump_file, "Number of ziv tests: %d\n",
6159                dependence_stats.num_ziv);
6160       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
6161                dependence_stats.num_ziv_dependent);
6162       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
6163                dependence_stats.num_ziv_independent);
6164       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
6165                dependence_stats.num_ziv_unimplemented);
6166
6167       fprintf (dump_file, "Number of siv tests: %d\n",
6168                dependence_stats.num_siv);
6169       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
6170                dependence_stats.num_siv_dependent);
6171       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
6172                dependence_stats.num_siv_independent);
6173       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
6174                dependence_stats.num_siv_unimplemented);
6175
6176       fprintf (dump_file, "Number of miv tests: %d\n",
6177                dependence_stats.num_miv);
6178       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
6179                dependence_stats.num_miv_dependent);
6180       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
6181                dependence_stats.num_miv_independent);
6182       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
6183                dependence_stats.num_miv_unimplemented);
6184     }
6185
6186   return res;
6187 }
6188
6189 /* Free the memory used by a data dependence relation DDR.  */
6190
6191 void
6192 free_dependence_relation (struct data_dependence_relation *ddr)
6193 {
6194   if (ddr == NULL)
6195     return;
6196
6197   if (DDR_SUBSCRIPTS (ddr).exists ())
6198     free_subscripts (DDR_SUBSCRIPTS (ddr));
6199   DDR_DIST_VECTS (ddr).release ();
6200   DDR_DIR_VECTS (ddr).release ();
6201
6202   free (ddr);
6203 }
6204
6205 /* Free the memory used by the data dependence relations from
6206    DEPENDENCE_RELATIONS.  */
6207
6208 void
6209 free_dependence_relations (vec<ddr_p>& dependence_relations)
6210 {
6211   for (data_dependence_relation *ddr : dependence_relations)
6212     if (ddr)
6213       free_dependence_relation (ddr);
6214
6215   dependence_relations.release ();
6216 }
6217
6218 /* Free the memory used by the data references from DATAREFS.  */
6219
6220 void
6221 free_data_refs (vec<data_reference_p>& datarefs)
6222 {
6223   for (data_reference *dr : datarefs)
6224     free_data_ref (dr);
6225   datarefs.release ();
6226 }
6227
6228 /* Common routine implementing both dr_direction_indicator and
6229    dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
6230    to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
6231    Return the step as the indicator otherwise.  */
6232
6233 static tree
6234 dr_step_indicator (struct data_reference *dr, int useful_min)
6235 {
6236   tree step = DR_STEP (dr);
6237   if (!step)
6238     return NULL_TREE;
6239   STRIP_NOPS (step);
6240   /* Look for cases where the step is scaled by a positive constant
6241      integer, which will often be the access size.  If the multiplication
6242      doesn't change the sign (due to overflow effects) then we can
6243      test the unscaled value instead.  */
6244   if (TREE_CODE (step) == MULT_EXPR
6245       && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
6246       && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
6247     {
6248       tree factor = TREE_OPERAND (step, 1);
6249       step = TREE_OPERAND (step, 0);
6250
6251       /* Strip widening and truncating conversions as well as nops.  */
6252       if (CONVERT_EXPR_P (step)
6253           && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
6254         step = TREE_OPERAND (step, 0);
6255       tree type = TREE_TYPE (step);
6256
6257       /* Get the range of step values that would not cause overflow.  */
6258       widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
6259                          / wi::to_widest (factor));
6260       widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
6261                          / wi::to_widest (factor));
6262
6263       /* Get the range of values that the unconverted step actually has.  */
6264       wide_int step_min, step_max;
6265       value_range vr;
6266       if (TREE_CODE (step) != SSA_NAME
6267           || !get_range_query (cfun)->range_of_expr (vr, step)
6268           || vr.kind () != VR_RANGE)
6269         {
6270           step_min = wi::to_wide (TYPE_MIN_VALUE (type));
6271           step_max = wi::to_wide (TYPE_MAX_VALUE (type));
6272         }
6273       else
6274         {
6275           step_min = vr.lower_bound ();
6276           step_max = vr.upper_bound ();
6277         }
6278
6279       /* Check whether the unconverted step has an acceptable range.  */
6280       signop sgn = TYPE_SIGN (type);
6281       if (wi::les_p (minv, widest_int::from (step_min, sgn))
6282           && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
6283         {
6284           if (wi::ge_p (step_min, useful_min, sgn))
6285             return ssize_int (useful_min);
6286           else if (wi::lt_p (step_max, 0, sgn))
6287             return ssize_int (-1);
6288           else
6289             return fold_convert (ssizetype, step);
6290         }
6291     }
6292   return DR_STEP (dr);
6293 }
6294
6295 /* Return a value that is negative iff DR has a negative step.  */
6296
6297 tree
6298 dr_direction_indicator (struct data_reference *dr)
6299 {
6300   return dr_step_indicator (dr, 0);
6301 }
6302
6303 /* Return a value that is zero iff DR has a zero step.  */
6304
6305 tree
6306 dr_zero_step_indicator (struct data_reference *dr)
6307 {
6308   return dr_step_indicator (dr, 1);
6309 }
6310
6311 /* Return true if DR is known to have a nonnegative (but possibly zero)
6312    step.  */
6313
6314 bool
6315 dr_known_forward_stride_p (struct data_reference *dr)
6316 {
6317   tree indicator = dr_direction_indicator (dr);
6318   tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6319                                    fold_convert (ssizetype, indicator),
6320                                    ssize_int (0));
6321   return neg_step_val && integer_zerop (neg_step_val);
6322 }