1 /* Data references and dependences detectors.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
49 - to define an interface to access this data.
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
62 has an integer solution x = 1 and y = -1.
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "expr.h"
#include "gimple-iterator.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "dumpfile.h"
#include "tree-affine.h"
#include "builtins.h"
#include "tree-eh.h"
#include "ssa.h"
#include "internal-fn.h"
/* Counters for the several kinds of dependence tests performed; dumped
   with the pass statistics.  All counters start zero-initialized because
   the instance has static storage duration.  */

static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
/* Forward declaration: tests the dependence of the subscripts of two
   data references; the definition appears later in this file.  */
static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
					   unsigned int, unsigned int,
					   class loop *);
130 /* Returns true iff A divides B. */
133 tree_fold_divides_p (const_tree a
, const_tree b
)
135 gcc_assert (TREE_CODE (a
) == INTEGER_CST
);
136 gcc_assert (TREE_CODE (b
) == INTEGER_CST
);
137 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR
, b
, a
));
/* Returns true (nonzero) iff A divides B.  A must be nonzero, since
   B % A is undefined for A == 0.  */

static inline int
int_divides_p (int a, int b)
{
  return ((b % a) == 0);
}
148 /* Return true if reference REF contains a union access. */
151 ref_contains_union_access_p (tree ref
)
153 while (handled_component_p (ref
))
155 ref
= TREE_OPERAND (ref
, 0);
156 if (TREE_CODE (TREE_TYPE (ref
)) == UNION_TYPE
157 || TREE_CODE (TREE_TYPE (ref
)) == QUAL_UNION_TYPE
)
165 /* Dump into FILE all the data references from DATAREFS. */
168 dump_data_references (FILE *file
, vec
<data_reference_p
> datarefs
)
171 struct data_reference
*dr
;
173 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
174 dump_data_reference (file
, dr
);
177 /* Unified dump into FILE all the data references from DATAREFS. */
180 debug (vec
<data_reference_p
> &ref
)
182 dump_data_references (stderr
, ref
);
186 debug (vec
<data_reference_p
> *ptr
)
191 fprintf (stderr
, "<nil>\n");
195 /* Dump into STDERR all the data references from DATAREFS. */
198 debug_data_references (vec
<data_reference_p
> datarefs
)
200 dump_data_references (stderr
, datarefs
);
203 /* Print to STDERR the data_reference DR. */
206 debug_data_reference (struct data_reference
*dr
)
208 dump_data_reference (stderr
, dr
);
211 /* Dump function for a DATA_REFERENCE structure. */
214 dump_data_reference (FILE *outf
,
215 struct data_reference
*dr
)
219 fprintf (outf
, "#(Data Ref: \n");
220 fprintf (outf
, "# bb: %d \n", gimple_bb (DR_STMT (dr
))->index
);
221 fprintf (outf
, "# stmt: ");
222 print_gimple_stmt (outf
, DR_STMT (dr
), 0);
223 fprintf (outf
, "# ref: ");
224 print_generic_stmt (outf
, DR_REF (dr
));
225 fprintf (outf
, "# base_object: ");
226 print_generic_stmt (outf
, DR_BASE_OBJECT (dr
));
228 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
230 fprintf (outf
, "# Access function %d: ", i
);
231 print_generic_stmt (outf
, DR_ACCESS_FN (dr
, i
));
233 fprintf (outf
, "#)\n");
236 /* Unified dump function for a DATA_REFERENCE structure. */
239 debug (data_reference
&ref
)
241 dump_data_reference (stderr
, &ref
);
245 debug (data_reference
*ptr
)
250 fprintf (stderr
, "<nil>\n");
254 /* Dumps the affine function described by FN to the file OUTF. */
257 dump_affine_function (FILE *outf
, affine_fn fn
)
262 print_generic_expr (outf
, fn
[0], TDF_SLIM
);
263 for (i
= 1; fn
.iterate (i
, &coef
); i
++)
265 fprintf (outf
, " + ");
266 print_generic_expr (outf
, coef
, TDF_SLIM
);
267 fprintf (outf
, " * x_%u", i
);
271 /* Dumps the conflict function CF to the file OUTF. */
274 dump_conflict_function (FILE *outf
, conflict_function
*cf
)
278 if (cf
->n
== NO_DEPENDENCE
)
279 fprintf (outf
, "no dependence");
280 else if (cf
->n
== NOT_KNOWN
)
281 fprintf (outf
, "not known");
284 for (i
= 0; i
< cf
->n
; i
++)
289 dump_affine_function (outf
, cf
->fns
[i
]);
295 /* Dump function for a SUBSCRIPT structure. */
298 dump_subscript (FILE *outf
, struct subscript
*subscript
)
300 conflict_function
*cf
= SUB_CONFLICTS_IN_A (subscript
);
302 fprintf (outf
, "\n (subscript \n");
303 fprintf (outf
, " iterations_that_access_an_element_twice_in_A: ");
304 dump_conflict_function (outf
, cf
);
305 if (CF_NONTRIVIAL_P (cf
))
307 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
308 fprintf (outf
, "\n last_conflict: ");
309 print_generic_expr (outf
, last_iteration
);
312 cf
= SUB_CONFLICTS_IN_B (subscript
);
313 fprintf (outf
, "\n iterations_that_access_an_element_twice_in_B: ");
314 dump_conflict_function (outf
, cf
);
315 if (CF_NONTRIVIAL_P (cf
))
317 tree last_iteration
= SUB_LAST_CONFLICT (subscript
);
318 fprintf (outf
, "\n last_conflict: ");
319 print_generic_expr (outf
, last_iteration
);
322 fprintf (outf
, "\n (Subscript distance: ");
323 print_generic_expr (outf
, SUB_DISTANCE (subscript
));
324 fprintf (outf
, " ))\n");
327 /* Print the classic direction vector DIRV to OUTF. */
330 print_direction_vector (FILE *outf
,
336 for (eq
= 0; eq
< length
; eq
++)
338 enum data_dependence_direction dir
= ((enum data_dependence_direction
)
344 fprintf (outf
, " +");
347 fprintf (outf
, " -");
350 fprintf (outf
, " =");
352 case dir_positive_or_equal
:
353 fprintf (outf
, " +=");
355 case dir_positive_or_negative
:
356 fprintf (outf
, " +-");
358 case dir_negative_or_equal
:
359 fprintf (outf
, " -=");
362 fprintf (outf
, " *");
365 fprintf (outf
, "indep");
369 fprintf (outf
, "\n");
372 /* Print a vector of direction vectors. */
375 print_dir_vectors (FILE *outf
, vec
<lambda_vector
> dir_vects
,
381 FOR_EACH_VEC_ELT (dir_vects
, j
, v
)
382 print_direction_vector (outf
, v
, length
);
385 /* Print out a vector VEC of length N to OUTFILE. */
388 print_lambda_vector (FILE * outfile
, lambda_vector vector
, int n
)
392 for (i
= 0; i
< n
; i
++)
393 fprintf (outfile
, "%3d ", (int)vector
[i
]);
394 fprintf (outfile
, "\n");
397 /* Print a vector of distance vectors. */
400 print_dist_vectors (FILE *outf
, vec
<lambda_vector
> dist_vects
,
406 FOR_EACH_VEC_ELT (dist_vects
, j
, v
)
407 print_lambda_vector (outf
, v
, length
);
410 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
413 dump_data_dependence_relation (FILE *outf
,
414 struct data_dependence_relation
*ddr
)
416 struct data_reference
*dra
, *drb
;
418 fprintf (outf
, "(Data Dep: \n");
420 if (!ddr
|| DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
427 dump_data_reference (outf
, dra
);
429 fprintf (outf
, " (nil)\n");
431 dump_data_reference (outf
, drb
);
433 fprintf (outf
, " (nil)\n");
435 fprintf (outf
, " (don't know)\n)\n");
441 dump_data_reference (outf
, dra
);
442 dump_data_reference (outf
, drb
);
444 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
445 fprintf (outf
, " (no dependence)\n");
447 else if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
453 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr
), i
, sub
)
455 fprintf (outf
, " access_fn_A: ");
456 print_generic_stmt (outf
, SUB_ACCESS_FN (sub
, 0));
457 fprintf (outf
, " access_fn_B: ");
458 print_generic_stmt (outf
, SUB_ACCESS_FN (sub
, 1));
459 dump_subscript (outf
, sub
);
462 fprintf (outf
, " loop nest: (");
463 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr
), i
, loopi
)
464 fprintf (outf
, "%d ", loopi
->num
);
465 fprintf (outf
, ")\n");
467 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
469 fprintf (outf
, " distance_vector: ");
470 print_lambda_vector (outf
, DDR_DIST_VECT (ddr
, i
),
474 for (i
= 0; i
< DDR_NUM_DIR_VECTS (ddr
); i
++)
476 fprintf (outf
, " direction_vector: ");
477 print_direction_vector (outf
, DDR_DIR_VECT (ddr
, i
),
482 fprintf (outf
, ")\n");
488 debug_data_dependence_relation (struct data_dependence_relation
*ddr
)
490 dump_data_dependence_relation (stderr
, ddr
);
493 /* Dump into FILE all the dependence relations from DDRS. */
496 dump_data_dependence_relations (FILE *file
,
500 struct data_dependence_relation
*ddr
;
502 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
503 dump_data_dependence_relation (file
, ddr
);
507 debug (vec
<ddr_p
> &ref
)
509 dump_data_dependence_relations (stderr
, ref
);
513 debug (vec
<ddr_p
> *ptr
)
518 fprintf (stderr
, "<nil>\n");
522 /* Dump to STDERR all the dependence relations from DDRS. */
525 debug_data_dependence_relations (vec
<ddr_p
> ddrs
)
527 dump_data_dependence_relations (stderr
, ddrs
);
530 /* Dumps the distance and direction vectors in FILE. DDRS contains
531 the dependence relations, and VECT_SIZE is the size of the
532 dependence vectors, or in other words the number of loops in the
536 dump_dist_dir_vectors (FILE *file
, vec
<ddr_p
> ddrs
)
539 struct data_dependence_relation
*ddr
;
542 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
543 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
&& DDR_AFFINE_P (ddr
))
545 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), j
, v
)
547 fprintf (file
, "DISTANCE_V (");
548 print_lambda_vector (file
, v
, DDR_NB_LOOPS (ddr
));
549 fprintf (file
, ")\n");
552 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr
), j
, v
)
554 fprintf (file
, "DIRECTION_V (");
555 print_direction_vector (file
, v
, DDR_NB_LOOPS (ddr
));
556 fprintf (file
, ")\n");
560 fprintf (file
, "\n\n");
563 /* Dumps the data dependence relations DDRS in FILE. */
566 dump_ddrs (FILE *file
, vec
<ddr_p
> ddrs
)
569 struct data_dependence_relation
*ddr
;
571 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
572 dump_data_dependence_relation (file
, ddr
);
574 fprintf (file
, "\n\n");
578 debug_ddrs (vec
<ddr_p
> ddrs
)
580 dump_ddrs (stderr
, ddrs
);
584 split_constant_offset (tree exp
, tree
*var
, tree
*off
,
585 hash_map
<tree
, std::pair
<tree
, tree
> > &cache
,
588 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
589 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
590 constant of type ssizetype, and returns true. If we cannot do this
591 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
595 split_constant_offset_1 (tree type
, tree op0
, enum tree_code code
, tree op1
,
596 tree
*var
, tree
*off
,
597 hash_map
<tree
, std::pair
<tree
, tree
> > &cache
,
602 enum tree_code ocode
= code
;
610 *var
= build_int_cst (type
, 0);
611 *off
= fold_convert (ssizetype
, op0
);
614 case POINTER_PLUS_EXPR
:
619 if (TREE_CODE (op1
) == INTEGER_CST
)
621 split_constant_offset (op0
, &var0
, &off0
, cache
, limit
);
623 *off
= size_binop (ocode
, off0
, fold_convert (ssizetype
, op1
));
626 split_constant_offset (op0
, &var0
, &off0
, cache
, limit
);
627 split_constant_offset (op1
, &var1
, &off1
, cache
, limit
);
628 *var
= fold_build2 (code
, type
, var0
, var1
);
629 *off
= size_binop (ocode
, off0
, off1
);
633 if (TREE_CODE (op1
) != INTEGER_CST
)
636 split_constant_offset (op0
, &var0
, &off0
, cache
, limit
);
637 *var
= fold_build2 (MULT_EXPR
, type
, var0
, op1
);
638 *off
= size_binop (MULT_EXPR
, off0
, fold_convert (ssizetype
, op1
));
644 poly_int64 pbitsize
, pbitpos
, pbytepos
;
646 int punsignedp
, preversep
, pvolatilep
;
648 op0
= TREE_OPERAND (op0
, 0);
650 = get_inner_reference (op0
, &pbitsize
, &pbitpos
, &poffset
, &pmode
,
651 &punsignedp
, &preversep
, &pvolatilep
);
653 if (!multiple_p (pbitpos
, BITS_PER_UNIT
, &pbytepos
))
655 base
= build_fold_addr_expr (base
);
656 off0
= ssize_int (pbytepos
);
660 split_constant_offset (poffset
, &poffset
, &off1
, cache
, limit
);
661 off0
= size_binop (PLUS_EXPR
, off0
, off1
);
662 if (POINTER_TYPE_P (TREE_TYPE (base
)))
663 base
= fold_build_pointer_plus (base
, poffset
);
665 base
= fold_build2 (PLUS_EXPR
, TREE_TYPE (base
), base
,
666 fold_convert (TREE_TYPE (base
), poffset
));
669 var0
= fold_convert (type
, base
);
671 /* If variable length types are involved, punt, otherwise casts
672 might be converted into ARRAY_REFs in gimplify_conversion.
673 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
674 possibly no longer appears in current GIMPLE, might resurface.
675 This perhaps could run
676 if (CONVERT_EXPR_P (var0))
678 gimplify_conversion (&var0);
679 // Attempt to fill in any within var0 found ARRAY_REF's
680 // element size from corresponding op embedded ARRAY_REF,
681 // if unsuccessful, just punt.
683 while (POINTER_TYPE_P (type
))
684 type
= TREE_TYPE (type
);
685 if (int_size_in_bytes (type
) < 0)
695 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0
))
698 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op0
);
699 enum tree_code subcode
;
701 if (gimple_code (def_stmt
) != GIMPLE_ASSIGN
)
704 subcode
= gimple_assign_rhs_code (def_stmt
);
706 /* We are using a cache to avoid un-CSEing large amounts of code. */
707 bool use_cache
= false;
708 if (!has_single_use (op0
)
709 && (subcode
== POINTER_PLUS_EXPR
710 || subcode
== PLUS_EXPR
711 || subcode
== MINUS_EXPR
712 || subcode
== MULT_EXPR
713 || subcode
== ADDR_EXPR
714 || CONVERT_EXPR_CODE_P (subcode
)))
718 std::pair
<tree
, tree
> &e
= cache
.get_or_insert (op0
, &existed
);
721 if (integer_zerop (e
.second
))
727 e
= std::make_pair (op0
, ssize_int (0));
734 var0
= gimple_assign_rhs1 (def_stmt
);
735 var1
= gimple_assign_rhs2 (def_stmt
);
737 bool res
= split_constant_offset_1 (type
, var0
, subcode
, var1
,
738 var
, off
, cache
, limit
);
739 if (res
&& use_cache
)
740 *cache
.get (op0
) = std::make_pair (*var
, *off
);
745 /* We must not introduce undefined overflow, and we must not change
746 the value. Hence we're okay if the inner type doesn't overflow
747 to start with (pointer or signed), the outer type also is an
748 integer or pointer and the outer precision is at least as large
750 tree itype
= TREE_TYPE (op0
);
751 if ((POINTER_TYPE_P (itype
)
752 || (INTEGRAL_TYPE_P (itype
) && !TYPE_OVERFLOW_TRAPS (itype
)))
753 && TYPE_PRECISION (type
) >= TYPE_PRECISION (itype
)
754 && (POINTER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
)))
756 if (INTEGRAL_TYPE_P (itype
) && TYPE_OVERFLOW_WRAPS (itype
))
758 /* Split the unconverted operand and try to prove that
759 wrapping isn't a problem. */
760 tree tmp_var
, tmp_off
;
761 split_constant_offset (op0
, &tmp_var
, &tmp_off
, cache
, limit
);
763 /* See whether we have an SSA_NAME whose range is known
765 if (TREE_CODE (tmp_var
) != SSA_NAME
)
767 wide_int var_min
, var_max
;
768 value_range_kind vr_type
= get_range_info (tmp_var
, &var_min
,
770 wide_int var_nonzero
= get_nonzero_bits (tmp_var
);
771 signop sgn
= TYPE_SIGN (itype
);
772 if (intersect_range_with_nonzero_bits (vr_type
, &var_min
,
773 &var_max
, var_nonzero
,
777 /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
778 is known to be [A + TMP_OFF, B + TMP_OFF], with all
779 operations done in ITYPE. The addition must overflow
780 at both ends of the range or at neither. */
781 wi::overflow_type overflow
[2];
782 unsigned int prec
= TYPE_PRECISION (itype
);
783 wide_int woff
= wi::to_wide (tmp_off
, prec
);
784 wide_int op0_min
= wi::add (var_min
, woff
, sgn
, &overflow
[0]);
785 wi::add (var_max
, woff
, sgn
, &overflow
[1]);
786 if ((overflow
[0] != wi::OVF_NONE
) != (overflow
[1] != wi::OVF_NONE
))
789 /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR. */
790 widest_int diff
= (widest_int::from (op0_min
, sgn
)
791 - widest_int::from (var_min
, sgn
));
793 *off
= wide_int_to_tree (ssizetype
, diff
);
796 split_constant_offset (op0
, &var0
, off
, cache
, limit
);
797 *var
= fold_convert (type
, var0
);
808 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
809 will be ssizetype. */
812 split_constant_offset (tree exp
, tree
*var
, tree
*off
,
813 hash_map
<tree
, std::pair
<tree
, tree
> > &cache
,
816 tree type
= TREE_TYPE (exp
), op0
, op1
, e
, o
;
820 *off
= ssize_int (0);
822 if (tree_is_chrec (exp
)
823 || get_gimple_rhs_class (TREE_CODE (exp
)) == GIMPLE_TERNARY_RHS
)
826 code
= TREE_CODE (exp
);
827 extract_ops_from_tree (exp
, &code
, &op0
, &op1
);
828 if (split_constant_offset_1 (type
, op0
, code
, op1
, &e
, &o
, cache
, limit
))
836 split_constant_offset (tree exp
, tree
*var
, tree
*off
)
838 unsigned limit
= param_ssa_name_def_chain_limit
;
839 static hash_map
<tree
, std::pair
<tree
, tree
> > *cache
;
841 cache
= new hash_map
<tree
, std::pair
<tree
, tree
> > (37);
842 split_constant_offset (exp
, var
, off
, *cache
, &limit
);
846 /* Returns the address ADDR of an object in a canonical shape (without nop
847 casts, and with type of pointer to the object). */
850 canonicalize_base_object_address (tree addr
)
856 /* The base address may be obtained by casting from integer, in that case
858 if (!POINTER_TYPE_P (TREE_TYPE (addr
)))
861 if (TREE_CODE (addr
) != ADDR_EXPR
)
864 return build_fold_addr_expr (TREE_OPERAND (addr
, 0));
867 /* Analyze the behavior of memory reference REF within STMT.
870 - BB analysis. In this case we simply split the address into base,
871 init and offset components, without reference to any containing loop.
872 The resulting base and offset are general expressions and they can
873 vary arbitrarily from one iteration of the containing loop to the next.
874 The step is always zero.
876 - loop analysis. In this case we analyze the reference both wrt LOOP
877 and on the basis that the reference occurs (is "used") in LOOP;
878 see the comment above analyze_scalar_evolution_in_loop for more
879 information about this distinction. The base, init, offset and
880 step fields are all invariant in LOOP.
882 Perform BB analysis if LOOP is null, or if LOOP is the function's
883 dummy outermost loop. In other cases perform loop analysis.
885 Return true if the analysis succeeded and store the results in DRB if so.
886 BB analysis can only fail for bitfield or reversed-storage accesses. */
889 dr_analyze_innermost (innermost_loop_behavior
*drb
, tree ref
,
890 class loop
*loop
, const gimple
*stmt
)
892 poly_int64 pbitsize
, pbitpos
;
895 int punsignedp
, preversep
, pvolatilep
;
896 affine_iv base_iv
, offset_iv
;
897 tree init
, dinit
, step
;
898 bool in_loop
= (loop
&& loop
->num
);
900 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
901 fprintf (dump_file
, "analyze_innermost: ");
903 base
= get_inner_reference (ref
, &pbitsize
, &pbitpos
, &poffset
, &pmode
,
904 &punsignedp
, &preversep
, &pvolatilep
);
905 gcc_assert (base
!= NULL_TREE
);
908 if (!multiple_p (pbitpos
, BITS_PER_UNIT
, &pbytepos
))
909 return opt_result::failure_at (stmt
,
910 "failed: bit offset alignment.\n");
913 return opt_result::failure_at (stmt
,
914 "failed: reverse storage order.\n");
916 /* Calculate the alignment and misalignment for the inner reference. */
917 unsigned int HOST_WIDE_INT bit_base_misalignment
;
918 unsigned int bit_base_alignment
;
919 get_object_alignment_1 (base
, &bit_base_alignment
, &bit_base_misalignment
);
921 /* There are no bitfield references remaining in BASE, so the values
922 we got back must be whole bytes. */
923 gcc_assert (bit_base_alignment
% BITS_PER_UNIT
== 0
924 && bit_base_misalignment
% BITS_PER_UNIT
== 0);
925 unsigned int base_alignment
= bit_base_alignment
/ BITS_PER_UNIT
;
926 poly_int64 base_misalignment
= bit_base_misalignment
/ BITS_PER_UNIT
;
928 if (TREE_CODE (base
) == MEM_REF
)
930 if (!integer_zerop (TREE_OPERAND (base
, 1)))
932 /* Subtract MOFF from the base and add it to POFFSET instead.
933 Adjust the misalignment to reflect the amount we subtracted. */
934 poly_offset_int moff
= mem_ref_offset (base
);
935 base_misalignment
-= moff
.force_shwi ();
936 tree mofft
= wide_int_to_tree (sizetype
, moff
);
940 poffset
= size_binop (PLUS_EXPR
, poffset
, mofft
);
942 base
= TREE_OPERAND (base
, 0);
945 base
= build_fold_addr_expr (base
);
949 if (!simple_iv (loop
, loop
, base
, &base_iv
, true))
950 return opt_result::failure_at
951 (stmt
, "failed: evolution of base is not affine.\n");
956 base_iv
.step
= ssize_int (0);
957 base_iv
.no_overflow
= true;
962 offset_iv
.base
= ssize_int (0);
963 offset_iv
.step
= ssize_int (0);
969 offset_iv
.base
= poffset
;
970 offset_iv
.step
= ssize_int (0);
972 else if (!simple_iv (loop
, loop
, poffset
, &offset_iv
, true))
973 return opt_result::failure_at
974 (stmt
, "failed: evolution of offset is not affine.\n");
977 init
= ssize_int (pbytepos
);
979 /* Subtract any constant component from the base and add it to INIT instead.
980 Adjust the misalignment to reflect the amount we subtracted. */
981 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
982 init
= size_binop (PLUS_EXPR
, init
, dinit
);
983 base_misalignment
-= TREE_INT_CST_LOW (dinit
);
985 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
986 init
= size_binop (PLUS_EXPR
, init
, dinit
);
988 step
= size_binop (PLUS_EXPR
,
989 fold_convert (ssizetype
, base_iv
.step
),
990 fold_convert (ssizetype
, offset_iv
.step
));
992 base
= canonicalize_base_object_address (base_iv
.base
);
994 /* See if get_pointer_alignment can guarantee a higher alignment than
995 the one we calculated above. */
996 unsigned int HOST_WIDE_INT alt_misalignment
;
997 unsigned int alt_alignment
;
998 get_pointer_alignment_1 (base
, &alt_alignment
, &alt_misalignment
);
1000 /* As above, these values must be whole bytes. */
1001 gcc_assert (alt_alignment
% BITS_PER_UNIT
== 0
1002 && alt_misalignment
% BITS_PER_UNIT
== 0);
1003 alt_alignment
/= BITS_PER_UNIT
;
1004 alt_misalignment
/= BITS_PER_UNIT
;
1006 if (base_alignment
< alt_alignment
)
1008 base_alignment
= alt_alignment
;
1009 base_misalignment
= alt_misalignment
;
1012 drb
->base_address
= base
;
1013 drb
->offset
= fold_convert (ssizetype
, offset_iv
.base
);
1016 if (known_misalignment (base_misalignment
, base_alignment
,
1017 &drb
->base_misalignment
))
1018 drb
->base_alignment
= base_alignment
;
1021 drb
->base_alignment
= known_alignment (base_misalignment
);
1022 drb
->base_misalignment
= 0;
1024 drb
->offset_alignment
= highest_pow2_factor (offset_iv
.base
);
1025 drb
->step_alignment
= highest_pow2_factor (step
);
1027 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1028 fprintf (dump_file
, "success.\n");
1030 return opt_result::success ();
1033 /* Return true if OP is a valid component reference for a DR access
1034 function. This accepts a subset of what handled_component_p accepts. */
1037 access_fn_component_p (tree op
)
1039 switch (TREE_CODE (op
))
1047 return TREE_CODE (TREE_TYPE (TREE_OPERAND (op
, 0))) == RECORD_TYPE
;
1054 /* Determines the base object and the list of indices of memory reference
1055 DR, analyzed in LOOP and instantiated before NEST. */
1058 dr_analyze_indices (struct data_reference
*dr
, edge nest
, loop_p loop
)
1060 vec
<tree
> access_fns
= vNULL
;
1062 tree base
, off
, access_fn
;
1064 /* If analyzing a basic-block there are no indices to analyze
1065 and thus no access functions. */
1068 DR_BASE_OBJECT (dr
) = DR_REF (dr
);
1069 DR_ACCESS_FNS (dr
).create (0);
1075 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1076 into a two element array with a constant index. The base is
1077 then just the immediate underlying object. */
1078 if (TREE_CODE (ref
) == REALPART_EXPR
)
1080 ref
= TREE_OPERAND (ref
, 0);
1081 access_fns
.safe_push (integer_zero_node
);
1083 else if (TREE_CODE (ref
) == IMAGPART_EXPR
)
1085 ref
= TREE_OPERAND (ref
, 0);
1086 access_fns
.safe_push (integer_one_node
);
1089 /* Analyze access functions of dimensions we know to be independent.
1090 The list of component references handled here should be kept in
1091 sync with access_fn_component_p. */
1092 while (handled_component_p (ref
))
1094 if (TREE_CODE (ref
) == ARRAY_REF
)
1096 op
= TREE_OPERAND (ref
, 1);
1097 access_fn
= analyze_scalar_evolution (loop
, op
);
1098 access_fn
= instantiate_scev (nest
, loop
, access_fn
);
1099 access_fns
.safe_push (access_fn
);
1101 else if (TREE_CODE (ref
) == COMPONENT_REF
1102 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref
, 0))) == RECORD_TYPE
)
1104 /* For COMPONENT_REFs of records (but not unions!) use the
1105 FIELD_DECL offset as constant access function so we can
1106 disambiguate a[i].f1 and a[i].f2. */
1107 tree off
= component_ref_field_offset (ref
);
1108 off
= size_binop (PLUS_EXPR
,
1109 size_binop (MULT_EXPR
,
1110 fold_convert (bitsizetype
, off
),
1111 bitsize_int (BITS_PER_UNIT
)),
1112 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref
, 1)));
1113 access_fns
.safe_push (off
);
1116 /* If we have an unhandled component we could not translate
1117 to an access function stop analyzing. We have determined
1118 our base object in this case. */
1121 ref
= TREE_OPERAND (ref
, 0);
1124 /* If the address operand of a MEM_REF base has an evolution in the
1125 analyzed nest, add it as an additional independent access-function. */
1126 if (TREE_CODE (ref
) == MEM_REF
)
1128 op
= TREE_OPERAND (ref
, 0);
1129 access_fn
= analyze_scalar_evolution (loop
, op
);
1130 access_fn
= instantiate_scev (nest
, loop
, access_fn
);
1131 if (TREE_CODE (access_fn
) == POLYNOMIAL_CHREC
)
1134 tree memoff
= TREE_OPERAND (ref
, 1);
1135 base
= initial_condition (access_fn
);
1136 orig_type
= TREE_TYPE (base
);
1137 STRIP_USELESS_TYPE_CONVERSION (base
);
1138 split_constant_offset (base
, &base
, &off
);
1139 STRIP_USELESS_TYPE_CONVERSION (base
);
1140 /* Fold the MEM_REF offset into the evolutions initial
1141 value to make more bases comparable. */
1142 if (!integer_zerop (memoff
))
1144 off
= size_binop (PLUS_EXPR
, off
,
1145 fold_convert (ssizetype
, memoff
));
1146 memoff
= build_int_cst (TREE_TYPE (memoff
), 0);
1148 /* Adjust the offset so it is a multiple of the access type
1149 size and thus we separate bases that can possibly be used
1150 to produce partial overlaps (which the access_fn machinery
1153 if (TYPE_SIZE_UNIT (TREE_TYPE (ref
))
1154 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref
))) == INTEGER_CST
1155 && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref
))))
1158 wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref
))),
1161 /* If we can't compute the remainder simply force the initial
1162 condition to zero. */
1163 rem
= wi::to_wide (off
);
1164 off
= wide_int_to_tree (ssizetype
, wi::to_wide (off
) - rem
);
1165 memoff
= wide_int_to_tree (TREE_TYPE (memoff
), rem
);
1166 /* And finally replace the initial condition. */
1167 access_fn
= chrec_replace_initial_condition
1168 (access_fn
, fold_convert (orig_type
, off
));
1169 /* ??? This is still not a suitable base object for
1170 dr_may_alias_p - the base object needs to be an
1171 access that covers the object as whole. With
1172 an evolution in the pointer this cannot be
1174 As a band-aid, mark the access so we can special-case
1175 it in dr_may_alias_p. */
1177 ref
= fold_build2_loc (EXPR_LOCATION (ref
),
1178 MEM_REF
, TREE_TYPE (ref
),
1180 MR_DEPENDENCE_CLIQUE (ref
) = MR_DEPENDENCE_CLIQUE (old
);
1181 MR_DEPENDENCE_BASE (ref
) = MR_DEPENDENCE_BASE (old
);
1182 DR_UNCONSTRAINED_BASE (dr
) = true;
1183 access_fns
.safe_push (access_fn
);
1186 else if (DECL_P (ref
))
1188 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */
1189 ref
= build2 (MEM_REF
, TREE_TYPE (ref
),
1190 build_fold_addr_expr (ref
),
1191 build_int_cst (reference_alias_ptr_type (ref
), 0));
1194 DR_BASE_OBJECT (dr
) = ref
;
1195 DR_ACCESS_FNS (dr
) = access_fns
;
1198 /* Extracts the alias analysis information from the memory reference DR. */
1201 dr_analyze_alias (struct data_reference
*dr
)
1203 tree ref
= DR_REF (dr
);
1204 tree base
= get_base_address (ref
), addr
;
1206 if (INDIRECT_REF_P (base
)
1207 || TREE_CODE (base
) == MEM_REF
)
1209 addr
= TREE_OPERAND (base
, 0);
1210 if (TREE_CODE (addr
) == SSA_NAME
)
1211 DR_PTR_INFO (dr
) = SSA_NAME_PTR_INFO (addr
);
1215 /* Frees data reference DR. */
1218 free_data_ref (data_reference_p dr
)
1220 DR_ACCESS_FNS (dr
).release ();
1224 /* Analyze memory reference MEMREF, which is accessed in STMT.
1225 The reference is a read if IS_READ is true, otherwise it is a write.
1226 IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1227 within STMT, i.e. that it might not occur even if STMT is executed
1228 and runs to completion.
1230 Return the data_reference description of MEMREF. NEST is the outermost
1231 loop in which the reference should be instantiated, LOOP is the loop
1232 in which the data reference should be analyzed. */
1234 struct data_reference
*
1235 create_data_ref (edge nest
, loop_p loop
, tree memref
, gimple
*stmt
,
1236 bool is_read
, bool is_conditional_in_stmt
)
1238 struct data_reference
*dr
;
1240 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1242 fprintf (dump_file
, "Creating dr for ");
1243 print_generic_expr (dump_file
, memref
, TDF_SLIM
);
1244 fprintf (dump_file
, "\n");
1247 dr
= XCNEW (struct data_reference
);
1248 DR_STMT (dr
) = stmt
;
1249 DR_REF (dr
) = memref
;
1250 DR_IS_READ (dr
) = is_read
;
1251 DR_IS_CONDITIONAL_IN_STMT (dr
) = is_conditional_in_stmt
;
1253 dr_analyze_innermost (&DR_INNERMOST (dr
), memref
,
1254 nest
!= NULL
? loop
: NULL
, stmt
);
1255 dr_analyze_indices (dr
, nest
, loop
);
1256 dr_analyze_alias (dr
);
1258 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1261 fprintf (dump_file
, "\tbase_address: ");
1262 print_generic_expr (dump_file
, DR_BASE_ADDRESS (dr
), TDF_SLIM
);
1263 fprintf (dump_file
, "\n\toffset from base address: ");
1264 print_generic_expr (dump_file
, DR_OFFSET (dr
), TDF_SLIM
);
1265 fprintf (dump_file
, "\n\tconstant offset from base address: ");
1266 print_generic_expr (dump_file
, DR_INIT (dr
), TDF_SLIM
);
1267 fprintf (dump_file
, "\n\tstep: ");
1268 print_generic_expr (dump_file
, DR_STEP (dr
), TDF_SLIM
);
1269 fprintf (dump_file
, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr
));
1270 fprintf (dump_file
, "\n\tbase misalignment: %d",
1271 DR_BASE_MISALIGNMENT (dr
));
1272 fprintf (dump_file
, "\n\toffset alignment: %d",
1273 DR_OFFSET_ALIGNMENT (dr
));
1274 fprintf (dump_file
, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr
));
1275 fprintf (dump_file
, "\n\tbase_object: ");
1276 print_generic_expr (dump_file
, DR_BASE_OBJECT (dr
), TDF_SLIM
);
1277 fprintf (dump_file
, "\n");
1278 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr
); i
++)
1280 fprintf (dump_file
, "\tAccess function %d: ", i
);
1281 print_generic_stmt (dump_file
, DR_ACCESS_FN (dr
, i
), TDF_SLIM
);
1288 /* A helper function computes order between two tree expressions T1 and T2.
1289 This is used in comparator functions sorting objects based on the order
1290 of tree expressions. The function returns -1, 0, or 1. */
1293 data_ref_compare_tree (tree t1
, tree t2
)
1296 enum tree_code code
;
1306 STRIP_USELESS_TYPE_CONVERSION (t1
);
1307 STRIP_USELESS_TYPE_CONVERSION (t2
);
1311 if (TREE_CODE (t1
) != TREE_CODE (t2
)
1312 && ! (CONVERT_EXPR_P (t1
) && CONVERT_EXPR_P (t2
)))
1313 return TREE_CODE (t1
) < TREE_CODE (t2
) ? -1 : 1;
1315 code
= TREE_CODE (t1
);
1319 return tree_int_cst_compare (t1
, t2
);
1322 if (TREE_STRING_LENGTH (t1
) != TREE_STRING_LENGTH (t2
))
1323 return TREE_STRING_LENGTH (t1
) < TREE_STRING_LENGTH (t2
) ? -1 : 1;
1324 return memcmp (TREE_STRING_POINTER (t1
), TREE_STRING_POINTER (t2
),
1325 TREE_STRING_LENGTH (t1
));
1328 if (SSA_NAME_VERSION (t1
) != SSA_NAME_VERSION (t2
))
1329 return SSA_NAME_VERSION (t1
) < SSA_NAME_VERSION (t2
) ? -1 : 1;
1333 if (POLY_INT_CST_P (t1
))
1334 return compare_sizes_for_sort (wi::to_poly_widest (t1
),
1335 wi::to_poly_widest (t2
));
1337 tclass
= TREE_CODE_CLASS (code
);
1339 /* For decls, compare their UIDs. */
1340 if (tclass
== tcc_declaration
)
1342 if (DECL_UID (t1
) != DECL_UID (t2
))
1343 return DECL_UID (t1
) < DECL_UID (t2
) ? -1 : 1;
1346 /* For expressions, compare their operands recursively. */
1347 else if (IS_EXPR_CODE_CLASS (tclass
))
1349 for (i
= TREE_OPERAND_LENGTH (t1
) - 1; i
>= 0; --i
)
1351 cmp
= data_ref_compare_tree (TREE_OPERAND (t1
, i
),
1352 TREE_OPERAND (t2
, i
));
1364 /* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1368 runtime_alias_check_p (ddr_p ddr
, class loop
*loop
, bool speed_p
)
1370 if (dump_enabled_p ())
1371 dump_printf (MSG_NOTE
,
1372 "consider run-time aliasing test between %T and %T\n",
1373 DR_REF (DDR_A (ddr
)), DR_REF (DDR_B (ddr
)));
1376 return opt_result::failure_at (DR_STMT (DDR_A (ddr
)),
1377 "runtime alias check not supported when"
1378 " optimizing for size.\n");
1380 /* FORNOW: We don't support versioning with outer-loop in either
1381 vectorization or loop distribution. */
1382 if (loop
!= NULL
&& loop
->inner
!= NULL
)
1383 return opt_result::failure_at (DR_STMT (DDR_A (ddr
)),
1384 "runtime alias check not supported for"
1387 return opt_result::success ();
1390 /* Operator == between two dr_with_seg_len objects.
1392 This equality operator is used to make sure two data refs
1393 are the same one so that we will consider to combine the
1394 aliasing checks of those two pairs of data dependent data
1398 operator == (const dr_with_seg_len
& d1
,
1399 const dr_with_seg_len
& d2
)
1401 return (operand_equal_p (DR_BASE_ADDRESS (d1
.dr
),
1402 DR_BASE_ADDRESS (d2
.dr
), 0)
1403 && data_ref_compare_tree (DR_OFFSET (d1
.dr
), DR_OFFSET (d2
.dr
)) == 0
1404 && data_ref_compare_tree (DR_INIT (d1
.dr
), DR_INIT (d2
.dr
)) == 0
1405 && data_ref_compare_tree (d1
.seg_len
, d2
.seg_len
) == 0
1406 && known_eq (d1
.access_size
, d2
.access_size
)
1407 && d1
.align
== d2
.align
);
1410 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1411 so that we can combine aliasing checks in one scan. */
1414 comp_dr_with_seg_len_pair (const void *pa_
, const void *pb_
)
1416 const dr_with_seg_len_pair_t
* pa
= (const dr_with_seg_len_pair_t
*) pa_
;
1417 const dr_with_seg_len_pair_t
* pb
= (const dr_with_seg_len_pair_t
*) pb_
;
1418 const dr_with_seg_len
&a1
= pa
->first
, &a2
= pa
->second
;
1419 const dr_with_seg_len
&b1
= pb
->first
, &b2
= pb
->second
;
1421 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1422 if a and c have the same basic address snd step, and b and d have the same
1423 address and step. Therefore, if any a&c or b&d don't have the same address
1424 and step, we don't care the order of those two pairs after sorting. */
1427 if ((comp_res
= data_ref_compare_tree (DR_BASE_ADDRESS (a1
.dr
),
1428 DR_BASE_ADDRESS (b1
.dr
))) != 0)
1430 if ((comp_res
= data_ref_compare_tree (DR_BASE_ADDRESS (a2
.dr
),
1431 DR_BASE_ADDRESS (b2
.dr
))) != 0)
1433 if ((comp_res
= data_ref_compare_tree (DR_STEP (a1
.dr
),
1434 DR_STEP (b1
.dr
))) != 0)
1436 if ((comp_res
= data_ref_compare_tree (DR_STEP (a2
.dr
),
1437 DR_STEP (b2
.dr
))) != 0)
1439 if ((comp_res
= data_ref_compare_tree (DR_OFFSET (a1
.dr
),
1440 DR_OFFSET (b1
.dr
))) != 0)
1442 if ((comp_res
= data_ref_compare_tree (DR_INIT (a1
.dr
),
1443 DR_INIT (b1
.dr
))) != 0)
1445 if ((comp_res
= data_ref_compare_tree (DR_OFFSET (a2
.dr
),
1446 DR_OFFSET (b2
.dr
))) != 0)
1448 if ((comp_res
= data_ref_compare_tree (DR_INIT (a2
.dr
),
1449 DR_INIT (b2
.dr
))) != 0)
1455 /* Dump information about ALIAS_PAIR, indenting each line by INDENT. */
1458 dump_alias_pair (dr_with_seg_len_pair_t
*alias_pair
, const char *indent
)
1460 dump_printf (MSG_NOTE
, "%sreference: %T vs. %T\n", indent
,
1461 DR_REF (alias_pair
->first
.dr
),
1462 DR_REF (alias_pair
->second
.dr
));
1464 dump_printf (MSG_NOTE
, "%ssegment length: %T", indent
,
1465 alias_pair
->first
.seg_len
);
1466 if (!operand_equal_p (alias_pair
->first
.seg_len
,
1467 alias_pair
->second
.seg_len
, 0))
1468 dump_printf (MSG_NOTE
, " vs. %T", alias_pair
->second
.seg_len
);
1470 dump_printf (MSG_NOTE
, "\n%saccess size: ", indent
);
1471 dump_dec (MSG_NOTE
, alias_pair
->first
.access_size
);
1472 if (maybe_ne (alias_pair
->first
.access_size
, alias_pair
->second
.access_size
))
1474 dump_printf (MSG_NOTE
, " vs. ");
1475 dump_dec (MSG_NOTE
, alias_pair
->second
.access_size
);
1478 dump_printf (MSG_NOTE
, "\n%salignment: %d", indent
,
1479 alias_pair
->first
.align
);
1480 if (alias_pair
->first
.align
!= alias_pair
->second
.align
)
1481 dump_printf (MSG_NOTE
, " vs. %d", alias_pair
->second
.align
);
1483 dump_printf (MSG_NOTE
, "\n%sflags: ", indent
);
1484 if (alias_pair
->flags
& DR_ALIAS_RAW
)
1485 dump_printf (MSG_NOTE
, " RAW");
1486 if (alias_pair
->flags
& DR_ALIAS_WAR
)
1487 dump_printf (MSG_NOTE
, " WAR");
1488 if (alias_pair
->flags
& DR_ALIAS_WAW
)
1489 dump_printf (MSG_NOTE
, " WAW");
1490 if (alias_pair
->flags
& DR_ALIAS_ARBITRARY
)
1491 dump_printf (MSG_NOTE
, " ARBITRARY");
1492 if (alias_pair
->flags
& DR_ALIAS_SWAPPED
)
1493 dump_printf (MSG_NOTE
, " SWAPPED");
1494 if (alias_pair
->flags
& DR_ALIAS_UNSWAPPED
)
1495 dump_printf (MSG_NOTE
, " UNSWAPPED");
1496 if (alias_pair
->flags
& DR_ALIAS_MIXED_STEPS
)
1497 dump_printf (MSG_NOTE
, " MIXED_STEPS");
1498 if (alias_pair
->flags
== 0)
1499 dump_printf (MSG_NOTE
, " <none>");
1500 dump_printf (MSG_NOTE
, "\n");
1503 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1504 FACTOR is number of iterations that each data reference is accessed.
1506 Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1507 we create an expression:
1509 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1510 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1512 for aliasing checks. However, in some cases we can decrease the number
1513 of checks by combining two checks into one. For example, suppose we have
1514 another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1515 condition is satisfied:
1517 load_ptr_0 < load_ptr_1 &&
1518 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1520 (this condition means, in each iteration of vectorized loop, the accessed
1521 memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1524 we then can use only the following expression to finish the alising checks
1525 between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1527 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1528 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1530 Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1534 prune_runtime_alias_test_list (vec
<dr_with_seg_len_pair_t
> *alias_pairs
,
1537 /* Canonicalize each pair so that the base components are ordered wrt
1538 data_ref_compare_tree. This allows the loop below to merge more
1541 dr_with_seg_len_pair_t
*alias_pair
;
1542 FOR_EACH_VEC_ELT (*alias_pairs
, i
, alias_pair
)
1544 data_reference_p dr_a
= alias_pair
->first
.dr
;
1545 data_reference_p dr_b
= alias_pair
->second
.dr
;
1546 int comp_res
= data_ref_compare_tree (DR_BASE_ADDRESS (dr_a
),
1547 DR_BASE_ADDRESS (dr_b
));
1549 comp_res
= data_ref_compare_tree (DR_OFFSET (dr_a
), DR_OFFSET (dr_b
));
1551 comp_res
= data_ref_compare_tree (DR_INIT (dr_a
), DR_INIT (dr_b
));
1554 std::swap (alias_pair
->first
, alias_pair
->second
);
1555 alias_pair
->flags
|= DR_ALIAS_SWAPPED
;
1558 alias_pair
->flags
|= DR_ALIAS_UNSWAPPED
;
1561 /* Sort the collected data ref pairs so that we can scan them once to
1562 combine all possible aliasing checks. */
1563 alias_pairs
->qsort (comp_dr_with_seg_len_pair
);
1565 /* Scan the sorted dr pairs and check if we can combine alias checks
1566 of two neighboring dr pairs. */
1567 for (i
= 1; i
< alias_pairs
->length (); ++i
)
1569 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
1570 dr_with_seg_len_pair_t
*alias_pair1
= &(*alias_pairs
)[i
- 1];
1571 dr_with_seg_len_pair_t
*alias_pair2
= &(*alias_pairs
)[i
];
1573 dr_with_seg_len
*dr_a1
= &alias_pair1
->first
;
1574 dr_with_seg_len
*dr_b1
= &alias_pair1
->second
;
1575 dr_with_seg_len
*dr_a2
= &alias_pair2
->first
;
1576 dr_with_seg_len
*dr_b2
= &alias_pair2
->second
;
1578 /* Remove duplicate data ref pairs. */
1579 if (*dr_a1
== *dr_a2
&& *dr_b1
== *dr_b2
)
1581 if (dump_enabled_p ())
1582 dump_printf (MSG_NOTE
, "found equal ranges %T, %T and %T, %T\n",
1583 DR_REF (dr_a1
->dr
), DR_REF (dr_b1
->dr
),
1584 DR_REF (dr_a2
->dr
), DR_REF (dr_b2
->dr
));
1585 alias_pair1
->flags
|= alias_pair2
->flags
;
1586 alias_pairs
->ordered_remove (i
--);
1590 if (*dr_a1
== *dr_a2
|| *dr_b1
== *dr_b2
)
1592 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1593 and DR_A1 and DR_A2 are two consecutive memrefs. */
1594 if (*dr_a1
== *dr_a2
)
1596 std::swap (dr_a1
, dr_b1
);
1597 std::swap (dr_a2
, dr_b2
);
1600 poly_int64 init_a1
, init_a2
;
1601 /* Only consider cases in which the distance between the initial
1602 DR_A1 and the initial DR_A2 is known at compile time. */
1603 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1
->dr
),
1604 DR_BASE_ADDRESS (dr_a2
->dr
), 0)
1605 || !operand_equal_p (DR_OFFSET (dr_a1
->dr
),
1606 DR_OFFSET (dr_a2
->dr
), 0)
1607 || !poly_int_tree_p (DR_INIT (dr_a1
->dr
), &init_a1
)
1608 || !poly_int_tree_p (DR_INIT (dr_a2
->dr
), &init_a2
))
1611 /* Don't combine if we can't tell which one comes first. */
1612 if (!ordered_p (init_a1
, init_a2
))
1615 /* Work out what the segment length would be if we did combine
1618 - If DR_A1 and DR_A2 have equal lengths, that length is
1619 also the combined length.
1621 - If DR_A1 and DR_A2 both have negative "lengths", the combined
1622 length is the lower bound on those lengths.
1624 - If DR_A1 and DR_A2 both have positive lengths, the combined
1625 length is the upper bound on those lengths.
1627 Other cases are unlikely to give a useful combination.
1629 The lengths both have sizetype, so the sign is taken from
1630 the step instead. */
1631 poly_uint64 new_seg_len
= 0;
1632 bool new_seg_len_p
= !operand_equal_p (dr_a1
->seg_len
,
1636 poly_uint64 seg_len_a1
, seg_len_a2
;
1637 if (!poly_int_tree_p (dr_a1
->seg_len
, &seg_len_a1
)
1638 || !poly_int_tree_p (dr_a2
->seg_len
, &seg_len_a2
))
1641 tree indicator_a
= dr_direction_indicator (dr_a1
->dr
);
1642 if (TREE_CODE (indicator_a
) != INTEGER_CST
)
1645 tree indicator_b
= dr_direction_indicator (dr_a2
->dr
);
1646 if (TREE_CODE (indicator_b
) != INTEGER_CST
)
1649 int sign_a
= tree_int_cst_sgn (indicator_a
);
1650 int sign_b
= tree_int_cst_sgn (indicator_b
);
1652 if (sign_a
<= 0 && sign_b
<= 0)
1653 new_seg_len
= lower_bound (seg_len_a1
, seg_len_a2
);
1654 else if (sign_a
>= 0 && sign_b
>= 0)
1655 new_seg_len
= upper_bound (seg_len_a1
, seg_len_a2
);
1659 /* At this point we're committed to merging the refs. */
1661 /* Make sure dr_a1 starts left of dr_a2. */
1662 if (maybe_gt (init_a1
, init_a2
))
1664 std::swap (*dr_a1
, *dr_a2
);
1665 std::swap (init_a1
, init_a2
);
1668 /* The DR_Bs are equal, so only the DR_As can introduce
1670 if (!operand_equal_p (DR_STEP (dr_a1
->dr
), DR_STEP (dr_a2
->dr
), 0))
1671 alias_pair1
->flags
|= DR_ALIAS_MIXED_STEPS
;
1675 dr_a1
->seg_len
= build_int_cst (TREE_TYPE (dr_a1
->seg_len
),
1677 dr_a1
->align
= MIN (dr_a1
->align
, known_alignment (new_seg_len
));
1680 /* This is always positive due to the swap above. */
1681 poly_uint64 diff
= init_a2
- init_a1
;
1683 /* The new check will start at DR_A1. Make sure that its access
1684 size encompasses the initial DR_A2. */
1685 if (maybe_lt (dr_a1
->access_size
, diff
+ dr_a2
->access_size
))
1687 dr_a1
->access_size
= upper_bound (dr_a1
->access_size
,
1688 diff
+ dr_a2
->access_size
);
1689 unsigned int new_align
= known_alignment (dr_a1
->access_size
);
1690 dr_a1
->align
= MIN (dr_a1
->align
, new_align
);
1692 if (dump_enabled_p ())
1693 dump_printf (MSG_NOTE
, "merging ranges for %T, %T and %T, %T\n",
1694 DR_REF (dr_a1
->dr
), DR_REF (dr_b1
->dr
),
1695 DR_REF (dr_a2
->dr
), DR_REF (dr_b2
->dr
));
1696 alias_pair1
->flags
|= alias_pair2
->flags
;
1697 alias_pairs
->ordered_remove (i
);
1702 /* Try to restore the original dr_with_seg_len order within each
1703 dr_with_seg_len_pair_t. If we ended up combining swapped and
1704 unswapped pairs into the same check, we have to invalidate any
1705 RAW, WAR and WAW information for it. */
1706 if (dump_enabled_p ())
1707 dump_printf (MSG_NOTE
, "merged alias checks:\n");
1708 FOR_EACH_VEC_ELT (*alias_pairs
, i
, alias_pair
)
1710 unsigned int swap_mask
= (DR_ALIAS_SWAPPED
| DR_ALIAS_UNSWAPPED
);
1711 unsigned int swapped
= (alias_pair
->flags
& swap_mask
);
1712 if (swapped
== DR_ALIAS_SWAPPED
)
1713 std::swap (alias_pair
->first
, alias_pair
->second
);
1714 else if (swapped
!= DR_ALIAS_UNSWAPPED
)
1715 alias_pair
->flags
|= DR_ALIAS_ARBITRARY
;
1716 alias_pair
->flags
&= ~swap_mask
;
1717 if (dump_enabled_p ())
1718 dump_alias_pair (alias_pair
, " ");
1722 /* Try to generate a runtime condition that is true if ALIAS_PAIR is
1723 free of aliases, using a condition based on index values instead
1724 of a condition based on addresses. Return true on success,
1725 storing the condition in *COND_EXPR.
1727 This can only be done if the two data references in ALIAS_PAIR access
1728 the same array object and the index is the only difference. For example,
1729 if the two data references are DR_A and DR_B:
1732 data-ref arr[i] arr[j]
1734 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1736 The addresses and their index are like:
1738 |<- ADDR_A ->| |<- ADDR_B ->|
1739 ------------------------------------------------------->
1741 ------------------------------------------------------->
1742 i_0 ... i_0+4 j_0 ... j_0+4
1744 We can create expression based on index rather than address:
1746 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1748 Note evolution step of index needs to be considered in comparison. */
1751 create_intersect_range_checks_index (class loop
*loop
, tree
*cond_expr
,
1752 const dr_with_seg_len_pair_t
&alias_pair
)
1754 const dr_with_seg_len
&dr_a
= alias_pair
.first
;
1755 const dr_with_seg_len
&dr_b
= alias_pair
.second
;
1756 if ((alias_pair
.flags
& DR_ALIAS_MIXED_STEPS
)
1757 || integer_zerop (DR_STEP (dr_a
.dr
))
1758 || integer_zerop (DR_STEP (dr_b
.dr
))
1759 || DR_NUM_DIMENSIONS (dr_a
.dr
) != DR_NUM_DIMENSIONS (dr_b
.dr
))
1762 poly_uint64 seg_len1
, seg_len2
;
1763 if (!poly_int_tree_p (dr_a
.seg_len
, &seg_len1
)
1764 || !poly_int_tree_p (dr_b
.seg_len
, &seg_len2
))
1767 if (!tree_fits_shwi_p (DR_STEP (dr_a
.dr
)))
1770 if (!operand_equal_p (DR_BASE_OBJECT (dr_a
.dr
), DR_BASE_OBJECT (dr_b
.dr
), 0))
1773 if (!operand_equal_p (DR_STEP (dr_a
.dr
), DR_STEP (dr_b
.dr
), 0))
1776 gcc_assert (TREE_CODE (DR_STEP (dr_a
.dr
)) == INTEGER_CST
);
1778 bool neg_step
= tree_int_cst_compare (DR_STEP (dr_a
.dr
), size_zero_node
) < 0;
1779 unsigned HOST_WIDE_INT abs_step
= tree_to_shwi (DR_STEP (dr_a
.dr
));
1782 abs_step
= -abs_step
;
1783 seg_len1
= -seg_len1
;
1784 seg_len2
= -seg_len2
;
1788 /* Include the access size in the length, so that we only have one
1789 tree addition below. */
1790 seg_len1
+= dr_a
.access_size
;
1791 seg_len2
+= dr_b
.access_size
;
1794 /* Infer the number of iterations with which the memory segment is accessed
1795 by DR. In other words, alias is checked if memory segment accessed by
1796 DR_A in some iterations intersect with memory segment accessed by DR_B
1797 in the same amount iterations.
1798 Note segnment length is a linear function of number of iterations with
1799 DR_STEP as the coefficient. */
1800 poly_uint64 niter_len1
, niter_len2
;
1801 if (!can_div_trunc_p (seg_len1
+ abs_step
- 1, abs_step
, &niter_len1
)
1802 || !can_div_trunc_p (seg_len2
+ abs_step
- 1, abs_step
, &niter_len2
))
1805 poly_uint64 niter_access1
= 0, niter_access2
= 0;
1808 /* Divide each access size by the byte step, rounding up. */
1809 if (!can_div_trunc_p (dr_a
.access_size
- abs_step
- 1,
1810 abs_step
, &niter_access1
)
1811 || !can_div_trunc_p (dr_b
.access_size
+ abs_step
- 1,
1812 abs_step
, &niter_access2
))
1817 for (i
= 0; i
< DR_NUM_DIMENSIONS (dr_a
.dr
); i
++)
1819 tree access1
= DR_ACCESS_FN (dr_a
.dr
, i
);
1820 tree access2
= DR_ACCESS_FN (dr_b
.dr
, i
);
1821 /* Two indices must be the same if they are not scev, or not scev wrto
1822 current loop being vecorized. */
1823 if (TREE_CODE (access1
) != POLYNOMIAL_CHREC
1824 || TREE_CODE (access2
) != POLYNOMIAL_CHREC
1825 || CHREC_VARIABLE (access1
) != (unsigned)loop
->num
1826 || CHREC_VARIABLE (access2
) != (unsigned)loop
->num
)
1828 if (operand_equal_p (access1
, access2
, 0))
1833 /* The two indices must have the same step. */
1834 if (!operand_equal_p (CHREC_RIGHT (access1
), CHREC_RIGHT (access2
), 0))
1837 tree idx_step
= CHREC_RIGHT (access1
);
1838 /* Index must have const step, otherwise DR_STEP won't be constant. */
1839 gcc_assert (TREE_CODE (idx_step
) == INTEGER_CST
);
1840 /* Index must evaluate in the same direction as DR. */
1841 gcc_assert (!neg_step
|| tree_int_cst_sign_bit (idx_step
) == 1);
1843 tree min1
= CHREC_LEFT (access1
);
1844 tree min2
= CHREC_LEFT (access2
);
1845 if (!types_compatible_p (TREE_TYPE (min1
), TREE_TYPE (min2
)))
1848 /* Ideally, alias can be checked against loop's control IV, but we
1849 need to prove linear mapping between control IV and reference
1850 index. Although that should be true, we check against (array)
1851 index of data reference. Like segment length, index length is
1852 linear function of the number of iterations with index_step as
1853 the coefficient, i.e, niter_len * idx_step. */
1854 tree idx_len1
= fold_build2 (MULT_EXPR
, TREE_TYPE (min1
), idx_step
,
1855 build_int_cst (TREE_TYPE (min1
),
1857 tree idx_len2
= fold_build2 (MULT_EXPR
, TREE_TYPE (min2
), idx_step
,
1858 build_int_cst (TREE_TYPE (min2
),
1860 tree max1
= fold_build2 (PLUS_EXPR
, TREE_TYPE (min1
), min1
, idx_len1
);
1861 tree max2
= fold_build2 (PLUS_EXPR
, TREE_TYPE (min2
), min2
, idx_len2
);
1862 /* Adjust ranges for negative step. */
1865 /* IDX_LEN1 and IDX_LEN2 are negative in this case. */
1866 std::swap (min1
, max1
);
1867 std::swap (min2
, max2
);
1869 /* As with the lengths just calculated, we've measured the access
1870 sizes in iterations, so multiply them by the index step. */
1872 = fold_build2 (MULT_EXPR
, TREE_TYPE (min1
), idx_step
,
1873 build_int_cst (TREE_TYPE (min1
), niter_access1
));
1875 = fold_build2 (MULT_EXPR
, TREE_TYPE (min2
), idx_step
,
1876 build_int_cst (TREE_TYPE (min2
), niter_access2
));
1878 /* MINUS_EXPR because the above values are negative. */
1879 max1
= fold_build2 (MINUS_EXPR
, TREE_TYPE (max1
), max1
, idx_access1
);
1880 max2
= fold_build2 (MINUS_EXPR
, TREE_TYPE (max2
), max2
, idx_access2
);
1883 = fold_build2 (TRUTH_OR_EXPR
, boolean_type_node
,
1884 fold_build2 (LE_EXPR
, boolean_type_node
, max1
, min2
),
1885 fold_build2 (LE_EXPR
, boolean_type_node
, max2
, min1
));
1887 *cond_expr
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
1888 *cond_expr
, part_cond_expr
);
1890 *cond_expr
= part_cond_expr
;
1892 if (dump_enabled_p ())
1893 dump_printf (MSG_NOTE
, "using an index-based overlap test\n");
1897 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
1898 every address ADDR accessed by D:
1900 *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
1902 In this case, every element accessed by D is aligned to at least
1905 If ALIGN is zero then instead set *SEG_MAX_OUT so that:
1907 *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT. */
1910 get_segment_min_max (const dr_with_seg_len
&d
, tree
*seg_min_out
,
1911 tree
*seg_max_out
, HOST_WIDE_INT align
)
1913 /* Each access has the following pattern:
1916 <--- A: -ve step --->
1917 +-----+-------+-----+-------+-----+
1918 | n-1 | ,.... | 0 | ..... | n-1 |
1919 +-----+-------+-----+-------+-----+
1920 <--- B: +ve step --->
1925 where "n" is the number of scalar iterations covered by the segment.
1926 (This should be VF for a particular pair if we know that both steps
1927 are the same, otherwise it will be the full number of scalar loop
1930 A is the range of bytes accessed when the step is negative,
1931 B is the range when the step is positive.
1933 If the access size is "access_size" bytes, the lowest addressed byte is:
1935 base + (step < 0 ? seg_len : 0) [LB]
1937 and the highest addressed byte is always below:
1939 base + (step < 0 ? 0 : seg_len) + access_size [UB]
1945 If ALIGN is nonzero, all three values are aligned to at least ALIGN
1948 LB <= ADDR <= UB - ALIGN
1950 where "- ALIGN" folds naturally with the "+ access_size" and often
1953 We don't try to simplify LB and UB beyond this (e.g. by using
1954 MIN and MAX based on whether seg_len rather than the stride is
1955 negative) because it is possible for the absolute size of the
1956 segment to overflow the range of a ssize_t.
1958 Keeping the pointer_plus outside of the cond_expr should allow
1959 the cond_exprs to be shared with other alias checks. */
1960 tree indicator
= dr_direction_indicator (d
.dr
);
1961 tree neg_step
= fold_build2 (LT_EXPR
, boolean_type_node
,
1962 fold_convert (ssizetype
, indicator
),
1964 tree addr_base
= fold_build_pointer_plus (DR_BASE_ADDRESS (d
.dr
),
1966 addr_base
= fold_build_pointer_plus (addr_base
, DR_INIT (d
.dr
));
1968 = fold_convert (sizetype
, rewrite_to_non_trapping_overflow (d
.seg_len
));
1970 tree min_reach
= fold_build3 (COND_EXPR
, sizetype
, neg_step
,
1971 seg_len
, size_zero_node
);
1972 tree max_reach
= fold_build3 (COND_EXPR
, sizetype
, neg_step
,
1973 size_zero_node
, seg_len
);
1974 max_reach
= fold_build2 (PLUS_EXPR
, sizetype
, max_reach
,
1975 size_int (d
.access_size
- align
));
1977 *seg_min_out
= fold_build_pointer_plus (addr_base
, min_reach
);
1978 *seg_max_out
= fold_build_pointer_plus (addr_base
, max_reach
);
1981 /* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
1982 storing the condition in *COND_EXPR. The fallback is to generate a
1983 a test that the two accesses do not overlap:
1985 end_a <= start_b || end_b <= start_a. */
1988 create_intersect_range_checks (class loop
*loop
, tree
*cond_expr
,
1989 const dr_with_seg_len_pair_t
&alias_pair
)
1991 const dr_with_seg_len
& dr_a
= alias_pair
.first
;
1992 const dr_with_seg_len
& dr_b
= alias_pair
.second
;
1993 *cond_expr
= NULL_TREE
;
1994 if (create_intersect_range_checks_index (loop
, cond_expr
, alias_pair
))
1997 unsigned HOST_WIDE_INT min_align
;
1999 /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2000 are equivalent. This is just an optimization heuristic. */
2001 if (TREE_CODE (DR_STEP (dr_a
.dr
)) == INTEGER_CST
2002 && TREE_CODE (DR_STEP (dr_b
.dr
)) == INTEGER_CST
)
2004 /* In this case adding access_size to seg_len is likely to give
2005 a simple X * step, where X is either the number of scalar
2006 iterations or the vectorization factor. We're better off
2007 keeping that, rather than subtracting an alignment from it.
2009 In this case the maximum values are exclusive and so there is
2010 no alias if the maximum of one segment equals the minimum
2017 /* Calculate the minimum alignment shared by all four pointers,
2018 then arrange for this alignment to be subtracted from the
2019 exclusive maximum values to get inclusive maximum values.
2020 This "- min_align" is cumulative with a "+ access_size"
2021 in the calculation of the maximum values. In the best
2022 (and common) case, the two cancel each other out, leaving
2023 us with an inclusive bound based only on seg_len. In the
2024 worst case we're simply adding a smaller number than before.
2026 Because the maximum values are inclusive, there is an alias
2027 if the maximum value of one segment is equal to the minimum
2028 value of the other. */
2029 min_align
= MIN (dr_a
.align
, dr_b
.align
);
2033 tree seg_a_min
, seg_a_max
, seg_b_min
, seg_b_max
;
2034 get_segment_min_max (dr_a
, &seg_a_min
, &seg_a_max
, min_align
);
2035 get_segment_min_max (dr_b
, &seg_b_min
, &seg_b_max
, min_align
);
2038 = fold_build2 (TRUTH_OR_EXPR
, boolean_type_node
,
2039 fold_build2 (cmp_code
, boolean_type_node
, seg_a_max
, seg_b_min
),
2040 fold_build2 (cmp_code
, boolean_type_node
, seg_b_max
, seg_a_min
));
2041 if (dump_enabled_p ())
2042 dump_printf (MSG_NOTE
, "using an address-based overlap test\n");
2045 /* Create a conditional expression that represents the run-time checks for
2046 overlapping of address ranges represented by a list of data references
2047 pairs passed in ALIAS_PAIRS. Data references are in LOOP. The returned
2048 COND_EXPR is the conditional expression to be used in the if statement
2049 that controls which version of the loop gets executed at runtime. */
2052 create_runtime_alias_checks (class loop
*loop
,
2053 vec
<dr_with_seg_len_pair_t
> *alias_pairs
,
2056 tree part_cond_expr
;
2058 fold_defer_overflow_warnings ();
2059 dr_with_seg_len_pair_t
*alias_pair
;
2061 FOR_EACH_VEC_ELT (*alias_pairs
, i
, alias_pair
)
2063 gcc_assert (alias_pair
->flags
);
2064 if (dump_enabled_p ())
2065 dump_printf (MSG_NOTE
,
2066 "create runtime check for data references %T and %T\n",
2067 DR_REF (alias_pair
->first
.dr
),
2068 DR_REF (alias_pair
->second
.dr
));
2070 /* Create condition expression for each pair data references. */
2071 create_intersect_range_checks (loop
, &part_cond_expr
, *alias_pair
);
2073 *cond_expr
= fold_build2 (TRUTH_AND_EXPR
, boolean_type_node
,
2074 *cond_expr
, part_cond_expr
);
2076 *cond_expr
= part_cond_expr
;
2078 fold_undefer_and_ignore_overflow_warnings ();
2081 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2084 dr_equal_offsets_p1 (tree offset1
, tree offset2
)
2088 STRIP_NOPS (offset1
);
2089 STRIP_NOPS (offset2
);
2091 if (offset1
== offset2
)
2094 if (TREE_CODE (offset1
) != TREE_CODE (offset2
)
2095 || (!BINARY_CLASS_P (offset1
) && !UNARY_CLASS_P (offset1
)))
2098 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 0),
2099 TREE_OPERAND (offset2
, 0));
2101 if (!res
|| !BINARY_CLASS_P (offset1
))
2104 res
= dr_equal_offsets_p1 (TREE_OPERAND (offset1
, 1),
2105 TREE_OPERAND (offset2
, 1));
2110 /* Check if DRA and DRB have equal offsets. */
2112 dr_equal_offsets_p (struct data_reference
*dra
,
2113 struct data_reference
*drb
)
2115 tree offset1
, offset2
;
2117 offset1
= DR_OFFSET (dra
);
2118 offset2
= DR_OFFSET (drb
);
2120 return dr_equal_offsets_p1 (offset1
, offset2
);
2123 /* Returns true if FNA == FNB. */
2126 affine_function_equal_p (affine_fn fna
, affine_fn fnb
)
2128 unsigned i
, n
= fna
.length ();
2130 if (n
!= fnb
.length ())
2133 for (i
= 0; i
< n
; i
++)
2134 if (!operand_equal_p (fna
[i
], fnb
[i
], 0))
2140 /* If all the functions in CF are the same, returns one of them,
2141 otherwise returns NULL. */
2144 common_affine_function (conflict_function
*cf
)
2149 if (!CF_NONTRIVIAL_P (cf
))
2150 return affine_fn ();
2154 for (i
= 1; i
< cf
->n
; i
++)
2155 if (!affine_function_equal_p (comm
, cf
->fns
[i
]))
2156 return affine_fn ();
2161 /* Returns the base of the affine function FN. */
2164 affine_function_base (affine_fn fn
)
2169 /* Returns true if FN is a constant. */
2172 affine_function_constant_p (affine_fn fn
)
2177 for (i
= 1; fn
.iterate (i
, &coef
); i
++)
2178 if (!integer_zerop (coef
))
2184 /* Returns true if FN is the zero constant function. */
2187 affine_function_zero_p (affine_fn fn
)
2189 return (integer_zerop (affine_function_base (fn
))
2190 && affine_function_constant_p (fn
));
2193 /* Returns a signed integer type with the largest precision from TA
2197 signed_type_for_types (tree ta
, tree tb
)
2199 if (TYPE_PRECISION (ta
) > TYPE_PRECISION (tb
))
2200 return signed_type_for (ta
);
2202 return signed_type_for (tb
);
2205 /* Applies operation OP on affine functions FNA and FNB, and returns the
2209 affine_fn_op (enum tree_code op
, affine_fn fna
, affine_fn fnb
)
2215 if (fnb
.length () > fna
.length ())
2227 for (i
= 0; i
< n
; i
++)
2229 tree type
= signed_type_for_types (TREE_TYPE (fna
[i
]),
2230 TREE_TYPE (fnb
[i
]));
2231 ret
.quick_push (fold_build2 (op
, type
, fna
[i
], fnb
[i
]));
2234 for (; fna
.iterate (i
, &coef
); i
++)
2235 ret
.quick_push (fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
2236 coef
, integer_zero_node
));
2237 for (; fnb
.iterate (i
, &coef
); i
++)
2238 ret
.quick_push (fold_build2 (op
, signed_type_for (TREE_TYPE (coef
)),
2239 integer_zero_node
, coef
));
2244 /* Returns the sum of affine functions FNA and FNB. */
2247 affine_fn_plus (affine_fn fna
, affine_fn fnb
)
2249 return affine_fn_op (PLUS_EXPR
, fna
, fnb
);
2252 /* Returns the difference of affine functions FNA and FNB. */
2255 affine_fn_minus (affine_fn fna
, affine_fn fnb
)
2257 return affine_fn_op (MINUS_EXPR
, fna
, fnb
);
2260 /* Frees affine function FN. */
2263 affine_fn_free (affine_fn fn
)
2268 /* Determine for each subscript in the data dependence relation DDR
2272 compute_subscript_distance (struct data_dependence_relation
*ddr
)
2274 conflict_function
*cf_a
, *cf_b
;
2275 affine_fn fn_a
, fn_b
, diff
;
2277 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
2281 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
2283 struct subscript
*subscript
;
2285 subscript
= DDR_SUBSCRIPT (ddr
, i
);
2286 cf_a
= SUB_CONFLICTS_IN_A (subscript
);
2287 cf_b
= SUB_CONFLICTS_IN_B (subscript
);
2289 fn_a
= common_affine_function (cf_a
);
2290 fn_b
= common_affine_function (cf_b
);
2291 if (!fn_a
.exists () || !fn_b
.exists ())
2293 SUB_DISTANCE (subscript
) = chrec_dont_know
;
2296 diff
= affine_fn_minus (fn_a
, fn_b
);
2298 if (affine_function_constant_p (diff
))
2299 SUB_DISTANCE (subscript
) = affine_function_base (diff
);
2301 SUB_DISTANCE (subscript
) = chrec_dont_know
;
2303 affine_fn_free (diff
);
2308 /* Returns the conflict function for "unknown". */
2310 static conflict_function
*
2311 conflict_fn_not_known (void)
2313 conflict_function
*fn
= XCNEW (conflict_function
);
2319 /* Returns the conflict function for "independent". */
2321 static conflict_function
*
2322 conflict_fn_no_dependence (void)
2324 conflict_function
*fn
= XCNEW (conflict_function
);
2325 fn
->n
= NO_DEPENDENCE
;
2330 /* Returns true if the address of OBJ is invariant in LOOP. */
2333 object_address_invariant_in_loop_p (const class loop
*loop
, const_tree obj
)
2335 while (handled_component_p (obj
))
2337 if (TREE_CODE (obj
) == ARRAY_REF
)
2339 for (int i
= 1; i
< 4; ++i
)
2340 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, i
),
2344 else if (TREE_CODE (obj
) == COMPONENT_REF
)
2346 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 2),
2350 obj
= TREE_OPERAND (obj
, 0);
2353 if (!INDIRECT_REF_P (obj
)
2354 && TREE_CODE (obj
) != MEM_REF
)
2357 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj
, 0),
2361 /* Returns false if we can prove that data references A and B do not alias,
2362 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
2366 dr_may_alias_p (const struct data_reference
*a
, const struct data_reference
*b
,
2367 class loop
*loop_nest
)
2369 tree addr_a
= DR_BASE_OBJECT (a
);
2370 tree addr_b
= DR_BASE_OBJECT (b
);
2372 /* If we are not processing a loop nest but scalar code we
2373 do not need to care about possible cross-iteration dependences
2374 and thus can process the full original reference. Do so,
2375 similar to how loop invariant motion applies extra offset-based
2379 aff_tree off1
, off2
;
2380 poly_widest_int size1
, size2
;
2381 get_inner_reference_aff (DR_REF (a
), &off1
, &size1
);
2382 get_inner_reference_aff (DR_REF (b
), &off2
, &size2
);
2383 aff_combination_scale (&off1
, -1);
2384 aff_combination_add (&off2
, &off1
);
2385 if (aff_comb_cannot_overlap_p (&off2
, size1
, size2
))
2389 if ((TREE_CODE (addr_a
) == MEM_REF
|| TREE_CODE (addr_a
) == TARGET_MEM_REF
)
2390 && (TREE_CODE (addr_b
) == MEM_REF
|| TREE_CODE (addr_b
) == TARGET_MEM_REF
)
2391 /* For cross-iteration dependences the cliques must be valid for the
2392 whole loop, not just individual iterations. */
2394 || MR_DEPENDENCE_CLIQUE (addr_a
) == 1
2395 || MR_DEPENDENCE_CLIQUE (addr_a
) == loop_nest
->owned_clique
)
2396 && MR_DEPENDENCE_CLIQUE (addr_a
) == MR_DEPENDENCE_CLIQUE (addr_b
)
2397 && MR_DEPENDENCE_BASE (addr_a
) != MR_DEPENDENCE_BASE (addr_b
))
2400 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2401 do not know the size of the base-object. So we cannot do any
2402 offset/overlap based analysis but have to rely on points-to
2403 information only. */
2404 if (TREE_CODE (addr_a
) == MEM_REF
2405 && (DR_UNCONSTRAINED_BASE (a
)
2406 || TREE_CODE (TREE_OPERAND (addr_a
, 0)) == SSA_NAME
))
2408 /* For true dependences we can apply TBAA. */
2409 if (flag_strict_aliasing
2410 && DR_IS_WRITE (a
) && DR_IS_READ (b
)
2411 && !alias_sets_conflict_p (get_alias_set (DR_REF (a
)),
2412 get_alias_set (DR_REF (b
))))
2414 if (TREE_CODE (addr_b
) == MEM_REF
)
2415 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
2416 TREE_OPERAND (addr_b
, 0));
2418 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
2419 build_fold_addr_expr (addr_b
));
2421 else if (TREE_CODE (addr_b
) == MEM_REF
2422 && (DR_UNCONSTRAINED_BASE (b
)
2423 || TREE_CODE (TREE_OPERAND (addr_b
, 0)) == SSA_NAME
))
2425 /* For true dependences we can apply TBAA. */
2426 if (flag_strict_aliasing
2427 && DR_IS_WRITE (a
) && DR_IS_READ (b
)
2428 && !alias_sets_conflict_p (get_alias_set (DR_REF (a
)),
2429 get_alias_set (DR_REF (b
))))
2431 if (TREE_CODE (addr_a
) == MEM_REF
)
2432 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a
, 0),
2433 TREE_OPERAND (addr_b
, 0));
2435 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a
),
2436 TREE_OPERAND (addr_b
, 0));
2439 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2440 that is being subsetted in the loop nest. */
2441 if (DR_IS_WRITE (a
) && DR_IS_WRITE (b
))
2442 return refs_output_dependent_p (addr_a
, addr_b
);
2443 else if (DR_IS_READ (a
) && DR_IS_WRITE (b
))
2444 return refs_anti_dependent_p (addr_a
, addr_b
);
2445 return refs_may_alias_p (addr_a
, addr_b
);
2448 /* REF_A and REF_B both satisfy access_fn_component_p. Return true
2449 if it is meaningful to compare their associated access functions
2450 when checking for dependencies. */
2453 access_fn_components_comparable_p (tree ref_a
, tree ref_b
)
2455 /* Allow pairs of component refs from the following sets:
2457 { REALPART_EXPR, IMAGPART_EXPR }
2460 tree_code code_a
= TREE_CODE (ref_a
);
2461 tree_code code_b
= TREE_CODE (ref_b
);
2462 if (code_a
== IMAGPART_EXPR
)
2463 code_a
= REALPART_EXPR
;
2464 if (code_b
== IMAGPART_EXPR
)
2465 code_b
= REALPART_EXPR
;
2466 if (code_a
!= code_b
)
2469 if (TREE_CODE (ref_a
) == COMPONENT_REF
)
2470 /* ??? We cannot simply use the type of operand #0 of the refs here as
2471 the Fortran compiler smuggles type punning into COMPONENT_REFs.
2472 Use the DECL_CONTEXT of the FIELD_DECLs instead. */
2473 return (DECL_CONTEXT (TREE_OPERAND (ref_a
, 1))
2474 == DECL_CONTEXT (TREE_OPERAND (ref_b
, 1)));
2476 return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a
, 0)),
2477 TREE_TYPE (TREE_OPERAND (ref_b
, 0)));
2480 /* Initialize a data dependence relation between data accesses A and
2481 B. NB_LOOPS is the number of loops surrounding the references: the
2482 size of the classic distance/direction vectors. */
2484 struct data_dependence_relation
*
2485 initialize_data_dependence_relation (struct data_reference
*a
,
2486 struct data_reference
*b
,
2487 vec
<loop_p
> loop_nest
)
2489 struct data_dependence_relation
*res
;
2492 res
= XCNEW (struct data_dependence_relation
);
2495 DDR_LOOP_NEST (res
).create (0);
2496 DDR_SUBSCRIPTS (res
).create (0);
2497 DDR_DIR_VECTS (res
).create (0);
2498 DDR_DIST_VECTS (res
).create (0);
2500 if (a
== NULL
|| b
== NULL
)
2502 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
2506 /* If the data references do not alias, then they are independent. */
2507 if (!dr_may_alias_p (a
, b
, loop_nest
.exists () ? loop_nest
[0] : NULL
))
2509 DDR_ARE_DEPENDENT (res
) = chrec_known
;
2513 unsigned int num_dimensions_a
= DR_NUM_DIMENSIONS (a
);
2514 unsigned int num_dimensions_b
= DR_NUM_DIMENSIONS (b
);
2515 if (num_dimensions_a
== 0 || num_dimensions_b
== 0)
2517 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
2521 /* For unconstrained bases, the root (highest-indexed) subscript
2522 describes a variation in the base of the original DR_REF rather
2523 than a component access. We have no type that accurately describes
2524 the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
2525 applying this subscript) so limit the search to the last real
2531 f (int a[][8], int b[][8])
2533 for (int i = 0; i < 8; ++i)
2534 a[i * 2][0] = b[i][0];
2537 the a and b accesses have a single ARRAY_REF component reference [0]
2538 but have two subscripts. */
2539 if (DR_UNCONSTRAINED_BASE (a
))
2540 num_dimensions_a
-= 1;
2541 if (DR_UNCONSTRAINED_BASE (b
))
2542 num_dimensions_b
-= 1;
2544 /* These structures describe sequences of component references in
2545 DR_REF (A) and DR_REF (B). Each component reference is tied to a
2546 specific access function. */
2548 /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
2549 DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
2550 indices. In C notation, these are the indices of the rightmost
2551 component references; e.g. for a sequence .b.c.d, the start
2553 unsigned int start_a
;
2554 unsigned int start_b
;
2556 /* The sequence contains LENGTH consecutive access functions from
2558 unsigned int length
;
2560 /* The enclosing objects for the A and B sequences respectively,
2561 i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
2562 and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied. */
2565 } full_seq
= {}, struct_seq
= {};
2567 /* Before each iteration of the loop:
2569 - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
2570 - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B). */
2571 unsigned int index_a
= 0;
2572 unsigned int index_b
= 0;
2573 tree ref_a
= DR_REF (a
);
2574 tree ref_b
= DR_REF (b
);
2576 /* Now walk the component references from the final DR_REFs back up to
2577 the enclosing base objects. Each component reference corresponds
2578 to one access function in the DR, with access function 0 being for
2579 the final DR_REF and the highest-indexed access function being the
2580 one that is applied to the base of the DR.
2582 Look for a sequence of component references whose access functions
2583 are comparable (see access_fn_components_comparable_p). If more
2584 than one such sequence exists, pick the one nearest the base
2585 (which is the leftmost sequence in C notation). Store this sequence
2588 For example, if we have:
2590 struct foo { struct bar s; ... } (*a)[10], (*b)[10];
2593 B: __real b[0][i].s.e[i].f
2595 (where d is the same type as the real component of f) then the access
2602 B: __real .f [i] .e .s [i]
2604 The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
2605 and [i] is an ARRAY_REF. However, the A1/B3 column contains two
2606 COMPONENT_REF accesses for struct bar, so is comparable. Likewise
2607 the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
2608 so is comparable. The A3/B5 column contains two ARRAY_REFs that
2609 index foo[10] arrays, so is again comparable. The sequence is
2612 A: [1, 3] (i.e. [i].s.c)
2613 B: [3, 5] (i.e. [i].s.e)
2615 Also look for sequences of component references whose access
2616 functions are comparable and whose enclosing objects have the same
2617 RECORD_TYPE. Store this sequence in STRUCT_SEQ. In the above
2618 example, STRUCT_SEQ would be:
2620 A: [1, 2] (i.e. s.c)
2621 B: [3, 4] (i.e. s.e) */
2622 while (index_a
< num_dimensions_a
&& index_b
< num_dimensions_b
)
2624 /* REF_A and REF_B must be one of the component access types
2625 allowed by dr_analyze_indices. */
2626 gcc_checking_assert (access_fn_component_p (ref_a
));
2627 gcc_checking_assert (access_fn_component_p (ref_b
));
2629 /* Get the immediately-enclosing objects for REF_A and REF_B,
2630 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
2631 and DR_ACCESS_FN (B, INDEX_B). */
2632 tree object_a
= TREE_OPERAND (ref_a
, 0);
2633 tree object_b
= TREE_OPERAND (ref_b
, 0);
2635 tree type_a
= TREE_TYPE (object_a
);
2636 tree type_b
= TREE_TYPE (object_b
);
2637 if (access_fn_components_comparable_p (ref_a
, ref_b
))
2639 /* This pair of component accesses is comparable for dependence
2640 analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
2641 DR_ACCESS_FN (B, INDEX_B) in the sequence. */
2642 if (full_seq
.start_a
+ full_seq
.length
!= index_a
2643 || full_seq
.start_b
+ full_seq
.length
!= index_b
)
2645 /* The accesses don't extend the current sequence,
2646 so start a new one here. */
2647 full_seq
.start_a
= index_a
;
2648 full_seq
.start_b
= index_b
;
2649 full_seq
.length
= 0;
2652 /* Add this pair of references to the sequence. */
2653 full_seq
.length
+= 1;
2654 full_seq
.object_a
= object_a
;
2655 full_seq
.object_b
= object_b
;
2657 /* If the enclosing objects are structures (and thus have the
2658 same RECORD_TYPE), record the new sequence in STRUCT_SEQ. */
2659 if (TREE_CODE (type_a
) == RECORD_TYPE
)
2660 struct_seq
= full_seq
;
2662 /* Move to the next containing reference for both A and B. */
2670 /* Try to approach equal type sizes. */
2671 if (!COMPLETE_TYPE_P (type_a
)
2672 || !COMPLETE_TYPE_P (type_b
)
2673 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a
))
2674 || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b
)))
2677 unsigned HOST_WIDE_INT size_a
= tree_to_uhwi (TYPE_SIZE_UNIT (type_a
));
2678 unsigned HOST_WIDE_INT size_b
= tree_to_uhwi (TYPE_SIZE_UNIT (type_b
));
2679 if (size_a
<= size_b
)
2684 if (size_b
<= size_a
)
2691 /* See whether FULL_SEQ ends at the base and whether the two bases
2692 are equal. We do not care about TBAA or alignment info so we can
2693 use OEP_ADDRESS_OF to avoid false negatives. */
2694 tree base_a
= DR_BASE_OBJECT (a
);
2695 tree base_b
= DR_BASE_OBJECT (b
);
2696 bool same_base_p
= (full_seq
.start_a
+ full_seq
.length
== num_dimensions_a
2697 && full_seq
.start_b
+ full_seq
.length
== num_dimensions_b
2698 && DR_UNCONSTRAINED_BASE (a
) == DR_UNCONSTRAINED_BASE (b
)
2699 && operand_equal_p (base_a
, base_b
, OEP_ADDRESS_OF
)
2700 && types_compatible_p (TREE_TYPE (base_a
),
2702 && (!loop_nest
.exists ()
2703 || (object_address_invariant_in_loop_p
2704 (loop_nest
[0], base_a
))));
2706 /* If the bases are the same, we can include the base variation too.
2707 E.g. the b accesses in:
2709 for (int i = 0; i < n; ++i)
2710 b[i + 4][0] = b[i][0];
2712 have a definite dependence distance of 4, while for:
2714 for (int i = 0; i < n; ++i)
2715 a[i + 4][0] = b[i][0];
2717 the dependence distance depends on the gap between a and b.
2719 If the bases are different then we can only rely on the sequence
2720 rooted at a structure access, since arrays are allowed to overlap
2721 arbitrarily and change shape arbitrarily. E.g. we treat this as
2726 ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
2728 where two lvalues with the same int[4][3] type overlap, and where
2729 both lvalues are distinct from the object's declared type. */
2732 if (DR_UNCONSTRAINED_BASE (a
))
2733 full_seq
.length
+= 1;
2736 full_seq
= struct_seq
;
2738 /* Punt if we didn't find a suitable sequence. */
2739 if (full_seq
.length
== 0)
2741 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
2747 /* Partial overlap is possible for different bases when strict aliasing
2748 is not in effect. It's also possible if either base involves a union
2751 struct s1 { int a[2]; };
2752 struct s2 { struct s1 b; int c; };
2753 struct s3 { int d; struct s1 e; };
2754 union u { struct s2 f; struct s3 g; } *p, *q;
2756 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
2757 "p->g.e" (base "p->g") and might partially overlap the s1 at
2758 "q->g.e" (base "q->g"). */
2759 if (!flag_strict_aliasing
2760 || ref_contains_union_access_p (full_seq
.object_a
)
2761 || ref_contains_union_access_p (full_seq
.object_b
))
2763 DDR_ARE_DEPENDENT (res
) = chrec_dont_know
;
2767 DDR_COULD_BE_INDEPENDENT_P (res
) = true;
2768 if (!loop_nest
.exists ()
2769 || (object_address_invariant_in_loop_p (loop_nest
[0],
2771 && object_address_invariant_in_loop_p (loop_nest
[0],
2772 full_seq
.object_b
)))
2774 DDR_OBJECT_A (res
) = full_seq
.object_a
;
2775 DDR_OBJECT_B (res
) = full_seq
.object_b
;
2779 DDR_AFFINE_P (res
) = true;
2780 DDR_ARE_DEPENDENT (res
) = NULL_TREE
;
2781 DDR_SUBSCRIPTS (res
).create (full_seq
.length
);
2782 DDR_LOOP_NEST (res
) = loop_nest
;
2783 DDR_SELF_REFERENCE (res
) = false;
2785 for (i
= 0; i
< full_seq
.length
; ++i
)
2787 struct subscript
*subscript
;
2789 subscript
= XNEW (struct subscript
);
2790 SUB_ACCESS_FN (subscript
, 0) = DR_ACCESS_FN (a
, full_seq
.start_a
+ i
);
2791 SUB_ACCESS_FN (subscript
, 1) = DR_ACCESS_FN (b
, full_seq
.start_b
+ i
);
2792 SUB_CONFLICTS_IN_A (subscript
) = conflict_fn_not_known ();
2793 SUB_CONFLICTS_IN_B (subscript
) = conflict_fn_not_known ();
2794 SUB_LAST_CONFLICT (subscript
) = chrec_dont_know
;
2795 SUB_DISTANCE (subscript
) = chrec_dont_know
;
2796 DDR_SUBSCRIPTS (res
).safe_push (subscript
);
2802 /* Frees memory used by the conflict function F. */
2805 free_conflict_function (conflict_function
*f
)
2809 if (CF_NONTRIVIAL_P (f
))
2811 for (i
= 0; i
< f
->n
; i
++)
2812 affine_fn_free (f
->fns
[i
]);
2817 /* Frees memory used by SUBSCRIPTS. */
2820 free_subscripts (vec
<subscript_p
> subscripts
)
2825 FOR_EACH_VEC_ELT (subscripts
, i
, s
)
2827 free_conflict_function (s
->conflicting_iterations_in_a
);
2828 free_conflict_function (s
->conflicting_iterations_in_b
);
2831 subscripts
.release ();
2834 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
2838 finalize_ddr_dependent (struct data_dependence_relation
*ddr
,
2841 DDR_ARE_DEPENDENT (ddr
) = chrec
;
2842 free_subscripts (DDR_SUBSCRIPTS (ddr
));
2843 DDR_SUBSCRIPTS (ddr
).create (0);
2846 /* The dependence relation DDR cannot be represented by a distance
2850 non_affine_dependence_relation (struct data_dependence_relation
*ddr
)
2852 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2853 fprintf (dump_file
, "(Dependence relation cannot be represented by distance vector.) \n");
2855 DDR_AFFINE_P (ddr
) = false;
2860 /* This section contains the classic Banerjee tests. */
2862 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2863 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
2866 ziv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
2868 return (evolution_function_is_constant_p (chrec_a
)
2869 && evolution_function_is_constant_p (chrec_b
));
2872 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2873 variable, i.e., if the SIV (Single Index Variable) test is true. */
2876 siv_subscript_p (const_tree chrec_a
, const_tree chrec_b
)
2878 if ((evolution_function_is_constant_p (chrec_a
)
2879 && evolution_function_is_univariate_p (chrec_b
))
2880 || (evolution_function_is_constant_p (chrec_b
)
2881 && evolution_function_is_univariate_p (chrec_a
)))
2884 if (evolution_function_is_univariate_p (chrec_a
)
2885 && evolution_function_is_univariate_p (chrec_b
))
2887 switch (TREE_CODE (chrec_a
))
2889 case POLYNOMIAL_CHREC
:
2890 switch (TREE_CODE (chrec_b
))
2892 case POLYNOMIAL_CHREC
:
2893 if (CHREC_VARIABLE (chrec_a
) != CHREC_VARIABLE (chrec_b
))
2909 /* Creates a conflict function with N dimensions. The affine functions
2910 in each dimension follow. */
2912 static conflict_function
*
2913 conflict_fn (unsigned n
, ...)
2916 conflict_function
*ret
= XCNEW (conflict_function
);
2919 gcc_assert (n
> 0 && n
<= MAX_DIM
);
2923 for (i
= 0; i
< n
; i
++)
2924 ret
->fns
[i
] = va_arg (ap
, affine_fn
);
2930 /* Returns constant affine function with value CST. */
2933 affine_fn_cst (tree cst
)
2937 fn
.quick_push (cst
);
2941 /* Returns affine function with single variable, CST + COEF * x_DIM. */
2944 affine_fn_univar (tree cst
, unsigned dim
, tree coef
)
2947 fn
.create (dim
+ 1);
2950 gcc_assert (dim
> 0);
2951 fn
.quick_push (cst
);
2952 for (i
= 1; i
< dim
; i
++)
2953 fn
.quick_push (integer_zero_node
);
2954 fn
.quick_push (coef
);
2958 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
2959 *OVERLAPS_B are initialized to the functions that describe the
2960 relation between the elements accessed twice by CHREC_A and
2961 CHREC_B. For k >= 0, the following property is verified:
2963 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2966 analyze_ziv_subscript (tree chrec_a
,
2968 conflict_function
**overlaps_a
,
2969 conflict_function
**overlaps_b
,
2970 tree
*last_conflicts
)
2972 tree type
, difference
;
2973 dependence_stats
.num_ziv
++;
2975 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2976 fprintf (dump_file
, "(analyze_ziv_subscript \n");
2978 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
2979 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
2980 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
2981 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
2983 switch (TREE_CODE (difference
))
2986 if (integer_zerop (difference
))
2988 /* The difference is equal to zero: the accessed index
2989 overlaps for each iteration in the loop. */
2990 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2991 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
2992 *last_conflicts
= chrec_dont_know
;
2993 dependence_stats
.num_ziv_dependent
++;
2997 /* The accesses do not overlap. */
2998 *overlaps_a
= conflict_fn_no_dependence ();
2999 *overlaps_b
= conflict_fn_no_dependence ();
3000 *last_conflicts
= integer_zero_node
;
3001 dependence_stats
.num_ziv_independent
++;
3006 /* We're not sure whether the indexes overlap. For the moment,
3007 conservatively answer "don't know". */
3008 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3009 fprintf (dump_file
, "ziv test failed: difference is non-integer.\n");
3011 *overlaps_a
= conflict_fn_not_known ();
3012 *overlaps_b
= conflict_fn_not_known ();
3013 *last_conflicts
= chrec_dont_know
;
3014 dependence_stats
.num_ziv_unimplemented
++;
3018 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3019 fprintf (dump_file
, ")\n");
3022 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
3023 and only if it fits to the int type. If this is not the case, or the
3024 bound on the number of iterations of LOOP could not be derived, returns
3028 max_stmt_executions_tree (class loop
*loop
)
3032 if (!max_stmt_executions (loop
, &nit
))
3033 return chrec_dont_know
;
3035 if (!wi::fits_to_tree_p (nit
, unsigned_type_node
))
3036 return chrec_dont_know
;
3038 return wide_int_to_tree (unsigned_type_node
, nit
);
3041 /* Determine whether the CHREC is always positive/negative. If the expression
3042 cannot be statically analyzed, return false, otherwise set the answer into
3046 chrec_is_positive (tree chrec
, bool *value
)
3048 bool value0
, value1
, value2
;
3049 tree end_value
, nb_iter
;
3051 switch (TREE_CODE (chrec
))
3053 case POLYNOMIAL_CHREC
:
3054 if (!chrec_is_positive (CHREC_LEFT (chrec
), &value0
)
3055 || !chrec_is_positive (CHREC_RIGHT (chrec
), &value1
))
3058 /* FIXME -- overflows. */
3059 if (value0
== value1
)
3065 /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3066 and the proof consists in showing that the sign never
3067 changes during the execution of the loop, from 0 to
3068 loop->nb_iterations. */
3069 if (!evolution_function_is_affine_p (chrec
))
3072 nb_iter
= number_of_latch_executions (get_chrec_loop (chrec
));
3073 if (chrec_contains_undetermined (nb_iter
))
3077 /* TODO -- If the test is after the exit, we may decrease the number of
3078 iterations by one. */
3080 nb_iter
= chrec_fold_minus (type
, nb_iter
, build_int_cst (type
, 1));
3083 end_value
= chrec_apply (CHREC_VARIABLE (chrec
), chrec
, nb_iter
);
3085 if (!chrec_is_positive (end_value
, &value2
))
3089 return value0
== value1
;
3092 switch (tree_int_cst_sgn (chrec
))
3111 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3112 constant, and CHREC_B is an affine function. *OVERLAPS_A and
3113 *OVERLAPS_B are initialized to the functions that describe the
3114 relation between the elements accessed twice by CHREC_A and
3115 CHREC_B. For k >= 0, the following property is verified:
3117 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
3120 analyze_siv_subscript_cst_affine (tree chrec_a
,
3122 conflict_function
**overlaps_a
,
3123 conflict_function
**overlaps_b
,
3124 tree
*last_conflicts
)
3126 bool value0
, value1
, value2
;
3127 tree type
, difference
, tmp
;
3129 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
3130 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
3131 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
3132 difference
= chrec_fold_minus (type
, initial_condition (chrec_b
), chrec_a
);
3134 /* Special case overlap in the first iteration. */
3135 if (integer_zerop (difference
))
3137 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3138 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3139 *last_conflicts
= integer_one_node
;
3143 if (!chrec_is_positive (initial_condition (difference
), &value0
))
3145 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3146 fprintf (dump_file
, "siv test failed: chrec is not positive.\n");
3148 dependence_stats
.num_siv_unimplemented
++;
3149 *overlaps_a
= conflict_fn_not_known ();
3150 *overlaps_b
= conflict_fn_not_known ();
3151 *last_conflicts
= chrec_dont_know
;
3156 if (value0
== false)
3158 if (TREE_CODE (chrec_b
) != POLYNOMIAL_CHREC
3159 || !chrec_is_positive (CHREC_RIGHT (chrec_b
), &value1
))
3161 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3162 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
3164 *overlaps_a
= conflict_fn_not_known ();
3165 *overlaps_b
= conflict_fn_not_known ();
3166 *last_conflicts
= chrec_dont_know
;
3167 dependence_stats
.num_siv_unimplemented
++;
3176 chrec_b = {10, +, 1}
3179 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
3181 HOST_WIDE_INT numiter
;
3182 class loop
*loop
= get_chrec_loop (chrec_b
);
3184 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3185 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
,
3186 fold_build1 (ABS_EXPR
, type
, difference
),
3187 CHREC_RIGHT (chrec_b
));
3188 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
3189 *last_conflicts
= integer_one_node
;
3192 /* Perform weak-zero siv test to see if overlap is
3193 outside the loop bounds. */
3194 numiter
= max_stmt_executions_int (loop
);
3197 && compare_tree_int (tmp
, numiter
) > 0)
3199 free_conflict_function (*overlaps_a
);
3200 free_conflict_function (*overlaps_b
);
3201 *overlaps_a
= conflict_fn_no_dependence ();
3202 *overlaps_b
= conflict_fn_no_dependence ();
3203 *last_conflicts
= integer_zero_node
;
3204 dependence_stats
.num_siv_independent
++;
3207 dependence_stats
.num_siv_dependent
++;
3211 /* When the step does not divide the difference, there are
3215 *overlaps_a
= conflict_fn_no_dependence ();
3216 *overlaps_b
= conflict_fn_no_dependence ();
3217 *last_conflicts
= integer_zero_node
;
3218 dependence_stats
.num_siv_independent
++;
3227 chrec_b = {10, +, -1}
3229 In this case, chrec_a will not overlap with chrec_b. */
3230 *overlaps_a
= conflict_fn_no_dependence ();
3231 *overlaps_b
= conflict_fn_no_dependence ();
3232 *last_conflicts
= integer_zero_node
;
3233 dependence_stats
.num_siv_independent
++;
3240 if (TREE_CODE (chrec_b
) != POLYNOMIAL_CHREC
3241 || !chrec_is_positive (CHREC_RIGHT (chrec_b
), &value2
))
3243 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3244 fprintf (dump_file
, "siv test failed: chrec not positive.\n");
3246 *overlaps_a
= conflict_fn_not_known ();
3247 *overlaps_b
= conflict_fn_not_known ();
3248 *last_conflicts
= chrec_dont_know
;
3249 dependence_stats
.num_siv_unimplemented
++;
3254 if (value2
== false)
3258 chrec_b = {10, +, -1}
3260 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b
), difference
))
3262 HOST_WIDE_INT numiter
;
3263 class loop
*loop
= get_chrec_loop (chrec_b
);
3265 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3266 tmp
= fold_build2 (EXACT_DIV_EXPR
, type
, difference
,
3267 CHREC_RIGHT (chrec_b
));
3268 *overlaps_b
= conflict_fn (1, affine_fn_cst (tmp
));
3269 *last_conflicts
= integer_one_node
;
3271 /* Perform weak-zero siv test to see if overlap is
3272 outside the loop bounds. */
3273 numiter
= max_stmt_executions_int (loop
);
3276 && compare_tree_int (tmp
, numiter
) > 0)
3278 free_conflict_function (*overlaps_a
);
3279 free_conflict_function (*overlaps_b
);
3280 *overlaps_a
= conflict_fn_no_dependence ();
3281 *overlaps_b
= conflict_fn_no_dependence ();
3282 *last_conflicts
= integer_zero_node
;
3283 dependence_stats
.num_siv_independent
++;
3286 dependence_stats
.num_siv_dependent
++;
3290 /* When the step does not divide the difference, there
3294 *overlaps_a
= conflict_fn_no_dependence ();
3295 *overlaps_b
= conflict_fn_no_dependence ();
3296 *last_conflicts
= integer_zero_node
;
3297 dependence_stats
.num_siv_independent
++;
3307 In this case, chrec_a will not overlap with chrec_b. */
3308 *overlaps_a
= conflict_fn_no_dependence ();
3309 *overlaps_b
= conflict_fn_no_dependence ();
3310 *last_conflicts
= integer_zero_node
;
3311 dependence_stats
.num_siv_independent
++;
3319 /* Helper recursive function for initializing the matrix A. Returns
3320 the initial value of CHREC. */
3323 initialize_matrix_A (lambda_matrix A
, tree chrec
, unsigned index
, int mult
)
3327 switch (TREE_CODE (chrec
))
3329 case POLYNOMIAL_CHREC
:
3330 if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec
)))
3331 return chrec_dont_know
;
3332 A
[index
][0] = mult
* int_cst_value (CHREC_RIGHT (chrec
));
3333 return initialize_matrix_A (A
, CHREC_LEFT (chrec
), index
+ 1, mult
);
3339 tree op0
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
3340 tree op1
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 1), index
, mult
);
3342 return chrec_fold_op (TREE_CODE (chrec
), chrec_type (chrec
), op0
, op1
);
3347 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
3348 return chrec_convert (chrec_type (chrec
), op
, NULL
);
3353 /* Handle ~X as -1 - X. */
3354 tree op
= initialize_matrix_A (A
, TREE_OPERAND (chrec
, 0), index
, mult
);
3355 return chrec_fold_op (MINUS_EXPR
, chrec_type (chrec
),
3356 build_int_cst (TREE_TYPE (chrec
), -1), op
);
3368 #define FLOOR_DIV(x,y) ((x) / (y))
3370 /* Solves the special case of the Diophantine equation:
3371 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3373 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
3374 number of iterations that loops X and Y run. The overlaps will be
3375 constructed as evolutions in dimension DIM. */
3378 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter
,
3379 HOST_WIDE_INT step_a
,
3380 HOST_WIDE_INT step_b
,
3381 affine_fn
*overlaps_a
,
3382 affine_fn
*overlaps_b
,
3383 tree
*last_conflicts
, int dim
)
3385 if (((step_a
> 0 && step_b
> 0)
3386 || (step_a
< 0 && step_b
< 0)))
3388 HOST_WIDE_INT step_overlaps_a
, step_overlaps_b
;
3389 HOST_WIDE_INT gcd_steps_a_b
, last_conflict
, tau2
;
3391 gcd_steps_a_b
= gcd (step_a
, step_b
);
3392 step_overlaps_a
= step_b
/ gcd_steps_a_b
;
3393 step_overlaps_b
= step_a
/ gcd_steps_a_b
;
3397 tau2
= FLOOR_DIV (niter
, step_overlaps_a
);
3398 tau2
= MIN (tau2
, FLOOR_DIV (niter
, step_overlaps_b
));
3399 last_conflict
= tau2
;
3400 *last_conflicts
= build_int_cst (NULL_TREE
, last_conflict
);
3403 *last_conflicts
= chrec_dont_know
;
3405 *overlaps_a
= affine_fn_univar (integer_zero_node
, dim
,
3406 build_int_cst (NULL_TREE
,
3408 *overlaps_b
= affine_fn_univar (integer_zero_node
, dim
,
3409 build_int_cst (NULL_TREE
,
3415 *overlaps_a
= affine_fn_cst (integer_zero_node
);
3416 *overlaps_b
= affine_fn_cst (integer_zero_node
);
3417 *last_conflicts
= integer_zero_node
;
3421 /* Solves the special case of a Diophantine equation where CHREC_A is
3422 an affine bivariate function, and CHREC_B is an affine univariate
3423 function. For example,
3425 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
3427 has the following overlapping functions:
3429 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
3430 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
3431 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
3433 FORNOW: This is a specialized implementation for a case occurring in
3434 a common benchmark. Implement the general algorithm. */
3437 compute_overlap_steps_for_affine_1_2 (tree chrec_a
, tree chrec_b
,
3438 conflict_function
**overlaps_a
,
3439 conflict_function
**overlaps_b
,
3440 tree
*last_conflicts
)
3442 bool xz_p
, yz_p
, xyz_p
;
3443 HOST_WIDE_INT step_x
, step_y
, step_z
;
3444 HOST_WIDE_INT niter_x
, niter_y
, niter_z
, niter
;
3445 affine_fn overlaps_a_xz
, overlaps_b_xz
;
3446 affine_fn overlaps_a_yz
, overlaps_b_yz
;
3447 affine_fn overlaps_a_xyz
, overlaps_b_xyz
;
3448 affine_fn ova1
, ova2
, ovb
;
3449 tree last_conflicts_xz
, last_conflicts_yz
, last_conflicts_xyz
;
3451 step_x
= int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a
)));
3452 step_y
= int_cst_value (CHREC_RIGHT (chrec_a
));
3453 step_z
= int_cst_value (CHREC_RIGHT (chrec_b
));
3455 niter_x
= max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a
)));
3456 niter_y
= max_stmt_executions_int (get_chrec_loop (chrec_a
));
3457 niter_z
= max_stmt_executions_int (get_chrec_loop (chrec_b
));
3459 if (niter_x
< 0 || niter_y
< 0 || niter_z
< 0)
3461 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3462 fprintf (dump_file
, "overlap steps test failed: no iteration counts.\n");
3464 *overlaps_a
= conflict_fn_not_known ();
3465 *overlaps_b
= conflict_fn_not_known ();
3466 *last_conflicts
= chrec_dont_know
;
3470 niter
= MIN (niter_x
, niter_z
);
3471 compute_overlap_steps_for_affine_univar (niter
, step_x
, step_z
,
3474 &last_conflicts_xz
, 1);
3475 niter
= MIN (niter_y
, niter_z
);
3476 compute_overlap_steps_for_affine_univar (niter
, step_y
, step_z
,
3479 &last_conflicts_yz
, 2);
3480 niter
= MIN (niter_x
, niter_z
);
3481 niter
= MIN (niter_y
, niter
);
3482 compute_overlap_steps_for_affine_univar (niter
, step_x
+ step_y
, step_z
,
3485 &last_conflicts_xyz
, 3);
3487 xz_p
= !integer_zerop (last_conflicts_xz
);
3488 yz_p
= !integer_zerop (last_conflicts_yz
);
3489 xyz_p
= !integer_zerop (last_conflicts_xyz
);
3491 if (xz_p
|| yz_p
|| xyz_p
)
3493 ova1
= affine_fn_cst (integer_zero_node
);
3494 ova2
= affine_fn_cst (integer_zero_node
);
3495 ovb
= affine_fn_cst (integer_zero_node
);
3498 affine_fn t0
= ova1
;
3501 ova1
= affine_fn_plus (ova1
, overlaps_a_xz
);
3502 ovb
= affine_fn_plus (ovb
, overlaps_b_xz
);
3503 affine_fn_free (t0
);
3504 affine_fn_free (t2
);
3505 *last_conflicts
= last_conflicts_xz
;
3509 affine_fn t0
= ova2
;
3512 ova2
= affine_fn_plus (ova2
, overlaps_a_yz
);
3513 ovb
= affine_fn_plus (ovb
, overlaps_b_yz
);
3514 affine_fn_free (t0
);
3515 affine_fn_free (t2
);
3516 *last_conflicts
= last_conflicts_yz
;
3520 affine_fn t0
= ova1
;
3521 affine_fn t2
= ova2
;
3524 ova1
= affine_fn_plus (ova1
, overlaps_a_xyz
);
3525 ova2
= affine_fn_plus (ova2
, overlaps_a_xyz
);
3526 ovb
= affine_fn_plus (ovb
, overlaps_b_xyz
);
3527 affine_fn_free (t0
);
3528 affine_fn_free (t2
);
3529 affine_fn_free (t4
);
3530 *last_conflicts
= last_conflicts_xyz
;
3532 *overlaps_a
= conflict_fn (2, ova1
, ova2
);
3533 *overlaps_b
= conflict_fn (1, ovb
);
3537 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3538 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3539 *last_conflicts
= integer_zero_node
;
3542 affine_fn_free (overlaps_a_xz
);
3543 affine_fn_free (overlaps_b_xz
);
3544 affine_fn_free (overlaps_a_yz
);
3545 affine_fn_free (overlaps_b_yz
);
3546 affine_fn_free (overlaps_a_xyz
);
3547 affine_fn_free (overlaps_b_xyz
);
3550 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
3553 lambda_vector_copy (lambda_vector vec1
, lambda_vector vec2
,
3556 memcpy (vec2
, vec1
, size
* sizeof (*vec1
));
3559 /* Copy the elements of M x N matrix MAT1 to MAT2. */
3562 lambda_matrix_copy (lambda_matrix mat1
, lambda_matrix mat2
,
3567 for (i
= 0; i
< m
; i
++)
3568 lambda_vector_copy (mat1
[i
], mat2
[i
], n
);
3571 /* Store the N x N identity matrix in MAT. */
3574 lambda_matrix_id (lambda_matrix mat
, int size
)
3578 for (i
= 0; i
< size
; i
++)
3579 for (j
= 0; j
< size
; j
++)
3580 mat
[i
][j
] = (i
== j
) ? 1 : 0;
3583 /* Return the index of the first nonzero element of vector VEC1 between
3584 START and N. We must have START <= N.
3585 Returns N if VEC1 is the zero vector. */
3588 lambda_vector_first_nz (lambda_vector vec1
, int n
, int start
)
3591 while (j
< n
&& vec1
[j
] == 0)
3596 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3597 R2 = R2 + CONST1 * R1. */
3600 lambda_matrix_row_add (lambda_matrix mat
, int n
, int r1
, int r2
,
3608 for (i
= 0; i
< n
; i
++)
3609 mat
[r2
][i
] += const1
* mat
[r1
][i
];
3612 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3613 and store the result in VEC2. */
3616 lambda_vector_mult_const (lambda_vector vec1
, lambda_vector vec2
,
3617 int size
, lambda_int const1
)
3622 lambda_vector_clear (vec2
, size
);
3624 for (i
= 0; i
< size
; i
++)
3625 vec2
[i
] = const1
* vec1
[i
];
3628 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
3631 lambda_vector_negate (lambda_vector vec1
, lambda_vector vec2
,
3634 lambda_vector_mult_const (vec1
, vec2
, size
, -1);
3637 /* Negate row R1 of matrix MAT which has N columns. */
3640 lambda_matrix_row_negate (lambda_matrix mat
, int n
, int r1
)
3642 lambda_vector_negate (mat
[r1
], mat
[r1
], n
);
3645 /* Return true if two vectors are equal. */
3648 lambda_vector_equal (lambda_vector vec1
, lambda_vector vec2
, int size
)
3651 for (i
= 0; i
< size
; i
++)
3652 if (vec1
[i
] != vec2
[i
])
3657 /* Given an M x N integer matrix A, this function determines an M x
3658 M unimodular matrix U, and an M x N echelon matrix S such that
3659 "U.A = S". This decomposition is also known as "right Hermite".
3661 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
3662 Restructuring Compilers" Utpal Banerjee. */
3665 lambda_matrix_right_hermite (lambda_matrix A
, int m
, int n
,
3666 lambda_matrix S
, lambda_matrix U
)
3670 lambda_matrix_copy (A
, S
, m
, n
);
3671 lambda_matrix_id (U
, m
);
3673 for (j
= 0; j
< n
; j
++)
3675 if (lambda_vector_first_nz (S
[j
], m
, i0
) < m
)
3678 for (i
= m
- 1; i
>= i0
; i
--)
3680 while (S
[i
][j
] != 0)
3682 lambda_int sigma
, factor
, a
, b
;
3686 sigma
= (a
* b
< 0) ? -1: 1;
3689 factor
= sigma
* (a
/ b
);
3691 lambda_matrix_row_add (S
, n
, i
, i
-1, -factor
);
3692 std::swap (S
[i
], S
[i
-1]);
3694 lambda_matrix_row_add (U
, m
, i
, i
-1, -factor
);
3695 std::swap (U
[i
], U
[i
-1]);
3702 /* Determines the overlapping elements due to accesses CHREC_A and
3703 CHREC_B, that are affine functions. This function cannot handle
3704 symbolic evolution functions, ie. when initial conditions are
3705 parameters, because it uses lambda matrices of integers. */
3708 analyze_subscript_affine_affine (tree chrec_a
,
3710 conflict_function
**overlaps_a
,
3711 conflict_function
**overlaps_b
,
3712 tree
*last_conflicts
)
3714 unsigned nb_vars_a
, nb_vars_b
, dim
;
3715 HOST_WIDE_INT gamma
, gcd_alpha_beta
;
3716 lambda_matrix A
, U
, S
;
3717 struct obstack scratch_obstack
;
3719 if (eq_evolutions_p (chrec_a
, chrec_b
))
3721 /* The accessed index overlaps for each iteration in the
3723 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3724 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
3725 *last_conflicts
= chrec_dont_know
;
3728 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3729 fprintf (dump_file
, "(analyze_subscript_affine_affine \n");
3731 /* For determining the initial intersection, we have to solve a
3732 Diophantine equation. This is the most time consuming part.
3734 For answering to the question: "Is there a dependence?" we have
3735 to prove that there exists a solution to the Diophantine
3736 equation, and that the solution is in the iteration domain,
3737 i.e. the solution is positive or zero, and that the solution
3738 happens before the upper bound loop.nb_iterations. Otherwise
3739 there is no dependence. This function outputs a description of
3740 the iterations that hold the intersections. */
3742 nb_vars_a
= nb_vars_in_chrec (chrec_a
);
3743 nb_vars_b
= nb_vars_in_chrec (chrec_b
);
3745 gcc_obstack_init (&scratch_obstack
);
3747 dim
= nb_vars_a
+ nb_vars_b
;
3748 U
= lambda_matrix_new (dim
, dim
, &scratch_obstack
);
3749 A
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
3750 S
= lambda_matrix_new (dim
, 1, &scratch_obstack
);
3752 tree init_a
= initialize_matrix_A (A
, chrec_a
, 0, 1);
3753 tree init_b
= initialize_matrix_A (A
, chrec_b
, nb_vars_a
, -1);
3754 if (init_a
== chrec_dont_know
3755 || init_b
== chrec_dont_know
)
3757 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3758 fprintf (dump_file
, "affine-affine test failed: "
3759 "representation issue.\n");
3760 *overlaps_a
= conflict_fn_not_known ();
3761 *overlaps_b
= conflict_fn_not_known ();
3762 *last_conflicts
= chrec_dont_know
;
3763 goto end_analyze_subs_aa
;
3765 gamma
= int_cst_value (init_b
) - int_cst_value (init_a
);
3767 /* Don't do all the hard work of solving the Diophantine equation
3768 when we already know the solution: for example,
3771 | gamma = 3 - 3 = 0.
3772 Then the first overlap occurs during the first iterations:
3773 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
3777 if (nb_vars_a
== 1 && nb_vars_b
== 1)
3779 HOST_WIDE_INT step_a
, step_b
;
3780 HOST_WIDE_INT niter
, niter_a
, niter_b
;
3783 niter_a
= max_stmt_executions_int (get_chrec_loop (chrec_a
));
3784 niter_b
= max_stmt_executions_int (get_chrec_loop (chrec_b
));
3785 niter
= MIN (niter_a
, niter_b
);
3786 step_a
= int_cst_value (CHREC_RIGHT (chrec_a
));
3787 step_b
= int_cst_value (CHREC_RIGHT (chrec_b
));
3789 compute_overlap_steps_for_affine_univar (niter
, step_a
, step_b
,
3792 *overlaps_a
= conflict_fn (1, ova
);
3793 *overlaps_b
= conflict_fn (1, ovb
);
3796 else if (nb_vars_a
== 2 && nb_vars_b
== 1)
3797 compute_overlap_steps_for_affine_1_2
3798 (chrec_a
, chrec_b
, overlaps_a
, overlaps_b
, last_conflicts
);
3800 else if (nb_vars_a
== 1 && nb_vars_b
== 2)
3801 compute_overlap_steps_for_affine_1_2
3802 (chrec_b
, chrec_a
, overlaps_b
, overlaps_a
, last_conflicts
);
3806 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3807 fprintf (dump_file
, "affine-affine test failed: too many variables.\n");
3808 *overlaps_a
= conflict_fn_not_known ();
3809 *overlaps_b
= conflict_fn_not_known ();
3810 *last_conflicts
= chrec_dont_know
;
3812 goto end_analyze_subs_aa
;
3816 lambda_matrix_right_hermite (A
, dim
, 1, S
, U
);
3821 lambda_matrix_row_negate (U
, dim
, 0);
3823 gcd_alpha_beta
= S
[0][0];
3825 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
3826 but that is a quite strange case. Instead of ICEing, answer
3828 if (gcd_alpha_beta
== 0)
3830 *overlaps_a
= conflict_fn_not_known ();
3831 *overlaps_b
= conflict_fn_not_known ();
3832 *last_conflicts
= chrec_dont_know
;
3833 goto end_analyze_subs_aa
;
3836 /* The classic "gcd-test". */
3837 if (!int_divides_p (gcd_alpha_beta
, gamma
))
3839 /* The "gcd-test" has determined that there is no integer
3840 solution, i.e. there is no dependence. */
3841 *overlaps_a
= conflict_fn_no_dependence ();
3842 *overlaps_b
= conflict_fn_no_dependence ();
3843 *last_conflicts
= integer_zero_node
;
3846 /* Both access functions are univariate. This includes SIV and MIV cases. */
3847 else if (nb_vars_a
== 1 && nb_vars_b
== 1)
3849 /* Both functions should have the same evolution sign. */
3850 if (((A
[0][0] > 0 && -A
[1][0] > 0)
3851 || (A
[0][0] < 0 && -A
[1][0] < 0)))
3853 /* The solutions are given by:
3855 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
3858 For a given integer t. Using the following variables,
3860 | i0 = u11 * gamma / gcd_alpha_beta
3861 | j0 = u12 * gamma / gcd_alpha_beta
3868 | y0 = j0 + j1 * t. */
3869 HOST_WIDE_INT i0
, j0
, i1
, j1
;
3871 i0
= U
[0][0] * gamma
/ gcd_alpha_beta
;
3872 j0
= U
[0][1] * gamma
/ gcd_alpha_beta
;
3876 if ((i1
== 0 && i0
< 0)
3877 || (j1
== 0 && j0
< 0))
3879 /* There is no solution.
3880 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
3881 falls in here, but for the moment we don't look at the
3882 upper bound of the iteration domain. */
3883 *overlaps_a
= conflict_fn_no_dependence ();
3884 *overlaps_b
= conflict_fn_no_dependence ();
3885 *last_conflicts
= integer_zero_node
;
3886 goto end_analyze_subs_aa
;
3889 if (i1
> 0 && j1
> 0)
3891 HOST_WIDE_INT niter_a
3892 = max_stmt_executions_int (get_chrec_loop (chrec_a
));
3893 HOST_WIDE_INT niter_b
3894 = max_stmt_executions_int (get_chrec_loop (chrec_b
));
3895 HOST_WIDE_INT niter
= MIN (niter_a
, niter_b
);
3897 /* (X0, Y0) is a solution of the Diophantine equation:
3898 "chrec_a (X0) = chrec_b (Y0)". */
3899 HOST_WIDE_INT tau1
= MAX (CEIL (-i0
, i1
),
3901 HOST_WIDE_INT x0
= i1
* tau1
+ i0
;
3902 HOST_WIDE_INT y0
= j1
* tau1
+ j0
;
3904 /* (X1, Y1) is the smallest positive solution of the eq
3905 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
3906 first conflict occurs. */
3907 HOST_WIDE_INT min_multiple
= MIN (x0
/ i1
, y0
/ j1
);
3908 HOST_WIDE_INT x1
= x0
- i1
* min_multiple
;
3909 HOST_WIDE_INT y1
= y0
- j1
* min_multiple
;
3913 /* If the overlap occurs outside of the bounds of the
3914 loop, there is no dependence. */
3915 if (x1
>= niter_a
|| y1
>= niter_b
)
3917 *overlaps_a
= conflict_fn_no_dependence ();
3918 *overlaps_b
= conflict_fn_no_dependence ();
3919 *last_conflicts
= integer_zero_node
;
3920 goto end_analyze_subs_aa
;
3923 /* max stmt executions can get quite large, avoid
3924 overflows by using wide ints here. */
3926 = wi::smin (wi::sdiv_floor (wi::sub (niter_a
, i0
), i1
),
3927 wi::sdiv_floor (wi::sub (niter_b
, j0
), j1
));
3928 widest_int last_conflict
= wi::sub (tau2
, (x1
- i0
)/i1
);
3929 if (wi::min_precision (last_conflict
, SIGNED
)
3930 <= TYPE_PRECISION (integer_type_node
))
3932 = build_int_cst (integer_type_node
,
3933 last_conflict
.to_shwi ());
3935 *last_conflicts
= chrec_dont_know
;
3938 *last_conflicts
= chrec_dont_know
;
3942 affine_fn_univar (build_int_cst (NULL_TREE
, x1
),
3944 build_int_cst (NULL_TREE
, i1
)));
3947 affine_fn_univar (build_int_cst (NULL_TREE
, y1
),
3949 build_int_cst (NULL_TREE
, j1
)));
3953 /* FIXME: For the moment, the upper bound of the
3954 iteration domain for i and j is not checked. */
3955 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3956 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
3957 *overlaps_a
= conflict_fn_not_known ();
3958 *overlaps_b
= conflict_fn_not_known ();
3959 *last_conflicts
= chrec_dont_know
;
3964 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3965 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
3966 *overlaps_a
= conflict_fn_not_known ();
3967 *overlaps_b
= conflict_fn_not_known ();
3968 *last_conflicts
= chrec_dont_know
;
3973 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3974 fprintf (dump_file
, "affine-affine test failed: unimplemented.\n");
3975 *overlaps_a
= conflict_fn_not_known ();
3976 *overlaps_b
= conflict_fn_not_known ();
3977 *last_conflicts
= chrec_dont_know
;
3980 end_analyze_subs_aa
:
3981 obstack_free (&scratch_obstack
, NULL
);
3982 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3984 fprintf (dump_file
, " (overlaps_a = ");
3985 dump_conflict_function (dump_file
, *overlaps_a
);
3986 fprintf (dump_file
, ")\n (overlaps_b = ");
3987 dump_conflict_function (dump_file
, *overlaps_b
);
3988 fprintf (dump_file
, "))\n");
3992 /* Returns true when analyze_subscript_affine_affine can be used for
3993 determining the dependence relation between chrec_a and chrec_b,
3994 that contain symbols. This function modifies chrec_a and chrec_b
3995 such that the analysis result is the same, and such that they don't
3996 contain symbols, and then can safely be passed to the analyzer.
3998 Example: The analysis of the following tuples of evolutions produce
3999 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4002 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4003 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4007 can_use_analyze_subscript_affine_affine (tree
*chrec_a
, tree
*chrec_b
)
4009 tree diff
, type
, left_a
, left_b
, right_b
;
4011 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a
))
4012 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b
)))
4013 /* FIXME: For the moment not handled. Might be refined later. */
4016 type
= chrec_type (*chrec_a
);
4017 left_a
= CHREC_LEFT (*chrec_a
);
4018 left_b
= chrec_convert (type
, CHREC_LEFT (*chrec_b
), NULL
);
4019 diff
= chrec_fold_minus (type
, left_a
, left_b
);
4021 if (!evolution_function_is_constant_p (diff
))
4024 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4025 fprintf (dump_file
, "can_use_subscript_aff_aff_for_symbolic \n");
4027 *chrec_a
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_a
),
4028 diff
, CHREC_RIGHT (*chrec_a
));
4029 right_b
= chrec_convert (type
, CHREC_RIGHT (*chrec_b
), NULL
);
4030 *chrec_b
= build_polynomial_chrec (CHREC_VARIABLE (*chrec_b
),
4031 build_int_cst (type
, 0),
4036 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
4037 *OVERLAPS_B are initialized to the functions that describe the
4038 relation between the elements accessed twice by CHREC_A and
4039 CHREC_B. For k >= 0, the following property is verified:
4041 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4044 analyze_siv_subscript (tree chrec_a
,
4046 conflict_function
**overlaps_a
,
4047 conflict_function
**overlaps_b
,
4048 tree
*last_conflicts
,
4051 dependence_stats
.num_siv
++;
4053 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4054 fprintf (dump_file
, "(analyze_siv_subscript \n");
4056 if (evolution_function_is_constant_p (chrec_a
)
4057 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
4058 analyze_siv_subscript_cst_affine (chrec_a
, chrec_b
,
4059 overlaps_a
, overlaps_b
, last_conflicts
);
4061 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
4062 && evolution_function_is_constant_p (chrec_b
))
4063 analyze_siv_subscript_cst_affine (chrec_b
, chrec_a
,
4064 overlaps_b
, overlaps_a
, last_conflicts
);
4066 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest_num
)
4067 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest_num
))
4069 if (!chrec_contains_symbols (chrec_a
)
4070 && !chrec_contains_symbols (chrec_b
))
4072 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
4073 overlaps_a
, overlaps_b
,
4076 if (CF_NOT_KNOWN_P (*overlaps_a
)
4077 || CF_NOT_KNOWN_P (*overlaps_b
))
4078 dependence_stats
.num_siv_unimplemented
++;
4079 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
4080 || CF_NO_DEPENDENCE_P (*overlaps_b
))
4081 dependence_stats
.num_siv_independent
++;
4083 dependence_stats
.num_siv_dependent
++;
4085 else if (can_use_analyze_subscript_affine_affine (&chrec_a
,
4088 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
4089 overlaps_a
, overlaps_b
,
4092 if (CF_NOT_KNOWN_P (*overlaps_a
)
4093 || CF_NOT_KNOWN_P (*overlaps_b
))
4094 dependence_stats
.num_siv_unimplemented
++;
4095 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
4096 || CF_NO_DEPENDENCE_P (*overlaps_b
))
4097 dependence_stats
.num_siv_independent
++;
4099 dependence_stats
.num_siv_dependent
++;
4102 goto siv_subscript_dontknow
;
4107 siv_subscript_dontknow
:;
4108 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4109 fprintf (dump_file
, " siv test failed: unimplemented");
4110 *overlaps_a
= conflict_fn_not_known ();
4111 *overlaps_b
= conflict_fn_not_known ();
4112 *last_conflicts
= chrec_dont_know
;
4113 dependence_stats
.num_siv_unimplemented
++;
4116 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4117 fprintf (dump_file
, ")\n");
4120 /* Returns false if we can prove that the greatest common divisor of the steps
4121 of CHREC does not divide CST, false otherwise. */
4124 gcd_of_steps_may_divide_p (const_tree chrec
, const_tree cst
)
4126 HOST_WIDE_INT cd
= 0, val
;
4129 if (!tree_fits_shwi_p (cst
))
4131 val
= tree_to_shwi (cst
);
4133 while (TREE_CODE (chrec
) == POLYNOMIAL_CHREC
)
4135 step
= CHREC_RIGHT (chrec
);
4136 if (!tree_fits_shwi_p (step
))
4138 cd
= gcd (cd
, tree_to_shwi (step
));
4139 chrec
= CHREC_LEFT (chrec
);
4142 return val
% cd
== 0;
4145 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
4146 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
4147 functions that describe the relation between the elements accessed
4148 twice by CHREC_A and CHREC_B. For k >= 0, the following property
4151 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
4154 analyze_miv_subscript (tree chrec_a
,
4156 conflict_function
**overlaps_a
,
4157 conflict_function
**overlaps_b
,
4158 tree
*last_conflicts
,
4159 class loop
*loop_nest
)
4161 tree type
, difference
;
4163 dependence_stats
.num_miv
++;
4164 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4165 fprintf (dump_file
, "(analyze_miv_subscript \n");
4167 type
= signed_type_for_types (TREE_TYPE (chrec_a
), TREE_TYPE (chrec_b
));
4168 chrec_a
= chrec_convert (type
, chrec_a
, NULL
);
4169 chrec_b
= chrec_convert (type
, chrec_b
, NULL
);
4170 difference
= chrec_fold_minus (type
, chrec_a
, chrec_b
);
4172 if (eq_evolutions_p (chrec_a
, chrec_b
))
4174 /* Access functions are the same: all the elements are accessed
4175 in the same order. */
4176 *overlaps_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
4177 *overlaps_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
4178 *last_conflicts
= max_stmt_executions_tree (get_chrec_loop (chrec_a
));
4179 dependence_stats
.num_miv_dependent
++;
4182 else if (evolution_function_is_constant_p (difference
)
4183 && evolution_function_is_affine_multivariate_p (chrec_a
,
4185 && !gcd_of_steps_may_divide_p (chrec_a
, difference
))
4187 /* testsuite/.../ssa-chrec-33.c
4188 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
4190 The difference is 1, and all the evolution steps are multiples
4191 of 2, consequently there are no overlapping elements. */
4192 *overlaps_a
= conflict_fn_no_dependence ();
4193 *overlaps_b
= conflict_fn_no_dependence ();
4194 *last_conflicts
= integer_zero_node
;
4195 dependence_stats
.num_miv_independent
++;
4198 else if (evolution_function_is_affine_in_loop (chrec_a
, loop_nest
->num
)
4199 && !chrec_contains_symbols (chrec_a
, loop_nest
)
4200 && evolution_function_is_affine_in_loop (chrec_b
, loop_nest
->num
)
4201 && !chrec_contains_symbols (chrec_b
, loop_nest
))
4203 /* testsuite/.../ssa-chrec-35.c
4204 {0, +, 1}_2 vs. {0, +, 1}_3
4205 the overlapping elements are respectively located at iterations:
4206 {0, +, 1}_x and {0, +, 1}_x,
4207 in other words, we have the equality:
4208 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4211 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4212 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4214 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4215 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4217 analyze_subscript_affine_affine (chrec_a
, chrec_b
,
4218 overlaps_a
, overlaps_b
, last_conflicts
);
4220 if (CF_NOT_KNOWN_P (*overlaps_a
)
4221 || CF_NOT_KNOWN_P (*overlaps_b
))
4222 dependence_stats
.num_miv_unimplemented
++;
4223 else if (CF_NO_DEPENDENCE_P (*overlaps_a
)
4224 || CF_NO_DEPENDENCE_P (*overlaps_b
))
4225 dependence_stats
.num_miv_independent
++;
4227 dependence_stats
.num_miv_dependent
++;
4232 /* When the analysis is too difficult, answer "don't know". */
4233 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4234 fprintf (dump_file
, "analyze_miv_subscript test failed: unimplemented.\n");
4236 *overlaps_a
= conflict_fn_not_known ();
4237 *overlaps_b
= conflict_fn_not_known ();
4238 *last_conflicts
= chrec_dont_know
;
4239 dependence_stats
.num_miv_unimplemented
++;
4242 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4243 fprintf (dump_file
, ")\n");
4246 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4247 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
4248 OVERLAP_ITERATIONS_B are initialized with two functions that
4249 describe the iterations that contain conflicting elements.
4251 Remark: For an integer k >= 0, the following equality is true:
4253 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4257 analyze_overlapping_iterations (tree chrec_a
,
4259 conflict_function
**overlap_iterations_a
,
4260 conflict_function
**overlap_iterations_b
,
4261 tree
*last_conflicts
, class loop
*loop_nest
)
4263 unsigned int lnn
= loop_nest
->num
;
4265 dependence_stats
.num_subscript_tests
++;
4267 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4269 fprintf (dump_file
, "(analyze_overlapping_iterations \n");
4270 fprintf (dump_file
, " (chrec_a = ");
4271 print_generic_expr (dump_file
, chrec_a
);
4272 fprintf (dump_file
, ")\n (chrec_b = ");
4273 print_generic_expr (dump_file
, chrec_b
);
4274 fprintf (dump_file
, ")\n");
4277 if (chrec_a
== NULL_TREE
4278 || chrec_b
== NULL_TREE
4279 || chrec_contains_undetermined (chrec_a
)
4280 || chrec_contains_undetermined (chrec_b
))
4282 dependence_stats
.num_subscript_undetermined
++;
4284 *overlap_iterations_a
= conflict_fn_not_known ();
4285 *overlap_iterations_b
= conflict_fn_not_known ();
4288 /* If they are the same chrec, and are affine, they overlap
4289 on every iteration. */
4290 else if (eq_evolutions_p (chrec_a
, chrec_b
)
4291 && (evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
4292 || operand_equal_p (chrec_a
, chrec_b
, 0)))
4294 dependence_stats
.num_same_subscript_function
++;
4295 *overlap_iterations_a
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
4296 *overlap_iterations_b
= conflict_fn (1, affine_fn_cst (integer_zero_node
));
4297 *last_conflicts
= chrec_dont_know
;
4300 /* If they aren't the same, and aren't affine, we can't do anything
4302 else if ((chrec_contains_symbols (chrec_a
)
4303 || chrec_contains_symbols (chrec_b
))
4304 && (!evolution_function_is_affine_multivariate_p (chrec_a
, lnn
)
4305 || !evolution_function_is_affine_multivariate_p (chrec_b
, lnn
)))
4307 dependence_stats
.num_subscript_undetermined
++;
4308 *overlap_iterations_a
= conflict_fn_not_known ();
4309 *overlap_iterations_b
= conflict_fn_not_known ();
4312 else if (ziv_subscript_p (chrec_a
, chrec_b
))
4313 analyze_ziv_subscript (chrec_a
, chrec_b
,
4314 overlap_iterations_a
, overlap_iterations_b
,
4317 else if (siv_subscript_p (chrec_a
, chrec_b
))
4318 analyze_siv_subscript (chrec_a
, chrec_b
,
4319 overlap_iterations_a
, overlap_iterations_b
,
4320 last_conflicts
, lnn
);
4323 analyze_miv_subscript (chrec_a
, chrec_b
,
4324 overlap_iterations_a
, overlap_iterations_b
,
4325 last_conflicts
, loop_nest
);
4327 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4329 fprintf (dump_file
, " (overlap_iterations_a = ");
4330 dump_conflict_function (dump_file
, *overlap_iterations_a
);
4331 fprintf (dump_file
, ")\n (overlap_iterations_b = ");
4332 dump_conflict_function (dump_file
, *overlap_iterations_b
);
4333 fprintf (dump_file
, "))\n");
4337 /* Helper function for uniquely inserting distance vectors. */
4340 save_dist_v (struct data_dependence_relation
*ddr
, lambda_vector dist_v
)
4345 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, v
)
4346 if (lambda_vector_equal (v
, dist_v
, DDR_NB_LOOPS (ddr
)))
4349 DDR_DIST_VECTS (ddr
).safe_push (dist_v
);
4352 /* Helper function for uniquely inserting direction vectors. */
4355 save_dir_v (struct data_dependence_relation
*ddr
, lambda_vector dir_v
)
4360 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr
), i
, v
)
4361 if (lambda_vector_equal (v
, dir_v
, DDR_NB_LOOPS (ddr
)))
4364 DDR_DIR_VECTS (ddr
).safe_push (dir_v
);
4367 /* Add a distance of 1 on all the loops outer than INDEX. If we
4368 haven't yet determined a distance for this outer loop, push a new
4369 distance vector composed of the previous distance, and a distance
4370 of 1 for this outer loop. Example:
4378 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
4379 save (0, 1), then we have to save (1, 0). */
4382 add_outer_distances (struct data_dependence_relation
*ddr
,
4383 lambda_vector dist_v
, int index
)
4385 /* For each outer loop where init_v is not set, the accesses are
4386 in dependence of distance 1 in the loop. */
4387 while (--index
>= 0)
4389 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4390 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
4392 save_dist_v (ddr
, save_v
);
4396 /* Return false when fail to represent the data dependence as a
4397 distance vector. A_INDEX is the index of the first reference
4398 (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4399 second reference. INIT_B is set to true when a component has been
4400 added to the distance vector DIST_V. INDEX_CARRY is then set to
4401 the index in DIST_V that carries the dependence. */
4404 build_classic_dist_vector_1 (struct data_dependence_relation
*ddr
,
4405 unsigned int a_index
, unsigned int b_index
,
4406 lambda_vector dist_v
, bool *init_b
,
4410 lambda_vector init_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4411 class loop
*loop
= DDR_LOOP_NEST (ddr
)[0];
4413 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
4415 tree access_fn_a
, access_fn_b
;
4416 struct subscript
*subscript
= DDR_SUBSCRIPT (ddr
, i
);
4418 if (chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
4420 non_affine_dependence_relation (ddr
);
4424 access_fn_a
= SUB_ACCESS_FN (subscript
, a_index
);
4425 access_fn_b
= SUB_ACCESS_FN (subscript
, b_index
);
4427 if (TREE_CODE (access_fn_a
) == POLYNOMIAL_CHREC
4428 && TREE_CODE (access_fn_b
) == POLYNOMIAL_CHREC
)
4432 int var_a
= CHREC_VARIABLE (access_fn_a
);
4433 int var_b
= CHREC_VARIABLE (access_fn_b
);
4436 || chrec_contains_undetermined (SUB_DISTANCE (subscript
)))
4438 non_affine_dependence_relation (ddr
);
4442 /* When data references are collected in a loop while data
4443 dependences are analyzed in loop nest nested in the loop, we
4444 would have more number of access functions than number of
4445 loops. Skip access functions of loops not in the loop nest.
4447 See PR89725 for more information. */
4448 if (flow_loop_nested_p (get_loop (cfun
, var_a
), loop
))
4451 dist
= int_cst_value (SUB_DISTANCE (subscript
));
4452 index
= index_in_loop_nest (var_a
, DDR_LOOP_NEST (ddr
));
4453 *index_carry
= MIN (index
, *index_carry
);
4455 /* This is the subscript coupling test. If we have already
4456 recorded a distance for this loop (a distance coming from
4457 another subscript), it should be the same. For example,
4458 in the following code, there is no dependence:
4465 if (init_v
[index
] != 0 && dist_v
[index
] != dist
)
4467 finalize_ddr_dependent (ddr
, chrec_known
);
4471 dist_v
[index
] = dist
;
4475 else if (!operand_equal_p (access_fn_a
, access_fn_b
, 0))
4477 /* This can be for example an affine vs. constant dependence
4478 (T[i] vs. T[3]) that is not an affine dependence and is
4479 not representable as a distance vector. */
4480 non_affine_dependence_relation (ddr
);
4488 /* Return true when the DDR contains only constant access functions. */
4491 constant_access_functions (const struct data_dependence_relation
*ddr
)
4496 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr
), i
, sub
)
4497 if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub
, 0))
4498 || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub
, 1)))
4504 /* Helper function for the case where DDR_A and DDR_B are the same
4505 multivariate access function with a constant step. For an example
4509 add_multivariate_self_dist (struct data_dependence_relation
*ddr
, tree c_2
)
4512 tree c_1
= CHREC_LEFT (c_2
);
4513 tree c_0
= CHREC_LEFT (c_1
);
4514 lambda_vector dist_v
;
4515 HOST_WIDE_INT v1
, v2
, cd
;
4517 /* Polynomials with more than 2 variables are not handled yet. When
4518 the evolution steps are parameters, it is not possible to
4519 represent the dependence using classical distance vectors. */
4520 if (TREE_CODE (c_0
) != INTEGER_CST
4521 || TREE_CODE (CHREC_RIGHT (c_1
)) != INTEGER_CST
4522 || TREE_CODE (CHREC_RIGHT (c_2
)) != INTEGER_CST
)
4524 DDR_AFFINE_P (ddr
) = false;
4528 x_2
= index_in_loop_nest (CHREC_VARIABLE (c_2
), DDR_LOOP_NEST (ddr
));
4529 x_1
= index_in_loop_nest (CHREC_VARIABLE (c_1
), DDR_LOOP_NEST (ddr
));
4531 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
4532 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4533 v1
= int_cst_value (CHREC_RIGHT (c_1
));
4534 v2
= int_cst_value (CHREC_RIGHT (c_2
));
4547 save_dist_v (ddr
, dist_v
);
4549 add_outer_distances (ddr
, dist_v
, x_1
);
4552 /* Helper function for the case where DDR_A and DDR_B are the same
4553 access functions. */
4556 add_other_self_distances (struct data_dependence_relation
*ddr
)
4558 lambda_vector dist_v
;
4560 int index_carry
= DDR_NB_LOOPS (ddr
);
4562 class loop
*loop
= DDR_LOOP_NEST (ddr
)[0];
4564 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr
), i
, sub
)
4566 tree access_fun
= SUB_ACCESS_FN (sub
, 0);
4568 if (TREE_CODE (access_fun
) == POLYNOMIAL_CHREC
)
4570 if (!evolution_function_is_univariate_p (access_fun
, loop
->num
))
4572 if (DDR_NUM_SUBSCRIPTS (ddr
) != 1)
4574 DDR_ARE_DEPENDENT (ddr
) = chrec_dont_know
;
4578 access_fun
= SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr
, 0), 0);
4580 if (TREE_CODE (CHREC_LEFT (access_fun
)) == POLYNOMIAL_CHREC
)
4581 add_multivariate_self_dist (ddr
, access_fun
);
4583 /* The evolution step is not constant: it varies in
4584 the outer loop, so this cannot be represented by a
4585 distance vector. For example in pr34635.c the
4586 evolution is {0, +, {0, +, 4}_1}_2. */
4587 DDR_AFFINE_P (ddr
) = false;
4592 /* When data references are collected in a loop while data
4593 dependences are analyzed in loop nest nested in the loop, we
4594 would have more number of access functions than number of
4595 loops. Skip access functions of loops not in the loop nest.
4597 See PR89725 for more information. */
4598 if (flow_loop_nested_p (get_loop (cfun
, CHREC_VARIABLE (access_fun
)),
4602 index_carry
= MIN (index_carry
,
4603 index_in_loop_nest (CHREC_VARIABLE (access_fun
),
4604 DDR_LOOP_NEST (ddr
)));
4608 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4609 add_outer_distances (ddr
, dist_v
, index_carry
);
4613 insert_innermost_unit_dist_vector (struct data_dependence_relation
*ddr
)
4615 lambda_vector dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4618 save_dist_v (ddr
, dist_v
);
4621 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
4622 is the case for example when access functions are the same and
4623 equal to a constant, as in:
4630 in which case the distance vectors are (0) and (1). */
4633 add_distance_for_zero_overlaps (struct data_dependence_relation
*ddr
)
4637 for (i
= 0; i
< DDR_NUM_SUBSCRIPTS (ddr
); i
++)
4639 subscript_p sub
= DDR_SUBSCRIPT (ddr
, i
);
4640 conflict_function
*ca
= SUB_CONFLICTS_IN_A (sub
);
4641 conflict_function
*cb
= SUB_CONFLICTS_IN_B (sub
);
4643 for (j
= 0; j
< ca
->n
; j
++)
4644 if (affine_function_zero_p (ca
->fns
[j
]))
4646 insert_innermost_unit_dist_vector (ddr
);
4650 for (j
= 0; j
< cb
->n
; j
++)
4651 if (affine_function_zero_p (cb
->fns
[j
]))
4653 insert_innermost_unit_dist_vector (ddr
);
4659 /* Return true when the DDR contains two data references that have the
4660 same access functions. */
4663 same_access_functions (const struct data_dependence_relation
*ddr
)
4668 FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr
), i
, sub
)
4669 if (!eq_evolutions_p (SUB_ACCESS_FN (sub
, 0),
4670 SUB_ACCESS_FN (sub
, 1)))
4676 /* Compute the classic per loop distance vector. DDR is the data
4677 dependence relation to build a vector from. Return false when fail
4678 to represent the data dependence as a distance vector. */
4681 build_classic_dist_vector (struct data_dependence_relation
*ddr
,
4682 class loop
*loop_nest
)
4684 bool init_b
= false;
4685 int index_carry
= DDR_NB_LOOPS (ddr
);
4686 lambda_vector dist_v
;
4688 if (DDR_ARE_DEPENDENT (ddr
) != NULL_TREE
)
4691 if (same_access_functions (ddr
))
4693 /* Save the 0 vector. */
4694 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4695 save_dist_v (ddr
, dist_v
);
4697 if (constant_access_functions (ddr
))
4698 add_distance_for_zero_overlaps (ddr
);
4700 if (DDR_NB_LOOPS (ddr
) > 1)
4701 add_other_self_distances (ddr
);
4706 dist_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4707 if (!build_classic_dist_vector_1 (ddr
, 0, 1, dist_v
, &init_b
, &index_carry
))
4710 /* Save the distance vector if we initialized one. */
4713 /* Verify a basic constraint: classic distance vectors should
4714 always be lexicographically positive.
4716 Data references are collected in the order of execution of
4717 the program, thus for the following loop
4719 | for (i = 1; i < 100; i++)
4720 | for (j = 1; j < 100; j++)
4722 | t = T[j+1][i-1]; // A
4723 | T[j][i] = t + 2; // B
4726 references are collected following the direction of the wind:
4727 A then B. The data dependence tests are performed also
4728 following this order, such that we're looking at the distance
4729 separating the elements accessed by A from the elements later
4730 accessed by B. But in this example, the distance returned by
4731 test_dep (A, B) is lexicographically negative (-1, 1), that
4732 means that the access A occurs later than B with respect to
4733 the outer loop, ie. we're actually looking upwind. In this
4734 case we solve test_dep (B, A) looking downwind to the
4735 lexicographically positive solution, that returns the
4736 distance vector (1, -1). */
4737 if (!lambda_vector_lexico_pos (dist_v
, DDR_NB_LOOPS (ddr
)))
4739 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4740 if (!subscript_dependence_tester_1 (ddr
, 1, 0, loop_nest
))
4742 compute_subscript_distance (ddr
);
4743 if (!build_classic_dist_vector_1 (ddr
, 1, 0, save_v
, &init_b
,
4746 save_dist_v (ddr
, save_v
);
4747 DDR_REVERSED_P (ddr
) = true;
4749 /* In this case there is a dependence forward for all the
4752 | for (k = 1; k < 100; k++)
4753 | for (i = 1; i < 100; i++)
4754 | for (j = 1; j < 100; j++)
4756 | t = T[j+1][i-1]; // A
4757 | T[j][i] = t + 2; // B
4765 if (DDR_NB_LOOPS (ddr
) > 1)
4767 add_outer_distances (ddr
, save_v
, index_carry
);
4768 add_outer_distances (ddr
, dist_v
, index_carry
);
4773 lambda_vector save_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4774 lambda_vector_copy (dist_v
, save_v
, DDR_NB_LOOPS (ddr
));
4776 if (DDR_NB_LOOPS (ddr
) > 1)
4778 lambda_vector opposite_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4780 if (!subscript_dependence_tester_1 (ddr
, 1, 0, loop_nest
))
4782 compute_subscript_distance (ddr
);
4783 if (!build_classic_dist_vector_1 (ddr
, 1, 0, opposite_v
, &init_b
,
4787 save_dist_v (ddr
, save_v
);
4788 add_outer_distances (ddr
, dist_v
, index_carry
);
4789 add_outer_distances (ddr
, opposite_v
, index_carry
);
4792 save_dist_v (ddr
, save_v
);
4797 /* There is a distance of 1 on all the outer loops: Example:
4798 there is a dependence of distance 1 on loop_1 for the array A.
4804 add_outer_distances (ddr
, dist_v
,
4805 lambda_vector_first_nz (dist_v
,
4806 DDR_NB_LOOPS (ddr
), 0));
4809 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4813 fprintf (dump_file
, "(build_classic_dist_vector\n");
4814 for (i
= 0; i
< DDR_NUM_DIST_VECTS (ddr
); i
++)
4816 fprintf (dump_file
, " dist_vector = (");
4817 print_lambda_vector (dump_file
, DDR_DIST_VECT (ddr
, i
),
4818 DDR_NB_LOOPS (ddr
));
4819 fprintf (dump_file
, " )\n");
4821 fprintf (dump_file
, ")\n");
4827 /* Return the direction for a given distance.
4828 FIXME: Computing dir this way is suboptimal, since dir can catch
4829 cases that dist is unable to represent. */
4831 static inline enum data_dependence_direction
4832 dir_from_dist (int dist
)
4835 return dir_positive
;
4837 return dir_negative
;
4842 /* Compute the classic per loop direction vector. DDR is the data
4843 dependence relation to build a vector from. */
4846 build_classic_dir_vector (struct data_dependence_relation
*ddr
)
4849 lambda_vector dist_v
;
4851 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, dist_v
)
4853 lambda_vector dir_v
= lambda_vector_new (DDR_NB_LOOPS (ddr
));
4855 for (j
= 0; j
< DDR_NB_LOOPS (ddr
); j
++)
4856 dir_v
[j
] = dir_from_dist (dist_v
[j
]);
4858 save_dir_v (ddr
, dir_v
);
4862 /* Helper function. Returns true when there is a dependence between the
4863 data references. A_INDEX is the index of the first reference (0 for
4864 DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference. */
4867 subscript_dependence_tester_1 (struct data_dependence_relation
*ddr
,
4868 unsigned int a_index
, unsigned int b_index
,
4869 class loop
*loop_nest
)
4872 tree last_conflicts
;
4873 struct subscript
*subscript
;
4874 tree res
= NULL_TREE
;
4876 for (i
= 0; DDR_SUBSCRIPTS (ddr
).iterate (i
, &subscript
); i
++)
4878 conflict_function
*overlaps_a
, *overlaps_b
;
4880 analyze_overlapping_iterations (SUB_ACCESS_FN (subscript
, a_index
),
4881 SUB_ACCESS_FN (subscript
, b_index
),
4882 &overlaps_a
, &overlaps_b
,
4883 &last_conflicts
, loop_nest
);
4885 if (SUB_CONFLICTS_IN_A (subscript
))
4886 free_conflict_function (SUB_CONFLICTS_IN_A (subscript
));
4887 if (SUB_CONFLICTS_IN_B (subscript
))
4888 free_conflict_function (SUB_CONFLICTS_IN_B (subscript
));
4890 SUB_CONFLICTS_IN_A (subscript
) = overlaps_a
;
4891 SUB_CONFLICTS_IN_B (subscript
) = overlaps_b
;
4892 SUB_LAST_CONFLICT (subscript
) = last_conflicts
;
4894 /* If there is any undetermined conflict function we have to
4895 give a conservative answer in case we cannot prove that
4896 no dependence exists when analyzing another subscript. */
4897 if (CF_NOT_KNOWN_P (overlaps_a
)
4898 || CF_NOT_KNOWN_P (overlaps_b
))
4900 res
= chrec_dont_know
;
4904 /* When there is a subscript with no dependence we can stop. */
4905 else if (CF_NO_DEPENDENCE_P (overlaps_a
)
4906 || CF_NO_DEPENDENCE_P (overlaps_b
))
4913 if (res
== NULL_TREE
)
4916 if (res
== chrec_known
)
4917 dependence_stats
.num_dependence_independent
++;
4919 dependence_stats
.num_dependence_undetermined
++;
4920 finalize_ddr_dependent (ddr
, res
);
4924 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
4927 subscript_dependence_tester (struct data_dependence_relation
*ddr
,
4928 class loop
*loop_nest
)
4930 if (subscript_dependence_tester_1 (ddr
, 0, 1, loop_nest
))
4931 dependence_stats
.num_dependence_dependent
++;
4933 compute_subscript_distance (ddr
);
4934 if (build_classic_dist_vector (ddr
, loop_nest
))
4935 build_classic_dir_vector (ddr
);
4938 /* Returns true when all the access functions of A are affine or
4939 constant with respect to LOOP_NEST. */
4942 access_functions_are_affine_or_constant_p (const struct data_reference
*a
,
4943 const class loop
*loop_nest
)
4946 vec
<tree
> fns
= DR_ACCESS_FNS (a
);
4949 FOR_EACH_VEC_ELT (fns
, i
, t
)
4950 if (!evolution_function_is_invariant_p (t
, loop_nest
->num
)
4951 && !evolution_function_is_affine_multivariate_p (t
, loop_nest
->num
))
4957 /* This computes the affine dependence relation between A and B with
4958 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4959 independence between two accesses, while CHREC_DONT_KNOW is used
4960 for representing the unknown relation.
4962 Note that it is possible to stop the computation of the dependence
4963 relation the first time we detect a CHREC_KNOWN element for a given
4967 compute_affine_dependence (struct data_dependence_relation
*ddr
,
4968 class loop
*loop_nest
)
4970 struct data_reference
*dra
= DDR_A (ddr
);
4971 struct data_reference
*drb
= DDR_B (ddr
);
4973 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4975 fprintf (dump_file
, "(compute_affine_dependence\n");
4976 fprintf (dump_file
, " stmt_a: ");
4977 print_gimple_stmt (dump_file
, DR_STMT (dra
), 0, TDF_SLIM
);
4978 fprintf (dump_file
, " stmt_b: ");
4979 print_gimple_stmt (dump_file
, DR_STMT (drb
), 0, TDF_SLIM
);
4982 /* Analyze only when the dependence relation is not yet known. */
4983 if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
4985 dependence_stats
.num_dependence_tests
++;
4987 if (access_functions_are_affine_or_constant_p (dra
, loop_nest
)
4988 && access_functions_are_affine_or_constant_p (drb
, loop_nest
))
4989 subscript_dependence_tester (ddr
, loop_nest
);
4991 /* As a last case, if the dependence cannot be determined, or if
4992 the dependence is considered too difficult to determine, answer
4996 dependence_stats
.num_dependence_undetermined
++;
4998 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5000 fprintf (dump_file
, "Data ref a:\n");
5001 dump_data_reference (dump_file
, dra
);
5002 fprintf (dump_file
, "Data ref b:\n");
5003 dump_data_reference (dump_file
, drb
);
5004 fprintf (dump_file
, "affine dependence test not usable: access function not affine or constant.\n");
5006 finalize_ddr_dependent (ddr
, chrec_dont_know
);
5010 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5012 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
5013 fprintf (dump_file
, ") -> no dependence\n");
5014 else if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
5015 fprintf (dump_file
, ") -> dependence analysis failed\n");
5017 fprintf (dump_file
, ")\n");
5021 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5022 the data references in DATAREFS, in the LOOP_NEST. When
5023 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5024 relations. Return true when successful, i.e. data references number
5025 is small enough to be handled. */
5028 compute_all_dependences (vec
<data_reference_p
> datarefs
,
5029 vec
<ddr_p
> *dependence_relations
,
5030 vec
<loop_p
> loop_nest
,
5031 bool compute_self_and_rr
)
5033 struct data_dependence_relation
*ddr
;
5034 struct data_reference
*a
, *b
;
5037 if ((int) datarefs
.length ()
5038 > param_loop_max_datarefs_for_datadeps
)
5040 struct data_dependence_relation
*ddr
;
5042 /* Insert a single relation into dependence_relations:
5044 ddr
= initialize_data_dependence_relation (NULL
, NULL
, loop_nest
);
5045 dependence_relations
->safe_push (ddr
);
5049 FOR_EACH_VEC_ELT (datarefs
, i
, a
)
5050 for (j
= i
+ 1; datarefs
.iterate (j
, &b
); j
++)
5051 if (DR_IS_WRITE (a
) || DR_IS_WRITE (b
) || compute_self_and_rr
)
5053 ddr
= initialize_data_dependence_relation (a
, b
, loop_nest
);
5054 dependence_relations
->safe_push (ddr
);
5055 if (loop_nest
.exists ())
5056 compute_affine_dependence (ddr
, loop_nest
[0]);
5059 if (compute_self_and_rr
)
5060 FOR_EACH_VEC_ELT (datarefs
, i
, a
)
5062 ddr
= initialize_data_dependence_relation (a
, a
, loop_nest
);
5063 dependence_relations
->safe_push (ddr
);
5064 if (loop_nest
.exists ())
5065 compute_affine_dependence (ddr
, loop_nest
[0]);
5071 /* Describes a location of a memory reference. */
5075 /* The memory reference. */
5078 /* True if the memory reference is read. */
5081 /* True if the data reference is conditional within the containing
5082 statement, i.e. if it might not occur even when the statement
5083 is executed and runs to completion. */
5084 bool is_conditional_in_stmt
;
5088 /* Stores the locations of memory references in STMT to REFERENCES. Returns
5089 true if STMT clobbers memory, false otherwise. */
5092 get_references_in_stmt (gimple
*stmt
, vec
<data_ref_loc
, va_heap
> *references
)
5094 bool clobbers_memory
= false;
5097 enum gimple_code stmt_code
= gimple_code (stmt
);
5099 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5100 As we cannot model data-references to not spelled out
5101 accesses give up if they may occur. */
5102 if (stmt_code
== GIMPLE_CALL
5103 && !(gimple_call_flags (stmt
) & ECF_CONST
))
5105 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */
5106 if (gimple_call_internal_p (stmt
))
5107 switch (gimple_call_internal_fn (stmt
))
5109 case IFN_GOMP_SIMD_LANE
:
5111 class loop
*loop
= gimple_bb (stmt
)->loop_father
;
5112 tree uid
= gimple_call_arg (stmt
, 0);
5113 gcc_assert (TREE_CODE (uid
) == SSA_NAME
);
5115 || loop
->simduid
!= SSA_NAME_VAR (uid
))
5116 clobbers_memory
= true;
5120 case IFN_MASK_STORE
:
5123 clobbers_memory
= true;
5127 clobbers_memory
= true;
5129 else if (stmt_code
== GIMPLE_ASM
5130 && (gimple_asm_volatile_p (as_a
<gasm
*> (stmt
))
5131 || gimple_vuse (stmt
)))
5132 clobbers_memory
= true;
5134 if (!gimple_vuse (stmt
))
5135 return clobbers_memory
;
5137 if (stmt_code
== GIMPLE_ASSIGN
)
5140 op0
= gimple_assign_lhs (stmt
);
5141 op1
= gimple_assign_rhs1 (stmt
);
5144 || (REFERENCE_CLASS_P (op1
)
5145 && (base
= get_base_address (op1
))
5146 && TREE_CODE (base
) != SSA_NAME
5147 && !is_gimple_min_invariant (base
)))
5151 ref
.is_conditional_in_stmt
= false;
5152 references
->safe_push (ref
);
5155 else if (stmt_code
== GIMPLE_CALL
)
5161 ref
.is_read
= false;
5162 if (gimple_call_internal_p (stmt
))
5163 switch (gimple_call_internal_fn (stmt
))
5166 if (gimple_call_lhs (stmt
) == NULL_TREE
)
5170 case IFN_MASK_STORE
:
5171 ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)), 0);
5172 align
= tree_to_shwi (gimple_call_arg (stmt
, 1));
5174 type
= TREE_TYPE (gimple_call_lhs (stmt
));
5176 type
= TREE_TYPE (gimple_call_arg (stmt
, 3));
5177 if (TYPE_ALIGN (type
) != align
)
5178 type
= build_aligned_type (type
, align
);
5179 ref
.is_conditional_in_stmt
= true;
5180 ref
.ref
= fold_build2 (MEM_REF
, type
, gimple_call_arg (stmt
, 0),
5182 references
->safe_push (ref
);
5188 op0
= gimple_call_lhs (stmt
);
5189 n
= gimple_call_num_args (stmt
);
5190 for (i
= 0; i
< n
; i
++)
5192 op1
= gimple_call_arg (stmt
, i
);
5195 || (REFERENCE_CLASS_P (op1
) && get_base_address (op1
)))
5199 ref
.is_conditional_in_stmt
= false;
5200 references
->safe_push (ref
);
5205 return clobbers_memory
;
5209 || (REFERENCE_CLASS_P (op0
) && get_base_address (op0
))))
5212 ref
.is_read
= false;
5213 ref
.is_conditional_in_stmt
= false;
5214 references
->safe_push (ref
);
5216 return clobbers_memory
;
5220 /* Returns true if the loop-nest has any data reference. */
5223 loop_nest_has_data_refs (loop_p loop
)
5225 basic_block
*bbs
= get_loop_body (loop
);
5226 auto_vec
<data_ref_loc
, 3> references
;
5228 for (unsigned i
= 0; i
< loop
->num_nodes
; i
++)
5230 basic_block bb
= bbs
[i
];
5231 gimple_stmt_iterator bsi
;
5233 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
5235 gimple
*stmt
= gsi_stmt (bsi
);
5236 get_references_in_stmt (stmt
, &references
);
5237 if (references
.length ())
5248 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
5249 reference, returns false, otherwise returns true. NEST is the outermost
5250 loop of the loop nest in which the references should be analyzed. */
5253 find_data_references_in_stmt (class loop
*nest
, gimple
*stmt
,
5254 vec
<data_reference_p
> *datarefs
)
5257 auto_vec
<data_ref_loc
, 2> references
;
5259 data_reference_p dr
;
5261 if (get_references_in_stmt (stmt
, &references
))
5262 return opt_result::failure_at (stmt
, "statement clobbers memory: %G",
5265 FOR_EACH_VEC_ELT (references
, i
, ref
)
5267 dr
= create_data_ref (nest
? loop_preheader_edge (nest
) : NULL
,
5268 loop_containing_stmt (stmt
), ref
->ref
,
5269 stmt
, ref
->is_read
, ref
->is_conditional_in_stmt
);
5270 gcc_assert (dr
!= NULL
);
5271 datarefs
->safe_push (dr
);
5274 return opt_result::success ();
5277 /* Stores the data references in STMT to DATAREFS. If there is an
5278 unanalyzable reference, returns false, otherwise returns true.
5279 NEST is the outermost loop of the loop nest in which the references
5280 should be instantiated, LOOP is the loop in which the references
5281 should be analyzed. */
5284 graphite_find_data_references_in_stmt (edge nest
, loop_p loop
, gimple
*stmt
,
5285 vec
<data_reference_p
> *datarefs
)
5288 auto_vec
<data_ref_loc
, 2> references
;
5291 data_reference_p dr
;
5293 if (get_references_in_stmt (stmt
, &references
))
5296 FOR_EACH_VEC_ELT (references
, i
, ref
)
5298 dr
= create_data_ref (nest
, loop
, ref
->ref
, stmt
, ref
->is_read
,
5299 ref
->is_conditional_in_stmt
);
5300 gcc_assert (dr
!= NULL
);
5301 datarefs
->safe_push (dr
);
5307 /* Search the data references in LOOP, and record the information into
5308 DATAREFS. Returns chrec_dont_know when failing to analyze a
5309 difficult case, returns NULL_TREE otherwise. */
5312 find_data_references_in_bb (class loop
*loop
, basic_block bb
,
5313 vec
<data_reference_p
> *datarefs
)
5315 gimple_stmt_iterator bsi
;
5317 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
5319 gimple
*stmt
= gsi_stmt (bsi
);
5321 if (!find_data_references_in_stmt (loop
, stmt
, datarefs
))
5323 struct data_reference
*res
;
5324 res
= XCNEW (struct data_reference
);
5325 datarefs
->safe_push (res
);
5327 return chrec_dont_know
;
5334 /* Search the data references in LOOP, and record the information into
5335 DATAREFS. Returns chrec_dont_know when failing to analyze a
5336 difficult case, returns NULL_TREE otherwise.
5338 TODO: This function should be made smarter so that it can handle address
5339 arithmetic as if they were array accesses, etc. */
5342 find_data_references_in_loop (class loop
*loop
,
5343 vec
<data_reference_p
> *datarefs
)
5345 basic_block bb
, *bbs
;
5348 bbs
= get_loop_body_in_dom_order (loop
);
5350 for (i
= 0; i
< loop
->num_nodes
; i
++)
5354 if (find_data_references_in_bb (loop
, bb
, datarefs
) == chrec_dont_know
)
5357 return chrec_dont_know
;
5365 /* Return the alignment in bytes that DRB is guaranteed to have at all
5369 dr_alignment (innermost_loop_behavior
*drb
)
5371 /* Get the alignment of BASE_ADDRESS + INIT. */
5372 unsigned int alignment
= drb
->base_alignment
;
5373 unsigned int misalignment
= (drb
->base_misalignment
5374 + TREE_INT_CST_LOW (drb
->init
));
5375 if (misalignment
!= 0)
5376 alignment
= MIN (alignment
, misalignment
& -misalignment
);
5378 /* Cap it to the alignment of OFFSET. */
5379 if (!integer_zerop (drb
->offset
))
5380 alignment
= MIN (alignment
, drb
->offset_alignment
);
5382 /* Cap it to the alignment of STEP. */
5383 if (!integer_zerop (drb
->step
))
5384 alignment
= MIN (alignment
, drb
->step_alignment
);
5389 /* If BASE is a pointer-typed SSA name, try to find the object that it
5390 is based on. Return this object X on success and store the alignment
5391 in bytes of BASE - &X in *ALIGNMENT_OUT. */
5394 get_base_for_alignment_1 (tree base
, unsigned int *alignment_out
)
5396 if (TREE_CODE (base
) != SSA_NAME
|| !POINTER_TYPE_P (TREE_TYPE (base
)))
5399 gimple
*def
= SSA_NAME_DEF_STMT (base
);
5400 base
= analyze_scalar_evolution (loop_containing_stmt (def
), base
);
5402 /* Peel chrecs and record the minimum alignment preserved by
5404 unsigned int alignment
= MAX_OFILE_ALIGNMENT
/ BITS_PER_UNIT
;
5405 while (TREE_CODE (base
) == POLYNOMIAL_CHREC
)
5407 unsigned int step_alignment
= highest_pow2_factor (CHREC_RIGHT (base
));
5408 alignment
= MIN (alignment
, step_alignment
);
5409 base
= CHREC_LEFT (base
);
5412 /* Punt if the expression is too complicated to handle. */
5413 if (tree_contains_chrecs (base
, NULL
) || !POINTER_TYPE_P (TREE_TYPE (base
)))
5416 /* The only useful cases are those for which a dereference folds to something
5417 other than an INDIRECT_REF. */
5418 tree ref_type
= TREE_TYPE (TREE_TYPE (base
));
5419 tree ref
= fold_indirect_ref_1 (UNKNOWN_LOCATION
, ref_type
, base
);
5423 /* Analyze the base to which the steps we peeled were applied. */
5424 poly_int64 bitsize
, bitpos
, bytepos
;
5426 int unsignedp
, reversep
, volatilep
;
5428 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &offset
, &mode
,
5429 &unsignedp
, &reversep
, &volatilep
);
5430 if (!base
|| !multiple_p (bitpos
, BITS_PER_UNIT
, &bytepos
))
5433 /* Restrict the alignment to that guaranteed by the offsets. */
5434 unsigned int bytepos_alignment
= known_alignment (bytepos
);
5435 if (bytepos_alignment
!= 0)
5436 alignment
= MIN (alignment
, bytepos_alignment
);
5439 unsigned int offset_alignment
= highest_pow2_factor (offset
);
5440 alignment
= MIN (alignment
, offset_alignment
);
5443 *alignment_out
= alignment
;
5447 /* Return the object whose alignment would need to be changed in order
5448 to increase the alignment of ADDR. Store the maximum achievable
5449 alignment in *MAX_ALIGNMENT. */
5452 get_base_for_alignment (tree addr
, unsigned int *max_alignment
)
5454 tree base
= get_base_for_alignment_1 (addr
, max_alignment
);
5458 if (TREE_CODE (addr
) == ADDR_EXPR
)
5459 addr
= TREE_OPERAND (addr
, 0);
5460 *max_alignment
= MAX_OFILE_ALIGNMENT
/ BITS_PER_UNIT
;
5464 /* Recursive helper function. */
5467 find_loop_nest_1 (class loop
*loop
, vec
<loop_p
> *loop_nest
)
5469 /* Inner loops of the nest should not contain siblings. Example:
5470 when there are two consecutive loops,
5481 the dependence relation cannot be captured by the distance
5486 loop_nest
->safe_push (loop
);
5488 return find_loop_nest_1 (loop
->inner
, loop_nest
);
5492 /* Return false when the LOOP is not well nested. Otherwise return
5493 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
5494 contain the loops from the outermost to the innermost, as they will
5495 appear in the classic distance vector. */
5498 find_loop_nest (class loop
*loop
, vec
<loop_p
> *loop_nest
)
5500 loop_nest
->safe_push (loop
);
5502 return find_loop_nest_1 (loop
->inner
, loop_nest
);
5506 /* Returns true when the data dependences have been computed, false otherwise.
5507 Given a loop nest LOOP, the following vectors are returned:
5508 DATAREFS is initialized to all the array elements contained in this loop,
5509 DEPENDENCE_RELATIONS contains the relations between the data references.
5510 Compute read-read and self relations if
5511 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
5514 compute_data_dependences_for_loop (class loop
*loop
,
5515 bool compute_self_and_read_read_dependences
,
5516 vec
<loop_p
> *loop_nest
,
5517 vec
<data_reference_p
> *datarefs
,
5518 vec
<ddr_p
> *dependence_relations
)
5522 memset (&dependence_stats
, 0, sizeof (dependence_stats
));
5524 /* If the loop nest is not well formed, or one of the data references
5525 is not computable, give up without spending time to compute other
5528 || !find_loop_nest (loop
, loop_nest
)
5529 || find_data_references_in_loop (loop
, datarefs
) == chrec_dont_know
5530 || !compute_all_dependences (*datarefs
, dependence_relations
, *loop_nest
,
5531 compute_self_and_read_read_dependences
))
5534 if (dump_file
&& (dump_flags
& TDF_STATS
))
5536 fprintf (dump_file
, "Dependence tester statistics:\n");
5538 fprintf (dump_file
, "Number of dependence tests: %d\n",
5539 dependence_stats
.num_dependence_tests
);
5540 fprintf (dump_file
, "Number of dependence tests classified dependent: %d\n",
5541 dependence_stats
.num_dependence_dependent
);
5542 fprintf (dump_file
, "Number of dependence tests classified independent: %d\n",
5543 dependence_stats
.num_dependence_independent
);
5544 fprintf (dump_file
, "Number of undetermined dependence tests: %d\n",
5545 dependence_stats
.num_dependence_undetermined
);
5547 fprintf (dump_file
, "Number of subscript tests: %d\n",
5548 dependence_stats
.num_subscript_tests
);
5549 fprintf (dump_file
, "Number of undetermined subscript tests: %d\n",
5550 dependence_stats
.num_subscript_undetermined
);
5551 fprintf (dump_file
, "Number of same subscript function: %d\n",
5552 dependence_stats
.num_same_subscript_function
);
5554 fprintf (dump_file
, "Number of ziv tests: %d\n",
5555 dependence_stats
.num_ziv
);
5556 fprintf (dump_file
, "Number of ziv tests returning dependent: %d\n",
5557 dependence_stats
.num_ziv_dependent
);
5558 fprintf (dump_file
, "Number of ziv tests returning independent: %d\n",
5559 dependence_stats
.num_ziv_independent
);
5560 fprintf (dump_file
, "Number of ziv tests unimplemented: %d\n",
5561 dependence_stats
.num_ziv_unimplemented
);
5563 fprintf (dump_file
, "Number of siv tests: %d\n",
5564 dependence_stats
.num_siv
);
5565 fprintf (dump_file
, "Number of siv tests returning dependent: %d\n",
5566 dependence_stats
.num_siv_dependent
);
5567 fprintf (dump_file
, "Number of siv tests returning independent: %d\n",
5568 dependence_stats
.num_siv_independent
);
5569 fprintf (dump_file
, "Number of siv tests unimplemented: %d\n",
5570 dependence_stats
.num_siv_unimplemented
);
5572 fprintf (dump_file
, "Number of miv tests: %d\n",
5573 dependence_stats
.num_miv
);
5574 fprintf (dump_file
, "Number of miv tests returning dependent: %d\n",
5575 dependence_stats
.num_miv_dependent
);
5576 fprintf (dump_file
, "Number of miv tests returning independent: %d\n",
5577 dependence_stats
.num_miv_independent
);
5578 fprintf (dump_file
, "Number of miv tests unimplemented: %d\n",
5579 dependence_stats
.num_miv_unimplemented
);
5585 /* Free the memory used by a data dependence relation DDR. */
5588 free_dependence_relation (struct data_dependence_relation
*ddr
)
5593 if (DDR_SUBSCRIPTS (ddr
).exists ())
5594 free_subscripts (DDR_SUBSCRIPTS (ddr
));
5595 DDR_DIST_VECTS (ddr
).release ();
5596 DDR_DIR_VECTS (ddr
).release ();
5601 /* Free the memory used by the data dependence relations from
5602 DEPENDENCE_RELATIONS. */
5605 free_dependence_relations (vec
<ddr_p
> dependence_relations
)
5608 struct data_dependence_relation
*ddr
;
5610 FOR_EACH_VEC_ELT (dependence_relations
, i
, ddr
)
5612 free_dependence_relation (ddr
);
5614 dependence_relations
.release ();
5617 /* Free the memory used by the data references from DATAREFS. */
5620 free_data_refs (vec
<data_reference_p
> datarefs
)
5623 struct data_reference
*dr
;
5625 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
5627 datarefs
.release ();
5630 /* Common routine implementing both dr_direction_indicator and
5631 dr_zero_step_indicator. Return USEFUL_MIN if the indicator is known
5632 to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
5633 Return the step as the indicator otherwise. */
5636 dr_step_indicator (struct data_reference
*dr
, int useful_min
)
5638 tree step
= DR_STEP (dr
);
5642 /* Look for cases where the step is scaled by a positive constant
5643 integer, which will often be the access size. If the multiplication
5644 doesn't change the sign (due to overflow effects) then we can
5645 test the unscaled value instead. */
5646 if (TREE_CODE (step
) == MULT_EXPR
5647 && TREE_CODE (TREE_OPERAND (step
, 1)) == INTEGER_CST
5648 && tree_int_cst_sgn (TREE_OPERAND (step
, 1)) > 0)
5650 tree factor
= TREE_OPERAND (step
, 1);
5651 step
= TREE_OPERAND (step
, 0);
5653 /* Strip widening and truncating conversions as well as nops. */
5654 if (CONVERT_EXPR_P (step
)
5655 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step
, 0))))
5656 step
= TREE_OPERAND (step
, 0);
5657 tree type
= TREE_TYPE (step
);
5659 /* Get the range of step values that would not cause overflow. */
5660 widest_int minv
= (wi::to_widest (TYPE_MIN_VALUE (ssizetype
))
5661 / wi::to_widest (factor
));
5662 widest_int maxv
= (wi::to_widest (TYPE_MAX_VALUE (ssizetype
))
5663 / wi::to_widest (factor
));
5665 /* Get the range of values that the unconverted step actually has. */
5666 wide_int step_min
, step_max
;
5667 if (TREE_CODE (step
) != SSA_NAME
5668 || get_range_info (step
, &step_min
, &step_max
) != VR_RANGE
)
5670 step_min
= wi::to_wide (TYPE_MIN_VALUE (type
));
5671 step_max
= wi::to_wide (TYPE_MAX_VALUE (type
));
5674 /* Check whether the unconverted step has an acceptable range. */
5675 signop sgn
= TYPE_SIGN (type
);
5676 if (wi::les_p (minv
, widest_int::from (step_min
, sgn
))
5677 && wi::ges_p (maxv
, widest_int::from (step_max
, sgn
)))
5679 if (wi::ge_p (step_min
, useful_min
, sgn
))
5680 return ssize_int (useful_min
);
5681 else if (wi::lt_p (step_max
, 0, sgn
))
5682 return ssize_int (-1);
5684 return fold_convert (ssizetype
, step
);
5687 return DR_STEP (dr
);
5690 /* Return a value that is negative iff DR has a negative step. */
5693 dr_direction_indicator (struct data_reference
*dr
)
5695 return dr_step_indicator (dr
, 0);
5698 /* Return a value that is zero iff DR has a zero step. */
5701 dr_zero_step_indicator (struct data_reference
*dr
)
5703 return dr_step_indicator (dr
, 1);
5706 /* Return true if DR is known to have a nonnegative (but possibly zero)
5710 dr_known_forward_stride_p (struct data_reference
*dr
)
5712 tree indicator
= dr_direction_indicator (dr
);
5713 tree neg_step_val
= fold_binary (LT_EXPR
, boolean_type_node
,
5714 fold_convert (ssizetype
, indicator
),
5716 return neg_step_val
&& integer_zerop (neg_step_val
);