gcc/loop-invariant.c

   1 /* RTL-level loop invariant motion.
   2    Copyright (C) 2004-2019 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This implements the loop invariant motion pass.  It is very simple
  21    (no calls, no loads/stores, etc.).  This should be sufficient to cleanup
  22    things like address arithmetics -- other more complicated invariants should
  23    be eliminated on GIMPLE either in tree-ssa-loop-im.c or in tree-ssa-pre.c.
  24
  25    We proceed loop by loop -- it is simpler than trying to handle things
  26    globally and should not lose much.  First we inspect all sets inside loop
  27    and create a dependency graph on insns (saying "to move this insn, you must
  28    also move the following insns").
  29
  30    We then need to determine what to move.  We estimate the number of registers
  31    used and move as many invariants as possible while we still have enough free
  32    registers.  We prefer the expensive invariants.
  33
   Then we move the selected invariants out of the loop, creating new
   temporaries for them if necessary.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "coretypes.h"
  40 #include "backend.h"
  41 #include "target.h"
  42 #include "rtl.h"
  43 #include "tree.h"
  44 #include "cfghooks.h"
  45 #include "df.h"
  46 #include "memmodel.h"
  47 #include "tm_p.h"
  48 #include "insn-config.h"
  49 #include "regs.h"
  50 #include "ira.h"
  51 #include "recog.h"
  52 #include "cfgrtl.h"
  53 #include "cfgloop.h"
  54 #include "expr.h"
  55 #include "params.h"
  56 #include "rtl-iter.h"
  57 #include "dumpfile.h"
  58
/* The data stored for the loop.  An instance is allocated (zeroed) by
   find_exits the first time a loop is seen and is hung off the loop's
   AUX pointer; use LOOP_DATA to get at it.  */

class loop_data
{
public:
  class loop *outermost_exit;   /* The outermost exit of the loop.  */
  bool has_call;                /* True if the loop contains a call.  */
  /* Maximal register pressure inside loop for given register class
     (defined only for the pressure classes).  */
  int max_reg_pressure[N_REG_CLASSES];
  /* Loop regs referenced and live pseudo-registers.  Both bitmaps are
     initialized on reg_obstack when the loop_data is created.  */
  bitmap_head regs_ref;
  bitmap_head regs_live;
};

/* View the AUX pointer of LOOP as the loop_data of this pass.  The result
   is NULL when nothing has been attached to LOOP yet.  */
#define LOOP_DATA(LOOP) ((class loop_data *) (LOOP)->aux)
  75
/* The description of a use.  Uses belonging to one definition are chained
   through NEXT; record_use pushes each new use at the head of the list.  */

struct use
{
  rtx *pos;                     /* Position of the use.  */
  rtx_insn *insn;               /* The insn in which the use occurs.  */
  unsigned addr_use_p;          /* Whether the use occurs in an address
                                   (a memory load or store reference).  */
  struct use *next;             /* Next use in the list.  */
};
  85
/* The description of a def.  The counters and the propagation flag are
   maintained by record_use.  */

struct def
{
  struct use *uses;             /* The list of uses that are uniquely reached
                                   by it.  */
  unsigned n_uses;              /* Number of such uses.  */
  unsigned n_addr_uses;         /* Number of uses in addresses.  */
  unsigned invno;               /* The corresponding invariant (its index
                                   in the INVARIANTS vector).  */
  bool can_prop_to_addr_uses;   /* True if the corresponding inv can be
                                   propagated into its address uses.  Only
                                   meaningful once N_ADDR_USES is nonzero;
                                   it is set on the first address use and
                                   cleared as soon as one use fails
                                   inv_can_prop_to_addr_use.  */
};
  98
/* The data stored for each invariant.  Instances are created by
   create_new_invariant and collected in the INVARIANTS vector.  */

struct invariant
{
  /* The number of the invariant; this is its index in INVARIANTS.  */
  unsigned invno;

  /* The number of the invariant with the same value.  It is ~0u until
     find_identical_invariants has processed this invariant.  */
  unsigned eqto;

  /* The number of invariants which eqto this.  Starts at 1 (the invariant
     itself) and is bumped for every always-executed invariant found to be
     equivalent to this one.  */
  unsigned eqno;

  /* If we moved the invariant out of the loop, the original regno
     that contained its value.  Initialized to -1.  */
  int orig_regno;

  /* If we moved the invariant out of the loop, the register that contains its
     value.  Initialized to NULL_RTX.  */
  rtx reg;

  /* The definition of the invariant.  May be NULL; create_new_invariant
     accepts a null DEF.  */
  struct def *def;

  /* The insn in which it is defined.  */
  rtx_insn *insn;

  /* Whether it is always executed.  */
  bool always_executed;

  /* Whether to move the invariant.  Initialized to false.  */
  bool move;

  /* Whether the invariant is cheap when used as an address.  */
  bool cheap_address;

  /* Cost of the invariant.  */
  unsigned cost;

  /* Used for detecting already visited invariants during determining
     costs of movements.  Initialized to 0.  */
  unsigned stamp;

  /* The invariants it depends on, as a bitmap of invariant numbers.  */
  bitmap depends_on;
};
 145
/* Currently processed loop.  */
static class loop *curr_loop;

/* Table of invariants indexed by the DF_REF_ID of the defining df_ref.
   INVARIANT_TABLE_SIZE is the number of slots currently allocated; the
   table is grown on demand by check_invariant_table_size, which also
   clears the newly added slots.  */

static unsigned int invariant_table_size = 0;
static struct invariant ** invariant_table;
 153
/* Entry for hash table of invariant expressions.  Entries are allocated
   and filled in by find_or_insert_inv.  */

struct invariant_expr_entry
{
  /* The invariant.  */
  struct invariant *inv;

  /* Its value.  */
  rtx expr;

  /* Its mode.  Passed in by the caller separately from EXPR, because EXPR
     itself may be VOIDmode.  */
  machine_mode mode;

  /* Its hash, as computed by hash_invariant_expr_1.  */
  hashval_t hash;
};
 170
/* The actual stamp for marking already visited invariants during determining
   costs of movements.  */

static unsigned actual_stamp;

/* Pointer-to-invariant shorthand, used as the element type of INVARIANTS.  */
typedef struct invariant *invariant_p;


/* The invariants.  An invariant's INVNO is its index in this vector.  */

static vec<invariant_p> invariants;
 182
 183 /* Check the size of the invariant table and realloc if necessary.  */
 184
 185 static void
 186 check_invariant_table_size (void)
 187 {
 188   if (invariant_table_size < DF_DEFS_TABLE_SIZE ())
 189     {
 190       unsigned int new_size = DF_DEFS_TABLE_SIZE () + (DF_DEFS_TABLE_SIZE () / 4);
 191       invariant_table = XRESIZEVEC (struct invariant *, invariant_table, new_size);
 192       memset (&invariant_table[invariant_table_size], 0,
 193               (new_size - invariant_table_size) * sizeof (struct invariant *));
 194       invariant_table_size = new_size;
 195     }
 196 }
 197
 198 /* Test for possibility of invariantness of X.  */
 199
 200 static bool
 201 check_maybe_invariant (rtx x)
 202 {
 203   enum rtx_code code = GET_CODE (x);
 204   int i, j;
 205   const char *fmt;
 206
 207   switch (code)
 208     {
 209     CASE_CONST_ANY:
 210     case SYMBOL_REF:
 211     case CONST:
 212     case LABEL_REF:
 213       return true;
 214
 215     case PC:
 216     case CC0:
 217     case UNSPEC_VOLATILE:
 218     case CALL:
 219       return false;
 220
 221     case REG:
 222       return true;
 223
 224     case MEM:
 225       /* Load/store motion is done elsewhere.  ??? Perhaps also add it here?
 226          It should not be hard, and might be faster than "elsewhere".  */
 227
 228       /* Just handle the most trivial case where we load from an unchanging
 229          location (most importantly, pic tables).  */
 230       if (MEM_READONLY_P (x) && !MEM_VOLATILE_P (x))
 231         break;
 232
 233       return false;
 234
 235     case ASM_OPERANDS:
 236       /* Don't mess with insns declared volatile.  */
 237       if (MEM_VOLATILE_P (x))
 238         return false;
 239       break;
 240
 241     default:
 242       break;
 243     }
 244
 245   fmt = GET_RTX_FORMAT (code);
 246   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
 247     {
 248       if (fmt[i] == 'e')
 249         {
 250           if (!check_maybe_invariant (XEXP (x, i)))
 251             return false;
 252         }
 253       else if (fmt[i] == 'E')
 254         {
 255           for (j = 0; j < XVECLEN (x, i); j++)
 256             if (!check_maybe_invariant (XVECEXP (x, i, j)))
 257               return false;
 258         }
 259     }
 260
 261   return true;
 262 }
 263
 264 /* Returns the invariant definition for USE, or NULL if USE is not
 265    invariant.  */
 266
 267 static struct invariant *
 268 invariant_for_use (df_ref use)
 269 {
 270   struct df_link *defs;
 271   df_ref def;
 272   basic_block bb = DF_REF_BB (use), def_bb;
 273
 274   if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
 275     return NULL;
 276
 277   defs = DF_REF_CHAIN (use);
 278   if (!defs || defs->next)
 279     return NULL;
 280   def = defs->ref;
 281   check_invariant_table_size ();
 282   if (!invariant_table[DF_REF_ID (def)])
 283     return NULL;
 284
 285   def_bb = DF_REF_BB (def);
 286   if (!dominated_by_p (CDI_DOMINATORS, bb, def_bb))
 287     return NULL;
 288   return invariant_table[DF_REF_ID (def)];
 289 }
 290
 291 /* Computes hash value for invariant expression X in INSN.  */
 292
 293 static hashval_t
 294 hash_invariant_expr_1 (rtx_insn *insn, rtx x)
 295 {
 296   enum rtx_code code = GET_CODE (x);
 297   int i, j;
 298   const char *fmt;
 299   hashval_t val = code;
 300   int do_not_record_p;
 301   df_ref use;
 302   struct invariant *inv;
 303
 304   switch (code)
 305     {
 306     CASE_CONST_ANY:
 307     case SYMBOL_REF:
 308     case CONST:
 309     case LABEL_REF:
 310       return hash_rtx (x, GET_MODE (x), &do_not_record_p, NULL, false);
 311
 312     case REG:
 313       use = df_find_use (insn, x);
 314       if (!use)
 315         return hash_rtx (x, GET_MODE (x), &do_not_record_p, NULL, false);
 316       inv = invariant_for_use (use);
 317       if (!inv)
 318         return hash_rtx (x, GET_MODE (x), &do_not_record_p, NULL, false);
 319
 320       gcc_assert (inv->eqto != ~0u);
 321       return inv->eqto;
 322
 323     default:
 324       break;
 325     }
 326
 327   fmt = GET_RTX_FORMAT (code);
 328   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
 329     {
 330       if (fmt[i] == 'e')
 331         val ^= hash_invariant_expr_1 (insn, XEXP (x, i));
 332       else if (fmt[i] == 'E')
 333         {
 334           for (j = 0; j < XVECLEN (x, i); j++)
 335             val ^= hash_invariant_expr_1 (insn, XVECEXP (x, i, j));
 336         }
 337       else if (fmt[i] == 'i' || fmt[i] == 'n')
 338         val ^= XINT (x, i);
 339       else if (fmt[i] == 'p')
 340         val ^= constant_lower_bound (SUBREG_BYTE (x));
 341     }
 342
 343   return val;
 344 }
 345
 346 /* Returns true if the invariant expressions E1 and E2 used in insns INSN1
 347    and INSN2 have always the same value.  */
 348
 349 static bool
 350 invariant_expr_equal_p (rtx_insn *insn1, rtx e1, rtx_insn *insn2, rtx e2)
 351 {
 352   enum rtx_code code = GET_CODE (e1);
 353   int i, j;
 354   const char *fmt;
 355   df_ref use1, use2;
 356   struct invariant *inv1 = NULL, *inv2 = NULL;
 357   rtx sub1, sub2;
 358
 359   /* If mode of only one of the operands is VOIDmode, it is not equivalent to
 360      the other one.  If both are VOIDmode, we rely on the caller of this
 361      function to verify that their modes are the same.  */
 362   if (code != GET_CODE (e2) || GET_MODE (e1) != GET_MODE (e2))
 363     return false;
 364
 365   switch (code)
 366     {
 367     CASE_CONST_ANY:
 368     case SYMBOL_REF:
 369     case CONST:
 370     case LABEL_REF:
 371       return rtx_equal_p (e1, e2);
 372
 373     case REG:
 374       use1 = df_find_use (insn1, e1);
 375       use2 = df_find_use (insn2, e2);
 376       if (use1)
 377         inv1 = invariant_for_use (use1);
 378       if (use2)
 379         inv2 = invariant_for_use (use2);
 380
 381       if (!inv1 && !inv2)
 382         return rtx_equal_p (e1, e2);
 383
 384       if (!inv1 || !inv2)
 385         return false;
 386
 387       gcc_assert (inv1->eqto != ~0u);
 388       gcc_assert (inv2->eqto != ~0u);
 389       return inv1->eqto == inv2->eqto;
 390
 391     default:
 392       break;
 393     }
 394
 395   fmt = GET_RTX_FORMAT (code);
 396   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
 397     {
 398       if (fmt[i] == 'e')
 399         {
 400           sub1 = XEXP (e1, i);
 401           sub2 = XEXP (e2, i);
 402
 403           if (!invariant_expr_equal_p (insn1, sub1, insn2, sub2))
 404             return false;
 405         }
 406
 407       else if (fmt[i] == 'E')
 408         {
 409           if (XVECLEN (e1, i) != XVECLEN (e2, i))
 410             return false;
 411
 412           for (j = 0; j < XVECLEN (e1, i); j++)
 413             {
 414               sub1 = XVECEXP (e1, i, j);
 415               sub2 = XVECEXP (e2, i, j);
 416
 417               if (!invariant_expr_equal_p (insn1, sub1, insn2, sub2))
 418                 return false;
 419             }
 420         }
 421       else if (fmt[i] == 'i' || fmt[i] == 'n')
 422         {
 423           if (XINT (e1, i) != XINT (e2, i))
 424             return false;
 425         }
 426       else if (fmt[i] == 'p')
 427         {
 428           if (maybe_ne (SUBREG_BYTE (e1), SUBREG_BYTE (e2)))
 429             return false;
 430         }
 431       /* Unhandled type of subexpression, we fail conservatively.  */
 432       else
 433         return false;
 434     }
 435
 436   return true;
 437 }
 438
 439 struct invariant_expr_hasher : free_ptr_hash <invariant_expr_entry>
 440 {
 441   static inline hashval_t hash (const invariant_expr_entry *);
 442   static inline bool equal (const invariant_expr_entry *,
 443                             const invariant_expr_entry *);
 444 };
 445
 446 /* Returns hash value for invariant expression entry ENTRY.  */
 447
 448 inline hashval_t
 449 invariant_expr_hasher::hash (const invariant_expr_entry *entry)
 450 {
 451   return entry->hash;
 452 }
 453
 454 /* Compares invariant expression entries ENTRY1 and ENTRY2.  */
 455
 456 inline bool
 457 invariant_expr_hasher::equal (const invariant_expr_entry *entry1,
 458                               const invariant_expr_entry *entry2)
 459 {
 460   if (entry1->mode != entry2->mode)
 461     return 0;
 462
 463   return invariant_expr_equal_p (entry1->inv->insn, entry1->expr,
 464                                  entry2->inv->insn, entry2->expr);
 465 }
 466
 467 typedef hash_table<invariant_expr_hasher> invariant_htab_type;
 468
 469 /* Checks whether invariant with value EXPR in machine mode MODE is
 470    recorded in EQ.  If this is the case, return the invariant.  Otherwise
 471    insert INV to the table for this expression and return INV.  */
 472
 473 static struct invariant *
 474 find_or_insert_inv (invariant_htab_type *eq, rtx expr, machine_mode mode,
 475                     struct invariant *inv)
 476 {
 477   hashval_t hash = hash_invariant_expr_1 (inv->insn, expr);
 478   struct invariant_expr_entry *entry;
 479   struct invariant_expr_entry pentry;
 480   invariant_expr_entry **slot;
 481
 482   pentry.expr = expr;
 483   pentry.inv = inv;
 484   pentry.mode = mode;
 485   slot = eq->find_slot_with_hash (&pentry, hash, INSERT);
 486   entry = *slot;
 487
 488   if (entry)
 489     return entry->inv;
 490
 491   entry = XNEW (struct invariant_expr_entry);
 492   entry->inv = inv;
 493   entry->expr = expr;
 494   entry->mode = mode;
 495   entry->hash = hash;
 496   *slot = entry;
 497
 498   return inv;
 499 }
 500
 501 /* Finds invariants identical to INV and records the equivalence.  EQ is the
 502    hash table of the invariants.  */
 503
 504 static void
 505 find_identical_invariants (invariant_htab_type *eq, struct invariant *inv)
 506 {
 507   unsigned depno;
 508   bitmap_iterator bi;
 509   struct invariant *dep;
 510   rtx expr, set;
 511   machine_mode mode;
 512   struct invariant *tmp;
 513
 514   if (inv->eqto != ~0u)
 515     return;
 516
 517   EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, depno, bi)
 518     {
 519       dep = invariants[depno];
 520       find_identical_invariants (eq, dep);
 521     }
 522
 523   set = single_set (inv->insn);
 524   expr = SET_SRC (set);
 525   mode = GET_MODE (expr);
 526   if (mode == VOIDmode)
 527     mode = GET_MODE (SET_DEST (set));
 528
 529   tmp = find_or_insert_inv (eq, expr, mode, inv);
 530   inv->eqto = tmp->invno;
 531
 532   if (tmp->invno != inv->invno && inv->always_executed)
 533     tmp->eqno++;
 534
 535   if (dump_file && inv->eqto != inv->invno)
 536     fprintf (dump_file,
 537              "Invariant %d is equivalent to invariant %d.\n",
 538              inv->invno, inv->eqto);
 539 }
 540
 541 /* Find invariants with the same value and record the equivalences.  */
 542
 543 static void
 544 merge_identical_invariants (void)
 545 {
 546   unsigned i;
 547   struct invariant *inv;
 548   invariant_htab_type eq (invariants.length ());
 549
 550   FOR_EACH_VEC_ELT (invariants, i, inv)
 551     find_identical_invariants (&eq, inv);
 552 }
 553
 554 /* Determines the basic blocks inside LOOP that are always executed and
 555    stores their bitmap to ALWAYS_REACHED.  MAY_EXIT is a bitmap of
 556    basic blocks that may either exit the loop, or contain the call that
 557    does not have to return.  BODY is body of the loop obtained by
 558    get_loop_body_in_dom_order.  */
 559
 560 static void
 561 compute_always_reached (class loop *loop, basic_block *body,
 562                         bitmap may_exit, bitmap always_reached)
 563 {
 564   unsigned i;
 565
 566   for (i = 0; i < loop->num_nodes; i++)
 567     {
 568       if (dominated_by_p (CDI_DOMINATORS, loop->latch, body[i]))
 569         bitmap_set_bit (always_reached, i);
 570
 571       if (bitmap_bit_p (may_exit, i))
 572         return;
 573     }
 574 }
 575
 576 /* Finds exits out of the LOOP with body BODY.  Marks blocks in that we may
 577    exit the loop by cfg edge to HAS_EXIT and MAY_EXIT.  In MAY_EXIT
 578    additionally mark blocks that may exit due to a call.  */
 579
 580 static void
 581 find_exits (class loop *loop, basic_block *body,
 582             bitmap may_exit, bitmap has_exit)
 583 {
 584   unsigned i;
 585   edge_iterator ei;
 586   edge e;
 587   class loop *outermost_exit = loop, *aexit;
 588   bool has_call = false;
 589   rtx_insn *insn;
 590
 591   for (i = 0; i < loop->num_nodes; i++)
 592     {
 593       if (body[i]->loop_father == loop)
 594         {
 595           FOR_BB_INSNS (body[i], insn)
 596             {
 597               if (CALL_P (insn)
 598                   && (RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)
 599                       || !RTL_CONST_OR_PURE_CALL_P (insn)))
 600                 {
 601                   has_call = true;
 602                   bitmap_set_bit (may_exit, i);
 603                   break;
 604                 }
 605             }
 606
 607           FOR_EACH_EDGE (e, ei, body[i]->succs)
 608             {
 609               if (! flow_bb_inside_loop_p (loop, e->dest))
 610                 {
 611                   bitmap_set_bit (may_exit, i);
 612                   bitmap_set_bit (has_exit, i);
 613                   outermost_exit = find_common_loop (outermost_exit,
 614                                                      e->dest->loop_father);
 615                 }
 616               /* If we enter a subloop that might never terminate treat
 617                  it like a possible exit.  */
 618               if (flow_loop_nested_p (loop, e->dest->loop_father))
 619                 bitmap_set_bit (may_exit, i);
 620             }
 621           continue;
 622         }
 623
 624       /* Use the data stored for the subloop to decide whether we may exit
 625          through it.  It is sufficient to do this for header of the loop,
 626          as other basic blocks inside it must be dominated by it.  */
 627       if (body[i]->loop_father->header != body[i])
 628         continue;
 629
 630       if (LOOP_DATA (body[i]->loop_father)->has_call)
 631         {
 632           has_call = true;
 633           bitmap_set_bit (may_exit, i);
 634         }
 635       aexit = LOOP_DATA (body[i]->loop_father)->outermost_exit;
 636       if (aexit != loop)
 637         {
 638           bitmap_set_bit (may_exit, i);
 639           bitmap_set_bit (has_exit, i);
 640
 641           if (flow_loop_nested_p (aexit, outermost_exit))
 642             outermost_exit = aexit;
 643         }
 644     }
 645
 646   if (loop->aux == NULL)
 647     {
 648       loop->aux = xcalloc (1, sizeof (class loop_data));
 649       bitmap_initialize (&LOOP_DATA (loop)->regs_ref, &reg_obstack);
 650       bitmap_initialize (&LOOP_DATA (loop)->regs_live, &reg_obstack);
 651     }
 652   LOOP_DATA (loop)->outermost_exit = outermost_exit;
 653   LOOP_DATA (loop)->has_call = has_call;
 654 }
 655
 656 /* Check whether we may assign a value to X from a register.  */
 657
 658 static bool
 659 may_assign_reg_p (rtx x)
 660 {
 661   return (GET_MODE (x) != VOIDmode
 662           && GET_MODE (x) != BLKmode
 663           && can_copy_p (GET_MODE (x))
 664           /* Do not mess with the frame pointer adjustments that can
 665              be generated e.g. by expand_builtin_setjmp_receiver.  */
 666           && x != frame_pointer_rtx
 667           && (!REG_P (x)
 668               || !HARD_REGISTER_P (x)
 669               || REGNO_REG_CLASS (REGNO (x)) != NO_REGS));
 670 }
 671
 672 /* Finds definitions that may correspond to invariants in LOOP with body
 673    BODY.  */
 674
 675 static void
 676 find_defs (class loop *loop)
 677 {
 678   if (dump_file)
 679     {
 680       fprintf (dump_file,
 681                "*****starting processing of loop %d ******\n",
 682                loop->num);
 683     }
 684
 685   df_chain_add_problem (DF_UD_CHAIN);
 686   df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
 687   df_analyze_loop (loop);
 688   check_invariant_table_size ();
 689
 690   if (dump_file)
 691     {
 692       df_dump_region (dump_file);
 693       fprintf (dump_file,
 694                "*****ending processing of loop %d ******\n",
 695                loop->num);
 696     }
 697 }
 698
 699 /* Creates a new invariant for definition DEF in INSN, depending on invariants
 700    in DEPENDS_ON.  ALWAYS_EXECUTED is true if the insn is always executed,
 701    unless the program ends due to a function call.  The newly created invariant
 702    is returned.  */
 703
 704 static struct invariant *
 705 create_new_invariant (struct def *def, rtx_insn *insn, bitmap depends_on,
 706                       bool always_executed)
 707 {
 708   struct invariant *inv = XNEW (struct invariant);
 709   rtx set = single_set (insn);
 710   bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
 711
 712   inv->def = def;
 713   inv->always_executed = always_executed;
 714   inv->depends_on = depends_on;
 715
 716   /* If the set is simple, usually by moving it we move the whole store out of
 717      the loop.  Otherwise we save only cost of the computation.  */
 718   if (def)
 719     {
 720       inv->cost = set_rtx_cost (set, speed);
 721       /* ??? Try to determine cheapness of address computation.  Unfortunately
 722          the address cost is only a relative measure, we can't really compare
 723          it with any absolute number, but only with other address costs.
 724          But here we don't have any other addresses, so compare with a magic
 725          number anyway.  It has to be large enough to not regress PR33928
 726          (by avoiding to move reg+8,reg+16,reg+24 invariants), but small
 727          enough to not regress 410.bwaves either (by still moving reg+reg
 728          invariants).
 729          See http://gcc.gnu.org/ml/gcc-patches/2009-10/msg01210.html .  */
 730       if (SCALAR_INT_MODE_P (GET_MODE (SET_DEST (set))))
 731         inv->cheap_address = address_cost (SET_SRC (set), word_mode,
 732                                            ADDR_SPACE_GENERIC, speed) < 3;
 733       else
 734         inv->cheap_address = false;
 735     }
 736   else
 737     {
 738       inv->cost = set_src_cost (SET_SRC (set), GET_MODE (SET_DEST (set)),
 739                                 speed);
 740       inv->cheap_address = false;
 741     }
 742
 743   inv->move = false;
 744   inv->reg = NULL_RTX;
 745   inv->orig_regno = -1;
 746   inv->stamp = 0;
 747   inv->insn = insn;
 748
 749   inv->invno = invariants.length ();
 750   inv->eqto = ~0u;
 751
 752   /* Itself.  */
 753   inv->eqno = 1;
 754
 755   if (def)
 756     def->invno = inv->invno;
 757   invariants.safe_push (inv);
 758
 759   if (dump_file)
 760     {
 761       fprintf (dump_file,
 762                "Set in insn %d is invariant (%d), cost %d, depends on ",
 763                INSN_UID (insn), inv->invno, inv->cost);
 764       dump_bitmap (dump_file, inv->depends_on);
 765     }
 766
 767   return inv;
 768 }
 769
 770 /* Return a canonical version of X for the address, from the point of view,
 771    that all multiplications are represented as MULT instead of the multiply
 772    by a power of 2 being represented as ASHIFT.
 773
 774    Callers should prepare a copy of X because this function may modify it
 775    in place.  */
 776
 777 static void
 778 canonicalize_address_mult (rtx x)
 779 {
 780   subrtx_var_iterator::array_type array;
 781   FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
 782     {
 783       rtx sub = *iter;
 784       scalar_int_mode sub_mode;
 785       if (is_a <scalar_int_mode> (GET_MODE (sub), &sub_mode)
 786           && GET_CODE (sub) == ASHIFT
 787           && CONST_INT_P (XEXP (sub, 1))
 788           && INTVAL (XEXP (sub, 1)) < GET_MODE_BITSIZE (sub_mode)
 789           && INTVAL (XEXP (sub, 1)) >= 0)
 790         {
 791           HOST_WIDE_INT shift = INTVAL (XEXP (sub, 1));
 792           PUT_CODE (sub, MULT);
 793           XEXP (sub, 1) = gen_int_mode (HOST_WIDE_INT_1 << shift, sub_mode);
 794           iter.skip_subrtxes ();
 795         }
 796     }
 797 }
 798
/* Maximum number of sub expressions in address.  We set it to
   a small integer since it's unlikely to have a complicated
   address expression.  canonicalize_address leaves an address with
   more parts than this untouched.  */

#define MAX_CANON_ADDR_PARTS (5)
 804
 805 /* Collect sub expressions in address X with PLUS as the seperator.
 806    Sub expressions are stored in vector ADDR_PARTS.  */
 807
 808 static void
 809 collect_address_parts (rtx x, vec<rtx> *addr_parts)
 810 {
 811   subrtx_var_iterator::array_type array;
 812   FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
 813     {
 814       rtx sub = *iter;
 815
 816       if (GET_CODE (sub) != PLUS)
 817         {
 818           addr_parts->safe_push (sub);
 819           iter.skip_subrtxes ();
 820         }
 821     }
 822 }
 823
 824 /* Compare function for sorting sub expressions X and Y based on
 825    precedence defined for communitive operations.  */
 826
 827 static int
 828 compare_address_parts (const void *x, const void *y)
 829 {
 830   const rtx *rx = (const rtx *)x;
 831   const rtx *ry = (const rtx *)y;
 832   int px = commutative_operand_precedence (*rx);
 833   int py = commutative_operand_precedence (*ry);
 834
 835   return (py - px);
 836 }
 837
 838 /* Return a canonical version address for X by following steps:
 839      1) Rewrite ASHIFT into MULT recursively.
 840      2) Divide address into sub expressions with PLUS as the
 841         separator.
 842      3) Sort sub expressions according to precedence defined
 843         for communative operations.
 844      4) Simplify CONST_INT_P sub expressions.
 845      5) Create new canonicalized address and return.
 846    Callers should prepare a copy of X because this function may
 847    modify it in place.  */
 848
 849 static rtx
 850 canonicalize_address (rtx x)
 851 {
 852   rtx res;
 853   unsigned int i, j;
 854   machine_mode mode = GET_MODE (x);
 855   auto_vec<rtx, MAX_CANON_ADDR_PARTS> addr_parts;
 856
 857   /* Rewrite ASHIFT into MULT.  */
 858   canonicalize_address_mult (x);
 859   /* Divide address into sub expressions.  */
 860   collect_address_parts (x, &addr_parts);
 861   /* Unlikely to have very complicated address.  */
 862   if (addr_parts.length () < 2
 863       || addr_parts.length () > MAX_CANON_ADDR_PARTS)
 864     return x;
 865
 866   /* Sort sub expressions according to canonicalization precedence.  */
 867   addr_parts.qsort (compare_address_parts);
 868
 869   /* Simplify all constant int summary if possible.  */
 870   for (i = 0; i < addr_parts.length (); i++)
 871     if (CONST_INT_P (addr_parts[i]))
 872       break;
 873
 874   for (j = i + 1; j < addr_parts.length (); j++)
 875     {
 876       gcc_assert (CONST_INT_P (addr_parts[j]));
 877       addr_parts[i] = simplify_gen_binary (PLUS, mode,
 878                                            addr_parts[i],
 879                                            addr_parts[j]);
 880     }
 881
 882   /* Chain PLUS operators to the left for !CONST_INT_P sub expressions.  */
 883   res = addr_parts[0];
 884   for (j = 1; j < i; j++)
 885     res = simplify_gen_binary (PLUS, mode, res, addr_parts[j]);
 886
 887   /* Pickup the last CONST_INT_P sub expression.  */
 888   if (i < addr_parts.length ())
 889     res = simplify_gen_binary (PLUS, mode, res, addr_parts[i]);
 890
 891   return res;
 892 }
 893
 894 /* Given invariant DEF and its address USE, check if the corresponding
 895    invariant expr can be propagated into the use or not.  */
 896
 897 static bool
 898 inv_can_prop_to_addr_use (struct def *def, df_ref use)
 899 {
 900   struct invariant *inv;
 901   rtx *pos = DF_REF_REAL_LOC (use), def_set, use_set;
 902   rtx_insn *use_insn = DF_REF_INSN (use);
 903   rtx_insn *def_insn;
 904   bool ok;
 905
 906   inv = invariants[def->invno];
 907   /* No need to check if address expression is expensive.  */
 908   if (!inv->cheap_address)
 909     return false;
 910
 911   def_insn = inv->insn;
 912   def_set = single_set (def_insn);
 913   if (!def_set)
 914     return false;
 915
 916   validate_unshare_change (use_insn, pos, SET_SRC (def_set), true);
 917   ok = verify_changes (0);
 918   /* Try harder with canonicalization in address expression.  */
 919   if (!ok && (use_set = single_set (use_insn)) != NULL_RTX)
 920     {
 921       rtx src, dest, mem = NULL_RTX;
 922
 923       src = SET_SRC (use_set);
 924       dest = SET_DEST (use_set);
 925       if (MEM_P (src))
 926         mem = src;
 927       else if (MEM_P (dest))
 928         mem = dest;
 929
 930       if (mem != NULL_RTX
 931           && !memory_address_addr_space_p (GET_MODE (mem),
 932                                            XEXP (mem, 0),
 933                                            MEM_ADDR_SPACE (mem)))
 934         {
 935           rtx addr = canonicalize_address (copy_rtx (XEXP (mem, 0)));
 936           if (memory_address_addr_space_p (GET_MODE (mem),
 937                                            addr, MEM_ADDR_SPACE (mem)))
 938             ok = true;
 939         }
 940     }
 941   cancel_changes (0);
 942   return ok;
 943 }
 944
 945 /* Record USE at DEF.  */
 946
 947 static void
 948 record_use (struct def *def, df_ref use)
 949 {
 950   struct use *u = XNEW (struct use);
 951
 952   u->pos = DF_REF_REAL_LOC (use);
 953   u->insn = DF_REF_INSN (use);
 954   u->addr_use_p = (DF_REF_TYPE (use) == DF_REF_REG_MEM_LOAD
 955                    || DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE);
 956   u->next = def->uses;
 957   def->uses = u;
 958   def->n_uses++;
 959   if (u->addr_use_p)
 960     {
 961       /* Initialize propagation information if this is the first addr
 962          use of the inv def.  */
 963       if (def->n_addr_uses == 0)
 964         def->can_prop_to_addr_uses = true;
 965
 966       def->n_addr_uses++;
 967       if (def->can_prop_to_addr_uses && !inv_can_prop_to_addr_use (def, use))
 968         def->can_prop_to_addr_uses = false;
 969     }
 970 }
 971
 972 /* Finds the invariants USE depends on and store them to the DEPENDS_ON
 973    bitmap.  Returns true if all dependencies of USE are known to be
 974    loop invariants, false otherwise.  */
 975
 976 static bool
 977 check_dependency (basic_block bb, df_ref use, bitmap depends_on)
 978 {
 979   df_ref def;
 980   basic_block def_bb;
 981   struct df_link *defs;
 982   struct def *def_data;
 983   struct invariant *inv;
 984
 985   if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
 986     return false;
 987
 988   defs = DF_REF_CHAIN (use);
 989   if (!defs)
 990     {
 991       unsigned int regno = DF_REF_REGNO (use);
 992
 993       /* If this is the use of an uninitialized argument register that is
 994          likely to be spilled, do not move it lest this might extend its
 995          lifetime and cause reload to die.  This can occur for a call to
 996          a function taking complex number arguments and moving the insns
 997          preparing the arguments without moving the call itself wouldn't
 998          gain much in practice.  */
 999       if ((DF_REF_FLAGS (use) & DF_HARD_REG_LIVE)
1000           && FUNCTION_ARG_REGNO_P (regno)
1001           && targetm.class_likely_spilled_p (REGNO_REG_CLASS (regno)))
1002         return false;
1003
1004       return true;
1005     }
1006
1007   if (defs->next)
1008     return false;
1009
1010   def = defs->ref;
1011   check_invariant_table_size ();
1012   inv = invariant_table[DF_REF_ID (def)];
1013   if (!inv)
1014     return false;
1015
1016   def_data = inv->def;
1017   gcc_assert (def_data != NULL);
1018
1019   def_bb = DF_REF_BB (def);
1020   /* Note that in case bb == def_bb, we know that the definition
1021      dominates insn, because def has invariant_table[DF_REF_ID(def)]
1022      defined and we process the insns in the basic block bb
1023      sequentially.  */
1024   if (!dominated_by_p (CDI_DOMINATORS, bb, def_bb))
1025     return false;
1026
1027   bitmap_set_bit (depends_on, def_data->invno);
1028   return true;
1029 }
1030
1031
1032 /* Finds the invariants INSN depends on and store them to the DEPENDS_ON
1033    bitmap.  Returns true if all dependencies of INSN are known to be
1034    loop invariants, false otherwise.  */
1035
1036 static bool
1037 check_dependencies (rtx_insn *insn, bitmap depends_on)
1038 {
1039   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1040   df_ref use;
1041   basic_block bb = BLOCK_FOR_INSN (insn);
1042
1043   FOR_EACH_INSN_INFO_USE (use, insn_info)
1044     if (!check_dependency (bb, use, depends_on))
1045       return false;
1046   FOR_EACH_INSN_INFO_EQ_USE (use, insn_info)
1047     if (!check_dependency (bb, use, depends_on))
1048       return false;
1049
1050   return true;
1051 }
1052
1053 /* Pre-check candidate DEST to skip the one which cannot make a valid insn
1054    during move_invariant_reg.  SIMPLE is to skip HARD_REGISTER.  */
1055 static bool
1056 pre_check_invariant_p (bool simple, rtx dest)
1057 {
1058   if (simple && REG_P (dest) && DF_REG_DEF_COUNT (REGNO (dest)) > 1)
1059     {
1060       df_ref use;
1061       unsigned int i = REGNO (dest);
1062       struct df_insn_info *insn_info;
1063       df_ref def_rec;
1064
1065       for (use = DF_REG_USE_CHAIN (i); use; use = DF_REF_NEXT_REG (use))
1066         {
1067           rtx_insn *ref = DF_REF_INSN (use);
1068           insn_info = DF_INSN_INFO_GET (ref);
1069
1070           FOR_EACH_INSN_INFO_DEF (def_rec, insn_info)
1071             if (DF_REF_REGNO (def_rec) == i)
1072               {
1073                 /* Multi definitions at this stage, most likely are due to
1074                    instruction constraints, which requires both read and write
1075                    on the same register.  Since move_invariant_reg is not
1076                    powerful enough to handle such cases, just ignore the INV
1077                    and leave the chance to others.  */
1078                 return false;
1079               }
1080         }
1081     }
1082   return true;
1083 }
1084
1085 /* Finds invariant in INSN.  ALWAYS_REACHED is true if the insn is always
1086    executed.  ALWAYS_EXECUTED is true if the insn is always executed,
1087    unless the program ends due to a function call.  */
1088
1089 static void
1090 find_invariant_insn (rtx_insn *insn, bool always_reached, bool always_executed)
1091 {
1092   df_ref ref;
1093   struct def *def;
1094   bitmap depends_on;
1095   rtx set, dest;
1096   bool simple = true;
1097   struct invariant *inv;
1098
1099   /* We can't move a CC0 setter without the user.  */
1100   if (HAVE_cc0 && sets_cc0_p (insn))
1101     return;
1102
1103   set = single_set (insn);
1104   if (!set)
1105     return;
1106   dest = SET_DEST (set);
1107
1108   if (!REG_P (dest)
1109       || HARD_REGISTER_P (dest))
1110     simple = false;
1111
1112   if (!may_assign_reg_p (dest)
1113       || !pre_check_invariant_p (simple, dest)
1114       || !check_maybe_invariant (SET_SRC (set)))
1115     return;
1116
1117   /* If the insn can throw exception, we cannot move it at all without changing
1118      cfg.  */
1119   if (can_throw_internal (insn))
1120     return;
1121
1122   /* We cannot make trapping insn executed, unless it was executed before.  */
1123   if (may_trap_or_fault_p (PATTERN (insn)) && !always_reached)
1124     return;
1125
1126   depends_on = BITMAP_ALLOC (NULL);
1127   if (!check_dependencies (insn, depends_on))
1128     {
1129       BITMAP_FREE (depends_on);
1130       return;
1131     }
1132
1133   if (simple)
1134     def = XCNEW (struct def);
1135   else
1136     def = NULL;
1137
1138   inv = create_new_invariant (def, insn, depends_on, always_executed);
1139
1140   if (simple)
1141     {
1142       ref = df_find_def (insn, dest);
1143       check_invariant_table_size ();
1144       invariant_table[DF_REF_ID (ref)] = inv;
1145     }
1146 }
1147
1148 /* Record registers used in INSN that have a unique invariant definition.  */
1149
1150 static void
1151 record_uses (rtx_insn *insn)
1152 {
1153   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1154   df_ref use;
1155   struct invariant *inv;
1156
1157   FOR_EACH_INSN_INFO_USE (use, insn_info)
1158     {
1159       inv = invariant_for_use (use);
1160       if (inv)
1161         record_use (inv->def, use);
1162     }
1163   FOR_EACH_INSN_INFO_EQ_USE (use, insn_info)
1164     {
1165       inv = invariant_for_use (use);
1166       if (inv)
1167         record_use (inv->def, use);
1168     }
1169 }
1170
1171 /* Finds invariants in INSN.  ALWAYS_REACHED is true if the insn is always
1172    executed.  ALWAYS_EXECUTED is true if the insn is always executed,
1173    unless the program ends due to a function call.  */
1174
1175 static void
1176 find_invariants_insn (rtx_insn *insn, bool always_reached, bool always_executed)
1177 {
1178   find_invariant_insn (insn, always_reached, always_executed);
1179   record_uses (insn);
1180 }
1181
1182 /* Finds invariants in basic block BB.  ALWAYS_REACHED is true if the
1183    basic block is always executed.  ALWAYS_EXECUTED is true if the basic
1184    block is always executed, unless the program ends due to a function
1185    call.  */
1186
1187 static void
1188 find_invariants_bb (basic_block bb, bool always_reached, bool always_executed)
1189 {
1190   rtx_insn *insn;
1191
1192   FOR_BB_INSNS (bb, insn)
1193     {
1194       if (!NONDEBUG_INSN_P (insn))
1195         continue;
1196
1197       find_invariants_insn (insn, always_reached, always_executed);
1198
1199       if (always_reached
1200           && CALL_P (insn)
1201           && (RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)
1202               || ! RTL_CONST_OR_PURE_CALL_P (insn)))
1203         always_reached = false;
1204     }
1205 }
1206
1207 /* Finds invariants in LOOP with body BODY.  ALWAYS_REACHED is the bitmap of
1208    basic blocks in BODY that are always executed.  ALWAYS_EXECUTED is the
1209    bitmap of basic blocks in BODY that are always executed unless the program
1210    ends due to a function call.  */
1211
1212 static void
1213 find_invariants_body (class loop *loop, basic_block *body,
1214                       bitmap always_reached, bitmap always_executed)
1215 {
1216   unsigned i;
1217
1218   for (i = 0; i < loop->num_nodes; i++)
1219     find_invariants_bb (body[i],
1220                         bitmap_bit_p (always_reached, i),
1221                         bitmap_bit_p (always_executed, i));
1222 }
1223
1224 /* Finds invariants in LOOP.  */
1225
1226 static void
1227 find_invariants (class loop *loop)
1228 {
1229   auto_bitmap may_exit;
1230   auto_bitmap always_reached;
1231   auto_bitmap has_exit;
1232   auto_bitmap always_executed;
1233   basic_block *body = get_loop_body_in_dom_order (loop);
1234
1235   find_exits (loop, body, may_exit, has_exit);
1236   compute_always_reached (loop, body, may_exit, always_reached);
1237   compute_always_reached (loop, body, has_exit, always_executed);
1238
1239   find_defs (loop);
1240   find_invariants_body (loop, body, always_reached, always_executed);
1241   merge_identical_invariants ();
1242
1243   free (body);
1244 }
1245
1246 /* Frees a list of uses USE.  */
1247
1248 static void
1249 free_use_list (struct use *use)
1250 {
1251   struct use *next;
1252
1253   for (; use; use = next)
1254     {
1255       next = use->next;
1256       free (use);
1257     }
1258 }
1259
1260 /* Return pressure class and number of hard registers (through *NREGS)
1261    for destination of INSN. */
1262 static enum reg_class
1263 get_pressure_class_and_nregs (rtx_insn *insn, int *nregs)
1264 {
1265   rtx reg;
1266   enum reg_class pressure_class;
1267   rtx set = single_set (insn);
1268
1269   /* Considered invariant insns have only one set.  */
1270   gcc_assert (set != NULL_RTX);
1271   reg = SET_DEST (set);
1272   if (GET_CODE (reg) == SUBREG)
1273     reg = SUBREG_REG (reg);
1274   if (MEM_P (reg))
1275     {
1276       *nregs = 0;
1277       pressure_class = NO_REGS;
1278     }
1279   else
1280     {
1281       if (! REG_P (reg))
1282         reg = NULL_RTX;
1283       if (reg == NULL_RTX)
1284         pressure_class = GENERAL_REGS;
1285       else
1286         {
1287           pressure_class = reg_allocno_class (REGNO (reg));
1288           pressure_class = ira_pressure_class_translate[pressure_class];
1289         }
1290       *nregs
1291         = ira_reg_class_max_nregs[pressure_class][GET_MODE (SET_SRC (set))];
1292     }
1293   return pressure_class;
1294 }
1295
1296 /* Calculates cost and number of registers needed for moving invariant INV
1297    out of the loop and stores them to *COST and *REGS_NEEDED.  *CL will be
1298    the REG_CLASS of INV.  Return
1299      -1: if INV is invalid.
1300       0: if INV and its depends_on have same reg_class
1301       1: if INV and its depends_on have different reg_classes.  */
1302
1303 static int
1304 get_inv_cost (struct invariant *inv, int *comp_cost, unsigned *regs_needed,
1305               enum reg_class *cl)
1306 {
1307   int i, acomp_cost;
1308   unsigned aregs_needed[N_REG_CLASSES];
1309   unsigned depno;
1310   struct invariant *dep;
1311   bitmap_iterator bi;
1312   int ret = 1;
1313
1314   /* Find the representative of the class of the equivalent invariants.  */
1315   inv = invariants[inv->eqto];
1316
1317   *comp_cost = 0;
1318   if (! flag_ira_loop_pressure)
1319     regs_needed[0] = 0;
1320   else
1321     {
1322       for (i = 0; i < ira_pressure_classes_num; i++)
1323         regs_needed[ira_pressure_classes[i]] = 0;
1324     }
1325
1326   if (inv->move
1327       || inv->stamp == actual_stamp)
1328     return -1;
1329   inv->stamp = actual_stamp;
1330
1331   if (! flag_ira_loop_pressure)
1332     regs_needed[0]++;
1333   else
1334     {
1335       int nregs;
1336       enum reg_class pressure_class;
1337
1338       pressure_class = get_pressure_class_and_nregs (inv->insn, &nregs);
1339       regs_needed[pressure_class] += nregs;
1340       *cl = pressure_class;
1341       ret = 0;
1342     }
1343
1344   if (!inv->cheap_address
1345       || inv->def->n_uses == 0
1346       || inv->def->n_addr_uses < inv->def->n_uses
1347       /* Count cost if the inv can't be propagated into address uses.  */
1348       || !inv->def->can_prop_to_addr_uses)
1349     (*comp_cost) += inv->cost * inv->eqno;
1350
1351 #ifdef STACK_REGS
1352   {
1353     /* Hoisting constant pool constants into stack regs may cost more than
1354        just single register.  On x87, the balance is affected both by the
1355        small number of FP registers, and by its register stack organization,
1356        that forces us to add compensation code in and around the loop to
1357        shuffle the operands to the top of stack before use, and pop them
1358        from the stack after the loop finishes.
1359
1360        To model this effect, we increase the number of registers needed for
1361        stack registers by two: one register push, and one register pop.
1362        This usually has the effect that FP constant loads from the constant
1363        pool are not moved out of the loop.
1364
1365        Note that this also means that dependent invariants cannot be moved.
1366        However, the primary purpose of this pass is to move loop invariant
1367        address arithmetic out of loops, and address arithmetic that depends
1368        on floating point constants is unlikely to ever occur.  */
1369     rtx set = single_set (inv->insn);
1370     if (set
1371         && IS_STACK_MODE (GET_MODE (SET_SRC (set)))
1372         && constant_pool_constant_p (SET_SRC (set)))
1373       {
1374         if (flag_ira_loop_pressure)
1375           regs_needed[ira_stack_reg_pressure_class] += 2;
1376         else
1377           regs_needed[0] += 2;
1378       }
1379   }
1380 #endif
1381
1382   EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, depno, bi)
1383     {
1384       bool check_p;
1385       enum reg_class dep_cl = ALL_REGS;
1386       int dep_ret;
1387
1388       dep = invariants[depno];
1389
1390       /* If DEP is moved out of the loop, it is not a depends_on any more.  */
1391       if (dep->move)
1392         continue;
1393
1394       dep_ret = get_inv_cost (dep, &acomp_cost, aregs_needed, &dep_cl);
1395
1396       if (! flag_ira_loop_pressure)
1397         check_p = aregs_needed[0] != 0;
1398       else
1399         {
1400           for (i = 0; i < ira_pressure_classes_num; i++)
1401             if (aregs_needed[ira_pressure_classes[i]] != 0)
1402               break;
1403           check_p = i < ira_pressure_classes_num;
1404
1405           if ((dep_ret == 1) || ((dep_ret == 0) && (*cl != dep_cl)))
1406             {
1407               *cl = ALL_REGS;
1408               ret = 1;
1409             }
1410         }
1411       if (check_p
1412           /* We need to check always_executed, since if the original value of
1413              the invariant may be preserved, we may need to keep it in a
1414              separate register.  TODO check whether the register has an
1415              use outside of the loop.  */
1416           && dep->always_executed
1417           && !dep->def->uses->next)
1418         {
1419           /* If this is a single use, after moving the dependency we will not
1420              need a new register.  */
1421           if (! flag_ira_loop_pressure)
1422             aregs_needed[0]--;
1423           else
1424             {
1425               int nregs;
1426               enum reg_class pressure_class;
1427
1428               pressure_class = get_pressure_class_and_nregs (inv->insn, &nregs);
1429               aregs_needed[pressure_class] -= nregs;
1430             }
1431         }
1432
1433       if (! flag_ira_loop_pressure)
1434         regs_needed[0] += aregs_needed[0];
1435       else
1436         {
1437           for (i = 0; i < ira_pressure_classes_num; i++)
1438             regs_needed[ira_pressure_classes[i]]
1439               += aregs_needed[ira_pressure_classes[i]];
1440         }
1441       (*comp_cost) += acomp_cost;
1442     }
1443   return ret;
1444 }
1445
1446 /* Calculates gain for eliminating invariant INV.  REGS_USED is the number
1447    of registers used in the loop, NEW_REGS is the number of new variables
1448    already added due to the invariant motion.  The number of registers needed
1449    for it is stored in *REGS_NEEDED.  SPEED and CALL_P are flags passed
1450    through to estimate_reg_pressure_cost. */
1451
1452 static int
1453 gain_for_invariant (struct invariant *inv, unsigned *regs_needed,
1454                     unsigned *new_regs, unsigned regs_used,
1455                     bool speed, bool call_p)
1456 {
1457   int comp_cost, size_cost;
1458   /* Workaround -Wmaybe-uninitialized false positive during
1459      profiledbootstrap by initializing it.  */
1460   enum reg_class cl = NO_REGS;
1461   int ret;
1462
1463   actual_stamp++;
1464
1465   ret = get_inv_cost (inv, &comp_cost, regs_needed, &cl);
1466
1467   if (! flag_ira_loop_pressure)
1468     {
1469       size_cost = (estimate_reg_pressure_cost (new_regs[0] + regs_needed[0],
1470                                                regs_used, speed, call_p)
1471                    - estimate_reg_pressure_cost (new_regs[0],
1472                                                  regs_used, speed, call_p));
1473     }
1474   else if (ret < 0)
1475     return -1;
1476   else if ((ret == 0) && (cl == NO_REGS))
1477     /* Hoist it anyway since it does not impact register pressure.  */
1478     return 1;
1479   else
1480     {
1481       int i;
1482       enum reg_class pressure_class;
1483
1484       for (i = 0; i < ira_pressure_classes_num; i++)
1485         {
1486           pressure_class = ira_pressure_classes[i];
1487
1488           if (!reg_classes_intersect_p (pressure_class, cl))
1489             continue;
1490
1491           if ((int) new_regs[pressure_class]
1492               + (int) regs_needed[pressure_class]
1493               + LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
1494               + IRA_LOOP_RESERVED_REGS
1495               > ira_class_hard_regs_num[pressure_class])
1496             break;
1497         }
1498       if (i < ira_pressure_classes_num)
1499         /* There will be register pressure excess and we want not to
1500            make this loop invariant motion.  All loop invariants with
1501            non-positive gains will be rejected in function
1502            find_invariants_to_move.  Therefore we return the negative
1503            number here.
1504
1505            One could think that this rejects also expensive loop
1506            invariant motions and this will hurt code performance.
1507            However numerous experiments with different heuristics
1508            taking invariant cost into account did not confirm this
1509            assumption.  There are possible explanations for this
1510            result:
1511            o probably all expensive invariants were already moved out
1512              of the loop by PRE and gimple invariant motion pass.
1513            o expensive invariant execution will be hidden by insn
1514              scheduling or OOO processor hardware because usually such
1515              invariants have a lot of freedom to be executed
1516              out-of-order.
1517            Another reason for ignoring invariant cost vs spilling cost
1518            heuristics is also in difficulties to evaluate accurately
1519            spill cost at this stage.  */
1520         return -1;
1521       else
1522         size_cost = 0;
1523     }
1524
1525   return comp_cost - size_cost;
1526 }
1527
1528 /* Finds invariant with best gain for moving.  Returns the gain, stores
1529    the invariant in *BEST and number of registers needed for it to
1530    *REGS_NEEDED.  REGS_USED is the number of registers used in the loop.
1531    NEW_REGS is the number of new variables already added due to invariant
1532    motion.  */
1533
1534 static int
1535 best_gain_for_invariant (struct invariant **best, unsigned *regs_needed,
1536                          unsigned *new_regs, unsigned regs_used,
1537                          bool speed, bool call_p)
1538 {
1539   struct invariant *inv;
1540   int i, gain = 0, again;
1541   unsigned aregs_needed[N_REG_CLASSES], invno;
1542
1543   FOR_EACH_VEC_ELT (invariants, invno, inv)
1544     {
1545       if (inv->move)
1546         continue;
1547
1548       /* Only consider the "representatives" of equivalent invariants.  */
1549       if (inv->eqto != inv->invno)
1550         continue;
1551
1552       again = gain_for_invariant (inv, aregs_needed, new_regs, regs_used,
1553                                   speed, call_p);
1554       if (again > gain)
1555         {
1556           gain = again;
1557           *best = inv;
1558           if (! flag_ira_loop_pressure)
1559             regs_needed[0] = aregs_needed[0];
1560           else
1561             {
1562               for (i = 0; i < ira_pressure_classes_num; i++)
1563                 regs_needed[ira_pressure_classes[i]]
1564                   = aregs_needed[ira_pressure_classes[i]];
1565             }
1566         }
1567     }
1568
1569   return gain;
1570 }
1571
1572 /* Marks invariant INVNO and all its dependencies for moving.  */
1573
1574 static void
1575 set_move_mark (unsigned invno, int gain)
1576 {
1577   struct invariant *inv = invariants[invno];
1578   bitmap_iterator bi;
1579
1580   /* Find the representative of the class of the equivalent invariants.  */
1581   inv = invariants[inv->eqto];
1582
1583   if (inv->move)
1584     return;
1585   inv->move = true;
1586
1587   if (dump_file)
1588     {
1589       if (gain >= 0)
1590         fprintf (dump_file, "Decided to move invariant %d -- gain %d\n",
1591                  invno, gain);
1592       else
1593         fprintf (dump_file, "Decided to move dependent invariant %d\n",
1594                  invno);
1595     };
1596
1597   EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, invno, bi)
1598     {
1599       set_move_mark (invno, -1);
1600     }
1601 }
1602
1603 /* Determines which invariants to move.  */
1604
1605 static void
1606 find_invariants_to_move (bool speed, bool call_p)
1607 {
1608   int gain;
1609   unsigned i, regs_used, regs_needed[N_REG_CLASSES], new_regs[N_REG_CLASSES];
1610   struct invariant *inv = NULL;
1611
1612   if (!invariants.length ())
1613     return;
1614
1615   if (flag_ira_loop_pressure)
1616     /* REGS_USED is actually never used when the flag is on.  */
1617     regs_used = 0;
1618   else
1619     /* We do not really do a good job in estimating number of
1620        registers used; we put some initial bound here to stand for
1621        induction variables etc.  that we do not detect.  */
1622     {
1623       unsigned int n_regs = DF_REG_SIZE (df);
1624
1625       regs_used = 2;
1626
1627       for (i = 0; i < n_regs; i++)
1628         {
1629           if (!DF_REGNO_FIRST_DEF (i) && DF_REGNO_LAST_USE (i))
1630             {
1631               /* This is a value that is used but not changed inside loop.  */
1632               regs_used++;
1633             }
1634         }
1635     }
1636
1637   if (! flag_ira_loop_pressure)
1638     new_regs[0] = regs_needed[0] = 0;
1639   else
1640     {
1641       for (i = 0; (int) i < ira_pressure_classes_num; i++)
1642         new_regs[ira_pressure_classes[i]] = 0;
1643     }
1644   while ((gain = best_gain_for_invariant (&inv, regs_needed,
1645                                           new_regs, regs_used,
1646                                           speed, call_p)) > 0)
1647     {
1648       set_move_mark (inv->invno, gain);
1649       if (! flag_ira_loop_pressure)
1650         new_regs[0] += regs_needed[0];
1651       else
1652         {
1653           for (i = 0; (int) i < ira_pressure_classes_num; i++)
1654             new_regs[ira_pressure_classes[i]]
1655               += regs_needed[ira_pressure_classes[i]];
1656         }
1657     }
1658 }
1659
1660 /* Replace the uses, reached by the definition of invariant INV, by REG.
1661
1662    IN_GROUP is nonzero if this is part of a group of changes that must be
1663    performed as a group.  In that case, the changes will be stored.  The
1664    function `apply_change_group' will validate and apply the changes.  */
1665
1666 static int
1667 replace_uses (struct invariant *inv, rtx reg, bool in_group)
1668 {
1669   /* Replace the uses we know to be dominated.  It saves work for copy
1670      propagation, and also it is necessary so that dependent invariants
1671      are computed right.  */
1672   if (inv->def)
1673     {
1674       struct use *use;
1675       for (use = inv->def->uses; use; use = use->next)
1676         validate_change (use->insn, use->pos, reg, true);
1677
1678       /* If we aren't part of a larger group, apply the changes now.  */
1679       if (!in_group)
1680         return apply_change_group ();
1681     }
1682
1683   return 1;
1684 }
1685
1686 /* Whether invariant INV setting REG can be moved out of LOOP, at the end of
1687    the block preceding its header.  */
1688
1689 static bool
1690 can_move_invariant_reg (class loop *loop, struct invariant *inv, rtx reg)
1691 {
1692   df_ref def, use;
1693   unsigned int dest_regno, defs_in_loop_count = 0;
1694   rtx_insn *insn = inv->insn;
1695   basic_block bb = BLOCK_FOR_INSN (inv->insn);
1696
1697   /* We ignore hard register and memory access for cost and complexity reasons.
1698      Hard register are few at this stage and expensive to consider as they
1699      require building a separate data flow.  Memory access would require using
1700      df_simulate_* and can_move_insns_across functions and is more complex.  */
1701   if (!REG_P (reg) || HARD_REGISTER_P (reg))
1702     return false;
1703
1704   /* Check whether the set is always executed.  We could omit this condition if
1705      we know that the register is unused outside of the loop, but it does not
1706      seem worth finding out.  */
1707   if (!inv->always_executed)
1708     return false;
1709
1710   /* Check that all uses that would be dominated by def are already dominated
1711      by it.  */
1712   dest_regno = REGNO (reg);
1713   for (use = DF_REG_USE_CHAIN (dest_regno); use; use = DF_REF_NEXT_REG (use))
1714     {
1715       rtx_insn *use_insn;
1716       basic_block use_bb;
1717
1718       use_insn = DF_REF_INSN (use);
1719       use_bb = BLOCK_FOR_INSN (use_insn);
1720
1721       /* Ignore instruction considered for moving.  */
1722       if (use_insn == insn)
1723         continue;
1724
1725       /* Don't consider uses outside loop.  */
1726       if (!flow_bb_inside_loop_p (loop, use_bb))
1727         continue;
1728
1729       /* Don't move if a use is not dominated by def in insn.  */
1730       if (use_bb == bb && DF_INSN_LUID (insn) >= DF_INSN_LUID (use_insn))
1731         return false;
1732       if (!dominated_by_p (CDI_DOMINATORS, use_bb, bb))
1733         return false;
1734     }
1735
1736   /* Check for other defs.  Any other def in the loop might reach a use
1737      currently reached by the def in insn.  */
1738   for (def = DF_REG_DEF_CHAIN (dest_regno); def; def = DF_REF_NEXT_REG (def))
1739     {
1740       basic_block def_bb = DF_REF_BB (def);
1741
1742       /* Defs in exit block cannot reach a use they weren't already.  */
1743       if (single_succ_p (def_bb))
1744         {
1745           basic_block def_bb_succ;
1746
1747           def_bb_succ = single_succ (def_bb);
1748           if (!flow_bb_inside_loop_p (loop, def_bb_succ))
1749             continue;
1750         }
1751
1752       if (++defs_in_loop_count > 1)
1753         return false;
1754     }
1755
1756   return true;
1757 }
1758
1759 /* Move invariant INVNO out of the LOOP.  Returns true if this succeeds, false
1760    otherwise.  */
1761
1762 static bool
1763 move_invariant_reg (class loop *loop, unsigned invno)
1764 {
1765   struct invariant *inv = invariants[invno];
1766   struct invariant *repr = invariants[inv->eqto];
1767   unsigned i;
1768   basic_block preheader = loop_preheader_edge (loop)->src;
1769   rtx reg, set, dest, note;
1770   bitmap_iterator bi;
1771   int regno = -1;
1772
1773   if (inv->reg)
1774     return true;
1775   if (!repr->move)
1776     return false;
1777
1778   /* If this is a representative of the class of equivalent invariants,
1779      really move the invariant.  Otherwise just replace its use with
1780      the register used for the representative.  */
1781   if (inv == repr)
1782     {
1783       if (inv->depends_on)
1784         {
1785           EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, i, bi)
1786             {
1787               if (!move_invariant_reg (loop, i))
1788                 goto fail;
1789             }
1790         }
1791
1792       /* If possible, just move the set out of the loop.  Otherwise, we
1793          need to create a temporary register.  */
1794       set = single_set (inv->insn);
1795       reg = dest = SET_DEST (set);
1796       if (GET_CODE (reg) == SUBREG)
1797         reg = SUBREG_REG (reg);
1798       if (REG_P (reg))
1799         regno = REGNO (reg);
1800
1801       if (!can_move_invariant_reg (loop, inv, dest))
1802         {
1803           reg = gen_reg_rtx_and_attrs (dest);
1804
1805           /* Try replacing the destination by a new pseudoregister.  */
1806           validate_change (inv->insn, &SET_DEST (set), reg, true);
1807
1808           /* As well as all the dominated uses.  */
1809           replace_uses (inv, reg, true);
1810
1811           /* And validate all the changes.  */
1812           if (!apply_change_group ())
1813             goto fail;
1814
1815           emit_insn_after (gen_move_insn (dest, reg), inv->insn);
1816         }
1817       else if (dump_file)
1818         fprintf (dump_file, "Invariant %d moved without introducing a new "
1819                             "temporary register\n", invno);
1820       reorder_insns (inv->insn, inv->insn, BB_END (preheader));
1821       df_recompute_luids (preheader);
1822
1823       /* If there is a REG_EQUAL note on the insn we just moved, and the
1824          insn is in a basic block that is not always executed or the note
1825          contains something for which we don't know the invariant status,
1826          the note may no longer be valid after we move the insn.  Note that
1827          uses in REG_EQUAL notes are taken into account in the computation
1828          of invariants, so it is safe to retain the note even if it contains
1829          register references for which we know the invariant status.  */
1830       if ((note = find_reg_note (inv->insn, REG_EQUAL, NULL_RTX))
1831           && (!inv->always_executed
1832               || !check_maybe_invariant (XEXP (note, 0))))
1833         remove_note (inv->insn, note);
1834     }
1835   else
1836     {
1837       if (!move_invariant_reg (loop, repr->invno))
1838         goto fail;
1839       reg = repr->reg;
1840       regno = repr->orig_regno;
1841       if (!replace_uses (inv, reg, false))
1842         goto fail;
1843       set = single_set (inv->insn);
1844       emit_insn_after (gen_move_insn (SET_DEST (set), reg), inv->insn);
1845       delete_insn (inv->insn);
1846     }
1847
1848   inv->reg = reg;
1849   inv->orig_regno = regno;
1850
1851   return true;
1852
1853 fail:
1854   /* If we failed, clear move flag, so that we do not try to move inv
1855      again.  */
1856   if (dump_file)
1857     fprintf (dump_file, "Failed to move invariant %d\n", invno);
1858   inv->move = false;
1859   inv->reg = NULL_RTX;
1860   inv->orig_regno = -1;
1861
1862   return false;
1863 }
1864
1865 /* Move selected invariant out of the LOOP.  Newly created regs are marked
1866    in TEMPORARY_REGS.  */
1867
1868 static void
1869 move_invariants (class loop *loop)
1870 {
1871   struct invariant *inv;
1872   unsigned i;
1873
1874   FOR_EACH_VEC_ELT (invariants, i, inv)
1875     move_invariant_reg (loop, i);
1876   if (flag_ira_loop_pressure && resize_reg_info ())
1877     {
1878       FOR_EACH_VEC_ELT (invariants, i, inv)
1879         if (inv->reg != NULL_RTX)
1880           {
1881             if (inv->orig_regno >= 0)
1882               setup_reg_classes (REGNO (inv->reg),
1883                                  reg_preferred_class (inv->orig_regno),
1884                                  reg_alternate_class (inv->orig_regno),
1885                                  reg_allocno_class (inv->orig_regno));
1886             else
1887               setup_reg_classes (REGNO (inv->reg),
1888                                  GENERAL_REGS, NO_REGS, GENERAL_REGS);
1889           }
1890     }
1891   /* Remove the DF_UD_CHAIN problem added in find_defs before rescanning,
1892      to save a bit of compile time.  */
1893   df_remove_problem (df_chain);
1894   df_process_deferred_rescans ();
1895 }
1896
1897 /* Initializes invariant motion data.  */
1898
1899 static void
1900 init_inv_motion_data (void)
1901 {
1902   actual_stamp = 1;
1903
1904   invariants.create (100);
1905 }
1906
1907 /* Frees the data allocated by invariant motion.  */
1908
1909 static void
1910 free_inv_motion_data (void)
1911 {
1912   unsigned i;
1913   struct def *def;
1914   struct invariant *inv;
1915
1916   check_invariant_table_size ();
1917   for (i = 0; i < DF_DEFS_TABLE_SIZE (); i++)
1918     {
1919       inv = invariant_table[i];
1920       if (inv)
1921         {
1922           def = inv->def;
1923           gcc_assert (def != NULL);
1924
1925           free_use_list (def->uses);
1926           free (def);
1927           invariant_table[i] = NULL;
1928         }
1929     }
1930
1931   FOR_EACH_VEC_ELT (invariants, i, inv)
1932     {
1933       BITMAP_FREE (inv->depends_on);
1934       free (inv);
1935     }
1936   invariants.release ();
1937 }
1938
1939 /* Move the invariants out of the LOOP.  */
1940
1941 static void
1942 move_single_loop_invariants (class loop *loop)
1943 {
1944   init_inv_motion_data ();
1945
1946   find_invariants (loop);
1947   find_invariants_to_move (optimize_loop_for_speed_p (loop),
1948                            LOOP_DATA (loop)->has_call);
1949   move_invariants (loop);
1950
1951   free_inv_motion_data ();
1952 }
1953
1954 /* Releases the auxiliary data for LOOP.  */
1955
1956 static void
1957 free_loop_data (class loop *loop)
1958 {
1959   class loop_data *data = LOOP_DATA (loop);
1960   if (!data)
1961     return;
1962
1963   bitmap_clear (&LOOP_DATA (loop)->regs_ref);
1964   bitmap_clear (&LOOP_DATA (loop)->regs_live);
1965   free (data);
1966   loop->aux = NULL;
1967 }
1968
1969 \f
1970
1971 /* Registers currently living.  */
1972 static bitmap_head curr_regs_live;
1973
1974 /* Current reg pressure for each pressure class.  */
1975 static int curr_reg_pressure[N_REG_CLASSES];
1976
1977 /* Record all regs that are set in any one insn.  Communication from
1978    mark_reg_{store,clobber} and global_conflicts.  Asm can refer to
1979    all hard-registers.  */
1980 static rtx regs_set[(FIRST_PSEUDO_REGISTER > MAX_RECOG_OPERANDS
1981                      ? FIRST_PSEUDO_REGISTER : MAX_RECOG_OPERANDS) * 2];
1982 /* Number of regs stored in the previous array.  */
1983 static int n_regs_set;
1984
1985 /* Return pressure class and number of needed hard registers (through
1986    *NREGS) of register REGNO.  */
1987 static enum reg_class
1988 get_regno_pressure_class (int regno, int *nregs)
1989 {
1990   if (regno >= FIRST_PSEUDO_REGISTER)
1991     {
1992       enum reg_class pressure_class;
1993
1994       pressure_class = reg_allocno_class (regno);
1995       pressure_class = ira_pressure_class_translate[pressure_class];
1996       *nregs
1997         = ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)];
1998       return pressure_class;
1999     }
2000   else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)
2001            && ! TEST_HARD_REG_BIT (eliminable_regset, regno))
2002     {
2003       *nregs = 1;
2004       return ira_pressure_class_translate[REGNO_REG_CLASS (regno)];
2005     }
2006   else
2007     {
2008       *nregs = 0;
2009       return NO_REGS;
2010     }
2011 }
2012
2013 /* Increase (if INCR_P) or decrease current register pressure for
2014    register REGNO.  */
2015 static void
2016 change_pressure (int regno, bool incr_p)
2017 {
2018   int nregs;
2019   enum reg_class pressure_class;
2020
2021   pressure_class = get_regno_pressure_class (regno, &nregs);
2022   if (! incr_p)
2023     curr_reg_pressure[pressure_class] -= nregs;
2024   else
2025     {
2026       curr_reg_pressure[pressure_class] += nregs;
2027       if (LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
2028           < curr_reg_pressure[pressure_class])
2029         LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
2030           = curr_reg_pressure[pressure_class];
2031     }
2032 }
2033
2034 /* Mark REGNO birth.  */
2035 static void
2036 mark_regno_live (int regno)
2037 {
2038   class loop *loop;
2039
2040   for (loop = curr_loop;
2041        loop != current_loops->tree_root;
2042        loop = loop_outer (loop))
2043     bitmap_set_bit (&LOOP_DATA (loop)->regs_live, regno);
2044   if (!bitmap_set_bit (&curr_regs_live, regno))
2045     return;
2046   change_pressure (regno, true);
2047 }
2048
2049 /* Mark REGNO death.  */
2050 static void
2051 mark_regno_death (int regno)
2052 {
2053   if (! bitmap_clear_bit (&curr_regs_live, regno))
2054     return;
2055   change_pressure (regno, false);
2056 }
2057
2058 /* Mark setting register REG.  */
2059 static void
2060 mark_reg_store (rtx reg, const_rtx setter ATTRIBUTE_UNUSED,
2061                 void *data ATTRIBUTE_UNUSED)
2062 {
2063   if (GET_CODE (reg) == SUBREG)
2064     reg = SUBREG_REG (reg);
2065
2066   if (! REG_P (reg))
2067     return;
2068
2069   regs_set[n_regs_set++] = reg;
2070
2071   unsigned int end_regno = END_REGNO (reg);
2072   for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
2073     mark_regno_live (regno);
2074 }
2075
2076 /* Mark clobbering register REG.  */
2077 static void
2078 mark_reg_clobber (rtx reg, const_rtx setter, void *data)
2079 {
2080   if (GET_CODE (setter) == CLOBBER)
2081     mark_reg_store (reg, setter, data);
2082 }
2083
2084 /* Mark register REG death.  */
2085 static void
2086 mark_reg_death (rtx reg)
2087 {
2088   unsigned int end_regno = END_REGNO (reg);
2089   for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
2090     mark_regno_death (regno);
2091 }
2092
2093 /* Mark occurrence of registers in X for the current loop.  */
2094 static void
2095 mark_ref_regs (rtx x)
2096 {
2097   RTX_CODE code;
2098   int i;
2099   const char *fmt;
2100
2101   if (!x)
2102     return;
2103
2104   code = GET_CODE (x);
2105   if (code == REG)
2106     {
2107       class loop *loop;
2108
2109       for (loop = curr_loop;
2110            loop != current_loops->tree_root;
2111            loop = loop_outer (loop))
2112         bitmap_set_bit (&LOOP_DATA (loop)->regs_ref, REGNO (x));
2113       return;
2114     }
2115
2116   fmt = GET_RTX_FORMAT (code);
2117   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2118     if (fmt[i] == 'e')
2119       mark_ref_regs (XEXP (x, i));
2120     else if (fmt[i] == 'E')
2121       {
2122         int j;
2123
2124         for (j = 0; j < XVECLEN (x, i); j++)
2125           mark_ref_regs (XVECEXP (x, i, j));
2126       }
2127 }
2128
2129 /* Calculate register pressure in the loops.  */
2130 static void
2131 calculate_loop_reg_pressure (void)
2132 {
2133   int i;
2134   unsigned int j;
2135   bitmap_iterator bi;
2136   basic_block bb;
2137   rtx_insn *insn;
2138   rtx link;
2139   class loop *loop, *parent;
2140
2141   FOR_EACH_LOOP (loop, 0)
2142     if (loop->aux == NULL)
2143       {
2144         loop->aux = xcalloc (1, sizeof (class loop_data));
2145         bitmap_initialize (&LOOP_DATA (loop)->regs_ref, &reg_obstack);
2146         bitmap_initialize (&LOOP_DATA (loop)->regs_live, &reg_obstack);
2147       }
2148   ira_setup_eliminable_regset ();
2149   bitmap_initialize (&curr_regs_live, &reg_obstack);
2150   FOR_EACH_BB_FN (bb, cfun)
2151     {
2152       curr_loop = bb->loop_father;
2153       if (curr_loop == current_loops->tree_root)
2154         continue;
2155
2156       for (loop = curr_loop;
2157            loop != current_loops->tree_root;
2158            loop = loop_outer (loop))
2159         bitmap_ior_into (&LOOP_DATA (loop)->regs_live, DF_LR_IN (bb));
2160
2161       bitmap_copy (&curr_regs_live, DF_LR_IN (bb));
2162       for (i = 0; i < ira_pressure_classes_num; i++)
2163         curr_reg_pressure[ira_pressure_classes[i]] = 0;
2164       EXECUTE_IF_SET_IN_BITMAP (&curr_regs_live, 0, j, bi)
2165         change_pressure (j, true);
2166
2167       FOR_BB_INSNS (bb, insn)
2168         {
2169           if (! NONDEBUG_INSN_P (insn))
2170             continue;
2171
2172           mark_ref_regs (PATTERN (insn));
2173           n_regs_set = 0;
2174           note_stores (PATTERN (insn), mark_reg_clobber, NULL);
2175
2176           /* Mark any registers dead after INSN as dead now.  */
2177
2178           for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
2179             if (REG_NOTE_KIND (link) == REG_DEAD)
2180               mark_reg_death (XEXP (link, 0));
2181
2182           /* Mark any registers set in INSN as live,
2183              and mark them as conflicting with all other live regs.
2184              Clobbers are processed again, so they conflict with
2185              the registers that are set.  */
2186
2187           note_stores (PATTERN (insn), mark_reg_store, NULL);
2188
2189           if (AUTO_INC_DEC)
2190             for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
2191               if (REG_NOTE_KIND (link) == REG_INC)
2192                 mark_reg_store (XEXP (link, 0), NULL_RTX, NULL);
2193
2194           while (n_regs_set-- > 0)
2195             {
2196               rtx note = find_regno_note (insn, REG_UNUSED,
2197                                           REGNO (regs_set[n_regs_set]));
2198               if (! note)
2199                 continue;
2200
2201               mark_reg_death (XEXP (note, 0));
2202             }
2203         }
2204     }
2205   bitmap_release (&curr_regs_live);
2206   if (flag_ira_region == IRA_REGION_MIXED
2207       || flag_ira_region == IRA_REGION_ALL)
2208     FOR_EACH_LOOP (loop, 0)
2209       {
2210         EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_live, 0, j, bi)
2211           if (! bitmap_bit_p (&LOOP_DATA (loop)->regs_ref, j))
2212             {
2213               enum reg_class pressure_class;
2214               int nregs;
2215
2216               pressure_class = get_regno_pressure_class (j, &nregs);
2217               LOOP_DATA (loop)->max_reg_pressure[pressure_class] -= nregs;
2218             }
2219       }
2220   if (dump_file == NULL)
2221     return;
2222   FOR_EACH_LOOP (loop, 0)
2223     {
2224       parent = loop_outer (loop);
2225       fprintf (dump_file, "\n  Loop %d (parent %d, header bb%d, depth %d)\n",
2226                loop->num, (parent == NULL ? -1 : parent->num),
2227                loop->header->index, loop_depth (loop));
2228       fprintf (dump_file, "\n    ref. regnos:");
2229       EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_ref, 0, j, bi)
2230         fprintf (dump_file, " %d", j);
2231       fprintf (dump_file, "\n    live regnos:");
2232       EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_live, 0, j, bi)
2233         fprintf (dump_file, " %d", j);
2234       fprintf (dump_file, "\n    Pressure:");
2235       for (i = 0; (int) i < ira_pressure_classes_num; i++)
2236         {
2237           enum reg_class pressure_class;
2238
2239           pressure_class = ira_pressure_classes[i];
2240           if (LOOP_DATA (loop)->max_reg_pressure[pressure_class] == 0)
2241             continue;
2242           fprintf (dump_file, " %s=%d", reg_class_names[pressure_class],
2243                    LOOP_DATA (loop)->max_reg_pressure[pressure_class]);
2244         }
2245       fprintf (dump_file, "\n");
2246     }
2247 }
2248
2249 \f
2250
2251 /* Move the invariants out of the loops.  */
2252
2253 void
2254 move_loop_invariants (void)
2255 {
2256   class loop *loop;
2257
2258   if (optimize == 1)
2259     df_live_add_problem ();
2260   /* ??? This is a hack.  We should only need to call df_live_set_all_dirty
2261      for optimize == 1, but can_move_invariant_reg relies on DF_INSN_LUID
2262      being up-to-date.  That isn't always true (even after df_analyze)
2263      because df_process_deferred_rescans doesn't necessarily cause
2264      blocks to be rescanned.  */
2265   df_live_set_all_dirty ();
2266   if (flag_ira_loop_pressure)
2267     {
2268       df_analyze ();
2269       regstat_init_n_sets_and_refs ();
2270       ira_set_pseudo_classes (true, dump_file);
2271       calculate_loop_reg_pressure ();
2272       regstat_free_n_sets_and_refs ();
2273     }
2274   df_set_flags (DF_EQ_NOTES + DF_DEFER_INSN_RESCAN);
2275   /* Process the loops, innermost first.  */
2276   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2277     {
2278       curr_loop = loop;
2279       /* move_single_loop_invariants for very large loops
2280          is time consuming and might need a lot of memory.  */
2281       if (loop->num_nodes <= (unsigned) LOOP_INVARIANT_MAX_BBS_IN_LOOP)
2282         move_single_loop_invariants (loop);
2283     }
2284
2285   FOR_EACH_LOOP (loop, 0)
2286     {
2287       free_loop_data (loop);
2288     }
2289
2290   if (flag_ira_loop_pressure)
2291     /* There is no sense to keep this info because it was most
2292        probably outdated by subsequent passes.  */
2293     free_reg_info ();
2294   free (invariant_table);
2295   invariant_table = NULL;
2296   invariant_table_size = 0;
2297
2298   if (optimize == 1)
2299     df_remove_problem (df_live);
2300
2301   checking_verify_flow_info ();
2302 }