/* Perform doloop optimizations
   Copyright (C) 2004-2015 Free Software Foundation, Inc.
   Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "flags.h"
#include "tree.h"
#include "alias.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "diagnostic-core.h"
#include "tm_p.h"
#include "cfgloop.h"
#include "cfgrtl.h"
#include "params.h"
#include "target.h"
#include "dumpfile.h"
#include "loop-unroll.h"

/* This module is used to modify loops with a determinable number of
   iterations to use special low-overhead looping instructions.

   It first validates whether the loop is well behaved and has a
   determinable number of iterations (either at compile or run-time).
   It then modifies the loop to use a low-overhead looping pattern as
   follows (a rough sketch of the overall effect appears after this
   comment):

   1. A pseudo register is allocated as the loop iteration counter.

   2. The number of loop iterations is calculated and is stored
      in the loop counter.

   3. At the end of the loop, the jump insn is replaced by the
      doloop_end pattern.  The compare must remain because it might be
      used elsewhere.  If the loop-variable or condition register are
      used elsewhere, they will be eliminated by flow.

   4. An optional doloop_begin pattern is inserted at the top of the
      loop.

   TODO The optimization should only be performed when either the biv used
   for the exit condition is not used at all except for the exit test, or
   when we do not have to change its value, since otherwise we have to add
   a new induction variable, which usually will not pay off (unless the cost
   of the doloop pattern is somehow much lower than the cost of compare &
   jump, or unless the bct register cannot be used for anything else but
   doloop -- ??? detect these cases).  */
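
/* For illustration only (this sketch is not part of the transformation
   itself, and the variables i, n, a and b are hypothetical): a counted
   loop such as

     for (i = 0; i < n; i++)
       a[i] += b[i];

   is, on a target providing a doloop_end pattern, conceptually turned into

     count = n;             <- steps 1 and 2: a new pseudo holds the count
   loop:
     a[i] += b[i];
     i++;
     if (--count != 0)      <- step 3: the decrement-and-branch doloop insn
       goto loop;

   with an optional target-specific doloop_begin emitted before the loop
   (step 4).  */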

/* Return the loop termination condition for DOLOOP_PAT, or zero
   if it is not a decrement and branch jump insn.  */

rtx
doloop_condition_get (rtx doloop_pat)
{
  rtx cmp;
  rtx inc;
  rtx reg;
  rtx inc_src;
  rtx condition;
  rtx pattern;
  rtx cc_reg = NULL_RTX;
  rtx reg_orig = NULL_RTX;

  /* The canonical doloop pattern we expect has one of the following
     forms:

     1) (parallel [(set (pc) (if_then_else (condition)
                                           (label_ref (label))
                                           (pc)))
                   (set (reg) (plus (reg) (const_int -1)))
                   (additional clobbers and uses)])

     The branch must be the first entry of the parallel (also required
     by jump.c), and the second entry of the parallel must be a set of
     the loop counter register.  Some targets (IA-64) wrap the set of
     the loop counter in an if_then_else too.

     2) (set (reg) (plus (reg) (const_int -1))
        (set (pc) (if_then_else (reg != 0)
                                (label_ref (label))
                                (pc))).

     Some targets (ARM) do the comparison before the branch, as in the
     following form:

     3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0)))
                   (set (reg) (plus (reg) (const_int -1)))])
        (set (pc) (if_then_else (cc == NE)
                                (label_ref (label))
                                (pc))) */

  pattern = PATTERN (doloop_pat);

  if (GET_CODE (pattern) != PARALLEL)
    {
      rtx cond;
      rtx_insn *prev_insn = prev_nondebug_insn (doloop_pat);
      rtx cmp_arg1, cmp_arg2;
      rtx cmp_orig;

      /* In case the pattern is not PARALLEL we expect two forms
         of doloop which are cases 2) and 3) above: in case 2) the
         decrement immediately precedes the branch, while in case 3)
         the compare and decrement instructions immediately precede
         the branch.  */

      if (prev_insn == NULL_RTX || !INSN_P (prev_insn))
        return 0;

      cmp = pattern;
      if (GET_CODE (PATTERN (prev_insn)) == PARALLEL)
        {
          /* The third case: the compare and decrement instructions
             immediately precede the branch.  */
          cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0);
          if (GET_CODE (cmp_orig) != SET)
            return 0;
          if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE)
            return 0;
          cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0);
          cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1);
          if (cmp_arg2 != const0_rtx
              || GET_CODE (cmp_arg1) != PLUS)
            return 0;
          reg_orig = XEXP (cmp_arg1, 0);
          if (XEXP (cmp_arg1, 1) != GEN_INT (-1)
              || !REG_P (reg_orig))
            return 0;
          cc_reg = SET_DEST (cmp_orig);

          inc = XVECEXP (PATTERN (prev_insn), 0, 1);
        }
      else
        inc = PATTERN (prev_insn);
      /* We expect the condition to be of the form (reg != 0)  */
      cond = XEXP (SET_SRC (cmp), 0);
      if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
        return 0;
    }
  else
    {
      cmp = XVECEXP (pattern, 0, 0);
      inc = XVECEXP (pattern, 0, 1);
    }

  /* Check for (set (reg) (something)).  */
  if (GET_CODE (inc) != SET)
    return 0;
  reg = SET_DEST (inc);
  if (! REG_P (reg))
    return 0;

  /* Check if something = (plus (reg) (const_int -1)).
     On IA-64, this decrement is wrapped in an if_then_else.  */
  inc_src = SET_SRC (inc);
  if (GET_CODE (inc_src) == IF_THEN_ELSE)
    inc_src = XEXP (inc_src, 1);
  if (GET_CODE (inc_src) != PLUS
      || XEXP (inc_src, 0) != reg
      || XEXP (inc_src, 1) != constm1_rtx)
    return 0;

  /* Check for (set (pc) (if_then_else (condition)
                                        (label_ref (label))
                                        (pc))).  */
  if (GET_CODE (cmp) != SET
      || SET_DEST (cmp) != pc_rtx
      || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
      || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
      || XEXP (SET_SRC (cmp), 2) != pc_rtx)
    return 0;

  /* Extract loop termination condition.  */
  condition = XEXP (SET_SRC (cmp), 0);

  /* We expect a GE or NE comparison with 0 or 1.  */
  if ((GET_CODE (condition) != GE
       && GET_CODE (condition) != NE)
      || (XEXP (condition, 1) != const0_rtx
          && XEXP (condition, 1) != const1_rtx))
    return 0;

  if ((XEXP (condition, 0) == reg)
      /* For the third case:  */
      || ((cc_reg != NULL_RTX)
          && (XEXP (condition, 0) == cc_reg)
          && (reg_orig == reg))
      || (GET_CODE (XEXP (condition, 0)) == PLUS
          && XEXP (XEXP (condition, 0), 0) == reg))
    {
      if (GET_CODE (pattern) != PARALLEL)
        /* For the second form we expect:

           (set (reg) (plus (reg) (const_int -1))
           (set (pc) (if_then_else (reg != 0)
                                   (label_ref (label))
                                   (pc))).

           is equivalent to the following:

           (parallel [(set (pc) (if_then_else (reg != 1)
                                              (label_ref (label))
                                              (pc)))
                      (set (reg) (plus (reg) (const_int -1)))
                      (additional clobbers and uses)])

           For the third form we expect:

           (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
                      (set (reg) (plus (reg) (const_int -1)))])
           (set (pc) (if_then_else (cc == NE)
                                   (label_ref (label))
                                   (pc)))

           which is equivalent to the following:

           (parallel [(set (cc) (compare (reg, 1))
                      (set (reg) (plus (reg) (const_int -1)))
                      (set (pc) (if_then_else (NE == cc)
                                              (label_ref (label))
                                              (pc))))])

           So we return the second form instead for the two cases.

        */
        condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);

      return condition;
    }

  /* ??? If a machine uses a funny comparison, we could return a
     canonicalized form here.  */

  return 0;
}

/* Return nonzero if the loop specified by LOOP is suitable for
   the use of special low-overhead looping instructions.  DESC
   describes the number of iterations of the loop.  */

static bool
doloop_valid_p (struct loop *loop, struct niter_desc *desc)
{
  basic_block *body = get_loop_body (loop), bb;
  rtx_insn *insn;
  unsigned i;
  bool result = true;

  /* Check for loops that may not terminate under special conditions.  */
  if (!desc->simple_p
      || desc->assumptions
      || desc->infinite)
    {
      /* There are some cases that would require special attention.
         For example if the comparison is LEU and the comparison value
         is UINT_MAX then the loop will not terminate.  Similarly, if the
         comparison code is GEU and the comparison value is 0, the
         loop will not terminate.

         If the absolute increment is not 1, the loop can be infinite
         even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)

         ??? We could compute these conditions at run-time and have an
         additional jump around the loop to ensure an infinite loop.
         However, it is very unlikely that this is the intended
         behavior of the loop and checking for these rare boundary
         conditions would pessimize all other code.

         If the loop is executed only a few times an extra check to
         restart the loop could use up most of the benefits of using a
         count register loop.  Note however, that normally, this
         restart branch would never execute, so it could be predicted
         well by the CPU.  We should generate the pessimistic code by
         default, and have an option, e.g. -funsafe-loops that would
         enable count-register loops in this case.  */
      if (dump_file)
        fprintf (dump_file, "Doloop: Possible infinite iteration case.\n");
      result = false;
      goto cleanup;
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];

      for (insn = BB_HEAD (bb);
           insn != NEXT_INSN (BB_END (bb));
           insn = NEXT_INSN (insn))
        {
          /* Different targets have different requirements for low-overhead
             looping.  Call the back end for each instruction within the loop
             to let it decide whether the insn prohibits a low-overhead loop.
             If it does, the back end returns the reason, which is emitted
             to the dump file.  */
          const char * invalid = targetm.invalid_within_doloop (insn);
          if (invalid)
            {
              if (dump_file)
                fprintf (dump_file, "Doloop: %s\n", invalid);
              result = false;
              goto cleanup;
            }
        }
    }
  result = true;

 cleanup:
  free (body);

  return result;
}

/* Add a test of COND, jumping to DEST on edge *E, and set *E to the new
   fallthru edge.  If the condition is always false, do not do anything.
   If it is always true, redirect E to DEST and return false.  In all
   other cases, true is returned.  */

static bool
add_test (rtx cond, edge *e, basic_block dest)
{
  rtx_insn *seq, *jump;
  rtx_code_label *label;
  machine_mode mode;
  rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
  enum rtx_code code = GET_CODE (cond);
  basic_block bb;

  mode = GET_MODE (XEXP (cond, 0));
  if (mode == VOIDmode)
    mode = GET_MODE (XEXP (cond, 1));

  start_sequence ();
  op0 = force_operand (op0, NULL_RTX);
  op1 = force_operand (op1, NULL_RTX);
  label = block_label (dest);
  do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL, label, -1);

  jump = get_last_insn ();
  if (!jump || !JUMP_P (jump))
    {
      /* The condition is always false and the jump was optimized out.  */
      end_sequence ();
      return true;
    }

  seq = get_insns ();
  end_sequence ();

  /* There is always at least the jump insn in the sequence.  */
  gcc_assert (seq != NULL_RTX);

  bb = split_edge_and_insert (*e, seq);
  *e = single_succ_edge (bb);

  if (any_uncondjump_p (jump))
    {
      /* The condition is always true.  */
      delete_insn (jump);
      redirect_edge_and_branch_force (*e, dest);
      return false;
    }

  JUMP_LABEL (jump) = label;

  /* The jump is supposed to handle an unlikely special case.  */
  add_int_reg_note (jump, REG_BR_PROB, 0);

  LABEL_NUSES (label)++;

  make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU);
  return true;
}

/* Modify the loop to use the low-overhead looping insn where LOOP
   describes the loop, DESC describes the number of iterations of the
   loop, and DOLOOP_SEQ is the low-overhead looping insn to emit at the
   end of the loop.  CONDITION is the condition separated from the
   DOLOOP_SEQ.  COUNT is the number of iterations of the LOOP.  */

static void
doloop_modify (struct loop *loop, struct niter_desc *desc,
               rtx_insn *doloop_seq, rtx condition, rtx count)
{
  rtx counter_reg;
  rtx tmp, noloop = NULL_RTX;
  rtx_insn *sequence;
  rtx_insn *jump_insn;
  rtx_code_label *jump_label;
  int nonneg = 0;
  bool increment_count;
  basic_block loop_end = desc->out_edge->src;
  machine_mode mode;
  rtx true_prob_val;
  widest_int iterations;

  jump_insn = BB_END (loop_end);

  if (dump_file)
    {
      fprintf (dump_file, "Doloop: Inserting doloop pattern (");
      if (desc->const_iter)
        fprintf (dump_file, "%" PRId64, desc->niter);
      else
        fputs ("runtime", dump_file);
      fputs (" iterations).\n", dump_file);
    }

  /* Get the probability of the original branch.  If it exists, we need
     to update REG_BR_PROB of the new jump_insn.  */
  true_prob_val = find_reg_note (jump_insn, REG_BR_PROB, NULL_RTX);

  /* Discard the original jump to continue the loop.  The original compare
     result may still be live, so it cannot be discarded explicitly.  */
  delete_insn (jump_insn);

  counter_reg = XEXP (condition, 0);
  if (GET_CODE (counter_reg) == PLUS)
    counter_reg = XEXP (counter_reg, 0);
  mode = GET_MODE (counter_reg);

  increment_count = false;
  switch (GET_CODE (condition))
    {
    case NE:
      /* Currently only NE tests against zero and one are supported.  */
      noloop = XEXP (condition, 1);
      if (noloop != const0_rtx)
        {
          gcc_assert (noloop == const1_rtx);
          increment_count = true;
        }
      break;

    case GE:
      /* Currently only GE tests against zero are supported.  */
      gcc_assert (XEXP (condition, 1) == const0_rtx);

      noloop = constm1_rtx;

      /* The iteration count does not need incrementing for a GE test.  */
      increment_count = false;

      /* Determine if the iteration counter will be non-negative.
         Note that the maximum value loaded is iterations_max - 1.  */
      if (get_max_loop_iterations (loop, &iterations)
          && wi::leu_p (iterations,
                        wi::set_bit_in_zero <widest_int>
                          (GET_MODE_PRECISION (mode) - 1)))
        nonneg = 1;
      break;

      /* Abort if an invalid doloop pattern has been generated.  */
    default:
      gcc_unreachable ();
    }

  if (increment_count)
    count = simplify_gen_binary (PLUS, mode, count, const1_rtx);

  /* Insert initialization of the count register into the loop header.  */
  start_sequence ();
  tmp = force_operand (count, counter_reg);
  convert_move (counter_reg, tmp, 1);
  sequence = get_insns ();
  end_sequence ();
  emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));

  if (desc->noloop_assumptions)
    {
      rtx ass = copy_rtx (desc->noloop_assumptions);
      basic_block preheader = loop_preheader_edge (loop)->src;
      basic_block set_zero
        = split_edge (loop_preheader_edge (loop));
      basic_block new_preheader
        = split_edge (loop_preheader_edge (loop));
      edge te;

      /* Expand the condition testing the assumptions and if it does not pass,
         reset the count register to 0.  */
      redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);

      set_zero->count = 0;
      set_zero->frequency = 0;

      te = single_succ_edge (preheader);
      for (; ass; ass = XEXP (ass, 1))
        if (!add_test (XEXP (ass, 0), &te, set_zero))
          break;

      if (ass)
        {
          /* We reached a condition that is always true.  This is very hard to
             reproduce (such a loop does not roll, and thus it would most
             likely get optimized out by some of the preceding optimizations).
             In fact, I do not have any testcase for it.  However, it would
             also be very hard to show that it is impossible, so we must
             handle this case.  */
          set_zero->count = preheader->count;
          set_zero->frequency = preheader->frequency;
        }

      if (EDGE_COUNT (set_zero->preds) == 0)
        {
          /* All the conditions were simplified to false, remove the
             unreachable set_zero block.  */
          delete_basic_block (set_zero);
        }
      else
        {
          /* Reset the counter to zero in the set_zero block.  */
          start_sequence ();
          convert_move (counter_reg, noloop, 0);
          sequence = get_insns ();
          end_sequence ();
          emit_insn_after (sequence, BB_END (set_zero));

          set_immediate_dominator (CDI_DOMINATORS, set_zero,
                                   recompute_dominator (CDI_DOMINATORS,
                                                        set_zero));
        }

      set_immediate_dominator (CDI_DOMINATORS, new_preheader,
                               recompute_dominator (CDI_DOMINATORS,
                                                    new_preheader));
    }

  /* Some targets (e.g. C4x) need to initialize special looping
     registers.  */
  if (targetm.have_doloop_begin ())
    if (rtx_insn *seq = targetm.gen_doloop_begin (counter_reg, doloop_seq))
      emit_insn_after (seq, BB_END (loop_preheader_edge (loop)->src));

  /* Insert the new low-overhead looping insn.  */
  emit_jump_insn_after (doloop_seq, BB_END (loop_end));
  jump_insn = BB_END (loop_end);
  jump_label = block_label (desc->in_edge->dest);
  JUMP_LABEL (jump_insn) = jump_label;
  LABEL_NUSES (jump_label)++;

  /* Ensure the right fallthru edge is marked, in case we have reversed
     the condition.  */
  desc->in_edge->flags &= ~EDGE_FALLTHRU;
  desc->out_edge->flags |= EDGE_FALLTHRU;

  /* Add a REG_NONNEG note if the actual or estimated maximum number
     of iterations is non-negative.  */
  if (nonneg)
    add_reg_note (jump_insn, REG_NONNEG, NULL_RTX);

  /* Update the REG_BR_PROB note.  */
  if (true_prob_val)
    {
      /* Seems safer to use the branch probability.  */
      add_int_reg_note (jump_insn, REG_BR_PROB, desc->in_edge->probability);
    }
}

/* Process the loop described by LOOP, validating that it is suitable for
   conversion to use a low-overhead looping instruction, and replace the
   jump insn where suitable.  Returns true if the loop was successfully
   modified.  */

static bool
doloop_optimize (struct loop *loop)
{
  machine_mode mode;
  rtx doloop_reg;
  rtx count;
  widest_int iterations, iterations_max;
  rtx_code_label *start_label;
  rtx condition;
  unsigned level, est_niter;
  int max_cost;
  struct niter_desc *desc;
  unsigned word_mode_size;
  unsigned HOST_WIDE_INT word_mode_max;
  int entered_at_top;

  if (dump_file)
    fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);

  iv_analysis_loop_init (loop);

  /* Find the simple exit of a LOOP.  */
  desc = get_simple_loop_desc (loop);

  /* Check that loop is a candidate for a low-overhead looping insn.  */
  if (!doloop_valid_p (loop, desc))
    {
      if (dump_file)
        fprintf (dump_file,
                 "Doloop: The loop is not suitable.\n");
      return false;
    }
  mode = desc->mode;

  est_niter = 3;
  if (desc->const_iter)
    est_niter = desc->niter;
  /* If the estimate of the number of iterations is reliable (it comes from
     profile feedback), use it.  Do not use it otherwise, since the expected
     number of iterations of an unrolled loop is 2.  */
  if (loop->header->count)
    est_niter = expected_loop_iterations (loop);

  if (est_niter < 3)
    {
      if (dump_file)
        fprintf (dump_file,
                 "Doloop: Too few iterations (%u) to be profitable.\n",
                 est_niter);
      return false;
    }

  max_cost
    = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST));
  if (set_src_cost (desc->niter_expr, mode, optimize_loop_for_speed_p (loop))
      > max_cost)
    {
      if (dump_file)
        fprintf (dump_file,
                 "Doloop: number of iterations too costly to compute.\n");
      return false;
    }

  if (desc->const_iter)
    iterations = widest_int::from (std::make_pair (desc->niter_expr, mode),
                                   UNSIGNED);
  else
    iterations = 0;
  if (!get_max_loop_iterations (loop, &iterations_max))
    iterations_max = 0;
  level = get_loop_level (loop) + 1;
  entered_at_top = (loop->latch == desc->in_edge->dest
                    && contains_no_active_insn_p (loop->latch));
  if (!targetm.can_use_doloop_p (iterations, iterations_max, level,
                                 entered_at_top))
    {
      if (dump_file)
        fprintf (dump_file, "Loop rejected by can_use_doloop_p.\n");
      return false;
    }

  /* Generate the looping insn.  If the pattern FAILs then give up trying
     to modify the loop since there is some aspect the back-end does
     not like.  */
  count = copy_rtx (desc->niter_expr);
  start_label = block_label (desc->in_edge->dest);
  doloop_reg = gen_reg_rtx (mode);
  rtx_insn *doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label);

  word_mode_size = GET_MODE_PRECISION (word_mode);
  word_mode_max
    = ((unsigned HOST_WIDE_INT) 1 << (word_mode_size - 1) << 1) - 1;
  if (! doloop_seq
      && mode != word_mode
      /* Before trying a mode different from the one in which the number of
         iterations is computed, we must be sure that the number of
         iterations fits into the new mode.  */
      && (word_mode_size >= GET_MODE_PRECISION (mode)
          || wi::leu_p (iterations_max, word_mode_max)))
    {
      if (word_mode_size > GET_MODE_PRECISION (mode))
        count = simplify_gen_unary (ZERO_EXTEND, word_mode, count, mode);
      else
        count = lowpart_subreg (word_mode, count, mode);
      PUT_MODE (doloop_reg, word_mode);
      doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label);
    }
  if (! doloop_seq)
    {
      if (dump_file)
        fprintf (dump_file,
                 "Doloop: Target unwilling to use doloop pattern!\n");
      return false;
    }

  /* If multiple instructions were created, the last must be the
     jump instruction.  */
  rtx_insn *doloop_insn = doloop_seq;
  while (NEXT_INSN (doloop_insn) != NULL_RTX)
    doloop_insn = NEXT_INSN (doloop_insn);
  if (!JUMP_P (doloop_insn)
      || !(condition = doloop_condition_get (doloop_insn)))
    {
      if (dump_file)
        fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n");
      return false;
    }

  doloop_modify (loop, desc, doloop_seq, condition, count);
  return true;
}

/* This is the main entry point.  Process all loops using doloop_optimize.  */

void
doloop_optimize_loops (void)
{
  struct loop *loop;

  FOR_EACH_LOOP (loop, 0)
    {
      doloop_optimize (loop);
    }

  iv_analysis_done ();

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif
}