gcc/loop-doloop.c

   1 /* Perform doloop optimizations
   2    Copyright (C) 2004-2015 Free Software Foundation, Inc.
   3    Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "flags.h"
  27 #include "symtab.h"
  28 #include "hard-reg-set.h"
  29 #include "input.h"
  30 #include "function.h"
  31 #include "alias.h"
  32 #include "tree.h"
  33 #include "insn-config.h"
  34 #include "expmed.h"
  35 #include "dojump.h"
  36 #include "explow.h"
  37 #include "calls.h"
  38 #include "emit-rtl.h"
  39 #include "varasm.h"
  40 #include "stmt.h"
  41 #include "expr.h"
  42 #include "diagnostic-core.h"
  43 #include "tm_p.h"
  44 #include "predict.h"
  45 #include "dominance.h"
  46 #include "cfg.h"
  47 #include "cfgloop.h"
  48 #include "cfgrtl.h"
  49 #include "basic-block.h"
  50 #include "params.h"
  51 #include "target.h"
  52 #include "dumpfile.h"
  53 #include "loop-unroll.h"
  54
  55 /* This module is used to modify loops with a determinable number of
  56    iterations to use special low-overhead looping instructions.
  57
  58    It first validates whether the loop is well behaved and has a
  59    determinable number of iterations (either at compile or run-time).
  60    It then modifies the loop to use a low-overhead looping pattern as
  61    follows:
  62
  63    1. A pseudo register is allocated as the loop iteration counter.
  64
  65    2. The number of loop iterations is calculated and is stored
  66       in the loop counter.
  67
  68    3. At the end of the loop, the jump insn is replaced by the
  69       doloop_end pattern.  The compare must remain because it might be
  70       used elsewhere.  If the loop-variable or condition register are
  71       used elsewhere, they will be eliminated by flow.
  72
  73    4. An optional doloop_begin pattern is inserted at the top of the
  74       loop.
  75
   76    TODO The optimization should only be performed when either the biv used for
   77    the exit condition is not used at all except for the exit test, or we do not
   78    have to change its value, since otherwise we have to add a new induction
   79    variable, which usually will not pay off (unless the cost of the doloop
   80    pattern is somehow much lower than the cost of compare & jump, or unless the
   81    bct register cannot be used for anything else but doloop -- ??? detect these
   82    cases).  */
  83
  84 #ifdef HAVE_doloop_end
  85
  86 /* Return the loop termination condition for PATTERN or zero
  87    if it is not a decrement and branch jump insn.  */
  88
  89 rtx
  90 doloop_condition_get (rtx doloop_pat)
  91 {
  92   rtx cmp;
  93   rtx inc;
  94   rtx reg;
  95   rtx inc_src;
  96   rtx condition;
  97   rtx pattern;
  98   rtx cc_reg = NULL_RTX;
  99   rtx reg_orig = NULL_RTX;
 100
 101   /* The canonical doloop pattern we expect has one of the following
 102      forms:
 103
 104      1)  (parallel [(set (pc) (if_then_else (condition)
 105                                             (label_ref (label))
 106                                             (pc)))
 107                      (set (reg) (plus (reg) (const_int -1)))
 108                      (additional clobbers and uses)])
 109
 110      The branch must be the first entry of the parallel (also required
 111      by jump.c), and the second entry of the parallel must be a set of
 112      the loop counter register.  Some targets (IA-64) wrap the set of
 113      the loop counter in an if_then_else too.
 114
 115      2)  (set (reg) (plus (reg) (const_int -1))
 116          (set (pc) (if_then_else (reg != 0)
 117                                  (label_ref (label))
 118                                  (pc))).
 119
 120      Some targets (ARM) do the comparison before the branch, as in the
 121      following form:
 122
 123      3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0)))
 124                    (set (reg) (plus (reg) (const_int -1)))])
 125         (set (pc) (if_then_else (cc == NE)
 126                                 (label_ref (label))
 127                                 (pc))) */
 128
 129   pattern = PATTERN (doloop_pat);
 130
 131   if (GET_CODE (pattern) != PARALLEL)
 132     {
 133       rtx cond;
 134       rtx_insn *prev_insn = prev_nondebug_insn (doloop_pat);
 135       rtx cmp_arg1, cmp_arg2;
 136       rtx cmp_orig;
 137
 138       /* In case the pattern is not PARALLEL we expect two forms
 139          of doloop which are cases 2) and 3) above: in case 2) the
 140          decrement immediately precedes the branch, while in case 3)
 141          the compare and decrement instructions immediately precede
 142          the branch.  */
 143
 144       if (prev_insn == NULL_RTX || !INSN_P (prev_insn))
 145         return 0;
 146
 147       cmp = pattern;
 148       if (GET_CODE (PATTERN (prev_insn)) == PARALLEL)
 149         {
 150           /* The third case: the compare and decrement instructions
 151              immediately precede the branch.  */
 152           cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0);
 153           if (GET_CODE (cmp_orig) != SET)
 154             return 0;
 155           if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE)
 156             return 0;
 157           cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0);
 158           cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1);
 159           if (cmp_arg2 != const0_rtx
 160               || GET_CODE (cmp_arg1) != PLUS)
 161             return 0;
 162           reg_orig = XEXP (cmp_arg1, 0);
 163           if (XEXP (cmp_arg1, 1) != GEN_INT (-1)
 164               || !REG_P (reg_orig))
 165             return 0;
 166           cc_reg = SET_DEST (cmp_orig);
 167
 168           inc = XVECEXP (PATTERN (prev_insn), 0, 1);
 169         }
 170       else
 171         inc = PATTERN (prev_insn);
 172       /* We expect the condition to be of the form (reg != 0)  */
 173       cond = XEXP (SET_SRC (cmp), 0);
 174       if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
 175         return 0;
 176     }
 177   else
 178     {
 179       cmp = XVECEXP (pattern, 0, 0);
 180       inc = XVECEXP (pattern, 0, 1);
 181     }
 182
 183   /* Check for (set (reg) (something)).  */
 184   if (GET_CODE (inc) != SET)
 185     return 0;
 186   reg = SET_DEST (inc);
 187   if (! REG_P (reg))
 188     return 0;
 189
 190   /* Check if something = (plus (reg) (const_int -1)).
 191      On IA-64, this decrement is wrapped in an if_then_else.  */
 192   inc_src = SET_SRC (inc);
 193   if (GET_CODE (inc_src) == IF_THEN_ELSE)
 194     inc_src = XEXP (inc_src, 1);
 195   if (GET_CODE (inc_src) != PLUS
 196       || XEXP (inc_src, 0) != reg
 197       || XEXP (inc_src, 1) != constm1_rtx)
 198     return 0;
 199
 200   /* Check for (set (pc) (if_then_else (condition)
 201                                        (label_ref (label))
 202                                        (pc))).  */
 203   if (GET_CODE (cmp) != SET
 204       || SET_DEST (cmp) != pc_rtx
 205       || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
 206       || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
 207       || XEXP (SET_SRC (cmp), 2) != pc_rtx)
 208     return 0;
 209
 210   /* Extract loop termination condition.  */
 211   condition = XEXP (SET_SRC (cmp), 0);
 212
 213   /* We expect a GE or NE comparison with 0 or 1.  */
 214   if ((GET_CODE (condition) != GE
 215        && GET_CODE (condition) != NE)
 216       || (XEXP (condition, 1) != const0_rtx
 217           && XEXP (condition, 1) != const1_rtx))
 218     return 0;
 219
 220   if ((XEXP (condition, 0) == reg)
 221       /* For the third case:  */
 222       || ((cc_reg != NULL_RTX)
 223           && (XEXP (condition, 0) == cc_reg)
 224           && (reg_orig == reg))
 225       || (GET_CODE (XEXP (condition, 0)) == PLUS
 226           && XEXP (XEXP (condition, 0), 0) == reg))
 227    {
 228      if (GET_CODE (pattern) != PARALLEL)
 229      /*  For the second form we expect:
 230
 231          (set (reg) (plus (reg) (const_int -1))
 232          (set (pc) (if_then_else (reg != 0)
 233                                  (label_ref (label))
 234                                  (pc))).
 235
 236          is equivalent to the following:
 237
 238          (parallel [(set (pc) (if_then_else (reg != 1)
 239                                             (label_ref (label))
 240                                             (pc)))
 241                      (set (reg) (plus (reg) (const_int -1)))
 242                      (additional clobbers and uses)])
 243
 244         For the third form we expect:
 245
 246         (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
 247                    (set (reg) (plus (reg) (const_int -1)))])
 248         (set (pc) (if_then_else (cc == NE)
 249                                 (label_ref (label))
 250                                 (pc)))
 251
 252         which is equivalent to the following:
 253
 254         (parallel [(set (cc) (compare (reg,  1))
 255                    (set (reg) (plus (reg) (const_int -1)))
 256                    (set (pc) (if_then_else (NE == cc)
 257                                            (label_ref (label))
 258                                            (pc))))])
 259
 260         So we return the second form instead for the two cases.
 261
 262      */
 263         condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
 264
 265     return condition;
 266    }
 267
 268   /* ??? If a machine uses a funny comparison, we could return a
 269      canonicalized form here.  */
 270
 271   return 0;
 272 }
 273
 274 /* Return nonzero if the loop specified by LOOP is suitable for
 275    the use of special low-overhead looping instructions.  DESC
 276    describes the number of iterations of the loop.  */
 277
 278 static bool
 279 doloop_valid_p (struct loop *loop, struct niter_desc *desc)
 280 {
 281   basic_block *body = get_loop_body (loop), bb;
 282   rtx_insn *insn;
 283   unsigned i;
 284   bool result = true;
 285
 286   /* Check for loops that may not terminate under special conditions.  */
 287   if (!desc->simple_p
 288       || desc->assumptions
 289       || desc->infinite)
 290     {
 291       /* There are some cases that would require a special attention.
 292          For example if the comparison is LEU and the comparison value
 293          is UINT_MAX then the loop will not terminate.  Similarly, if the
 294          comparison code is GEU and the comparison value is 0, the
 295          loop will not terminate.
 296
 297          If the absolute increment is not 1, the loop can be infinite
 298          even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
 299
 300          ??? We could compute these conditions at run-time and have a
 301          additional jump around the loop to ensure an infinite loop.
 302          However, it is very unlikely that this is the intended
 303          behavior of the loop and checking for these rare boundary
 304          conditions would pessimize all other code.
 305
 306          If the loop is executed only a few times an extra check to
 307          restart the loop could use up most of the benefits of using a
 308          count register loop.  Note however, that normally, this
 309          restart branch would never execute, so it could be predicted
 310          well by the CPU.  We should generate the pessimistic code by
 311          default, and have an option, e.g. -funsafe-loops that would
 312          enable count-register loops in this case.  */
 313       if (dump_file)
 314         fprintf (dump_file, "Doloop: Possible infinite iteration case.\n");
 315       result = false;
 316       goto cleanup;
 317     }
 318
 319   for (i = 0; i < loop->num_nodes; i++)
 320     {
 321       bb = body[i];
 322
 323       for (insn = BB_HEAD (bb);
 324            insn != NEXT_INSN (BB_END (bb));
 325            insn = NEXT_INSN (insn))
 326         {
 327           /* Different targets have different necessities for low-overhead
 328              looping.  Call the back end for each instruction within the loop
 329              to let it decide whether the insn prohibits a low-overhead loop.
 330              It will then return the cause for it to emit to the dump file.  */
 331           const char * invalid = targetm.invalid_within_doloop (insn);
 332           if (invalid)
 333             {
 334               if (dump_file)
 335                 fprintf (dump_file, "Doloop: %s\n", invalid);
 336               result = false;
 337               goto cleanup;
 338             }
 339         }
 340     }
 341   result = true;
 342
 343 cleanup:
 344   free (body);
 345
 346   return result;
 347 }
 348
 349 /* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru
 350    edge.  If the condition is always false, do not do anything.  If it is always
 351    true, redirect E to DEST and return false.  In all other cases, true is
 352    returned.  */
 353
 354 static bool
 355 add_test (rtx cond, edge *e, basic_block dest)
 356 {
 357   rtx_insn *seq, *jump;
 358   rtx_code_label *label;
 359   machine_mode mode;
 360   rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
 361   enum rtx_code code = GET_CODE (cond);
 362   basic_block bb;
 363
 364   mode = GET_MODE (XEXP (cond, 0));
 365   if (mode == VOIDmode)
 366     mode = GET_MODE (XEXP (cond, 1));
 367
 368   start_sequence ();
 369   op0 = force_operand (op0, NULL_RTX);
 370   op1 = force_operand (op1, NULL_RTX);
 371   label = block_label (dest);
 372   do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL, label, -1);
 373
 374   jump = get_last_insn ();
 375   if (!jump || !JUMP_P (jump))
 376     {
 377       /* The condition is always false and the jump was optimized out.  */
 378       end_sequence ();
 379       return true;
 380     }
 381
 382   seq = get_insns ();
 383   end_sequence ();
 384
 385   /* There always is at least the jump insn in the sequence.  */
 386   gcc_assert (seq != NULL_RTX);
 387
 388   bb = split_edge_and_insert (*e, seq);
 389   *e = single_succ_edge (bb);
 390
 391   if (any_uncondjump_p (jump))
 392     {
 393       /* The condition is always true.  */
 394       delete_insn (jump);
 395       redirect_edge_and_branch_force (*e, dest);
 396       return false;
 397     }
 398
 399   JUMP_LABEL (jump) = label;
 400
 401   /* The jump is supposed to handle an unlikely special case.  */
 402   add_int_reg_note (jump, REG_BR_PROB, 0);
 403
 404   LABEL_NUSES (label)++;
 405
 406   make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU);
 407   return true;
 408 }
 409
 410 /* Modify the loop to use the low-overhead looping insn where LOOP
 411    describes the loop, DESC describes the number of iterations of the
 412    loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
 413    end of the loop.  CONDITION is the condition separated from the
 414    DOLOOP_SEQ.  COUNT is the number of iterations of the LOOP.  */
 415
 416 static void
 417 doloop_modify (struct loop *loop, struct niter_desc *desc,
 418                rtx doloop_seq, rtx condition, rtx count)
 419 {
 420   rtx counter_reg;
 421   rtx tmp, noloop = NULL_RTX;
 422   rtx_insn *sequence;
 423   rtx_insn *jump_insn;
 424   rtx_code_label *jump_label;
 425   int nonneg = 0;
 426   bool increment_count;
 427   basic_block loop_end = desc->out_edge->src;
 428   machine_mode mode;
 429   rtx true_prob_val;
 430   widest_int iterations;
 431
 432   jump_insn = BB_END (loop_end);
 433
 434   if (dump_file)
 435     {
 436       fprintf (dump_file, "Doloop: Inserting doloop pattern (");
 437       if (desc->const_iter)
 438         fprintf (dump_file, "%" PRId64, desc->niter);
 439       else
 440         fputs ("runtime", dump_file);
 441       fputs (" iterations).\n", dump_file);
 442     }
 443
 444   /* Get the probability of the original branch. If it exists we would
 445      need to update REG_BR_PROB of the new jump_insn.  */
 446   true_prob_val = find_reg_note (jump_insn, REG_BR_PROB, NULL_RTX);
 447
 448   /* Discard original jump to continue loop.  The original compare
 449      result may still be live, so it cannot be discarded explicitly.  */
 450   delete_insn (jump_insn);
 451
 452   counter_reg = XEXP (condition, 0);
 453   if (GET_CODE (counter_reg) == PLUS)
 454     counter_reg = XEXP (counter_reg, 0);
 455   mode = GET_MODE (counter_reg);
 456
 457   increment_count = false;
 458   switch (GET_CODE (condition))
 459     {
 460     case NE:
 461       /* Currently only NE tests against zero and one are supported.  */
 462       noloop = XEXP (condition, 1);
 463       if (noloop != const0_rtx)
 464         {
 465           gcc_assert (noloop == const1_rtx);
 466           increment_count = true;
 467         }
 468       break;
 469
 470     case GE:
 471       /* Currently only GE tests against zero are supported.  */
 472       gcc_assert (XEXP (condition, 1) == const0_rtx);
 473
 474       noloop = constm1_rtx;
 475
 476       /* The iteration count does not need incrementing for a GE test.  */
 477       increment_count = false;
 478
 479       /* Determine if the iteration counter will be non-negative.
 480          Note that the maximum value loaded is iterations_max - 1.  */
 481       if (get_max_loop_iterations (loop, &iterations)
 482           && wi::leu_p (iterations,
 483                         wi::set_bit_in_zero <widest_int>
 484                         (GET_MODE_PRECISION (mode) - 1)))
 485         nonneg = 1;
 486       break;
 487
 488       /* Abort if an invalid doloop pattern has been generated.  */
 489     default:
 490       gcc_unreachable ();
 491     }
 492
 493   if (increment_count)
 494     count = simplify_gen_binary (PLUS, mode, count, const1_rtx);
 495
 496   /* Insert initialization of the count register into the loop header.  */
 497   start_sequence ();
 498   tmp = force_operand (count, counter_reg);
 499   convert_move (counter_reg, tmp, 1);
 500   sequence = get_insns ();
 501   end_sequence ();
 502   emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
 503
 504   if (desc->noloop_assumptions)
 505     {
 506       rtx ass = copy_rtx (desc->noloop_assumptions);
 507       basic_block preheader = loop_preheader_edge (loop)->src;
 508       basic_block set_zero
 509               = split_edge (loop_preheader_edge (loop));
 510       basic_block new_preheader
 511               = split_edge (loop_preheader_edge (loop));
 512       edge te;
 513
 514       /* Expand the condition testing the assumptions and if it does not pass,
 515          reset the count register to 0.  */
 516       redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader);
 517       set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);
 518
 519       set_zero->count = 0;
 520       set_zero->frequency = 0;
 521
 522       te = single_succ_edge (preheader);
 523       for (; ass; ass = XEXP (ass, 1))
 524         if (!add_test (XEXP (ass, 0), &te, set_zero))
 525           break;
 526
 527       if (ass)
 528         {
 529           /* We reached a condition that is always true.  This is very hard to
 530              reproduce (such a loop does not roll, and thus it would most
 531              likely get optimized out by some of the preceding optimizations).
 532              In fact, I do not have any testcase for it.  However, it would
 533              also be very hard to show that it is impossible, so we must
 534              handle this case.  */
 535           set_zero->count = preheader->count;
 536           set_zero->frequency = preheader->frequency;
 537         }
 538
 539       if (EDGE_COUNT (set_zero->preds) == 0)
 540         {
 541           /* All the conditions were simplified to false, remove the
 542              unreachable set_zero block.  */
 543           delete_basic_block (set_zero);
 544         }
 545       else
 546         {
 547           /* Reset the counter to zero in the set_zero block.  */
 548           start_sequence ();
 549           convert_move (counter_reg, noloop, 0);
 550           sequence = get_insns ();
 551           end_sequence ();
 552           emit_insn_after (sequence, BB_END (set_zero));
 553
 554           set_immediate_dominator (CDI_DOMINATORS, set_zero,
 555                                    recompute_dominator (CDI_DOMINATORS,
 556                                                         set_zero));
 557         }
 558
 559       set_immediate_dominator (CDI_DOMINATORS, new_preheader,
 560                                recompute_dominator (CDI_DOMINATORS,
 561                                                     new_preheader));
 562     }
 563
 564   /* Some targets (eg, C4x) need to initialize special looping
 565      registers.  */
 566 #ifdef HAVE_doloop_begin
 567   {
 568     rtx init;
 569
 570     init = gen_doloop_begin (counter_reg, doloop_seq);
 571     if (init)
 572       {
 573         start_sequence ();
 574         emit_insn (init);
 575         sequence = get_insns ();
 576         end_sequence ();
 577         emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
 578       }
 579   }
 580 #endif
 581
 582   /* Insert the new low-overhead looping insn.  */
 583   emit_jump_insn_after (doloop_seq, BB_END (loop_end));
 584   jump_insn = BB_END (loop_end);
 585   jump_label = block_label (desc->in_edge->dest);
 586   JUMP_LABEL (jump_insn) = jump_label;
 587   LABEL_NUSES (jump_label)++;
 588
 589   /* Ensure the right fallthru edge is marked, for case we have reversed
 590      the condition.  */
 591   desc->in_edge->flags &= ~EDGE_FALLTHRU;
 592   desc->out_edge->flags |= EDGE_FALLTHRU;
 593
 594   /* Add a REG_NONNEG note if the actual or estimated maximum number
 595      of iterations is non-negative.  */
 596   if (nonneg)
 597     add_reg_note (jump_insn, REG_NONNEG, NULL_RTX);
 598
 599   /* Update the REG_BR_PROB note.  */
 600   if (true_prob_val)
 601     {
 602       /* Seems safer to use the branch probability.  */
 603       add_int_reg_note (jump_insn, REG_BR_PROB, desc->in_edge->probability);
 604     }
 605 }
 606
 607 /* Process loop described by LOOP validating that the loop is suitable for
 608    conversion to use a low overhead looping instruction, replacing the jump
 609    insn where suitable.  Returns true if the loop was successfully
 610    modified.  */
 611
 612 static bool
 613 doloop_optimize (struct loop *loop)
 614 {
 615   machine_mode mode;
 616   rtx doloop_seq, doloop_pat, doloop_reg;
 617   rtx count;
 618   widest_int iterations, iterations_max;
 619   rtx_code_label *start_label;
 620   rtx condition;
 621   unsigned level, est_niter;
 622   int max_cost;
 623   struct niter_desc *desc;
 624   unsigned word_mode_size;
 625   unsigned HOST_WIDE_INT word_mode_max;
 626   int entered_at_top;
 627
 628   if (dump_file)
 629     fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);
 630
 631   iv_analysis_loop_init (loop);
 632
 633   /* Find the simple exit of a LOOP.  */
 634   desc = get_simple_loop_desc (loop);
 635
 636   /* Check that loop is a candidate for a low-overhead looping insn.  */
 637   if (!doloop_valid_p (loop, desc))
 638     {
 639       if (dump_file)
 640         fprintf (dump_file,
 641                  "Doloop: The loop is not suitable.\n");
 642       return false;
 643     }
 644   mode = desc->mode;
 645
 646   est_niter = 3;
 647   if (desc->const_iter)
 648     est_niter = desc->niter;
 649   /* If the estimate on number of iterations is reliable (comes from profile
 650      feedback), use it.  Do not use it normally, since the expected number
 651      of iterations of an unrolled loop is 2.  */
 652   if (loop->header->count)
 653     est_niter = expected_loop_iterations (loop);
 654
 655   if (est_niter < 3)
 656     {
 657       if (dump_file)
 658         fprintf (dump_file,
 659                  "Doloop: Too few iterations (%u) to be profitable.\n",
 660                  est_niter);
 661       return false;
 662     }
 663
 664   max_cost
 665     = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST));
 666   if (set_src_cost (desc->niter_expr, optimize_loop_for_speed_p (loop))
 667       > max_cost)
 668     {
 669       if (dump_file)
 670         fprintf (dump_file,
 671                  "Doloop: number of iterations too costly to compute.\n");
 672       return false;
 673     }
 674
 675   if (desc->const_iter)
 676     iterations = widest_int::from (std::make_pair (desc->niter_expr, mode),
 677                                    UNSIGNED);
 678   else
 679     iterations = 0;
 680   if (!get_max_loop_iterations (loop, &iterations_max))
 681     iterations_max = 0;
 682   level = get_loop_level (loop) + 1;
 683   entered_at_top = (loop->latch == desc->in_edge->dest
 684                     && contains_no_active_insn_p (loop->latch));
 685   if (!targetm.can_use_doloop_p (iterations, iterations_max, level,
 686                                  entered_at_top))
 687     {
 688       if (dump_file)
 689         fprintf (dump_file, "Loop rejected by can_use_doloop_p.\n");
 690       return false;
 691     }
 692
 693   /* Generate looping insn.  If the pattern FAILs then give up trying
 694      to modify the loop since there is some aspect the back-end does
 695      not like.  */
 696   count = copy_rtx (desc->niter_expr);
 697   start_label = block_label (desc->in_edge->dest);
 698   doloop_reg = gen_reg_rtx (mode);
 699   doloop_seq = gen_doloop_end (doloop_reg, start_label);
 700
 701   word_mode_size = GET_MODE_PRECISION (word_mode);
 702   word_mode_max
 703           = ((unsigned HOST_WIDE_INT) 1 << (word_mode_size - 1) << 1) - 1;
 704   if (! doloop_seq
 705       && mode != word_mode
 706       /* Before trying mode different from the one in that # of iterations is
 707          computed, we must be sure that the number of iterations fits into
 708          the new mode.  */
 709       && (word_mode_size >= GET_MODE_PRECISION (mode)
 710           || wi::leu_p (iterations_max, word_mode_max)))
 711     {
 712       if (word_mode_size > GET_MODE_PRECISION (mode))
 713         count = simplify_gen_unary (ZERO_EXTEND, word_mode, count, mode);
 714       else
 715         count = lowpart_subreg (word_mode, count, mode);
 716       PUT_MODE (doloop_reg, word_mode);
 717       doloop_seq = gen_doloop_end (doloop_reg, start_label);
 718     }
 719   if (! doloop_seq)
 720     {
 721       if (dump_file)
 722         fprintf (dump_file,
 723                  "Doloop: Target unwilling to use doloop pattern!\n");
 724       return false;
 725     }
 726
 727   /* If multiple instructions were created, the last must be the
 728      jump instruction.  Also, a raw define_insn may yield a plain
 729      pattern.  */
 730   doloop_pat = doloop_seq;
 731   if (INSN_P (doloop_pat))
 732     {
 733       rtx_insn *doloop_insn = as_a <rtx_insn *> (doloop_pat);
 734       while (NEXT_INSN (doloop_insn) != NULL_RTX)
 735         doloop_insn = NEXT_INSN (doloop_insn);
 736       if (!JUMP_P (doloop_insn))
 737         doloop_insn = NULL;
 738       doloop_pat = doloop_insn;
 739     }
 740
 741   if (! doloop_pat
 742       || ! (condition = doloop_condition_get (doloop_pat)))
 743     {
 744       if (dump_file)
 745         fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n");
 746       return false;
 747     }
 748
 749   doloop_modify (loop, desc, doloop_seq, condition, count);
 750   return true;
 751 }
 752
 753 /* This is the main entry point.  Process all loops using doloop_optimize.  */
 754
 755 void
 756 doloop_optimize_loops (void)
 757 {
 758   struct loop *loop;
 759
 760   FOR_EACH_LOOP (loop, 0)
 761     {
 762       doloop_optimize (loop);
 763     }
 764
 765   iv_analysis_done ();
 766
 767 #ifdef ENABLE_CHECKING
 768   verify_loop_structure ();
 769 #endif
 770 }
 771 #endif /* HAVE_doloop_end */
 772