]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/loop-doloop.c
Update copyright years.
[thirdparty/gcc.git] / gcc / loop-doloop.c
1 /* Perform doloop optimizations
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
3 Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "flags.h"
27 #include "expr.h"
28 #include "hard-reg-set.h"
29 #include "vec.h"
30 #include "hashtab.h"
31 #include "hash-set.h"
32 #include "machmode.h"
33 #include "input.h"
34 #include "function.h"
35 #include "diagnostic-core.h"
36 #include "tm_p.h"
37 #include "predict.h"
38 #include "dominance.h"
39 #include "cfg.h"
40 #include "cfgloop.h"
41 #include "cfgrtl.h"
42 #include "basic-block.h"
43 #include "params.h"
44 #include "target.h"
45 #include "dumpfile.h"
46 #include "loop-unroll.h"
47
48 /* This module is used to modify loops with a determinable number of
49 iterations to use special low-overhead looping instructions.
50
51 It first validates whether the loop is well behaved and has a
52 determinable number of iterations (either at compile or run-time).
53 It then modifies the loop to use a low-overhead looping pattern as
54 follows:
55
56 1. A pseudo register is allocated as the loop iteration counter.
57
58 2. The number of loop iterations is calculated and is stored
59 in the loop counter.
60
61 3. At the end of the loop, the jump insn is replaced by the
62 doloop_end pattern. The compare must remain because it might be
63 used elsewhere. If the loop-variable or condition register are
64 used elsewhere, they will be eliminated by flow.
65
66 4. An optional doloop_begin pattern is inserted at the top of the
67 loop.
68
69 TODO The optimization should only performed when either the biv used for exit
70 condition is unused at all except for the exit test, or if we do not have to
71 change its value, since otherwise we have to add a new induction variable,
72 which usually will not pay up (unless the cost of the doloop pattern is
73 somehow extremely lower than the cost of compare & jump, or unless the bct
74 register cannot be used for anything else but doloop -- ??? detect these
75 cases). */
76
77 #ifdef HAVE_doloop_end
78
79 /* Return the loop termination condition for PATTERN or zero
80 if it is not a decrement and branch jump insn. */
81
82 rtx
83 doloop_condition_get (rtx doloop_pat)
84 {
85 rtx cmp;
86 rtx inc;
87 rtx reg;
88 rtx inc_src;
89 rtx condition;
90 rtx pattern;
91 rtx cc_reg = NULL_RTX;
92 rtx reg_orig = NULL_RTX;
93
94 /* The canonical doloop pattern we expect has one of the following
95 forms:
96
97 1) (parallel [(set (pc) (if_then_else (condition)
98 (label_ref (label))
99 (pc)))
100 (set (reg) (plus (reg) (const_int -1)))
101 (additional clobbers and uses)])
102
103 The branch must be the first entry of the parallel (also required
104 by jump.c), and the second entry of the parallel must be a set of
105 the loop counter register. Some targets (IA-64) wrap the set of
106 the loop counter in an if_then_else too.
107
108 2) (set (reg) (plus (reg) (const_int -1))
109 (set (pc) (if_then_else (reg != 0)
110 (label_ref (label))
111 (pc))).
112
113 Some targets (ARM) do the comparison before the branch, as in the
114 following form:
115
116 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0)))
117 (set (reg) (plus (reg) (const_int -1)))])
118 (set (pc) (if_then_else (cc == NE)
119 (label_ref (label))
120 (pc))) */
121
122 pattern = PATTERN (doloop_pat);
123
124 if (GET_CODE (pattern) != PARALLEL)
125 {
126 rtx cond;
127 rtx prev_insn = prev_nondebug_insn (doloop_pat);
128 rtx cmp_arg1, cmp_arg2;
129 rtx cmp_orig;
130
131 /* In case the pattern is not PARALLEL we expect two forms
132 of doloop which are cases 2) and 3) above: in case 2) the
133 decrement immediately precedes the branch, while in case 3)
134 the compare and decrement instructions immediately precede
135 the branch. */
136
137 if (prev_insn == NULL_RTX || !INSN_P (prev_insn))
138 return 0;
139
140 cmp = pattern;
141 if (GET_CODE (PATTERN (prev_insn)) == PARALLEL)
142 {
143 /* The third case: the compare and decrement instructions
144 immediately precede the branch. */
145 cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0);
146 if (GET_CODE (cmp_orig) != SET)
147 return 0;
148 if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE)
149 return 0;
150 cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0);
151 cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1);
152 if (cmp_arg2 != const0_rtx
153 || GET_CODE (cmp_arg1) != PLUS)
154 return 0;
155 reg_orig = XEXP (cmp_arg1, 0);
156 if (XEXP (cmp_arg1, 1) != GEN_INT (-1)
157 || !REG_P (reg_orig))
158 return 0;
159 cc_reg = SET_DEST (cmp_orig);
160
161 inc = XVECEXP (PATTERN (prev_insn), 0, 1);
162 }
163 else
164 inc = PATTERN (prev_insn);
165 /* We expect the condition to be of the form (reg != 0) */
166 cond = XEXP (SET_SRC (cmp), 0);
167 if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
168 return 0;
169 }
170 else
171 {
172 cmp = XVECEXP (pattern, 0, 0);
173 inc = XVECEXP (pattern, 0, 1);
174 }
175
176 /* Check for (set (reg) (something)). */
177 if (GET_CODE (inc) != SET)
178 return 0;
179 reg = SET_DEST (inc);
180 if (! REG_P (reg))
181 return 0;
182
183 /* Check if something = (plus (reg) (const_int -1)).
184 On IA-64, this decrement is wrapped in an if_then_else. */
185 inc_src = SET_SRC (inc);
186 if (GET_CODE (inc_src) == IF_THEN_ELSE)
187 inc_src = XEXP (inc_src, 1);
188 if (GET_CODE (inc_src) != PLUS
189 || XEXP (inc_src, 0) != reg
190 || XEXP (inc_src, 1) != constm1_rtx)
191 return 0;
192
193 /* Check for (set (pc) (if_then_else (condition)
194 (label_ref (label))
195 (pc))). */
196 if (GET_CODE (cmp) != SET
197 || SET_DEST (cmp) != pc_rtx
198 || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
199 || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
200 || XEXP (SET_SRC (cmp), 2) != pc_rtx)
201 return 0;
202
203 /* Extract loop termination condition. */
204 condition = XEXP (SET_SRC (cmp), 0);
205
206 /* We expect a GE or NE comparison with 0 or 1. */
207 if ((GET_CODE (condition) != GE
208 && GET_CODE (condition) != NE)
209 || (XEXP (condition, 1) != const0_rtx
210 && XEXP (condition, 1) != const1_rtx))
211 return 0;
212
213 if ((XEXP (condition, 0) == reg)
214 /* For the third case: */
215 || ((cc_reg != NULL_RTX)
216 && (XEXP (condition, 0) == cc_reg)
217 && (reg_orig == reg))
218 || (GET_CODE (XEXP (condition, 0)) == PLUS
219 && XEXP (XEXP (condition, 0), 0) == reg))
220 {
221 if (GET_CODE (pattern) != PARALLEL)
222 /* For the second form we expect:
223
224 (set (reg) (plus (reg) (const_int -1))
225 (set (pc) (if_then_else (reg != 0)
226 (label_ref (label))
227 (pc))).
228
229 is equivalent to the following:
230
231 (parallel [(set (pc) (if_then_else (reg != 1)
232 (label_ref (label))
233 (pc)))
234 (set (reg) (plus (reg) (const_int -1)))
235 (additional clobbers and uses)])
236
237 For the third form we expect:
238
239 (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
240 (set (reg) (plus (reg) (const_int -1)))])
241 (set (pc) (if_then_else (cc == NE)
242 (label_ref (label))
243 (pc)))
244
245 which is equivalent to the following:
246
247 (parallel [(set (cc) (compare (reg, 1))
248 (set (reg) (plus (reg) (const_int -1)))
249 (set (pc) (if_then_else (NE == cc)
250 (label_ref (label))
251 (pc))))])
252
253 So we return the second form instead for the two cases.
254
255 */
256 condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
257
258 return condition;
259 }
260
261 /* ??? If a machine uses a funny comparison, we could return a
262 canonicalized form here. */
263
264 return 0;
265 }
266
267 /* Return nonzero if the loop specified by LOOP is suitable for
268 the use of special low-overhead looping instructions. DESC
269 describes the number of iterations of the loop. */
270
271 static bool
272 doloop_valid_p (struct loop *loop, struct niter_desc *desc)
273 {
274 basic_block *body = get_loop_body (loop), bb;
275 rtx_insn *insn;
276 unsigned i;
277 bool result = true;
278
279 /* Check for loops that may not terminate under special conditions. */
280 if (!desc->simple_p
281 || desc->assumptions
282 || desc->infinite)
283 {
284 /* There are some cases that would require a special attention.
285 For example if the comparison is LEU and the comparison value
286 is UINT_MAX then the loop will not terminate. Similarly, if the
287 comparison code is GEU and the comparison value is 0, the
288 loop will not terminate.
289
290 If the absolute increment is not 1, the loop can be infinite
291 even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
292
293 ??? We could compute these conditions at run-time and have a
294 additional jump around the loop to ensure an infinite loop.
295 However, it is very unlikely that this is the intended
296 behavior of the loop and checking for these rare boundary
297 conditions would pessimize all other code.
298
299 If the loop is executed only a few times an extra check to
300 restart the loop could use up most of the benefits of using a
301 count register loop. Note however, that normally, this
302 restart branch would never execute, so it could be predicted
303 well by the CPU. We should generate the pessimistic code by
304 default, and have an option, e.g. -funsafe-loops that would
305 enable count-register loops in this case. */
306 if (dump_file)
307 fprintf (dump_file, "Doloop: Possible infinite iteration case.\n");
308 result = false;
309 goto cleanup;
310 }
311
312 for (i = 0; i < loop->num_nodes; i++)
313 {
314 bb = body[i];
315
316 for (insn = BB_HEAD (bb);
317 insn != NEXT_INSN (BB_END (bb));
318 insn = NEXT_INSN (insn))
319 {
320 /* Different targets have different necessities for low-overhead
321 looping. Call the back end for each instruction within the loop
322 to let it decide whether the insn prohibits a low-overhead loop.
323 It will then return the cause for it to emit to the dump file. */
324 const char * invalid = targetm.invalid_within_doloop (insn);
325 if (invalid)
326 {
327 if (dump_file)
328 fprintf (dump_file, "Doloop: %s\n", invalid);
329 result = false;
330 goto cleanup;
331 }
332 }
333 }
334 result = true;
335
336 cleanup:
337 free (body);
338
339 return result;
340 }
341
/* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru
   edge.  If the condition is always false, do not do anything.  If it is always
   true, redirect E to DEST and return false.  In all other cases, true is
   returned.

   The emitted jump is given a REG_BR_PROB of 0 because it guards an
   unlikely special case (resetting the count register when the
   "noloop" assumptions fail).  */

static bool
add_test (rtx cond, edge *e, basic_block dest)
{
  rtx_insn *seq, *jump;
  rtx label;
  machine_mode mode;
  rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
  enum rtx_code code = GET_CODE (cond);
  basic_block bb;

  /* Take the mode from whichever operand has one; a CONST_INT operand
     has VOIDmode, so fall back to the other side.  */
  mode = GET_MODE (XEXP (cond, 0));
  if (mode == VOIDmode)
    mode = GET_MODE (XEXP (cond, 1));

  start_sequence ();
  /* Force the operands into a form do_compare_rtx_and_jump accepts;
     any insns this generates go into the same sequence.  */
  op0 = force_operand (op0, NULL_RTX);
  op1 = force_operand (op1, NULL_RTX);
  label = block_label (dest);
  do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX,
			   NULL_RTX, label, -1);

  jump = get_last_insn ();
  if (!jump || !JUMP_P (jump))
    {
      /* The condition is always false and the jump was optimized out.  */
      end_sequence ();
      return true;
    }

  seq = get_insns ();
  end_sequence ();

  /* There always is at least the jump insn in the sequence.  */
  gcc_assert (seq != NULL_RTX);

  /* Insert the whole comparison sequence on the edge, in a new block.  */
  bb = split_edge_and_insert (*e, seq);
  *e = single_succ_edge (bb);

  if (any_uncondjump_p (jump))
    {
      /* The condition is always true.  */
      delete_insn (jump);
      redirect_edge_and_branch_force (*e, dest);
      return false;
    }

  JUMP_LABEL (jump) = label;

  /* The jump is supposed to handle an unlikely special case.  */
  add_int_reg_note (jump, REG_BR_PROB, 0);

  LABEL_NUSES (label)++;

  /* Add the branch-taken edge to DEST; *E remains the fallthru edge.  */
  make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU);
  return true;
}
403
404 /* Modify the loop to use the low-overhead looping insn where LOOP
405 describes the loop, DESC describes the number of iterations of the
406 loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
407 end of the loop. CONDITION is the condition separated from the
408 DOLOOP_SEQ. COUNT is the number of iterations of the LOOP. */
409
410 static void
411 doloop_modify (struct loop *loop, struct niter_desc *desc,
412 rtx doloop_seq, rtx condition, rtx count)
413 {
414 rtx counter_reg;
415 rtx tmp, noloop = NULL_RTX;
416 rtx_insn *sequence;
417 rtx_insn *jump_insn;
418 rtx jump_label;
419 int nonneg = 0;
420 bool increment_count;
421 basic_block loop_end = desc->out_edge->src;
422 machine_mode mode;
423 rtx true_prob_val;
424 widest_int iterations;
425
426 jump_insn = BB_END (loop_end);
427
428 if (dump_file)
429 {
430 fprintf (dump_file, "Doloop: Inserting doloop pattern (");
431 if (desc->const_iter)
432 fprintf (dump_file, "%"PRId64, desc->niter);
433 else
434 fputs ("runtime", dump_file);
435 fputs (" iterations).\n", dump_file);
436 }
437
438 /* Get the probability of the original branch. If it exists we would
439 need to update REG_BR_PROB of the new jump_insn. */
440 true_prob_val = find_reg_note (jump_insn, REG_BR_PROB, NULL_RTX);
441
442 /* Discard original jump to continue loop. The original compare
443 result may still be live, so it cannot be discarded explicitly. */
444 delete_insn (jump_insn);
445
446 counter_reg = XEXP (condition, 0);
447 if (GET_CODE (counter_reg) == PLUS)
448 counter_reg = XEXP (counter_reg, 0);
449 mode = GET_MODE (counter_reg);
450
451 increment_count = false;
452 switch (GET_CODE (condition))
453 {
454 case NE:
455 /* Currently only NE tests against zero and one are supported. */
456 noloop = XEXP (condition, 1);
457 if (noloop != const0_rtx)
458 {
459 gcc_assert (noloop == const1_rtx);
460 increment_count = true;
461 }
462 break;
463
464 case GE:
465 /* Currently only GE tests against zero are supported. */
466 gcc_assert (XEXP (condition, 1) == const0_rtx);
467
468 noloop = constm1_rtx;
469
470 /* The iteration count does not need incrementing for a GE test. */
471 increment_count = false;
472
473 /* Determine if the iteration counter will be non-negative.
474 Note that the maximum value loaded is iterations_max - 1. */
475 if (get_max_loop_iterations (loop, &iterations)
476 && wi::leu_p (iterations,
477 wi::set_bit_in_zero <widest_int>
478 (GET_MODE_PRECISION (mode) - 1)))
479 nonneg = 1;
480 break;
481
482 /* Abort if an invalid doloop pattern has been generated. */
483 default:
484 gcc_unreachable ();
485 }
486
487 if (increment_count)
488 count = simplify_gen_binary (PLUS, mode, count, const1_rtx);
489
490 /* Insert initialization of the count register into the loop header. */
491 start_sequence ();
492 tmp = force_operand (count, counter_reg);
493 convert_move (counter_reg, tmp, 1);
494 sequence = get_insns ();
495 end_sequence ();
496 emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
497
498 if (desc->noloop_assumptions)
499 {
500 rtx ass = copy_rtx (desc->noloop_assumptions);
501 basic_block preheader = loop_preheader_edge (loop)->src;
502 basic_block set_zero
503 = split_edge (loop_preheader_edge (loop));
504 basic_block new_preheader
505 = split_edge (loop_preheader_edge (loop));
506 edge te;
507
508 /* Expand the condition testing the assumptions and if it does not pass,
509 reset the count register to 0. */
510 redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader);
511 set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);
512
513 set_zero->count = 0;
514 set_zero->frequency = 0;
515
516 te = single_succ_edge (preheader);
517 for (; ass; ass = XEXP (ass, 1))
518 if (!add_test (XEXP (ass, 0), &te, set_zero))
519 break;
520
521 if (ass)
522 {
523 /* We reached a condition that is always true. This is very hard to
524 reproduce (such a loop does not roll, and thus it would most
525 likely get optimized out by some of the preceding optimizations).
526 In fact, I do not have any testcase for it. However, it would
527 also be very hard to show that it is impossible, so we must
528 handle this case. */
529 set_zero->count = preheader->count;
530 set_zero->frequency = preheader->frequency;
531 }
532
533 if (EDGE_COUNT (set_zero->preds) == 0)
534 {
535 /* All the conditions were simplified to false, remove the
536 unreachable set_zero block. */
537 delete_basic_block (set_zero);
538 }
539 else
540 {
541 /* Reset the counter to zero in the set_zero block. */
542 start_sequence ();
543 convert_move (counter_reg, noloop, 0);
544 sequence = get_insns ();
545 end_sequence ();
546 emit_insn_after (sequence, BB_END (set_zero));
547
548 set_immediate_dominator (CDI_DOMINATORS, set_zero,
549 recompute_dominator (CDI_DOMINATORS,
550 set_zero));
551 }
552
553 set_immediate_dominator (CDI_DOMINATORS, new_preheader,
554 recompute_dominator (CDI_DOMINATORS,
555 new_preheader));
556 }
557
558 /* Some targets (eg, C4x) need to initialize special looping
559 registers. */
560 #ifdef HAVE_doloop_begin
561 {
562 rtx init;
563
564 init = gen_doloop_begin (counter_reg, doloop_seq);
565 if (init)
566 {
567 start_sequence ();
568 emit_insn (init);
569 sequence = get_insns ();
570 end_sequence ();
571 emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
572 }
573 }
574 #endif
575
576 /* Insert the new low-overhead looping insn. */
577 emit_jump_insn_after (doloop_seq, BB_END (loop_end));
578 jump_insn = BB_END (loop_end);
579 jump_label = block_label (desc->in_edge->dest);
580 JUMP_LABEL (jump_insn) = jump_label;
581 LABEL_NUSES (jump_label)++;
582
583 /* Ensure the right fallthru edge is marked, for case we have reversed
584 the condition. */
585 desc->in_edge->flags &= ~EDGE_FALLTHRU;
586 desc->out_edge->flags |= EDGE_FALLTHRU;
587
588 /* Add a REG_NONNEG note if the actual or estimated maximum number
589 of iterations is non-negative. */
590 if (nonneg)
591 add_reg_note (jump_insn, REG_NONNEG, NULL_RTX);
592
593 /* Update the REG_BR_PROB note. */
594 if (true_prob_val)
595 {
596 /* Seems safer to use the branch probability. */
597 add_int_reg_note (jump_insn, REG_BR_PROB, desc->in_edge->probability);
598 }
599 }
600
/* Process loop described by LOOP validating that the loop is suitable for
   conversion to use a low overhead looping instruction, replacing the jump
   insn where suitable.  Returns true if the loop was successfully
   modified.

   The checks performed, in order: loop shape (doloop_valid_p),
   profitability of the iteration count (estimated trip count and the
   cost of computing the count expression), the target's
   can_use_doloop_p hook, and whether the doloop_end pattern expands
   for the loop's mode (retrying in word_mode if not).  */

static bool
doloop_optimize (struct loop *loop)
{
  machine_mode mode;
  rtx doloop_seq, doloop_pat, doloop_reg;
  rtx count;
  widest_int iterations, iterations_max;
  rtx start_label;
  rtx condition;
  unsigned level, est_niter;
  int max_cost;
  struct niter_desc *desc;
  unsigned word_mode_size;
  unsigned HOST_WIDE_INT word_mode_max;
  int entered_at_top;

  if (dump_file)
    fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);

  iv_analysis_loop_init (loop);

  /* Find the simple exit of a LOOP.  */
  desc = get_simple_loop_desc (loop);

  /* Check that loop is a candidate for a low-overhead looping insn.  */
  if (!doloop_valid_p (loop, desc))
    {
      if (dump_file)
	fprintf (dump_file,
		 "Doloop: The loop is not suitable.\n");
      return false;
    }
  mode = desc->mode;

  /* Default trip-count estimate when nothing better is known; 3 is
     the minimum at which the transformation is considered profitable
     (see the check below).  */
  est_niter = 3;
  if (desc->const_iter)
    est_niter = desc->niter;
  /* If the estimate on number of iterations is reliable (comes from profile
     feedback), use it.  Do not use it normally, since the expected number
     of iterations of an unrolled loop is 2.  */
  if (loop->header->count)
    est_niter = expected_loop_iterations (loop);

  if (est_niter < 3)
    {
      if (dump_file)
	fprintf (dump_file,
		 "Doloop: Too few iterations (%u) to be profitable.\n",
		 est_niter);
      return false;
    }

  /* Bound the RTX cost of computing the iteration count expression in
     the preheader; an expensive count computation can eat the benefit.  */
  max_cost
    = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST));
  if (set_src_cost (desc->niter_expr, optimize_loop_for_speed_p (loop))
      > max_cost)
    {
      if (dump_file)
	fprintf (dump_file,
		 "Doloop: number of iterations too costly to compute.\n");
      return false;
    }

  if (desc->const_iter)
    iterations = widest_int::from (std::make_pair (desc->niter_expr, mode),
				   UNSIGNED);
  else
    iterations = 0;
  if (!get_max_loop_iterations (loop, &iterations_max))
    iterations_max = 0;
  level = get_loop_level (loop) + 1;
  entered_at_top = (loop->latch == desc->in_edge->dest
		    && contains_no_active_insn_p (loop->latch));
  /* Let the target veto, e.g. for nesting-depth limits on hardware
     loop registers.  */
  if (!targetm.can_use_doloop_p (iterations, iterations_max, level,
				 entered_at_top))
    {
      if (dump_file)
	fprintf (dump_file, "Loop rejected by can_use_doloop_p.\n");
      return false;
    }

  /* Generate looping insn.  If the pattern FAILs then give up trying
     to modify the loop since there is some aspect the back-end does
     not like.  */
  count = copy_rtx (desc->niter_expr);
  start_label = block_label (desc->in_edge->dest);
  doloop_reg = gen_reg_rtx (mode);
  doloop_seq = gen_doloop_end (doloop_reg, start_label);

  word_mode_size = GET_MODE_PRECISION (word_mode);
  /* Maximum unsigned value representable in word_mode, computed with
     two shifts to avoid shifting by the full width (undefined).  */
  word_mode_max
    = ((unsigned HOST_WIDE_INT) 1 << (word_mode_size - 1) << 1) - 1;
  if (! doloop_seq
      && mode != word_mode
      /* Before trying mode different from the one in that # of iterations is
	 computed, we must be sure that the number of iterations fits into
	 the new mode.  */
      && (word_mode_size >= GET_MODE_PRECISION (mode)
	  || wi::leu_p (iterations_max, word_mode_max)))
    {
      if (word_mode_size > GET_MODE_PRECISION (mode))
	count = simplify_gen_unary (ZERO_EXTEND, word_mode, count, mode);
      else
	count = lowpart_subreg (word_mode, count, mode);
      PUT_MODE (doloop_reg, word_mode);
      doloop_seq = gen_doloop_end (doloop_reg, start_label);
    }
  if (! doloop_seq)
    {
      if (dump_file)
	fprintf (dump_file,
		 "Doloop: Target unwilling to use doloop pattern!\n");
      return false;
    }

  /* If multiple instructions were created, the last must be the
     jump instruction.  Also, a raw define_insn may yield a plain
     pattern.  */
  doloop_pat = doloop_seq;
  if (INSN_P (doloop_pat))
    {
      rtx_insn *doloop_insn = as_a <rtx_insn *> (doloop_pat);
      while (NEXT_INSN (doloop_insn) != NULL_RTX)
	doloop_insn = NEXT_INSN (doloop_insn);
      if (!JUMP_P (doloop_insn))
	doloop_insn = NULL;
      doloop_pat = doloop_insn;
    }

  if (! doloop_pat
      || ! (condition = doloop_condition_get (doloop_pat)))
    {
      if (dump_file)
	fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n");
      return false;
    }

  doloop_modify (loop, desc, doloop_seq, condition, count);
  return true;
}
746
747 /* This is the main entry point. Process all loops using doloop_optimize. */
748
749 void
750 doloop_optimize_loops (void)
751 {
752 struct loop *loop;
753
754 FOR_EACH_LOOP (loop, 0)
755 {
756 doloop_optimize (loop);
757 }
758
759 iv_analysis_done ();
760
761 #ifdef ENABLE_CHECKING
762 verify_loop_structure ();
763 #endif
764 }
765 #endif /* HAVE_doloop_end */
766