gcc/loop-doloop.c

   1 /* Perform doloop optimizations
   2    Copyright (C) 2004-2015 Free Software Foundation, Inc.
   3    Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "flags.h"
  27 #include "symtab.h"
  28 #include "hashtab.h"
  29 #include "hash-set.h"
  30 #include "vec.h"
  31 #include "machmode.h"
  32 #include "hard-reg-set.h"
  33 #include "input.h"
  34 #include "function.h"
  35 #include "statistics.h"
  36 #include "double-int.h"
  37 #include "real.h"
  38 #include "fixed-value.h"
  39 #include "alias.h"
  40 #include "wide-int.h"
  41 #include "inchash.h"
  42 #include "tree.h"
  43 #include "insn-config.h"
  44 #include "expmed.h"
  45 #include "dojump.h"
  46 #include "explow.h"
  47 #include "calls.h"
  48 #include "emit-rtl.h"
  49 #include "varasm.h"
  50 #include "stmt.h"
  51 #include "expr.h"
  52 #include "diagnostic-core.h"
  53 #include "tm_p.h"
  54 #include "predict.h"
  55 #include "dominance.h"
  56 #include "cfg.h"
  57 #include "cfgloop.h"
  58 #include "cfgrtl.h"
  59 #include "basic-block.h"
  60 #include "params.h"
  61 #include "target.h"
  62 #include "dumpfile.h"
  63 #include "loop-unroll.h"
  64
  65 /* This module is used to modify loops with a determinable number of
  66    iterations to use special low-overhead looping instructions.
  67
  68    It first validates whether the loop is well behaved and has a
  69    determinable number of iterations (either at compile or run-time).
  70    It then modifies the loop to use a low-overhead looping pattern as
  71    follows:
  72
  73    1. A pseudo register is allocated as the loop iteration counter.
  74
  75    2. The number of loop iterations is calculated and is stored
  76       in the loop counter.
  77
  78    3. At the end of the loop, the jump insn is replaced by the
  79       doloop_end pattern.  The compare must remain because it might be
  80       used elsewhere.  If the loop-variable or condition register are
  81       used elsewhere, they will be eliminated by flow.
  82
  83    4. An optional doloop_begin pattern is inserted at the top of the
  84       loop.
  85
  86    TODO The optimization should only be performed when either the biv used for exit
  87    condition is unused at all except for the exit test, or if we do not have to
  88    change its value, since otherwise we have to add a new induction variable,
  89    which usually will not pay up (unless the cost of the doloop pattern is
  90    somehow extremely lower than the cost of compare & jump, or unless the bct
  91    register cannot be used for anything else but doloop -- ??? detect these
  92    cases).  */
  93
  94 #ifdef HAVE_doloop_end
  95
  96 /* Return the loop termination condition for PATTERN or zero
  97    if it is not a decrement and branch jump insn.  */
  98
  99 rtx
 100 doloop_condition_get (rtx doloop_pat)
 101 {
 102   rtx cmp;
 103   rtx inc;
 104   rtx reg;
 105   rtx inc_src;
 106   rtx condition;
 107   rtx pattern;
 108   rtx cc_reg = NULL_RTX;
 109   rtx reg_orig = NULL_RTX;
 110
 111   /* The canonical doloop pattern we expect has one of the following
 112      forms:
 113
 114      1)  (parallel [(set (pc) (if_then_else (condition)
 115                                             (label_ref (label))
 116                                             (pc)))
 117                      (set (reg) (plus (reg) (const_int -1)))
 118                      (additional clobbers and uses)])
 119
 120      The branch must be the first entry of the parallel (also required
 121      by jump.c), and the second entry of the parallel must be a set of
 122      the loop counter register.  Some targets (IA-64) wrap the set of
 123      the loop counter in an if_then_else too.
 124
 125      2)  (set (reg) (plus (reg) (const_int -1))
 126          (set (pc) (if_then_else (reg != 0)
 127                                  (label_ref (label))
 128                                  (pc))).
 129
 130      Some targets (ARM) do the comparison before the branch, as in the
 131      following form:
 132
 133      3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0)))
 134                    (set (reg) (plus (reg) (const_int -1)))])
 135         (set (pc) (if_then_else (cc == NE)
 136                                 (label_ref (label))
 137                                 (pc))) */
 138
 139   pattern = PATTERN (doloop_pat);
 140
 141   if (GET_CODE (pattern) != PARALLEL)
 142     {
 143       rtx cond;
 144       rtx_insn *prev_insn = prev_nondebug_insn (doloop_pat);
 145       rtx cmp_arg1, cmp_arg2;
 146       rtx cmp_orig;
 147
 148       /* In case the pattern is not PARALLEL we expect two forms
 149          of doloop which are cases 2) and 3) above: in case 2) the
 150          decrement immediately precedes the branch, while in case 3)
 151          the compare and decrement instructions immediately precede
 152          the branch.  */
 153
 154       if (prev_insn == NULL_RTX || !INSN_P (prev_insn))
 155         return 0;
 156
 157       cmp = pattern;
 158       if (GET_CODE (PATTERN (prev_insn)) == PARALLEL)
 159         {
 160           /* The third case: the compare and decrement instructions
 161              immediately precede the branch.  */
 162           cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0);
 163           if (GET_CODE (cmp_orig) != SET)
 164             return 0;
 165           if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE)
 166             return 0;
 167           cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0);
 168           cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1);
 169           if (cmp_arg2 != const0_rtx
 170               || GET_CODE (cmp_arg1) != PLUS)
 171             return 0;
 172           reg_orig = XEXP (cmp_arg1, 0);
 173           if (XEXP (cmp_arg1, 1) != GEN_INT (-1)
 174               || !REG_P (reg_orig))
 175             return 0;
 176           cc_reg = SET_DEST (cmp_orig);
 177
 178           inc = XVECEXP (PATTERN (prev_insn), 0, 1);
 179         }
 180       else
 181         inc = PATTERN (prev_insn);
 182       /* We expect the condition to be of the form (reg != 0)  */
 183       cond = XEXP (SET_SRC (cmp), 0);
 184       if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
 185         return 0;
 186     }
 187   else
 188     {
 189       cmp = XVECEXP (pattern, 0, 0);
 190       inc = XVECEXP (pattern, 0, 1);
 191     }
 192
 193   /* Check for (set (reg) (something)).  */
 194   if (GET_CODE (inc) != SET)
 195     return 0;
 196   reg = SET_DEST (inc);
 197   if (! REG_P (reg))
 198     return 0;
 199
 200   /* Check if something = (plus (reg) (const_int -1)).
 201      On IA-64, this decrement is wrapped in an if_then_else.  */
 202   inc_src = SET_SRC (inc);
 203   if (GET_CODE (inc_src) == IF_THEN_ELSE)
 204     inc_src = XEXP (inc_src, 1);
 205   if (GET_CODE (inc_src) != PLUS
 206       || XEXP (inc_src, 0) != reg
 207       || XEXP (inc_src, 1) != constm1_rtx)
 208     return 0;
 209
 210   /* Check for (set (pc) (if_then_else (condition)
 211                                        (label_ref (label))
 212                                        (pc))).  */
 213   if (GET_CODE (cmp) != SET
 214       || SET_DEST (cmp) != pc_rtx
 215       || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
 216       || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
 217       || XEXP (SET_SRC (cmp), 2) != pc_rtx)
 218     return 0;
 219
 220   /* Extract loop termination condition.  */
 221   condition = XEXP (SET_SRC (cmp), 0);
 222
 223   /* We expect a GE or NE comparison with 0 or 1.  */
 224   if ((GET_CODE (condition) != GE
 225        && GET_CODE (condition) != NE)
 226       || (XEXP (condition, 1) != const0_rtx
 227           && XEXP (condition, 1) != const1_rtx))
 228     return 0;
 229
 230   if ((XEXP (condition, 0) == reg)
 231       /* For the third case:  */
 232       || ((cc_reg != NULL_RTX)
 233           && (XEXP (condition, 0) == cc_reg)
 234           && (reg_orig == reg))
 235       || (GET_CODE (XEXP (condition, 0)) == PLUS
 236           && XEXP (XEXP (condition, 0), 0) == reg))
 237    {
 238      if (GET_CODE (pattern) != PARALLEL)
 239      /*  For the second form we expect:
 240
 241          (set (reg) (plus (reg) (const_int -1))
 242          (set (pc) (if_then_else (reg != 0)
 243                                  (label_ref (label))
 244                                  (pc))).
 245
 246          is equivalent to the following:
 247
 248          (parallel [(set (pc) (if_then_else (reg != 1)
 249                                             (label_ref (label))
 250                                             (pc)))
 251                      (set (reg) (plus (reg) (const_int -1)))
 252                      (additional clobbers and uses)])
 253
 254         For the third form we expect:
 255
 256         (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
 257                    (set (reg) (plus (reg) (const_int -1)))])
 258         (set (pc) (if_then_else (cc == NE)
 259                                 (label_ref (label))
 260                                 (pc)))
 261
 262         which is equivalent to the following:
 263
 264         (parallel [(set (cc) (compare (reg,  1))
 265                    (set (reg) (plus (reg) (const_int -1)))
 266                    (set (pc) (if_then_else (NE == cc)
 267                                            (label_ref (label))
 268                                            (pc))))])
 269
 270         So we return the second form instead for the two cases.
 271
 272      */
 273         condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
 274
 275     return condition;
 276    }
 277
 278   /* ??? If a machine uses a funny comparison, we could return a
 279      canonicalized form here.  */
 280
 281   return 0;
 282 }
 283
 284 /* Return nonzero if the loop specified by LOOP is suitable for
 285    the use of special low-overhead looping instructions.  DESC
 286    describes the number of iterations of the loop.  */
 287
 288 static bool
 289 doloop_valid_p (struct loop *loop, struct niter_desc *desc)
 290 {
 291   basic_block *body = get_loop_body (loop), bb;
 292   rtx_insn *insn;
 293   unsigned i;
 294   bool result = true;
 295
 296   /* Check for loops that may not terminate under special conditions.  */
 297   if (!desc->simple_p
 298       || desc->assumptions
 299       || desc->infinite)
 300     {
 301       /* There are some cases that would require a special attention.
 302          For example if the comparison is LEU and the comparison value
 303          is UINT_MAX then the loop will not terminate.  Similarly, if the
 304          comparison code is GEU and the comparison value is 0, the
 305          loop will not terminate.
 306
 307          If the absolute increment is not 1, the loop can be infinite
 308          even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
 309
 310          ??? We could compute these conditions at run-time and have a
 311          additional jump around the loop to ensure an infinite loop.
 312          However, it is very unlikely that this is the intended
 313          behavior of the loop and checking for these rare boundary
 314          conditions would pessimize all other code.
 315
 316          If the loop is executed only a few times an extra check to
 317          restart the loop could use up most of the benefits of using a
 318          count register loop.  Note however, that normally, this
 319          restart branch would never execute, so it could be predicted
 320          well by the CPU.  We should generate the pessimistic code by
 321          default, and have an option, e.g. -funsafe-loops that would
 322          enable count-register loops in this case.  */
 323       if (dump_file)
 324         fprintf (dump_file, "Doloop: Possible infinite iteration case.\n");
 325       result = false;
 326       goto cleanup;
 327     }
 328
 329   for (i = 0; i < loop->num_nodes; i++)
 330     {
 331       bb = body[i];
 332
 333       for (insn = BB_HEAD (bb);
 334            insn != NEXT_INSN (BB_END (bb));
 335            insn = NEXT_INSN (insn))
 336         {
 337           /* Different targets have different necessities for low-overhead
 338              looping.  Call the back end for each instruction within the loop
 339              to let it decide whether the insn prohibits a low-overhead loop.
 340              It will then return the cause for it to emit to the dump file.  */
 341           const char * invalid = targetm.invalid_within_doloop (insn);
 342           if (invalid)
 343             {
 344               if (dump_file)
 345                 fprintf (dump_file, "Doloop: %s\n", invalid);
 346               result = false;
 347               goto cleanup;
 348             }
 349         }
 350     }
 351   result = true;
 352
 353 cleanup:
 354   free (body);
 355
 356   return result;
 357 }
 358
 359 /* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru
 360    edge.  If the condition is always false, do not do anything.  If it is always
 361    true, redirect E to DEST and return false.  In all other cases, true is
 362    returned.  */
 363
 364 static bool
 365 add_test (rtx cond, edge *e, basic_block dest)
 366 {
 367   rtx_insn *seq, *jump;
 368   rtx_code_label *label;
 369   machine_mode mode;
 370   rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
 371   enum rtx_code code = GET_CODE (cond);
 372   basic_block bb;
 373
 374   mode = GET_MODE (XEXP (cond, 0));
 375   if (mode == VOIDmode)
 376     mode = GET_MODE (XEXP (cond, 1));
 377
 378   start_sequence ();
 379   op0 = force_operand (op0, NULL_RTX);
 380   op1 = force_operand (op1, NULL_RTX);
 381   label = block_label (dest);
 382   do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL, label, -1);
 383
 384   jump = get_last_insn ();
 385   if (!jump || !JUMP_P (jump))
 386     {
 387       /* The condition is always false and the jump was optimized out.  */
 388       end_sequence ();
 389       return true;
 390     }
 391
 392   seq = get_insns ();
 393   end_sequence ();
 394
 395   /* There always is at least the jump insn in the sequence.  */
 396   gcc_assert (seq != NULL_RTX);
 397
 398   bb = split_edge_and_insert (*e, seq);
 399   *e = single_succ_edge (bb);
 400
 401   if (any_uncondjump_p (jump))
 402     {
 403       /* The condition is always true.  */
 404       delete_insn (jump);
 405       redirect_edge_and_branch_force (*e, dest);
 406       return false;
 407     }
 408
 409   JUMP_LABEL (jump) = label;
 410
 411   /* The jump is supposed to handle an unlikely special case.  */
 412   add_int_reg_note (jump, REG_BR_PROB, 0);
 413
 414   LABEL_NUSES (label)++;
 415
 416   make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU);
 417   return true;
 418 }
 419
 420 /* Modify the loop to use the low-overhead looping insn where LOOP
 421    describes the loop, DESC describes the number of iterations of the
 422    loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
 423    end of the loop.  CONDITION is the condition separated from the
 424    DOLOOP_SEQ.  COUNT is the number of iterations of the LOOP.  */
 425
 426 static void
 427 doloop_modify (struct loop *loop, struct niter_desc *desc,
 428                rtx doloop_seq, rtx condition, rtx count)
 429 {
 430   rtx counter_reg;
 431   rtx tmp, noloop = NULL_RTX;
 432   rtx_insn *sequence;
 433   rtx_insn *jump_insn;
 434   rtx_code_label *jump_label;
 435   int nonneg = 0;
 436   bool increment_count;
 437   basic_block loop_end = desc->out_edge->src;
 438   machine_mode mode;
 439   rtx true_prob_val;
 440   widest_int iterations;
 441
 442   jump_insn = BB_END (loop_end);
 443
 444   if (dump_file)
 445     {
 446       fprintf (dump_file, "Doloop: Inserting doloop pattern (");
 447       if (desc->const_iter)
 448         fprintf (dump_file, "%" PRId64, desc->niter);
 449       else
 450         fputs ("runtime", dump_file);
 451       fputs (" iterations).\n", dump_file);
 452     }
 453
 454   /* Get the probability of the original branch. If it exists we would
 455      need to update REG_BR_PROB of the new jump_insn.  */
 456   true_prob_val = find_reg_note (jump_insn, REG_BR_PROB, NULL_RTX);
 457
 458   /* Discard original jump to continue loop.  The original compare
 459      result may still be live, so it cannot be discarded explicitly.  */
 460   delete_insn (jump_insn);
 461
 462   counter_reg = XEXP (condition, 0);
 463   if (GET_CODE (counter_reg) == PLUS)
 464     counter_reg = XEXP (counter_reg, 0);
 465   mode = GET_MODE (counter_reg);
 466
 467   increment_count = false;
 468   switch (GET_CODE (condition))
 469     {
 470     case NE:
 471       /* Currently only NE tests against zero and one are supported.  */
 472       noloop = XEXP (condition, 1);
 473       if (noloop != const0_rtx)
 474         {
 475           gcc_assert (noloop == const1_rtx);
 476           increment_count = true;
 477         }
 478       break;
 479
 480     case GE:
 481       /* Currently only GE tests against zero are supported.  */
 482       gcc_assert (XEXP (condition, 1) == const0_rtx);
 483
 484       noloop = constm1_rtx;
 485
 486       /* The iteration count does not need incrementing for a GE test.  */
 487       increment_count = false;
 488
 489       /* Determine if the iteration counter will be non-negative.
 490          Note that the maximum value loaded is iterations_max - 1.  */
 491       if (get_max_loop_iterations (loop, &iterations)
 492           && wi::leu_p (iterations,
 493                         wi::set_bit_in_zero <widest_int>
 494                         (GET_MODE_PRECISION (mode) - 1)))
 495         nonneg = 1;
 496       break;
 497
 498       /* Abort if an invalid doloop pattern has been generated.  */
 499     default:
 500       gcc_unreachable ();
 501     }
 502
 503   if (increment_count)
 504     count = simplify_gen_binary (PLUS, mode, count, const1_rtx);
 505
 506   /* Insert initialization of the count register into the loop header.  */
 507   start_sequence ();
 508   tmp = force_operand (count, counter_reg);
 509   convert_move (counter_reg, tmp, 1);
 510   sequence = get_insns ();
 511   end_sequence ();
 512   emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
 513
 514   if (desc->noloop_assumptions)
 515     {
 516       rtx ass = copy_rtx (desc->noloop_assumptions);
 517       basic_block preheader = loop_preheader_edge (loop)->src;
 518       basic_block set_zero
 519               = split_edge (loop_preheader_edge (loop));
 520       basic_block new_preheader
 521               = split_edge (loop_preheader_edge (loop));
 522       edge te;
 523
 524       /* Expand the condition testing the assumptions and if it does not pass,
 525          reset the count register to 0.  */
 526       redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader);
 527       set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);
 528
 529       set_zero->count = 0;
 530       set_zero->frequency = 0;
 531
 532       te = single_succ_edge (preheader);
 533       for (; ass; ass = XEXP (ass, 1))
 534         if (!add_test (XEXP (ass, 0), &te, set_zero))
 535           break;
 536
 537       if (ass)
 538         {
 539           /* We reached a condition that is always true.  This is very hard to
 540              reproduce (such a loop does not roll, and thus it would most
 541              likely get optimized out by some of the preceding optimizations).
 542              In fact, I do not have any testcase for it.  However, it would
 543              also be very hard to show that it is impossible, so we must
 544              handle this case.  */
 545           set_zero->count = preheader->count;
 546           set_zero->frequency = preheader->frequency;
 547         }
 548
 549       if (EDGE_COUNT (set_zero->preds) == 0)
 550         {
 551           /* All the conditions were simplified to false, remove the
 552              unreachable set_zero block.  */
 553           delete_basic_block (set_zero);
 554         }
 555       else
 556         {
 557           /* Reset the counter to zero in the set_zero block.  */
 558           start_sequence ();
 559           convert_move (counter_reg, noloop, 0);
 560           sequence = get_insns ();
 561           end_sequence ();
 562           emit_insn_after (sequence, BB_END (set_zero));
 563
 564           set_immediate_dominator (CDI_DOMINATORS, set_zero,
 565                                    recompute_dominator (CDI_DOMINATORS,
 566                                                         set_zero));
 567         }
 568
 569       set_immediate_dominator (CDI_DOMINATORS, new_preheader,
 570                                recompute_dominator (CDI_DOMINATORS,
 571                                                     new_preheader));
 572     }
 573
 574   /* Some targets (eg, C4x) need to initialize special looping
 575      registers.  */
 576 #ifdef HAVE_doloop_begin
 577   {
 578     rtx init;
 579
 580     init = gen_doloop_begin (counter_reg, doloop_seq);
 581     if (init)
 582       {
 583         start_sequence ();
 584         emit_insn (init);
 585         sequence = get_insns ();
 586         end_sequence ();
 587         emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
 588       }
 589   }
 590 #endif
 591
 592   /* Insert the new low-overhead looping insn.  */
 593   emit_jump_insn_after (doloop_seq, BB_END (loop_end));
 594   jump_insn = BB_END (loop_end);
 595   jump_label = block_label (desc->in_edge->dest);
 596   JUMP_LABEL (jump_insn) = jump_label;
 597   LABEL_NUSES (jump_label)++;
 598
 599   /* Ensure the right fallthru edge is marked, for case we have reversed
 600      the condition.  */
 601   desc->in_edge->flags &= ~EDGE_FALLTHRU;
 602   desc->out_edge->flags |= EDGE_FALLTHRU;
 603
 604   /* Add a REG_NONNEG note if the actual or estimated maximum number
 605      of iterations is non-negative.  */
 606   if (nonneg)
 607     add_reg_note (jump_insn, REG_NONNEG, NULL_RTX);
 608
 609   /* Update the REG_BR_PROB note.  */
 610   if (true_prob_val)
 611     {
 612       /* Seems safer to use the branch probability.  */
 613       add_int_reg_note (jump_insn, REG_BR_PROB, desc->in_edge->probability);
 614     }
 615 }
 616
 617 /* Process loop described by LOOP validating that the loop is suitable for
 618    conversion to use a low overhead looping instruction, replacing the jump
 619    insn where suitable.  Returns true if the loop was successfully
 620    modified.  */
 621
 622 static bool
 623 doloop_optimize (struct loop *loop)
 624 {
 625   machine_mode mode;
 626   rtx doloop_seq, doloop_pat, doloop_reg;
 627   rtx count;
 628   widest_int iterations, iterations_max;
 629   rtx_code_label *start_label;
 630   rtx condition;
 631   unsigned level, est_niter;
 632   int max_cost;
 633   struct niter_desc *desc;
 634   unsigned word_mode_size;
 635   unsigned HOST_WIDE_INT word_mode_max;
 636   int entered_at_top;
 637
 638   if (dump_file)
 639     fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);
 640
 641   iv_analysis_loop_init (loop);
 642
 643   /* Find the simple exit of a LOOP.  */
 644   desc = get_simple_loop_desc (loop);
 645
 646   /* Check that loop is a candidate for a low-overhead looping insn.  */
 647   if (!doloop_valid_p (loop, desc))
 648     {
 649       if (dump_file)
 650         fprintf (dump_file,
 651                  "Doloop: The loop is not suitable.\n");
 652       return false;
 653     }
 654   mode = desc->mode;
 655
 656   est_niter = 3;
 657   if (desc->const_iter)
 658     est_niter = desc->niter;
 659   /* If the estimate on number of iterations is reliable (comes from profile
 660      feedback), use it.  Do not use it normally, since the expected number
 661      of iterations of an unrolled loop is 2.  */
 662   if (loop->header->count)
 663     est_niter = expected_loop_iterations (loop);
 664
 665   if (est_niter < 3)
 666     {
 667       if (dump_file)
 668         fprintf (dump_file,
 669                  "Doloop: Too few iterations (%u) to be profitable.\n",
 670                  est_niter);
 671       return false;
 672     }
 673
 674   max_cost
 675     = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST));
 676   if (set_src_cost (desc->niter_expr, optimize_loop_for_speed_p (loop))
 677       > max_cost)
 678     {
 679       if (dump_file)
 680         fprintf (dump_file,
 681                  "Doloop: number of iterations too costly to compute.\n");
 682       return false;
 683     }
 684
 685   if (desc->const_iter)
 686     iterations = widest_int::from (std::make_pair (desc->niter_expr, mode),
 687                                    UNSIGNED);
 688   else
 689     iterations = 0;
 690   if (!get_max_loop_iterations (loop, &iterations_max))
 691     iterations_max = 0;
 692   level = get_loop_level (loop) + 1;
 693   entered_at_top = (loop->latch == desc->in_edge->dest
 694                     && contains_no_active_insn_p (loop->latch));
 695   if (!targetm.can_use_doloop_p (iterations, iterations_max, level,
 696                                  entered_at_top))
 697     {
 698       if (dump_file)
 699         fprintf (dump_file, "Loop rejected by can_use_doloop_p.\n");
 700       return false;
 701     }
 702
 703   /* Generate looping insn.  If the pattern FAILs then give up trying
 704      to modify the loop since there is some aspect the back-end does
 705      not like.  */
 706   count = copy_rtx (desc->niter_expr);
 707   start_label = block_label (desc->in_edge->dest);
 708   doloop_reg = gen_reg_rtx (mode);
 709   doloop_seq = gen_doloop_end (doloop_reg, start_label);
 710
 711   word_mode_size = GET_MODE_PRECISION (word_mode);
 712   word_mode_max
 713           = ((unsigned HOST_WIDE_INT) 1 << (word_mode_size - 1) << 1) - 1;
 714   if (! doloop_seq
 715       && mode != word_mode
 716       /* Before trying mode different from the one in that # of iterations is
 717          computed, we must be sure that the number of iterations fits into
 718          the new mode.  */
 719       && (word_mode_size >= GET_MODE_PRECISION (mode)
 720           || wi::leu_p (iterations_max, word_mode_max)))
 721     {
 722       if (word_mode_size > GET_MODE_PRECISION (mode))
 723         count = simplify_gen_unary (ZERO_EXTEND, word_mode, count, mode);
 724       else
 725         count = lowpart_subreg (word_mode, count, mode);
 726       PUT_MODE (doloop_reg, word_mode);
 727       doloop_seq = gen_doloop_end (doloop_reg, start_label);
 728     }
 729   if (! doloop_seq)
 730     {
 731       if (dump_file)
 732         fprintf (dump_file,
 733                  "Doloop: Target unwilling to use doloop pattern!\n");
 734       return false;
 735     }
 736
 737   /* If multiple instructions were created, the last must be the
 738      jump instruction.  Also, a raw define_insn may yield a plain
 739      pattern.  */
 740   doloop_pat = doloop_seq;
 741   if (INSN_P (doloop_pat))
 742     {
 743       rtx_insn *doloop_insn = as_a <rtx_insn *> (doloop_pat);
 744       while (NEXT_INSN (doloop_insn) != NULL_RTX)
 745         doloop_insn = NEXT_INSN (doloop_insn);
 746       if (!JUMP_P (doloop_insn))
 747         doloop_insn = NULL;
 748       doloop_pat = doloop_insn;
 749     }
 750
 751   if (! doloop_pat
 752       || ! (condition = doloop_condition_get (doloop_pat)))
 753     {
 754       if (dump_file)
 755         fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n");
 756       return false;
 757     }
 758
 759   doloop_modify (loop, desc, doloop_seq, condition, count);
 760   return true;
 761 }
 762
 763 /* This is the main entry point.  Process all loops using doloop_optimize.  */
 764
 765 void
 766 doloop_optimize_loops (void)
 767 {
 768   struct loop *loop;
 769
 770   FOR_EACH_LOOP (loop, 0)
 771     {
 772       doloop_optimize (loop);
 773     }
 774
 775   iv_analysis_done ();
 776
 777 #ifdef ENABLE_CHECKING
 778   verify_loop_structure ();
 779 #endif
 780 }
 781 #endif /* HAVE_doloop_end */
 782