gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2015 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "predict.h"
  27 #include "tree.h"
  28 #include "rtl.h"
  29 #include "df.h"
  30 #include "diagnostic-core.h"
  31 #include "alias.h"
  32 #include "fold-const.h"
  33 #include "stor-layout.h"
  34 #include "tm_p.h"
  35 #include "flags.h"
  36 #include "insn-config.h"
  37 #include "expmed.h"
  38 #include "dojump.h"
  39 #include "explow.h"
  40 #include "calls.h"
  41 #include "emit-rtl.h"
  42 #include "varasm.h"
  43 #include "stmt.h"
  44 #include "expr.h"
  45 #include "insn-codes.h"
  46 #include "optabs.h"
  47 #include "recog.h"
  48 #include "langhooks.h"
  49 #include "target.h"
  50
  51 struct target_expmed default_target_expmed;
  52 #if SWITCHABLE_TARGET
  53 struct target_expmed *this_target_expmed = &default_target_expmed;
  54 #endif
  55
  56 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  57                                    unsigned HOST_WIDE_INT,
  58                                    unsigned HOST_WIDE_INT,
  59                                    unsigned HOST_WIDE_INT,
  60                                    rtx);
  61 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  62                                      unsigned HOST_WIDE_INT,
  63                                      rtx);
  64 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  65                                    unsigned HOST_WIDE_INT,
  66                                    unsigned HOST_WIDE_INT,
  67                                    unsigned HOST_WIDE_INT,
  68                                    rtx);
  69 static rtx extract_fixed_bit_field (machine_mode, rtx,
  70                                     unsigned HOST_WIDE_INT,
  71                                     unsigned HOST_WIDE_INT, rtx, int);
  72 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  73                                       unsigned HOST_WIDE_INT,
  74                                       unsigned HOST_WIDE_INT, rtx, int);
  75 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  76 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  77                                     unsigned HOST_WIDE_INT, int);
  78 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  79 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  80 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  81
  82 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  83    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  84    The mask is truncated if necessary to the width of mode MODE.  The
  85    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  86
  87 static inline rtx
  88 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
  89 {
  90   return immed_wide_int_const
  91     (wi::shifted_mask (bitpos, bitsize, complement,
  92                        GET_MODE_PRECISION (mode)), mode);
  93 }
  94
  95 /* Test whether a value is zero of a power of two.  */
  96 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  97   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
  98
  99 struct init_expmed_rtl
 100 {
 101   rtx reg;
 102   rtx plus;
 103   rtx neg;
 104   rtx mult;
 105   rtx sdiv;
 106   rtx udiv;
 107   rtx sdiv_32;
 108   rtx smod_32;
 109   rtx wide_mult;
 110   rtx wide_lshr;
 111   rtx wide_trunc;
 112   rtx shift;
 113   rtx shift_mult;
 114   rtx shift_add;
 115   rtx shift_sub0;
 116   rtx shift_sub1;
 117   rtx zext;
 118   rtx trunc;
 119
 120   rtx pow2[MAX_BITS_PER_WORD];
 121   rtx cint[MAX_BITS_PER_WORD];
 122 };
 123
 124 static void
 125 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 126                       machine_mode from_mode, bool speed)
 127 {
 128   int to_size, from_size;
 129   rtx which;
 130
 131   to_size = GET_MODE_PRECISION (to_mode);
 132   from_size = GET_MODE_PRECISION (from_mode);
 133
 134   /* Most partial integers have a precision less than the "full"
 135      integer it requires for storage.  In case one doesn't, for
 136      comparison purposes here, reduce the bit size by one in that
 137      case.  */
 138   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 139       && exact_log2 (to_size) != -1)
 140     to_size --;
 141   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 142       && exact_log2 (from_size) != -1)
 143     from_size --;
 144
 145   /* Assume cost of zero-extend and sign-extend is the same.  */
 146   which = (to_size < from_size ? all->trunc : all->zext);
 147
 148   PUT_MODE (all->reg, from_mode);
 149   set_convert_cost (to_mode, from_mode, speed,
 150                     set_src_cost (which, to_mode, speed));
 151 }
 152
 153 static void
 154 init_expmed_one_mode (struct init_expmed_rtl *all,
 155                       machine_mode mode, int speed)
 156 {
 157   int m, n, mode_bitsize;
 158   machine_mode mode_from;
 159
 160   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 161
 162   PUT_MODE (all->reg, mode);
 163   PUT_MODE (all->plus, mode);
 164   PUT_MODE (all->neg, mode);
 165   PUT_MODE (all->mult, mode);
 166   PUT_MODE (all->sdiv, mode);
 167   PUT_MODE (all->udiv, mode);
 168   PUT_MODE (all->sdiv_32, mode);
 169   PUT_MODE (all->smod_32, mode);
 170   PUT_MODE (all->wide_trunc, mode);
 171   PUT_MODE (all->shift, mode);
 172   PUT_MODE (all->shift_mult, mode);
 173   PUT_MODE (all->shift_add, mode);
 174   PUT_MODE (all->shift_sub0, mode);
 175   PUT_MODE (all->shift_sub1, mode);
 176   PUT_MODE (all->zext, mode);
 177   PUT_MODE (all->trunc, mode);
 178
 179   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 180   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 181   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 182   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 183   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 184
 185   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 186                                      <= 2 * add_cost (speed, mode)));
 187   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 188                                      <= 4 * add_cost (speed, mode)));
 189
 190   set_shift_cost (speed, mode, 0, 0);
 191   {
 192     int cost = add_cost (speed, mode);
 193     set_shiftadd_cost (speed, mode, 0, cost);
 194     set_shiftsub0_cost (speed, mode, 0, cost);
 195     set_shiftsub1_cost (speed, mode, 0, cost);
 196   }
 197
 198   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 199   for (m = 1; m < n; m++)
 200     {
 201       XEXP (all->shift, 1) = all->cint[m];
 202       XEXP (all->shift_mult, 1) = all->pow2[m];
 203
 204       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 205       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 206                                                        speed));
 207       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 208                                                         speed));
 209       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 210                                                         speed));
 211     }
 212
 213   if (SCALAR_INT_MODE_P (mode))
 214     {
 215       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 216            mode_from = (machine_mode)(mode_from + 1))
 217         init_expmed_one_conv (all, mode, mode_from, speed);
 218     }
 219   if (GET_MODE_CLASS (mode) == MODE_INT)
 220     {
 221       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 222       if (wider_mode != VOIDmode)
 223         {
 224           PUT_MODE (all->zext, wider_mode);
 225           PUT_MODE (all->wide_mult, wider_mode);
 226           PUT_MODE (all->wide_lshr, wider_mode);
 227           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 228
 229           set_mul_widen_cost (speed, wider_mode,
 230                               set_src_cost (all->wide_mult, wider_mode, speed));
 231           set_mul_highpart_cost (speed, mode,
 232                                  set_src_cost (all->wide_trunc, mode, speed));
 233         }
 234     }
 235 }
 236
 237 void
 238 init_expmed (void)
 239 {
 240   struct init_expmed_rtl all;
 241   machine_mode mode = QImode;
 242   int m, speed;
 243
 244   memset (&all, 0, sizeof all);
 245   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 246     {
 247       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 248       all.cint[m] = GEN_INT (m);
 249     }
 250
 251   /* Avoid using hard regs in ways which may be unsupported.  */
 252   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 253   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 254   all.neg = gen_rtx_NEG (mode, all.reg);
 255   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 256   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 257   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 258   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 259   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 260   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 261   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 262   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 263   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 264   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 265   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 266   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 267   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 268   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 269   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 270
 271   for (speed = 0; speed < 2; speed++)
 272     {
 273       crtl->maybe_hot_insn_p = speed;
 274       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 275
 276       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 277            mode = (machine_mode)(mode + 1))
 278         init_expmed_one_mode (&all, mode, speed);
 279
 280       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 281         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 282              mode = (machine_mode)(mode + 1))
 283           init_expmed_one_mode (&all, mode, speed);
 284
 285       if (MIN_MODE_VECTOR_INT != VOIDmode)
 286         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 287              mode = (machine_mode)(mode + 1))
 288           init_expmed_one_mode (&all, mode, speed);
 289     }
 290
 291   if (alg_hash_used_p ())
 292     {
 293       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 294       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 295     }
 296   else
 297     set_alg_hash_used_p (true);
 298   default_rtl_profile ();
 299
 300   ggc_free (all.trunc);
 301   ggc_free (all.shift_sub1);
 302   ggc_free (all.shift_sub0);
 303   ggc_free (all.shift_add);
 304   ggc_free (all.shift_mult);
 305   ggc_free (all.shift);
 306   ggc_free (all.wide_trunc);
 307   ggc_free (all.wide_lshr);
 308   ggc_free (all.wide_mult);
 309   ggc_free (all.zext);
 310   ggc_free (all.smod_32);
 311   ggc_free (all.sdiv_32);
 312   ggc_free (all.udiv);
 313   ggc_free (all.sdiv);
 314   ggc_free (all.mult);
 315   ggc_free (all.neg);
 316   ggc_free (all.plus);
 317   ggc_free (all.reg);
 318 }
 319
 320 /* Return an rtx representing minus the value of X.
 321    MODE is the intended mode of the result,
 322    useful if X is a CONST_INT.  */
 323
 324 rtx
 325 negate_rtx (machine_mode mode, rtx x)
 326 {
 327   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 328
 329   if (result == 0)
 330     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 331
 332   return result;
 333 }
 334
 335 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 336    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 337    If MODE is BLKmode, return a reference to every byte in the bitfield.
 338    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 339
 340 static rtx
 341 narrow_bit_field_mem (rtx mem, machine_mode mode,
 342                       unsigned HOST_WIDE_INT bitsize,
 343                       unsigned HOST_WIDE_INT bitnum,
 344                       unsigned HOST_WIDE_INT *new_bitnum)
 345 {
 346   if (mode == BLKmode)
 347     {
 348       *new_bitnum = bitnum % BITS_PER_UNIT;
 349       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 350       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 351                             / BITS_PER_UNIT);
 352       return adjust_bitfield_address_size (mem, mode, offset, size);
 353     }
 354   else
 355     {
 356       unsigned int unit = GET_MODE_BITSIZE (mode);
 357       *new_bitnum = bitnum % unit;
 358       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 359       return adjust_bitfield_address (mem, mode, offset);
 360     }
 361 }
 362
 363 /* The caller wants to perform insertion or extraction PATTERN on a
 364    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 365    BITREGION_START and BITREGION_END are as for store_bit_field
 366    and FIELDMODE is the natural mode of the field.
 367
 368    Search for a mode that is compatible with the memory access
 369    restrictions and (where applicable) with a register insertion or
 370    extraction.  Return the new memory on success, storing the adjusted
 371    bit position in *NEW_BITNUM.  Return null otherwise.  */
 372
 373 static rtx
 374 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 375                               rtx op0, HOST_WIDE_INT bitsize,
 376                               HOST_WIDE_INT bitnum,
 377                               unsigned HOST_WIDE_INT bitregion_start,
 378                               unsigned HOST_WIDE_INT bitregion_end,
 379                               machine_mode fieldmode,
 380                               unsigned HOST_WIDE_INT *new_bitnum)
 381 {
 382   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 383                                 bitregion_end, MEM_ALIGN (op0),
 384                                 MEM_VOLATILE_P (op0));
 385   machine_mode best_mode;
 386   if (iter.next_mode (&best_mode))
 387     {
 388       /* We can use a memory in BEST_MODE.  See whether this is true for
 389          any wider modes.  All other things being equal, we prefer to
 390          use the widest mode possible because it tends to expose more
 391          CSE opportunities.  */
 392       if (!iter.prefer_smaller_modes ())
 393         {
 394           /* Limit the search to the mode required by the corresponding
 395              register insertion or extraction instruction, if any.  */
 396           machine_mode limit_mode = word_mode;
 397           extraction_insn insn;
 398           if (get_best_reg_extraction_insn (&insn, pattern,
 399                                             GET_MODE_BITSIZE (best_mode),
 400                                             fieldmode))
 401             limit_mode = insn.field_mode;
 402
 403           machine_mode wider_mode;
 404           while (iter.next_mode (&wider_mode)
 405                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 406             best_mode = wider_mode;
 407         }
 408       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 409                                    new_bitnum);
 410     }
 411   return NULL_RTX;
 412 }
 413
 414 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 415    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 416    offset is then BITNUM / BITS_PER_UNIT.  */
 417
 418 static bool
 419 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 420                      unsigned HOST_WIDE_INT bitsize,
 421                      machine_mode struct_mode)
 422 {
 423   if (BYTES_BIG_ENDIAN)
 424     return (bitnum % BITS_PER_UNIT == 0
 425             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 426                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 427   else
 428     return bitnum % BITS_PER_WORD == 0;
 429 }
 430
 431 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 432    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 433    Return false if the access would touch memory outside the range
 434    BITREGION_START to BITREGION_END for conformance to the C++ memory
 435    model.  */
 436
 437 static bool
 438 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 439                             unsigned HOST_WIDE_INT bitnum,
 440                             machine_mode fieldmode,
 441                             unsigned HOST_WIDE_INT bitregion_start,
 442                             unsigned HOST_WIDE_INT bitregion_end)
 443 {
 444   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 445
 446   /* -fstrict-volatile-bitfields must be enabled and we must have a
 447      volatile MEM.  */
 448   if (!MEM_P (op0)
 449       || !MEM_VOLATILE_P (op0)
 450       || flag_strict_volatile_bitfields <= 0)
 451     return false;
 452
 453   /* Non-integral modes likely only happen with packed structures.
 454      Punt.  */
 455   if (!SCALAR_INT_MODE_P (fieldmode))
 456     return false;
 457
 458   /* The bit size must not be larger than the field mode, and
 459      the field mode must not be larger than a word.  */
 460   if (bitsize > modesize || modesize > BITS_PER_WORD)
 461     return false;
 462
 463   /* Check for cases of unaligned fields that must be split.  */
 464   if (bitnum % modesize + bitsize > modesize)
 465     return false;
 466
 467   /* The memory must be sufficiently aligned for a MODESIZE access.
 468      This condition guarantees, that the memory access will not
 469      touch anything after the end of the structure.  */
 470   if (MEM_ALIGN (op0) < modesize)
 471     return false;
 472
 473   /* Check for cases where the C++ memory model applies.  */
 474   if (bitregion_end != 0
 475       && (bitnum - bitnum % modesize < bitregion_start
 476           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 477     return false;
 478
 479   return true;
 480 }
 481
 482 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 483    bit number BITNUM can be treated as a simple value of mode MODE.  */
 484
 485 static bool
 486 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 487                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 488 {
 489   return (MEM_P (op0)
 490           && bitnum % BITS_PER_UNIT == 0
 491           && bitsize == GET_MODE_BITSIZE (mode)
 492           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 493               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 494                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 495 }
 496 \f
 497 /* Try to use instruction INSV to store VALUE into a field of OP0.
 498    BITSIZE and BITNUM are as for store_bit_field.  */
 499
 500 static bool
 501 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 502                             unsigned HOST_WIDE_INT bitsize,
 503                             unsigned HOST_WIDE_INT bitnum,
 504                             rtx value)
 505 {
 506   struct expand_operand ops[4];
 507   rtx value1;
 508   rtx xop0 = op0;
 509   rtx_insn *last = get_last_insn ();
 510   bool copy_back = false;
 511
 512   machine_mode op_mode = insv->field_mode;
 513   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 514   if (bitsize == 0 || bitsize > unit)
 515     return false;
 516
 517   if (MEM_P (xop0))
 518     /* Get a reference to the first byte of the field.  */
 519     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 520                                  &bitnum);
 521   else
 522     {
 523       /* Convert from counting within OP0 to counting in OP_MODE.  */
 524       if (BYTES_BIG_ENDIAN)
 525         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 526
 527       /* If xop0 is a register, we need it in OP_MODE
 528          to make it acceptable to the format of insv.  */
 529       if (GET_CODE (xop0) == SUBREG)
 530         /* We can't just change the mode, because this might clobber op0,
 531            and we will need the original value of op0 if insv fails.  */
 532         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 533       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 534         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 535     }
 536
 537   /* If the destination is a paradoxical subreg such that we need a
 538      truncate to the inner mode, perform the insertion on a temporary and
 539      truncate the result to the original destination.  Note that we can't
 540      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 541      X) 0)) is (reg:N X).  */
 542   if (GET_CODE (xop0) == SUBREG
 543       && REG_P (SUBREG_REG (xop0))
 544       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 545                                          op_mode))
 546     {
 547       rtx tem = gen_reg_rtx (op_mode);
 548       emit_move_insn (tem, xop0);
 549       xop0 = tem;
 550       copy_back = true;
 551     }
 552
 553   /* There are similar overflow check at the start of store_bit_field_1,
 554      but that only check the situation where the field lies completely
 555      outside the register, while there do have situation where the field
 556      lies partialy in the register, we need to adjust bitsize for this
 557      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
 558      will broken on those arch support bit insert instruction, like arm, aarch64
 559      etc.  */
 560   if (bitsize + bitnum > unit && bitnum < unit)
 561     {
 562       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 563                "destination object, data truncated into %wu-bit",
 564                bitsize, unit - bitnum);
 565       bitsize = unit - bitnum;
 566     }
 567
 568   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 569      "backwards" from the size of the unit we are inserting into.
 570      Otherwise, we count bits from the most significant on a
 571      BYTES/BITS_BIG_ENDIAN machine.  */
 572
 573   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 574     bitnum = unit - bitsize - bitnum;
 575
 576   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 577   value1 = value;
 578   if (GET_MODE (value) != op_mode)
 579     {
 580       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 581         {
 582           /* Optimization: Don't bother really extending VALUE
 583              if it has all the bits we will actually use.  However,
 584              if we must narrow it, be sure we do it correctly.  */
 585
 586           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 587             {
 588               rtx tmp;
 589
 590               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 591               if (! tmp)
 592                 tmp = simplify_gen_subreg (op_mode,
 593                                            force_reg (GET_MODE (value),
 594                                                       value1),
 595                                            GET_MODE (value), 0);
 596               value1 = tmp;
 597             }
 598           else
 599             value1 = gen_lowpart (op_mode, value1);
 600         }
 601       else if (CONST_INT_P (value))
 602         value1 = gen_int_mode (INTVAL (value), op_mode);
 603       else
 604         /* Parse phase is supposed to make VALUE's data type
 605            match that of the component reference, which is a type
 606            at least as wide as the field; so VALUE should have
 607            a mode that corresponds to that type.  */
 608         gcc_assert (CONSTANT_P (value));
 609     }
 610
 611   create_fixed_operand (&ops[0], xop0);
 612   create_integer_operand (&ops[1], bitsize);
 613   create_integer_operand (&ops[2], bitnum);
 614   create_input_operand (&ops[3], value1, op_mode);
 615   if (maybe_expand_insn (insv->icode, 4, ops))
 616     {
 617       if (copy_back)
 618         convert_move (op0, xop0, true);
 619       return true;
 620     }
 621   delete_insns_since (last);
 622   return false;
 623 }
 624
 625 /* A subroutine of store_bit_field, with the same arguments.  Return true
 626    if the operation could be implemented.
 627
 628    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 629    no other way of implementing the operation.  If FALLBACK_P is false,
 630    return false instead.  */
 631
 632 static bool
 633 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 634                    unsigned HOST_WIDE_INT bitnum,
 635                    unsigned HOST_WIDE_INT bitregion_start,
 636                    unsigned HOST_WIDE_INT bitregion_end,
 637                    machine_mode fieldmode,
 638                    rtx value, bool fallback_p)
 639 {
 640   rtx op0 = str_rtx;
 641   rtx orig_value;
 642
 643   while (GET_CODE (op0) == SUBREG)
 644     {
 645       /* The following line once was done only if WORDS_BIG_ENDIAN,
 646          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 647          meaningful at a much higher level; when structures are copied
 648          between memory and regs, the higher-numbered regs
 649          always get higher addresses.  */
 650       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 651       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 652       int byte_offset = 0;
 653
 654       /* Paradoxical subregs need special handling on big endian machines.  */
 655       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 656         {
 657           int difference = inner_mode_size - outer_mode_size;
 658
 659           if (WORDS_BIG_ENDIAN)
 660             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 661           if (BYTES_BIG_ENDIAN)
 662             byte_offset += difference % UNITS_PER_WORD;
 663         }
 664       else
 665         byte_offset = SUBREG_BYTE (op0);
 666
 667       bitnum += byte_offset * BITS_PER_UNIT;
 668       op0 = SUBREG_REG (op0);
 669     }
 670
 671   /* No action is needed if the target is a register and if the field
 672      lies completely outside that register.  This can occur if the source
 673      code contains an out-of-bounds access to a small array.  */
 674   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 675     return true;
 676
 677   /* Use vec_set patterns for inserting parts of vectors whenever
 678      available.  */
 679   if (VECTOR_MODE_P (GET_MODE (op0))
 680       && !MEM_P (op0)
 681       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 682       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 683       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 684       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 685     {
 686       struct expand_operand ops[3];
 687       machine_mode outermode = GET_MODE (op0);
 688       machine_mode innermode = GET_MODE_INNER (outermode);
 689       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 690       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 691
 692       create_fixed_operand (&ops[0], op0);
 693       create_input_operand (&ops[1], value, innermode);
 694       create_integer_operand (&ops[2], pos);
 695       if (maybe_expand_insn (icode, 3, ops))
 696         return true;
 697     }
 698
 699   /* If the target is a register, overwriting the entire object, or storing
 700      a full-word or multi-word field can be done with just a SUBREG.  */
 701   if (!MEM_P (op0)
 702       && bitsize == GET_MODE_BITSIZE (fieldmode)
 703       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 704           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 705     {
 706       /* Use the subreg machinery either to narrow OP0 to the required
 707          words or to cope with mode punning between equal-sized modes.
 708          In the latter case, use subreg on the rhs side, not lhs.  */
 709       rtx sub;
 710
 711       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 712         {
 713           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 714           if (sub)
 715             {
 716               emit_move_insn (op0, sub);
 717               return true;
 718             }
 719         }
 720       else
 721         {
 722           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 723                                      bitnum / BITS_PER_UNIT);
 724           if (sub)
 725             {
 726               emit_move_insn (sub, value);
 727               return true;
 728             }
 729         }
 730     }
 731
 732   /* If the target is memory, storing any naturally aligned field can be
 733      done with a simple store.  For targets that support fast unaligned
 734      memory, any naturally sized, unit aligned field can be done directly.  */
 735   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 736     {
 737       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 738       emit_move_insn (op0, value);
 739       return true;
 740     }
 741
 742   /* Make sure we are playing with integral modes.  Pun with subregs
 743      if we aren't.  This must come after the entire register case above,
 744      since that case is valid for any mode.  The following cases are only
 745      valid for integral modes.  */
 746   {
 747     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 748     if (imode != GET_MODE (op0))
 749       {
 750         if (MEM_P (op0))
 751           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 752         else
 753           {
 754             gcc_assert (imode != BLKmode);
 755             op0 = gen_lowpart (imode, op0);
 756           }
 757       }
 758   }
 759
 760   /* Storing an lsb-aligned field in a register
 761      can be done with a movstrict instruction.  */
 762
 763   if (!MEM_P (op0)
 764       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 765       && bitsize == GET_MODE_BITSIZE (fieldmode)
 766       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 767     {
 768       struct expand_operand ops[2];
 769       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 770       rtx arg0 = op0;
 771       unsigned HOST_WIDE_INT subreg_off;
 772
 773       if (GET_CODE (arg0) == SUBREG)
 774         {
 775           /* Else we've got some float mode source being extracted into
 776              a different float mode destination -- this combination of
 777              subregs results in Severe Tire Damage.  */
 778           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 779                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 780                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 781           arg0 = SUBREG_REG (arg0);
 782         }
 783
 784       subreg_off = bitnum / BITS_PER_UNIT;
 785       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 786         {
 787           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 788
 789           create_fixed_operand (&ops[0], arg0);
 790           /* Shrink the source operand to FIELDMODE.  */
 791           create_convert_operand_to (&ops[1], value, fieldmode, false);
 792           if (maybe_expand_insn (icode, 2, ops))
 793             return true;
 794         }
 795     }
 796
 797   /* Handle fields bigger than a word.  */
 798
 799   if (bitsize > BITS_PER_WORD)
 800     {
 801       /* Here we transfer the words of the field
 802          in the order least significant first.
 803          This is because the most significant word is the one which may
 804          be less than full.
 805          However, only do that if the value is not BLKmode.  */
 806
 807       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 808       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 809       unsigned int i;
 810       rtx_insn *last;
 811
 812       /* This is the mode we must force value to, so that there will be enough
 813          subwords to extract.  Note that fieldmode will often (always?) be
 814          VOIDmode, because that is what store_field uses to indicate that this
 815          is a bit field, but passing VOIDmode to operand_subword_force
 816          is not allowed.  */
 817       fieldmode = GET_MODE (value);
 818       if (fieldmode == VOIDmode)
 819         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 820
 821       last = get_last_insn ();
 822       for (i = 0; i < nwords; i++)
 823         {
 824           /* If I is 0, use the low-order word in both field and target;
 825              if I is 1, use the next to lowest word; and so on.  */
 826           unsigned int wordnum = (backwards
 827                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 828                                   - i - 1
 829                                   : i);
 830           unsigned int bit_offset = (backwards
 831                                      ? MAX ((int) bitsize - ((int) i + 1)
 832                                             * BITS_PER_WORD,
 833                                             0)
 834                                      : (int) i * BITS_PER_WORD);
 835           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 836           unsigned HOST_WIDE_INT new_bitsize =
 837             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 838
 839           /* If the remaining chunk doesn't have full wordsize we have
 840              to make sure that for big endian machines the higher order
 841              bits are used.  */
 842           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 843             value_word = simplify_expand_binop (word_mode, lshr_optab,
 844                                                 value_word,
 845                                                 GEN_INT (BITS_PER_WORD
 846                                                          - new_bitsize),
 847                                                 NULL_RTX, true,
 848                                                 OPTAB_LIB_WIDEN);
 849
 850           if (!store_bit_field_1 (op0, new_bitsize,
 851                                   bitnum + bit_offset,
 852                                   bitregion_start, bitregion_end,
 853                                   word_mode,
 854                                   value_word, fallback_p))
 855             {
 856               delete_insns_since (last);
 857               return false;
 858             }
 859         }
 860       return true;
 861     }
 862
 863   /* If VALUE has a floating-point or complex mode, access it as an
 864      integer of the corresponding size.  This can occur on a machine
 865      with 64 bit registers that uses SFmode for float.  It can also
 866      occur for unaligned float or complex fields.  */
 867   orig_value = value;
 868   if (GET_MODE (value) != VOIDmode
 869       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 870       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 871     {
 872       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 873       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 874     }
 875
 876   /* If OP0 is a multi-word register, narrow it to the affected word.
 877      If the region spans two words, defer to store_split_bit_field.  */
 878   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 879     {
 880       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 881                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 882       gcc_assert (op0);
 883       bitnum %= BITS_PER_WORD;
 884       if (bitnum + bitsize > BITS_PER_WORD)
 885         {
 886           if (!fallback_p)
 887             return false;
 888
 889           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 890                                  bitregion_end, value);
 891           return true;
 892         }
 893     }
 894
 895   /* From here on we can assume that the field to be stored in fits
 896      within a word.  If the destination is a register, it too fits
 897      in a word.  */
 898
 899   extraction_insn insv;
 900   if (!MEM_P (op0)
 901       && get_best_reg_extraction_insn (&insv, EP_insv,
 902                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 903                                        fieldmode)
 904       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 905     return true;
 906
 907   /* If OP0 is a memory, try copying it to a register and seeing if a
 908      cheap register alternative is available.  */
 909   if (MEM_P (op0))
 910     {
 911       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
 912                                         fieldmode)
 913           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 914         return true;
 915
 916       rtx_insn *last = get_last_insn ();
 917
 918       /* Try loading part of OP0 into a register, inserting the bitfield
 919          into that, and then copying the result back to OP0.  */
 920       unsigned HOST_WIDE_INT bitpos;
 921       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
 922                                                bitregion_start, bitregion_end,
 923                                                fieldmode, &bitpos);
 924       if (xop0)
 925         {
 926           rtx tempreg = copy_to_reg (xop0);
 927           if (store_bit_field_1 (tempreg, bitsize, bitpos,
 928                                  bitregion_start, bitregion_end,
 929                                  fieldmode, orig_value, false))
 930             {
 931               emit_move_insn (xop0, tempreg);
 932               return true;
 933             }
 934           delete_insns_since (last);
 935         }
 936     }
 937
 938   if (!fallback_p)
 939     return false;
 940
 941   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
 942                          bitregion_end, value);
 943   return true;
 944 }
 945
 946 /* Generate code to store the value of rtx VALUE
 947    into a bit-field within structure STR_RTX
 948    containing BITSIZE bits starting at bit BITNUM.
 949
 950    BITREGION_START is the bit position of the first bitfield in this region.
 951    BITREGION_END is the bit position of the ending bitfield in this region.
 952    Both are 0 if the C++ memory model does not apply,
 953    or we are not interested in keeping track of bitfield regions.
 954
 955    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
        
        The store is attempted in this order:
        1. If strict_volatile_bitfield_p accepts the access, it is done as a
           single FIELDMODE-sized access and the function returns.
        2. Otherwise, for a MEM with a nonzero BITREGION_START, the address
           and all bit positions are rebased to the start of the bit region.
        3. The store is handed to store_bit_field_1 with fallback enabled;
           a failure there is treated as unreachable.  */
 956
 957 void
 958 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 959                  unsigned HOST_WIDE_INT bitnum,
 960                  unsigned HOST_WIDE_INT bitregion_start,
 961                  unsigned HOST_WIDE_INT bitregion_end,
 962                  machine_mode fieldmode,
 963                  rtx value)
 964 {
 965   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
 966   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
 967                                   bitregion_start, bitregion_end))
 968     {
 969       /* A field that fills FIELDMODE exactly can be done with a simple
 970          store.  We know here that the field can be accessed with one single
 971          instruction.  For targets that support unaligned memory,
 972          an unaligned access may be necessary.  */
 973       if (bitsize == GET_MODE_BITSIZE (fieldmode))
 974         {
 975           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
 976                                              bitnum / BITS_PER_UNIT);
               /* The byte offset used above is only exact if the field starts
                  on a byte boundary.  */
 977           gcc_assert (bitnum % BITS_PER_UNIT == 0);
 978           emit_move_insn (str_rtx, value);
 979         }
 980       else
 981         {
 982           rtx temp;
 983
               /* Narrow the MEM to a FIELDMODE access; BITNUM is updated in
                  place to the position within that narrowed access.  */
 984           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
 985                                           &bitnum);
 986           gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
               /* Read-modify-write: load the containing FIELDMODE unit into a
                  register, insert the field there (bit region arguments are
                  passed as 0), then write the whole unit back.  */
 987           temp = copy_to_reg (str_rtx);
 988           if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
 989                                   fieldmode, value, true))
 990             gcc_unreachable ();
 991
 992           emit_move_insn (str_rtx, temp);
 993         }
 994
 995       return;
 996     }
 997
 998   /* Under the C++0x memory model, we must not touch bits outside the
 999      bit region.  Adjust the address to start at the beginning of the
1000      bit region.  */
1001   if (MEM_P (str_rtx) && bitregion_start > 0)
1002     {
1003       machine_mode bestmode;
1004       HOST_WIDE_INT offset, size;
1005
1006       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1007
           /* OFFSET is the byte offset of the region start.  BITNUM and
              BITREGION_END become relative to that start, and SIZE is the
              number of bytes from the region start through the end of the
              field, rounded up.  */
1008       offset = bitregion_start / BITS_PER_UNIT;
1009       bitnum -= bitregion_start;
1010       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1011       bitregion_end -= bitregion_start;
1012       bitregion_start = 0;
1013       bestmode = get_best_mode (bitsize, bitnum,
1014                                 bitregion_start, bitregion_end,
1015                                 MEM_ALIGN (str_rtx), VOIDmode,
1016                                 MEM_VOLATILE_P (str_rtx));
1017       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1018     }
1019
       /* General case.  FALLBACK_P is true, so store_bit_field_1 is expected
          to always succeed.  */
1020   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1021                           bitregion_start, bitregion_end,
1022                           fieldmode, value, true))
1023     gcc_unreachable ();
1024 }
1025 \f
1026 /* Use shifts and boolean operations to store VALUE into a bit field of
1027    width BITSIZE in OP0, starting at bit BITNUM.
        
        BITREGION_START and BITREGION_END are forwarded to get_best_mode and
        to store_split_bit_field; they are only used when OP0 is a MEM.
        
        For a MEM, an access mode is chosen first and OP0 is narrowed to it;
        if no single mode can cover the field, the store is delegated to
        store_split_bit_field.  A non-MEM OP0 is passed straight to
        store_fixed_bit_field_1.  */
1028
1029 static void
1030 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1031                        unsigned HOST_WIDE_INT bitnum,
1032                        unsigned HOST_WIDE_INT bitregion_start,
1033                        unsigned HOST_WIDE_INT bitregion_end,
1034                        rtx value)
1035 {
1036   /* There is a case not handled here:
1037      a structure with a known alignment of just a halfword
1038      and a field split across two aligned halfwords within the structure.
1039      Or likewise a structure with a known alignment of just a byte
1040      and a field split across two bytes.
1041      Such cases are not supposed to be able to occur.  */
1042
1043   if (MEM_P (op0))
1044     {
1045       machine_mode mode = GET_MODE (op0);
           /* Limit the largest mode get_best_mode may pick to word_mode when
              OP0's own mode has no bit size or is wider than a word.  */
1046       if (GET_MODE_BITSIZE (mode) == 0
1047           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1048         mode = word_mode;
1049       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1050                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1051
1052       if (mode == VOIDmode)
1053         {
1054           /* The only way this should occur is if the field spans word
1055              boundaries.  */
1056           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1057                                  bitregion_end, value);
1058           return;
1059         }
1060
           /* Re-address OP0 as a MODE-sized access; BITNUM is rewritten to
              the field position within that access.  */
1061       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1062     }
1063
1064   store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1065 }
1066
1067 /* Helper function for store_fixed_bit_field, stores
1068    the bit field always using the MODE of OP0.
        
        OP0 must have a scalar integer mode (asserted below).  BITSIZE is the
        field width and BITNUM its position, counted from the msb when
        BYTES_BIG_ENDIAN and converted here to a distance from the lsb.
        
        The store is emitted as: position VALUE at BITNUM, clear the field
        bits in a register copy of OP0, OR the positioned VALUE in, and move
        the result back to OP0.  The clear is skipped when a constant VALUE
        has every field bit set, and the OR is skipped when a constant VALUE
        is zero within the field.  */
1069
1070 static void
1071 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1072                          unsigned HOST_WIDE_INT bitnum,
1073                          rtx value)
1074 {
1075   machine_mode mode;
1076   rtx temp;
       /* Set only for a CONST_INT VALUE: the field bits are all 0 / all 1.  */
1077   int all_zero = 0;
1078   int all_one = 0;
1079
1080   mode = GET_MODE (op0);
1081   gcc_assert (SCALAR_INT_MODE_P (mode));
1082
1083   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1084      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1085
1086   if (BYTES_BIG_ENDIAN)
1087     /* BITNUM is the distance between our msb
1088        and that of the containing datum.
1089        Convert it to the distance from the lsb.  */
1090     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1091
1092   /* Now BITNUM is always the distance between our lsb
1093      and that of OP0.  */
1094
1095   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1096      we must first convert its mode to MODE.  */
1097
1098   if (CONST_INT_P (value))
1099     {
1100       unsigned HOST_WIDE_INT v = UINTVAL (value);
1101
           /* Truncate the constant to the field width.  The shift is guarded
              so that it is never performed by the full width of
              HOST_WIDE_INT.  */
1102       if (bitsize < HOST_BITS_PER_WIDE_INT)
1103         v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1104
1105       if (v == 0)
1106         all_zero = 1;
1107       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1108                 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1109                || (bitsize == HOST_BITS_PER_WIDE_INT
1110                    && v == (unsigned HOST_WIDE_INT) -1))
1111         all_one = 1;
1112
1113       value = lshift_value (mode, v, bitnum);
1114     }
1115   else
1116     {
           /* Masking VALUE to BITSIZE bits is skipped when VALUE's mode is
              already exactly BITSIZE bits wide, or when the field ends at
              the top bit of MODE (presumably because the left shift below
              then discards any excess high bits -- confirm).  The test uses
              VALUE's mode before the conversion below.  */
1117       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1118                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1119
1120       if (GET_MODE (value) != mode)
1121         value = convert_to_mode (mode, value, 1);
1122
1123       if (must_and)
1124         value = expand_binop (mode, and_optab, value,
1125                               mask_rtx (mode, 0, bitsize, 0),
1126                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1127       if (bitnum > 0)
1128         value = expand_shift (LSHIFT_EXPR, mode, value,
1129                               bitnum, NULL_RTX, 1);
1130     }
1131
1132   /* Now clear the chosen bits in OP0,
1133      except that if VALUE is all ones within the field we need not bother.  */
1134   /* We keep the intermediates in registers to allow CSE to combine
1135      consecutive bitfield assignments.  */
1136
1137   temp = force_reg (mode, op0);
1138
1139   if (! all_one)
1140     {
           /* NOTE(review): the final argument of 1 to mask_rtx presumably
              requests the complemented mask (helper not visible here).  */
1141       temp = expand_binop (mode, and_optab, temp,
1142                            mask_rtx (mode, bitnum, bitsize, 1),
1143                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1144       temp = force_reg (mode, temp);
1145     }
1146
1147   /* Now logical-or VALUE into OP0, unless it is zero.  */
1148
1149   if (! all_zero)
1150     {
1151       temp = expand_binop (mode, ior_optab, temp, value,
1152                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1153       temp = force_reg (mode, temp);
1154     }
1155
       /* Write the combined value back unless the result is OP0 itself.
          OP0 is copied with copy_rtx before being used as the destination.  */
1156   if (op0 != temp)
1157     {
1158       op0 = copy_rtx (op0);
1159       emit_move_insn (op0, temp);
1160     }
1161 }
1162 \f
1163 /* Store a bit field that is split across multiple accessible memory objects.
1164
1165    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1166    BITSIZE is the field width; BITPOS the position of its first bit
1167    (within the word).
1168    VALUE is the value to store.
        BITREGION_START and BITREGION_END are as for store_bit_field; a
        nonzero BITREGION_END limits how far a MEM access may extend.
1169
        The field is stored piece by piece.  Each piece lies within one UNIT,
        where UNIT is at most BITS_PER_WORD bits, and is written with
        store_fixed_bit_field.
        
1170    This does not yet handle fields wider than BITS_PER_WORD.  */
1171
1172 static void
1173 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1174                        unsigned HOST_WIDE_INT bitpos,
1175                        unsigned HOST_WIDE_INT bitregion_start,
1176                        unsigned HOST_WIDE_INT bitregion_end,
1177                        rtx value)
1178 {
1179   unsigned int unit;
       /* Number of bits of the field already stored.  */
1180   unsigned int bitsdone = 0;
1181
1182   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1183      much at a time.  */
1184   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1185     unit = BITS_PER_WORD;
1186   else
1187     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1188
1189   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1190      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1191      again, and we will mutually recurse forever.  */
1192   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1193     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1194
1195   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1196      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1197      that VALUE might be a floating-point constant.  */
1198   if (CONSTANT_P (value) && !CONST_INT_P (value))
1199     {
1200       rtx word = gen_lowpart_common (word_mode, value);
1201
1202       if (word && (value != word))
1203         value = word;
1204       else
1205         value = gen_lowpart_common (word_mode,
1206                                     force_reg (GET_MODE (value) != VOIDmode
1207                                                ? GET_MODE (value)
1208                                                : word_mode, value));
1209     }
1210
1211   while (bitsdone < bitsize)
1212     {
1213       unsigned HOST_WIDE_INT thissize;
1214       rtx part, word;
1215       unsigned HOST_WIDE_INT thispos;
1216       unsigned HOST_WIDE_INT offset;
1217
           /* OFFSET is the index of the UNIT holding the next bit to store;
              THISPOS is that bit's position within the UNIT.  */
1218       offset = (bitpos + bitsdone) / unit;
1219       thispos = (bitpos + bitsdone) % unit;
1220
1221       /* When region of bytes we can touch is restricted, decrease
1222          UNIT close to the end of the region as needed.  If op0 is a REG
1223          or SUBREG of REG, don't do this, as there can't be data races
1224          on a register and we can expand shorter code in some cases.  */
1225       if (bitregion_end
1226           && unit > BITS_PER_UNIT
1227           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1228           && !REG_P (op0)
1229           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1230         {
               /* Halve UNIT and restart the iteration so that OFFSET and
                  THISPOS are recomputed for the smaller UNIT.  No bits are
                  stored on this pass.  */
1231           unit = unit / 2;
1232           continue;
1233         }
1234
1235       /* THISSIZE must not overrun a word boundary.  Otherwise,
1236          store_fixed_bit_field will call us again, and we will mutually
1237          recurse forever.  */
1238       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1239       thissize = MIN (thissize, unit - thispos);
1240
1241       if (BYTES_BIG_ENDIAN)
1242         {
1243           /* Fetch successively less significant portions.  */
1244           if (CONST_INT_P (value))
1245             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1246                              >> (bitsize - bitsdone - thissize))
1247                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1248           else
1249             {
1250               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1251               /* The args are chosen so that the last part includes the
1252                  lsb.  Give extract_bit_field the value it needs (with
1253                  endianness compensation) to fetch the piece we want.  */
1254               part = extract_fixed_bit_field (word_mode, value, thissize,
1255                                               total_bits - bitsize + bitsdone,
1256                                               NULL_RTX, 1);
1257             }
1258         }
1259       else
1260         {
1261           /* Fetch successively more significant portions.  */
1262           if (CONST_INT_P (value))
1263             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1264                              >> bitsdone)
1265                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1266           else
1267             part = extract_fixed_bit_field (word_mode, value, thissize,
1268                                             bitsdone, NULL_RTX, 1);
1269         }
1270
1271       /* If OP0 is a register, then handle OFFSET here.
1272
1273          When handling multiword bitfields, the caller (see
1274          store_bit_field_1) may pass down a word_mode SUBREG of a larger
1275          REG for a bitfield that actually crosses a word boundary.  Thus,
                for a SUBREG, we must find
1276          the current word starting from the base register.  */
1277       if (GET_CODE (op0) == SUBREG)
1278         {
               /* Word index within SUBREG_REG: the word the SUBREG starts at
                  plus the number of whole words covered by OFFSET units.  */
1279           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1280                             + (offset * unit / BITS_PER_WORD);
1281           machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
               /* An inner register narrower than a word has only word 0;
                  any other word index is marked with const0_rtx and the
                  piece is skipped below.  */
1282           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1283             word = word_offset ? const0_rtx : op0;
1284           else
1285             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1286                                           GET_MODE (SUBREG_REG (op0)));
               /* Reduce OFFSET to the unit index within the selected word
                  (a mask, assuming BITS_PER_WORD / UNIT is a power of two).  */
1287           offset &= BITS_PER_WORD / unit - 1;
1288         }
1289       else if (REG_P (op0))
1290         {
1291           machine_mode op0_mode = GET_MODE (op0);
1292           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1293             word = offset ? const0_rtx : op0;
1294           else
1295             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1296                                           GET_MODE (op0));
1297           offset &= BITS_PER_WORD / unit - 1;
1298         }
1299       else
1300         word = op0;
1301
1302       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1303          it is just an out-of-bounds access.  Ignore it.  */
1304       if (word != const0_rtx)
1305         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1306                                bitregion_start, bitregion_end, part);
1307       bitsdone += thissize;
1308     }
1309 }
1310 \f
1311 /* A subroutine of extract_bit_field_1 that converts return value X
1312    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1313    to extract_bit_field.
        
        X is returned unchanged if it already has mode TMODE or MODE.
        Otherwise the result has mode TMODE, and UNSIGNEDP is passed on to
        convert_to_mode for the conversion.  */
1314
1315 static rtx
1316 convert_extracted_bit_field (rtx x, machine_mode mode,
1317                              machine_mode tmode, bool unsignedp)
1318 {
1319   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1320     return x;
1321
1322   /* If TMODE is not a scalar integer mode, first convert X to the
1323      integer mode with the same bit size as TMODE, force that into a
1324      register, and then access it as a TMODE value via gen_lowpart.  */
1325   if (!SCALAR_INT_MODE_P (tmode))
1326     {
1327       machine_mode smode;
1328
1329       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1330       x = convert_to_mode (smode, x, unsignedp);
1331       x = force_reg (smode, x);
1332       return gen_lowpart (tmode, x);
1333     }
1334
       /* TMODE is a scalar integer mode: a plain mode conversion suffices.  */
1335   return convert_to_mode (tmode, x, unsignedp);
1336 }
1337
1338 /* Try to use an ext(z)v pattern to extract a field from OP0.
1339    Return the extracted value on success, otherwise return null.
1340    EXTV describes the pattern to use; its field_mode is the mode of the
1341    extraction (EXT_MODE below).  The other arguments
        are as for extract_bit_field.
        
        Null is returned before anything is emitted if BITSIZE is 0, if the
        field is wider than EXT_MODE, or if OP0 is a SUBREG whose mode is
        not EXT_MODE.  */
1342
1343 static rtx
1344 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1345                               unsigned HOST_WIDE_INT bitsize,
1346                               unsigned HOST_WIDE_INT bitnum,
1347                               int unsignedp, rtx target,
1348                               machine_mode mode, machine_mode tmode)
1349 {
1350   struct expand_operand ops[4];
       /* SPEC_TARGET is the target as requested (or the fresh TMODE pseudo
          created below).  SPEC_TARGET_SUBREG, when set, is an EXT_MODE
          lowpart of SPEC_TARGET that is wider than SPEC_TARGET itself.
          Both are compared with the operand the insn actually wrote, to
          decide what to return.  */
1351   rtx spec_target = target;
1352   rtx spec_target_subreg = 0;
1353   machine_mode ext_mode = extv->field_mode;
1354   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1355
1356   if (bitsize == 0 || unit < bitsize)
1357     return NULL_RTX;
1358
1359   if (MEM_P (op0))
1360     /* Get a reference to the first byte of the field.  */
1361     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1362                                 &bitnum);
1363   else
1364     {
1365       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1366       if (BYTES_BIG_ENDIAN)
1367         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1368
1369       /* If op0 is a register, we need it in EXT_MODE to make it
1370          acceptable to the format of ext(z)v.  A SUBREG in another mode
                is not re-wrapped; give up instead.  */
1371       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1372         return NULL_RTX;
1373       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1374         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1375     }
1376
1377   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1378      "backwards" from the size of the unit we are extracting from.
1379      Otherwise, we count bits from the most significant on a
1380      BYTES/BITS_BIG_ENDIAN machine.  */
1381
1382   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1383     bitnum = unit - bitsize - bitnum;
1384
1385   if (target == 0)
1386     target = spec_target = gen_reg_rtx (tmode);
1387
1388   if (GET_MODE (target) != ext_mode)
1389     {
1390       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1391          between the mode of the extraction (word_mode) and the target
1392          mode.  Instead, create a temporary and use convert_move to set
1393          the target.  */
1394       if (REG_P (target)
1395           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1396         {
1397           target = gen_lowpart (ext_mode, target);
               /* Remember the lowpart only when EXT_MODE is wider than the
                  requested target, so that a result written through it can
                  be returned as SPEC_TARGET.  */
1398           if (GET_MODE_PRECISION (ext_mode)
1399               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1400             spec_target_subreg = target;
1401         }
1402       else
1403         target = gen_reg_rtx (ext_mode);
1404     }
1405
       /* Operands: 0 = destination, 1 = source structure, 2 = field width,
          3 = field position.  */
1406   create_output_operand (&ops[0], target, ext_mode);
1407   create_fixed_operand (&ops[1], op0);
1408   create_integer_operand (&ops[2], bitsize);
1409   create_integer_operand (&ops[3], bitnum);
1410   if (maybe_expand_insn (extv->icode, 4, ops))
1411     {
           /* The expander may have substituted its own output operand.  */
1412       target = ops[0].value;
1413       if (target == spec_target)
1414         return target;
1415       if (target == spec_target_subreg)
1416         return spec_target;
1417       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1418     }
1419   return NULL_RTX;
1420 }
1421
1422 /* A subroutine of extract_bit_field, with the same arguments.
1423    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1424    if we can find no other means of implementing the operation.
1425    if FALLBACK_P is false, return NULL instead.  */
1426
1427 static rtx
1428 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1429                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1430                      machine_mode mode, machine_mode tmode,
1431                      bool fallback_p)
1432 {
1433   rtx op0 = str_rtx;
1434   machine_mode int_mode;
1435   machine_mode mode1;
1436
1437   if (tmode == VOIDmode)
1438     tmode = mode;
1439
1440   while (GET_CODE (op0) == SUBREG)
1441     {
1442       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1443       op0 = SUBREG_REG (op0);
1444     }
1445
1446   /* If we have an out-of-bounds access to a register, just return an
1447      uninitialized register of the required mode.  This can occur if the
1448      source code contains an out-of-bounds access to a small array.  */
1449   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1450     return gen_reg_rtx (tmode);
1451
1452   if (REG_P (op0)
1453       && mode == GET_MODE (op0)
1454       && bitnum == 0
1455       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1456     {
1457       /* We're trying to extract a full register from itself.  */
1458       return op0;
1459     }
1460
1461   /* See if we can get a better vector mode before extracting.  */
1462   if (VECTOR_MODE_P (GET_MODE (op0))
1463       && !MEM_P (op0)
1464       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1465     {
1466       machine_mode new_mode;
1467
1468       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1469         new_mode = MIN_MODE_VECTOR_FLOAT;
1470       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1471         new_mode = MIN_MODE_VECTOR_FRACT;
1472       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1473         new_mode = MIN_MODE_VECTOR_UFRACT;
1474       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1475         new_mode = MIN_MODE_VECTOR_ACCUM;
1476       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1477         new_mode = MIN_MODE_VECTOR_UACCUM;
1478       else
1479         new_mode = MIN_MODE_VECTOR_INT;
1480
1481       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1482         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1483             && targetm.vector_mode_supported_p (new_mode))
1484           break;
1485       if (new_mode != VOIDmode)
1486         op0 = gen_lowpart (new_mode, op0);
1487     }
1488
1489   /* Use vec_extract patterns for extracting parts of vectors whenever
1490      available.  */
1491   if (VECTOR_MODE_P (GET_MODE (op0))
1492       && !MEM_P (op0)
1493       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1494       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1495           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1496     {
1497       struct expand_operand ops[3];
1498       machine_mode outermode = GET_MODE (op0);
1499       machine_mode innermode = GET_MODE_INNER (outermode);
1500       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1501       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1502
1503       create_output_operand (&ops[0], target, innermode);
1504       create_input_operand (&ops[1], op0, outermode);
1505       create_integer_operand (&ops[2], pos);
1506       if (maybe_expand_insn (icode, 3, ops))
1507         {
1508           target = ops[0].value;
1509           if (GET_MODE (target) != mode)
1510             return gen_lowpart (tmode, target);
1511           return target;
1512         }
1513     }
1514
1515   /* Make sure we are playing with integral modes.  Pun with subregs
1516      if we aren't.  */
1517   {
1518     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1519     if (imode != GET_MODE (op0))
1520       {
1521         if (MEM_P (op0))
1522           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1523         else if (imode != BLKmode)
1524           {
1525             op0 = gen_lowpart (imode, op0);
1526
1527             /* If we got a SUBREG, force it into a register since we
1528                aren't going to be able to do another SUBREG on it.  */
1529             if (GET_CODE (op0) == SUBREG)
1530               op0 = force_reg (imode, op0);
1531           }
1532         else if (REG_P (op0))
1533           {
1534             rtx reg, subreg;
1535             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1536                                             MODE_INT);
1537             reg = gen_reg_rtx (imode);
1538             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1539             emit_move_insn (subreg, op0);
1540             op0 = reg;
1541             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1542           }
1543         else
1544           {
1545             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1546             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1547             emit_move_insn (mem, op0);
1548             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1549           }
1550       }
1551   }
1552
1553   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1554      If that's wrong, the solution is to test for it and set TARGET to 0
1555      if needed.  */
1556
1557   /* Get the mode of the field to use for atomic access or subreg
1558      conversion.  */
1559   mode1 = mode;
1560   if (SCALAR_INT_MODE_P (tmode))
1561     {
1562       machine_mode try_mode = mode_for_size (bitsize,
1563                                                   GET_MODE_CLASS (tmode), 0);
1564       if (try_mode != BLKmode)
1565         mode1 = try_mode;
1566     }
1567   gcc_assert (mode1 != BLKmode);
1568
1569   /* Extraction of a full MODE1 value can be done with a subreg as long
1570      as the least significant bit of the value is the least significant
1571      bit of either OP0 or a word of OP0.  */
1572   if (!MEM_P (op0)
1573       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1574       && bitsize == GET_MODE_BITSIZE (mode1)
1575       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1576     {
1577       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1578                                      bitnum / BITS_PER_UNIT);
1579       if (sub)
1580         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1581     }
1582
1583   /* Extraction of a full MODE1 value can be done with a load as long as
1584      the field is on a byte boundary and is sufficiently aligned.  */
1585   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1586     {
1587       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1588       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1589     }
1590
1591   /* Handle fields bigger than a word.  */
1592
1593   if (bitsize > BITS_PER_WORD)
1594     {
1595       /* Here we transfer the words of the field
1596          in the order least significant first.
1597          This is because the most significant word is the one which may
1598          be less than full.  */
1599
1600       unsigned int backwards = WORDS_BIG_ENDIAN;
1601       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1602       unsigned int i;
1603       rtx_insn *last;
1604
1605       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1606         target = gen_reg_rtx (mode);
1607
1608       /* In case we're about to clobber a base register or something
1609          (see gcc.c-torture/execute/20040625-1.c).   */
1610       if (reg_mentioned_p (target, str_rtx))
1611         target = gen_reg_rtx (mode);
1612
1613       /* Indicate for flow that the entire target reg is being set.  */
1614       emit_clobber (target);
1615
1616       last = get_last_insn ();
1617       for (i = 0; i < nwords; i++)
1618         {
1619           /* If I is 0, use the low-order word in both field and target;
1620              if I is 1, use the next to lowest word; and so on.  */
1621           /* Word number in TARGET to use.  */
1622           unsigned int wordnum
1623             = (backwards
1624                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1625                : i);
1626           /* Offset from start of field in OP0.  */
1627           unsigned int bit_offset = (backwards
1628                                      ? MAX ((int) bitsize - ((int) i + 1)
1629                                             * BITS_PER_WORD,
1630                                             0)
1631                                      : (int) i * BITS_PER_WORD);
1632           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1633           rtx result_part
1634             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1635                                              bitsize - i * BITS_PER_WORD),
1636                                    bitnum + bit_offset, 1, target_part,
1637                                    mode, word_mode, fallback_p);
1638
1639           gcc_assert (target_part);
1640           if (!result_part)
1641             {
1642               delete_insns_since (last);
1643               return NULL;
1644             }
1645
1646           if (result_part != target_part)
1647             emit_move_insn (target_part, result_part);
1648         }
1649
1650       if (unsignedp)
1651         {
1652           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1653              need to be zero'd out.  */
1654           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1655             {
1656               unsigned int i, total_words;
1657
1658               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1659               for (i = nwords; i < total_words; i++)
1660                 emit_move_insn
1661                   (operand_subword (target,
1662                                     backwards ? total_words - i - 1 : i,
1663                                     1, VOIDmode),
1664                    const0_rtx);
1665             }
1666           return target;
1667         }
1668
1669       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1670       target = expand_shift (LSHIFT_EXPR, mode, target,
1671                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1672       return expand_shift (RSHIFT_EXPR, mode, target,
1673                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1674     }
1675
1676   /* If OP0 is a multi-word register, narrow it to the affected word.
1677      If the region spans two words, defer to extract_split_bit_field.  */
1678   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1679     {
1680       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1681                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1682       bitnum %= BITS_PER_WORD;
1683       if (bitnum + bitsize > BITS_PER_WORD)
1684         {
1685           if (!fallback_p)
1686             return NULL_RTX;
1687           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1688           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1689         }
1690     }
1691
1692   /* From here on we know the desired field is smaller than a word.
1693      If OP0 is a register, it too fits within a word.  */
1694   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1695   extraction_insn extv;
1696   if (!MEM_P (op0)
1697       /* ??? We could limit the structure size to the part of OP0 that
1698          contains the field, with appropriate checks for endianness
1699          and TRULY_NOOP_TRUNCATION.  */
1700       && get_best_reg_extraction_insn (&extv, pattern,
1701                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1702                                        tmode))
1703     {
1704       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1705                                                  unsignedp, target, mode,
1706                                                  tmode);
1707       if (result)
1708         return result;
1709     }
1710
1711   /* If OP0 is a memory, try copying it to a register and seeing if a
1712      cheap register alternative is available.  */
1713   if (MEM_P (op0))
1714     {
1715       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1716                                         tmode))
1717         {
1718           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1719                                                      bitnum, unsignedp,
1720                                                      target, mode,
1721                                                      tmode);
1722           if (result)
1723             return result;
1724         }
1725
1726       rtx_insn *last = get_last_insn ();
1727
1728       /* Try loading part of OP0 into a register and extracting the
1729          bitfield from that.  */
1730       unsigned HOST_WIDE_INT bitpos;
1731       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1732                                                0, 0, tmode, &bitpos);
1733       if (xop0)
1734         {
1735           xop0 = copy_to_reg (xop0);
1736           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1737                                             unsignedp, target,
1738                                             mode, tmode, false);
1739           if (result)
1740             return result;
1741           delete_insns_since (last);
1742         }
1743     }
1744
1745   if (!fallback_p)
1746     return NULL;
1747
1748   /* Find a correspondingly-sized integer field, so we can apply
1749      shifts and masks to it.  */
1750   int_mode = int_mode_for_mode (tmode);
1751   if (int_mode == BLKmode)
1752     int_mode = int_mode_for_mode (mode);
1753   /* Should probably push op0 out to memory and then do a load.  */
1754   gcc_assert (int_mode != BLKmode);
1755
1756   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1757                                     target, unsignedp);
1758   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1759 }
1760
1761 /* Generate code to extract a bit-field of BITSIZE bits starting at
1762    bit BITNUM of STR_RTX, and put it in TARGET if possible (if TARGET
1763    is nonzero).  Regardless of TARGET, we return the rtx for where the
1764    value is placed.
1765
1766    STR_RTX is the structure containing the field (a REG or MEM).
1767    UNSIGNEDP is nonzero if this is an unsigned bit field.
1768    MODE is the natural mode of the field value once extracted.
1769    TMODE is the mode the caller would like the value to have;
1770    but the value may be returned with type MODE instead.
1771
1772    If a TARGET is specified and we can store in it at no extra cost,
1773    we do so, and return TARGET.
1774    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1775    if they are equally easy.  */
1776
1777 rtx
1778 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1779                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1780                    machine_mode mode, machine_mode tmode)
1781 {
1782   /* Mode used for the -fstrict-volatile-bitfields test: STR_RTX's mode
1783      if it has a nonzero size, else TARGET's, else TMODE.  */
1784   machine_mode access_mode = tmode;
1785   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1786     access_mode = GET_MODE (str_rtx);
1787   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1788     access_mode = GET_MODE (target);
1789
1790   /* Without strict-volatile handling, extract directly.  */
1791   if (!strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, access_mode,
1792                                    0, 0))
1793     return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1794                                 target, mode, tmode, true);
1795
1796   /* Extraction of a full ACCESS_MODE value can be done with a simple
1797      load.  We know here that the field can be accessed with one single
1798      instruction.  For targets that support unaligned memory, an
1799      unaligned access may be necessary.  */
1800   if (bitsize == GET_MODE_BITSIZE (access_mode))
1801     {
1802       rtx loaded = adjust_bitfield_address (str_rtx, access_mode,
1803                                             bitnum / BITS_PER_UNIT);
1804       gcc_assert (bitnum % BITS_PER_UNIT == 0);
1805       return convert_extracted_bit_field (loaded, mode, tmode, unsignedp);
1806     }
1807
1808   /* Narrow the reference to ACCESS_MODE and copy it to a register.  */
1809   rtx narrowed = narrow_bit_field_mem (str_rtx, access_mode, bitsize,
1810                                        bitnum, &bitnum);
1811   gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (access_mode));
1812   return extract_bit_field_1 (copy_to_reg (narrowed), bitsize, bitnum,
1813                               unsignedp, target, mode, tmode, true);
1814 }
1815 \f
1816 /* Extract a field of BITSIZE bits starting at bit BITNUM of OP0 using
1817    shifts and boolean operations.
1818
1819    UNSIGNEDP is nonzero for an unsigned bit field (no sign extension).
1820    If TARGET is nonzero, we try to store the value there and return
1821    TARGET, but this is not guaranteed.  If TARGET is not used, the value
1822    ends up in a pseudo-reg of mode TMODE.  */
1823
1824 static rtx
1825 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1826                          unsigned HOST_WIDE_INT bitsize,
1827                          unsigned HOST_WIDE_INT bitnum, rtx target,
1828                          int unsignedp)
1829 {
1830   /* Only a MEM needs narrowing to a suitable access mode first.  */
1831   if (!MEM_P (op0))
1832     return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1833                                       target, unsignedp);
1834
1835   machine_mode best_mode
1836     = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1837                      MEM_VOLATILE_P (op0));
1838   /* This should only fail for fields spanning a word boundary.  */
1839   if (best_mode == VOIDmode)
1840     return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1841
1842   rtx narrowed = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
1843                                        &bitnum);
1844   return extract_fixed_bit_field_1 (tmode, narrowed, bitsize, bitnum,
1845                                     target, unsignedp);
1846 }
1847
1848 /* Helper function for extract_fixed_bit_field: extract the bit field
1849    starting from OP0 in its own MODE, which must be a scalar integer mode.  */
1850
1851 static rtx
1852 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1853                            unsigned HOST_WIDE_INT bitsize,
1854                            unsigned HOST_WIDE_INT bitnum, rtx target,
1855                            int unsignedp)
1856 {
1857   machine_mode mode = GET_MODE (op0);
1858   gcc_assert (SCALAR_INT_MODE_P (mode));
1859
1860   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1861      for invalid input, such as the extract equivalent of f5 from
1862      gcc.dg/pr48335-2.c.  */
1863
1864   if (BYTES_BIG_ENDIAN)
1865     /* BITNUM is the distance between our msb and that of OP0.
1866        Convert it to the distance from the lsb.  */
1867     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1868
1869   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1870      We have reduced the big-endian case to the little-endian case.  */
1871
1872   if (unsignedp)
1873     {
1874       if (bitnum)
1875         {
1876           /* If the field does not already start at the lsb,
1877              shift it so it does.  */
1878           /* Reuse TARGET for the shift only if it is a REG and MODE is TMODE.  */
1879           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1880           if (tmode != mode)
1881             subtarget = 0;
1882           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1883         }
1884       /* Convert the value to the desired mode, treating it as unsigned.  */
1885       if (mode != tmode)
1886         op0 = convert_to_mode (tmode, op0, 1);
1887
1888       /* Unless the msb of the field used to be the msb when we shifted,
1889          mask out the upper bits, keeping only the low BITSIZE bits.  */
1890
1891       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1892         return expand_binop (GET_MODE (op0), and_optab, op0,
1893                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1894                              target, 1, OPTAB_LIB_WIDEN);
1895       return op0;
1896     }
1897
1898   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1899      then arithmetic-shift its lsb to the lsb of the word.  */
1900   op0 = force_reg (mode, op0);
1901
1902   /* Convert OP0 to the narrowest integer mode that contains the field.  */
1903
1904   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1905        mode = GET_MODE_WIDER_MODE (mode))
1906     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1907       {
1908         op0 = convert_to_mode (mode, op0, 0);
1909         break;
1910       }
1911
1912   if (mode != tmode)
1913     target = 0;  /* Don't use TARGET when the working mode isn't TMODE.  */
1914
1915   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1916     {
1917       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1918       /* Maybe propagate the target for the shift.  */
1919       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1920       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1921     }
1922
1923   return expand_shift (RSHIFT_EXPR, mode, op0,
1924                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1925 }
1926
1927 /* Return a constant integer rtx (e.g. CONST_INT or CONST_DOUBLE) of mode
1928    MODE with the value VALUE << BITPOS, built by immed_wide_int_const.  */
1929
1930 static rtx
1931 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
1932               int bitpos)
1933 {
1934   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
1935 }
1936 \f
1937 /* Extract a bit field that is split across two words and return an RTX
1938    for the result, which is built up in word_mode.
1939
1940    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1941    BITSIZE is the field width; BITPOS is the position of its first bit.
1942    UNSIGNEDP is nonzero to zero-extend the result; otherwise sign-extend.  */
1943
1944 static rtx
1945 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1946                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1947 {
1948   unsigned int unit;
1949   unsigned int bitsdone = 0;  /* Bits of the field extracted so far.  */
1950   rtx result = NULL_RTX;
1951   int first = 1;
1952
1953   /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1954      much at a time.  */
1955   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1956     unit = BITS_PER_WORD;
1957   else
1958     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1959
1960   while (bitsdone < bitsize)
1961     {
1962       unsigned HOST_WIDE_INT thissize;
1963       rtx part, word;
1964       unsigned HOST_WIDE_INT thispos;
1965       unsigned HOST_WIDE_INT offset;
1966
1967       offset = (bitpos + bitsdone) / unit;
1968       thispos = (bitpos + bitsdone) % unit;
1969
1970       /* THISSIZE must not overrun a word boundary.  Otherwise,
1971          extract_fixed_bit_field will call us again, and we will mutually
1972          recurse forever.  */
1973       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1974       thissize = MIN (thissize, unit - thispos);
1975
1976       /* If OP0 is a register, then handle OFFSET here.
1977
1978          When handling multiword bitfields, extract_bit_field may pass
1979          down a word_mode SUBREG of a larger REG for a bitfield that actually
1980          crosses a word boundary.  Thus, for a SUBREG, we must find
1981          the current word starting from the base register.  */
1982       if (GET_CODE (op0) == SUBREG)
1983         {
1984           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1985           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1986                                         GET_MODE (SUBREG_REG (op0)));
1987           offset = 0;
1988         }
1989       else if (REG_P (op0))
1990         {
1991           word = operand_subword_force (op0, offset, GET_MODE (op0));
1992           offset = 0;
1993         }
1994       else
1995         word = op0;
1996
1997       /* Extract the parts in bit-counting order,
1998          whose meaning is determined by BYTES_BIG_ENDIAN.
1999          OFFSET is in UNITs, and UNIT is in bits.  */
2000       part = extract_fixed_bit_field (word_mode, word, thissize,
2001                                       offset * unit + thispos, 0, 1);
2002       bitsdone += thissize;
2003
2004       /* Shift this part into place for the result.  */
2005       if (BYTES_BIG_ENDIAN)
2006         {
2007           if (bitsize != bitsdone)
2008             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2009                                  bitsize - bitsdone, 0, 1);
2010         }
2011       else
2012         {
2013           if (bitsdone != thissize)
2014             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2015                                  bitsdone - thissize, 0, 1);
2016         }
2017
2018       if (first)
2019         result = part;
2020       else
2021         /* Combine the parts with bitwise or.  This works
2022            because we extracted each part as an unsigned bit field.  */
2023         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2024                                OPTAB_LIB_WIDEN);
2025
2026       first = 0;
2027     }
2028
2029   /* Unsigned bit field: we are done.  */
2030   if (unsignedp)
2031     return result;
2032   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2033   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2034                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2035   return expand_shift (RSHIFT_EXPR, word_mode, result,
2036                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2037 }
2038 \f
2039 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2040    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2041    MODE, fill the upper bits with zeros.  Fail if the layout of either
2042    mode is unknown (as for CC modes) or if the extraction would involve
2043    unprofitable mode punning.  Return the value on success (SRC itself
2044    when MODE equals SRC_MODE), otherwise return null.
2045
2046    This is different from gen_lowpart* in these respects:
2047
2048      - the returned value must always be considered an rvalue
2049
2050      - when MODE is wider than SRC_MODE, the extraction involves
2051        a zero extension
2052
2053      - when MODE is smaller than SRC_MODE, the extraction involves
2054        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2055
2056    In other words, this routine performs a computation, whereas the
2057    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2058    operations.  */
2059
2060 rtx
2061 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2062 {
2063   machine_mode int_mode, src_int_mode;
2064
2065   if (mode == src_mode)
2066     return src;
2067
2068   if (CONSTANT_P (src))
2069     {
2070       /* simplify_gen_subreg can't be used here: if simplify_subreg
2071          fails, it will happily create (subreg (symbol_ref)) or similar
2072          invalid SUBREGs.  So try simplify_subreg and validate by hand.  */
2073       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2074       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2075       if (ret)
2076         return ret;
2077
2078       if (GET_MODE (src) == VOIDmode
2079           || !validate_subreg (mode, src_mode, src, byte))
2080         return NULL_RTX;
2081
2082       src = force_reg (GET_MODE (src), src);
2083       return gen_rtx_SUBREG (mode, src, byte);
2084     }
2085
2086   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2087     return NULL_RTX;
2088
2089   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2090       && MODES_TIEABLE_P (mode, src_mode))
2091     {
2092       rtx x = gen_lowpart_common (mode, src);
2093       if (x)
2094         return x;
2095     }
2096
2097   src_int_mode = int_mode_for_mode (src_mode);
2098   int_mode = int_mode_for_mode (mode);
2099   if (src_int_mode == BLKmode || int_mode == BLKmode)
2100     return NULL_RTX;  /* No integer mode matches one of the modes.  */
2101
2102   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2103     return NULL_RTX;
2104   if (!MODES_TIEABLE_P (int_mode, mode))
2105     return NULL_RTX;
2106
2107   src = gen_lowpart (src_int_mode, src);  /* Pun SRC to an integer.  */
2108   src = convert_modes (int_mode, src_int_mode, src, true);  /* Unsigned.  */
2109   src = gen_lowpart (mode, src);  /* Pun the integer to MODE.  */
2110   return src;
2111 }
2112 \f
2113 /* Emit RTL that adds INC to TARGET, leaving the sum in TARGET.  */
2114
2115 void
2116 expand_inc (rtx target, rtx inc)
2117 {
2118   machine_mode target_mode = GET_MODE (target);
2119   rtx sum = expand_binop (target_mode, add_optab, target, inc,
2120                           target, 0, OPTAB_LIB_WIDEN);
2121   if (sum != target)
2122     emit_move_insn (target, sum);
2123 }
2124
2125 /* Emit RTL that subtracts DEC from TARGET, storing back into TARGET.  */
2126
2127 void
2128 expand_dec (rtx target, rtx dec)
2129 {
2130   machine_mode target_mode = GET_MODE (target);
2131   rtx diff = expand_binop (target_mode, sub_optab, target, dec,
2132                            target, 0, OPTAB_LIB_WIDEN);
2133   if (diff != target)
2134     emit_move_insn (target, diff);
2135 }
2136 \f
2137 /* Output a shift instruction for expression code CODE,
2138    with SHIFTED being the rtx for the value to shift,
2139    and AMOUNT the rtx for the amount to shift by.
2140    Store the result in the rtx TARGET, if that is convenient.
2141    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2142    Return the rtx for where the value is.  */
2143
2144 static rtx
2145 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2146                 rtx amount, rtx target, int unsignedp)
2147 {
2148   rtx op1, temp = 0;
2149   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2150   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2151   optab lshift_optab = ashl_optab;
2152   optab rshift_arith_optab = ashr_optab;
2153   optab rshift_uns_optab = lshr_optab;
2154   optab lrotate_optab = rotl_optab;
2155   optab rrotate_optab = rotr_optab;
2156   machine_mode op1_mode;
2157   machine_mode scalar_mode = mode;
2158   int attempt;
2159   bool speed = optimize_insn_for_speed_p ();
2160
2161   if (VECTOR_MODE_P (mode))
2162     scalar_mode = GET_MODE_INNER (mode);
2163   op1 = amount;
2164   op1_mode = GET_MODE (op1);
2165
2166   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2167      shift amount is a vector, use the vector/vector shift patterns.  */
2168   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2169     {
2170       lshift_optab = vashl_optab;
2171       rshift_arith_optab = vashr_optab;
2172       rshift_uns_optab = vlshr_optab;
2173       lrotate_optab = vrotl_optab;
2174       rrotate_optab = vrotr_optab;
2175     }
2176
2177   /* Previously detected shift-counts computed by NEGATE_EXPR
2178      and shifted in the other direction; but that does not work
2179      on all machines.  */
2180
2181   if (SHIFT_COUNT_TRUNCATED)
2182     {
2183       if (CONST_INT_P (op1)
2184           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2185               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2186         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2187                        % GET_MODE_BITSIZE (scalar_mode));
2188       else if (GET_CODE (op1) == SUBREG
2189                && subreg_lowpart_p (op1)
2190                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2191                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2192         op1 = SUBREG_REG (op1);
2193     }
2194
2195   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2196      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2197      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2198      amount instead.  */
2199   if (rotate
2200       && CONST_INT_P (op1)
2201       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2202                    GET_MODE_BITSIZE (scalar_mode) - 1))
2203     {
2204       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2205       left = !left;
2206       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2207     }
2208
2209   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2210      Note that this is not the case for bigger values.  For instance a rotation
2211      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2212      0x04030201 (bswapsi).  */
2213   if (rotate
2214       && CONST_INT_P (op1)
2215       && INTVAL (op1) == BITS_PER_UNIT
2216       && GET_MODE_SIZE (scalar_mode) == 2
2217       && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2218     return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2219                                   unsignedp);
2220
2221   if (op1 == const0_rtx)
2222     return shifted;
2223
2224   /* Check whether its cheaper to implement a left shift by a constant
2225      bit count by a sequence of additions.  */
2226   if (code == LSHIFT_EXPR
2227       && CONST_INT_P (op1)
2228       && INTVAL (op1) > 0
2229       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2230       && INTVAL (op1) < MAX_BITS_PER_WORD
2231       && (shift_cost (speed, mode, INTVAL (op1))
2232           > INTVAL (op1) * add_cost (speed, mode))
2233       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2234     {
2235       int i;
2236       for (i = 0; i < INTVAL (op1); i++)
2237         {
2238           temp = force_reg (mode, shifted);
2239           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2240                                   unsignedp, OPTAB_LIB_WIDEN);
2241         }
2242       return shifted;
2243     }
2244
2245   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2246     {
2247       enum optab_methods methods;
2248
2249       if (attempt == 0)
2250         methods = OPTAB_DIRECT;
2251       else if (attempt == 1)
2252         methods = OPTAB_WIDEN;
2253       else
2254         methods = OPTAB_LIB_WIDEN;
2255
2256       if (rotate)
2257         {
2258           /* Widening does not work for rotation.  */
2259           if (methods == OPTAB_WIDEN)
2260             continue;
2261           else if (methods == OPTAB_LIB_WIDEN)
2262             {
2263               /* If we have been unable to open-code this by a rotation,
2264                  do it as the IOR of two shifts.  I.e., to rotate A
2265                  by N bits, compute
2266                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2267                  where C is the bitsize of A.
2268
2269                  It is theoretically possible that the target machine might
2270                  not be able to perform either shift and hence we would
2271                  be making two libcalls rather than just the one for the
2272                  shift (similarly if IOR could not be done).  We will allow
2273                  this extremely unlikely lossage to avoid complicating the
2274                  code below.  */
2275
2276               rtx subtarget = target == shifted ? 0 : target;
2277               rtx new_amount, other_amount;
2278               rtx temp1;
2279
2280               new_amount = op1;
2281               if (op1 == const0_rtx)
2282                 return shifted;
2283               else if (CONST_INT_P (op1))
2284                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2285                                         - INTVAL (op1));
2286               else
2287                 {
2288                   other_amount
2289                     = simplify_gen_unary (NEG, GET_MODE (op1),
2290                                           op1, GET_MODE (op1));
2291                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2292                   other_amount
2293                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2294                                            gen_int_mode (mask, GET_MODE (op1)));
2295                 }
2296
2297               shifted = force_reg (mode, shifted);
2298
2299               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2300                                      mode, shifted, new_amount, 0, 1);
2301               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2302                                       mode, shifted, other_amount,
2303                                       subtarget, 1);
2304               return expand_binop (mode, ior_optab, temp, temp1, target,
2305                                    unsignedp, methods);
2306             }
2307
2308           temp = expand_binop (mode,
2309                                left ? lrotate_optab : rrotate_optab,
2310                                shifted, op1, target, unsignedp, methods);
2311         }
2312       else if (unsignedp)
2313         temp = expand_binop (mode,
2314                              left ? lshift_optab : rshift_uns_optab,
2315                              shifted, op1, target, unsignedp, methods);
2316
2317       /* Do arithmetic shifts.
2318          Also, if we are going to widen the operand, we can just as well
2319          use an arithmetic right-shift instead of a logical one.  */
2320       if (temp == 0 && ! rotate
2321           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2322         {
2323           enum optab_methods methods1 = methods;
2324
2325           /* If trying to widen a log shift to an arithmetic shift,
2326              don't accept an arithmetic shift of the same size.  */
2327           if (unsignedp)
2328             methods1 = OPTAB_MUST_WIDEN;
2329
2330           /* Arithmetic shift */
2331
2332           temp = expand_binop (mode,
2333                                left ? lshift_optab : rshift_arith_optab,
2334                                shifted, op1, target, unsignedp, methods1);
2335         }
2336
2337       /* We used to try extzv here for logical right shifts, but that was
2338          only useful for one machine, the VAX, and caused poor code
2339          generation there for lshrdi3, so the code was deleted and a
2340          define_expand for lshrsi3 was added to vax.md.  */
2341     }
2342
2343   gcc_assert (temp);
2344   return temp;
2345 }
2346
2347 /* Output a shift instruction for expression code CODE,
2348    with SHIFTED being the rtx for the value to shift,
2349    and AMOUNT the amount to shift by.
2350    Store the result in the rtx TARGET, if that is convenient.
2351    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2352    Return the rtx for where the value is.  */
2353
2354 rtx
2355 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2356               int amount, rtx target, int unsignedp)
2357 {
2358   return expand_shift_1 (code, mode,
2359                          shifted, GEN_INT (amount), target, unsignedp);
2360 }
2361
2362 /* Output a shift instruction for expression code CODE,
2363    with SHIFTED being the rtx for the value to shift,
2364    and AMOUNT the tree for the amount to shift by.
2365    Store the result in the rtx TARGET, if that is convenient.
2366    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2367    Return the rtx for where the value is.  */
2368
2369 rtx
2370 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2371                        tree amount, rtx target, int unsignedp)
2372 {
2373   return expand_shift_1 (code, mode,
2374                          shifted, expand_normal (amount), target, unsignedp);
2375 }
2376
2377 \f
/* The kind of fixup that has to be applied after a constant
   multiplication sequence has been emitted.  */
enum mult_variant
{
  /* No fixup is needed.  */
  basic_variant,
  /* The result has to be negated.  */
  negate_variant,
  /* The multiplicand has to be added to the result.  */
  add_variant
};
2383
2384 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2385                         const struct mult_cost *, machine_mode mode);
2386 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2387                                  struct algorithm *, enum mult_variant *, int);
2388 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2389                               const struct algorithm *, enum mult_variant);
2390 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2391 static rtx extract_high_half (machine_mode, rtx);
2392 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2393 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2394                                        int, int);
2395 /* Compute and return the best algorithm for multiplying by T.
2396    The algorithm must cost less than cost_limit
2397    If retval.cost >= COST_LIMIT, no algorithm was found and all
2398    other field of the returned struct are undefined.
2399    MODE is the machine mode of the multiplication.  */
2400
2401 static void
2402 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2403             const struct mult_cost *cost_limit, machine_mode mode)
2404 {
2405   int m;
2406   struct algorithm *alg_in, *best_alg;
2407   struct mult_cost best_cost;
2408   struct mult_cost new_limit;
2409   int op_cost, op_latency;
2410   unsigned HOST_WIDE_INT orig_t = t;
2411   unsigned HOST_WIDE_INT q;
2412   int maxm, hash_index;
2413   bool cache_hit = false;
2414   enum alg_code cache_alg = alg_zero;
2415   bool speed = optimize_insn_for_speed_p ();
2416   machine_mode imode;
2417   struct alg_hash_entry *entry_ptr;
2418
2419   /* Indicate that no algorithm is yet found.  If no algorithm
2420      is found, this value will be returned and indicate failure.  */
2421   alg_out->cost.cost = cost_limit->cost + 1;
2422   alg_out->cost.latency = cost_limit->latency + 1;
2423
2424   if (cost_limit->cost < 0
2425       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2426     return;
2427
2428   /* Be prepared for vector modes.  */
2429   imode = GET_MODE_INNER (mode);
2430   if (imode == VOIDmode)
2431     imode = mode;
2432
2433   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2434
2435   /* Restrict the bits of "t" to the multiplication's mode.  */
2436   t &= GET_MODE_MASK (imode);
2437
2438   /* t == 1 can be done in zero cost.  */
2439   if (t == 1)
2440     {
2441       alg_out->ops = 1;
2442       alg_out->cost.cost = 0;
2443       alg_out->cost.latency = 0;
2444       alg_out->op[0] = alg_m;
2445       return;
2446     }
2447
2448   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2449      fail now.  */
2450   if (t == 0)
2451     {
2452       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2453         return;
2454       else
2455         {
2456           alg_out->ops = 1;
2457           alg_out->cost.cost = zero_cost (speed);
2458           alg_out->cost.latency = zero_cost (speed);
2459           alg_out->op[0] = alg_zero;
2460           return;
2461         }
2462     }
2463
2464   /* We'll be needing a couple extra algorithm structures now.  */
2465
2466   alg_in = XALLOCA (struct algorithm);
2467   best_alg = XALLOCA (struct algorithm);
2468   best_cost = *cost_limit;
2469
2470   /* Compute the hash index.  */
2471   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2472
2473   /* See if we already know what to do for T.  */
2474   entry_ptr = alg_hash_entry_ptr (hash_index);
2475   if (entry_ptr->t == t
2476       && entry_ptr->mode == mode
2477       && entry_ptr->mode == mode
2478       && entry_ptr->speed == speed
2479       && entry_ptr->alg != alg_unknown)
2480     {
2481       cache_alg = entry_ptr->alg;
2482
2483       if (cache_alg == alg_impossible)
2484         {
2485           /* The cache tells us that it's impossible to synthesize
2486              multiplication by T within entry_ptr->cost.  */
2487           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2488             /* COST_LIMIT is at least as restrictive as the one
2489                recorded in the hash table, in which case we have no
2490                hope of synthesizing a multiplication.  Just
2491                return.  */
2492             return;
2493
2494           /* If we get here, COST_LIMIT is less restrictive than the
2495              one recorded in the hash table, so we may be able to
2496              synthesize a multiplication.  Proceed as if we didn't
2497              have the cache entry.  */
2498         }
2499       else
2500         {
2501           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2502             /* The cached algorithm shows that this multiplication
2503                requires more cost than COST_LIMIT.  Just return.  This
2504                way, we don't clobber this cache entry with
2505                alg_impossible but retain useful information.  */
2506             return;
2507
2508           cache_hit = true;
2509
2510           switch (cache_alg)
2511             {
2512             case alg_shift:
2513               goto do_alg_shift;
2514
2515             case alg_add_t_m2:
2516             case alg_sub_t_m2:
2517               goto do_alg_addsub_t_m2;
2518
2519             case alg_add_factor:
2520             case alg_sub_factor:
2521               goto do_alg_addsub_factor;
2522
2523             case alg_add_t2_m:
2524               goto do_alg_add_t2_m;
2525
2526             case alg_sub_t2_m:
2527               goto do_alg_sub_t2_m;
2528
2529             default:
2530               gcc_unreachable ();
2531             }
2532         }
2533     }
2534
2535   /* If we have a group of zero bits at the low-order part of T, try
2536      multiplying by the remaining bits and then doing a shift.  */
2537
2538   if ((t & 1) == 0)
2539     {
2540     do_alg_shift:
2541       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2542       if (m < maxm)
2543         {
2544           q = t >> m;
2545           /* The function expand_shift will choose between a shift and
2546              a sequence of additions, so the observed cost is given as
2547              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2548           op_cost = m * add_cost (speed, mode);
2549           if (shift_cost (speed, mode, m) < op_cost)
2550             op_cost = shift_cost (speed, mode, m);
2551           new_limit.cost = best_cost.cost - op_cost;
2552           new_limit.latency = best_cost.latency - op_cost;
2553           synth_mult (alg_in, q, &new_limit, mode);
2554
2555           alg_in->cost.cost += op_cost;
2556           alg_in->cost.latency += op_cost;
2557           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2558             {
2559               best_cost = alg_in->cost;
2560               std::swap (alg_in, best_alg);
2561               best_alg->log[best_alg->ops] = m;
2562               best_alg->op[best_alg->ops] = alg_shift;
2563             }
2564
2565           /* See if treating ORIG_T as a signed number yields a better
2566              sequence.  Try this sequence only for a negative ORIG_T
2567              as it would be useless for a non-negative ORIG_T.  */
2568           if ((HOST_WIDE_INT) orig_t < 0)
2569             {
2570               /* Shift ORIG_T as follows because a right shift of a
2571                  negative-valued signed type is implementation
2572                  defined.  */
2573               q = ~(~orig_t >> m);
2574               /* The function expand_shift will choose between a shift
2575                  and a sequence of additions, so the observed cost is
2576                  given as MIN (m * add_cost(speed, mode),
2577                  shift_cost(speed, mode, m)).  */
2578               op_cost = m * add_cost (speed, mode);
2579               if (shift_cost (speed, mode, m) < op_cost)
2580                 op_cost = shift_cost (speed, mode, m);
2581               new_limit.cost = best_cost.cost - op_cost;
2582               new_limit.latency = best_cost.latency - op_cost;
2583               synth_mult (alg_in, q, &new_limit, mode);
2584
2585               alg_in->cost.cost += op_cost;
2586               alg_in->cost.latency += op_cost;
2587               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2588                 {
2589                   best_cost = alg_in->cost;
2590                   std::swap (alg_in, best_alg);
2591                   best_alg->log[best_alg->ops] = m;
2592                   best_alg->op[best_alg->ops] = alg_shift;
2593                 }
2594             }
2595         }
2596       if (cache_hit)
2597         goto done;
2598     }
2599
2600   /* If we have an odd number, add or subtract one.  */
2601   if ((t & 1) != 0)
2602     {
2603       unsigned HOST_WIDE_INT w;
2604
2605     do_alg_addsub_t_m2:
2606       for (w = 1; (w & t) != 0; w <<= 1)
2607         ;
2608       /* If T was -1, then W will be zero after the loop.  This is another
2609          case where T ends with ...111.  Handling this with (T + 1) and
2610          subtract 1 produces slightly better code and results in algorithm
2611          selection much faster than treating it like the ...0111 case
2612          below.  */
2613       if (w == 0
2614           || (w > 2
2615               /* Reject the case where t is 3.
2616                  Thus we prefer addition in that case.  */
2617               && t != 3))
2618         {
2619           /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2620
2621           op_cost = add_cost (speed, mode);
2622           new_limit.cost = best_cost.cost - op_cost;
2623           new_limit.latency = best_cost.latency - op_cost;
2624           synth_mult (alg_in, t + 1, &new_limit, mode);
2625
2626           alg_in->cost.cost += op_cost;
2627           alg_in->cost.latency += op_cost;
2628           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2629             {
2630               best_cost = alg_in->cost;
2631               std::swap (alg_in, best_alg);
2632               best_alg->log[best_alg->ops] = 0;
2633               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2634             }
2635         }
2636       else
2637         {
2638           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2639
2640           op_cost = add_cost (speed, mode);
2641           new_limit.cost = best_cost.cost - op_cost;
2642           new_limit.latency = best_cost.latency - op_cost;
2643           synth_mult (alg_in, t - 1, &new_limit, mode);
2644
2645           alg_in->cost.cost += op_cost;
2646           alg_in->cost.latency += op_cost;
2647           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2648             {
2649               best_cost = alg_in->cost;
2650               std::swap (alg_in, best_alg);
2651               best_alg->log[best_alg->ops] = 0;
2652               best_alg->op[best_alg->ops] = alg_add_t_m2;
2653             }
2654         }
2655
2656       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2657          quickly with a - a * n for some appropriate constant n.  */
2658       m = exact_log2 (-orig_t + 1);
2659       if (m >= 0 && m < maxm)
2660         {
2661           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2662           /* If the target has a cheap shift-and-subtract insn use
2663              that in preference to a shift insn followed by a sub insn.
2664              Assume that the shift-and-sub is "atomic" with a latency
2665              equal to it's cost, otherwise assume that on superscalar
2666              hardware the shift may be executed concurrently with the
2667              earlier steps in the algorithm.  */
2668           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2669             {
2670               op_cost = shiftsub1_cost (speed, mode, m);
2671               op_latency = op_cost;
2672             }
2673           else
2674             op_latency = add_cost (speed, mode);
2675
2676           new_limit.cost = best_cost.cost - op_cost;
2677           new_limit.latency = best_cost.latency - op_latency;
2678           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2679                       &new_limit, mode);
2680
2681           alg_in->cost.cost += op_cost;
2682           alg_in->cost.latency += op_latency;
2683           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2684             {
2685               best_cost = alg_in->cost;
2686               std::swap (alg_in, best_alg);
2687               best_alg->log[best_alg->ops] = m;
2688               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2689             }
2690         }
2691
2692       if (cache_hit)
2693         goto done;
2694     }
2695
2696   /* Look for factors of t of the form
2697      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2698      If we find such a factor, we can multiply by t using an algorithm that
2699      multiplies by q, shift the result by m and add/subtract it to itself.
2700
2701      We search for large factors first and loop down, even if large factors
2702      are less probable than small; if we find a large factor we will find a
2703      good sequence quickly, and therefore be able to prune (by decreasing
2704      COST_LIMIT) the search.  */
2705
2706  do_alg_addsub_factor:
2707   for (m = floor_log2 (t - 1); m >= 2; m--)
2708     {
2709       unsigned HOST_WIDE_INT d;
2710
2711       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2712       if (t % d == 0 && t > d && m < maxm
2713           && (!cache_hit || cache_alg == alg_add_factor))
2714         {
2715           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2716           if (shiftadd_cost (speed, mode, m) <= op_cost)
2717             op_cost = shiftadd_cost (speed, mode, m);
2718
2719           op_latency = op_cost;
2720
2721
2722           new_limit.cost = best_cost.cost - op_cost;
2723           new_limit.latency = best_cost.latency - op_latency;
2724           synth_mult (alg_in, t / d, &new_limit, mode);
2725
2726           alg_in->cost.cost += op_cost;
2727           alg_in->cost.latency += op_latency;
2728           if (alg_in->cost.latency < op_cost)
2729             alg_in->cost.latency = op_cost;
2730           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2731             {
2732               best_cost = alg_in->cost;
2733               std::swap (alg_in, best_alg);
2734               best_alg->log[best_alg->ops] = m;
2735               best_alg->op[best_alg->ops] = alg_add_factor;
2736             }
2737           /* Other factors will have been taken care of in the recursion.  */
2738           break;
2739         }
2740
2741       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2742       if (t % d == 0 && t > d && m < maxm
2743           && (!cache_hit || cache_alg == alg_sub_factor))
2744         {
2745           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2746           if (shiftsub0_cost (speed, mode, m) <= op_cost)
2747             op_cost = shiftsub0_cost (speed, mode, m);
2748
2749           op_latency = op_cost;
2750
2751           new_limit.cost = best_cost.cost - op_cost;
2752           new_limit.latency = best_cost.latency - op_latency;
2753           synth_mult (alg_in, t / d, &new_limit, mode);
2754
2755           alg_in->cost.cost += op_cost;
2756           alg_in->cost.latency += op_latency;
2757           if (alg_in->cost.latency < op_cost)
2758             alg_in->cost.latency = op_cost;
2759           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2760             {
2761               best_cost = alg_in->cost;
2762               std::swap (alg_in, best_alg);
2763               best_alg->log[best_alg->ops] = m;
2764               best_alg->op[best_alg->ops] = alg_sub_factor;
2765             }
2766           break;
2767         }
2768     }
2769   if (cache_hit)
2770     goto done;
2771
2772   /* Try shift-and-add (load effective address) instructions,
2773      i.e. do a*3, a*5, a*9.  */
2774   if ((t & 1) != 0)
2775     {
2776     do_alg_add_t2_m:
2777       q = t - 1;
2778       q = q & -q;
2779       m = exact_log2 (q);
2780       if (m >= 0 && m < maxm)
2781         {
2782           op_cost = shiftadd_cost (speed, mode, m);
2783           new_limit.cost = best_cost.cost - op_cost;
2784           new_limit.latency = best_cost.latency - op_cost;
2785           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2786
2787           alg_in->cost.cost += op_cost;
2788           alg_in->cost.latency += op_cost;
2789           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2790             {
2791               best_cost = alg_in->cost;
2792               std::swap (alg_in, best_alg);
2793               best_alg->log[best_alg->ops] = m;
2794               best_alg->op[best_alg->ops] = alg_add_t2_m;
2795             }
2796         }
2797       if (cache_hit)
2798         goto done;
2799
2800     do_alg_sub_t2_m:
2801       q = t + 1;
2802       q = q & -q;
2803       m = exact_log2 (q);
2804       if (m >= 0 && m < maxm)
2805         {
2806           op_cost = shiftsub0_cost (speed, mode, m);
2807           new_limit.cost = best_cost.cost - op_cost;
2808           new_limit.latency = best_cost.latency - op_cost;
2809           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2810
2811           alg_in->cost.cost += op_cost;
2812           alg_in->cost.latency += op_cost;
2813           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2814             {
2815               best_cost = alg_in->cost;
2816               std::swap (alg_in, best_alg);
2817               best_alg->log[best_alg->ops] = m;
2818               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2819             }
2820         }
2821       if (cache_hit)
2822         goto done;
2823     }
2824
2825  done:
2826   /* If best_cost has not decreased, we have not found any algorithm.  */
2827   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2828     {
2829       /* We failed to find an algorithm.  Record alg_impossible for
2830          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2831          we are asked to find an algorithm for T within the same or
2832          lower COST_LIMIT, we can immediately return to the
2833          caller.  */
2834       entry_ptr->t = t;
2835       entry_ptr->mode = mode;
2836       entry_ptr->speed = speed;
2837       entry_ptr->alg = alg_impossible;
2838       entry_ptr->cost = *cost_limit;
2839       return;
2840     }
2841
2842   /* Cache the result.  */
2843   if (!cache_hit)
2844     {
2845       entry_ptr->t = t;
2846       entry_ptr->mode = mode;
2847       entry_ptr->speed = speed;
2848       entry_ptr->alg = best_alg->op[best_alg->ops];
2849       entry_ptr->cost.cost = best_cost.cost;
2850       entry_ptr->cost.latency = best_cost.latency;
2851     }
2852
2853   /* If we are getting a too long sequence for `struct algorithm'
2854      to record, make this search fail.  */
2855   if (best_alg->ops == MAX_BITS_PER_WORD)
2856     return;
2857
2858   /* Copy the algorithm from temporary space to the space at alg_out.
2859      We avoid using structure assignment because the majority of
2860      best_alg is normally undefined, and this is a critical function.  */
2861   alg_out->ops = best_alg->ops + 1;
2862   alg_out->cost = best_cost;
2863   memcpy (alg_out->op, best_alg->op,
2864           alg_out->ops * sizeof *alg_out->op);
2865   memcpy (alg_out->log, best_alg->log,
2866           alg_out->ops * sizeof *alg_out->log);
2867 }
2868 \f
2869 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2870    Try three variations:
2871
2872        - a shift/add sequence based on VAL itself
2873        - a shift/add sequence based on -VAL, followed by a negation
2874        - a shift/add sequence based on VAL - 1, followed by an addition.
2875
2876    Return true if the cheapest of these cost less than MULT_COST,
2877    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2878
2879 static bool
2880 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2881                      struct algorithm *alg, enum mult_variant *variant,
2882                      int mult_cost)
2883 {
2884   struct algorithm alg2;
2885   struct mult_cost limit;
2886   int op_cost;
2887   bool speed = optimize_insn_for_speed_p ();
2888
2889   /* Fail quickly for impossible bounds.  */
2890   if (mult_cost < 0)
2891     return false;
2892
2893   /* Ensure that mult_cost provides a reasonable upper bound.
2894      Any constant multiplication can be performed with less
2895      than 2 * bits additions.  */
2896   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2897   if (mult_cost > op_cost)
2898     mult_cost = op_cost;
2899
2900   *variant = basic_variant;
2901   limit.cost = mult_cost;
2902   limit.latency = mult_cost;
2903   synth_mult (alg, val, &limit, mode);
2904
2905   /* This works only if the inverted value actually fits in an
2906      `unsigned int' */
2907   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2908     {
2909       op_cost = neg_cost (speed, mode);
2910       if (MULT_COST_LESS (&alg->cost, mult_cost))
2911         {
2912           limit.cost = alg->cost.cost - op_cost;
2913           limit.latency = alg->cost.latency - op_cost;
2914         }
2915       else
2916         {
2917           limit.cost = mult_cost - op_cost;
2918           limit.latency = mult_cost - op_cost;
2919         }
2920
2921       synth_mult (&alg2, -val, &limit, mode);
2922       alg2.cost.cost += op_cost;
2923       alg2.cost.latency += op_cost;
2924       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2925         *alg = alg2, *variant = negate_variant;
2926     }
2927
2928   /* This proves very useful for division-by-constant.  */
2929   op_cost = add_cost (speed, mode);
2930   if (MULT_COST_LESS (&alg->cost, mult_cost))
2931     {
2932       limit.cost = alg->cost.cost - op_cost;
2933       limit.latency = alg->cost.latency - op_cost;
2934     }
2935   else
2936     {
2937       limit.cost = mult_cost - op_cost;
2938       limit.latency = mult_cost - op_cost;
2939     }
2940
2941   synth_mult (&alg2, val - 1, &limit, mode);
2942   alg2.cost.cost += op_cost;
2943   alg2.cost.latency += op_cost;
2944   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2945     *alg = alg2, *variant = add_variant;
2946
2947   return MULT_COST_LESS (&alg->cost, mult_cost);
2948 }
2949
2950 /* A subroutine of expand_mult, used for constant multiplications.
2951    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2952    convenient.  Use the shift/add sequence described by ALG and apply
2953    the final fixup specified by VARIANT.  */
2954
2955 static rtx
2956 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
2957                    rtx target, const struct algorithm *alg,
2958                    enum mult_variant variant)
2959 {
2960   HOST_WIDE_INT val_so_far;
2961   rtx_insn *insn;
2962   rtx accum, tem;
2963   int opno;
2964   machine_mode nmode;
2965
2966   /* Avoid referencing memory over and over and invalid sharing
2967      on SUBREGs.  */
2968   op0 = force_reg (mode, op0);
2969
2970   /* ACCUM starts out either as OP0 or as a zero, depending on
2971      the first operation.  */
2972
2973   if (alg->op[0] == alg_zero)
2974     {
2975       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2976       val_so_far = 0;
2977     }
2978   else if (alg->op[0] == alg_m)
2979     {
2980       accum = copy_to_mode_reg (mode, op0);
2981       val_so_far = 1;
2982     }
2983   else
2984     gcc_unreachable ();
2985
2986   for (opno = 1; opno < alg->ops; opno++)
2987     {
2988       int log = alg->log[opno];
2989       rtx shift_subtarget = optimize ? 0 : accum;
2990       rtx add_target
2991         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2992            && !optimize)
2993           ? target : 0;
2994       rtx accum_target = optimize ? 0 : accum;
2995       rtx accum_inner;
2996
2997       switch (alg->op[opno])
2998         {
2999         case alg_shift:
3000           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3001           /* REG_EQUAL note will be attached to the following insn.  */
3002           emit_move_insn (accum, tem);
3003           val_so_far <<= log;
3004           break;
3005
3006         case alg_add_t_m2:
3007           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3008           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3009                                  add_target ? add_target : accum_target);
3010           val_so_far += (HOST_WIDE_INT) 1 << log;
3011           break;
3012
3013         case alg_sub_t_m2:
3014           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3015           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3016                                  add_target ? add_target : accum_target);
3017           val_so_far -= (HOST_WIDE_INT) 1 << log;
3018           break;
3019
3020         case alg_add_t2_m:
3021           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3022                                 log, shift_subtarget, 0);
3023           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3024                                  add_target ? add_target : accum_target);
3025           val_so_far = (val_so_far << log) + 1;
3026           break;
3027
3028         case alg_sub_t2_m:
3029           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3030                                 log, shift_subtarget, 0);
3031           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3032                                  add_target ? add_target : accum_target);
3033           val_so_far = (val_so_far << log) - 1;
3034           break;
3035
3036         case alg_add_factor:
3037           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3038           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3039                                  add_target ? add_target : accum_target);
3040           val_so_far += val_so_far << log;
3041           break;
3042
3043         case alg_sub_factor:
3044           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3045           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3046                                  (add_target
3047                                   ? add_target : (optimize ? 0 : tem)));
3048           val_so_far = (val_so_far << log) - val_so_far;
3049           break;
3050
3051         default:
3052           gcc_unreachable ();
3053         }
3054
3055       if (SCALAR_INT_MODE_P (mode))
3056         {
3057           /* Write a REG_EQUAL note on the last insn so that we can cse
3058              multiplication sequences.  Note that if ACCUM is a SUBREG,
3059              we've set the inner register and must properly indicate that.  */
3060           tem = op0, nmode = mode;
3061           accum_inner = accum;
3062           if (GET_CODE (accum) == SUBREG)
3063             {
3064               accum_inner = SUBREG_REG (accum);
3065               nmode = GET_MODE (accum_inner);
3066               tem = gen_lowpart (nmode, op0);
3067             }
3068
3069           insn = get_last_insn ();
3070           set_dst_reg_note (insn, REG_EQUAL,
3071                             gen_rtx_MULT (nmode, tem,
3072                                           gen_int_mode (val_so_far, nmode)),
3073                             accum_inner);
3074         }
3075     }
3076
3077   if (variant == negate_variant)
3078     {
3079       val_so_far = -val_so_far;
3080       accum = expand_unop (mode, neg_optab, accum, target, 0);
3081     }
3082   else if (variant == add_variant)
3083     {
3084       val_so_far = val_so_far + 1;
3085       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3086     }
3087
3088   /* Compare only the bits of val and val_so_far that are significant
3089      in the result mode, to avoid sign-/zero-extension confusion.  */
3090   nmode = GET_MODE_INNER (mode);
3091   if (nmode == VOIDmode)
3092     nmode = mode;
3093   val &= GET_MODE_MASK (nmode);
3094   val_so_far &= GET_MODE_MASK (nmode);
3095   gcc_assert (val == val_so_far);
3096
3097   return accum;
3098 }
3099
3100 /* Perform a multiplication and return an rtx for the result.
3101    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3102    TARGET is a suggestion for where to store the result (an rtx).
3103    UNSIGNEDP is handed on to the shift and multiply expanders.
3104    We check specially for a constant integer as OP1.
3105    A constant OP0 is swapped into the OP1 position first, so callers
3106    need not canonicalize the operand order themselves.  */
3107
3108 rtx
3109 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3110              int unsignedp)
3111 {
3112   enum mult_variant variant;
3113   struct algorithm algorithm;
3114   rtx scalar_op1;
3115   int max_cost;
3116   bool speed = optimize_insn_for_speed_p ();
3117   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3118   /* Move a constant OP0 into the OP1 position.  */
3119   if (CONSTANT_P (op0))
3120     std::swap (op0, op1);
3121
3122   /* For vectors, there are several simplifications that can be made if
3123      all elements of the vector constant are identical.  */
3124   scalar_op1 = op1;
3125   if (GET_CODE (op1) == CONST_VECTOR)
3126     {
3127       int i, n = CONST_VECTOR_NUNITS (op1);
3128       scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3129       for (i = 1; i < n; ++i)
3130         if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3131           goto skip_scalar;  /* Elements differ.  */
3132     }
3133
3134   if (INTEGRAL_MODE_P (mode))
3135     {
3136       rtx fake_reg;
3137       HOST_WIDE_INT coeff;
3138       bool is_neg;
3139       int mode_bitsize;
3140
3141       if (op1 == CONST0_RTX (mode))
3142         return op1;
3143       if (op1 == CONST1_RTX (mode))
3144         return op0;
3145       if (op1 == CONSTM1_RTX (mode))
3146         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3147                             op0, target, 0);
3148       /* With DO_TRAPV set, use the smulv_optab multiply at the end.  */
3149       if (do_trapv)
3150         goto skip_synth;
3151
3152       /* If mode is integer vector mode, check if the backend supports
3153          vector lshift (by scalar or vector) at all.  If not, we can't use
3154          synthesized multiply.  */
3155       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3156           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3157           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3158         goto skip_synth;
3159
3160       /* These are the operations that are potentially turned into
3161          a sequence of shifts and additions.  */
3162       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3163
3164       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3165          less than or equal in size to `unsigned int' this doesn't matter.
3166          If the mode is larger than `unsigned int', then synth_mult works
3167          only if the constant value exactly fits in an `unsigned int' without
3168          any truncation.  This means that multiplying by negative values does
3169          not work; results are off by 2^32 on a 32 bit machine.  */
3170       if (CONST_INT_P (scalar_op1))
3171         {
3172           coeff = INTVAL (scalar_op1);
3173           is_neg = coeff < 0;
3174         }
3175 #if TARGET_SUPPORTS_WIDE_INT
3176       else if (CONST_WIDE_INT_P (scalar_op1))
3177 #else
3178       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3179 #endif
3180         {
3181           int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3182           /* Perfect power of 2 (other than 1, which is handled above).  */
3183           if (shift > 0)
3184             return expand_shift (LSHIFT_EXPR, mode, op0,
3185                                  shift, target, unsignedp);
3186           else
3187             goto skip_synth;
3188         }
3189       else
3190         goto skip_synth;
3191
3192       /* We used to test optimize here, on the grounds that it's better to
3193          produce a smaller program when -O is not used.  But this causes
3194          such a terrible slowdown sometimes that it seems better to always
3195          use synth_mult.  */
3196
3197       /* Special case powers of two.  */
3198       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3199           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3200         return expand_shift (LSHIFT_EXPR, mode, op0,
3201                              floor_log2 (coeff), target, unsignedp);
3202       /* FAKE_REG only stands in for OP0 in the MULT rtx that is costed.  */
3203       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3204
3205       /* Attempt to handle multiplication of DImode values by negative
3206          coefficients, by performing the multiplication by a positive
3207          multiplier and then inverting the result.  */
3208       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3209         {
3210           /* It's safe to use -coeff even for INT_MIN, as the
3211              result is interpreted as an unsigned coefficient.
3212              Exclude cost of op0 from max_cost to match the cost
3213              calculation of the synth_mult.  */
3214           coeff = -(unsigned HOST_WIDE_INT) coeff;
3215           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3216                                     mode, speed)
3217                       - neg_cost (speed, mode));
3218           if (max_cost <= 0)
3219             goto skip_synth;
3220
3221           /* Special case powers of two.  */
3222           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3223             {
3224               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3225                                        floor_log2 (coeff), target, unsignedp);
3226               return expand_unop (mode, neg_optab, temp, target, 0);
3227             }
3228
3229           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3230                                    max_cost))
3231             {
3232               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3233                                             &algorithm, variant);
3234               return expand_unop (mode, neg_optab, temp, target, 0);
3235             }
3236           goto skip_synth;
3237         }
3238
3239       /* Exclude cost of op0 from max_cost to match the cost
3240          calculation of the synth_mult.  */
3241       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3242       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3243         return expand_mult_const (mode, op0, coeff, target,
3244                                   &algorithm, variant);
3245     }
3246  skip_synth:
3247
3248   /* Expand x*2.0 as x+x.  */
3249   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3250     {
3251       REAL_VALUE_TYPE d;
3252       REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3253
3254       if (REAL_VALUES_EQUAL (d, dconst2))
3255         {
3256           op0 = force_reg (GET_MODE (op0), op0);
3257           return expand_binop (mode, add_optab, op0, op0,
3258                                target, unsignedp, OPTAB_LIB_WIDEN);
3259         }
3260     }
3261  skip_scalar:
3262
3263   /* This used to use umul_optab if unsigned, but for non-widening multiply
3264      there is no difference between signed and unsigned.  */
3265   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3266                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3267   gcc_assert (op0);
3268   return op0;
3269 }
3270
3271 /* Estimate the cost of multiplying a register by the constant COEFF in
3272    MODE (SPEED is passed on to set_src_cost): the cost of the algorithm
3273    choose_mult_variant picks, else that of a MULT rtx of two registers.  */
3274
3275 int
3276 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3277 {
3278   struct algorithm alg;
3279   enum mult_variant alg_variant;
3280
3281   /* Stand-in register, used only to build the MULT rtx that is costed.  */
3282   rtx scratch = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3283   rtx generic_mult = gen_rtx_MULT (mode, scratch, scratch);
3284   int limit = set_src_cost (generic_mult, mode, speed);
3285   if (!choose_mult_variant (mode, coeff, &alg, &alg_variant, limit))
3286     return limit;
3287   return alg.cost.cost;
3288 }
3289
3290 /* Perform a widening multiplication and return an rtx for the result.
3291    MODE is the mode of the result; OP0 and OP1 are what to multiply (rtx's);
3292    TARGET is a suggestion for where to store the result (an rtx).
3293    THIS_OPTAB is the optab we should use; it must be either umul_widen_optab
3294    or smul_widen_optab.  On the shift/add paths OP0 is converted to MODE,
3295    as unsigned exactly when THIS_OPTAB is umul_widen_optab.
3296    We check specially for a constant integer as OP1, comparing the
3297    cost of a widening multiply against the cost of a sequence of shifts
3298    and adds; otherwise THIS_OPTAB is expanded directly.  */
3299
3300 rtx
3301 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3302                       int unsignedp, optab this_optab)
3303 {
3304   bool speed = optimize_insn_for_speed_p ();
3305   rtx cop1;
3306   /* COP1 is OP1 converted to MODE; synthesis needs it to stay a CONST_INT.  */
3307   if (CONST_INT_P (op1)
3308       && GET_MODE (op0) != VOIDmode
3309       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3310                                 this_optab == umul_widen_optab))
3311       && CONST_INT_P (cop1)
3312       && (INTVAL (cop1) >= 0
3313           || HWI_COMPUTABLE_MODE_P (mode)))
3314     {
3315       HOST_WIDE_INT coeff = INTVAL (cop1);
3316       int max_cost;
3317       enum mult_variant variant;
3318       struct algorithm algorithm;
3319
3320       if (coeff == 0)
3321         return CONST0_RTX (mode);
3322
3323       /* Special case powers of two.  */
3324       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3325         {
3326           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3327           return expand_shift (LSHIFT_EXPR, mode, op0,
3328                                floor_log2 (coeff), target, unsignedp);
3329         }
3330
3331       /* Exclude cost of op0 from max_cost to match the cost
3332          calculation of the synth_mult.  */
3333       max_cost = mul_widen_cost (speed, mode);
3334       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3335                                max_cost))
3336         {
3337           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3338           return expand_mult_const (mode, op0, coeff, target,
3339                                     &algorithm, variant);
3340         }
3341     }
3342   return expand_binop (mode, this_optab, op0, op1, target,
3343                        unsignedp, OPTAB_LIB_WIDEN);
3344 }
3345 \f
3346 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3347    replace division by D, and put the least significant N bits of the result
3348    in *MULTIPLIER_PTR and return the most significant bit.
3349
3350    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3351    needed precision is in PRECISION (should be <= N).
3352
3353    PRECISION should be as small as possible so this function can choose
3354    multiplier more freely.
3355
3356    The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3357    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3358
3359    Using this function, x/D will be equal to (x * m) >> (N + *POST_SHIFT_PTR),
3360    where m is the full N + 1 bit multiplier.  */
3361
3362 unsigned HOST_WIDE_INT
3363 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3364                    unsigned HOST_WIDE_INT *multiplier_ptr,
3365                    int *post_shift_ptr, int *lgup_ptr)
3366 {
3367   int lgup, post_shift, pow, pow2;
3368
3369   /* lgup = ceil(log2(divisor)); */
3370   lgup = ceil_log2 (d);
3371
3372   gcc_assert (lgup <= n);
3373
3374   pow = n + lgup;
3375   pow2 = n + lgup - precision;
3376
3377   /* mlow = 2^(N + lgup)/d */
3378   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3379   wide_int mlow = wi::udiv_trunc (val, d);
3380
3381   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3382   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3383   wide_int mhigh = wi::udiv_trunc (val, d);
3384
3385   /* If precision == N, then mlow, mhigh exceed 2^N
3386      (but they do not exceed 2^(N+1)).  */
3387
3388   /* Reduce to lowest terms.  */
3389   for (post_shift = lgup; post_shift > 0; post_shift--)
3390     {
3391       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3392                                                        HOST_BITS_PER_WIDE_INT);
3393       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3394                                                        HOST_BITS_PER_WIDE_INT);
3395       if (ml_lo >= mh_lo)
3396         break;
3397
3398       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3399       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3400     }
3401
3402   *post_shift_ptr = post_shift;
3403   *lgup_ptr = lgup;
3404   if (n < HOST_BITS_PER_WIDE_INT)
3405     {
3406       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3407       *multiplier_ptr = mhigh.to_uhwi () & mask;
3408       /* Bit N is set iff MHIGH > MASK; MHIGH == MASK still fits in N bits.  */
3409       return mhigh.to_uhwi () > mask;
3410     }
3411   else
3412     {
3413       *multiplier_ptr = mhigh.to_uhwi ();
3414       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3415     }
3416 }
3417
3418 /* Return the multiplicative inverse of X modulo 2**N, that is, the Y
3419    with X * Y congruent to 1 (mod 2**N).  X is assumed to be odd.  */
3420
3421 static unsigned HOST_WIDE_INT
3422 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3423 {
3424   /* Keep only the low N bits.  A shift by the full width of the type
3425      would be undefined, so that case gets an all-ones mask directly.  */
3426   unsigned HOST_WIDE_INT low_bits = ~(unsigned HOST_WIDE_INT) 0;
3427   if (n != HOST_BITS_PER_WIDE_INT)
3428     low_bits = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3429
3430   /* Because X is odd, X * X == 1 (mod 2^3), so X itself is an inverse
3431      that is good to three bits.  Each refinement step below doubles
3432      the number of correct low-order bits.  */
3433   unsigned HOST_WIDE_INT inverse = x;
3434
3435   for (int good_bits = 3; good_bits < n; good_bits *= 2)
3436     {
3437       unsigned HOST_WIDE_INT correction = 2 - x * inverse;
3438
3439       /* Reduce modulo 2^N after every step.  */
3440       inverse = (inverse * correction) & low_bits;
3441     }
3442
3443   return inverse;
3444 }
3445
3446 /* Correct ADJ_OPERAND, the high half of the product OP0 x OP1 computed
3447    with the wrong signedness, and return the corrected value (in TARGET
3448    if that is convenient).  MODE is the mode of operation.
3449
3450    A nonzero UNSIGNEDP turns a signed high part into the unsigned one by
3451    adding the correction terms; a zero UNSIGNEDP turns an unsigned high
3452    part into the signed one by subtracting them.  The two terms are
3453    (OP0 >> (bitsize - 1)) & OP1 and (OP1 >> (bitsize - 1)) & OP0, where
3454    the shifts are requested with a zero UNSIGNEDP argument.  */
3455
3456 rtx
3457 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3458                              rtx op1, rtx target, int unsignedp)
3459 {
3460   const enum rtx_code fixup_code = unsignedp ? PLUS : MINUS;
3461   const int sign_pos = GET_MODE_BITSIZE (mode) - 1;
3462   rtx sign, term, fixup;
3463
3464   /* First term: OP1 ANDed with OP0 shifted right by SIGN_POS.  */
3465   sign = expand_shift (RSHIFT_EXPR, mode, op0, sign_pos, NULL_RTX, 0);
3466   term = expand_and (mode, sign, op1, NULL_RTX);
3467   fixup = gen_rtx_fmt_ee (fixup_code, mode, adj_operand, term);
3468   adj_operand = force_operand (fixup, adj_operand);
3469
3470   /* Second term: OP0 ANDed with OP1 shifted right by SIGN_POS.  */
3471   sign = expand_shift (RSHIFT_EXPR, mode, op1, sign_pos, NULL_RTX, 0);
3472   term = expand_and (mode, sign, op0, NULL_RTX);
3473   fixup = gen_rtx_fmt_ee (fixup_code, mode, adj_operand, term);
3474
3475   /* TARGET is handed to force_operand as the place for the final value.  */
3476   return force_operand (fixup, target);
3477 }
3478
3479 /* Helper for expmed_mult_highpart: return the high MODE-sized part of OP.
3480    A word_mode MODE uses gen_highpart; any other MODE shifts OP right by
3481    the bitsize of MODE in GET_MODE_WIDER_MODE (MODE) and converts back.  */
3482
3483 static rtx
3484 extract_high_half (machine_mode mode, rtx op)
3485 {
3486   if (mode == word_mode)
3487     return gen_highpart (mode, op);
3488
3489   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3490
3491   const machine_mode wide = GET_MODE_WIDER_MODE (mode);
3492   rtx shifted = expand_shift (RSHIFT_EXPR, wide, op,
3493                               GET_MODE_BITSIZE (mode), NULL_RTX, 1);
3494   return convert_modes (mode, wide, shifted, 0);
3495 }
3496
3497 /* Like expmed_mult_highpart, but only consider using a multiplication
3498    optab.  OP1 is an rtx for the constant operand; 0 means nothing applied.  */
3499
3500 static rtx
3501 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3502                             rtx target, int unsignedp, int max_cost)
3503 {
3504   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3505   machine_mode wider_mode;
3506   optab moptab;
3507   rtx tem;
3508   int size;
3509   bool speed = optimize_insn_for_speed_p ();
3510   /* NARROW_OP1 is OP1's value as a constant of MODE (via gen_int_mode).  */
3511   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3512
3513   wider_mode = GET_MODE_WIDER_MODE (mode);
3514   size = GET_MODE_BITSIZE (mode);
3515
3516   /* Firstly, try using a multiplication insn that only generates the needed
3517      high part of the product, and in the sign flavor of unsignedp.  */
3518   if (mul_highpart_cost (speed, mode) < max_cost)
3519     {
3520       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3521       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3522                           unsignedp, OPTAB_DIRECT);
3523       if (tem)
3524         return tem;
3525     }
3526
3527   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3528      Need to adjust the result after the multiplication.  */
3529   if (size - 1 < BITS_PER_WORD
3530       && (mul_highpart_cost (speed, mode)
3531           + 2 * shift_cost (speed, mode, size-1)
3532           + 4 * add_cost (speed, mode) < max_cost))
3533     {
3534       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3535       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3536                           unsignedp, OPTAB_DIRECT);
3537       if (tem)
3538         /* We used the wrong signedness.  Adjust the result.  */
3539         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3540                                             tem, unsignedp);
3541     }
3542
3543   /* Try widening multiplication.  */
3544   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3545   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3546       && mul_widen_cost (speed, wider_mode) < max_cost)
3547     {
3548       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3549                           unsignedp, OPTAB_WIDEN);
3550       if (tem)
3551         return extract_high_half (mode, tem);
3552     }
3553
3554   /* Try widening the mode and perform a non-widening multiplication.  */
3555   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3556       && size - 1 < BITS_PER_WORD
3557       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3558           < max_cost))
3559     {
3560       rtx_insn *insns;
3561       rtx wop0, wop1;
3562
3563       /* We need to widen the operands, for example to ensure the
3564          constant multiplier is correctly sign or zero extended.
3565          Use a sequence to clean-up any instructions emitted by
3566          the conversions if things don't work out.  */
3567       start_sequence ();
3568       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3569       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3570       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3571                           unsignedp, OPTAB_WIDEN);
3572       insns = get_insns ();
3573       end_sequence ();
3574
3575       if (tem)
3576         {
3577           emit_insn (insns);
3578           return extract_high_half (mode, tem);
3579         }
3580     }
3581
3582   /* Try widening multiplication of opposite signedness, and adjust.  */
3583   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3584   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3585       && size - 1 < BITS_PER_WORD
3586       && (mul_widen_cost (speed, wider_mode)
3587           + 2 * shift_cost (speed, mode, size-1)
3588           + 4 * add_cost (speed, mode) < max_cost))
3589     {
3590       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3591                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3592       if (tem != 0)
3593         {
3594           tem = extract_high_half (mode, tem);
3595           /* We used the wrong signedness.  Adjust the result.  */
3596           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3597                                               target, unsignedp);
3598         }
3599     }
3600   /* Nothing was both cheaper than MAX_COST and expandable.  */
3601   return 0;
3602 }
3603
3604 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3605    putting the high half of the result in TARGET if that is convenient,
3606    and return where the result is.  If the operation can not be performed,
3607    0 is returned.
3608
3609    MODE is the mode of operation and result.
3610
3611    UNSIGNEDP nonzero means unsigned multiply.
3612
3613    MAX_COST is the total allowed cost for the expanded RTL.  */
3614
3615 static rtx
3616 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3617                       rtx target, int unsignedp, int max_cost)
3618 {
3619   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3620   unsigned HOST_WIDE_INT cnst1;
3621   int extra_cost;
3622   bool sign_adjust = false;
3623   enum mult_variant variant;
3624   struct algorithm alg;
3625   rtx tem;
3626   bool speed = optimize_insn_for_speed_p ();
3627
3628   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3629   /* We can't support modes that fail HWI_COMPUTABLE_MODE_P.  */
3630   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3631   /* CNST1 is OP1's value reduced to the bits of MODE, as unsigned.  */
3632   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3633
3634   /* We can't optimize modes wider than BITS_PER_WORD.
3635      ??? We might be able to perform double-word arithmetic if
3636      mode == word_mode, however all the cost calculations in
3637      synth_mult etc. assume single-word operations.  */
3638   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3639     return expmed_mult_highpart_optab (mode, op0, op1, target,
3640                                        unsignedp, max_cost);
3641
3642   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3643
3644   /* Check whether we try to multiply by a negative constant.  */
3645   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3646     {
3647       sign_adjust = true;
3648       extra_cost += add_cost (speed, mode);
3649     }
3650
3651   /* See whether shift/add multiplication is cheap enough.  */
3652   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3653                            max_cost - extra_cost))
3654     {
3655       /* See whether the specialized multiplication optabs are
3656          cheaper than the shift/add version.  */
3657       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3658                                         alg.cost.cost + extra_cost);
3659       if (tem)
3660         return tem;
3661       /* Otherwise emit the shift/add multiply in WIDER_MODE.  */
3662       tem = convert_to_mode (wider_mode, op0, unsignedp);
3663       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3664       tem = extract_high_half (mode, tem);
3665
3666       /* CNST1 was taken as unsigned (too big by 2^size): subtract OP0.  */
3667       if (sign_adjust)
3668         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3669
3670       return tem;
3671     }
3672   return expmed_mult_highpart_optab (mode, op0, op1, target,
3673                                      unsignedp, max_cost);
3674 }
3675
3676
3677 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3678
3679 static rtx
3680 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3681 {
3682   rtx result, temp, shift;
3683   rtx_code_label *label;
3684   int logd;
3685   int prec = GET_MODE_PRECISION (mode);
3686   /* D is a power of two, so LOGD is its exact base-2 logarithm.  */
3687   logd = floor_log2 (d);
3688   result = gen_reg_rtx (mode);
3689
3690   /* Avoid conditional branches when they're expensive.  */
3691   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3692       && optimize_insn_for_speed_p ())
3693     {
3694       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3695                                       mode, 0, -1);
3696       if (signmask)
3697         {
3698           HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3699           signmask = force_reg (mode, signmask);
3700           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3701
3702           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3703              which instruction sequence to use.  If logical right shifts
3704              are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3705              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3706
3707           temp = gen_rtx_LSHIFTRT (mode, result, shift);  /* For costing.  */
3708           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3709               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3710                   > COSTS_N_INSNS (2)))
3711             {
3712               temp = expand_binop (mode, xor_optab, op0, signmask,
3713                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3714               temp = expand_binop (mode, sub_optab, temp, signmask,
3715                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3716               temp = expand_binop (mode, and_optab, temp,
3717                                    gen_int_mode (masklow, mode),
3718                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3719               temp = expand_binop (mode, xor_optab, temp, signmask,
3720                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3721               temp = expand_binop (mode, sub_optab, temp, signmask,
3722                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3723             }
3724           else
3725             {
3726               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3727                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3728               signmask = force_reg (mode, signmask);
3729
3730               temp = expand_binop (mode, add_optab, op0, signmask,
3731                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3732               temp = expand_binop (mode, and_optab, temp,
3733                                    gen_int_mode (masklow, mode),
3734                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3735               temp = expand_binop (mode, sub_optab, temp, signmask,
3736                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3737             }
3738           return temp;
3739         }
3740     }
3741
3742   /* Mask contains the mode's signbit and the significant bits of the
3743      modulus.  By including the signbit in the operation, many targets
3744      can avoid an explicit compare operation in the following comparison
3745      against zero.  */
3746   wide_int mask = wi::mask (logd, false, prec);
3747   mask = wi::set_bit (mask, prec - 1);
3748
3749   temp = expand_binop (mode, and_optab, op0,
3750                        immed_wide_int_const (mask, mode),
3751                        result, 1, OPTAB_LIB_WIDEN);
3752   if (temp != result)
3753     emit_move_insn (result, temp);
3754
3755   label = gen_label_rtx ();
3756   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3757   /* Only reached when RESULT < 0 (the jump above skips it otherwise).  */
3758   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3759                        0, OPTAB_LIB_WIDEN);
3760
3761   mask = wi::mask (logd, true, prec);
3762   temp = expand_binop (mode, ior_optab, temp,
3763                        immed_wide_int_const (mask, mode),
3764                        result, 1, OPTAB_LIB_WIDEN);
3765   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3766                        0, OPTAB_LIB_WIDEN);
3767   if (temp != result)
3768     emit_move_insn (result, temp);
3769   emit_label (label);
3770   return result;
3771 }
3772
3773 /* Expand signed division of OP0 by a power of two D in mode MODE and
3774    return the quotient rtx.  Only called for positive values of D.  */
3775
3776 static rtx
3777 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3778 {
3779   rtx temp;
3780   rtx_code_label *label;
3781   int logd = floor_log2 (d);
3782
3783   if (d == 2 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 1)
3784     {
3785       temp = gen_reg_rtx (mode);
3786       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3787       if (temp != NULL_RTX)  /* emit_store_flag can fail; try the next way.  */
3788         {
3789           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3790                                0, OPTAB_LIB_WIDEN);
3791           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3792         }
3793     }
3794
3795   if (HAVE_conditional_move
3796       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3797     {
3798       rtx temp2;
3799
3800       start_sequence ();
3801       temp2 = copy_to_mode_reg (mode, op0);
3802       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3803                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3804       temp = force_reg (mode, temp);
3805
3806       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3807       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3808                                      mode, temp, temp2, mode, 0);
3809       if (temp2)
3810         {
3811           rtx_insn *seq = get_insns ();
3812           end_sequence ();
3813           emit_insn (seq);
3814           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3815         }
3816       end_sequence ();
3817     }
3818
3819   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3820     {
3821       int ushift = GET_MODE_BITSIZE (mode) - logd;
3822       temp = gen_reg_rtx (mode);
3823       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3824       if (temp != NULL_RTX)  /* Otherwise fall back to the branch below.  */
3825         {
3826           if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3827               || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3828                  > COSTS_N_INSNS (1))
3829             temp = expand_binop (mode, and_optab, temp,
3830                                  gen_int_mode (d - 1, mode),
3831                                  NULL_RTX, 0, OPTAB_LIB_WIDEN);
3832           else
3833             temp = expand_shift (RSHIFT_EXPR, mode, temp, ushift, NULL_RTX, 1);
3834           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3835                                0, OPTAB_LIB_WIDEN);
3836           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3837         }
3838     }
3839
3840   label = gen_label_rtx ();
3841   temp = copy_to_mode_reg (mode, op0);
3842   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3843   expand_inc (temp, gen_int_mode (d - 1, mode));
3844   emit_label (label);
3845   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3846 }
3847 \f
3848 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3849    if that is convenient, and returning where the result is.
3850    You may request either the quotient or the remainder as the result;
3851    specify REM_FLAG nonzero to get the remainder.
3852
3853    CODE is the expression code for which kind of division this is;
3854    it controls how rounding is done.  MODE is the machine mode to use.
3855    UNSIGNEDP nonzero means do unsigned division.  */
3856
3857 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3858    and then correct it by or'ing in missing high bits
3859    if result of ANDI is nonzero.
3860    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3861    This could optimize to a bfexts instruction.
3862    But C doesn't use these operations, so their optimizations are
3863    left for later.  */
3864 /* ??? For modulo, we don't actually need the highpart of the first product,
3865    the low part will do nicely.  And for small divisors, the second multiply
3866    can also be a low-part only multiply or even be completely left out.
3867    E.g. to calculate the remainder of a division by 3 with a 32 bit
3868    multiply, multiply with 0x55555556 and extract the upper two bits;
3869    the result is exact for inputs up to 0x1fffffff.
3870    The input range can be reduced by using cross-sum rules.
3871    For odd divisors >= 3, the following table gives right shift counts
3872    so that if a number is shifted by an integer multiple of the given
3873    amount, the remainder stays the same:
3874    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3875    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3876    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3877    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3878    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3879
3880    Cross-sum rules for even numbers can be derived by leaving untouched as
3881    many low-order bits as the divisor has trailing zero bits.
3882    E.g. if x is an unsigned 32 bit number:
3883    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3884    */
3885
3886 rtx
3887 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3888                rtx op0, rtx op1, rtx target, int unsignedp)
3889 {
3890   machine_mode compute_mode;
3891   rtx tquotient;
3892   rtx quotient = 0, remainder = 0;
3893   rtx_insn *last;
3894   int size;
3895   rtx_insn *insn;
3896   optab optab1, optab2;
3897   int op1_is_constant, op1_is_pow2 = 0;
3898   int max_cost, extra_cost;
3899   static HOST_WIDE_INT last_div_const = 0;
3900   bool speed = optimize_insn_for_speed_p ();
3901
3902   op1_is_constant = CONST_INT_P (op1);
3903   if (op1_is_constant)
3904     {
3905       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3906       if (unsignedp)
3907         ext_op1 &= GET_MODE_MASK (mode);
3908       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3909                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3910     }
3911
3912   /*
3913      This is the structure of expand_divmod:
3914
3915      First comes code to fix up the operands so we can perform the operations
3916      correctly and efficiently.
3917
3918      Second comes a switch statement with code specific for each rounding mode.
3919      For some special operands this code emits all RTL for the desired
3920      operation, for other cases, it generates only a quotient and stores it in
3921      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3922      to indicate that it has not done anything.
3923
3924      Last comes code that finishes the operation.  If QUOTIENT is set and
3925      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3926      QUOTIENT is not set, it is computed using trunc rounding.
3927
3928      We try to generate special code for division and remainder when OP1 is a
3929      constant.  If |OP1| = 2**n we can use shifts and some other fast
3930      operations.  For other values of OP1, we compute a carefully selected
3931      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3932      by m.
3933
3934      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3935      half of the product.  Different strategies for generating the product are
3936      implemented in expmed_mult_highpart.
3937
3938      If what we actually want is the remainder, we generate that by another
3939      by-constant multiplication and a subtraction.  */
3940
3941   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3942      code below will malfunction if we are, so check here and handle
3943      the special case if so.  */
3944   if (op1 == const1_rtx)
3945     return rem_flag ? const0_rtx : op0;
3946
3947     /* When dividing by -1, we could get an overflow.
3948      negv_optab can handle overflows.  */
3949   if (! unsignedp && op1 == constm1_rtx)
3950     {
3951       if (rem_flag)
3952         return const0_rtx;
3953       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3954                           ? negv_optab : neg_optab, op0, target, 0);
3955     }
3956
3957   if (target
3958       /* Don't use the function value register as a target
3959          since we have to read it as well as write it,
3960          and function-inlining gets confused by this.  */
3961       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3962           /* Don't clobber an operand while doing a multi-step calculation.  */
3963           || ((rem_flag || op1_is_constant)
3964               && (reg_mentioned_p (target, op0)
3965                   || (MEM_P (op0) && MEM_P (target))))
3966           || reg_mentioned_p (target, op1)
3967           || (MEM_P (op1) && MEM_P (target))))
3968     target = 0;
3969
3970   /* Get the mode in which to perform this computation.  Normally it will
3971      be MODE, but sometimes we can't do the desired operation in MODE.
3972      If so, pick a wider mode in which we can do the operation.  Convert
3973      to that mode at the start to avoid repeated conversions.
3974
3975      First see what operations we need.  These depend on the expression
3976      we are evaluating.  (We assume that divxx3 insns exist under the
3977      same conditions that modxx3 insns and that these insns don't normally
3978      fail.  If these assumptions are not correct, we may generate less
3979      efficient code in some cases.)
3980
3981      Then see if we find a mode in which we can open-code that operation
3982      (either a division, modulus, or shift).  Finally, check for the smallest
3983      mode for which we can do the operation with a library call.  */
3984
3985   /* We might want to refine this now that we have division-by-constant
3986      optimization.  Since expmed_mult_highpart tries so many variants, it is
3987      not straightforward to generalize this.  Maybe we should make an array
3988      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3989
3990   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3991             ? (unsignedp ? lshr_optab : ashr_optab)
3992             : (unsignedp ? udiv_optab : sdiv_optab));
3993   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3994             ? optab1
3995             : (unsignedp ? udivmod_optab : sdivmod_optab));
3996
3997   for (compute_mode = mode; compute_mode != VOIDmode;
3998        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3999     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4000         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4001       break;
4002
4003   if (compute_mode == VOIDmode)
4004     for (compute_mode = mode; compute_mode != VOIDmode;
4005          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4006       if (optab_libfunc (optab1, compute_mode)
4007           || optab_libfunc (optab2, compute_mode))
4008         break;
4009
4010   /* If we still couldn't find a mode, use MODE, but expand_binop will
4011      probably die.  */
4012   if (compute_mode == VOIDmode)
4013     compute_mode = mode;
4014
4015   if (target && GET_MODE (target) == compute_mode)
4016     tquotient = target;
4017   else
4018     tquotient = gen_reg_rtx (compute_mode);
4019
4020   size = GET_MODE_BITSIZE (compute_mode);
4021 #if 0
4022   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4023      (mode), and thereby get better code when OP1 is a constant.  Do that
4024      later.  It will require going over all usages of SIZE below.  */
4025   size = GET_MODE_BITSIZE (mode);
4026 #endif
4027
4028   /* Only deduct something for a REM if the last divide done was
4029      for a different constant.   Then set the constant of the last
4030      divide.  */
4031   max_cost = (unsignedp
4032               ? udiv_cost (speed, compute_mode)
4033               : sdiv_cost (speed, compute_mode));
4034   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4035                      && INTVAL (op1) == last_div_const))
4036     max_cost -= (mul_cost (speed, compute_mode)
4037                  + add_cost (speed, compute_mode));
4038
4039   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4040
4041   /* Now convert to the best mode to use.  */
4042   if (compute_mode != mode)
4043     {
4044       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4045       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4046
4047       /* convert_modes may have placed op1 into a register, so we
4048          must recompute the following.  */
4049       op1_is_constant = CONST_INT_P (op1);
4050       op1_is_pow2 = (op1_is_constant
4051                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4052                           || (! unsignedp
4053                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4054     }
4055
4056   /* If one of the operands is a volatile MEM, copy it into a register.  */
4057
4058   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4059     op0 = force_reg (compute_mode, op0);
4060   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4061     op1 = force_reg (compute_mode, op1);
4062
4063   /* If we need the remainder or if OP1 is constant, we need to
4064      put OP0 in a register in case it has any queued subexpressions.  */
4065   if (rem_flag || op1_is_constant)
4066     op0 = force_reg (compute_mode, op0);
4067
4068   last = get_last_insn ();
4069
4070   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4071   if (unsignedp)
4072     {
4073       if (code == FLOOR_DIV_EXPR)
4074         code = TRUNC_DIV_EXPR;
4075       if (code == FLOOR_MOD_EXPR)
4076         code = TRUNC_MOD_EXPR;
4077       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4078         code = TRUNC_DIV_EXPR;
4079     }
4080
4081   if (op1 != const0_rtx)
4082     switch (code)
4083       {
4084       case TRUNC_MOD_EXPR:
4085       case TRUNC_DIV_EXPR:
4086         if (op1_is_constant)
4087           {
4088             if (unsignedp)
4089               {
4090                 unsigned HOST_WIDE_INT mh, ml;
4091                 int pre_shift, post_shift;
4092                 int dummy;
4093                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4094                                             & GET_MODE_MASK (compute_mode));
4095
4096                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4097                   {
4098                     pre_shift = floor_log2 (d);
4099                     if (rem_flag)
4100                       {
4101                         unsigned HOST_WIDE_INT mask
4102                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4103                         remainder
4104                           = expand_binop (compute_mode, and_optab, op0,
4105                                           gen_int_mode (mask, compute_mode),
4106                                           remainder, 1,
4107                                           OPTAB_LIB_WIDEN);
4108                         if (remainder)
4109                           return gen_lowpart (mode, remainder);
4110                       }
4111                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4112                                              pre_shift, tquotient, 1);
4113                   }
4114                 else if (size <= HOST_BITS_PER_WIDE_INT)
4115                   {
4116                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4117                       {
4118                         /* Most significant bit of divisor is set; emit an scc
4119                            insn.  */
4120                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4121                                                           compute_mode, 1, 1);
4122                       }
4123                     else
4124                       {
4125                         /* Find a suitable multiplier and right shift count
4126                            instead of multiplying with D.  */
4127
4128                         mh = choose_multiplier (d, size, size,
4129                                                 &ml, &post_shift, &dummy);
4130
4131                         /* If the suggested multiplier is more than SIZE bits,
4132                            we can do better for even divisors, using an
4133                            initial right shift.  */
4134                         if (mh != 0 && (d & 1) == 0)
4135                           {
4136                             pre_shift = floor_log2 (d & -d);
4137                             mh = choose_multiplier (d >> pre_shift, size,
4138                                                     size - pre_shift,
4139                                                     &ml, &post_shift, &dummy);
4140                             gcc_assert (!mh);
4141                           }
4142                         else
4143                           pre_shift = 0;
4144
4145                         if (mh != 0)
4146                           {
4147                             rtx t1, t2, t3, t4;
4148
4149                             if (post_shift - 1 >= BITS_PER_WORD)
4150                               goto fail1;
4151
4152                             extra_cost
4153                               = (shift_cost (speed, compute_mode, post_shift - 1)
4154                                  + shift_cost (speed, compute_mode, 1)
4155                                  + 2 * add_cost (speed, compute_mode));
4156                             t1 = expmed_mult_highpart
4157                               (compute_mode, op0,
4158                                gen_int_mode (ml, compute_mode),
4159                                NULL_RTX, 1, max_cost - extra_cost);
4160                             if (t1 == 0)
4161                               goto fail1;
4162                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4163                                                                op0, t1),
4164                                                 NULL_RTX);
4165                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4166                                                t2, 1, NULL_RTX, 1);
4167                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4168                                                               t1, t3),
4169                                                 NULL_RTX);
4170                             quotient = expand_shift
4171                               (RSHIFT_EXPR, compute_mode, t4,
4172                                post_shift - 1, tquotient, 1);
4173                           }
4174                         else
4175                           {
4176                             rtx t1, t2;
4177
4178                             if (pre_shift >= BITS_PER_WORD
4179                                 || post_shift >= BITS_PER_WORD)
4180                               goto fail1;
4181
4182                             t1 = expand_shift
4183                               (RSHIFT_EXPR, compute_mode, op0,
4184                                pre_shift, NULL_RTX, 1);
4185                             extra_cost
4186                               = (shift_cost (speed, compute_mode, pre_shift)
4187                                  + shift_cost (speed, compute_mode, post_shift));
4188                             t2 = expmed_mult_highpart
4189                               (compute_mode, t1,
4190                                gen_int_mode (ml, compute_mode),
4191                                NULL_RTX, 1, max_cost - extra_cost);
4192                             if (t2 == 0)
4193                               goto fail1;
4194                             quotient = expand_shift
4195                               (RSHIFT_EXPR, compute_mode, t2,
4196                                post_shift, tquotient, 1);
4197                           }
4198                       }
4199                   }
4200                 else            /* Too wide mode to use tricky code */
4201                   break;
4202
4203                 insn = get_last_insn ();
4204                 if (insn != last)
4205                   set_dst_reg_note (insn, REG_EQUAL,
4206                                     gen_rtx_UDIV (compute_mode, op0, op1),
4207                                     quotient);
4208               }
4209             else                /* TRUNC_DIV, signed */
4210               {
4211                 unsigned HOST_WIDE_INT ml;
4212                 int lgup, post_shift;
4213                 rtx mlr;
4214                 HOST_WIDE_INT d = INTVAL (op1);
4215                 unsigned HOST_WIDE_INT abs_d;
4216
4217                 /* Since d might be INT_MIN, we have to cast to
4218                    unsigned HOST_WIDE_INT before negating to avoid
4219                    undefined signed overflow.  */
4220                 abs_d = (d >= 0
4221                          ? (unsigned HOST_WIDE_INT) d
4222                          : - (unsigned HOST_WIDE_INT) d);
4223
4224                 /* n rem d = n rem -d */
4225                 if (rem_flag && d < 0)
4226                   {
4227                     d = abs_d;
4228                     op1 = gen_int_mode (abs_d, compute_mode);
4229                   }
4230
4231                 if (d == 1)
4232                   quotient = op0;
4233                 else if (d == -1)
4234                   quotient = expand_unop (compute_mode, neg_optab, op0,
4235                                           tquotient, 0);
4236                 else if (HOST_BITS_PER_WIDE_INT >= size
4237                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4238                   {
4239                     /* This case is not handled correctly below.  */
4240                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4241                                                 compute_mode, 1, 1);
4242                     if (quotient == 0)
4243                       goto fail1;
4244                   }
4245                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4246                          && (rem_flag
4247                              ? smod_pow2_cheap (speed, compute_mode)
4248                              : sdiv_pow2_cheap (speed, compute_mode))
4249                          /* We assume that cheap metric is true if the
4250                             optab has an expander for this mode.  */
4251                          && ((optab_handler ((rem_flag ? smod_optab
4252                                               : sdiv_optab),
4253                                              compute_mode)
4254                               != CODE_FOR_nothing)
4255                              || (optab_handler (sdivmod_optab,
4256                                                 compute_mode)
4257                                  != CODE_FOR_nothing)))
4258                   ;
4259                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4260                   {
4261                     if (rem_flag)
4262                       {
4263                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4264                         if (remainder)
4265                           return gen_lowpart (mode, remainder);
4266                       }
4267
4268                     if (sdiv_pow2_cheap (speed, compute_mode)
4269                         && ((optab_handler (sdiv_optab, compute_mode)
4270                              != CODE_FOR_nothing)
4271                             || (optab_handler (sdivmod_optab, compute_mode)
4272                                 != CODE_FOR_nothing)))
4273                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4274                                                 compute_mode, op0,
4275                                                 gen_int_mode (abs_d,
4276                                                               compute_mode),
4277                                                 NULL_RTX, 0);
4278                     else
4279                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4280
4281                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4282                        negate the quotient.  */
4283                     if (d < 0)
4284                       {
4285                         insn = get_last_insn ();
4286                         if (insn != last
4287                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4288                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4289                           set_dst_reg_note (insn, REG_EQUAL,
4290                                             gen_rtx_DIV (compute_mode, op0,
4291                                                          gen_int_mode
4292                                                            (abs_d,
4293                                                             compute_mode)),
4294                                             quotient);
4295
4296                         quotient = expand_unop (compute_mode, neg_optab,
4297                                                 quotient, quotient, 0);
4298                       }
4299                   }
4300                 else if (size <= HOST_BITS_PER_WIDE_INT)
4301                   {
4302                     choose_multiplier (abs_d, size, size - 1,
4303                                        &ml, &post_shift, &lgup);
4304                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4305                       {
4306                         rtx t1, t2, t3;
4307
4308                         if (post_shift >= BITS_PER_WORD
4309                             || size - 1 >= BITS_PER_WORD)
4310                           goto fail1;
4311
4312                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4313                                       + shift_cost (speed, compute_mode, size - 1)
4314                                       + add_cost (speed, compute_mode));
4315                         t1 = expmed_mult_highpart
4316                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4317                            NULL_RTX, 0, max_cost - extra_cost);
4318                         if (t1 == 0)
4319                           goto fail1;
4320                         t2 = expand_shift
4321                           (RSHIFT_EXPR, compute_mode, t1,
4322                            post_shift, NULL_RTX, 0);
4323                         t3 = expand_shift
4324                           (RSHIFT_EXPR, compute_mode, op0,
4325                            size - 1, NULL_RTX, 0);
4326                         if (d < 0)
4327                           quotient
4328                             = force_operand (gen_rtx_MINUS (compute_mode,
4329                                                             t3, t2),
4330                                              tquotient);
4331                         else
4332                           quotient
4333                             = force_operand (gen_rtx_MINUS (compute_mode,
4334                                                             t2, t3),
4335                                              tquotient);
4336                       }
4337                     else
4338                       {
4339                         rtx t1, t2, t3, t4;
4340
4341                         if (post_shift >= BITS_PER_WORD
4342                             || size - 1 >= BITS_PER_WORD)
4343                           goto fail1;
4344
4345                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4346                         mlr = gen_int_mode (ml, compute_mode);
4347                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4348                                       + shift_cost (speed, compute_mode, size - 1)
4349                                       + 2 * add_cost (speed, compute_mode));
4350                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4351                                                    NULL_RTX, 0,
4352                                                    max_cost - extra_cost);
4353                         if (t1 == 0)
4354                           goto fail1;
4355                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4356                                                           t1, op0),
4357                                             NULL_RTX);
4358                         t3 = expand_shift
4359                           (RSHIFT_EXPR, compute_mode, t2,
4360                            post_shift, NULL_RTX, 0);
4361                         t4 = expand_shift
4362                           (RSHIFT_EXPR, compute_mode, op0,
4363                            size - 1, NULL_RTX, 0);
4364                         if (d < 0)
4365                           quotient
4366                             = force_operand (gen_rtx_MINUS (compute_mode,
4367                                                             t4, t3),
4368                                              tquotient);
4369                         else
4370                           quotient
4371                             = force_operand (gen_rtx_MINUS (compute_mode,
4372                                                             t3, t4),
4373                                              tquotient);
4374                       }
4375                   }
4376                 else            /* Too wide mode to use tricky code */
4377                   break;
4378
4379                 insn = get_last_insn ();
4380                 if (insn != last)
4381                   set_dst_reg_note (insn, REG_EQUAL,
4382                                     gen_rtx_DIV (compute_mode, op0, op1),
4383                                     quotient);
4384               }
4385             break;
4386           }
4387       fail1:
4388         delete_insns_since (last);
4389         break;
4390
4391       case FLOOR_DIV_EXPR:
4392       case FLOOR_MOD_EXPR:
4393       /* We will come here only for signed operations.  */
4394         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4395           {
4396             unsigned HOST_WIDE_INT mh, ml;
4397             int pre_shift, lgup, post_shift;
4398             HOST_WIDE_INT d = INTVAL (op1);
4399
4400             if (d > 0)
4401               {
4402                 /* We could just as easily deal with negative constants here,
4403                    but it does not seem worth the trouble for GCC 2.6.  */
4404                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4405                   {
4406                     pre_shift = floor_log2 (d);
4407                     if (rem_flag)
4408                       {
4409                         unsigned HOST_WIDE_INT mask
4410                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4411                         remainder = expand_binop
4412                           (compute_mode, and_optab, op0,
4413                            gen_int_mode (mask, compute_mode),
4414                            remainder, 0, OPTAB_LIB_WIDEN);
4415                         if (remainder)
4416                           return gen_lowpart (mode, remainder);
4417                       }
4418                     quotient = expand_shift
4419                       (RSHIFT_EXPR, compute_mode, op0,
4420                        pre_shift, tquotient, 0);
4421                   }
4422                 else
4423                   {
4424                     rtx t1, t2, t3, t4;
4425
4426                     mh = choose_multiplier (d, size, size - 1,
4427                                             &ml, &post_shift, &lgup);
4428                     gcc_assert (!mh);
4429
4430                     if (post_shift < BITS_PER_WORD
4431                         && size - 1 < BITS_PER_WORD)
4432                       {
4433                         t1 = expand_shift
4434                           (RSHIFT_EXPR, compute_mode, op0,
4435                            size - 1, NULL_RTX, 0);
4436                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4437                                            NULL_RTX, 0, OPTAB_WIDEN);
4438                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4439                                       + shift_cost (speed, compute_mode, size - 1)
4440                                       + 2 * add_cost (speed, compute_mode));
4441                         t3 = expmed_mult_highpart
4442                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4443                            NULL_RTX, 1, max_cost - extra_cost);
4444                         if (t3 != 0)
4445                           {
4446                             t4 = expand_shift
4447                               (RSHIFT_EXPR, compute_mode, t3,
4448                                post_shift, NULL_RTX, 1);
4449                             quotient = expand_binop (compute_mode, xor_optab,
4450                                                      t4, t1, tquotient, 0,
4451                                                      OPTAB_WIDEN);
4452                           }
4453                       }
4454                   }
4455               }
4456             else
4457               {
4458                 rtx nsign, t1, t2, t3, t4;
4459                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4460                                                   op0, constm1_rtx), NULL_RTX);
4461                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4462                                    0, OPTAB_WIDEN);
4463                 nsign = expand_shift
4464                   (RSHIFT_EXPR, compute_mode, t2,
4465                    size - 1, NULL_RTX, 0);
4466                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4467                                     NULL_RTX);
4468                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4469                                     NULL_RTX, 0);
4470                 if (t4)
4471                   {
4472                     rtx t5;
4473                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4474                                       NULL_RTX, 0);
4475                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4476                                                             t4, t5),
4477                                               tquotient);
4478                   }
4479               }
4480           }
4481
4482         if (quotient != 0)
4483           break;
4484         delete_insns_since (last);
4485
4486         /* Try using an instruction that produces both the quotient and
4487            remainder, using truncation.  We can easily compensate the quotient
4488            or remainder to get floor rounding, once we have the remainder.
4489            Notice that we compute also the final remainder value here,
4490            and return the result right away.  */
4491         if (target == 0 || GET_MODE (target) != compute_mode)
4492           target = gen_reg_rtx (compute_mode);
4493
4494         if (rem_flag)
4495           {
4496             remainder
4497               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4498             quotient = gen_reg_rtx (compute_mode);
4499           }
4500         else
4501           {
4502             quotient
4503               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4504             remainder = gen_reg_rtx (compute_mode);
4505           }
4506
4507         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4508                                  quotient, remainder, 0))
4509           {
4510             /* This could be computed with a branch-less sequence.
4511                Save that for later.  */
4512             rtx tem;
4513             rtx_code_label *label = gen_label_rtx ();
4514             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4515             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4516                                 NULL_RTX, 0, OPTAB_WIDEN);
4517             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4518             expand_dec (quotient, const1_rtx);
4519             expand_inc (remainder, op1);
4520             emit_label (label);
4521             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4522           }
4523
4524         /* No luck with division elimination or divmod.  Have to do it
4525            by conditionally adjusting op0 *and* the result.  */
4526         {
4527           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4528           rtx adjusted_op0;
4529           rtx tem;
4530
4531           quotient = gen_reg_rtx (compute_mode);
4532           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4533           label1 = gen_label_rtx ();
4534           label2 = gen_label_rtx ();
4535           label3 = gen_label_rtx ();
4536           label4 = gen_label_rtx ();
4537           label5 = gen_label_rtx ();
4538           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4539           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4540           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4541                               quotient, 0, OPTAB_LIB_WIDEN);
4542           if (tem != quotient)
4543             emit_move_insn (quotient, tem);
4544           emit_jump_insn (targetm.gen_jump (label5));
4545           emit_barrier ();
4546           emit_label (label1);
4547           expand_inc (adjusted_op0, const1_rtx);
4548           emit_jump_insn (targetm.gen_jump (label4));
4549           emit_barrier ();
4550           emit_label (label2);
4551           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4552           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4553                               quotient, 0, OPTAB_LIB_WIDEN);
4554           if (tem != quotient)
4555             emit_move_insn (quotient, tem);
4556           emit_jump_insn (targetm.gen_jump (label5));
4557           emit_barrier ();
4558           emit_label (label3);
4559           expand_dec (adjusted_op0, const1_rtx);
4560           emit_label (label4);
4561           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4562                               quotient, 0, OPTAB_LIB_WIDEN);
4563           if (tem != quotient)
4564             emit_move_insn (quotient, tem);
4565           expand_dec (quotient, const1_rtx);
4566           emit_label (label5);
4567         }
4568         break;
4569
4570       case CEIL_DIV_EXPR:
4571       case CEIL_MOD_EXPR:
4572         if (unsignedp)
4573           {
4574             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4575               {
4576                 rtx t1, t2, t3;
4577                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4578                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4579                                    floor_log2 (d), tquotient, 1);
4580                 t2 = expand_binop (compute_mode, and_optab, op0,
4581                                    gen_int_mode (d - 1, compute_mode),
4582                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4583                 t3 = gen_reg_rtx (compute_mode);
4584                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4585                                       compute_mode, 1, 1);
4586                 if (t3 == 0)
4587                   {
4588                     rtx_code_label *lab;
4589                     lab = gen_label_rtx ();
4590                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4591                     expand_inc (t1, const1_rtx);
4592                     emit_label (lab);
4593                     quotient = t1;
4594                   }
4595                 else
4596                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4597                                                           t1, t3),
4598                                             tquotient);
4599                 break;
4600               }
4601
4602             /* Try using an instruction that produces both the quotient and
4603                remainder, using truncation.  We can easily compensate the
4604                quotient or remainder to get ceiling rounding, once we have the
4605                remainder.  Notice that we compute also the final remainder
4606                value here, and return the result right away.  */
4607             if (target == 0 || GET_MODE (target) != compute_mode)
4608               target = gen_reg_rtx (compute_mode);
4609
4610             if (rem_flag)
4611               {
4612                 remainder = (REG_P (target)
4613                              ? target : gen_reg_rtx (compute_mode));
4614                 quotient = gen_reg_rtx (compute_mode);
4615               }
4616             else
4617               {
4618                 quotient = (REG_P (target)
4619                             ? target : gen_reg_rtx (compute_mode));
4620                 remainder = gen_reg_rtx (compute_mode);
4621               }
4622
4623             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4624                                      remainder, 1))
4625               {
4626                 /* This could be computed with a branch-less sequence.
4627                    Save that for later.  */
4628                 rtx_code_label *label = gen_label_rtx ();
4629                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4630                                  compute_mode, label);
4631                 expand_inc (quotient, const1_rtx);
4632                 expand_dec (remainder, op1);
4633                 emit_label (label);
4634                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4635               }
4636
4637             /* No luck with division elimination or divmod.  Have to do it
4638                by conditionally adjusting op0 *and* the result.  */
4639             {
4640               rtx_code_label *label1, *label2;
4641               rtx adjusted_op0, tem;
4642
4643               quotient = gen_reg_rtx (compute_mode);
4644               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4645               label1 = gen_label_rtx ();
4646               label2 = gen_label_rtx ();
4647               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4648                                compute_mode, label1);
4649               emit_move_insn  (quotient, const0_rtx);
4650               emit_jump_insn (targetm.gen_jump (label2));
4651               emit_barrier ();
4652               emit_label (label1);
4653               expand_dec (adjusted_op0, const1_rtx);
4654               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4655                                   quotient, 1, OPTAB_LIB_WIDEN);
4656               if (tem != quotient)
4657                 emit_move_insn (quotient, tem);
4658               expand_inc (quotient, const1_rtx);
4659               emit_label (label2);
4660             }
4661           }
4662         else /* signed */
4663           {
4664             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4665                 && INTVAL (op1) >= 0)
4666               {
4667                 /* This is extremely similar to the code for the unsigned case
4668                    above.  For 2.7 we should merge these variants, but for
4669                    2.6.1 I don't want to touch the code for unsigned since that
4670                    get used in C.  The signed case will only be used by other
4671                    languages (Ada).  */
4672
4673                 rtx t1, t2, t3;
4674                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4675                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4676                                    floor_log2 (d), tquotient, 0);
4677                 t2 = expand_binop (compute_mode, and_optab, op0,
4678                                    gen_int_mode (d - 1, compute_mode),
4679                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4680                 t3 = gen_reg_rtx (compute_mode);
4681                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4682                                       compute_mode, 1, 1);
4683                 if (t3 == 0)
4684                   {
4685                     rtx_code_label *lab;
4686                     lab = gen_label_rtx ();
4687                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4688                     expand_inc (t1, const1_rtx);
4689                     emit_label (lab);
4690                     quotient = t1;
4691                   }
4692                 else
4693                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4694                                                           t1, t3),
4695                                             tquotient);
4696                 break;
4697               }
4698
4699             /* Try using an instruction that produces both the quotient and
4700                remainder, using truncation.  We can easily compensate the
4701                quotient or remainder to get ceiling rounding, once we have the
4702                remainder.  Notice that we compute also the final remainder
4703                value here, and return the result right away.  */
4704             if (target == 0 || GET_MODE (target) != compute_mode)
4705               target = gen_reg_rtx (compute_mode);
4706             if (rem_flag)
4707               {
4708                 remainder= (REG_P (target)
4709                             ? target : gen_reg_rtx (compute_mode));
4710                 quotient = gen_reg_rtx (compute_mode);
4711               }
4712             else
4713               {
4714                 quotient = (REG_P (target)
4715                             ? target : gen_reg_rtx (compute_mode));
4716                 remainder = gen_reg_rtx (compute_mode);
4717               }
4718
4719             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4720                                      remainder, 0))
4721               {
4722                 /* This could be computed with a branch-less sequence.
4723                    Save that for later.  */
4724                 rtx tem;
4725                 rtx_code_label *label = gen_label_rtx ();
4726                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4727                                  compute_mode, label);
4728                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4729                                     NULL_RTX, 0, OPTAB_WIDEN);
4730                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4731                 expand_inc (quotient, const1_rtx);
4732                 expand_dec (remainder, op1);
4733                 emit_label (label);
4734                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4735               }
4736
4737             /* No luck with division elimination or divmod.  Have to do it
4738                by conditionally adjusting op0 *and* the result.  */
4739             {
4740               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4741               rtx adjusted_op0;
4742               rtx tem;
4743
4744               quotient = gen_reg_rtx (compute_mode);
4745               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4746               label1 = gen_label_rtx ();
4747               label2 = gen_label_rtx ();
4748               label3 = gen_label_rtx ();
4749               label4 = gen_label_rtx ();
4750               label5 = gen_label_rtx ();
4751               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4752               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4753                                compute_mode, label1);
4754               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4755                                   quotient, 0, OPTAB_LIB_WIDEN);
4756               if (tem != quotient)
4757                 emit_move_insn (quotient, tem);
4758               emit_jump_insn (targetm.gen_jump (label5));
4759               emit_barrier ();
4760               emit_label (label1);
4761               expand_dec (adjusted_op0, const1_rtx);
4762               emit_jump_insn (targetm.gen_jump (label4));
4763               emit_barrier ();
4764               emit_label (label2);
4765               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4766                                compute_mode, label3);
4767               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4768                                   quotient, 0, OPTAB_LIB_WIDEN);
4769               if (tem != quotient)
4770                 emit_move_insn (quotient, tem);
4771               emit_jump_insn (targetm.gen_jump (label5));
4772               emit_barrier ();
4773               emit_label (label3);
4774               expand_inc (adjusted_op0, const1_rtx);
4775               emit_label (label4);
4776               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4777                                   quotient, 0, OPTAB_LIB_WIDEN);
4778               if (tem != quotient)
4779                 emit_move_insn (quotient, tem);
4780               expand_inc (quotient, const1_rtx);
4781               emit_label (label5);
4782             }
4783           }
4784         break;
4785
4786       case EXACT_DIV_EXPR:
4787         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4788           {
4789             HOST_WIDE_INT d = INTVAL (op1);
4790             unsigned HOST_WIDE_INT ml;
4791             int pre_shift;
4792             rtx t1;
4793
4794             pre_shift = floor_log2 (d & -d);
4795             ml = invert_mod2n (d >> pre_shift, size);
4796             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4797                                pre_shift, NULL_RTX, unsignedp);
4798             quotient = expand_mult (compute_mode, t1,
4799                                     gen_int_mode (ml, compute_mode),
4800                                     NULL_RTX, 1);
4801
4802             insn = get_last_insn ();
4803             set_dst_reg_note (insn, REG_EQUAL,
4804                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4805                                               compute_mode, op0, op1),
4806                               quotient);
4807           }
4808         break;
4809
4810       case ROUND_DIV_EXPR:
4811       case ROUND_MOD_EXPR:
4812         if (unsignedp)
4813           {
4814             rtx tem;
4815             rtx_code_label *label;
4816             label = gen_label_rtx ();
4817             quotient = gen_reg_rtx (compute_mode);
4818             remainder = gen_reg_rtx (compute_mode);
4819             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4820               {
4821                 rtx tem;
4822                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4823                                          quotient, 1, OPTAB_LIB_WIDEN);
4824                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4825                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4826                                           remainder, 1, OPTAB_LIB_WIDEN);
4827               }
4828             tem = plus_constant (compute_mode, op1, -1);
4829             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4830             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4831             expand_inc (quotient, const1_rtx);
4832             expand_dec (remainder, op1);
4833             emit_label (label);
4834           }
4835         else
4836           {
4837             rtx abs_rem, abs_op1, tem, mask;
4838             rtx_code_label *label;
4839             label = gen_label_rtx ();
4840             quotient = gen_reg_rtx (compute_mode);
4841             remainder = gen_reg_rtx (compute_mode);
4842             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4843               {
4844                 rtx tem;
4845                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4846                                          quotient, 0, OPTAB_LIB_WIDEN);
4847                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4848                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4849                                           remainder, 0, OPTAB_LIB_WIDEN);
4850               }
4851             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4852             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4853             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4854                                 1, NULL_RTX, 1);
4855             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4856             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4857                                 NULL_RTX, 0, OPTAB_WIDEN);
4858             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4859                                  size - 1, NULL_RTX, 0);
4860             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4861                                 NULL_RTX, 0, OPTAB_WIDEN);
4862             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4863                                 NULL_RTX, 0, OPTAB_WIDEN);
4864             expand_inc (quotient, tem);
4865             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4866                                 NULL_RTX, 0, OPTAB_WIDEN);
4867             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4868                                 NULL_RTX, 0, OPTAB_WIDEN);
4869             expand_dec (remainder, tem);
4870             emit_label (label);
4871           }
4872         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4873
4874       default:
4875         gcc_unreachable ();
4876       }
4877
4878   if (quotient == 0)
4879     {
4880       if (target && GET_MODE (target) != compute_mode)
4881         target = 0;
4882
4883       if (rem_flag)
4884         {
4885           /* Try to produce the remainder without producing the quotient.
4886              If we seem to have a divmod pattern that does not require widening,
4887              don't try widening here.  We should really have a WIDEN argument
4888              to expand_twoval_binop, since what we'd really like to do here is
4889              1) try a mod insn in compute_mode
4890              2) try a divmod insn in compute_mode
4891              3) try a div insn in compute_mode and multiply-subtract to get
4892                 remainder
4893              4) try the same things with widening allowed.  */
4894           remainder
4895             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4896                                  op0, op1, target,
4897                                  unsignedp,
4898                                  ((optab_handler (optab2, compute_mode)
4899                                    != CODE_FOR_nothing)
4900                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4901           if (remainder == 0)
4902             {
4903               /* No luck there.  Can we do remainder and divide at once
4904                  without a library call?  */
4905               remainder = gen_reg_rtx (compute_mode);
4906               if (! expand_twoval_binop ((unsignedp
4907                                           ? udivmod_optab
4908                                           : sdivmod_optab),
4909                                          op0, op1,
4910                                          NULL_RTX, remainder, unsignedp))
4911                 remainder = 0;
4912             }
4913
4914           if (remainder)
4915             return gen_lowpart (mode, remainder);
4916         }
4917
4918       /* Produce the quotient.  Try a quotient insn, but not a library call.
4919          If we have a divmod in this mode, use it in preference to widening
4920          the div (for this test we assume it will not fail). Note that optab2
4921          is set to the one of the two optabs that the call below will use.  */
4922       quotient
4923         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4924                              op0, op1, rem_flag ? NULL_RTX : target,
4925                              unsignedp,
4926                              ((optab_handler (optab2, compute_mode)
4927                                != CODE_FOR_nothing)
4928                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4929
4930       if (quotient == 0)
4931         {
4932           /* No luck there.  Try a quotient-and-remainder insn,
4933              keeping the quotient alone.  */
4934           quotient = gen_reg_rtx (compute_mode);
4935           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4936                                      op0, op1,
4937                                      quotient, NULL_RTX, unsignedp))
4938             {
4939               quotient = 0;
4940               if (! rem_flag)
4941                 /* Still no luck.  If we are not computing the remainder,
4942                    use a library call for the quotient.  */
4943                 quotient = sign_expand_binop (compute_mode,
4944                                               udiv_optab, sdiv_optab,
4945                                               op0, op1, target,
4946                                               unsignedp, OPTAB_LIB_WIDEN);
4947             }
4948         }
4949     }
4950
4951   if (rem_flag)
4952     {
4953       if (target && GET_MODE (target) != compute_mode)
4954         target = 0;
4955
4956       if (quotient == 0)
4957         {
4958           /* No divide instruction either.  Use library for remainder.  */
4959           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4960                                          op0, op1, target,
4961                                          unsignedp, OPTAB_LIB_WIDEN);
4962           /* No remainder function.  Try a quotient-and-remainder
4963              function, keeping the remainder.  */
4964           if (!remainder)
4965             {
4966               remainder = gen_reg_rtx (compute_mode);
4967               if (!expand_twoval_binop_libfunc
4968                   (unsignedp ? udivmod_optab : sdivmod_optab,
4969                    op0, op1,
4970                    NULL_RTX, remainder,
4971                    unsignedp ? UMOD : MOD))
4972                 remainder = NULL_RTX;
4973             }
4974         }
4975       else
4976         {
4977           /* We divided.  Now finish doing X - Y * (X / Y).  */
4978           remainder = expand_mult (compute_mode, quotient, op1,
4979                                    NULL_RTX, unsignedp);
4980           remainder = expand_binop (compute_mode, sub_optab, op0,
4981                                     remainder, target, unsignedp,
4982                                     OPTAB_LIB_WIDEN);
4983         }
4984     }
4985
4986   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4987 }
4988 \f
4989 /* Return a tree node with data type TYPE, describing the value of X.
4990    Usually this is a VAR_DECL, if there is no obvious better choice.
4991    X may be an expression, however we only support those expressions
4992    generated by loop.c.

        The rtx code of X selects what is built:
        - CONST_INT, CONST_WIDE_INT, CONST_DOUBLE and CONST_VECTOR are
          turned into trees with wide_int_to_tree, build_real and
          build_vector;
        - PLUS, MINUS, NEG, MULT and ASHIFT become folded arithmetic in
          TYPE, with the operands converted by recursive calls;
        - LSHIFTRT and UDIV are built in the unsigned variant of TYPE,
          ASHIFTRT and (for non-REAL_TYPE) DIV in the signed variant, and
          the result is converted back to TYPE;
        - SIGN_EXTEND and ZERO_EXTEND convert the operand from the type
          the language hook returns for the operand's mode;
        - CONST is looked through;
        - a SYMBOL_REF that has a decl becomes the address of that decl,
          converted to TYPE;
        - anything else becomes an anonymous VAR_DECL whose rtl is X.  */
4993
4994 tree
4995 make_tree (tree type, rtx x)
4996 {
4997   tree t;
4998
4999   switch (GET_CODE (x))
5000     {
5001     case CONST_INT:
5002     case CONST_WIDE_INT:
           /* The constant is interpreted in the machine mode of TYPE.  */
5003       t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5004       return t;
5005
5006     case CONST_DOUBLE:
           /* Compile-time check that a value made of two HOST_WIDE_INTs does
              not exceed MAX_BITSIZE_MODE_ANY_INT bits.  */
5007       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
           /* With TARGET_SUPPORTS_WIDE_INT equal to 0, a VOIDmode CONST_DOUBLE
              is read as an integer stored in two HOST_WIDE_INTs starting at
              CONST_DOUBLE_LOW.  Every other CONST_DOUBLE is treated as a real
              value.  */
5008       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5009         t = wide_int_to_tree (type,
5010                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5011                                                     HOST_BITS_PER_WIDE_INT * 2));
5012       else
5013         {
5014           REAL_VALUE_TYPE d;
5015
5016           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5017           t = build_real (type, d);
5018         }
5019
5020       return t;
5021
5022     case CONST_VECTOR:
5023       {
5024         int units = CONST_VECTOR_NUNITS (x);
             /* TREE_TYPE (type) is used as the type of every element
                (presumably the vector's element type).  */
5025         tree itype = TREE_TYPE (type);
5026         tree *elts;
5027         int i;
5028
5029         /* Build a tree with vector elements.  */
5030         elts = XALLOCAVEC (tree, units);
5031         for (i = units - 1; i >= 0; --i)
5032           {
5033             rtx elt = CONST_VECTOR_ELT (x, i);
5034             elts[i] = make_tree (itype, elt);
5035           }
5036
5037         return build_vector (type, elts);
5038       }
5039
         /* For the arithmetic codes below both the operands and the result
            use TYPE, and the expression goes through fold_build*.  */
5040     case PLUS:
5041       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5042                           make_tree (type, XEXP (x, 1)));
5043
5044     case MINUS:
5045       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5046                           make_tree (type, XEXP (x, 1)));
5047
5048     case NEG:
5049       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5050
5051     case MULT:
5052       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5053                           make_tree (type, XEXP (x, 1)));
5054
5055     case ASHIFT:
5056       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5057                           make_tree (type, XEXP (x, 1)));
5058
5059     case LSHIFTRT:
           /* Both right shifts map to RSHIFT_EXPR; the shifted value is built
              in the unsigned variant of TYPE here and in the signed variant
              for ASHIFTRT below (presumably what makes the shift logical or
              arithmetic).  The shift count is still built in TYPE, and the
              result is converted back to TYPE.  */
5060       t = unsigned_type_for (type);
5061       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5062                                          make_tree (t, XEXP (x, 0)),
5063                                          make_tree (type, XEXP (x, 1))));
5064
5065     case ASHIFTRT:
5066       t = signed_type_for (type);
5067       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5068                                          make_tree (t, XEXP (x, 0)),
5069                                          make_tree (type, XEXP (x, 1))));
5070
5071     case DIV:
           /* Divide in the signed variant of TYPE, except that a REAL_TYPE is
              used as it is.
              NOTE(review): TRUNC_DIV_EXPR is also built when TYPE is a
              REAL_TYPE; confirm whether RDIV_EXPR was intended there.  */
5072       if (TREE_CODE (type) != REAL_TYPE)
5073         t = signed_type_for (type);
5074       else
5075         t = type;
5076
5077       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5078                                          make_tree (t, XEXP (x, 0)),
5079                                          make_tree (t, XEXP (x, 1))));
5080     case UDIV:
5081       t = unsigned_type_for (type);
5082       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5083                                          make_tree (t, XEXP (x, 0)),
5084                                          make_tree (t, XEXP (x, 1))));
5085
5086     case SIGN_EXTEND:
5087     case ZERO_EXTEND:
           /* Ask the front end for a type matching the mode of the operand
              being extended, unsigned exactly when X is a ZERO_EXTEND, and
              convert from that type to TYPE.  */
5088       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5089                                           GET_CODE (x) == ZERO_EXTEND);
5090       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5091
5092     case CONST:
           /* Look through the CONST wrapper.  */
5093       return make_tree (type, XEXP (x, 0));
5094
5095     case SYMBOL_REF:
           /* A symbol with an associated decl becomes the address of that
              decl, converted to TYPE.  */
5096       t = SYMBOL_REF_DECL (x);
5097       if (t)
5098         return fold_convert (type, build_fold_addr_expr (t));
5099       /* else fall through.  */
5100
5101     default:
           /* Wrap X in a nameless VAR_DECL of TYPE located at
              RTL_LOCATION (x).  */
5102       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5103
5104       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5105          address mode to pointer mode.  */
5106       if (POINTER_TYPE_P (type))
5107         x = convert_memory_address_addr_space
5108               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5109
5110       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5111          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5112       t->decl_with_rtl.rtl = x;
5113
5114       return t;
5115     }
5116 }
5117 \f
5118 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5119    and returning TARGET.
5120
5121    If TARGET is 0, a pseudo-register or constant is returned.

        MODE is the mode in which the AND is performed.  When both operands
        have VOIDmode, simplify_binary_operation is tried first; if that
        gives no result, or the operands have a mode, the AND is expanded
        through and_optab with OPTAB_LIB_WIDEN.  A result that did not land
        in a nonzero TARGET is copied there with a move.  */
5122
5123 rtx
5124 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5125 {
5126   rtx tem = 0;
5127
       /* Two mode-less operands (presumably constants): try to simplify
          the AND without emitting any insn.  */
5128   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5129     tem = simplify_binary_operation (AND, mode, op0, op1);
       /* Nothing simplified (or simplification was not attempted): emit
          the operation, suggesting TARGET as the destination.  */
5130   if (tem == 0)
5131     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5132
       /* With no TARGET the computed value itself is returned; otherwise
          make sure the value ends up in TARGET.  */
5133   if (target == 0)
5134     target = tem;
5135   else if (tem != target)
5136     emit_move_insn (target, tem);
5137   return target;
5138 }
5139
5140 /* Helper function for emit_store_flag.

        Emit the store-flag instruction ICODE for the comparison X CODE Y
        and normalize its result.

        TARGET is a suggested place for the result, or 0; when it is 0 a
        new pseudo of TARGET_MODE is created.
        X and Y are passed through prepare_operand as operands 2 and 3 of
        ICODE, using MODE, COMPARE_MODE and UNSIGNEDP.
        NORMALIZEP says how the raw result is to be normalized: 0 leaves
        it alone, and the code below otherwise produces 1 or -1
        (presumably the value wanted for a true comparison).
        TARGET_MODE is the mode wanted for the result; VOIDmode stands for
        the mode returned by targetm.cstore_mode for ICODE.

        Return an rtx holding the result.  Return NULL_RTX, after deleting
        any insns emitted here, if an operand cannot be prepared or the
        instruction cannot be expanded.  */
5141 rtx
5142 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5143              machine_mode mode, machine_mode compare_mode,
5144              int unsignedp, rtx x, rtx y, int normalizep,
5145              machine_mode target_mode)
5146 {
5147   struct expand_operand ops[4];
5148   rtx op0, comparison, subtarget;
5149   rtx_insn *last;
5150   machine_mode result_mode = targetm.cstore_mode (icode);
5151
       /* Remember the current end of the insn stream so that anything
          emitted below can be removed again on failure.  */
5152   last = get_last_insn ();
5153   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5154   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5155   if (!x || !y)
5156     {
5157       delete_insns_since (last);
5158       return NULL_RTX;
5159     }
5160
5161   if (target_mode == VOIDmode)
5162     target_mode = result_mode;
5163   if (!target)
5164     target = gen_reg_rtx (target_mode);
5165
5166   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5167
       /* Operand 0 is the output in RESULT_MODE; TARGET is only suggested
          for it when not optimizing.  Operands 1-3 are the comparison rtx
          and its two prepared operands, used exactly as given.  */
5168   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5169   create_fixed_operand (&ops[1], comparison);
5170   create_fixed_operand (&ops[2], x);
5171   create_fixed_operand (&ops[3], y);
5172   if (!maybe_expand_insn (icode, 4, ops))
5173     {
5174       delete_insns_since (last);
5175       return NULL_RTX;
5176     }
       /* The location the expander actually used for the output.  */
5177   subtarget = ops[0].value;
5178
5179   /* If we are converting to a wider mode, first convert to
5180      TARGET_MODE, then normalize.  This produces better combining
5181      opportunities on machines that have a SIGN_EXTRACT when we are
5182      testing a single bit.  This mostly benefits the 68k.
5183
5184      If STORE_FLAG_VALUE does not have the sign bit set when
5185      interpreted in MODE, we can do this conversion as unsigned, which
5186      is usually more efficient.  */
5187   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5188     {
5189       convert_move (target, subtarget,
5190                     val_signbit_known_clear_p (result_mode,
5191                                                STORE_FLAG_VALUE));
5192       op0 = target;
           /* From here on the normalization works in TARGET_MODE.  */
5193       result_mode = target_mode;
5194     }
5195   else
5196     op0 = subtarget;
5197
5198   /* If we want to keep subexpressions around, don't reuse our last
5199      target.  */
5200   if (optimize)
5201     subtarget = 0;
5202
5203   /* Now normalize to the proper value in MODE.  Sometimes we don't
5204      have to do anything.  */
5205   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5206     ;
5207   /* STORE_FLAG_VALUE might be the most negative number, so write
5208      the comparison this way to avoid a compiler-time warning.  */
5209   else if (- normalizep == STORE_FLAG_VALUE)
5210     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5211
5212   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5213      it hard to use a value of just the sign bit due to ANSI integer
5214      constant typing rules.  */
5215   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
         /* Shift right by the mode's bit size minus one; the shift is
            unsigned when NORMALIZEP is 1 and signed otherwise.  */
5216     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5217                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5218                         normalizep == 1);
5219   else
5220     {
5221       gcc_assert (STORE_FLAG_VALUE & 1);
5222
           /* Keep only the low bit, then negate it if -1 is wanted.  */
5223       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5224       if (normalizep == -1)
5225         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5226     }
5227
5228   /* If we were converting to a smaller mode, do the conversion now.  */
       /* In the widening case above RESULT_MODE was set to TARGET_MODE, so
          this test is false there and OP0 is returned directly.  */
5229   if (target_mode != result_mode)
5230     {
5231       convert_move (target, op0, 0);
5232       return target;
5233     }
5234   else
5235     return op0;
5236 }
5237
5238
5239 /* A subroutine of emit_store_flag only including "tricks" that do not
5240    need a recursive call.  These are kept separate to avoid infinite
5241    loops.

        Try to compute the comparison OP0 CODE OP1 and return the rtx that
        holds the result, or 0 if none of the strategies below applies.
        The strategies are tried in this order:

        - a double-word integer compared with 0 or -1 (EQ, NE) or with 0
          (LT, GE) is reduced to a single-word test, which is handed to
          emit_store_flag in word_mode;
        - A < 0 and A >= 0 in an integer mode are computed by shifting the
          sign bit (of the complement, for GE) down to the low-order bit;
        - otherwise the target's cstore pattern is used via emit_cstore.

        MODE is the mode of the operands; if it is VOIDmode, the mode of OP0
        (after the operands have been put in canonical order) is used.  If
        UNSIGNEDP is nonzero, CODE is first converted with
        unsigned_condition.  NORMALIZEP is as for emit_store_flag.  TARGET,
        if nonzero, is a suggested place for the result, and TARGET_MODE is
        the mode wanted for it; emit_store_flag passes VOIDmode when it has
        no TARGET.  */
5242
5243 static rtx
5244 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5245                    machine_mode mode, int unsignedp, int normalizep,
5246                    machine_mode target_mode)
5247 {
5248   rtx subtarget;
5249   enum insn_code icode;
5250   machine_mode compare_mode;
5251   enum mode_class mclass;
5252   enum rtx_code scode;
5253
5254   if (unsignedp)
5255     code = unsigned_condition (code);
5257
5258   /* If one operand is constant, make it the second one.  Only do this
5259      if the other operand is not constant as well.  */
5260
5261   if (swap_commutative_operands_p (op0, op1))
5262     {
5263       std::swap (op0, op1);
5264       code = swap_condition (code);
5265     }
5266
5267   if (mode == VOIDmode)
5268     mode = GET_MODE (op0);
5269
5270   /* For some comparisons with 1 and -1, we can convert this to
5271      comparisons with zero.  This will often produce more opportunities for
5272      store-flag insns.  */
5273
5274   switch (code)
5275     {
5276     case LT:
5277       if (op1 == const1_rtx)
5278         op1 = const0_rtx, code = LE;
5279       break;
5280     case LE:
5281       if (op1 == constm1_rtx)
5282         op1 = const0_rtx, code = LT;
5283       break;
5284     case GE:
5285       if (op1 == const1_rtx)
5286         op1 = const0_rtx, code = GT;
5287       break;
5288     case GT:
5289       if (op1 == constm1_rtx)
5290         op1 = const0_rtx, code = GE;
5291       break;
5292     case GEU:
5293       if (op1 == const1_rtx)
5294         op1 = const0_rtx, code = NE;
5295       break;
5296     case LTU:
5297       if (op1 == const1_rtx)
5298         op1 = const0_rtx, code = EQ;
5299       break;
5300     default:
5301       break;
5302     }
5303
5304   /* If we are comparing a double-word integer with zero or -1, we can
5305      convert the comparison into one involving a single word.  */
5306   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5307       && GET_MODE_CLASS (mode) == MODE_INT
5308       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5309     {
5310       rtx tem;
5311       if ((code == EQ || code == NE)
5312           && (op1 == const0_rtx || op1 == constm1_rtx))
5313         {
5314           rtx op00, op01;
5315
5316           /* Do a logical OR or AND of the two words and compare the
5317              result.  */
5318           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5319           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5320           tem = expand_binop (word_mode,
5321                               op1 == const0_rtx ? ior_optab : and_optab,
5322                               op00, op01, NULL_RTX, unsignedp,
5323                               OPTAB_DIRECT);
5324
5325           if (tem != 0)
5326             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5327                                    unsignedp, normalizep);
5328         }
5329       else if ((code == LT || code == GE) && op1 == const0_rtx)
5330         {
5331           rtx op0h;
5332
5333           /* If testing the sign bit, can just test on high word.  */
5334           op0h = simplify_gen_subreg (word_mode, op0, mode,
5335                                       subreg_highpart_offset (word_mode,
5336                                                               mode));
5337           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5338                                  unsignedp, normalizep);
5339         }
5340       else
5341         tem = NULL_RTX;
5342
5343       if (tem)
5344         {
               /* The single-word result may need converting to the mode the
                  caller asked for.  Extend it as unsigned unless the value
                  it takes when true has its sign bit set in word_mode.  */
5345           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5346             return tem;
5347           if (!target)
5348             target = gen_reg_rtx (target_mode);
5349
5350           convert_move (target, tem,
5351                         !val_signbit_known_set_p (word_mode,
5352                                                   (normalizep ? normalizep
5353                                                    : STORE_FLAG_VALUE)));
5354           return target;
5355         }
5356     }
5357
5358   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5359      complement of A (for GE) and shifting the sign bit to the low bit.  */
5360   if (op1 == const0_rtx && (code == LT || code == GE)
5361       && GET_MODE_CLASS (mode) == MODE_INT
5362       && (normalizep || STORE_FLAG_VALUE == 1
5363           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5364     {
5365       subtarget = target;
5366
5367       if (!target)
5368         target_mode = mode;
5369
5370       /* If the result is to be wider than OP0, it is best to convert it
5371          first.  If it is to be narrower, it is *incorrect* to convert it
5372          first.  */
5373       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5374         {
5375           op0 = convert_modes (target_mode, mode, op0, 0);
5376           mode = target_mode;
5377         }
5378
5379       if (target_mode != mode)
5380         subtarget = 0;
5381
5382       if (code == GE)
5383         op0 = expand_unop (mode, one_cmpl_optab, op0,
5384                            ((STORE_FLAG_VALUE == 1 || normalizep)
5385                             ? 0 : subtarget), 0);
5386
5387       if (STORE_FLAG_VALUE == 1 || normalizep)
5388         /* If we are supposed to produce a 0/1 value, we want to do
5389            a logical shift from the sign bit to the low-order bit; for
5390            a -1/0 value, we do an arithmetic shift.  */
5391         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5392                             GET_MODE_BITSIZE (mode) - 1,
5393                             subtarget, normalizep != -1);
5394
5395       if (mode != target_mode)
5396         op0 = convert_modes (target_mode, mode, op0, 0);
5397
5398       return op0;
5399     }
5400
       /* Look for a cstore pattern, starting with MODE and moving on to
          successively wider modes.  For MODE_CC operands the pattern is
          looked up under CCmode.  Only the first mode that provides a
          pattern is tried.  */
5401   mclass = GET_MODE_CLASS (mode);
5402   for (compare_mode = mode; compare_mode != VOIDmode;
5403        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5404     {
5405      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5406      icode = optab_handler (cstore_optab, optab_mode);
5407      if (icode != CODE_FOR_nothing)
5408         {
5409           do_pending_stack_adjust ();
5410           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5411                                  unsignedp, op0, op1, normalizep, target_mode);
5412           if (tem)
5413             return tem;
5414
5415           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5416             {
                   /* Retry with the operands exchanged.  The swapped
                      condition has to be derived from CODE as it stands
                      now: CODE may itself have been swapped above when the
                      operands were put in canonical order, and the retry
                      must still describe the same comparison.  */
                   scode = swap_condition (code);
5417               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5418                                  unsignedp, op1, op0, normalizep, target_mode);
5419               if (tem)
5420                 return tem;
5421             }
5422           break;
5423         }
5424     }
5425
5426   return 0;
5427 }
5428
5429 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5430    and storing in TARGET.  Normally return TARGET.
5431    Return 0 if that cannot be done.
5432
5433    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5434    it is VOIDmode, they cannot both be CONST_INT.
5435
5436    UNSIGNEDP is for the case where we have to widen the operands
5437    to perform the operation.  It says to use zero-extension.
5438
5439    NORMALIZEP is 1 if we should convert the result to be either zero
5440    or one.  NORMALIZEP is -1 if we should convert the result to be
5441    either zero or -1.  If NORMALIZEP is zero, the result will be left
5442    "raw" out of the scc insn.

        The function first tries emit_store_flag_1.  If that fails and
        BRANCH_COST is nonzero, it tries the reversed comparison followed
        by an addition or XOR; then, for floating-point modes, a split
        comparison combined with a conditional move; and for integer
        modes, a recursive test of OP0 ^ OP1 (or OP0 - OP1) against zero
        and sign-bit arithmetic for comparisons against zero.  */
5443
5444 rtx
5445 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5446                  machine_mode mode, int unsignedp, int normalizep)
5447 {
5448   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5449   enum rtx_code rcode;
5450   rtx subtarget;
5451   rtx tem, trueval;
5452   rtx_insn *last;
5453
5454   /* If we compare constants, we shouldn't use a store-flag operation,
5455      but a constant load.  We can get there via the vanilla route that
5456      usually generates a compare-branch sequence, but will in this case
5457      fold the comparison to a constant, and thus elide the branch.  */
5458   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5459     return NULL_RTX;
5460
       /* First try the strategies collected in emit_store_flag_1.  */
5461   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5462                            target_mode);
5463   if (tem)
5464     return tem;
5465
5466   /* If we reached here, we can't do this with a scc insn, however there
5467      are some comparisons that can be done in other ways.  Don't do any
5468      of these cases if branches are very cheap.  */
5469   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5470     return 0;
5471
5472   /* See what we need to return.  We can only return a 1, -1, or the
5473      sign bit.  */
5474
5475   if (normalizep == 0)
5476     {
5477       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5478         normalizep = STORE_FLAG_VALUE;
5479
5480       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5481         ;
5482       else
5483         return 0;
5484     }
5485
       /* Remember the current end of the insn stream so that the insns of
          an attempt that fails can be removed with delete_insns_since.  */
5486   last = get_last_insn ();
5487
5488   /* If optimizing, use different pseudo registers for each insn, instead
5489      of reusing the same pseudo.  This leads to better CSE, but slows
5490      down the compiler, since there are more pseudos.  */
5491   subtarget = (!optimize
5492                && (target_mode == mode)) ? target : NULL_RTX;
       /* TRUEVAL is the constant the result takes when the comparison
          holds.  */
5493   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5494
5495   /* For floating-point comparisons, try the reverse comparison or try
5496      changing the "orderedness" of the comparison.  */
5497   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5498     {
5499       enum rtx_code first_code;
5500       bool and_them;
5501
5502       rcode = reverse_condition_maybe_unordered (code);
5503       if (can_compare_p (rcode, mode, ccp_store_flag)
5504           && (code == ORDERED || code == UNORDERED
5505               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5506               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5507         {
               /* The reversed comparison is true exactly where the result
                  must be zero.  When STORE_FLAG_VALUE and NORMALIZEP are 1
                  and -1 (in either order), computing the reversed flag raw
                  and adding NORMALIZEP maps STORE_FLAG_VALUE to 0 and 0 to
                  NORMALIZEP.  Otherwise XORing the reversed flag with
                  TRUEVAL exchanges 0 and TRUEVAL.  Either way the extra
                  operation is only used if rtx_cost reports it as free.  */
5508           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5509                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5510
5511           /* For the reverse comparison, use either an addition or a XOR.  */
5512           if (want_add
5513               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5514                            optimize_insn_for_speed_p ()) == 0)
5515             {
5516               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5517                                        STORE_FLAG_VALUE, target_mode);
5518               if (tem)
5519                 return expand_binop (target_mode, add_optab, tem,
5520                                      gen_int_mode (normalizep, target_mode),
5521                                      target, 0, OPTAB_WIDEN);
5522             }
5523           else if (!want_add
5524                    && rtx_cost (trueval, mode, XOR, 1,
5525                                 optimize_insn_for_speed_p ()) == 0)
5526             {
5527               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5528                                        normalizep, target_mode);
5529               if (tem)
5530                 return expand_binop (target_mode, xor_optab, tem, trueval,
5531                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5532             }
5533         }
5534
5535       delete_insns_since (last);
5536
5537       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5538       if (code == ORDERED || code == UNORDERED)
5539         return 0;
5540
           /* Split CODE into FIRST_CODE and a second comparison, which
              replaces CODE from here on.  The use of AND_THEM below shows
              how the two parts are combined.  */
5541       and_them = split_comparison (code, mode, &first_code, &code);
5542
5543       /* If there are no NaNs, the first comparison should always fall through.
5544          Effectively change the comparison to the other one.  */
5545       if (!HONOR_NANS (mode))
5546         {
5547           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5548           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5549                                     target_mode);
5550         }
5551
5552       if (!HAVE_conditional_move)
5553         return 0;
5554
5555       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5556          conditional move.  */
5557       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5558                                normalizep, target_mode);
5559       if (tem == 0)
5560         return 0;
5561
           /* TEM now holds the flag for FIRST_CODE.  NOTE(review): assuming
              emit_conditional_move takes the value-if-true operand before
              the value-if-false operand, the AND_THEM case yields TEM when
              CODE holds and zero otherwise, and the other case yields
              TRUEVAL when CODE holds and TEM otherwise -- confirm against
              emit_conditional_move.  */
5562       if (and_them)
5563         tem = emit_conditional_move (target, code, op0, op1, mode,
5564                                      tem, const0_rtx, GET_MODE (tem), 0);
5565       else
5566         tem = emit_conditional_move (target, code, op0, op1, mode,
5567                                      trueval, tem, GET_MODE (tem), 0);
5568
5569       if (tem == 0)
5570         delete_insns_since (last);
5571       return tem;
5572     }
5573
5574   /* The remaining tricks only apply to integer comparisons.  */
5575
5576   if (GET_MODE_CLASS (mode) != MODE_INT)
5577     return 0;
5578
5579   /* If this is an equality comparison of integers, we can try to exclusive-or
5580      (or subtract) the two operands and use a recursive call to try the
5581      comparison with zero.  Don't do any of these cases if branches are
5582      very cheap.  */
5583
5584   if ((code == EQ || code == NE) && op1 != const0_rtx)
5585     {
5586       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5587                           OPTAB_WIDEN);
5588
5589       if (tem == 0)
5590         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5591                             OPTAB_WIDEN);
5592       if (tem != 0)
5593         tem = emit_store_flag (target, code, tem, const0_rtx,
5594                                mode, unsignedp, normalizep);
5595       if (tem != 0)
5596         return tem;
5597
5598       delete_insns_since (last);
5599     }
5600
5601   /* For integer comparisons, try the reverse comparison.  However, for
5602      small X and if we'd have anyway to extend, implementing "X != 0"
5603      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5604   rcode = reverse_condition (code);
5605   if (can_compare_p (rcode, mode, ccp_store_flag)
5606       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5607             && code == NE
5608             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5609             && op1 == const0_rtx))
5610     {
           /* TEM is zero on entry to this block: every earlier path that
              left it nonzero has returned.  So if neither branch below
              applies, the test of TEM after them fails and we fall through
              to delete_insns_since.  */
5611       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5612                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5613
5614       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5615       if (want_add
5616           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5617                        optimize_insn_for_speed_p ()) == 0)
5618         {
5619           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5620                                    STORE_FLAG_VALUE, target_mode);
5621           if (tem != 0)
5622             tem = expand_binop (target_mode, add_optab, tem,
5623                                 gen_int_mode (normalizep, target_mode),
5624                                 target, 0, OPTAB_WIDEN);
5625         }
5626       else if (!want_add
5627                && rtx_cost (trueval, mode, XOR, 1,
5628                             optimize_insn_for_speed_p ()) == 0)
5629         {
5630           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5631                                    normalizep, target_mode);
5632           if (tem != 0)
5633             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5634                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5635         }
5636
5637       if (tem != 0)
5638         return tem;
5639       delete_insns_since (last);
5640     }
5641
5642   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5643      the constant zero.  Reject all other comparisons at this point.  Only
5644      do LE and GT if branches are expensive since they are expensive on
5645      2-operand machines.  */
5646
5647   if (op1 != const0_rtx
5648       || (code != EQ && code != NE
5649           && (BRANCH_COST (optimize_insn_for_speed_p (),
5650                            false) <= 1 || (code != LE && code != GT))))
5651     return 0;
5652
5653   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5654      do the necessary operation below.  */
5655
5656   tem = 0;
5657
5658   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5659      the sign bit set.  */
5660
5661   if (code == LE)
5662     {
5663       /* This is destructive, so SUBTARGET can't be OP0.  */
5664       if (rtx_equal_p (subtarget, op0))
5665         subtarget = 0;
5666
5667       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5668                           OPTAB_WIDEN);
5669       if (tem)
5670         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5671                             OPTAB_WIDEN);
5672     }
5673
5674   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5675      number of bits in the mode of OP0, minus one.  */
5676
5677   if (code == GT)
5678     {
           /* As for LE, OP0 is read again after the first operation, so
              SUBTARGET must not be OP0.  */
5679       if (rtx_equal_p (subtarget, op0))
5680         subtarget = 0;
5681
5682       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5683                           GET_MODE_BITSIZE (mode) - 1,
5684                           subtarget, 0);
5685       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5686                           OPTAB_WIDEN);
5687     }
5688
5689   if (code == EQ || code == NE)
5690     {
5691       /* For EQ or NE, one way to do the comparison is to apply an operation
5692          that converts the operand into a positive number if it is nonzero
5693          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5694          for NE we negate.  This puts the result in the sign bit.  Then we
5695          normalize with a shift, if needed.
5696
5697          Two operations that can do the above actions are ABS and FFS, so try
5698          them.  If that doesn't work, and MODE is smaller than a full word,
5699          we can use zero-extension to the wider mode (an unsigned conversion)
5700          as the operation.  */
5701
5702       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5703          that is compensated by the subsequent overflow when subtracting
5704          one / negating.  */
5705
5706       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5707         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5708       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5709         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5710       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5711         {
5712           tem = convert_modes (word_mode, mode, op0, 1);
               /* From here on the computation is carried out in word_mode.  */
5713           mode = word_mode;
5714         }
5715
5716       if (tem != 0)
5717         {
5718           if (code == EQ)
5719             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5720                                 0, OPTAB_WIDEN);
5721           else
5722             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5723         }
5724
5725       /* If we couldn't do it that way, for NE we can "or" the two's complement
5726          of the value with itself.  For EQ, we take the one's complement of
5727          that "or", which is an extra insn, so we only handle EQ if branches
5728          are expensive.  */
5729
5730       if (tem == 0
5731           && (code == NE
5732               || BRANCH_COST (optimize_insn_for_speed_p (),
5733                               false) > 1))
5734         {
5735           if (rtx_equal_p (subtarget, op0))
5736             subtarget = 0;
5737
5738           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5739           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5740                               OPTAB_WIDEN);
5741
5742           if (tem && code == EQ)
5743             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5744         }
5745     }
5746
       /* At this point the answer, if any, is in the sign bit of TEM.  When
          a normalized value was requested, shift it down to the low-order
          bit: a logical shift gives 0/1, an arithmetic shift gives 0/-1.  */
5747   if (tem && normalizep)
5748     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5749                         GET_MODE_BITSIZE (mode) - 1,
5750                         subtarget, normalizep == 1);
5751
5752   if (tem)
5753     {
           /* Without a TARGET, return TEM as it is.  If TEM has a different
              mode from TARGET, convert it into TARGET.  If SUBTARGET is
              null, the operations above were not asked to store into
              TARGET, so copy the result there explicitly.  */
5754       if (!target)
5755         ;
5756       else if (GET_MODE (tem) != target_mode)
5757         {
5758           convert_move (target, tem, 0);
5759           tem = target;
5760         }
5761       else if (!subtarget)
5762         {
5763           emit_move_insn (target, tem);
5764           tem = target;
5765         }
5766     }
5767   else
5768     delete_insns_since (last);
5769
5770   return tem;
5771 }
5772
5773 /* Like emit_store_flag, but always succeeds: when no store-flag sequence
5774    is found, emit set/compare/jump/set code.  Arguments are the same.  */
5775
5776 rtx
5777 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5778                        machine_mode mode, int unsignedp, int normalizep)
5779 {
5780   rtx result = emit_store_flag (target, code, op0, op1, mode, unsignedp,
5781                                 normalizep);
5782   if (result)
5783     return result;
5784
5785   if (target == NULL_RTX)
5786     target = gen_reg_rtx (word_mode);
5787
5788   /* VAL_IF_TRUE is stored when the comparison holds.  */
5789   rtx val_if_true = normalizep != 0 ? GEN_INT (normalizep) : const1_rtx;
5790
5791   /* For foo != 0 with foo already in TARGET only the nonzero case needs
5792      a store, so jump around that store when TARGET is zero.  */
5793   if (code == NE && op1 == const0_rtx && op0 == target && REG_P (target)
5794       && GET_MODE_CLASS (mode) == MODE_INT)
5795     {
5796       rtx_code_label *skip = gen_label_rtx ();
5797       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5798                                NULL_RTX, NULL, skip, -1);
5799       emit_move_insn (target, val_if_true);
5800       emit_label (skip);
5801       return target;
5802     }
5803
5804   /* TARGET is written before the operands are compared, so use a fresh
5805      pseudo unless it is a register that neither operand mentions.  */
5806   if (!(REG_P (target) && !reg_mentioned_p (target, op0)
5807         && !reg_mentioned_p (target, op1)))
5808     target = gen_reg_rtx (GET_MODE (target));
5809
5810   /* Jump in the right direction if the target cannot implement CODE
5811      but can jump on its reverse condition.  */
5812   rtx val_if_false = const0_rtx;
5813   if (!can_compare_p (code, mode, ccp_jump))
5814     {
5815       bool may_reverse;
5816       if (!FLOAT_MODE_P (mode) || code == ORDERED || code == UNORDERED)
5817         may_reverse = true;
5818       else if (code == LTGT || code == UNEQ)
5819         may_reverse = !HONOR_NANS (mode);
5820       else
5821         may_reverse = (code == EQ || code == NE) && !HONOR_SNANS (mode);
5822       if (may_reverse)
5823         {
5824           enum rtx_code rcode = (FLOAT_MODE_P (mode)
5825                                  ? reverse_condition_maybe_unordered (code)
5826                                  : reverse_condition (code));
5827           /* Canonicalize to UNORDERED for the libcall.  */
5828           if (can_compare_p (rcode, mode, ccp_jump)
5829               || (code == ORDERED && !can_compare_p (ORDERED, mode, ccp_jump)))
5830             {
5831               val_if_false = val_if_true;
5832               val_if_true = const0_rtx;
5833               code = rcode;
5834             }
5835         }
5836     }
5837
5838   emit_move_insn (target, val_if_true);
5839   rtx_code_label *done = gen_label_rtx ();
5840   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5841                            done, -1);
5842   emit_move_insn (target, val_if_false);
5843   emit_label (done);
5844
5845   return target;
5846 }
5847 \f
5848 /* Compare ARG1 with ARG2, both of mode MODE, and jump to LABEL when
5849    ARG1 OP ARG2 holds.  The comparison is treated as unsigned exactly
5850    when OP is LTU, LEU, GTU or GEU; do_compare_rtx_and_jump does the work.  */
5851
5852 static void
5853 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5854                  rtx_code_label *label)
5855 {
5856   bool is_unsigned = op == LTU || op == LEU || op == GTU || op == GEU;
5857   do_compare_rtx_and_jump (arg1, arg2, op, is_unsigned, mode, NULL_RTX, NULL,
5858                            label, -1);
5859 }