gcc/expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2015 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "diagnostic-core.h"
27 #include "rtl.h"
28 #include "hash-set.h"
29 #include "machmode.h"
30 #include "vec.h"
31 #include "double-int.h"
32 #include "input.h"
33 #include "alias.h"
34 #include "symtab.h"
35 #include "wide-int.h"
36 #include "inchash.h"
37 #include "tree.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "tm_p.h"
41 #include "flags.h"
42 #include "insn-config.h"
43 #include "expr.h"
44 #include "insn-codes.h"
45 #include "optabs.h"
46 #include "recog.h"
47 #include "langhooks.h"
48 #include "predict.h"
49 #include "basic-block.h"
50 #include "df.h"
51 #include "target.h"
52 #include "expmed.h"
53
54 struct target_expmed default_target_expmed;
55 #if SWITCHABLE_TARGET
56 struct target_expmed *this_target_expmed = &default_target_expmed;
57 #endif
58
59 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT,
63 rtx);
64 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
65 unsigned HOST_WIDE_INT,
66 rtx);
67 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
68 unsigned HOST_WIDE_INT,
69 unsigned HOST_WIDE_INT,
70 unsigned HOST_WIDE_INT,
71 rtx);
72 static rtx extract_fixed_bit_field (machine_mode, rtx,
73 unsigned HOST_WIDE_INT,
74 unsigned HOST_WIDE_INT, rtx, int);
75 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
76 unsigned HOST_WIDE_INT,
77 unsigned HOST_WIDE_INT, rtx, int);
78 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
79 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
80 unsigned HOST_WIDE_INT, int);
81 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
82 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
83 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
84
85 /* Return a constant integer mask value of mode MODE with BITSIZE ones
86 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
87 The mask is truncated if necessary to the width of mode MODE. The
88 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
89
90 static inline rtx
91 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
92 {
93 return immed_wide_int_const
94 (wi::shifted_mask (bitpos, bitsize, complement,
95 GET_MODE_PRECISION (mode)), mode);
96 }
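/* As an illustration (assuming a 32-bit SImode):
   mask_rtx (SImode, 8, 4, false) yields the constant 0x00000f00,
   i.e. four ones starting at bit 8, while passing true for
   COMPLEMENT yields 0xfffff0ff instead.  */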
97
98 /* Test whether a value is zero or a power of two. */
99 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
100 (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
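/* For example, 8 & 7 == 0 (a power of two), 12 & 11 == 8 (not a
   power of two), and 0 & (0 - 1) == 0 (zero also passes the test).  */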
101
102 struct init_expmed_rtl
103 {
104 rtx reg;
105 rtx plus;
106 rtx neg;
107 rtx mult;
108 rtx sdiv;
109 rtx udiv;
110 rtx sdiv_32;
111 rtx smod_32;
112 rtx wide_mult;
113 rtx wide_lshr;
114 rtx wide_trunc;
115 rtx shift;
116 rtx shift_mult;
117 rtx shift_add;
118 rtx shift_sub0;
119 rtx shift_sub1;
120 rtx zext;
121 rtx trunc;
122
123 rtx pow2[MAX_BITS_PER_WORD];
124 rtx cint[MAX_BITS_PER_WORD];
125 };
126
127 static void
128 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
129 machine_mode from_mode, bool speed)
130 {
131 int to_size, from_size;
132 rtx which;
133
134 to_size = GET_MODE_PRECISION (to_mode);
135 from_size = GET_MODE_PRECISION (from_mode);
136
137 /* Most partial integers have a precision less than the "full"
138 integer it requires for storage. If one doesn't, reduce the
139 bit size by one for comparison purposes here, so that it still
140 compares as narrower than the full-width mode. */
141 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
142 && exact_log2 (to_size) != -1)
143 to_size --;
144 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
145 && exact_log2 (from_size) != -1)
146 from_size --;
147
148 /* Assume cost of zero-extend and sign-extend is the same. */
149 which = (to_size < from_size ? all->trunc : all->zext);
150
151 PUT_MODE (all->reg, from_mode);
152 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
153 }
154
155 static void
156 init_expmed_one_mode (struct init_expmed_rtl *all,
157 machine_mode mode, int speed)
158 {
159 int m, n, mode_bitsize;
160 machine_mode mode_from;
161
162 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
163
164 PUT_MODE (all->reg, mode);
165 PUT_MODE (all->plus, mode);
166 PUT_MODE (all->neg, mode);
167 PUT_MODE (all->mult, mode);
168 PUT_MODE (all->sdiv, mode);
169 PUT_MODE (all->udiv, mode);
170 PUT_MODE (all->sdiv_32, mode);
171 PUT_MODE (all->smod_32, mode);
172 PUT_MODE (all->wide_trunc, mode);
173 PUT_MODE (all->shift, mode);
174 PUT_MODE (all->shift_mult, mode);
175 PUT_MODE (all->shift_add, mode);
176 PUT_MODE (all->shift_sub0, mode);
177 PUT_MODE (all->shift_sub1, mode);
178 PUT_MODE (all->zext, mode);
179 PUT_MODE (all->trunc, mode);
180
181 set_add_cost (speed, mode, set_src_cost (all->plus, speed));
182 set_neg_cost (speed, mode, set_src_cost (all->neg, speed));
183 set_mul_cost (speed, mode, set_src_cost (all->mult, speed));
184 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, speed));
185 set_udiv_cost (speed, mode, set_src_cost (all->udiv, speed));
186
187 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, speed)
188 <= 2 * add_cost (speed, mode)));
189 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, speed)
190 <= 4 * add_cost (speed, mode)));
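/* In other words, dividing by a power of two is considered cheap when
   its rtx cost is at most twice the cost of an addition, and taking a
   remainder modulo a power of two when it is at most four times that
   cost; e.g. with an add cost of 4 the thresholds are 8 and 16.  */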
191
192 set_shift_cost (speed, mode, 0, 0);
193 {
194 int cost = add_cost (speed, mode);
195 set_shiftadd_cost (speed, mode, 0, cost);
196 set_shiftsub0_cost (speed, mode, 0, cost);
197 set_shiftsub1_cost (speed, mode, 0, cost);
198 }
199
200 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
201 for (m = 1; m < n; m++)
202 {
203 XEXP (all->shift, 1) = all->cint[m];
204 XEXP (all->shift_mult, 1) = all->pow2[m];
205
206 set_shift_cost (speed, mode, m, set_src_cost (all->shift, speed));
207 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, speed));
208 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, speed));
209 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, speed));
210 }
211
212 if (SCALAR_INT_MODE_P (mode))
213 {
214 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
215 mode_from = (machine_mode)(mode_from + 1))
216 init_expmed_one_conv (all, mode, mode_from, speed);
217 }
218 if (GET_MODE_CLASS (mode) == MODE_INT)
219 {
220 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
221 if (wider_mode != VOIDmode)
222 {
223 PUT_MODE (all->zext, wider_mode);
224 PUT_MODE (all->wide_mult, wider_mode);
225 PUT_MODE (all->wide_lshr, wider_mode);
226 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
227
228 set_mul_widen_cost (speed, wider_mode,
229 set_src_cost (all->wide_mult, speed));
230 set_mul_highpart_cost (speed, mode,
231 set_src_cost (all->wide_trunc, speed));
232 }
233 }
234 }
235
236 void
237 init_expmed (void)
238 {
239 struct init_expmed_rtl all;
240 machine_mode mode = QImode;
241 int m, speed;
242
243 memset (&all, 0, sizeof all);
244 for (m = 1; m < MAX_BITS_PER_WORD; m++)
245 {
246 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
247 all.cint[m] = GEN_INT (m);
248 }
249
250 /* Avoid using hard regs in ways which may be unsupported. */
251 all.reg = gen_rtx_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
252 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
253 all.neg = gen_rtx_NEG (mode, all.reg);
254 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
255 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
256 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
257 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
258 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
259 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
260 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
261 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
262 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
263 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
264 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
265 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
266 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
267 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
268 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
269
270 for (speed = 0; speed < 2; speed++)
271 {
272 crtl->maybe_hot_insn_p = speed;
273 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
274
275 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
276 mode = (machine_mode)(mode + 1))
277 init_expmed_one_mode (&all, mode, speed);
278
279 if (MIN_MODE_PARTIAL_INT != VOIDmode)
280 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
281 mode = (machine_mode)(mode + 1))
282 init_expmed_one_mode (&all, mode, speed);
283
284 if (MIN_MODE_VECTOR_INT != VOIDmode)
285 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
286 mode = (machine_mode)(mode + 1))
287 init_expmed_one_mode (&all, mode, speed);
288 }
289
290 if (alg_hash_used_p ())
291 {
292 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
293 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
294 }
295 else
296 set_alg_hash_used_p (true);
297 default_rtl_profile ();
298
299 ggc_free (all.trunc);
300 ggc_free (all.shift_sub1);
301 ggc_free (all.shift_sub0);
302 ggc_free (all.shift_add);
303 ggc_free (all.shift_mult);
304 ggc_free (all.shift);
305 ggc_free (all.wide_trunc);
306 ggc_free (all.wide_lshr);
307 ggc_free (all.wide_mult);
308 ggc_free (all.zext);
309 ggc_free (all.smod_32);
310 ggc_free (all.sdiv_32);
311 ggc_free (all.udiv);
312 ggc_free (all.sdiv);
313 ggc_free (all.mult);
314 ggc_free (all.neg);
315 ggc_free (all.plus);
316 ggc_free (all.reg);
317 }
318
319 /* Return an rtx representing minus the value of X.
320 MODE is the intended mode of the result,
321 useful if X is a CONST_INT. */
322
323 rtx
324 negate_rtx (machine_mode mode, rtx x)
325 {
326 rtx result = simplify_unary_operation (NEG, mode, x, mode);
327
328 if (result == 0)
329 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
330
331 return result;
332 }
333
334 /* Adjust bitfield memory MEM so that it points to the first unit of mode
335 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
336 If MODE is BLKmode, return a reference to every byte in the bitfield.
337 Set *NEW_BITNUM to the bit position of the field within the new memory. */
338
339 static rtx
340 narrow_bit_field_mem (rtx mem, machine_mode mode,
341 unsigned HOST_WIDE_INT bitsize,
342 unsigned HOST_WIDE_INT bitnum,
343 unsigned HOST_WIDE_INT *new_bitnum)
344 {
345 if (mode == BLKmode)
346 {
347 *new_bitnum = bitnum % BITS_PER_UNIT;
348 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
349 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
350 / BITS_PER_UNIT);
351 return adjust_bitfield_address_size (mem, mode, offset, size);
352 }
353 else
354 {
355 unsigned int unit = GET_MODE_BITSIZE (mode);
356 *new_bitnum = bitnum % unit;
357 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
358 return adjust_bitfield_address (mem, mode, offset);
359 }
360 }
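/* A worked example, assuming 8-bit units: narrowing to HImode (16 bits)
   with BITNUM 37 sets *NEW_BITNUM to 37 % 16 = 5 and offsets the
   address by (37 - 5) / 8 = 4 bytes; with MODE == BLKmode and a
   BITSIZE of 12, *NEW_BITNUM becomes 37 % 8 = 5 and the reference
   covers (5 + 12 + 7) / 8 = 3 bytes starting at byte offset 4.  */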
361
362 /* The caller wants to perform insertion or extraction PATTERN on a
363 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
364 BITREGION_START and BITREGION_END are as for store_bit_field
365 and FIELDMODE is the natural mode of the field.
366
367 Search for a mode that is compatible with the memory access
368 restrictions and (where applicable) with a register insertion or
369 extraction. Return the new memory on success, storing the adjusted
370 bit position in *NEW_BITNUM. Return null otherwise. */
371
372 static rtx
373 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
374 rtx op0, HOST_WIDE_INT bitsize,
375 HOST_WIDE_INT bitnum,
376 unsigned HOST_WIDE_INT bitregion_start,
377 unsigned HOST_WIDE_INT bitregion_end,
378 machine_mode fieldmode,
379 unsigned HOST_WIDE_INT *new_bitnum)
380 {
381 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
382 bitregion_end, MEM_ALIGN (op0),
383 MEM_VOLATILE_P (op0));
384 machine_mode best_mode;
385 if (iter.next_mode (&best_mode))
386 {
387 /* We can use a memory in BEST_MODE. See whether this is true for
388 any wider modes. All other things being equal, we prefer to
389 use the widest mode possible because it tends to expose more
390 CSE opportunities. */
391 if (!iter.prefer_smaller_modes ())
392 {
393 /* Limit the search to the mode required by the corresponding
394 register insertion or extraction instruction, if any. */
395 machine_mode limit_mode = word_mode;
396 extraction_insn insn;
397 if (get_best_reg_extraction_insn (&insn, pattern,
398 GET_MODE_BITSIZE (best_mode),
399 fieldmode))
400 limit_mode = insn.field_mode;
401
402 machine_mode wider_mode;
403 while (iter.next_mode (&wider_mode)
404 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
405 best_mode = wider_mode;
406 }
407 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
408 new_bitnum);
409 }
410 return NULL_RTX;
411 }
412
413 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
414 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
415 offset is then BITNUM / BITS_PER_UNIT. */
416
417 static bool
418 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
419 unsigned HOST_WIDE_INT bitsize,
420 machine_mode struct_mode)
421 {
422 if (BYTES_BIG_ENDIAN)
423 return (bitnum % BITS_PER_UNIT == 0
424 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
425 || (bitnum + bitsize) % BITS_PER_WORD == 0));
426 else
427 return bitnum % BITS_PER_WORD == 0;
428 }
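/* For instance, on a little-endian target with 32-bit words, a 16-bit
   field at bit 32 of a DImode value is a lowpart (32 % 32 == 0) with
   subreg offset 32 / BITS_PER_UNIT = 4, whereas a field at bit 8 is
   not.  */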
429
430 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
431 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
432 Return false if the access would touch memory outside the range
433 BITREGION_START to BITREGION_END for conformance to the C++ memory
434 model. */
435
436 static bool
437 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
438 unsigned HOST_WIDE_INT bitnum,
439 machine_mode fieldmode,
440 unsigned HOST_WIDE_INT bitregion_start,
441 unsigned HOST_WIDE_INT bitregion_end)
442 {
443 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
444
445 /* -fstrict-volatile-bitfields must be enabled and we must have a
446 volatile MEM. */
447 if (!MEM_P (op0)
448 || !MEM_VOLATILE_P (op0)
449 || flag_strict_volatile_bitfields <= 0)
450 return false;
451
452 /* Non-integral modes likely only happen with packed structures.
453 Punt. */
454 if (!SCALAR_INT_MODE_P (fieldmode))
455 return false;
456
457 /* The bit size must not be larger than the field mode, and
458 the field mode must not be larger than a word. */
459 if (bitsize > modesize || modesize > BITS_PER_WORD)
460 return false;
461
462 /* Check for cases of unaligned fields that must be split. */
463 if (bitnum % BITS_PER_UNIT + bitsize > modesize
464 || (STRICT_ALIGNMENT
465 && bitnum % GET_MODE_ALIGNMENT (fieldmode) + bitsize > modesize))
466 return false;
467
468 /* Check for cases where the C++ memory model applies. */
469 if (bitregion_end != 0
470 && (bitnum - bitnum % modesize < bitregion_start
471 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
472 return false;
473
474 return true;
475 }
476
477 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
478 bit number BITNUM can be treated as a simple value of mode MODE. */
479
480 static bool
481 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
482 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
483 {
484 return (MEM_P (op0)
485 && bitnum % BITS_PER_UNIT == 0
486 && bitsize == GET_MODE_BITSIZE (mode)
487 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
488 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
489 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
490 }
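/* For example, assuming a 32-bit SImode, a 32-bit field at bit 64 of
   a MEM aligned to 32 bits satisfies this: the bit number is a
   multiple of BITS_PER_UNIT, the size matches the mode, and the
   access is naturally aligned, so it can be done as a plain SImode
   load or store at byte offset 8.  */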
491 \f
492 /* Try to use instruction INSV to store VALUE into a field of OP0.
493 BITSIZE and BITNUM are as for store_bit_field. */
494
495 static bool
496 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
497 unsigned HOST_WIDE_INT bitsize,
498 unsigned HOST_WIDE_INT bitnum,
499 rtx value)
500 {
501 struct expand_operand ops[4];
502 rtx value1;
503 rtx xop0 = op0;
504 rtx_insn *last = get_last_insn ();
505 bool copy_back = false;
506
507 machine_mode op_mode = insv->field_mode;
508 unsigned int unit = GET_MODE_BITSIZE (op_mode);
509 if (bitsize == 0 || bitsize > unit)
510 return false;
511
512 if (MEM_P (xop0))
513 /* Get a reference to the first byte of the field. */
514 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
515 &bitnum);
516 else
517 {
518 /* Convert from counting within OP0 to counting in OP_MODE. */
519 if (BYTES_BIG_ENDIAN)
520 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
521
522 /* If xop0 is a register, we need it in OP_MODE
523 to make it acceptable to the format of insv. */
524 if (GET_CODE (xop0) == SUBREG)
525 /* We can't just change the mode, because this might clobber op0,
526 and we will need the original value of op0 if insv fails. */
527 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
528 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
529 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
530 }
531
532 /* If the destination is a paradoxical subreg such that we need a
533 truncate to the inner mode, perform the insertion on a temporary and
534 truncate the result to the original destination. Note that we can't
535 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
536 X) 0)) is (reg:N X). */
537 if (GET_CODE (xop0) == SUBREG
538 && REG_P (SUBREG_REG (xop0))
539 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
540 op_mode))
541 {
542 rtx tem = gen_reg_rtx (op_mode);
543 emit_move_insn (tem, xop0);
544 xop0 = tem;
545 copy_back = true;
546 }
547
548 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
549 "backwards" from the size of the unit we are inserting into.
550 Otherwise, we count bits from the most significant on a
551 BYTES/BITS_BIG_ENDIAN machine. */
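/* For example, with a 32-bit unit, an 8-bit field whose BITNUM is 4
   under one counting convention has BITNUM 32 - 8 - 4 = 20 under the
   opposite convention.  */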
552
553 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
554 bitnum = unit - bitsize - bitnum;
555
556 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
557 value1 = value;
558 if (GET_MODE (value) != op_mode)
559 {
560 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
561 {
562 /* Optimization: Don't bother really extending VALUE
563 if it has all the bits we will actually use. However,
564 if we must narrow it, be sure we do it correctly. */
565
566 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
567 {
568 rtx tmp;
569
570 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
571 if (! tmp)
572 tmp = simplify_gen_subreg (op_mode,
573 force_reg (GET_MODE (value),
574 value1),
575 GET_MODE (value), 0);
576 value1 = tmp;
577 }
578 else
579 value1 = gen_lowpart (op_mode, value1);
580 }
581 else if (CONST_INT_P (value))
582 value1 = gen_int_mode (INTVAL (value), op_mode);
583 else
584 /* The parse phase is supposed to make VALUE's data type
585 match that of the component reference, which is a type
586 at least as wide as the field; so VALUE should have
587 a mode that corresponds to that type. */
588 gcc_assert (CONSTANT_P (value));
589 }
590
591 create_fixed_operand (&ops[0], xop0);
592 create_integer_operand (&ops[1], bitsize);
593 create_integer_operand (&ops[2], bitnum);
594 create_input_operand (&ops[3], value1, op_mode);
595 if (maybe_expand_insn (insv->icode, 4, ops))
596 {
597 if (copy_back)
598 convert_move (op0, xop0, true);
599 return true;
600 }
601 delete_insns_since (last);
602 return false;
603 }
604
605 /* A subroutine of store_bit_field, with the same arguments. Return true
606 if the operation could be implemented.
607
608 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
609 no other way of implementing the operation. If FALLBACK_P is false,
610 return false instead. */
611
612 static bool
613 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
614 unsigned HOST_WIDE_INT bitnum,
615 unsigned HOST_WIDE_INT bitregion_start,
616 unsigned HOST_WIDE_INT bitregion_end,
617 machine_mode fieldmode,
618 rtx value, bool fallback_p)
619 {
620 rtx op0 = str_rtx;
621 rtx orig_value;
622
623 while (GET_CODE (op0) == SUBREG)
624 {
625 /* The following line once was done only if WORDS_BIG_ENDIAN,
626 but I think that is a mistake. WORDS_BIG_ENDIAN is
627 meaningful at a much higher level; when structures are copied
628 between memory and regs, the higher-numbered regs
629 always get higher addresses. */
630 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
631 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
632 int byte_offset = 0;
633
634 /* Paradoxical subregs need special handling on big endian machines. */
635 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
636 {
637 int difference = inner_mode_size - outer_mode_size;
638
639 if (WORDS_BIG_ENDIAN)
640 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
641 if (BYTES_BIG_ENDIAN)
642 byte_offset += difference % UNITS_PER_WORD;
643 }
644 else
645 byte_offset = SUBREG_BYTE (op0);
646
647 bitnum += byte_offset * BITS_PER_UNIT;
648 op0 = SUBREG_REG (op0);
649 }
650
651 /* No action is needed if the target is a register and if the field
652 lies completely outside that register. This can occur if the source
653 code contains an out-of-bounds access to a small array. */
654 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
655 return true;
656
657 /* Use vec_set patterns for inserting parts of vectors whenever
658 available. */
659 if (VECTOR_MODE_P (GET_MODE (op0))
660 && !MEM_P (op0)
661 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
662 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
663 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
664 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
665 {
666 struct expand_operand ops[3];
667 machine_mode outermode = GET_MODE (op0);
668 machine_mode innermode = GET_MODE_INNER (outermode);
669 enum insn_code icode = optab_handler (vec_set_optab, outermode);
670 int pos = bitnum / GET_MODE_BITSIZE (innermode);
671
672 create_fixed_operand (&ops[0], op0);
673 create_input_operand (&ops[1], value, innermode);
674 create_integer_operand (&ops[2], pos);
675 if (maybe_expand_insn (icode, 3, ops))
676 return true;
677 }
678
679 /* If the target is a register, then overwriting the entire object or
680 storing a full-word or multi-word field can be done with just a SUBREG. */
681 if (!MEM_P (op0)
682 && bitsize == GET_MODE_BITSIZE (fieldmode)
683 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
684 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
685 {
686 /* Use the subreg machinery either to narrow OP0 to the required
687 words or to cope with mode punning between equal-sized modes.
688 In the latter case, use subreg on the rhs side, not lhs. */
689 rtx sub;
690
691 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
692 {
693 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
694 if (sub)
695 {
696 emit_move_insn (op0, sub);
697 return true;
698 }
699 }
700 else
701 {
702 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
703 bitnum / BITS_PER_UNIT);
704 if (sub)
705 {
706 emit_move_insn (sub, value);
707 return true;
708 }
709 }
710 }
711
712 /* If the target is memory, storing any naturally aligned field can be
713 done with a simple store. For targets that support fast unaligned
714 memory, any naturally sized, unit aligned field can be done directly. */
715 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
716 {
717 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
718 emit_move_insn (op0, value);
719 return true;
720 }
721
722 /* Make sure we are playing with integral modes. Pun with subregs
723 if we aren't. This must come after the entire register case above,
724 since that case is valid for any mode. The following cases are only
725 valid for integral modes. */
726 {
727 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
728 if (imode != GET_MODE (op0))
729 {
730 if (MEM_P (op0))
731 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
732 else
733 {
734 gcc_assert (imode != BLKmode);
735 op0 = gen_lowpart (imode, op0);
736 }
737 }
738 }
739
740 /* Storing an lsb-aligned field in a register
741 can be done with a movstrict instruction. */
742
743 if (!MEM_P (op0)
744 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
745 && bitsize == GET_MODE_BITSIZE (fieldmode)
746 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
747 {
748 struct expand_operand ops[2];
749 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
750 rtx arg0 = op0;
751 unsigned HOST_WIDE_INT subreg_off;
752
753 if (GET_CODE (arg0) == SUBREG)
754 {
755 /* Else we've got some float mode source being extracted into
756 a different float mode destination -- this combination of
757 subregs results in Severe Tire Damage. */
758 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
759 || GET_MODE_CLASS (fieldmode) == MODE_INT
760 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
761 arg0 = SUBREG_REG (arg0);
762 }
763
764 subreg_off = bitnum / BITS_PER_UNIT;
765 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
766 {
767 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
768
769 create_fixed_operand (&ops[0], arg0);
770 /* Shrink the source operand to FIELDMODE. */
771 create_convert_operand_to (&ops[1], value, fieldmode, false);
772 if (maybe_expand_insn (icode, 2, ops))
773 return true;
774 }
775 }
776
777 /* Handle fields bigger than a word. */
778
779 if (bitsize > BITS_PER_WORD)
780 {
781 /* Here we transfer the words of the field
782 in the order least significant first.
783 This is because the most significant word is the one which may
784 be less than full.
785 However, only do that if the value is not BLKmode. */
786
787 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
788 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
789 unsigned int i;
790 rtx_insn *last;
791
792 /* This is the mode we must force value to, so that there will be enough
793 subwords to extract. Note that fieldmode will often (always?) be
794 VOIDmode, because that is what store_field uses to indicate that this
795 is a bit field, but passing VOIDmode to operand_subword_force
796 is not allowed. */
797 fieldmode = GET_MODE (value);
798 if (fieldmode == VOIDmode)
799 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
800
801 last = get_last_insn ();
802 for (i = 0; i < nwords; i++)
803 {
804 /* If I is 0, use the low-order word in both field and target;
805 if I is 1, use the next to lowest word; and so on. */
806 unsigned int wordnum = (backwards
807 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
808 - i - 1
809 : i);
810 unsigned int bit_offset = (backwards
811 ? MAX ((int) bitsize - ((int) i + 1)
812 * BITS_PER_WORD,
813 0)
814 : (int) i * BITS_PER_WORD);
815 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
816 unsigned HOST_WIDE_INT new_bitsize =
817 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
818
819 /* If the remaining chunk doesn't have a full word's worth of bits,
820 we have to make sure that for big-endian machines the higher-order
821 bits are used. */
822 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
823 value_word = simplify_expand_binop (word_mode, lshr_optab,
824 value_word,
825 GEN_INT (BITS_PER_WORD
826 - new_bitsize),
827 NULL_RTX, true,
828 OPTAB_LIB_WIDEN);
829
830 if (!store_bit_field_1 (op0, new_bitsize,
831 bitnum + bit_offset,
832 bitregion_start, bitregion_end,
833 word_mode,
834 value_word, fallback_p))
835 {
836 delete_insns_since (last);
837 return false;
838 }
839 }
840 return true;
841 }
842
843 /* If VALUE has a floating-point or complex mode, access it as an
844 integer of the corresponding size. This can occur on a machine
845 with 64 bit registers that uses SFmode for float. It can also
846 occur for unaligned float or complex fields. */
847 orig_value = value;
848 if (GET_MODE (value) != VOIDmode
849 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
850 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
851 {
852 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
853 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
854 }
855
856 /* If OP0 is a multi-word register, narrow it to the affected word.
857 If the region spans two words, defer to store_split_bit_field. */
858 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
859 {
860 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
861 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
862 gcc_assert (op0);
863 bitnum %= BITS_PER_WORD;
864 if (bitnum + bitsize > BITS_PER_WORD)
865 {
866 if (!fallback_p)
867 return false;
868
869 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
870 bitregion_end, value);
871 return true;
872 }
873 }
874
875 /* From here on we can assume that the field to be stored in fits
876 within a word. If the destination is a register, it too fits
877 in a word. */
878
879 extraction_insn insv;
880 if (!MEM_P (op0)
881 && get_best_reg_extraction_insn (&insv, EP_insv,
882 GET_MODE_BITSIZE (GET_MODE (op0)),
883 fieldmode)
884 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
885 return true;
886
887 /* If OP0 is a memory, try copying it to a register and seeing if a
888 cheap register alternative is available. */
889 if (MEM_P (op0))
890 {
891 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
892 fieldmode)
893 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
894 return true;
895
896 rtx_insn *last = get_last_insn ();
897
898 /* Try loading part of OP0 into a register, inserting the bitfield
899 into that, and then copying the result back to OP0. */
900 unsigned HOST_WIDE_INT bitpos;
901 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
902 bitregion_start, bitregion_end,
903 fieldmode, &bitpos);
904 if (xop0)
905 {
906 rtx tempreg = copy_to_reg (xop0);
907 if (store_bit_field_1 (tempreg, bitsize, bitpos,
908 bitregion_start, bitregion_end,
909 fieldmode, orig_value, false))
910 {
911 emit_move_insn (xop0, tempreg);
912 return true;
913 }
914 delete_insns_since (last);
915 }
916 }
917
918 if (!fallback_p)
919 return false;
920
921 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
922 bitregion_end, value);
923 return true;
924 }
925
926 /* Generate code to store value from rtx VALUE
927 into a bit-field within structure STR_RTX
928 containing BITSIZE bits starting at bit BITNUM.
929
930 BITREGION_START is the bit position of the first bitfield in this region.
931 BITREGION_END is the bit position of the ending bitfield in this region.
932 These two fields are 0 if the C++ memory model does not apply,
933 or if we are not interested in keeping track of bitfield regions.
934
935 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
936
937 void
938 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
939 unsigned HOST_WIDE_INT bitnum,
940 unsigned HOST_WIDE_INT bitregion_start,
941 unsigned HOST_WIDE_INT bitregion_end,
942 machine_mode fieldmode,
943 rtx value)
944 {
945 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
946 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
947 bitregion_start, bitregion_end))
948 {
949 /* Storing any naturally aligned field can be done with a simple
950 store. For targets that support fast unaligned memory, any
951 naturally sized, unit aligned field can be done directly. */
952 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, fieldmode))
953 {
954 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
955 bitnum / BITS_PER_UNIT);
956 emit_move_insn (str_rtx, value);
957 }
958 else
959 {
960 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
961 &bitnum);
962 /* Explicitly override the C/C++ memory model; ignore the
963 bit range so that we can do the access in the mode mandated
964 by -fstrict-volatile-bitfields instead. */
965 store_fixed_bit_field_1 (str_rtx, bitsize, bitnum, value);
966 }
967
968 return;
969 }
970
971 /* Under the C++0x memory model, we must not touch bits outside the
972 bit region. Adjust the address to start at the beginning of the
973 bit region. */
974 if (MEM_P (str_rtx) && bitregion_start > 0)
975 {
976 machine_mode bestmode;
977 HOST_WIDE_INT offset, size;
978
979 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
980
981 offset = bitregion_start / BITS_PER_UNIT;
982 bitnum -= bitregion_start;
983 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
984 bitregion_end -= bitregion_start;
985 bitregion_start = 0;
986 bestmode = get_best_mode (bitsize, bitnum,
987 bitregion_start, bitregion_end,
988 MEM_ALIGN (str_rtx), VOIDmode,
989 MEM_VOLATILE_P (str_rtx));
990 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
991 }
992
993 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
994 bitregion_start, bitregion_end,
995 fieldmode, value, true))
996 gcc_unreachable ();
997 }
998 \f
999 /* Use shifts and boolean operations to store VALUE into a bit field of
1000 width BITSIZE in OP0, starting at bit BITNUM. */
1001
1002 static void
1003 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1004 unsigned HOST_WIDE_INT bitnum,
1005 unsigned HOST_WIDE_INT bitregion_start,
1006 unsigned HOST_WIDE_INT bitregion_end,
1007 rtx value)
1008 {
1009 /* There is a case not handled here:
1010 a structure with a known alignment of just a halfword
1011 and a field split across two aligned halfwords within the structure.
1012 Or likewise a structure with a known alignment of just a byte
1013 and a field split across two bytes.
1014 Such cases are not supposed to be able to occur. */
1015
1016 if (MEM_P (op0))
1017 {
1018 machine_mode mode = GET_MODE (op0);
1019 if (GET_MODE_BITSIZE (mode) == 0
1020 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1021 mode = word_mode;
1022 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1023 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1024
1025 if (mode == VOIDmode)
1026 {
1027 /* The only way this should occur is if the field spans word
1028 boundaries. */
1029 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1030 bitregion_end, value);
1031 return;
1032 }
1033
1034 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1035 }
1036
1037 store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1038 }
1039
1040 /* Helper function for store_fixed_bit_field; it always stores
1041 the bit field using the mode of OP0. */
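/* A sketch with illustrative numbers: storing VALUE = 5 into a 3-bit
   field whose least significant bit is 4 bits above the lsb of an
   8-bit OP0 computes OP0 = (OP0 & ~0x70) | (5 << 4), i.e. the field
   is masked out first and the shifted value is then ORed in.  */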
1042
1043 static void
1044 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1045 unsigned HOST_WIDE_INT bitnum,
1046 rtx value)
1047 {
1048 machine_mode mode;
1049 rtx temp;
1050 int all_zero = 0;
1051 int all_one = 0;
1052
1053 mode = GET_MODE (op0);
1054 gcc_assert (SCALAR_INT_MODE_P (mode));
1055
1056 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1057 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1058
1059 if (BYTES_BIG_ENDIAN)
1060 /* BITNUM is the distance between our msb
1061 and that of the containing datum.
1062 Convert it to the distance from the lsb. */
1063 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1064
1065 /* Now BITNUM is always the distance between our lsb
1066 and that of OP0. */
1067
1068 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1069 we must first convert its mode to MODE. */
1070
1071 if (CONST_INT_P (value))
1072 {
1073 unsigned HOST_WIDE_INT v = UINTVAL (value);
1074
1075 if (bitsize < HOST_BITS_PER_WIDE_INT)
1076 v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1077
1078 if (v == 0)
1079 all_zero = 1;
1080 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1081 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1082 || (bitsize == HOST_BITS_PER_WIDE_INT
1083 && v == (unsigned HOST_WIDE_INT) -1))
1084 all_one = 1;
1085
1086 value = lshift_value (mode, v, bitnum);
1087 }
1088 else
1089 {
1090 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1091 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1092
1093 if (GET_MODE (value) != mode)
1094 value = convert_to_mode (mode, value, 1);
1095
1096 if (must_and)
1097 value = expand_binop (mode, and_optab, value,
1098 mask_rtx (mode, 0, bitsize, 0),
1099 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1100 if (bitnum > 0)
1101 value = expand_shift (LSHIFT_EXPR, mode, value,
1102 bitnum, NULL_RTX, 1);
1103 }
1104
1105 /* Now clear the chosen bits in OP0,
1106 except that if VALUE is -1 we need not bother. */
1107 /* We keep the intermediates in registers to allow CSE to combine
1108 consecutive bitfield assignments. */
1109
1110 temp = force_reg (mode, op0);
1111
1112 if (! all_one)
1113 {
1114 temp = expand_binop (mode, and_optab, temp,
1115 mask_rtx (mode, bitnum, bitsize, 1),
1116 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1117 temp = force_reg (mode, temp);
1118 }
1119
1120 /* Now logical-or VALUE into OP0, unless it is zero. */
1121
1122 if (! all_zero)
1123 {
1124 temp = expand_binop (mode, ior_optab, temp, value,
1125 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1126 temp = force_reg (mode, temp);
1127 }
1128
1129 if (op0 != temp)
1130 {
1131 op0 = copy_rtx (op0);
1132 emit_move_insn (op0, temp);
1133 }
1134 }
1135 \f
1136 /* Store a bit field that is split across multiple accessible memory objects.
1137
1138 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1139 BITSIZE is the field width; BITPOS the position of its first bit
1140 (within the word).
1141 VALUE is the value to store.
1142
1143 This does not yet handle fields wider than BITS_PER_WORD. */
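/* A worked illustration, assuming byte-aligned memory (UNIT == 8):
   an 8-bit field at BITPOS 28 is stored in two pieces, first 4 bits
   at unit offset 3 with THISPOS 4, then the remaining 4 bits at unit
   offset 4 with THISPOS 0, so that no single access crosses a unit
   boundary.  */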
1144
1145 static void
1146 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1147 unsigned HOST_WIDE_INT bitpos,
1148 unsigned HOST_WIDE_INT bitregion_start,
1149 unsigned HOST_WIDE_INT bitregion_end,
1150 rtx value)
1151 {
1152 unsigned int unit;
1153 unsigned int bitsdone = 0;
1154
1155 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1156 much at a time. */
1157 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1158 unit = BITS_PER_WORD;
1159 else
1160 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1161
1162 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1163 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1164 again, and we will mutually recurse forever. */
1165 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1166 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1167
1168 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1169 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1170 that VALUE might be a floating-point constant. */
1171 if (CONSTANT_P (value) && !CONST_INT_P (value))
1172 {
1173 rtx word = gen_lowpart_common (word_mode, value);
1174
1175 if (word && (value != word))
1176 value = word;
1177 else
1178 value = gen_lowpart_common (word_mode,
1179 force_reg (GET_MODE (value) != VOIDmode
1180 ? GET_MODE (value)
1181 : word_mode, value));
1182 }
1183
1184 while (bitsdone < bitsize)
1185 {
1186 unsigned HOST_WIDE_INT thissize;
1187 rtx part, word;
1188 unsigned HOST_WIDE_INT thispos;
1189 unsigned HOST_WIDE_INT offset;
1190
1191 offset = (bitpos + bitsdone) / unit;
1192 thispos = (bitpos + bitsdone) % unit;
1193
1194 /* When the region of bytes we can touch is restricted, decrease
1195 UNIT close to the end of the region as needed. If op0 is a REG
1196 or SUBREG of REG, don't do this, as there can't be data races
1197 on a register and we can expand shorter code in some cases. */
1198 if (bitregion_end
1199 && unit > BITS_PER_UNIT
1200 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1201 && !REG_P (op0)
1202 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1203 {
1204 unit = unit / 2;
1205 continue;
1206 }
1207
1208 /* THISSIZE must not overrun a word boundary. Otherwise,
1209 store_fixed_bit_field will call us again, and we will mutually
1210 recurse forever. */
1211 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1212 thissize = MIN (thissize, unit - thispos);
1213
1214 if (BYTES_BIG_ENDIAN)
1215 {
1216 /* Fetch successively less significant portions. */
1217 if (CONST_INT_P (value))
1218 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1219 >> (bitsize - bitsdone - thissize))
1220 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1221 else
1222 {
1223 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1224 /* The args are chosen so that the last part includes the
1225 lsb. Give extract_bit_field the value it needs (with
1226 endianness compensation) to fetch the piece we want. */
1227 part = extract_fixed_bit_field (word_mode, value, thissize,
1228 total_bits - bitsize + bitsdone,
1229 NULL_RTX, 1);
1230 }
1231 }
1232 else
1233 {
1234 /* Fetch successively more significant portions. */
1235 if (CONST_INT_P (value))
1236 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1237 >> bitsdone)
1238 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1239 else
1240 part = extract_fixed_bit_field (word_mode, value, thissize,
1241 bitsdone, NULL_RTX, 1);
1242 }
1243
1244 /* If OP0 is a register, then handle OFFSET here.
1245
1246 When handling multiword bitfields, extract_bit_field may pass
1247 down a word_mode SUBREG of a larger REG for a bitfield that actually
1248 crosses a word boundary. Thus, for a SUBREG, we must find
1249 the current word starting from the base register. */
1250 if (GET_CODE (op0) == SUBREG)
1251 {
1252 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1253 + (offset * unit / BITS_PER_WORD);
1254 machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1255 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1256 word = word_offset ? const0_rtx : op0;
1257 else
1258 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1259 GET_MODE (SUBREG_REG (op0)));
1260 offset &= BITS_PER_WORD / unit - 1;
1261 }
1262 else if (REG_P (op0))
1263 {
1264 machine_mode op0_mode = GET_MODE (op0);
1265 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1266 word = offset ? const0_rtx : op0;
1267 else
1268 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1269 GET_MODE (op0));
1270 offset &= BITS_PER_WORD / unit - 1;
1271 }
1272 else
1273 word = op0;
1274
1275 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1276 it is just an out-of-bounds access. Ignore it. */
1277 if (word != const0_rtx)
1278 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1279 bitregion_start, bitregion_end, part);
1280 bitsdone += thissize;
1281 }
1282 }
1283 \f
1284 /* A subroutine of extract_bit_field_1 that converts return value X
1285 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1286 to extract_bit_field. */
1287
1288 static rtx
1289 convert_extracted_bit_field (rtx x, machine_mode mode,
1290 machine_mode tmode, bool unsignedp)
1291 {
1292 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1293 return x;
1294
1295 /* If TMODE is not a scalar integral mode, first convert X to a
1296 scalar integer of the same size and then view it in TMODE (e.g. a
1297 floating-point mode) via a SUBREG. */
1298 if (!SCALAR_INT_MODE_P (tmode))
1299 {
1300 machine_mode smode;
1301
1302 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1303 x = convert_to_mode (smode, x, unsignedp);
1304 x = force_reg (smode, x);
1305 return gen_lowpart (tmode, x);
1306 }
1307
1308 return convert_to_mode (tmode, x, unsignedp);
1309 }
1310
1311 /* Try to use an ext(z)v pattern to extract a field from OP0.
1312 Return the extracted value on success, otherwise return null.
1313 EXT_MODE is the mode of the extraction and the other arguments
1314 are as for extract_bit_field. */
1315
1316 static rtx
1317 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1318 unsigned HOST_WIDE_INT bitsize,
1319 unsigned HOST_WIDE_INT bitnum,
1320 int unsignedp, rtx target,
1321 machine_mode mode, machine_mode tmode)
1322 {
1323 struct expand_operand ops[4];
1324 rtx spec_target = target;
1325 rtx spec_target_subreg = 0;
1326 machine_mode ext_mode = extv->field_mode;
1327 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1328
1329 if (bitsize == 0 || unit < bitsize)
1330 return NULL_RTX;
1331
1332 if (MEM_P (op0))
1333 /* Get a reference to the first byte of the field. */
1334 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1335 &bitnum);
1336 else
1337 {
1338 /* Convert from counting within OP0 to counting in EXT_MODE. */
1339 if (BYTES_BIG_ENDIAN)
1340 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1341
1342 /* If op0 is a register, we need it in EXT_MODE to make it
1343 acceptable to the format of ext(z)v. */
1344 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1345 return NULL_RTX;
1346 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1347 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1348 }
1349
1350 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1351 "backwards" from the size of the unit we are extracting from.
1352 Otherwise, we count bits from the most significant on a
1353 BYTES/BITS_BIG_ENDIAN machine. */
1354
1355 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1356 bitnum = unit - bitsize - bitnum;
1357
1358 if (target == 0)
1359 target = spec_target = gen_reg_rtx (tmode);
1360
1361 if (GET_MODE (target) != ext_mode)
1362 {
1363 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1364 between the mode of the extraction (word_mode) and the target
1365 mode. Instead, create a temporary and use convert_move to set
1366 the target. */
1367 if (REG_P (target)
1368 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1369 {
1370 target = gen_lowpart (ext_mode, target);
1371 if (GET_MODE_PRECISION (ext_mode)
1372 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1373 spec_target_subreg = target;
1374 }
1375 else
1376 target = gen_reg_rtx (ext_mode);
1377 }
1378
1379 create_output_operand (&ops[0], target, ext_mode);
1380 create_fixed_operand (&ops[1], op0);
1381 create_integer_operand (&ops[2], bitsize);
1382 create_integer_operand (&ops[3], bitnum);
1383 if (maybe_expand_insn (extv->icode, 4, ops))
1384 {
1385 target = ops[0].value;
1386 if (target == spec_target)
1387 return target;
1388 if (target == spec_target_subreg)
1389 return spec_target;
1390 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1391 }
1392 return NULL_RTX;
1393 }
1394
1395 /* A subroutine of extract_bit_field, with the same arguments.
1396 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1397 if we can find no other means of implementing the operation.
1398 If FALLBACK_P is false, return NULL instead. */
1399
1400 static rtx
1401 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1402 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1403 machine_mode mode, machine_mode tmode,
1404 bool fallback_p)
1405 {
1406 rtx op0 = str_rtx;
1407 machine_mode int_mode;
1408 machine_mode mode1;
1409
1410 if (tmode == VOIDmode)
1411 tmode = mode;
1412
1413 while (GET_CODE (op0) == SUBREG)
1414 {
1415 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1416 op0 = SUBREG_REG (op0);
1417 }
1418
1419 /* If we have an out-of-bounds access to a register, just return an
1420 uninitialized register of the required mode. This can occur if the
1421 source code contains an out-of-bounds access to a small array. */
1422 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1423 return gen_reg_rtx (tmode);
1424
1425 if (REG_P (op0)
1426 && mode == GET_MODE (op0)
1427 && bitnum == 0
1428 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1429 {
1430 /* We're trying to extract a full register from itself. */
1431 return op0;
1432 }
1433
1434 /* See if we can get a better vector mode before extracting. */
1435 if (VECTOR_MODE_P (GET_MODE (op0))
1436 && !MEM_P (op0)
1437 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1438 {
1439 machine_mode new_mode;
1440
1441 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1442 new_mode = MIN_MODE_VECTOR_FLOAT;
1443 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1444 new_mode = MIN_MODE_VECTOR_FRACT;
1445 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1446 new_mode = MIN_MODE_VECTOR_UFRACT;
1447 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1448 new_mode = MIN_MODE_VECTOR_ACCUM;
1449 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1450 new_mode = MIN_MODE_VECTOR_UACCUM;
1451 else
1452 new_mode = MIN_MODE_VECTOR_INT;
1453
1454 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1455 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1456 && targetm.vector_mode_supported_p (new_mode))
1457 break;
1458 if (new_mode != VOIDmode)
1459 op0 = gen_lowpart (new_mode, op0);
1460 }
1461
1462 /* Use vec_extract patterns for extracting parts of vectors whenever
1463 available. */
1464 if (VECTOR_MODE_P (GET_MODE (op0))
1465 && !MEM_P (op0)
1466 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1467 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1468 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1469 {
1470 struct expand_operand ops[3];
1471 machine_mode outermode = GET_MODE (op0);
1472 machine_mode innermode = GET_MODE_INNER (outermode);
1473 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1474 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1475
1476 create_output_operand (&ops[0], target, innermode);
1477 create_input_operand (&ops[1], op0, outermode);
1478 create_integer_operand (&ops[2], pos);
1479 if (maybe_expand_insn (icode, 3, ops))
1480 {
1481 target = ops[0].value;
1482 if (GET_MODE (target) != mode)
1483 return gen_lowpart (tmode, target);
1484 return target;
1485 }
1486 }
1487
1488 /* Make sure we are playing with integral modes. Pun with subregs
1489 if we aren't. */
1490 {
1491 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1492 if (imode != GET_MODE (op0))
1493 {
1494 if (MEM_P (op0))
1495 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1496 else if (imode != BLKmode)
1497 {
1498 op0 = gen_lowpart (imode, op0);
1499
1500 /* If we got a SUBREG, force it into a register since we
1501 aren't going to be able to do another SUBREG on it. */
1502 if (GET_CODE (op0) == SUBREG)
1503 op0 = force_reg (imode, op0);
1504 }
1505 else if (REG_P (op0))
1506 {
1507 rtx reg, subreg;
1508 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1509 MODE_INT);
1510 reg = gen_reg_rtx (imode);
1511 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1512 emit_move_insn (subreg, op0);
1513 op0 = reg;
1514 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1515 }
1516 else
1517 {
1518 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1519 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1520 emit_move_insn (mem, op0);
1521 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1522 }
1523 }
1524 }
1525
1526 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1527 If that's wrong, the solution is to test for it and set TARGET to 0
1528 if needed. */
1529
1530 /* Get the mode of the field to use for atomic access or subreg
1531 conversion. */
1532 mode1 = mode;
1533 if (SCALAR_INT_MODE_P (tmode))
1534 {
1535 machine_mode try_mode = mode_for_size (bitsize,
1536 GET_MODE_CLASS (tmode), 0);
1537 if (try_mode != BLKmode)
1538 mode1 = try_mode;
1539 }
1540 gcc_assert (mode1 != BLKmode);
1541
1542 /* Extraction of a full MODE1 value can be done with a subreg as long
1543 as the least significant bit of the value is the least significant
1544 bit of either OP0 or a word of OP0. */
1545 if (!MEM_P (op0)
1546 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1547 && bitsize == GET_MODE_BITSIZE (mode1)
1548 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1549 {
1550 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1551 bitnum / BITS_PER_UNIT);
1552 if (sub)
1553 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1554 }
1555
1556 /* Extraction of a full MODE1 value can be done with a load as long as
1557 the field is on a byte boundary and is sufficiently aligned. */
1558 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1559 {
1560 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1561 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1562 }
1563
1564 /* Handle fields bigger than a word. */
1565
1566 if (bitsize > BITS_PER_WORD)
1567 {
1568 /* Here we transfer the words of the field
1569 in the order least significant first.
1570 This is because the most significant word is the one which may
1571 be less than full. */
1572
1573 unsigned int backwards = WORDS_BIG_ENDIAN;
1574 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1575 unsigned int i;
1576 rtx_insn *last;
1577
1578 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1579 target = gen_reg_rtx (mode);
1580
1581 /* Indicate for flow that the entire target reg is being set. */
1582 emit_clobber (target);
1583
1584 last = get_last_insn ();
1585 for (i = 0; i < nwords; i++)
1586 {
1587 /* If I is 0, use the low-order word in both field and target;
1588 if I is 1, use the next to lowest word; and so on. */
1589 /* Word number in TARGET to use. */
1590 unsigned int wordnum
1591 = (backwards
1592 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1593 : i);
1594 /* Offset from start of field in OP0. */
1595 unsigned int bit_offset = (backwards
1596 ? MAX ((int) bitsize - ((int) i + 1)
1597 * BITS_PER_WORD,
1598 0)
1599 : (int) i * BITS_PER_WORD);
1600 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1601 rtx result_part
1602 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1603 bitsize - i * BITS_PER_WORD),
1604 bitnum + bit_offset, 1, target_part,
1605 mode, word_mode, fallback_p);
1606
1607 gcc_assert (target_part);
1608 if (!result_part)
1609 {
1610 delete_insns_since (last);
1611 return NULL;
1612 }
1613
1614 if (result_part != target_part)
1615 emit_move_insn (target_part, result_part);
1616 }
1617
1618 if (unsignedp)
1619 {
1620 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1621 need to be zero'd out. */
1622 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1623 {
1624 unsigned int i, total_words;
1625
1626 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1627 for (i = nwords; i < total_words; i++)
1628 emit_move_insn
1629 (operand_subword (target,
1630 backwards ? total_words - i - 1 : i,
1631 1, VOIDmode),
1632 const0_rtx);
1633 }
1634 return target;
1635 }
1636
1637 /* Signed bit field: sign-extend with two arithmetic shifts. */
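/* For example, with a 32-bit MODE and a 12-bit field, shifting left
   by 32 - 12 = 20 and then arithmetically right by 20 replicates
   bit 11 of the extracted value into bits 12..31, yielding the
   sign-extended result.  */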
1638 target = expand_shift (LSHIFT_EXPR, mode, target,
1639 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1640 return expand_shift (RSHIFT_EXPR, mode, target,
1641 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1642 }
1643
1644 /* If OP0 is a multi-word register, narrow it to the affected word.
1645 If the region spans two words, defer to extract_split_bit_field. */
1646 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1647 {
1648 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1649 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1650 bitnum %= BITS_PER_WORD;
1651 if (bitnum + bitsize > BITS_PER_WORD)
1652 {
1653 if (!fallback_p)
1654 return NULL_RTX;
1655 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1656 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1657 }
1658 }
1659
1660 /* From here on we know the desired field is smaller than a word.
1661 If OP0 is a register, it too fits within a word. */
1662 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1663 extraction_insn extv;
1664 if (!MEM_P (op0)
1665 /* ??? We could limit the structure size to the part of OP0 that
1666 contains the field, with appropriate checks for endianness
1667 and TRULY_NOOP_TRUNCATION. */
1668 && get_best_reg_extraction_insn (&extv, pattern,
1669 GET_MODE_BITSIZE (GET_MODE (op0)),
1670 tmode))
1671 {
1672 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1673 unsignedp, target, mode,
1674 tmode);
1675 if (result)
1676 return result;
1677 }
1678
1679 /* If OP0 is a memory, try copying it to a register and seeing if a
1680 cheap register alternative is available. */
1681 if (MEM_P (op0))
1682 {
1683 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1684 tmode))
1685 {
1686 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1687 bitnum, unsignedp,
1688 target, mode,
1689 tmode);
1690 if (result)
1691 return result;
1692 }
1693
1694 rtx_insn *last = get_last_insn ();
1695
1696 /* Try loading part of OP0 into a register and extracting the
1697 bitfield from that. */
1698 unsigned HOST_WIDE_INT bitpos;
1699 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1700 0, 0, tmode, &bitpos);
1701 if (xop0)
1702 {
1703 xop0 = copy_to_reg (xop0);
1704 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1705 unsignedp, target,
1706 mode, tmode, false);
1707 if (result)
1708 return result;
1709 delete_insns_since (last);
1710 }
1711 }
1712
1713 if (!fallback_p)
1714 return NULL;
1715
1716 /* Find a correspondingly-sized integer field, so we can apply
1717 shifts and masks to it. */
1718 int_mode = int_mode_for_mode (tmode);
1719 if (int_mode == BLKmode)
1720 int_mode = int_mode_for_mode (mode);
1721 /* Should probably push op0 out to memory and then do a load. */
1722 gcc_assert (int_mode != BLKmode);
1723
1724 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1725 target, unsignedp);
1726 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1727 }
1728
1729 /* Generate code to extract a bit-field from STR_RTX
1730 containing BITSIZE bits, starting at BITNUM,
1731 and put it in TARGET if possible (if TARGET is nonzero).
1732 Regardless of TARGET, we return the rtx for where the value is placed.
1733
1734 STR_RTX is the structure containing the field (a REG or MEM).
1735 UNSIGNEDP is nonzero if this is an unsigned bit field.
1736 MODE is the natural mode of the field value once extracted.
1737 TMODE is the mode the caller would like the value to have;
1738 but the value may be returned with type MODE instead.
1739
1740 If a TARGET is specified and we can store in it at no extra cost,
1741 we do so, and return TARGET.
1742 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1743 if they are equally easy. */
1744
1745 rtx
1746 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1747 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1748 machine_mode mode, machine_mode tmode)
1749 {
1750 machine_mode mode1;
1751
1752 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1753 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1754 mode1 = GET_MODE (str_rtx);
1755 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1756 mode1 = GET_MODE (target);
1757 else
1758 mode1 = tmode;
1759
1760 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1761 {
1762 rtx result;
1763
1764 /* Extraction of a full MODE1 value can be done with a load as long as
1765 the field is on a byte boundary and is sufficiently aligned. */
1766 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, mode1))
1767 result = adjust_bitfield_address (str_rtx, mode1,
1768 bitnum / BITS_PER_UNIT);
1769 else
1770 {
1771 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1772 &bitnum);
1773 result = extract_fixed_bit_field_1 (mode, str_rtx, bitsize, bitnum,
1774 target, unsignedp);
1775 }
1776
1777 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1778 }
1779
1780 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1781 target, mode, tmode, true);
1782 }
1783 \f
1784 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1785 from bit BITNUM of OP0.
1786
1787 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1788 If TARGET is nonzero, attempts to store the value there
1789 and return TARGET, but this is not guaranteed.
1790 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1791
1792 static rtx
1793 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1794 unsigned HOST_WIDE_INT bitsize,
1795 unsigned HOST_WIDE_INT bitnum, rtx target,
1796 int unsignedp)
1797 {
1798 if (MEM_P (op0))
1799 {
1800 machine_mode mode
1801 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1802 MEM_VOLATILE_P (op0));
1803
1804 if (mode == VOIDmode)
1805 /* The only way this should occur is if the field spans word
1806 boundaries. */
1807 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1808
1809 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1810 }
1811
1812 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1813 target, unsignedp);
1814 }
1815
1816 /* Helper function for extract_fixed_bit_field; extracts
1817 the bit field always using the MODE of OP0. */
1818
1819 static rtx
1820 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1821 unsigned HOST_WIDE_INT bitsize,
1822 unsigned HOST_WIDE_INT bitnum, rtx target,
1823 int unsignedp)
1824 {
1825 machine_mode mode = GET_MODE (op0);
1826 gcc_assert (SCALAR_INT_MODE_P (mode));
1827
1828 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1829 for invalid input, such as extracting the equivalent of f5 from
1830 gcc.dg/pr48335-2.c. */
1831
1832 if (BYTES_BIG_ENDIAN)
1833 /* BITNUM is the distance between our msb and that of OP0.
1834 Convert it to the distance from the lsb. */
1835 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1836
1837 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1838 We have reduced the big-endian case to the little-endian case. */
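/* As a rough illustration: with a 32-bit OP0, a 5-bit field whose
   big-endian BITNUM is 4 (four bits below OP0's msb) starts
   32 - 5 - 4 = 23 bits above OP0's lsb, which is the form the shifting
   code below expects. */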
1839
1840 if (unsignedp)
1841 {
1842 if (bitnum)
1843 {
1844 /* If the field does not already start at the lsb,
1845 shift it so it does. */
1846 /* Maybe propagate the target for the shift. */
1847 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1848 if (tmode != mode)
1849 subtarget = 0;
1850 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1851 }
1852 /* Convert the value to the desired mode. */
1853 if (mode != tmode)
1854 op0 = convert_to_mode (tmode, op0, 1);
1855
1856 /* Unless the msb of the field used to be the msb when we shifted,
1857 mask out the upper bits. */
1858
1859 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1860 return expand_binop (GET_MODE (op0), and_optab, op0,
1861 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1862 target, 1, OPTAB_LIB_WIDEN);
1863 return op0;
1864 }
1865
1866 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1867 then arithmetic-shift its lsb to the lsb of the word. */
1868 op0 = force_reg (mode, op0);
1869
1870 /* Find the narrowest integer mode that contains the field. */
1871
1872 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1873 mode = GET_MODE_WIDER_MODE (mode))
1874 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1875 {
1876 op0 = convert_to_mode (mode, op0, 0);
1877 break;
1878 }
1879
1880 if (mode != tmode)
1881 target = 0;
1882
1883 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1884 {
1885 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1886 /* Maybe propagate the target for the shift. */
1887 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1888 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1889 }
1890
1891 return expand_shift (RSHIFT_EXPR, mode, op0,
1892 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1893 }
1894
1895 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1896 VALUE << BITPOS. */
1897
1898 static rtx
1899 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
1900 int bitpos)
1901 {
1902 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
1903 }
1904 \f
1905 /* Extract a bit field that is split across two words
1906 and return an RTX for the result.
1907
1908 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1909 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1910 UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend. */
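/* As an illustrative sketch: on a target with 32-bit words, a 20-bit
   field starting at bit 24 of its first word takes 8 bits from that word
   and 12 bits from the next; each part is fetched below as an unsigned
   field, shifted into position, and combined with IOR. */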
1911
1912 static rtx
1913 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1914 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1915 {
1916 unsigned int unit;
1917 unsigned int bitsdone = 0;
1918 rtx result = NULL_RTX;
1919 int first = 1;
1920
1921 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1922 much at a time. */
1923 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1924 unit = BITS_PER_WORD;
1925 else
1926 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1927
1928 while (bitsdone < bitsize)
1929 {
1930 unsigned HOST_WIDE_INT thissize;
1931 rtx part, word;
1932 unsigned HOST_WIDE_INT thispos;
1933 unsigned HOST_WIDE_INT offset;
1934
1935 offset = (bitpos + bitsdone) / unit;
1936 thispos = (bitpos + bitsdone) % unit;
1937
1938 /* THISSIZE must not overrun a word boundary. Otherwise,
1939 extract_fixed_bit_field will call us again, and we will mutually
1940 recurse forever. */
1941 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1942 thissize = MIN (thissize, unit - thispos);
1943
1944 /* If OP0 is a register, then handle OFFSET here.
1945
1946 When handling multiword bitfields, extract_bit_field may pass
1947 down a word_mode SUBREG of a larger REG for a bitfield that actually
1948 crosses a word boundary. Thus, for a SUBREG, we must find
1949 the current word starting from the base register. */
1950 if (GET_CODE (op0) == SUBREG)
1951 {
1952 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1953 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1954 GET_MODE (SUBREG_REG (op0)));
1955 offset = 0;
1956 }
1957 else if (REG_P (op0))
1958 {
1959 word = operand_subword_force (op0, offset, GET_MODE (op0));
1960 offset = 0;
1961 }
1962 else
1963 word = op0;
1964
1965 /* Extract the parts in bit-counting order,
1966 whose meaning is determined by BYTES_BIG_ENDIAN.
1967 OFFSET is in UNITs, and UNIT is in bits. */
1968 part = extract_fixed_bit_field (word_mode, word, thissize,
1969 offset * unit + thispos, 0, 1);
1970 bitsdone += thissize;
1971
1972 /* Shift this part into place for the result. */
1973 if (BYTES_BIG_ENDIAN)
1974 {
1975 if (bitsize != bitsdone)
1976 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1977 bitsize - bitsdone, 0, 1);
1978 }
1979 else
1980 {
1981 if (bitsdone != thissize)
1982 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1983 bitsdone - thissize, 0, 1);
1984 }
1985
1986 if (first)
1987 result = part;
1988 else
1989 /* Combine the parts with bitwise or. This works
1990 because we extracted each part as an unsigned bit field. */
1991 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1992 OPTAB_LIB_WIDEN);
1993
1994 first = 0;
1995 }
1996
1997 /* Unsigned bit field: we are done. */
1998 if (unsignedp)
1999 return result;
2000 /* Signed bit field: sign-extend with two arithmetic shifts. */
2001 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2002 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2003 return expand_shift (RSHIFT_EXPR, word_mode, result,
2004 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2005 }
2006 \f
2007 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2008 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2009 MODE, fill the upper bits with zeros. Fail if the layout of either
2010 mode is unknown (as for CC modes) or if the extraction would involve
2011 unprofitable mode punning. Return the value on success, otherwise
2012 return null.
2013
2014 This is different from gen_lowpart* in these respects:
2015
2016 - the returned value must always be considered an rvalue
2017
2018 - when MODE is wider than SRC_MODE, the extraction involves
2019 a zero extension
2020
2021 - when MODE is smaller than SRC_MODE, the extraction involves
2022 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2023
2024 In other words, this routine performs a computation, whereas the
2025 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2026 operations. */
2027
2028 rtx
2029 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2030 {
2031 machine_mode int_mode, src_int_mode;
2032
2033 if (mode == src_mode)
2034 return src;
2035
2036 if (CONSTANT_P (src))
2037 {
2038 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2039 fails, it will happily create (subreg (symbol_ref)) or similar
2040 invalid SUBREGs. */
2041 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2042 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2043 if (ret)
2044 return ret;
2045
2046 if (GET_MODE (src) == VOIDmode
2047 || !validate_subreg (mode, src_mode, src, byte))
2048 return NULL_RTX;
2049
2050 src = force_reg (GET_MODE (src), src);
2051 return gen_rtx_SUBREG (mode, src, byte);
2052 }
2053
2054 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2055 return NULL_RTX;
2056
2057 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2058 && MODES_TIEABLE_P (mode, src_mode))
2059 {
2060 rtx x = gen_lowpart_common (mode, src);
2061 if (x)
2062 return x;
2063 }
2064
2065 src_int_mode = int_mode_for_mode (src_mode);
2066 int_mode = int_mode_for_mode (mode);
2067 if (src_int_mode == BLKmode || int_mode == BLKmode)
2068 return NULL_RTX;
2069
2070 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2071 return NULL_RTX;
2072 if (!MODES_TIEABLE_P (int_mode, mode))
2073 return NULL_RTX;
2074
2075 src = gen_lowpart (src_int_mode, src);
2076 src = convert_modes (int_mode, src_int_mode, src, true);
2077 src = gen_lowpart (mode, src);
2078 return src;
2079 }
2080 \f
2081 /* Add INC into TARGET. */
2082
2083 void
2084 expand_inc (rtx target, rtx inc)
2085 {
2086 rtx value = expand_binop (GET_MODE (target), add_optab,
2087 target, inc,
2088 target, 0, OPTAB_LIB_WIDEN);
2089 if (value != target)
2090 emit_move_insn (target, value);
2091 }
2092
2093 /* Subtract DEC from TARGET. */
2094
2095 void
2096 expand_dec (rtx target, rtx dec)
2097 {
2098 rtx value = expand_binop (GET_MODE (target), sub_optab,
2099 target, dec,
2100 target, 0, OPTAB_LIB_WIDEN);
2101 if (value != target)
2102 emit_move_insn (target, value);
2103 }
2104 \f
2105 /* Output a shift instruction for expression code CODE,
2106 with SHIFTED being the rtx for the value to shift,
2107 and AMOUNT the rtx for the amount to shift by.
2108 Store the result in the rtx TARGET, if that is convenient.
2109 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2110 Return the rtx for where the value is. */
2111
2112 static rtx
2113 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2114 rtx amount, rtx target, int unsignedp)
2115 {
2116 rtx op1, temp = 0;
2117 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2118 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2119 optab lshift_optab = ashl_optab;
2120 optab rshift_arith_optab = ashr_optab;
2121 optab rshift_uns_optab = lshr_optab;
2122 optab lrotate_optab = rotl_optab;
2123 optab rrotate_optab = rotr_optab;
2124 machine_mode op1_mode;
2125 machine_mode scalar_mode = mode;
2126 int attempt;
2127 bool speed = optimize_insn_for_speed_p ();
2128
2129 if (VECTOR_MODE_P (mode))
2130 scalar_mode = GET_MODE_INNER (mode);
2131 op1 = amount;
2132 op1_mode = GET_MODE (op1);
2133
2134 /* Determine whether the shift/rotate amount is a vector or a scalar. If the
2135 shift amount is a vector, use the vector/vector shift patterns. */
2136 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2137 {
2138 lshift_optab = vashl_optab;
2139 rshift_arith_optab = vashr_optab;
2140 rshift_uns_optab = vlshr_optab;
2141 lrotate_optab = vrotl_optab;
2142 rrotate_optab = vrotr_optab;
2143 }
2144
2145 /* We used to detect shift counts computed by NEGATE_EXPR
2146 and shift in the other direction, but that does not work
2147 on all machines. */
2148
2149 if (SHIFT_COUNT_TRUNCATED)
2150 {
2151 if (CONST_INT_P (op1)
2152 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2153 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2154 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2155 % GET_MODE_BITSIZE (scalar_mode));
2156 else if (GET_CODE (op1) == SUBREG
2157 && subreg_lowpart_p (op1)
2158 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2159 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2160 op1 = SUBREG_REG (op1);
2161 }
2162
2163 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2164 prefer left rotation; if op1 is from bitsize / 2 + 1 to
2165 bitsize - 1, use the other direction of rotate, with an amount of
2166 1 .. bitsize / 2 - 1 instead. */
2167 if (rotate
2168 && CONST_INT_P (op1)
2169 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2170 GET_MODE_BITSIZE (scalar_mode) - 1))
2171 {
2172 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2173 left = !left;
2174 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2175 }
2176
2177 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2178 Note that this is not the case for bigger values. For instance a rotation
2179 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2180 0x04030201 (bswapsi). */
2181 if (rotate
2182 && CONST_INT_P (op1)
2183 && INTVAL (op1) == BITS_PER_UNIT
2184 && GET_MODE_SIZE (scalar_mode) == 2
2185 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2186 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2187 unsignedp);
2188
2189 if (op1 == const0_rtx)
2190 return shifted;
2191
2192 /* Check whether it's cheaper to implement a left shift by a constant
2193 bit count by a sequence of additions. */
2194 if (code == LSHIFT_EXPR
2195 && CONST_INT_P (op1)
2196 && INTVAL (op1) > 0
2197 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2198 && INTVAL (op1) < MAX_BITS_PER_WORD
2199 && (shift_cost (speed, mode, INTVAL (op1))
2200 > INTVAL (op1) * add_cost (speed, mode))
2201 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2202 {
2203 int i;
2204 for (i = 0; i < INTVAL (op1); i++)
2205 {
2206 temp = force_reg (mode, shifted);
2207 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2208 unsignedp, OPTAB_LIB_WIDEN);
2209 }
2210 return shifted;
2211 }
2212
2213 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2214 {
2215 enum optab_methods methods;
2216
2217 if (attempt == 0)
2218 methods = OPTAB_DIRECT;
2219 else if (attempt == 1)
2220 methods = OPTAB_WIDEN;
2221 else
2222 methods = OPTAB_LIB_WIDEN;
2223
2224 if (rotate)
2225 {
2226 /* Widening does not work for rotation. */
2227 if (methods == OPTAB_WIDEN)
2228 continue;
2229 else if (methods == OPTAB_LIB_WIDEN)
2230 {
2231 /* If we have been unable to open-code this by a rotation,
2232 do it as the IOR of two shifts. I.e., to rotate A
2233 by N bits, compute
2234 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2235 where C is the bitsize of A.
2236
2237 It is theoretically possible that the target machine might
2238 not be able to perform either shift and hence we would
2239 be making two libcalls rather than just the one for the
2240 shift (similarly if IOR could not be done). We will allow
2241 this extremely unlikely lossage to avoid complicating the
2242 code below. */
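/* For instance, with a 32-bit mode and a constant rotate-left by 3,
   this path computes (A << 3) | ((unsigned) A >> 29), since
   (-3) & 31 == 29; for a variable amount the AND with C - 1 keeps the
   other shift count in range even when the amount is zero. */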
2243
2244 rtx subtarget = target == shifted ? 0 : target;
2245 rtx new_amount, other_amount;
2246 rtx temp1;
2247
2248 new_amount = op1;
2249 if (op1 == const0_rtx)
2250 return shifted;
2251 else if (CONST_INT_P (op1))
2252 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2253 - INTVAL (op1));
2254 else
2255 {
2256 other_amount
2257 = simplify_gen_unary (NEG, GET_MODE (op1),
2258 op1, GET_MODE (op1));
2259 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2260 other_amount
2261 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2262 gen_int_mode (mask, GET_MODE (op1)));
2263 }
2264
2265 shifted = force_reg (mode, shifted);
2266
2267 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2268 mode, shifted, new_amount, 0, 1);
2269 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2270 mode, shifted, other_amount,
2271 subtarget, 1);
2272 return expand_binop (mode, ior_optab, temp, temp1, target,
2273 unsignedp, methods);
2274 }
2275
2276 temp = expand_binop (mode,
2277 left ? lrotate_optab : rrotate_optab,
2278 shifted, op1, target, unsignedp, methods);
2279 }
2280 else if (unsignedp)
2281 temp = expand_binop (mode,
2282 left ? lshift_optab : rshift_uns_optab,
2283 shifted, op1, target, unsignedp, methods);
2284
2285 /* Do arithmetic shifts.
2286 Also, if we are going to widen the operand, we can just as well
2287 use an arithmetic right-shift instead of a logical one. */
2288 if (temp == 0 && ! rotate
2289 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2290 {
2291 enum optab_methods methods1 = methods;
2292
2293 /* If trying to widen a log shift to an arithmetic shift,
2294 don't accept an arithmetic shift of the same size. */
2295 if (unsignedp)
2296 methods1 = OPTAB_MUST_WIDEN;
2297
2298 /* Arithmetic shift */
2299
2300 temp = expand_binop (mode,
2301 left ? lshift_optab : rshift_arith_optab,
2302 shifted, op1, target, unsignedp, methods1);
2303 }
2304
2305 /* We used to try extzv here for logical right shifts, but that was
2306 only useful for one machine, the VAX, and caused poor code
2307 generation there for lshrdi3, so the code was deleted and a
2308 define_expand for lshrsi3 was added to vax.md. */
2309 }
2310
2311 gcc_assert (temp);
2312 return temp;
2313 }
2314
2315 /* Output a shift instruction for expression code CODE,
2316 with SHIFTED being the rtx for the value to shift,
2317 and AMOUNT the amount to shift by.
2318 Store the result in the rtx TARGET, if that is convenient.
2319 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2320 Return the rtx for where the value is. */
2321
2322 rtx
2323 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2324 int amount, rtx target, int unsignedp)
2325 {
2326 return expand_shift_1 (code, mode,
2327 shifted, GEN_INT (amount), target, unsignedp);
2328 }
2329
2330 /* Output a shift instruction for expression code CODE,
2331 with SHIFTED being the rtx for the value to shift,
2332 and AMOUNT the tree for the amount to shift by.
2333 Store the result in the rtx TARGET, if that is convenient.
2334 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2335 Return the rtx for where the value is. */
2336
2337 rtx
2338 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2339 tree amount, rtx target, int unsignedp)
2340 {
2341 return expand_shift_1 (code, mode,
2342 shifted, expand_normal (amount), target, unsignedp);
2343 }
2344
2345 \f
2346 /* Indicates the type of fixup needed after a constant multiplication.
2347 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2348 the result should be negated, and ADD_VARIANT means that the
2349 multiplicand should be added to the result. */
2350 enum mult_variant {basic_variant, negate_variant, add_variant};
2351
2352 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2353 const struct mult_cost *, machine_mode mode);
2354 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2355 struct algorithm *, enum mult_variant *, int);
2356 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2357 const struct algorithm *, enum mult_variant);
2358 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2359 static rtx extract_high_half (machine_mode, rtx);
2360 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2361 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2362 int, int);
2363 /* Compute and return the best algorithm for multiplying by T.
2364 The algorithm must cost less than COST_LIMIT.
2365 If retval.cost >= COST_LIMIT, no algorithm was found and all
2366 other fields of the returned struct are undefined.
2367 MODE is the machine mode of the multiplication. */
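/* As a concrete sketch of the kind of result this produces: multiplying
   by 10 can be decomposed as 10 = 5 << 1 with 5 = (1 << 2) + 1, giving
   a * 10 == ((a << 2) + a) << 1, i.e. one shift-add followed by a shift,
   provided those operations are cheaper than an actual multiply. */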
2368
2369 static void
2370 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2371 const struct mult_cost *cost_limit, machine_mode mode)
2372 {
2373 int m;
2374 struct algorithm *alg_in, *best_alg;
2375 struct mult_cost best_cost;
2376 struct mult_cost new_limit;
2377 int op_cost, op_latency;
2378 unsigned HOST_WIDE_INT orig_t = t;
2379 unsigned HOST_WIDE_INT q;
2380 int maxm, hash_index;
2381 bool cache_hit = false;
2382 enum alg_code cache_alg = alg_zero;
2383 bool speed = optimize_insn_for_speed_p ();
2384 machine_mode imode;
2385 struct alg_hash_entry *entry_ptr;
2386
2387 /* Indicate that no algorithm is yet found. If no algorithm
2388 is found, this value will be returned to indicate failure. */
2389 alg_out->cost.cost = cost_limit->cost + 1;
2390 alg_out->cost.latency = cost_limit->latency + 1;
2391
2392 if (cost_limit->cost < 0
2393 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2394 return;
2395
2396 /* Be prepared for vector modes. */
2397 imode = GET_MODE_INNER (mode);
2398 if (imode == VOIDmode)
2399 imode = mode;
2400
2401 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2402
2403 /* Restrict the bits of "t" to the multiplication's mode. */
2404 t &= GET_MODE_MASK (imode);
2405
2406 /* t == 1 can be done at zero cost. */
2407 if (t == 1)
2408 {
2409 alg_out->ops = 1;
2410 alg_out->cost.cost = 0;
2411 alg_out->cost.latency = 0;
2412 alg_out->op[0] = alg_m;
2413 return;
2414 }
2415
2416 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2417 fail now. */
2418 if (t == 0)
2419 {
2420 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2421 return;
2422 else
2423 {
2424 alg_out->ops = 1;
2425 alg_out->cost.cost = zero_cost (speed);
2426 alg_out->cost.latency = zero_cost (speed);
2427 alg_out->op[0] = alg_zero;
2428 return;
2429 }
2430 }
2431
2432 /* We'll be needing a couple extra algorithm structures now. */
2433
2434 alg_in = XALLOCA (struct algorithm);
2435 best_alg = XALLOCA (struct algorithm);
2436 best_cost = *cost_limit;
2437
2438 /* Compute the hash index. */
2439 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2440
2441 /* See if we already know what to do for T. */
2442 entry_ptr = alg_hash_entry_ptr (hash_index);
2443 if (entry_ptr->t == t
2444 && entry_ptr->mode == mode
2446 && entry_ptr->speed == speed
2447 && entry_ptr->alg != alg_unknown)
2448 {
2449 cache_alg = entry_ptr->alg;
2450
2451 if (cache_alg == alg_impossible)
2452 {
2453 /* The cache tells us that it's impossible to synthesize
2454 multiplication by T within entry_ptr->cost. */
2455 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2456 /* COST_LIMIT is at least as restrictive as the one
2457 recorded in the hash table, in which case we have no
2458 hope of synthesizing a multiplication. Just
2459 return. */
2460 return;
2461
2462 /* If we get here, COST_LIMIT is less restrictive than the
2463 one recorded in the hash table, so we may be able to
2464 synthesize a multiplication. Proceed as if we didn't
2465 have the cache entry. */
2466 }
2467 else
2468 {
2469 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2470 /* The cached algorithm shows that this multiplication
2471 requires more cost than COST_LIMIT. Just return. This
2472 way, we don't clobber this cache entry with
2473 alg_impossible but retain useful information. */
2474 return;
2475
2476 cache_hit = true;
2477
2478 switch (cache_alg)
2479 {
2480 case alg_shift:
2481 goto do_alg_shift;
2482
2483 case alg_add_t_m2:
2484 case alg_sub_t_m2:
2485 goto do_alg_addsub_t_m2;
2486
2487 case alg_add_factor:
2488 case alg_sub_factor:
2489 goto do_alg_addsub_factor;
2490
2491 case alg_add_t2_m:
2492 goto do_alg_add_t2_m;
2493
2494 case alg_sub_t2_m:
2495 goto do_alg_sub_t2_m;
2496
2497 default:
2498 gcc_unreachable ();
2499 }
2500 }
2501 }
2502
2503 /* If we have a group of zero bits at the low-order part of T, try
2504 multiplying by the remaining bits and then doing a shift. */
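/* For example, t = 40 has three low zero bits (m = 3), so we recurse on
   q = 40 >> 3 = 5 and finish with a left shift by 3:
   a * 40 == (a * 5) << 3. */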
2505
2506 if ((t & 1) == 0)
2507 {
2508 do_alg_shift:
2509 m = floor_log2 (t & -t); /* m = number of low zero bits */
2510 if (m < maxm)
2511 {
2512 q = t >> m;
2513 /* The function expand_shift will choose between a shift and
2514 a sequence of additions, so the observed cost is given as
2515 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2516 op_cost = m * add_cost (speed, mode);
2517 if (shift_cost (speed, mode, m) < op_cost)
2518 op_cost = shift_cost (speed, mode, m);
2519 new_limit.cost = best_cost.cost - op_cost;
2520 new_limit.latency = best_cost.latency - op_cost;
2521 synth_mult (alg_in, q, &new_limit, mode);
2522
2523 alg_in->cost.cost += op_cost;
2524 alg_in->cost.latency += op_cost;
2525 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2526 {
2527 struct algorithm *x;
2528 best_cost = alg_in->cost;
2529 x = alg_in, alg_in = best_alg, best_alg = x;
2530 best_alg->log[best_alg->ops] = m;
2531 best_alg->op[best_alg->ops] = alg_shift;
2532 }
2533
2534 /* See if treating ORIG_T as a signed number yields a better
2535 sequence. Try this sequence only for a negative ORIG_T
2536 as it would be useless for a non-negative ORIG_T. */
2537 if ((HOST_WIDE_INT) orig_t < 0)
2538 {
2539 /* Shift ORIG_T as follows because a right shift of a
2540 negative-valued signed type is implementation
2541 defined. */
2542 q = ~(~orig_t >> m);
2543 /* The function expand_shift will choose between a shift
2544 and a sequence of additions, so the observed cost is
2545 given as MIN (m * add_cost(speed, mode),
2546 shift_cost(speed, mode, m)). */
2547 op_cost = m * add_cost (speed, mode);
2548 if (shift_cost (speed, mode, m) < op_cost)
2549 op_cost = shift_cost (speed, mode, m);
2550 new_limit.cost = best_cost.cost - op_cost;
2551 new_limit.latency = best_cost.latency - op_cost;
2552 synth_mult (alg_in, q, &new_limit, mode);
2553
2554 alg_in->cost.cost += op_cost;
2555 alg_in->cost.latency += op_cost;
2556 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2557 {
2558 struct algorithm *x;
2559 best_cost = alg_in->cost;
2560 x = alg_in, alg_in = best_alg, best_alg = x;
2561 best_alg->log[best_alg->ops] = m;
2562 best_alg->op[best_alg->ops] = alg_shift;
2563 }
2564 }
2565 }
2566 if (cache_hit)
2567 goto done;
2568 }
2569
2570 /* If we have an odd number, add or subtract one. */
2571 if ((t & 1) != 0)
2572 {
2573 unsigned HOST_WIDE_INT w;
2574
2575 do_alg_addsub_t_m2:
2576 for (w = 1; (w & t) != 0; w <<= 1)
2577 ;
2578 /* If T was -1, then W will be zero after the loop. This is another
2579 case where T ends with ...111. Handling this by multiplying by
2580 (T + 1) and subtracting 1 produces slightly better code and selects
2581 an algorithm much faster than treating it like the ...0111 case
2582 below. */
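/* E.g. t = 15 ends in ...1111, so it is handled as a multiplication by
   16 followed by one subtraction: a * 15 == (a << 4) - a. */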
2583 if (w == 0
2584 || (w > 2
2585 /* Reject the case where t is 3.
2586 Thus we prefer addition in that case. */
2587 && t != 3))
2588 {
2589 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2590
2591 op_cost = add_cost (speed, mode);
2592 new_limit.cost = best_cost.cost - op_cost;
2593 new_limit.latency = best_cost.latency - op_cost;
2594 synth_mult (alg_in, t + 1, &new_limit, mode);
2595
2596 alg_in->cost.cost += op_cost;
2597 alg_in->cost.latency += op_cost;
2598 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2599 {
2600 struct algorithm *x;
2601 best_cost = alg_in->cost;
2602 x = alg_in, alg_in = best_alg, best_alg = x;
2603 best_alg->log[best_alg->ops] = 0;
2604 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2605 }
2606 }
2607 else
2608 {
2609 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2610
2611 op_cost = add_cost (speed, mode);
2612 new_limit.cost = best_cost.cost - op_cost;
2613 new_limit.latency = best_cost.latency - op_cost;
2614 synth_mult (alg_in, t - 1, &new_limit, mode);
2615
2616 alg_in->cost.cost += op_cost;
2617 alg_in->cost.latency += op_cost;
2618 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2619 {
2620 struct algorithm *x;
2621 best_cost = alg_in->cost;
2622 x = alg_in, alg_in = best_alg, best_alg = x;
2623 best_alg->log[best_alg->ops] = 0;
2624 best_alg->op[best_alg->ops] = alg_add_t_m2;
2625 }
2626 }
2627
2628 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2629 quickly with a - a * n for some appropriate constant n. */
2630 m = exact_log2 (-orig_t + 1);
2631 if (m >= 0 && m < maxm)
2632 {
2633 op_cost = shiftsub1_cost (speed, mode, m);
2634 new_limit.cost = best_cost.cost - op_cost;
2635 new_limit.latency = best_cost.latency - op_cost;
2636 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2637 &new_limit, mode);
2638
2639 alg_in->cost.cost += op_cost;
2640 alg_in->cost.latency += op_cost;
2641 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2642 {
2643 struct algorithm *x;
2644 best_cost = alg_in->cost;
2645 x = alg_in, alg_in = best_alg, best_alg = x;
2646 best_alg->log[best_alg->ops] = m;
2647 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2648 }
2649 }
2650
2651 if (cache_hit)
2652 goto done;
2653 }
2654
2655 /* Look for factors of t of the form
2656 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2657 If we find such a factor, we can multiply by t using an algorithm that
2658 multiplies by q, shift the result by m and add/subtract it to itself.
2659
2660 We search for large factors first and loop down, even if large factors
2661 are less probable than small; if we find a large factor we will find a
2662 good sequence quickly, and therefore be able to prune (by decreasing
2663 COST_LIMIT) the search. */
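/* As an example of this case, t = 45 factors as 3 * (2**4 - 1), so we
   can recurse on q = 3 and finish with a single shift-and-subtract:
   a * 45 == ((a * 3) << 4) - (a * 3). */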
2664
2665 do_alg_addsub_factor:
2666 for (m = floor_log2 (t - 1); m >= 2; m--)
2667 {
2668 unsigned HOST_WIDE_INT d;
2669
2670 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2671 if (t % d == 0 && t > d && m < maxm
2672 && (!cache_hit || cache_alg == alg_add_factor))
2673 {
2674 /* If the target has a cheap shift-and-add instruction use
2675 that in preference to a shift insn followed by an add insn.
2676 Assume that the shift-and-add is "atomic" with a latency
2677 equal to its cost, otherwise assume that on superscalar
2678 hardware the shift may be executed concurrently with the
2679 earlier steps in the algorithm. */
2680 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2681 if (shiftadd_cost (speed, mode, m) < op_cost)
2682 {
2683 op_cost = shiftadd_cost (speed, mode, m);
2684 op_latency = op_cost;
2685 }
2686 else
2687 op_latency = add_cost (speed, mode);
2688
2689 new_limit.cost = best_cost.cost - op_cost;
2690 new_limit.latency = best_cost.latency - op_latency;
2691 synth_mult (alg_in, t / d, &new_limit, mode);
2692
2693 alg_in->cost.cost += op_cost;
2694 alg_in->cost.latency += op_latency;
2695 if (alg_in->cost.latency < op_cost)
2696 alg_in->cost.latency = op_cost;
2697 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2698 {
2699 struct algorithm *x;
2700 best_cost = alg_in->cost;
2701 x = alg_in, alg_in = best_alg, best_alg = x;
2702 best_alg->log[best_alg->ops] = m;
2703 best_alg->op[best_alg->ops] = alg_add_factor;
2704 }
2705 /* Other factors will have been taken care of in the recursion. */
2706 break;
2707 }
2708
2709 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2710 if (t % d == 0 && t > d && m < maxm
2711 && (!cache_hit || cache_alg == alg_sub_factor))
2712 {
2713 /* If the target has a cheap shift-and-subtract insn use
2714 that in preference to a shift insn followed by a sub insn.
2715 Assume that the shift-and-sub is "atomic" with a latency
2716 equal to its cost, otherwise assume that on superscalar
2717 hardware the shift may be executed concurrently with the
2718 earlier steps in the algorithm. */
2719 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2720 if (shiftsub0_cost (speed, mode, m) < op_cost)
2721 {
2722 op_cost = shiftsub0_cost (speed, mode, m);
2723 op_latency = op_cost;
2724 }
2725 else
2726 op_latency = add_cost (speed, mode);
2727
2728 new_limit.cost = best_cost.cost - op_cost;
2729 new_limit.latency = best_cost.latency - op_latency;
2730 synth_mult (alg_in, t / d, &new_limit, mode);
2731
2732 alg_in->cost.cost += op_cost;
2733 alg_in->cost.latency += op_latency;
2734 if (alg_in->cost.latency < op_cost)
2735 alg_in->cost.latency = op_cost;
2736 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2737 {
2738 struct algorithm *x;
2739 best_cost = alg_in->cost;
2740 x = alg_in, alg_in = best_alg, best_alg = x;
2741 best_alg->log[best_alg->ops] = m;
2742 best_alg->op[best_alg->ops] = alg_sub_factor;
2743 }
2744 break;
2745 }
2746 }
2747 if (cache_hit)
2748 goto done;
2749
2750 /* Try shift-and-add (load effective address) instructions,
2751 i.e. do a*3, a*5, a*9. */
2752 if ((t & 1) != 0)
2753 {
2754 do_alg_add_t2_m:
2755 q = t - 1;
2756 q = q & -q;
2757 m = exact_log2 (q);
2758 if (m >= 0 && m < maxm)
2759 {
2760 op_cost = shiftadd_cost (speed, mode, m);
2761 new_limit.cost = best_cost.cost - op_cost;
2762 new_limit.latency = best_cost.latency - op_cost;
2763 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2764
2765 alg_in->cost.cost += op_cost;
2766 alg_in->cost.latency += op_cost;
2767 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2768 {
2769 struct algorithm *x;
2770 best_cost = alg_in->cost;
2771 x = alg_in, alg_in = best_alg, best_alg = x;
2772 best_alg->log[best_alg->ops] = m;
2773 best_alg->op[best_alg->ops] = alg_add_t2_m;
2774 }
2775 }
2776 if (cache_hit)
2777 goto done;
2778
2779 do_alg_sub_t2_m:
2780 q = t + 1;
2781 q = q & -q;
2782 m = exact_log2 (q);
2783 if (m >= 0 && m < maxm)
2784 {
2785 op_cost = shiftsub0_cost (speed, mode, m);
2786 new_limit.cost = best_cost.cost - op_cost;
2787 new_limit.latency = best_cost.latency - op_cost;
2788 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2789
2790 alg_in->cost.cost += op_cost;
2791 alg_in->cost.latency += op_cost;
2792 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2793 {
2794 struct algorithm *x;
2795 best_cost = alg_in->cost;
2796 x = alg_in, alg_in = best_alg, best_alg = x;
2797 best_alg->log[best_alg->ops] = m;
2798 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2799 }
2800 }
2801 if (cache_hit)
2802 goto done;
2803 }
2804
2805 done:
2806 /* If best_cost has not decreased, we have not found any algorithm. */
2807 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2808 {
2809 /* We failed to find an algorithm. Record alg_impossible for
2810 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2811 we are asked to find an algorithm for T within the same or
2812 lower COST_LIMIT, we can immediately return to the
2813 caller. */
2814 entry_ptr->t = t;
2815 entry_ptr->mode = mode;
2816 entry_ptr->speed = speed;
2817 entry_ptr->alg = alg_impossible;
2818 entry_ptr->cost = *cost_limit;
2819 return;
2820 }
2821
2822 /* Cache the result. */
2823 if (!cache_hit)
2824 {
2825 entry_ptr->t = t;
2826 entry_ptr->mode = mode;
2827 entry_ptr->speed = speed;
2828 entry_ptr->alg = best_alg->op[best_alg->ops];
2829 entry_ptr->cost.cost = best_cost.cost;
2830 entry_ptr->cost.latency = best_cost.latency;
2831 }
2832
2833 /* If we are getting too long a sequence for `struct algorithm'
2834 to record, make this search fail. */
2835 if (best_alg->ops == MAX_BITS_PER_WORD)
2836 return;
2837
2838 /* Copy the algorithm from temporary space to the space at alg_out.
2839 We avoid using structure assignment because the majority of
2840 best_alg is normally undefined, and this is a critical function. */
2841 alg_out->ops = best_alg->ops + 1;
2842 alg_out->cost = best_cost;
2843 memcpy (alg_out->op, best_alg->op,
2844 alg_out->ops * sizeof *alg_out->op);
2845 memcpy (alg_out->log, best_alg->log,
2846 alg_out->ops * sizeof *alg_out->log);
2847 }
2848 \f
2849 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2850 Try three variations:
2851
2852 - a shift/add sequence based on VAL itself
2853 - a shift/add sequence based on -VAL, followed by a negation
2854 - a shift/add sequence based on VAL - 1, followed by an addition.
2855
2856 Return true if the cheapest of these cost less than MULT_COST,
2857 describing the algorithm in *ALG and final fixup in *VARIANT. */
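/* As a rough illustration, writing x for the multiplicand: for VAL = -7
   the negate_variant path synthesizes 7 (e.g. as (x << 3) - x) and
   negates the result, while for VAL = 9 the add_variant path
   synthesizes VAL - 1 = 8 as x << 3 and then adds x back in; whichever
   complete sequence costs least is the one returned. */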
2858
2859 static bool
2860 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2861 struct algorithm *alg, enum mult_variant *variant,
2862 int mult_cost)
2863 {
2864 struct algorithm alg2;
2865 struct mult_cost limit;
2866 int op_cost;
2867 bool speed = optimize_insn_for_speed_p ();
2868
2869 /* Fail quickly for impossible bounds. */
2870 if (mult_cost < 0)
2871 return false;
2872
2873 /* Ensure that mult_cost provides a reasonable upper bound.
2874 Any constant multiplication can be performed with fewer
2875 than 2 * bits additions. */
2876 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2877 if (mult_cost > op_cost)
2878 mult_cost = op_cost;
2879
2880 *variant = basic_variant;
2881 limit.cost = mult_cost;
2882 limit.latency = mult_cost;
2883 synth_mult (alg, val, &limit, mode);
2884
2885 /* This works only if the negated value actually fits in an
2886 `unsigned int'. */
2887 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2888 {
2889 op_cost = neg_cost (speed, mode);
2890 if (MULT_COST_LESS (&alg->cost, mult_cost))
2891 {
2892 limit.cost = alg->cost.cost - op_cost;
2893 limit.latency = alg->cost.latency - op_cost;
2894 }
2895 else
2896 {
2897 limit.cost = mult_cost - op_cost;
2898 limit.latency = mult_cost - op_cost;
2899 }
2900
2901 synth_mult (&alg2, -val, &limit, mode);
2902 alg2.cost.cost += op_cost;
2903 alg2.cost.latency += op_cost;
2904 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2905 *alg = alg2, *variant = negate_variant;
2906 }
2907
2908 /* This proves very useful for division-by-constant. */
2909 op_cost = add_cost (speed, mode);
2910 if (MULT_COST_LESS (&alg->cost, mult_cost))
2911 {
2912 limit.cost = alg->cost.cost - op_cost;
2913 limit.latency = alg->cost.latency - op_cost;
2914 }
2915 else
2916 {
2917 limit.cost = mult_cost - op_cost;
2918 limit.latency = mult_cost - op_cost;
2919 }
2920
2921 synth_mult (&alg2, val - 1, &limit, mode);
2922 alg2.cost.cost += op_cost;
2923 alg2.cost.latency += op_cost;
2924 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2925 *alg = alg2, *variant = add_variant;
2926
2927 return MULT_COST_LESS (&alg->cost, mult_cost);
2928 }
2929
2930 /* A subroutine of expand_mult, used for constant multiplications.
2931 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2932 convenient. Use the shift/add sequence described by ALG and apply
2933 the final fixup specified by VARIANT. */
2934
2935 static rtx
2936 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
2937 rtx target, const struct algorithm *alg,
2938 enum mult_variant variant)
2939 {
2940 HOST_WIDE_INT val_so_far;
2941 rtx_insn *insn;
2942 rtx accum, tem;
2943 int opno;
2944 machine_mode nmode;
2945
2946 /* Avoid referencing memory over and over, and avoid invalid sharing
2947 on SUBREGs. */
2948 op0 = force_reg (mode, op0);
2949
2950 /* ACCUM starts out either as OP0 or as a zero, depending on
2951 the first operation. */
2952
2953 if (alg->op[0] == alg_zero)
2954 {
2955 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2956 val_so_far = 0;
2957 }
2958 else if (alg->op[0] == alg_m)
2959 {
2960 accum = copy_to_mode_reg (mode, op0);
2961 val_so_far = 1;
2962 }
2963 else
2964 gcc_unreachable ();
2965
2966 for (opno = 1; opno < alg->ops; opno++)
2967 {
2968 int log = alg->log[opno];
2969 rtx shift_subtarget = optimize ? 0 : accum;
2970 rtx add_target
2971 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2972 && !optimize)
2973 ? target : 0;
2974 rtx accum_target = optimize ? 0 : accum;
2975 rtx accum_inner;
2976
2977 switch (alg->op[opno])
2978 {
2979 case alg_shift:
2980 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2981 /* REG_EQUAL note will be attached to the following insn. */
2982 emit_move_insn (accum, tem);
2983 val_so_far <<= log;
2984 break;
2985
2986 case alg_add_t_m2:
2987 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2988 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2989 add_target ? add_target : accum_target);
2990 val_so_far += (HOST_WIDE_INT) 1 << log;
2991 break;
2992
2993 case alg_sub_t_m2:
2994 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2995 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2996 add_target ? add_target : accum_target);
2997 val_so_far -= (HOST_WIDE_INT) 1 << log;
2998 break;
2999
3000 case alg_add_t2_m:
3001 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3002 log, shift_subtarget, 0);
3003 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3004 add_target ? add_target : accum_target);
3005 val_so_far = (val_so_far << log) + 1;
3006 break;
3007
3008 case alg_sub_t2_m:
3009 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3010 log, shift_subtarget, 0);
3011 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3012 add_target ? add_target : accum_target);
3013 val_so_far = (val_so_far << log) - 1;
3014 break;
3015
3016 case alg_add_factor:
3017 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3018 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3019 add_target ? add_target : accum_target);
3020 val_so_far += val_so_far << log;
3021 break;
3022
3023 case alg_sub_factor:
3024 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3025 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3026 (add_target
3027 ? add_target : (optimize ? 0 : tem)));
3028 val_so_far = (val_so_far << log) - val_so_far;
3029 break;
3030
3031 default:
3032 gcc_unreachable ();
3033 }
3034
3035 if (SCALAR_INT_MODE_P (mode))
3036 {
3037 /* Write a REG_EQUAL note on the last insn so that we can cse
3038 multiplication sequences. Note that if ACCUM is a SUBREG,
3039 we've set the inner register and must properly indicate that. */
3040 tem = op0, nmode = mode;
3041 accum_inner = accum;
3042 if (GET_CODE (accum) == SUBREG)
3043 {
3044 accum_inner = SUBREG_REG (accum);
3045 nmode = GET_MODE (accum_inner);
3046 tem = gen_lowpart (nmode, op0);
3047 }
3048
3049 insn = get_last_insn ();
3050 set_dst_reg_note (insn, REG_EQUAL,
3051 gen_rtx_MULT (nmode, tem,
3052 gen_int_mode (val_so_far, nmode)),
3053 accum_inner);
3054 }
3055 }
3056
3057 if (variant == negate_variant)
3058 {
3059 val_so_far = -val_so_far;
3060 accum = expand_unop (mode, neg_optab, accum, target, 0);
3061 }
3062 else if (variant == add_variant)
3063 {
3064 val_so_far = val_so_far + 1;
3065 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3066 }
3067
3068 /* Compare only the bits of val and val_so_far that are significant
3069 in the result mode, to avoid sign-/zero-extension confusion. */
3070 nmode = GET_MODE_INNER (mode);
3071 if (nmode == VOIDmode)
3072 nmode = mode;
3073 val &= GET_MODE_MASK (nmode);
3074 val_so_far &= GET_MODE_MASK (nmode);
3075 gcc_assert (val == val_so_far);
3076
3077 return accum;
3078 }
3079
3080 /* Perform a multiplication and return an rtx for the result.
3081 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3082 TARGET is a suggestion for where to store the result (an rtx).
3083
3084 We check specially for a constant integer as OP1.
3085 If you want this check for OP0 as well, then before calling
3086 you should swap the two operands if OP0 would be constant. */
3087
3088 rtx
3089 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3090 int unsignedp)
3091 {
3092 enum mult_variant variant;
3093 struct algorithm algorithm;
3094 rtx scalar_op1;
3095 int max_cost;
3096 bool speed = optimize_insn_for_speed_p ();
3097 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3098
3099 if (CONSTANT_P (op0))
3100 std::swap (op0, op1);
3101
3102 /* For vectors, there are several simplifications that can be made if
3103 all elements of the vector constant are identical. */
3104 scalar_op1 = op1;
3105 if (GET_CODE (op1) == CONST_VECTOR)
3106 {
3107 int i, n = CONST_VECTOR_NUNITS (op1);
3108 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3109 for (i = 1; i < n; ++i)
3110 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3111 goto skip_scalar;
3112 }
3113
3114 if (INTEGRAL_MODE_P (mode))
3115 {
3116 rtx fake_reg;
3117 HOST_WIDE_INT coeff;
3118 bool is_neg;
3119 int mode_bitsize;
3120
3121 if (op1 == CONST0_RTX (mode))
3122 return op1;
3123 if (op1 == CONST1_RTX (mode))
3124 return op0;
3125 if (op1 == CONSTM1_RTX (mode))
3126 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3127 op0, target, 0);
3128
3129 if (do_trapv)
3130 goto skip_synth;
3131
3132 /* If mode is integer vector mode, check if the backend supports
3133 vector lshift (by scalar or vector) at all. If not, we can't use
3134 synthesized multiply. */
3135 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3136 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3137 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3138 goto skip_synth;
3139
3140 /* These are the operations that are potentially turned into
3141 a sequence of shifts and additions. */
3142 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3143
3144 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3145 less than or equal in size to `unsigned int' this doesn't matter.
3146 If the mode is larger than `unsigned int', then synth_mult works
3147 only if the constant value exactly fits in an `unsigned int' without
3148 any truncation. This means that multiplying by negative values does
3149 not work; results are off by 2^32 on a 32-bit machine. */
3150 if (CONST_INT_P (scalar_op1))
3151 {
3152 coeff = INTVAL (scalar_op1);
3153 is_neg = coeff < 0;
3154 }
3155 #if TARGET_SUPPORTS_WIDE_INT
3156 else if (CONST_WIDE_INT_P (scalar_op1))
3157 #else
3158 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3159 #endif
3160 {
3161 int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3162 /* Perfect power of 2 (other than 1, which is handled above). */
3163 if (shift > 0)
3164 return expand_shift (LSHIFT_EXPR, mode, op0,
3165 shift, target, unsignedp);
3166 else
3167 goto skip_synth;
3168 }
3169 else
3170 goto skip_synth;
3171
3172 /* We used to test optimize here, on the grounds that it's better to
3173 produce a smaller program when -O is not used. But this causes
3174 such a terrible slowdown sometimes that it seems better to always
3175 use synth_mult. */
3176
3177 /* Special case powers of two. */
3178 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3179 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3180 return expand_shift (LSHIFT_EXPR, mode, op0,
3181 floor_log2 (coeff), target, unsignedp);
3182
3183 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3184
3185 /* Attempt to handle multiplication of DImode values by negative
3186 coefficients, by performing the multiplication by a positive
3187 multiplier and then negating the result. */
3188 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3189 {
3190 /* It's safe to use -coeff even for INT_MIN, as the
3191 result is interpreted as an unsigned coefficient.
3192 Exclude cost of op0 from max_cost to match the cost
3193 calculation of the synth_mult. */
3194 coeff = -(unsigned HOST_WIDE_INT) coeff;
3195 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3196 - neg_cost (speed, mode));
3197 if (max_cost <= 0)
3198 goto skip_synth;
3199
3200 /* Special case powers of two. */
3201 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3202 {
3203 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3204 floor_log2 (coeff), target, unsignedp);
3205 return expand_unop (mode, neg_optab, temp, target, 0);
3206 }
3207
3208 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3209 max_cost))
3210 {
3211 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3212 &algorithm, variant);
3213 return expand_unop (mode, neg_optab, temp, target, 0);
3214 }
3215 goto skip_synth;
3216 }
3217
3218 /* Exclude cost of op0 from max_cost to match the cost
3219 calculation of the synth_mult. */
3220 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3221 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3222 return expand_mult_const (mode, op0, coeff, target,
3223 &algorithm, variant);
3224 }
3225 skip_synth:
3226
3227 /* Expand x*2.0 as x+x. */
3228 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3229 {
3230 REAL_VALUE_TYPE d;
3231 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3232
3233 if (REAL_VALUES_EQUAL (d, dconst2))
3234 {
3235 op0 = force_reg (GET_MODE (op0), op0);
3236 return expand_binop (mode, add_optab, op0, op0,
3237 target, unsignedp, OPTAB_LIB_WIDEN);
3238 }
3239 }
3240 skip_scalar:
3241
3242 /* This used to use umul_optab if unsigned, but for non-widening multiply
3243 there is no difference between signed and unsigned. */
3244 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3245 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3246 gcc_assert (op0);
3247 return op0;
3248 }
3249
3250 /* Return a cost estimate for multiplying a register by the given
3251 COEFFicient in the given MODE and SPEED. */
3252
3253 int
3254 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3255 {
3256 int max_cost;
3257 struct algorithm algorithm;
3258 enum mult_variant variant;
3259
3260 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3261 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3262 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3263 return algorithm.cost.cost;
3264 else
3265 return max_cost;
3266 }
3267
3268 /* Perform a widening multiplication and return an rtx for the result.
3269 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3270 TARGET is a suggestion for where to store the result (an rtx).
3271 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3272 or smul_widen_optab.
3273
3274 We check specially for a constant integer as OP1, comparing the
3275 cost of a widening multiply against the cost of a sequence of shifts
3276 and adds. */
3277
3278 rtx
3279 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3280 int unsignedp, optab this_optab)
3281 {
3282 bool speed = optimize_insn_for_speed_p ();
3283 rtx cop1;
3284
3285 if (CONST_INT_P (op1)
3286 && GET_MODE (op0) != VOIDmode
3287 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3288 this_optab == umul_widen_optab))
3289 && CONST_INT_P (cop1)
3290 && (INTVAL (cop1) >= 0
3291 || HWI_COMPUTABLE_MODE_P (mode)))
3292 {
3293 HOST_WIDE_INT coeff = INTVAL (cop1);
3294 int max_cost;
3295 enum mult_variant variant;
3296 struct algorithm algorithm;
3297
3298 if (coeff == 0)
3299 return CONST0_RTX (mode);
3300
3301 /* Special case powers of two. */
3302 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3303 {
3304 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3305 return expand_shift (LSHIFT_EXPR, mode, op0,
3306 floor_log2 (coeff), target, unsignedp);
3307 }
3308
3309 /* Exclude cost of op0 from max_cost to match the cost
3310 calculation of the synth_mult. */
3311 max_cost = mul_widen_cost (speed, mode);
3312 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3313 max_cost))
3314 {
3315 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3316 return expand_mult_const (mode, op0, coeff, target,
3317 &algorithm, variant);
3318 }
3319 }
3320 return expand_binop (mode, this_optab, op0, op1, target,
3321 unsignedp, OPTAB_LIB_WIDEN);
3322 }
3323 \f
3324 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3325 replace division by D, and put the least significant N bits of the result
3326 in *MULTIPLIER_PTR and return the most significant bit.
3327
3328 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT); the
3329 needed precision is in PRECISION (should be <= N).
3330
3331 PRECISION should be as small as possible so this function can choose
3332 the multiplier more freely.
3333
3334 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3335 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3336
3337 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3338 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
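/* As a worked example, assuming 32-bit operations (N = PRECISION = 32):
   for D = 3 this yields the multiplier 0xAAAAAAAB with a post-shift of 1
   and a zero "high bit", so an unsigned x / 3 can be computed as the
   high 32 bits of x * 0xAAAAAAAB, shifted right by 1. */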
3339
3340 unsigned HOST_WIDE_INT
3341 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3342 unsigned HOST_WIDE_INT *multiplier_ptr,
3343 int *post_shift_ptr, int *lgup_ptr)
3344 {
3345 int lgup, post_shift;
3346 int pow, pow2;
3347
3348 /* lgup = ceil(log2(divisor)); */
3349 lgup = ceil_log2 (d);
3350
3351 gcc_assert (lgup <= n);
3352
3353 pow = n + lgup;
3354 pow2 = n + lgup - precision;
3355
3356 /* mlow = 2^(N + lgup)/d */
3357 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3358 wide_int mlow = wi::udiv_trunc (val, d);
3359
3360 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3361 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3362 wide_int mhigh = wi::udiv_trunc (val, d);
3363
3364 /* If precision == N, then mlow, mhigh exceed 2^N
3365 (but they do not exceed 2^(N+1)). */
3366
3367 /* Reduce to lowest terms. */
3368 for (post_shift = lgup; post_shift > 0; post_shift--)
3369 {
3370 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3371 HOST_BITS_PER_WIDE_INT);
3372 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3373 HOST_BITS_PER_WIDE_INT);
3374 if (ml_lo >= mh_lo)
3375 break;
3376
3377 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3378 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3379 }
3380
3381 *post_shift_ptr = post_shift;
3382 *lgup_ptr = lgup;
3383 if (n < HOST_BITS_PER_WIDE_INT)
3384 {
3385 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3386 *multiplier_ptr = mhigh.to_uhwi () & mask;
3387 return mhigh.to_uhwi () >= mask;
3388 }
3389 else
3390 {
3391 *multiplier_ptr = mhigh.to_uhwi ();
3392 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3393 }
3394 }
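/* Hand-worked example (illustrative, not part of the original code):
   for D == 7 with N == PRECISION == 32 we get lgup == 3,
   mhigh == floor ((2^35 + 2^3) / 7) == 0x124924925, and the reduction
   loop stops immediately, so *POST_SHIFT_PTR == 3.  mhigh does not fit
   in 32 bits, hence *MULTIPLIER_PTR == 0x24924925 and the return value
   is 1; callers such as expand_divmod then use the highpart-multiply,
   subtract, shift, add, shift fix-up sequence for the extra bit.  */
#if 0
/* Throwaway sketch (hypothetical, hand-written, assuming a 64-bit
   HOST_WIDE_INT): evaluate x / 7 on the host from the values above,
   mirroring the mh != 0 path used by expand_divmod for
   TRUNC_DIV_EXPR.  */
static unsigned int
udiv7_via_multiplier (unsigned int x)
{
  unsigned int t
    = ((unsigned HOST_WIDE_INT) x * 0x24924925) >> 32;
  return (((x - t) >> 1) + t) >> 2;
}
#endif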
3395
3396 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3397 congruent to 1 (mod 2**N). */
3398
3399 static unsigned HOST_WIDE_INT
3400 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3401 {
3402 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3403
3404 /* The algorithm notes that the choice y = x satisfies
3405 x*y == 1 mod 2^3, since x is assumed odd.
3406 Each iteration doubles the number of bits of significance in y. */
3407
3408 unsigned HOST_WIDE_INT mask;
3409 unsigned HOST_WIDE_INT y = x;
3410 int nbit = 3;
3411
3412 mask = (n == HOST_BITS_PER_WIDE_INT
3413 ? ~(unsigned HOST_WIDE_INT) 0
3414 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3415
3416 while (nbit < n)
3417 {
3418 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3419 nbit *= 2;
3420 }
3421 return y;
3422 }
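/* Hand-worked example (illustrative, not part of the original code):
   invert_mod2n (3, 8) starts with y == 3, which already satisfies
   3*3 == 9 == 1 (mod 2^3).  The first iteration gives
   y == 3*(2 - 9) & 0xff == 0xeb, correct mod 2^6, and the second gives
   y == 0xeb*(2 - 3*0xeb) & 0xff == 0xab; indeed 3*0xab == 513
   == 2*256 + 1, so 0xab is the inverse of 3 mod 2^8.  A throwaway
   sanity check of that property might look like the sketch below.  */
#if 0
static void
check_invert_mod2n_example (void)
{
  unsigned HOST_WIDE_INT y = invert_mod2n (3, 8);
  /* 3 * y must be congruent to 1 modulo 2^8.  */
  gcc_assert (((3 * y) & 0xff) == 1);
}
#endif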
3423
3424 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3425 that was done with the wrong signedness flavor. ADJ_OPERAND is already
3426 the high half of the product OP0 x OP1. If UNSIGNEDP is nonzero, adjust
3427 the signed product to become unsigned; if UNSIGNEDP is zero, adjust the
3428 unsigned product to become signed.
3429
3430 The result is put in TARGET if that is convenient.
3431
3432 MODE is the mode of operation. */
3433
3434 rtx
3435 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3436 rtx op1, rtx target, int unsignedp)
3437 {
3438 rtx tem;
3439 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3440
3441 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3442 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3443 tem = expand_and (mode, tem, op1, NULL_RTX);
3444 adj_operand
3445 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3446 adj_operand);
3447
3448 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3449 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3450 tem = expand_and (mode, tem, op0, NULL_RTX);
3451 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3452 target);
3453
3454 return target;
3455 }
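/* Sketch of the identity used above (explanatory, not from the original
   source): interpreting the operands as N-bit patterns,
     highpart_signed (OP0, OP1)
       == highpart_unsigned (OP0, OP1) - (OP0 < 0 ? OP1 : 0)
                                       - (OP1 < 0 ? OP0 : 0)
   modulo 2^N, and the reverse adjustment adds those two terms instead.
   The arithmetic right shifts by GET_MODE_BITSIZE (mode) - 1 above
   produce an all-ones mask exactly when the corresponding operand is
   negative, so each AND selects either the other operand or zero.  */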
3456
3457 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3458
3459 static rtx
3460 extract_high_half (machine_mode mode, rtx op)
3461 {
3462 machine_mode wider_mode;
3463
3464 if (mode == word_mode)
3465 return gen_highpart (mode, op);
3466
3467 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3468
3469 wider_mode = GET_MODE_WIDER_MODE (mode);
3470 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3471 GET_MODE_BITSIZE (mode), 0, 1);
3472 return convert_modes (mode, wider_mode, op, 0);
3473 }
3474
3475 /* Like expmed_mult_highpart, but only consider using a multiplication
3476 optab. OP1 is an rtx for the constant operand. */
3477
3478 static rtx
3479 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3480 rtx target, int unsignedp, int max_cost)
3481 {
3482 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3483 machine_mode wider_mode;
3484 optab moptab;
3485 rtx tem;
3486 int size;
3487 bool speed = optimize_insn_for_speed_p ();
3488
3489 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3490
3491 wider_mode = GET_MODE_WIDER_MODE (mode);
3492 size = GET_MODE_BITSIZE (mode);
3493
3494 /* Firstly, try using a multiplication insn that only generates the needed
3495 high part of the product, and in the sign flavor of unsignedp. */
3496 if (mul_highpart_cost (speed, mode) < max_cost)
3497 {
3498 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3499 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3500 unsignedp, OPTAB_DIRECT);
3501 if (tem)
3502 return tem;
3503 }
3504
3505 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3506 Need to adjust the result after the multiplication. */
3507 if (size - 1 < BITS_PER_WORD
3508 && (mul_highpart_cost (speed, mode)
3509 + 2 * shift_cost (speed, mode, size-1)
3510 + 4 * add_cost (speed, mode) < max_cost))
3511 {
3512 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3513 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3514 unsignedp, OPTAB_DIRECT);
3515 if (tem)
3516 /* We used the wrong signedness. Adjust the result. */
3517 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3518 tem, unsignedp);
3519 }
3520
3521 /* Try widening multiplication. */
3522 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3523 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3524 && mul_widen_cost (speed, wider_mode) < max_cost)
3525 {
3526 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3527 unsignedp, OPTAB_WIDEN);
3528 if (tem)
3529 return extract_high_half (mode, tem);
3530 }
3531
3532 /* Try widening the mode and performing a non-widening multiplication. */
3533 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3534 && size - 1 < BITS_PER_WORD
3535 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3536 < max_cost))
3537 {
3538 rtx_insn *insns;
3539 rtx wop0, wop1;
3540
3541 /* We need to widen the operands, for example to ensure the
3542 constant multiplier is correctly sign or zero extended.
3543 Use a sequence to clean up any instructions emitted by
3544 the conversions if things don't work out. */
3545 start_sequence ();
3546 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3547 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3548 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3549 unsignedp, OPTAB_WIDEN);
3550 insns = get_insns ();
3551 end_sequence ();
3552
3553 if (tem)
3554 {
3555 emit_insn (insns);
3556 return extract_high_half (mode, tem);
3557 }
3558 }
3559
3560 /* Try widening multiplication of opposite signedness, and adjust. */
3561 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3562 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3563 && size - 1 < BITS_PER_WORD
3564 && (mul_widen_cost (speed, wider_mode)
3565 + 2 * shift_cost (speed, mode, size-1)
3566 + 4 * add_cost (speed, mode) < max_cost))
3567 {
3568 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3569 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3570 if (tem != 0)
3571 {
3572 tem = extract_high_half (mode, tem);
3573 /* We used the wrong signedness. Adjust the result. */
3574 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3575 target, unsignedp);
3576 }
3577 }
3578
3579 return 0;
3580 }
3581
3582 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3583 putting the high half of the result in TARGET if that is convenient,
3584 and return where the result is. If the operation cannot be performed,
3585 0 is returned.
3586
3587 MODE is the mode of operation and result.
3588
3589 UNSIGNEDP nonzero means unsigned multiply.
3590
3591 MAX_COST is the total allowed cost for the expanded RTL. */
3592
3593 static rtx
3594 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3595 rtx target, int unsignedp, int max_cost)
3596 {
3597 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3598 unsigned HOST_WIDE_INT cnst1;
3599 int extra_cost;
3600 bool sign_adjust = false;
3601 enum mult_variant variant;
3602 struct algorithm alg;
3603 rtx tem;
3604 bool speed = optimize_insn_for_speed_p ();
3605
3606 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3607 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3608 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3609
3610 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3611
3612 /* We can't optimize modes wider than BITS_PER_WORD.
3613 ??? We might be able to perform double-word arithmetic if
3614 mode == word_mode; however, all the cost calculations in
3615 synth_mult etc. assume single-word operations. */
3616 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3617 return expmed_mult_highpart_optab (mode, op0, op1, target,
3618 unsignedp, max_cost);
3619
3620 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3621
3622 /* Check whether we try to multiply by a negative constant. */
3623 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3624 {
3625 sign_adjust = true;
3626 extra_cost += add_cost (speed, mode);
3627 }
3628
3629 /* See whether shift/add multiplication is cheap enough. */
3630 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3631 max_cost - extra_cost))
3632 {
3633 /* See whether the specialized multiplication optabs are
3634 cheaper than the shift/add version. */
3635 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3636 alg.cost.cost + extra_cost);
3637 if (tem)
3638 return tem;
3639
3640 tem = convert_to_mode (wider_mode, op0, unsignedp);
3641 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3642 tem = extract_high_half (mode, tem);
3643
3644 /* Adjust result for signedness. */
3645 if (sign_adjust)
3646 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3647
3648 return tem;
3649 }
3650 return expmed_mult_highpart_optab (mode, op0, op1, target,
3651 unsignedp, max_cost);
3652 }
3653
3654
3655 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3656
3657 static rtx
3658 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3659 {
3660 rtx result, temp, shift;
3661 rtx_code_label *label;
3662 int logd;
3663 int prec = GET_MODE_PRECISION (mode);
3664
3665 logd = floor_log2 (d);
3666 result = gen_reg_rtx (mode);
3667
3668 /* Avoid conditional branches when they're expensive. */
3669 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3670 && optimize_insn_for_speed_p ())
3671 {
3672 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3673 mode, 0, -1);
3674 if (signmask)
3675 {
3676 HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3677 signmask = force_reg (mode, signmask);
3678 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3679
3680 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3681 which instruction sequence to use. If logical right shifts
3682 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3683 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3684
3685 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3686 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3687 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3688 > COSTS_N_INSNS (2)))
3689 {
3690 temp = expand_binop (mode, xor_optab, op0, signmask,
3691 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3692 temp = expand_binop (mode, sub_optab, temp, signmask,
3693 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3694 temp = expand_binop (mode, and_optab, temp,
3695 gen_int_mode (masklow, mode),
3696 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3697 temp = expand_binop (mode, xor_optab, temp, signmask,
3698 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3699 temp = expand_binop (mode, sub_optab, temp, signmask,
3700 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3701 }
3702 else
3703 {
3704 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3705 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3706 signmask = force_reg (mode, signmask);
3707
3708 temp = expand_binop (mode, add_optab, op0, signmask,
3709 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3710 temp = expand_binop (mode, and_optab, temp,
3711 gen_int_mode (masklow, mode),
3712 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3713 temp = expand_binop (mode, sub_optab, temp, signmask,
3714 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3715 }
3716 return temp;
3717 }
3718 }
3719
3720 /* Mask contains the mode's signbit and the significant bits of the
3721 modulus. By including the signbit in the operation, many targets
3722 can avoid an explicit compare operation in the following comparison
3723 against zero. */
3724 wide_int mask = wi::mask (logd, false, prec);
3725 mask = wi::set_bit (mask, prec - 1);
3726
3727 temp = expand_binop (mode, and_optab, op0,
3728 immed_wide_int_const (mask, mode),
3729 result, 1, OPTAB_LIB_WIDEN);
3730 if (temp != result)
3731 emit_move_insn (result, temp);
3732
3733 label = gen_label_rtx ();
3734 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3735
3736 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3737 0, OPTAB_LIB_WIDEN);
3738
3739 mask = wi::mask (logd, true, prec);
3740 temp = expand_binop (mode, ior_optab, temp,
3741 immed_wide_int_const (mask, mode),
3742 result, 1, OPTAB_LIB_WIDEN);
3743 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3744 0, OPTAB_LIB_WIDEN);
3745 if (temp != result)
3746 emit_move_insn (result, temp);
3747 emit_label (label);
3748 return result;
3749 }
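/* Hand-worked example for the branch-free path above (illustrative,
   not part of the original code): for OP0 == -5 and D == 4 (logd == 2,
   masklow == 3) the store-flag gives signmask == -1.  On the LSHIFTRT
   variant, signmask >> (bitsize - 2) == 3, and
     ((-5 + 3) & 3) - 3 == -1;
   on the XOR/SUB variant,
     ((((-5 ^ -1) - -1) & 3) ^ -1) - -1 == -1.
   Both match the truncating remainder -5 % 4 == -1.  */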
3750
3751 /* Expand signed division of OP0 by a power of two D in mode MODE.
3752 This routine is only called for positive values of D. */
3753
3754 static rtx
3755 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3756 {
3757 rtx temp;
3758 rtx_code_label *label;
3759 int logd;
3760
3761 logd = floor_log2 (d);
3762
3763 if (d == 2
3764 && BRANCH_COST (optimize_insn_for_speed_p (),
3765 false) >= 1)
3766 {
3767 temp = gen_reg_rtx (mode);
3768 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3769 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3770 0, OPTAB_LIB_WIDEN);
3771 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3772 }
3773
3774 #ifdef HAVE_conditional_move
3775 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3776 >= 2)
3777 {
3778 rtx temp2;
3779
3780 start_sequence ();
3781 temp2 = copy_to_mode_reg (mode, op0);
3782 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3783 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3784 temp = force_reg (mode, temp);
3785
3786 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3787 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3788 mode, temp, temp2, mode, 0);
3789 if (temp2)
3790 {
3791 rtx_insn *seq = get_insns ();
3792 end_sequence ();
3793 emit_insn (seq);
3794 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3795 }
3796 end_sequence ();
3797 }
3798 #endif
3799
3800 if (BRANCH_COST (optimize_insn_for_speed_p (),
3801 false) >= 2)
3802 {
3803 int ushift = GET_MODE_BITSIZE (mode) - logd;
3804
3805 temp = gen_reg_rtx (mode);
3806 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3807 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3808 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3809 > COSTS_N_INSNS (1))
3810 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3811 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3812 else
3813 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3814 ushift, NULL_RTX, 1);
3815 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3816 0, OPTAB_LIB_WIDEN);
3817 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3818 }
3819
3820 label = gen_label_rtx ();
3821 temp = copy_to_mode_reg (mode, op0);
3822 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3823 expand_inc (temp, gen_int_mode (d - 1, mode));
3824 emit_label (label);
3825 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3826 }
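/* Hand-worked example for the store-flag path above (illustrative,
   not part of the original code): for OP0 == -7 and D == 4 (logd == 2)
   emit_store_flag yields -1 for negative inputs, the logical shift by
   ushift turns that into D - 1 == 3, and
     (-7 + 3) >> 2  ==  -4 >> 2  ==  -1,
   the truncating quotient of -7 / 4.  Non-negative inputs get 0 added
   and are simply shifted arithmetically.  */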
3827 \f
3828 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3829 if that is convenient, and returning where the result is.
3830 You may request either the quotient or the remainder as the result;
3831 specify REM_FLAG nonzero to get the remainder.
3832
3833 CODE is the expression code for which kind of division this is;
3834 it controls how rounding is done. MODE is the machine mode to use.
3835 UNSIGNEDP nonzero means do unsigned division. */
3836
3837 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3838 and then correct it by or'ing in missing high bits
3839 if result of ANDI is nonzero.
3840 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3841 This could optimize to a bfexts instruction.
3842 But C doesn't use these operations, so their optimizations are
3843 left for later. */
3844 /* ??? For modulo, we don't actually need the highpart of the first product,
3845 the low part will do nicely. And for small divisors, the second multiply
3846 can also be a low-part only multiply or even be completely left out.
3847 E.g. to calculate the remainder of a division by 3 with a 32 bit
3848 multiply, multiply with 0x55555556 and extract the upper two bits;
3849 the result is exact for inputs up to 0x1fffffff.
3850 The input range can be reduced by using cross-sum rules.
3851 For odd divisors >= 3, the following table gives right shift counts
3852 so that if a number is shifted by an integer multiple of the given
3853 amount, the remainder stays the same:
3854 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3855 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3856 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3857 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3858 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3859
3860 Cross-sum rules for even numbers can be derived by leaving as many bits
3861 to the right alone as the divisor has zeros to the right.
3862 E.g. if x is an unsigned 32 bit number:
3863 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3864 */
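/* Hand-checked illustration of the remainder-by-3 trick above (not part
   of the original code): 7 * 0x55555556 == 0x25555555a, whose low 32
   bits are 0x5555555a, and the top two bits of that word are
   0b01 == 1 == 7 mod 3.  Likewise 8 * 0x55555556 has low word
   0xaaaaaab0, whose top two bits are 0b10 == 2 == 8 mod 3.  */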
3865
3866 rtx
3867 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3868 rtx op0, rtx op1, rtx target, int unsignedp)
3869 {
3870 machine_mode compute_mode;
3871 rtx tquotient;
3872 rtx quotient = 0, remainder = 0;
3873 rtx_insn *last;
3874 int size;
3875 rtx_insn *insn;
3876 optab optab1, optab2;
3877 int op1_is_constant, op1_is_pow2 = 0;
3878 int max_cost, extra_cost;
3879 static HOST_WIDE_INT last_div_const = 0;
3880 bool speed = optimize_insn_for_speed_p ();
3881
3882 op1_is_constant = CONST_INT_P (op1);
3883 if (op1_is_constant)
3884 {
3885 unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3886 if (unsignedp)
3887 ext_op1 &= GET_MODE_MASK (mode);
3888 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3889 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3890 }
3891
3892 /*
3893 This is the structure of expand_divmod:
3894
3895 First comes code to fix up the operands so we can perform the operations
3896 correctly and efficiently.
3897
3898 Second comes a switch statement with code specific for each rounding mode.
3899 For some special operands this code emits all RTL for the desired
3900 operation, for other cases, it generates only a quotient and stores it in
3901 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3902 to indicate that it has not done anything.
3903
3904 Last comes code that finishes the operation. If QUOTIENT is set and
3905 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3906 QUOTIENT is not set, it is computed using trunc rounding.
3907
3908 We try to generate special code for division and remainder when OP1 is a
3909 constant. If |OP1| = 2**n we can use shifts and some other fast
3910 operations. For other values of OP1, we compute a carefully selected
3911 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3912 by m.
3913
3914 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3915 half of the product. Different strategies for generating the product are
3916 implemented in expmed_mult_highpart.
3917
3918 If what we actually want is the remainder, we generate that by another
3919 by-constant multiplication and a subtraction. */
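/* Illustrative trace (not from the original source): for unsigned
   TRUNC_MOD_EXPR with OP1 == 8, the second step takes the
   power-of-two path and simply returns OP0 & 7; with OP1 == 7 it
   instead builds the quotient from the highpart multiply chosen by
   choose_multiplier, and the final step forms the remainder as
   OP0 - 7 * quotient.  */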
3920
3921 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3922 code below will malfunction if we are, so check here and handle
3923 the special case if so. */
3924 if (op1 == const1_rtx)
3925 return rem_flag ? const0_rtx : op0;
3926
3927 /* When dividing by -1, we could get an overflow.
3928 negv_optab can handle overflows. */
3929 if (! unsignedp && op1 == constm1_rtx)
3930 {
3931 if (rem_flag)
3932 return const0_rtx;
3933 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3934 ? negv_optab : neg_optab, op0, target, 0);
3935 }
3936
3937 if (target
3938 /* Don't use the function value register as a target
3939 since we have to read it as well as write it,
3940 and function-inlining gets confused by this. */
3941 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3942 /* Don't clobber an operand while doing a multi-step calculation. */
3943 || ((rem_flag || op1_is_constant)
3944 && (reg_mentioned_p (target, op0)
3945 || (MEM_P (op0) && MEM_P (target))))
3946 || reg_mentioned_p (target, op1)
3947 || (MEM_P (op1) && MEM_P (target))))
3948 target = 0;
3949
3950 /* Get the mode in which to perform this computation. Normally it will
3951 be MODE, but sometimes we can't do the desired operation in MODE.
3952 If so, pick a wider mode in which we can do the operation. Convert
3953 to that mode at the start to avoid repeated conversions.
3954
3955 First see what operations we need. These depend on the expression
3956 we are evaluating. (We assume that divxx3 insns exist under the
3957 same conditions as modxx3 insns do, and that these insns don't normally
3958 fail. If these assumptions are not correct, we may generate less
3959 efficient code in some cases.)
3960
3961 Then see if we find a mode in which we can open-code that operation
3962 (either a division, modulus, or shift). Finally, check for the smallest
3963 mode for which we can do the operation with a library call. */
3964
3965 /* We might want to refine this now that we have division-by-constant
3966 optimization. Since expmed_mult_highpart tries so many variants, it is
3967 not straightforward to generalize this. Maybe we should make an array
3968 of possible modes in init_expmed? Save this for GCC 2.7. */
3969
3970 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3971 ? (unsignedp ? lshr_optab : ashr_optab)
3972 : (unsignedp ? udiv_optab : sdiv_optab));
3973 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3974 ? optab1
3975 : (unsignedp ? udivmod_optab : sdivmod_optab));
3976
3977 for (compute_mode = mode; compute_mode != VOIDmode;
3978 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3979 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3980 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3981 break;
3982
3983 if (compute_mode == VOIDmode)
3984 for (compute_mode = mode; compute_mode != VOIDmode;
3985 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3986 if (optab_libfunc (optab1, compute_mode)
3987 || optab_libfunc (optab2, compute_mode))
3988 break;
3989
3990 /* If we still couldn't find a mode, use MODE, but expand_binop will
3991 probably die. */
3992 if (compute_mode == VOIDmode)
3993 compute_mode = mode;
3994
3995 if (target && GET_MODE (target) == compute_mode)
3996 tquotient = target;
3997 else
3998 tquotient = gen_reg_rtx (compute_mode);
3999
4000 size = GET_MODE_BITSIZE (compute_mode);
4001 #if 0
4002 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4003 (mode), and thereby get better code when OP1 is a constant. Do that
4004 later. It will require going over all usages of SIZE below. */
4005 size = GET_MODE_BITSIZE (mode);
4006 #endif
4007
4008 /* Only deduct something for a REM if the last divide done was
4009 for a different constant. Then set the constant of the last
4010 divide. */
4011 max_cost = (unsignedp
4012 ? udiv_cost (speed, compute_mode)
4013 : sdiv_cost (speed, compute_mode));
4014 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4015 && INTVAL (op1) == last_div_const))
4016 max_cost -= (mul_cost (speed, compute_mode)
4017 + add_cost (speed, compute_mode));
4018
4019 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4020
4021 /* Now convert to the best mode to use. */
4022 if (compute_mode != mode)
4023 {
4024 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4025 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4026
4027 /* convert_modes may have placed op1 into a register, so we
4028 must recompute the following. */
4029 op1_is_constant = CONST_INT_P (op1);
4030 op1_is_pow2 = (op1_is_constant
4031 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4032 || (! unsignedp
4033 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4034 }
4035
4036 /* If one of the operands is a volatile MEM, copy it into a register. */
4037
4038 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4039 op0 = force_reg (compute_mode, op0);
4040 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4041 op1 = force_reg (compute_mode, op1);
4042
4043 /* If we need the remainder or if OP1 is constant, we need to
4044 put OP0 in a register in case it has any queued subexpressions. */
4045 if (rem_flag || op1_is_constant)
4046 op0 = force_reg (compute_mode, op0);
4047
4048 last = get_last_insn ();
4049
4050 /* Promote floor rounding to trunc rounding for unsigned operations. */
4051 if (unsignedp)
4052 {
4053 if (code == FLOOR_DIV_EXPR)
4054 code = TRUNC_DIV_EXPR;
4055 if (code == FLOOR_MOD_EXPR)
4056 code = TRUNC_MOD_EXPR;
4057 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4058 code = TRUNC_DIV_EXPR;
4059 }
4060
4061 if (op1 != const0_rtx)
4062 switch (code)
4063 {
4064 case TRUNC_MOD_EXPR:
4065 case TRUNC_DIV_EXPR:
4066 if (op1_is_constant)
4067 {
4068 if (unsignedp)
4069 {
4070 unsigned HOST_WIDE_INT mh, ml;
4071 int pre_shift, post_shift;
4072 int dummy;
4073 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4074 & GET_MODE_MASK (compute_mode));
4075
4076 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4077 {
4078 pre_shift = floor_log2 (d);
4079 if (rem_flag)
4080 {
4081 unsigned HOST_WIDE_INT mask
4082 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4083 remainder
4084 = expand_binop (compute_mode, and_optab, op0,
4085 gen_int_mode (mask, compute_mode),
4086 remainder, 1,
4087 OPTAB_LIB_WIDEN);
4088 if (remainder)
4089 return gen_lowpart (mode, remainder);
4090 }
4091 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4092 pre_shift, tquotient, 1);
4093 }
4094 else if (size <= HOST_BITS_PER_WIDE_INT)
4095 {
4096 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4097 {
4098 /* Most significant bit of divisor is set; emit an scc
4099 insn. */
4100 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4101 compute_mode, 1, 1);
4102 }
4103 else
4104 {
4105 /* Find a suitable multiplier and right shift count
4106 instead of multiplying with D. */
4107
4108 mh = choose_multiplier (d, size, size,
4109 &ml, &post_shift, &dummy);
4110
4111 /* If the suggested multiplier is more than SIZE bits,
4112 we can do better for even divisors, using an
4113 initial right shift. */
4114 if (mh != 0 && (d & 1) == 0)
4115 {
4116 pre_shift = floor_log2 (d & -d);
4117 mh = choose_multiplier (d >> pre_shift, size,
4118 size - pre_shift,
4119 &ml, &post_shift, &dummy);
4120 gcc_assert (!mh);
4121 }
4122 else
4123 pre_shift = 0;
4124
4125 if (mh != 0)
4126 {
4127 rtx t1, t2, t3, t4;
4128
4129 if (post_shift - 1 >= BITS_PER_WORD)
4130 goto fail1;
4131
4132 extra_cost
4133 = (shift_cost (speed, compute_mode, post_shift - 1)
4134 + shift_cost (speed, compute_mode, 1)
4135 + 2 * add_cost (speed, compute_mode));
4136 t1 = expmed_mult_highpart
4137 (compute_mode, op0,
4138 gen_int_mode (ml, compute_mode),
4139 NULL_RTX, 1, max_cost - extra_cost);
4140 if (t1 == 0)
4141 goto fail1;
4142 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4143 op0, t1),
4144 NULL_RTX);
4145 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4146 t2, 1, NULL_RTX, 1);
4147 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4148 t1, t3),
4149 NULL_RTX);
4150 quotient = expand_shift
4151 (RSHIFT_EXPR, compute_mode, t4,
4152 post_shift - 1, tquotient, 1);
4153 }
4154 else
4155 {
4156 rtx t1, t2;
4157
4158 if (pre_shift >= BITS_PER_WORD
4159 || post_shift >= BITS_PER_WORD)
4160 goto fail1;
4161
4162 t1 = expand_shift
4163 (RSHIFT_EXPR, compute_mode, op0,
4164 pre_shift, NULL_RTX, 1);
4165 extra_cost
4166 = (shift_cost (speed, compute_mode, pre_shift)
4167 + shift_cost (speed, compute_mode, post_shift));
4168 t2 = expmed_mult_highpart
4169 (compute_mode, t1,
4170 gen_int_mode (ml, compute_mode),
4171 NULL_RTX, 1, max_cost - extra_cost);
4172 if (t2 == 0)
4173 goto fail1;
4174 quotient = expand_shift
4175 (RSHIFT_EXPR, compute_mode, t2,
4176 post_shift, tquotient, 1);
4177 }
4178 }
4179 }
4180 else /* Too wide mode to use tricky code */
4181 break;
4182
4183 insn = get_last_insn ();
4184 if (insn != last)
4185 set_dst_reg_note (insn, REG_EQUAL,
4186 gen_rtx_UDIV (compute_mode, op0, op1),
4187 quotient);
4188 }
4189 else /* TRUNC_DIV, signed */
4190 {
4191 unsigned HOST_WIDE_INT ml;
4192 int lgup, post_shift;
4193 rtx mlr;
4194 HOST_WIDE_INT d = INTVAL (op1);
4195 unsigned HOST_WIDE_INT abs_d;
4196
4197 /* Since d might be INT_MIN, we have to cast to
4198 unsigned HOST_WIDE_INT before negating to avoid
4199 undefined signed overflow. */
4200 abs_d = (d >= 0
4201 ? (unsigned HOST_WIDE_INT) d
4202 : - (unsigned HOST_WIDE_INT) d);
4203
4204 /* n rem d = n rem -d */
4205 if (rem_flag && d < 0)
4206 {
4207 d = abs_d;
4208 op1 = gen_int_mode (abs_d, compute_mode);
4209 }
4210
4211 if (d == 1)
4212 quotient = op0;
4213 else if (d == -1)
4214 quotient = expand_unop (compute_mode, neg_optab, op0,
4215 tquotient, 0);
4216 else if (HOST_BITS_PER_WIDE_INT >= size
4217 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4218 {
4219 /* This case is not handled correctly below. */
4220 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4221 compute_mode, 1, 1);
4222 if (quotient == 0)
4223 goto fail1;
4224 }
4225 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4226 && (rem_flag
4227 ? smod_pow2_cheap (speed, compute_mode)
4228 : sdiv_pow2_cheap (speed, compute_mode))
4229 /* We assume that the cheap metric is true if the
4230 optab has an expander for this mode. */
4231 && ((optab_handler ((rem_flag ? smod_optab
4232 : sdiv_optab),
4233 compute_mode)
4234 != CODE_FOR_nothing)
4235 || (optab_handler (sdivmod_optab,
4236 compute_mode)
4237 != CODE_FOR_nothing)))
4238 ;
4239 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4240 {
4241 if (rem_flag)
4242 {
4243 remainder = expand_smod_pow2 (compute_mode, op0, d);
4244 if (remainder)
4245 return gen_lowpart (mode, remainder);
4246 }
4247
4248 if (sdiv_pow2_cheap (speed, compute_mode)
4249 && ((optab_handler (sdiv_optab, compute_mode)
4250 != CODE_FOR_nothing)
4251 || (optab_handler (sdivmod_optab, compute_mode)
4252 != CODE_FOR_nothing)))
4253 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4254 compute_mode, op0,
4255 gen_int_mode (abs_d,
4256 compute_mode),
4257 NULL_RTX, 0);
4258 else
4259 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4260
4261 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4262 negate the quotient. */
4263 if (d < 0)
4264 {
4265 insn = get_last_insn ();
4266 if (insn != last
4267 && abs_d < ((unsigned HOST_WIDE_INT) 1
4268 << (HOST_BITS_PER_WIDE_INT - 1)))
4269 set_dst_reg_note (insn, REG_EQUAL,
4270 gen_rtx_DIV (compute_mode, op0,
4271 gen_int_mode
4272 (abs_d,
4273 compute_mode)),
4274 quotient);
4275
4276 quotient = expand_unop (compute_mode, neg_optab,
4277 quotient, quotient, 0);
4278 }
4279 }
4280 else if (size <= HOST_BITS_PER_WIDE_INT)
4281 {
4282 choose_multiplier (abs_d, size, size - 1,
4283 &ml, &post_shift, &lgup);
4284 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4285 {
4286 rtx t1, t2, t3;
4287
4288 if (post_shift >= BITS_PER_WORD
4289 || size - 1 >= BITS_PER_WORD)
4290 goto fail1;
4291
4292 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4293 + shift_cost (speed, compute_mode, size - 1)
4294 + add_cost (speed, compute_mode));
4295 t1 = expmed_mult_highpart
4296 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4297 NULL_RTX, 0, max_cost - extra_cost);
4298 if (t1 == 0)
4299 goto fail1;
4300 t2 = expand_shift
4301 (RSHIFT_EXPR, compute_mode, t1,
4302 post_shift, NULL_RTX, 0);
4303 t3 = expand_shift
4304 (RSHIFT_EXPR, compute_mode, op0,
4305 size - 1, NULL_RTX, 0);
4306 if (d < 0)
4307 quotient
4308 = force_operand (gen_rtx_MINUS (compute_mode,
4309 t3, t2),
4310 tquotient);
4311 else
4312 quotient
4313 = force_operand (gen_rtx_MINUS (compute_mode,
4314 t2, t3),
4315 tquotient);
4316 }
4317 else
4318 {
4319 rtx t1, t2, t3, t4;
4320
4321 if (post_shift >= BITS_PER_WORD
4322 || size - 1 >= BITS_PER_WORD)
4323 goto fail1;
4324
4325 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4326 mlr = gen_int_mode (ml, compute_mode);
4327 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4328 + shift_cost (speed, compute_mode, size - 1)
4329 + 2 * add_cost (speed, compute_mode));
4330 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4331 NULL_RTX, 0,
4332 max_cost - extra_cost);
4333 if (t1 == 0)
4334 goto fail1;
4335 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4336 t1, op0),
4337 NULL_RTX);
4338 t3 = expand_shift
4339 (RSHIFT_EXPR, compute_mode, t2,
4340 post_shift, NULL_RTX, 0);
4341 t4 = expand_shift
4342 (RSHIFT_EXPR, compute_mode, op0,
4343 size - 1, NULL_RTX, 0);
4344 if (d < 0)
4345 quotient
4346 = force_operand (gen_rtx_MINUS (compute_mode,
4347 t4, t3),
4348 tquotient);
4349 else
4350 quotient
4351 = force_operand (gen_rtx_MINUS (compute_mode,
4352 t3, t4),
4353 tquotient);
4354 }
4355 }
4356 else /* Too wide mode to use tricky code */
4357 break;
4358
4359 insn = get_last_insn ();
4360 if (insn != last)
4361 set_dst_reg_note (insn, REG_EQUAL,
4362 gen_rtx_DIV (compute_mode, op0, op1),
4363 quotient);
4364 }
4365 break;
4366 }
4367 fail1:
4368 delete_insns_since (last);
4369 break;
4370
4371 case FLOOR_DIV_EXPR:
4372 case FLOOR_MOD_EXPR:
4373 /* We will come here only for signed operations. */
4374 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4375 {
4376 unsigned HOST_WIDE_INT mh, ml;
4377 int pre_shift, lgup, post_shift;
4378 HOST_WIDE_INT d = INTVAL (op1);
4379
4380 if (d > 0)
4381 {
4382 /* We could just as easily deal with negative constants here,
4383 but it does not seem worth the trouble for GCC 2.6. */
4384 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4385 {
4386 pre_shift = floor_log2 (d);
4387 if (rem_flag)
4388 {
4389 unsigned HOST_WIDE_INT mask
4390 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4391 remainder = expand_binop
4392 (compute_mode, and_optab, op0,
4393 gen_int_mode (mask, compute_mode),
4394 remainder, 0, OPTAB_LIB_WIDEN);
4395 if (remainder)
4396 return gen_lowpart (mode, remainder);
4397 }
4398 quotient = expand_shift
4399 (RSHIFT_EXPR, compute_mode, op0,
4400 pre_shift, tquotient, 0);
4401 }
4402 else
4403 {
4404 rtx t1, t2, t3, t4;
4405
4406 mh = choose_multiplier (d, size, size - 1,
4407 &ml, &post_shift, &lgup);
4408 gcc_assert (!mh);
4409
4410 if (post_shift < BITS_PER_WORD
4411 && size - 1 < BITS_PER_WORD)
4412 {
4413 t1 = expand_shift
4414 (RSHIFT_EXPR, compute_mode, op0,
4415 size - 1, NULL_RTX, 0);
4416 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4417 NULL_RTX, 0, OPTAB_WIDEN);
4418 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4419 + shift_cost (speed, compute_mode, size - 1)
4420 + 2 * add_cost (speed, compute_mode));
4421 t3 = expmed_mult_highpart
4422 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4423 NULL_RTX, 1, max_cost - extra_cost);
4424 if (t3 != 0)
4425 {
4426 t4 = expand_shift
4427 (RSHIFT_EXPR, compute_mode, t3,
4428 post_shift, NULL_RTX, 1);
4429 quotient = expand_binop (compute_mode, xor_optab,
4430 t4, t1, tquotient, 0,
4431 OPTAB_WIDEN);
4432 }
4433 }
4434 }
4435 }
4436 else
4437 {
4438 rtx nsign, t1, t2, t3, t4;
4439 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4440 op0, constm1_rtx), NULL_RTX);
4441 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4442 0, OPTAB_WIDEN);
4443 nsign = expand_shift
4444 (RSHIFT_EXPR, compute_mode, t2,
4445 size - 1, NULL_RTX, 0);
4446 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4447 NULL_RTX);
4448 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4449 NULL_RTX, 0);
4450 if (t4)
4451 {
4452 rtx t5;
4453 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4454 NULL_RTX, 0);
4455 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4456 t4, t5),
4457 tquotient);
4458 }
4459 }
4460 }
4461
4462 if (quotient != 0)
4463 break;
4464 delete_insns_since (last);
4465
4466 /* Try using an instruction that produces both the quotient and
4467 remainder, using truncation. We can easily compensate the quotient
4468 or remainder to get floor rounding, once we have the remainder.
4469 Notice that we also compute the final remainder value here,
4470 and return the result right away. */
4471 if (target == 0 || GET_MODE (target) != compute_mode)
4472 target = gen_reg_rtx (compute_mode);
4473
4474 if (rem_flag)
4475 {
4476 remainder
4477 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4478 quotient = gen_reg_rtx (compute_mode);
4479 }
4480 else
4481 {
4482 quotient
4483 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4484 remainder = gen_reg_rtx (compute_mode);
4485 }
4486
4487 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4488 quotient, remainder, 0))
4489 {
4490 /* This could be computed with a branch-less sequence.
4491 Save that for later. */
4492 rtx tem;
4493 rtx_code_label *label = gen_label_rtx ();
4494 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4495 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4496 NULL_RTX, 0, OPTAB_WIDEN);
4497 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4498 expand_dec (quotient, const1_rtx);
4499 expand_inc (remainder, op1);
4500 emit_label (label);
4501 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4502 }
4503
4504 /* No luck with division elimination or divmod. Have to do it
4505 by conditionally adjusting op0 *and* the result. */
4506 {
4507 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4508 rtx adjusted_op0;
4509 rtx tem;
4510
4511 quotient = gen_reg_rtx (compute_mode);
4512 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4513 label1 = gen_label_rtx ();
4514 label2 = gen_label_rtx ();
4515 label3 = gen_label_rtx ();
4516 label4 = gen_label_rtx ();
4517 label5 = gen_label_rtx ();
4518 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4519 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4520 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4521 quotient, 0, OPTAB_LIB_WIDEN);
4522 if (tem != quotient)
4523 emit_move_insn (quotient, tem);
4524 emit_jump_insn (gen_jump (label5));
4525 emit_barrier ();
4526 emit_label (label1);
4527 expand_inc (adjusted_op0, const1_rtx);
4528 emit_jump_insn (gen_jump (label4));
4529 emit_barrier ();
4530 emit_label (label2);
4531 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4532 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4533 quotient, 0, OPTAB_LIB_WIDEN);
4534 if (tem != quotient)
4535 emit_move_insn (quotient, tem);
4536 emit_jump_insn (gen_jump (label5));
4537 emit_barrier ();
4538 emit_label (label3);
4539 expand_dec (adjusted_op0, const1_rtx);
4540 emit_label (label4);
4541 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4542 quotient, 0, OPTAB_LIB_WIDEN);
4543 if (tem != quotient)
4544 emit_move_insn (quotient, tem);
4545 expand_dec (quotient, const1_rtx);
4546 emit_label (label5);
4547 }
4548 break;
4549
4550 case CEIL_DIV_EXPR:
4551 case CEIL_MOD_EXPR:
4552 if (unsignedp)
4553 {
4554 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4555 {
4556 rtx t1, t2, t3;
4557 unsigned HOST_WIDE_INT d = INTVAL (op1);
4558 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4559 floor_log2 (d), tquotient, 1);
4560 t2 = expand_binop (compute_mode, and_optab, op0,
4561 gen_int_mode (d - 1, compute_mode),
4562 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4563 t3 = gen_reg_rtx (compute_mode);
4564 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4565 compute_mode, 1, 1);
4566 if (t3 == 0)
4567 {
4568 rtx_code_label *lab;
4569 lab = gen_label_rtx ();
4570 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4571 expand_inc (t1, const1_rtx);
4572 emit_label (lab);
4573 quotient = t1;
4574 }
4575 else
4576 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4577 t1, t3),
4578 tquotient);
4579 break;
4580 }
4581
4582 /* Try using an instruction that produces both the quotient and
4583 remainder, using truncation. We can easily compensate the
4584 quotient or remainder to get ceiling rounding, once we have the
4585 remainder. Notice that we also compute the final remainder
4586 value here, and return the result right away. */
4587 if (target == 0 || GET_MODE (target) != compute_mode)
4588 target = gen_reg_rtx (compute_mode);
4589
4590 if (rem_flag)
4591 {
4592 remainder = (REG_P (target)
4593 ? target : gen_reg_rtx (compute_mode));
4594 quotient = gen_reg_rtx (compute_mode);
4595 }
4596 else
4597 {
4598 quotient = (REG_P (target)
4599 ? target : gen_reg_rtx (compute_mode));
4600 remainder = gen_reg_rtx (compute_mode);
4601 }
4602
4603 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4604 remainder, 1))
4605 {
4606 /* This could be computed with a branch-less sequence.
4607 Save that for later. */
4608 rtx_code_label *label = gen_label_rtx ();
4609 do_cmp_and_jump (remainder, const0_rtx, EQ,
4610 compute_mode, label);
4611 expand_inc (quotient, const1_rtx);
4612 expand_dec (remainder, op1);
4613 emit_label (label);
4614 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4615 }
4616
4617 /* No luck with division elimination or divmod. Have to do it
4618 by conditionally adjusting op0 *and* the result. */
4619 {
4620 rtx_code_label *label1, *label2;
4621 rtx adjusted_op0, tem;
4622
4623 quotient = gen_reg_rtx (compute_mode);
4624 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4625 label1 = gen_label_rtx ();
4626 label2 = gen_label_rtx ();
4627 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4628 compute_mode, label1);
4629 emit_move_insn (quotient, const0_rtx);
4630 emit_jump_insn (gen_jump (label2));
4631 emit_barrier ();
4632 emit_label (label1);
4633 expand_dec (adjusted_op0, const1_rtx);
4634 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4635 quotient, 1, OPTAB_LIB_WIDEN);
4636 if (tem != quotient)
4637 emit_move_insn (quotient, tem);
4638 expand_inc (quotient, const1_rtx);
4639 emit_label (label2);
4640 }
4641 }
4642 else /* signed */
4643 {
4644 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4645 && INTVAL (op1) >= 0)
4646 {
4647 /* This is extremely similar to the code for the unsigned case
4648 above. For 2.7 we should merge these variants, but for
4649 2.6.1 I don't want to touch the code for unsigned since that
4650 gets used in C. The signed case will only be used by other
4651 languages (Ada). */
4652
4653 rtx t1, t2, t3;
4654 unsigned HOST_WIDE_INT d = INTVAL (op1);
4655 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4656 floor_log2 (d), tquotient, 0);
4657 t2 = expand_binop (compute_mode, and_optab, op0,
4658 gen_int_mode (d - 1, compute_mode),
4659 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4660 t3 = gen_reg_rtx (compute_mode);
4661 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4662 compute_mode, 1, 1);
4663 if (t3 == 0)
4664 {
4665 rtx_code_label *lab;
4666 lab = gen_label_rtx ();
4667 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4668 expand_inc (t1, const1_rtx);
4669 emit_label (lab);
4670 quotient = t1;
4671 }
4672 else
4673 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4674 t1, t3),
4675 tquotient);
4676 break;
4677 }
4678
4679 /* Try using an instruction that produces both the quotient and
4680 remainder, using truncation. We can easily compensate the
4681 quotient or remainder to get ceiling rounding, once we have the
4682 remainder. Notice that we also compute the final remainder
4683 value here, and return the result right away. */
4684 if (target == 0 || GET_MODE (target) != compute_mode)
4685 target = gen_reg_rtx (compute_mode);
4686 if (rem_flag)
4687 {
4688 remainder = (REG_P (target)
4689 ? target : gen_reg_rtx (compute_mode));
4690 quotient = gen_reg_rtx (compute_mode);
4691 }
4692 else
4693 {
4694 quotient = (REG_P (target)
4695 ? target : gen_reg_rtx (compute_mode));
4696 remainder = gen_reg_rtx (compute_mode);
4697 }
4698
4699 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4700 remainder, 0))
4701 {
4702 /* This could be computed with a branch-less sequence.
4703 Save that for later. */
4704 rtx tem;
4705 rtx_code_label *label = gen_label_rtx ();
4706 do_cmp_and_jump (remainder, const0_rtx, EQ,
4707 compute_mode, label);
4708 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4709 NULL_RTX, 0, OPTAB_WIDEN);
4710 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4711 expand_inc (quotient, const1_rtx);
4712 expand_dec (remainder, op1);
4713 emit_label (label);
4714 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4715 }
4716
4717 /* No luck with division elimination or divmod. Have to do it
4718 by conditionally adjusting op0 *and* the result. */
4719 {
4720 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4721 rtx adjusted_op0;
4722 rtx tem;
4723
4724 quotient = gen_reg_rtx (compute_mode);
4725 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4726 label1 = gen_label_rtx ();
4727 label2 = gen_label_rtx ();
4728 label3 = gen_label_rtx ();
4729 label4 = gen_label_rtx ();
4730 label5 = gen_label_rtx ();
4731 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4732 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4733 compute_mode, label1);
4734 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4735 quotient, 0, OPTAB_LIB_WIDEN);
4736 if (tem != quotient)
4737 emit_move_insn (quotient, tem);
4738 emit_jump_insn (gen_jump (label5));
4739 emit_barrier ();
4740 emit_label (label1);
4741 expand_dec (adjusted_op0, const1_rtx);
4742 emit_jump_insn (gen_jump (label4));
4743 emit_barrier ();
4744 emit_label (label2);
4745 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4746 compute_mode, label3);
4747 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4748 quotient, 0, OPTAB_LIB_WIDEN);
4749 if (tem != quotient)
4750 emit_move_insn (quotient, tem);
4751 emit_jump_insn (gen_jump (label5));
4752 emit_barrier ();
4753 emit_label (label3);
4754 expand_inc (adjusted_op0, const1_rtx);
4755 emit_label (label4);
4756 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4757 quotient, 0, OPTAB_LIB_WIDEN);
4758 if (tem != quotient)
4759 emit_move_insn (quotient, tem);
4760 expand_inc (quotient, const1_rtx);
4761 emit_label (label5);
4762 }
4763 }
4764 break;
4765
4766 case EXACT_DIV_EXPR:
4767 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4768 {
4769 HOST_WIDE_INT d = INTVAL (op1);
4770 unsigned HOST_WIDE_INT ml;
4771 int pre_shift;
4772 rtx t1;
4773
4774 pre_shift = floor_log2 (d & -d);
4775 ml = invert_mod2n (d >> pre_shift, size);
4776 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4777 pre_shift, NULL_RTX, unsignedp);
4778 quotient = expand_mult (compute_mode, t1,
4779 gen_int_mode (ml, compute_mode),
4780 NULL_RTX, 1);
4781
4782 insn = get_last_insn ();
4783 set_dst_reg_note (insn, REG_EQUAL,
4784 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4785 compute_mode, op0, op1),
4786 quotient);
4787 }
4788 break;
4789
4790 case ROUND_DIV_EXPR:
4791 case ROUND_MOD_EXPR:
4792 if (unsignedp)
4793 {
4794 rtx tem;
4795 rtx_code_label *label;
4796 label = gen_label_rtx ();
4797 quotient = gen_reg_rtx (compute_mode);
4798 remainder = gen_reg_rtx (compute_mode);
4799 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4800 {
4801 rtx tem;
4802 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4803 quotient, 1, OPTAB_LIB_WIDEN);
4804 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4805 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4806 remainder, 1, OPTAB_LIB_WIDEN);
4807 }
4808 tem = plus_constant (compute_mode, op1, -1);
4809 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4810 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4811 expand_inc (quotient, const1_rtx);
4812 expand_dec (remainder, op1);
4813 emit_label (label);
4814 }
4815 else
4816 {
4817 rtx abs_rem, abs_op1, tem, mask;
4818 rtx_code_label *label;
4819 label = gen_label_rtx ();
4820 quotient = gen_reg_rtx (compute_mode);
4821 remainder = gen_reg_rtx (compute_mode);
4822 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4823 {
4824 rtx tem;
4825 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4826 quotient, 0, OPTAB_LIB_WIDEN);
4827 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4828 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4829 remainder, 0, OPTAB_LIB_WIDEN);
4830 }
4831 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4832 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4833 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4834 1, NULL_RTX, 1);
4835 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4836 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4837 NULL_RTX, 0, OPTAB_WIDEN);
4838 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4839 size - 1, NULL_RTX, 0);
4840 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4841 NULL_RTX, 0, OPTAB_WIDEN);
4842 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4843 NULL_RTX, 0, OPTAB_WIDEN);
4844 expand_inc (quotient, tem);
4845 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4846 NULL_RTX, 0, OPTAB_WIDEN);
4847 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4848 NULL_RTX, 0, OPTAB_WIDEN);
4849 expand_dec (remainder, tem);
4850 emit_label (label);
4851 }
4852 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4853
4854 default:
4855 gcc_unreachable ();
4856 }
4857
4858 if (quotient == 0)
4859 {
4860 if (target && GET_MODE (target) != compute_mode)
4861 target = 0;
4862
4863 if (rem_flag)
4864 {
4865 /* Try to produce the remainder without producing the quotient.
4866 If we seem to have a divmod pattern that does not require widening,
4867 don't try widening here. We should really have a WIDEN argument
4868 to expand_twoval_binop, since what we'd really like to do here is
4869 1) try a mod insn in compute_mode
4870 2) try a divmod insn in compute_mode
4871 3) try a div insn in compute_mode and multiply-subtract to get
4872 remainder
4873 4) try the same things with widening allowed. */
4874 remainder
4875 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4876 op0, op1, target,
4877 unsignedp,
4878 ((optab_handler (optab2, compute_mode)
4879 != CODE_FOR_nothing)
4880 ? OPTAB_DIRECT : OPTAB_WIDEN));
4881 if (remainder == 0)
4882 {
4883 /* No luck there. Can we do remainder and divide at once
4884 without a library call? */
4885 remainder = gen_reg_rtx (compute_mode);
4886 if (! expand_twoval_binop ((unsignedp
4887 ? udivmod_optab
4888 : sdivmod_optab),
4889 op0, op1,
4890 NULL_RTX, remainder, unsignedp))
4891 remainder = 0;
4892 }
4893
4894 if (remainder)
4895 return gen_lowpart (mode, remainder);
4896 }
4897
4898 /* Produce the quotient. Try a quotient insn, but not a library call.
4899 If we have a divmod in this mode, use it in preference to widening
4900 the div (for this test we assume it will not fail). Note that optab2
4901 is set to the one of the two optabs that the call below will use. */
4902 quotient
4903 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4904 op0, op1, rem_flag ? NULL_RTX : target,
4905 unsignedp,
4906 ((optab_handler (optab2, compute_mode)
4907 != CODE_FOR_nothing)
4908 ? OPTAB_DIRECT : OPTAB_WIDEN));
4909
4910 if (quotient == 0)
4911 {
4912 /* No luck there. Try a quotient-and-remainder insn,
4913 keeping the quotient alone. */
4914 quotient = gen_reg_rtx (compute_mode);
4915 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4916 op0, op1,
4917 quotient, NULL_RTX, unsignedp))
4918 {
4919 quotient = 0;
4920 if (! rem_flag)
4921 /* Still no luck. If we are not computing the remainder,
4922 use a library call for the quotient. */
4923 quotient = sign_expand_binop (compute_mode,
4924 udiv_optab, sdiv_optab,
4925 op0, op1, target,
4926 unsignedp, OPTAB_LIB_WIDEN);
4927 }
4928 }
4929 }
4930
4931 if (rem_flag)
4932 {
4933 if (target && GET_MODE (target) != compute_mode)
4934 target = 0;
4935
4936 if (quotient == 0)
4937 {
4938 /* No divide instruction either. Use library for remainder. */
4939 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4940 op0, op1, target,
4941 unsignedp, OPTAB_LIB_WIDEN);
4942 /* No remainder function. Try a quotient-and-remainder
4943 function, keeping the remainder. */
4944 if (!remainder)
4945 {
4946 remainder = gen_reg_rtx (compute_mode);
4947 if (!expand_twoval_binop_libfunc
4948 (unsignedp ? udivmod_optab : sdivmod_optab,
4949 op0, op1,
4950 NULL_RTX, remainder,
4951 unsignedp ? UMOD : MOD))
4952 remainder = NULL_RTX;
4953 }
4954 }
4955 else
4956 {
4957 /* We divided. Now finish doing X - Y * (X / Y). */
4958 remainder = expand_mult (compute_mode, quotient, op1,
4959 NULL_RTX, unsignedp);
4960 remainder = expand_binop (compute_mode, sub_optab, op0,
4961 remainder, target, unsignedp,
4962 OPTAB_LIB_WIDEN);
4963 }
4964 }
4965
4966 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4967 }
4968 \f
4969 /* Return a tree node with data type TYPE, describing the value of X.
4970 Usually this is a VAR_DECL, if there is no obvious better choice.
4971 X may be an expression; however, we only support those expressions
4972 generated by loop.c. */
4973
4974 tree
4975 make_tree (tree type, rtx x)
4976 {
4977 tree t;
4978
4979 switch (GET_CODE (x))
4980 {
4981 case CONST_INT:
4982 case CONST_WIDE_INT:
4983 t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
4984 return t;
4985
4986 case CONST_DOUBLE:
4987 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
4988 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
4989 t = wide_int_to_tree (type,
4990 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
4991 HOST_BITS_PER_WIDE_INT * 2));
4992 else
4993 {
4994 REAL_VALUE_TYPE d;
4995
4996 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4997 t = build_real (type, d);
4998 }
4999
5000 return t;
5001
5002 case CONST_VECTOR:
5003 {
5004 int units = CONST_VECTOR_NUNITS (x);
5005 tree itype = TREE_TYPE (type);
5006 tree *elts;
5007 int i;
5008
5009 /* Build a tree with vector elements. */
5010 elts = XALLOCAVEC (tree, units);
5011 for (i = units - 1; i >= 0; --i)
5012 {
5013 rtx elt = CONST_VECTOR_ELT (x, i);
5014 elts[i] = make_tree (itype, elt);
5015 }
5016
5017 return build_vector (type, elts);
5018 }
5019
5020 case PLUS:
5021 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5022 make_tree (type, XEXP (x, 1)));
5023
5024 case MINUS:
5025 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5026 make_tree (type, XEXP (x, 1)));
5027
5028 case NEG:
5029 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5030
5031 case MULT:
5032 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5033 make_tree (type, XEXP (x, 1)));
5034
5035 case ASHIFT:
5036 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5037 make_tree (type, XEXP (x, 1)));
5038
5039 case LSHIFTRT:
5040 t = unsigned_type_for (type);
5041 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5042 make_tree (t, XEXP (x, 0)),
5043 make_tree (type, XEXP (x, 1))));
5044
5045 case ASHIFTRT:
5046 t = signed_type_for (type);
5047 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5048 make_tree (t, XEXP (x, 0)),
5049 make_tree (type, XEXP (x, 1))));
5050
5051 case DIV:
5052 if (TREE_CODE (type) != REAL_TYPE)
5053 t = signed_type_for (type);
5054 else
5055 t = type;
5056
5057 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5058 make_tree (t, XEXP (x, 0)),
5059 make_tree (t, XEXP (x, 1))));
5060 case UDIV:
5061 t = unsigned_type_for (type);
5062 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5063 make_tree (t, XEXP (x, 0)),
5064 make_tree (t, XEXP (x, 1))));
5065
5066 case SIGN_EXTEND:
5067 case ZERO_EXTEND:
5068 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5069 GET_CODE (x) == ZERO_EXTEND);
5070 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5071
5072 case CONST:
5073 return make_tree (type, XEXP (x, 0));
5074
5075 case SYMBOL_REF:
5076 t = SYMBOL_REF_DECL (x);
5077 if (t)
5078 return fold_convert (type, build_fold_addr_expr (t));
5079 /* else fall through. */
5080
5081 default:
5082 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5083
5084 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5085 address mode to pointer mode. */
5086 if (POINTER_TYPE_P (type))
5087 x = convert_memory_address_addr_space
5088 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5089
5090 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5091 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5092 t->decl_with_rtl.rtl = x;
5093
5094 return t;
5095 }
5096 }
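
/* A minimal usage sketch, added here only as documentation and not called
   anywhere: it shows make_tree rebuilding a tree from the rtx
   (plus:SI (reg:SI N) (const_int 7)).  The helper name is hypothetical; the
   result is a PLUS_EXPR whose first operand is an artificial VAR_DECL with
   the pseudo register as its DECL_RTL.  */

static tree ATTRIBUTE_UNUSED
make_tree_usage_sketch (void)
{
  rtx reg = gen_reg_rtx (SImode);
  rtx sum = gen_rtx_PLUS (SImode, reg, GEN_INT (7));

  /* Roughly the tree "tmp + 7" with type integer_type_node.  */
  return make_tree (integer_type_node, sum);
}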
5097 \f
5098 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5099 and returning TARGET.
5100
5101 If TARGET is 0, a pseudo-register or constant is returned. */
5102
5103 rtx
5104 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5105 {
5106 rtx tem = 0;
5107
5108 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5109 tem = simplify_binary_operation (AND, mode, op0, op1);
5110 if (tem == 0)
5111 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5112
5113 if (target == 0)
5114 target = tem;
5115 else if (tem != target)
5116 emit_move_insn (target, tem);
5117 return target;
5118 }
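
/* A usage sketch (hypothetical helper, not referenced elsewhere): mask REG
   down to its low eight bits, i.e. expand "reg & 0xff" in SImode.  With a
   null TARGET, expand_and returns either a new pseudo or a constant.  */

static rtx ATTRIBUTE_UNUSED
expand_and_usage_sketch (rtx reg)
{
  return expand_and (SImode, reg, GEN_INT (0xff), NULL_RTX);
}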
5119
5120 /* Helper function for emit_store_flag. */
5121 rtx
5122 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5123 machine_mode mode, machine_mode compare_mode,
5124 int unsignedp, rtx x, rtx y, int normalizep,
5125 machine_mode target_mode)
5126 {
5127 struct expand_operand ops[4];
5128 rtx op0, comparison, subtarget;
5129 rtx_insn *last;
5130 machine_mode result_mode = targetm.cstore_mode (icode);
5131
5132 last = get_last_insn ();
5133 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5134 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5135 if (!x || !y)
5136 {
5137 delete_insns_since (last);
5138 return NULL_RTX;
5139 }
5140
5141 if (target_mode == VOIDmode)
5142 target_mode = result_mode;
5143 if (!target)
5144 target = gen_reg_rtx (target_mode);
5145
5146 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5147
5148 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5149 create_fixed_operand (&ops[1], comparison);
5150 create_fixed_operand (&ops[2], x);
5151 create_fixed_operand (&ops[3], y);
5152 if (!maybe_expand_insn (icode, 4, ops))
5153 {
5154 delete_insns_since (last);
5155 return NULL_RTX;
5156 }
5157 subtarget = ops[0].value;
5158
5159 /* If we are converting to a wider mode, first convert to
5160 TARGET_MODE, then normalize. This produces better combining
5161 opportunities on machines that have a SIGN_EXTRACT when we are
5162 testing a single bit. This mostly benefits the 68k.
5163
5164 If STORE_FLAG_VALUE does not have the sign bit set when
5165 interpreted in MODE, we can do this conversion as unsigned, which
5166 is usually more efficient. */
5167 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5168 {
5169 convert_move (target, subtarget,
5170 val_signbit_known_clear_p (result_mode,
5171 STORE_FLAG_VALUE));
5172 op0 = target;
5173 result_mode = target_mode;
5174 }
5175 else
5176 op0 = subtarget;
5177
5178 /* If we want to keep subexpressions around, don't reuse our last
5179 target. */
5180 if (optimize)
5181 subtarget = 0;
5182
5183 /* Now normalize to the proper value in MODE. Sometimes we don't
5184 have to do anything. */
5185 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5186 ;
5187 /* STORE_FLAG_VALUE might be the most negative number, so write
5188 the comparison this way to avoid a compile-time warning. */
5189 else if (- normalizep == STORE_FLAG_VALUE)
5190 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5191
5192 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5193 it hard to use a value of just the sign bit due to ANSI integer
5194 constant typing rules. */
5195 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5196 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5197 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5198 normalizep == 1);
5199 else
5200 {
5201 gcc_assert (STORE_FLAG_VALUE & 1);
5202
5203 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5204 if (normalizep == -1)
5205 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5206 }
5207
5208 /* If we were converting to a smaller mode, do the conversion now. */
5209 if (target_mode != result_mode)
5210 {
5211 convert_move (target, op0, 0);
5212 return target;
5213 }
5214 else
5215 return op0;
5216 }
5217
5218
5219 /* A subroutine of emit_store_flag that includes only the "tricks" that do
5220 not need a recursive call. These are kept separate to avoid infinite
5221 loops. */
5222
5223 static rtx
5224 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5225 machine_mode mode, int unsignedp, int normalizep,
5226 machine_mode target_mode)
5227 {
5228 rtx subtarget;
5229 enum insn_code icode;
5230 machine_mode compare_mode;
5231 enum mode_class mclass;
5232 enum rtx_code scode;
5233 rtx tem;
5234
5235 if (unsignedp)
5236 code = unsigned_condition (code);
5237 scode = swap_condition (code);
5238
5239 /* If one operand is constant, make it the second one. Only do this
5240 if the other operand is not constant as well. */
5241
5242 if (swap_commutative_operands_p (op0, op1))
5243 {
5244 tem = op0;
5245 op0 = op1;
5246 op1 = tem;
5247 code = swap_condition (code);
5248 }
5249
5250 if (mode == VOIDmode)
5251 mode = GET_MODE (op0);
5252
5253 /* For some comparisons with 1 and -1, we can convert this to
5254 comparisons with zero. This will often produce more opportunities for
5255 store-flag insns. */
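  /* For example, "x < 1" becomes "x <= 0", and an unsigned "x >= 1"
     becomes "x != 0".  */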
5256
5257 switch (code)
5258 {
5259 case LT:
5260 if (op1 == const1_rtx)
5261 op1 = const0_rtx, code = LE;
5262 break;
5263 case LE:
5264 if (op1 == constm1_rtx)
5265 op1 = const0_rtx, code = LT;
5266 break;
5267 case GE:
5268 if (op1 == const1_rtx)
5269 op1 = const0_rtx, code = GT;
5270 break;
5271 case GT:
5272 if (op1 == constm1_rtx)
5273 op1 = const0_rtx, code = GE;
5274 break;
5275 case GEU:
5276 if (op1 == const1_rtx)
5277 op1 = const0_rtx, code = NE;
5278 break;
5279 case LTU:
5280 if (op1 == const1_rtx)
5281 op1 = const0_rtx, code = EQ;
5282 break;
5283 default:
5284 break;
5285 }
5286
5287 /* If we are comparing a double-word integer with zero or -1, we can
5288 convert the comparison into one involving a single word. */
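  /* E.g. on a 32-bit target, a DImode "x == 0" becomes an SImode test of
     "(low word | high word) == 0", and a DImode "x < 0" becomes a sign-bit
     test of the high word alone.  */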
5289 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5290 && GET_MODE_CLASS (mode) == MODE_INT
5291 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5292 {
5293 if ((code == EQ || code == NE)
5294 && (op1 == const0_rtx || op1 == constm1_rtx))
5295 {
5296 rtx op00, op01;
5297
5298 /* Do a logical OR or AND of the two words and compare the
5299 result. */
5300 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5301 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5302 tem = expand_binop (word_mode,
5303 op1 == const0_rtx ? ior_optab : and_optab,
5304 op00, op01, NULL_RTX, unsignedp,
5305 OPTAB_DIRECT);
5306
5307 if (tem != 0)
5308 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5309 unsignedp, normalizep);
5310 }
5311 else if ((code == LT || code == GE) && op1 == const0_rtx)
5312 {
5313 rtx op0h;
5314
5315 /* If testing the sign bit, we can just test the high word. */
5316 op0h = simplify_gen_subreg (word_mode, op0, mode,
5317 subreg_highpart_offset (word_mode,
5318 mode));
5319 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5320 unsignedp, normalizep);
5321 }
5322 else
5323 tem = NULL_RTX;
5324
5325 if (tem)
5326 {
5327 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5328 return tem;
5329 if (!target)
5330 target = gen_reg_rtx (target_mode);
5331
5332 convert_move (target, tem,
5333 !val_signbit_known_set_p (word_mode,
5334 (normalizep ? normalizep
5335 : STORE_FLAG_VALUE)));
5336 return target;
5337 }
5338 }
5339
5340 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5341 complement of A (for GE) and shifting the sign bit to the low bit. */
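  /* E.g. for a 0/1 result, "A < 0" becomes the logical shift
     "(unsigned) A >> (BITS - 1)", and "A >= 0" becomes
     "(unsigned) ~A >> (BITS - 1)".  */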
5342 if (op1 == const0_rtx && (code == LT || code == GE)
5343 && GET_MODE_CLASS (mode) == MODE_INT
5344 && (normalizep || STORE_FLAG_VALUE == 1
5345 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5346 {
5347 subtarget = target;
5348
5349 if (!target)
5350 target_mode = mode;
5351
5352 /* If the result is to be wider than OP0, it is best to convert it
5353 first. If it is to be narrower, it is *incorrect* to convert it
5354 first. */
5355 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5356 {
5357 op0 = convert_modes (target_mode, mode, op0, 0);
5358 mode = target_mode;
5359 }
5360
5361 if (target_mode != mode)
5362 subtarget = 0;
5363
5364 if (code == GE)
5365 op0 = expand_unop (mode, one_cmpl_optab, op0,
5366 ((STORE_FLAG_VALUE == 1 || normalizep)
5367 ? 0 : subtarget), 0);
5368
5369 if (STORE_FLAG_VALUE == 1 || normalizep)
5370 /* If we are supposed to produce a 0/1 value, we want to do
5371 a logical shift from the sign bit to the low-order bit; for
5372 a -1/0 value, we do an arithmetic shift. */
5373 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5374 GET_MODE_BITSIZE (mode) - 1,
5375 subtarget, normalizep != -1);
5376
5377 if (mode != target_mode)
5378 op0 = convert_modes (target_mode, mode, op0, 0);
5379
5380 return op0;
5381 }
5382
5383 mclass = GET_MODE_CLASS (mode);
5384 for (compare_mode = mode; compare_mode != VOIDmode;
5385 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5386 {
5387 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5388 icode = optab_handler (cstore_optab, optab_mode);
5389 if (icode != CODE_FOR_nothing)
5390 {
5391 do_pending_stack_adjust ();
5392 tem = emit_cstore (target, icode, code, mode, compare_mode,
5393 unsignedp, op0, op1, normalizep, target_mode);
5394 if (tem)
5395 return tem;
5396
5397 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5398 {
5399 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5400 unsignedp, op1, op0, normalizep, target_mode);
5401 if (tem)
5402 return tem;
5403 }
5404 break;
5405 }
5406 }
5407
5408 return 0;
5409 }
5410
5411 /* Emit a store-flag instruction for comparison CODE on OP0 and OP1
5412 and store the result in TARGET. Normally return TARGET.
5413 Return 0 if that cannot be done.
5414
5415 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5416 it is VOIDmode, they cannot both be CONST_INT.
5417
5418 UNSIGNEDP is for the case where we have to widen the operands
5419 to perform the operation. It says to use zero-extension.
5420
5421 NORMALIZEP is 1 if we should convert the result to be either zero
5422 or one. NORMALIZEP is -1 if we should convert the result to be
5423 either zero or -1. If NORMALIZEP is zero, the result will be left
5424 "raw" out of the scc insn. */
5425
5426 rtx
5427 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5428 machine_mode mode, int unsignedp, int normalizep)
5429 {
5430 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5431 enum rtx_code rcode;
5432 rtx subtarget;
5433 rtx tem, trueval;
5434 rtx_insn *last;
5435
5436 /* If we compare constants, we shouldn't use a store-flag operation,
5437 but a constant load. We can get there via the vanilla route that
5438 usually generates a compare-branch sequence, but will in this case
5439 fold the comparison to a constant, and thus elide the branch. */
5440 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5441 return NULL_RTX;
5442
5443 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5444 target_mode);
5445 if (tem)
5446 return tem;
5447
5448 /* If we reached here, we can't do this with a scc insn; however, there
5449 are some comparisons that can be done in other ways. Don't do any
5450 of these cases if branches are very cheap. */
5451 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5452 return 0;
5453
5454 /* See what we need to return. We can only return a 1, -1, or the
5455 sign bit. */
5456
5457 if (normalizep == 0)
5458 {
5459 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5460 normalizep = STORE_FLAG_VALUE;
5461
5462 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5463 ;
5464 else
5465 return 0;
5466 }
5467
5468 last = get_last_insn ();
5469
5470 /* If optimizing, use different pseudo registers for each insn, instead
5471 of reusing the same pseudo. This leads to better CSE, but slows
5472 down the compiler, since there are more pseudos. */
5473 subtarget = (!optimize
5474 && (target_mode == mode)) ? target : NULL_RTX;
5475 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5476
5477 /* For floating-point comparisons, try the reverse comparison or try
5478 changing the "orderedness" of the comparison. */
5479 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5480 {
5481 enum rtx_code first_code;
5482 bool and_them;
5483
5484 rcode = reverse_condition_maybe_unordered (code);
5485 if (can_compare_p (rcode, mode, ccp_store_flag)
5486 && (code == ORDERED || code == UNORDERED
5487 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5488 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5489 {
5490 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5491 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5492
5493 /* For the reverse comparison, use either an addition or an XOR. */
5494 if (want_add
5495 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5496 optimize_insn_for_speed_p ()) == 0)
5497 {
5498 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5499 STORE_FLAG_VALUE, target_mode);
5500 if (tem)
5501 return expand_binop (target_mode, add_optab, tem,
5502 gen_int_mode (normalizep, target_mode),
5503 target, 0, OPTAB_WIDEN);
5504 }
5505 else if (!want_add
5506 && rtx_cost (trueval, XOR, 1,
5507 optimize_insn_for_speed_p ()) == 0)
5508 {
5509 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5510 normalizep, target_mode);
5511 if (tem)
5512 return expand_binop (target_mode, xor_optab, tem, trueval,
5513 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5514 }
5515 }
5516
5517 delete_insns_since (last);
5518
5519 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5520 if (code == ORDERED || code == UNORDERED)
5521 return 0;
5522
5523 and_them = split_comparison (code, mode, &first_code, &code);
5524
5525 /* If there are no NaNs, the first comparison should always fall through.
5526 Effectively change the comparison to the other one. */
5527 if (!HONOR_NANS (mode))
5528 {
5529 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5530 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5531 target_mode);
5532 }
5533
5534 #ifdef HAVE_conditional_move
5535 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5536 conditional move. */
5537 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5538 normalizep, target_mode);
5539 if (tem == 0)
5540 return 0;
5541
5542 if (and_them)
5543 tem = emit_conditional_move (target, code, op0, op1, mode,
5544 tem, const0_rtx, GET_MODE (tem), 0);
5545 else
5546 tem = emit_conditional_move (target, code, op0, op1, mode,
5547 trueval, tem, GET_MODE (tem), 0);
5548
5549 if (tem == 0)
5550 delete_insns_since (last);
5551 return tem;
5552 #else
5553 return 0;
5554 #endif
5555 }
5556
5557 /* The remaining tricks only apply to integer comparisons. */
5558
5559 if (GET_MODE_CLASS (mode) != MODE_INT)
5560 return 0;
5561
5562 /* If this is an equality comparison of integers, we can try to exclusive-or
5563 (or subtract) the two operands and use a recursive call to try the
5564 comparison with zero. Don't do any of these cases if branches are
5565 very cheap. */
5566
5567 if ((code == EQ || code == NE) && op1 != const0_rtx)
5568 {
5569 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5570 OPTAB_WIDEN);
5571
5572 if (tem == 0)
5573 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5574 OPTAB_WIDEN);
5575 if (tem != 0)
5576 tem = emit_store_flag (target, code, tem, const0_rtx,
5577 mode, unsignedp, normalizep);
5578 if (tem != 0)
5579 return tem;
5580
5581 delete_insns_since (last);
5582 }
5583
5584 /* For integer comparisons, try the reverse comparison. However, for
5585 small X, if we'd have to extend it anyway, implementing "X != 0"
5586 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5587 rcode = reverse_condition (code);
5588 if (can_compare_p (rcode, mode, ccp_store_flag)
5589 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5590 && code == NE
5591 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5592 && op1 == const0_rtx))
5593 {
5594 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5595 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5596
5597 /* Again, for the reverse comparison, use either an addition or an XOR. */
5598 if (want_add
5599 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5600 optimize_insn_for_speed_p ()) == 0)
5601 {
5602 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5603 STORE_FLAG_VALUE, target_mode);
5604 if (tem != 0)
5605 tem = expand_binop (target_mode, add_optab, tem,
5606 gen_int_mode (normalizep, target_mode),
5607 target, 0, OPTAB_WIDEN);
5608 }
5609 else if (!want_add
5610 && rtx_cost (trueval, XOR, 1,
5611 optimize_insn_for_speed_p ()) == 0)
5612 {
5613 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5614 normalizep, target_mode);
5615 if (tem != 0)
5616 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5617 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5618 }
5619
5620 if (tem != 0)
5621 return tem;
5622 delete_insns_since (last);
5623 }
5624
5625 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5626 the constant zero. Reject all other comparisons at this point. Only
5627 do LE and GT if branches are expensive since they are expensive on
5628 2-operand machines. */
5629
5630 if (op1 != const0_rtx
5631 || (code != EQ && code != NE
5632 && (BRANCH_COST (optimize_insn_for_speed_p (),
5633 false) <= 1 || (code != LE && code != GT))))
5634 return 0;
5635
5636 /* Try to put the result of the comparison in the sign bit. Start by
5637 assuming we can't (TEM == 0); the cases below set TEM on success. */
5638
5639 tem = 0;
5640
5641 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5642 the sign bit set. */
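  /* For example, A = 3 gives 3 | 2 = 3 (sign bit clear), A = 0 gives
     0 | -1 = -1 (sign bit set), and A = -5 gives -5 | -6 = -5 (sign bit
     set).  */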
5643
5644 if (code == LE)
5645 {
5646 /* This is destructive, so SUBTARGET can't be OP0. */
5647 if (rtx_equal_p (subtarget, op0))
5648 subtarget = 0;
5649
5650 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5651 OPTAB_WIDEN);
5652 if (tem)
5653 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5654 OPTAB_WIDEN);
5655 }
5656
5657 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5658 number of bits in the mode of OP0, minus one. */
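  /* E.g. in a 32-bit mode, A = 7 gives (7 >> 31) - 7 = -7 (sign bit set),
     A = 0 gives 0 (clear), and A = -7 gives -1 - (-7) = 6 (clear).  */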
5659
5660 if (code == GT)
5661 {
5662 if (rtx_equal_p (subtarget, op0))
5663 subtarget = 0;
5664
5665 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5666 GET_MODE_BITSIZE (mode) - 1,
5667 subtarget, 0);
5668 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5669 OPTAB_WIDEN);
5670 }
5671
5672 if (code == EQ || code == NE)
5673 {
5674 /* For EQ or NE, one way to do the comparison is to apply an operation
5675 that converts the operand into a positive number if it is nonzero
5676 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5677 for NE we negate. This puts the result in the sign bit. Then we
5678 normalize with a shift, if needed.
5679
5680 Two operations that can do the above actions are ABS and FFS, so try
5681 them. If that doesn't work, and MODE is smaller than a full word,
5682 we can use zero-extension to the wider mode (an unsigned conversion)
5683 as the operation. */
5684
5685 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5686 that is compensated by the subsequent overflow when subtracting
5687 one / negating. */
5688
5689 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5690 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5691 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5692 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5693 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5694 {
5695 tem = convert_modes (word_mode, mode, op0, 1);
5696 mode = word_mode;
5697 }
5698
5699 if (tem != 0)
5700 {
5701 if (code == EQ)
5702 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5703 0, OPTAB_WIDEN);
5704 else
5705 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5706 }
5707
5708 /* If we couldn't do it that way, for NE we can "or" the two's complement
5709 of the value with itself. For EQ, we take the one's complement of
5710 that "or", which is an extra insn, so we only handle EQ if branches
5711 are expensive. */
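  /* (-X) | X has its sign bit set exactly when X is nonzero; the
     normalizing shift further below then turns it into the requested
     0/1 or 0/-1 value.  */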
5712
5713 if (tem == 0
5714 && (code == NE
5715 || BRANCH_COST (optimize_insn_for_speed_p (),
5716 false) > 1))
5717 {
5718 if (rtx_equal_p (subtarget, op0))
5719 subtarget = 0;
5720
5721 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5722 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5723 OPTAB_WIDEN);
5724
5725 if (tem && code == EQ)
5726 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5727 }
5728 }
5729
5730 if (tem && normalizep)
5731 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5732 GET_MODE_BITSIZE (mode) - 1,
5733 subtarget, normalizep == 1);
5734
5735 if (tem)
5736 {
5737 if (!target)
5738 ;
5739 else if (GET_MODE (tem) != target_mode)
5740 {
5741 convert_move (target, tem, 0);
5742 tem = target;
5743 }
5744 else if (!subtarget)
5745 {
5746 emit_move_insn (target, tem);
5747 tem = target;
5748 }
5749 }
5750 else
5751 delete_insns_since (last);
5752
5753 return tem;
5754 }
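
/* A usage sketch (hypothetical helper, not called anywhere): expand
   "r = (a != b)" as a 0/1 SImode value.  A null return means no store-flag
   sequence could be produced and the caller must fall back to a
   compare-and-branch sequence (see emit_store_flag_force below).  */

static rtx ATTRIBUTE_UNUSED
emit_store_flag_usage_sketch (rtx a, rtx b)
{
  rtx target = gen_reg_rtx (SImode);
  return emit_store_flag (target, NE, a, b, SImode, 1, 1);
}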
5755
5756 /* Like emit_store_flag, but always succeeds. */
5757
5758 rtx
5759 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5760 machine_mode mode, int unsignedp, int normalizep)
5761 {
5762 rtx tem;
5763 rtx_code_label *label;
5764 rtx trueval, falseval;
5765
5766 /* First see if emit_store_flag can do the job. */
5767 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5768 if (tem != 0)
5769 return tem;
5770
5771 if (!target)
5772 target = gen_reg_rtx (word_mode);
5773
5774 /* If this failed, we have to do this with set/compare/jump/set code.
5775 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5776 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5777 if (code == NE
5778 && GET_MODE_CLASS (mode) == MODE_INT
5779 && REG_P (target)
5780 && op0 == target
5781 && op1 == const0_rtx)
5782 {
5783 label = gen_label_rtx ();
5784 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5785 mode, NULL_RTX, NULL_RTX, label, -1);
5786 emit_move_insn (target, trueval);
5787 emit_label (label);
5788 return target;
5789 }
5790
5791 if (!REG_P (target)
5792 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5793 target = gen_reg_rtx (GET_MODE (target));
5794
5795 /* Jump in the right direction if the target cannot implement CODE
5796 but can jump on its reverse condition. */
5797 falseval = const0_rtx;
5798 if (! can_compare_p (code, mode, ccp_jump)
5799 && (! FLOAT_MODE_P (mode)
5800 || code == ORDERED || code == UNORDERED
5801 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5802 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5803 {
5804 enum rtx_code rcode;
5805 if (FLOAT_MODE_P (mode))
5806 rcode = reverse_condition_maybe_unordered (code);
5807 else
5808 rcode = reverse_condition (code);
5809
5810 /* Canonicalize to UNORDERED for the libcall. */
5811 if (can_compare_p (rcode, mode, ccp_jump)
5812 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5813 {
5814 falseval = trueval;
5815 trueval = const0_rtx;
5816 code = rcode;
5817 }
5818 }
5819
5820 emit_move_insn (target, trueval);
5821 label = gen_label_rtx ();
5822 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5823 NULL_RTX, label, -1);
5824
5825 emit_move_insn (target, falseval);
5826 emit_label (label);
5827
5828 return target;
5829 }
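
/* A usage sketch (hypothetical helper, not called anywhere): unlike
   emit_store_flag, emit_store_flag_force never fails, so "r = (a < 0)" can
   be expanded unconditionally even when the target lacks scc patterns.  */

static rtx ATTRIBUTE_UNUSED
emit_store_flag_force_usage_sketch (rtx a)
{
  rtx target = gen_reg_rtx (SImode);
  return emit_store_flag_force (target, LT, a, const0_rtx, SImode, 0, 1);
}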
5830 \f
5831 /* Perform possibly multi-word comparison and conditional jump to LABEL
5832 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5833 now a thin wrapper around do_compare_rtx_and_jump. */
5834
5835 static void
5836 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5837 rtx_code_label *label)
5838 {
5839 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5840 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5841 NULL_RTX, NULL_RTX, label, -1);
5842 }