gcc/expmed.c  (thirdparty/gcc.git, commit "[62/77] Big machine_mode to scalar_int_mode replacement")
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2017 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "emit-rtl.h"
35 #include "diagnostic-core.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "dojump.h"
39 #include "explow.h"
40 #include "expr.h"
41 #include "langhooks.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx, scalar_int_mode, bool);
54 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 rtx, scalar_int_mode, bool);
58 static void store_split_bit_field (rtx, opt_scalar_int_mode,
59 unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT,
63 rtx, scalar_int_mode, bool);
64 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
65 unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, rtx, int, bool);
67 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
68 unsigned HOST_WIDE_INT,
69 unsigned HOST_WIDE_INT, rtx, int, bool);
70 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
71 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
72 unsigned HOST_WIDE_INT,
73 unsigned HOST_WIDE_INT, int, bool);
74 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
75 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
76 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
77
78 /* Return a constant integer mask value of mode MODE with BITSIZE ones
79 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
80 The mask is truncated if necessary to the width of mode MODE. The
81 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
82
83 static inline rtx
84 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
85 {
86 return immed_wide_int_const
87 (wi::shifted_mask (bitpos, bitsize, complement,
88 GET_MODE_PRECISION (mode)), mode);
89 }
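
/* Purely illustrative example (assuming a 32-bit SImode):

     mask_rtx (SImode, 8, 4, false)  ==>  0x00000f00   (bits 8..11 set)
     mask_rtx (SImode, 8, 4, true)   ==>  0xfffff0ff   (those bits clear)

   i.e. BITSIZE ones starting at bit BITPOS, truncated to MODE, or the
   complement of that mask.  */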
90
91 /* Test whether a value is zero or a power of two.  */
92 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
93 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
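
/* Illustration of the bit trick: x & (x - 1) clears the lowest set bit,
   so the result is zero exactly when X has at most one bit set.  For
   example, 0, 1, 2, 4 and 8 all satisfy the test, while 6 does not,
   because (6 & 5) == 4.  */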
94
95 struct init_expmed_rtl
96 {
97 rtx reg;
98 rtx plus;
99 rtx neg;
100 rtx mult;
101 rtx sdiv;
102 rtx udiv;
103 rtx sdiv_32;
104 rtx smod_32;
105 rtx wide_mult;
106 rtx wide_lshr;
107 rtx wide_trunc;
108 rtx shift;
109 rtx shift_mult;
110 rtx shift_add;
111 rtx shift_sub0;
112 rtx shift_sub1;
113 rtx zext;
114 rtx trunc;
115
116 rtx pow2[MAX_BITS_PER_WORD];
117 rtx cint[MAX_BITS_PER_WORD];
118 };
119
120 static void
121 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
122 scalar_int_mode from_mode, bool speed)
123 {
124 int to_size, from_size;
125 rtx which;
126
127 to_size = GET_MODE_PRECISION (to_mode);
128 from_size = GET_MODE_PRECISION (from_mode);
129
130 /* Most partial integers have a precision less than the "full"
131 integer they require for storage.  If one does not, reduce the
132 bit size by one for the size comparison below, so that the
133 partial mode still counts as narrower.  */
134 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
135 && pow2p_hwi (to_size))
136 to_size --;
137 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
138 && pow2p_hwi (from_size))
139 from_size --;
140
141 /* Assume cost of zero-extend and sign-extend is the same. */
142 which = (to_size < from_size ? all->trunc : all->zext);
143
144 PUT_MODE (all->reg, from_mode);
145 set_convert_cost (to_mode, from_mode, speed,
146 set_src_cost (which, to_mode, speed));
147 }
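
/* Illustrative example (hypothetical precisions): a partial integer mode
   with precision 24 stored in 32 bits already compares as narrower than
   SImode here.  If a partial mode reported a power-of-two precision such
   as 32, the adjustment above counts it as 31 bits, so it still compares
   as narrower than the corresponding full 32-bit mode.  */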
148
149 static void
150 init_expmed_one_mode (struct init_expmed_rtl *all,
151 machine_mode mode, int speed)
152 {
153 int m, n, mode_bitsize;
154 machine_mode mode_from;
155
156 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
157
158 PUT_MODE (all->reg, mode);
159 PUT_MODE (all->plus, mode);
160 PUT_MODE (all->neg, mode);
161 PUT_MODE (all->mult, mode);
162 PUT_MODE (all->sdiv, mode);
163 PUT_MODE (all->udiv, mode);
164 PUT_MODE (all->sdiv_32, mode);
165 PUT_MODE (all->smod_32, mode);
166 PUT_MODE (all->wide_trunc, mode);
167 PUT_MODE (all->shift, mode);
168 PUT_MODE (all->shift_mult, mode);
169 PUT_MODE (all->shift_add, mode);
170 PUT_MODE (all->shift_sub0, mode);
171 PUT_MODE (all->shift_sub1, mode);
172 PUT_MODE (all->zext, mode);
173 PUT_MODE (all->trunc, mode);
174
175 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
176 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
177 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
178 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
179 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
180
181 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
182 <= 2 * add_cost (speed, mode)));
183 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
184 <= 4 * add_cost (speed, mode)));
185
186 set_shift_cost (speed, mode, 0, 0);
187 {
188 int cost = add_cost (speed, mode);
189 set_shiftadd_cost (speed, mode, 0, cost);
190 set_shiftsub0_cost (speed, mode, 0, cost);
191 set_shiftsub1_cost (speed, mode, 0, cost);
192 }
193
194 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
195 for (m = 1; m < n; m++)
196 {
197 XEXP (all->shift, 1) = all->cint[m];
198 XEXP (all->shift_mult, 1) = all->pow2[m];
199
200 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
201 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
202 speed));
203 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
204 speed));
205 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
206 speed));
207 }
208
209 scalar_int_mode int_mode_to;
210 if (is_a <scalar_int_mode> (mode, &int_mode_to))
211 {
212 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
213 mode_from = (machine_mode)(mode_from + 1))
214 init_expmed_one_conv (all, int_mode_to,
215 as_a <scalar_int_mode> (mode_from), speed);
216
217 scalar_int_mode wider_mode;
218 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
219 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
220 {
221 PUT_MODE (all->zext, wider_mode);
222 PUT_MODE (all->wide_mult, wider_mode);
223 PUT_MODE (all->wide_lshr, wider_mode);
224 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
225
226 set_mul_widen_cost (speed, wider_mode,
227 set_src_cost (all->wide_mult, wider_mode, speed));
228 set_mul_highpart_cost (speed, int_mode_to,
229 set_src_cost (all->wide_trunc,
230 int_mode_to, speed));
231 }
232 }
233 }
234
235 void
236 init_expmed (void)
237 {
238 struct init_expmed_rtl all;
239 machine_mode mode = QImode;
240 int m, speed;
241
242 memset (&all, 0, sizeof all);
243 for (m = 1; m < MAX_BITS_PER_WORD; m++)
244 {
245 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
246 all.cint[m] = GEN_INT (m);
247 }
248
249 /* Avoid using hard regs in ways which may be unsupported. */
250 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
251 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
252 all.neg = gen_rtx_NEG (mode, all.reg);
253 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
254 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
255 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
256 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
257 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
258 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
259 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
260 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
261 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
262 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
263 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
264 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
265 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
266 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
267 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
268
269 for (speed = 0; speed < 2; speed++)
270 {
271 crtl->maybe_hot_insn_p = speed;
272 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
273
274 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
275 mode = (machine_mode)(mode + 1))
276 init_expmed_one_mode (&all, mode, speed);
277
278 if (MIN_MODE_PARTIAL_INT != VOIDmode)
279 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
280 mode = (machine_mode)(mode + 1))
281 init_expmed_one_mode (&all, mode, speed);
282
283 if (MIN_MODE_VECTOR_INT != VOIDmode)
284 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
285 mode = (machine_mode)(mode + 1))
286 init_expmed_one_mode (&all, mode, speed);
287 }
288
289 if (alg_hash_used_p ())
290 {
291 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
292 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
293 }
294 else
295 set_alg_hash_used_p (true);
296 default_rtl_profile ();
297
298 ggc_free (all.trunc);
299 ggc_free (all.shift_sub1);
300 ggc_free (all.shift_sub0);
301 ggc_free (all.shift_add);
302 ggc_free (all.shift_mult);
303 ggc_free (all.shift);
304 ggc_free (all.wide_trunc);
305 ggc_free (all.wide_lshr);
306 ggc_free (all.wide_mult);
307 ggc_free (all.zext);
308 ggc_free (all.smod_32);
309 ggc_free (all.sdiv_32);
310 ggc_free (all.udiv);
311 ggc_free (all.sdiv);
312 ggc_free (all.mult);
313 ggc_free (all.neg);
314 ggc_free (all.plus);
315 ggc_free (all.reg);
316 }
317
318 /* Return an rtx representing minus the value of X.
319 MODE is the intended mode of the result,
320 useful if X is a CONST_INT. */
321
322 rtx
323 negate_rtx (machine_mode mode, rtx x)
324 {
325 rtx result = simplify_unary_operation (NEG, mode, x, mode);
326
327 if (result == 0)
328 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
329
330 return result;
331 }
332
333 /* Whether reverse storage order is supported on the target. */
334 static int reverse_storage_order_supported = -1;
335
336 /* Check whether reverse storage order is supported on the target. */
337
338 static void
339 check_reverse_storage_order_support (void)
340 {
341 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
342 {
343 reverse_storage_order_supported = 0;
344 sorry ("reverse scalar storage order");
345 }
346 else
347 reverse_storage_order_supported = 1;
348 }
349
350 /* Whether reverse FP storage order is supported on the target. */
351 static int reverse_float_storage_order_supported = -1;
352
353 /* Check whether reverse FP storage order is supported on the target. */
354
355 static void
356 check_reverse_float_storage_order_support (void)
357 {
358 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
359 {
360 reverse_float_storage_order_supported = 0;
361 sorry ("reverse floating-point scalar storage order");
362 }
363 else
364 reverse_float_storage_order_supported = 1;
365 }
366
367 /* Return an rtx representing the value of X with reverse storage order.
368 MODE is the intended mode of the result,
369 useful if X is a CONST_INT. */
370
371 rtx
372 flip_storage_order (machine_mode mode, rtx x)
373 {
374 scalar_int_mode int_mode;
375 rtx result;
376
377 if (mode == QImode)
378 return x;
379
380 if (COMPLEX_MODE_P (mode))
381 {
382 rtx real = read_complex_part (x, false);
383 rtx imag = read_complex_part (x, true);
384
385 real = flip_storage_order (GET_MODE_INNER (mode), real);
386 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
387
388 return gen_rtx_CONCAT (mode, real, imag);
389 }
390
391 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
392 check_reverse_storage_order_support ();
393
394 if (!is_a <scalar_int_mode> (mode, &int_mode))
395 {
396 if (FLOAT_MODE_P (mode)
397 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
398 check_reverse_float_storage_order_support ();
399
400 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
401 {
402 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
403 return x;
404 }
405 x = gen_lowpart (int_mode, x);
406 }
407
408 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
409 if (result == 0)
410 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
411
412 if (int_mode != mode)
413 result = gen_lowpart (mode, result);
414
415 return result;
416 }
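
/* Illustration: for a scalar integer, flip_storage_order reduces to a
   byte swap, e.g. an SImode value 0x11223344 becomes 0x44332211.  QImode
   values are returned unchanged, complex values have their real and
   imaginary parts flipped separately, and other scalar modes are punned
   to an integer mode of the same precision first.  */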
417
418 /* If MODE is set, adjust bitfield memory MEM so that it points to the
419 first unit of mode MODE that contains a bitfield of size BITSIZE at
420 bit position BITNUM. If MODE is not set, return a BLKmode reference
421 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
422 of the field within the new memory. */
423
424 static rtx
425 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
426 unsigned HOST_WIDE_INT bitsize,
427 unsigned HOST_WIDE_INT bitnum,
428 unsigned HOST_WIDE_INT *new_bitnum)
429 {
430 scalar_int_mode imode;
431 if (mode.exists (&imode))
432 {
433 unsigned int unit = GET_MODE_BITSIZE (imode);
434 *new_bitnum = bitnum % unit;
435 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
436 return adjust_bitfield_address (mem, imode, offset);
437 }
438 else
439 {
440 *new_bitnum = bitnum % BITS_PER_UNIT;
441 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
442 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
443 / BITS_PER_UNIT);
444 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
445 }
446 }
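
/* Worked example (assuming 8-bit units and a 32-bit SImode): for a field
   at BITNUM 37,

     - with MODE == SImode: *NEW_BITNUM = 37 % 32 = 5 and the MEM is
       rebased by (37 - 5) / 8 = 4 bytes;
     - with no MODE (BLKmode): *NEW_BITNUM = 37 % 8 = 5, the MEM is
       rebased by 37 / 8 = 4 bytes and sized to cover
       (5 + BITSIZE + 7) / 8 bytes.  */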
447
448 /* The caller wants to perform insertion or extraction PATTERN on a
449 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
450 BITREGION_START and BITREGION_END are as for store_bit_field
451 and FIELDMODE is the natural mode of the field.
452
453 Search for a mode that is compatible with the memory access
454 restrictions and (where applicable) with a register insertion or
455 extraction. Return the new memory on success, storing the adjusted
456 bit position in *NEW_BITNUM. Return null otherwise. */
457
458 static rtx
459 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
460 rtx op0, HOST_WIDE_INT bitsize,
461 HOST_WIDE_INT bitnum,
462 unsigned HOST_WIDE_INT bitregion_start,
463 unsigned HOST_WIDE_INT bitregion_end,
464 machine_mode fieldmode,
465 unsigned HOST_WIDE_INT *new_bitnum)
466 {
467 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
468 bitregion_end, MEM_ALIGN (op0),
469 MEM_VOLATILE_P (op0));
470 scalar_int_mode best_mode;
471 if (iter.next_mode (&best_mode))
472 {
473 /* We can use a memory in BEST_MODE. See whether this is true for
474 any wider modes. All other things being equal, we prefer to
475 use the widest mode possible because it tends to expose more
476 CSE opportunities. */
477 if (!iter.prefer_smaller_modes ())
478 {
479 /* Limit the search to the mode required by the corresponding
480 register insertion or extraction instruction, if any. */
481 scalar_int_mode limit_mode = word_mode;
482 extraction_insn insn;
483 if (get_best_reg_extraction_insn (&insn, pattern,
484 GET_MODE_BITSIZE (best_mode),
485 fieldmode))
486 limit_mode = insn.field_mode;
487
488 scalar_int_mode wider_mode;
489 while (iter.next_mode (&wider_mode)
490 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
491 best_mode = wider_mode;
492 }
493 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
494 new_bitnum);
495 }
496 return NULL_RTX;
497 }
498
499 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
500 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
501 offset is then BITNUM / BITS_PER_UNIT. */
502
503 static bool
504 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
505 unsigned HOST_WIDE_INT bitsize,
506 machine_mode struct_mode)
507 {
508 if (BYTES_BIG_ENDIAN)
509 return (bitnum % BITS_PER_UNIT == 0
510 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
511 || (bitnum + bitsize) % BITS_PER_WORD == 0));
512 else
513 return bitnum % BITS_PER_WORD == 0;
514 }
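
/* Example (32-bit words, little-endian): a 32-bit field at BITNUM 32
   within a DImode value is the lowpart subreg at byte offset
   32 / BITS_PER_UNIT = 4.  On big-endian targets the test instead
   requires the field to end on a word boundary or at the end of the
   structure.  */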
515
516 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
517 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
518 Return false if the access would touch memory outside the range
519 BITREGION_START to BITREGION_END for conformance to the C++ memory
520 model. */
521
522 static bool
523 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
524 unsigned HOST_WIDE_INT bitnum,
525 scalar_int_mode fieldmode,
526 unsigned HOST_WIDE_INT bitregion_start,
527 unsigned HOST_WIDE_INT bitregion_end)
528 {
529 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
530
531 /* -fstrict-volatile-bitfields must be enabled and we must have a
532 volatile MEM. */
533 if (!MEM_P (op0)
534 || !MEM_VOLATILE_P (op0)
535 || flag_strict_volatile_bitfields <= 0)
536 return false;
537
538 /* The bit size must not be larger than the field mode, and
539 the field mode must not be larger than a word. */
540 if (bitsize > modesize || modesize > BITS_PER_WORD)
541 return false;
542
543 /* Check for cases of unaligned fields that must be split. */
544 if (bitnum % modesize + bitsize > modesize)
545 return false;
546
547 /* The memory must be sufficiently aligned for a MODESIZE access.
548 This condition guarantees that the memory access will not
549 touch anything after the end of the structure. */
550 if (MEM_ALIGN (op0) < modesize)
551 return false;
552
553 /* Check for cases where the C++ memory model applies. */
554 if (bitregion_end != 0
555 && (bitnum - bitnum % modesize < bitregion_start
556 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
557 return false;
558
559 return true;
560 }
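
/* Example: with -fstrict-volatile-bitfields, a 16-bit field with HImode
   FIELDMODE starting at bit 16 of a volatile MEM aligned to at least
   16 bits qualifies (assuming the bit-region check also passes); the
   same field starting at bit 24 does not, because 24 % 16 + 16 > 16
   means the access would have to be split.  */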
561
562 /* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
563 bit number BITNUM can be treated as a simple value of mode MODE. */
564
565 static bool
566 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
567 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
568 {
569 return (MEM_P (op0)
570 && bitnum % BITS_PER_UNIT == 0
571 && bitsize == GET_MODE_BITSIZE (mode)
572 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
573 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
574 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
575 }
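
/* Example: when unaligned accesses are cheap, any SImode-sized field
   that starts on a byte boundary can be accessed as a single SImode
   MEM; when they are slow, BITNUM must also be a multiple of
   GET_MODE_ALIGNMENT (SImode) and the MEM at least that aligned.  */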
576 \f
577 /* Try to use instruction INSV to store VALUE into a field of OP0.
578 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
579 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
580 are as for store_bit_field. */
581
582 static bool
583 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
584 opt_scalar_int_mode op0_mode,
585 unsigned HOST_WIDE_INT bitsize,
586 unsigned HOST_WIDE_INT bitnum,
587 rtx value, scalar_int_mode value_mode)
588 {
589 struct expand_operand ops[4];
590 rtx value1;
591 rtx xop0 = op0;
592 rtx_insn *last = get_last_insn ();
593 bool copy_back = false;
594
595 scalar_int_mode op_mode = insv->field_mode;
596 unsigned int unit = GET_MODE_BITSIZE (op_mode);
597 if (bitsize == 0 || bitsize > unit)
598 return false;
599
600 if (MEM_P (xop0))
601 /* Get a reference to the first byte of the field. */
602 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
603 &bitnum);
604 else
605 {
606 /* Convert from counting within OP0 to counting in OP_MODE. */
607 if (BYTES_BIG_ENDIAN)
608 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
609
610 /* If xop0 is a register, we need it in OP_MODE
611 to make it acceptable to the format of insv. */
612 if (GET_CODE (xop0) == SUBREG)
613 /* We can't just change the mode, because this might clobber op0,
614 and we will need the original value of op0 if insv fails. */
615 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
616 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
617 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
618 }
619
620 /* If the destination is a paradoxical subreg such that we need a
621 truncate to the inner mode, perform the insertion on a temporary and
622 truncate the result to the original destination. Note that we can't
623 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
624 X) 0)) is (reg:N X). */
625 if (GET_CODE (xop0) == SUBREG
626 && REG_P (SUBREG_REG (xop0))
627 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
628 op_mode))
629 {
630 rtx tem = gen_reg_rtx (op_mode);
631 emit_move_insn (tem, xop0);
632 xop0 = tem;
633 copy_back = true;
634 }
635
636 /* There is a similar overflow check at the start of store_bit_field_1,
637 but it only handles the case where the field lies completely
638 outside the register.  The field can also lie partially in the
639 register, and in that case we need to adjust BITSIZE for the
640 partial overflow.  Without this fix, pr48335-2.c on big-endian
641 would break on targets that have a bit-insert instruction, such
642 as arm and aarch64.  */
643 if (bitsize + bitnum > unit && bitnum < unit)
644 {
645 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
646 "destination object, data truncated into %wu-bit",
647 bitsize, unit - bitnum);
648 bitsize = unit - bitnum;
649 }
650
651 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
652 "backwards" from the size of the unit we are inserting into.
653 Otherwise, we count bits from the most significant on a
654 BYTES/BITS_BIG_ENDIAN machine. */
655
656 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
657 bitnum = unit - bitsize - bitnum;
658
659 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
660 value1 = value;
661 if (value_mode != op_mode)
662 {
663 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
664 {
665 rtx tmp;
666 /* Optimization: Don't bother really extending VALUE
667 if it has all the bits we will actually use. However,
668 if we must narrow it, be sure we do it correctly. */
669
670 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
671 {
672 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
673 if (! tmp)
674 tmp = simplify_gen_subreg (op_mode,
675 force_reg (value_mode, value1),
676 value_mode, 0);
677 }
678 else
679 {
680 tmp = gen_lowpart_if_possible (op_mode, value1);
681 if (! tmp)
682 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
683 }
684 value1 = tmp;
685 }
686 else if (CONST_INT_P (value))
687 value1 = gen_int_mode (INTVAL (value), op_mode);
688 else
689 /* The parse phase is supposed to make VALUE's data type
690 match that of the component reference, which is a type
691 at least as wide as the field; so VALUE should have
692 a mode that corresponds to that type. */
693 gcc_assert (CONSTANT_P (value));
694 }
695
696 create_fixed_operand (&ops[0], xop0);
697 create_integer_operand (&ops[1], bitsize);
698 create_integer_operand (&ops[2], bitnum);
699 create_input_operand (&ops[3], value1, op_mode);
700 if (maybe_expand_insn (insv->icode, 4, ops))
701 {
702 if (copy_back)
703 convert_move (op0, xop0, true);
704 return true;
705 }
706 delete_insns_since (last);
707 return false;
708 }
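
/* Example of the bit-numbering conversion above: with a 32-bit OP_MODE
   and BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN, a 4-bit field described by
   the caller as starting at bit 8 is passed to the insv pattern as
   starting at bit 32 - 4 - 8 = 20, i.e. counted from the other end of
   the unit.  */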
709
710 /* A subroutine of store_bit_field, with the same arguments. Return true
711 if the operation could be implemented.
712
713 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
714 no other way of implementing the operation. If FALLBACK_P is false,
715 return false instead. */
716
717 static bool
718 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
719 unsigned HOST_WIDE_INT bitnum,
720 unsigned HOST_WIDE_INT bitregion_start,
721 unsigned HOST_WIDE_INT bitregion_end,
722 machine_mode fieldmode,
723 rtx value, bool reverse, bool fallback_p)
724 {
725 rtx op0 = str_rtx;
726 rtx orig_value;
727
728 while (GET_CODE (op0) == SUBREG)
729 {
730 /* The following line once was done only if WORDS_BIG_ENDIAN,
731 but I think that is a mistake. WORDS_BIG_ENDIAN is
732 meaningful at a much higher level; when structures are copied
733 between memory and regs, the higher-numbered regs
734 always get higher addresses. */
735 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
736 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
737 int byte_offset = 0;
738
739 /* Paradoxical subregs need special handling on big-endian machines. */
740 if (paradoxical_subreg_p (op0))
741 {
742 int difference = inner_mode_size - outer_mode_size;
743
744 if (WORDS_BIG_ENDIAN)
745 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
746 if (BYTES_BIG_ENDIAN)
747 byte_offset += difference % UNITS_PER_WORD;
748 }
749 else
750 byte_offset = SUBREG_BYTE (op0);
751
752 bitnum += byte_offset * BITS_PER_UNIT;
753 op0 = SUBREG_REG (op0);
754 }
755
756 /* No action is needed if the target is a register and if the field
757 lies completely outside that register. This can occur if the source
758 code contains an out-of-bounds access to a small array. */
759 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
760 return true;
761
762 /* Use vec_set patterns for inserting parts of vectors whenever
763 available. */
764 if (VECTOR_MODE_P (GET_MODE (op0))
765 && !MEM_P (op0)
766 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
767 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
768 && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
769 && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
770 {
771 struct expand_operand ops[3];
772 machine_mode outermode = GET_MODE (op0);
773 machine_mode innermode = GET_MODE_INNER (outermode);
774 enum insn_code icode = optab_handler (vec_set_optab, outermode);
775 int pos = bitnum / GET_MODE_BITSIZE (innermode);
776
777 create_fixed_operand (&ops[0], op0);
778 create_input_operand (&ops[1], value, innermode);
779 create_integer_operand (&ops[2], pos);
780 if (maybe_expand_insn (icode, 3, ops))
781 return true;
782 }
783
784 /* If the target is a register, then overwriting the entire object or
785 storing a full-word or multi-word field can be done with just a SUBREG.  */
786 if (!MEM_P (op0)
787 && bitsize == GET_MODE_BITSIZE (fieldmode)
788 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
789 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
790 {
791 /* Use the subreg machinery either to narrow OP0 to the required
792 words or to cope with mode punning between equal-sized modes.
793 In the latter case, use subreg on the rhs side, not lhs. */
794 rtx sub;
795
796 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
797 {
798 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
799 if (sub)
800 {
801 if (reverse)
802 sub = flip_storage_order (GET_MODE (op0), sub);
803 emit_move_insn (op0, sub);
804 return true;
805 }
806 }
807 else
808 {
809 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
810 bitnum / BITS_PER_UNIT);
811 if (sub)
812 {
813 if (reverse)
814 value = flip_storage_order (fieldmode, value);
815 emit_move_insn (sub, value);
816 return true;
817 }
818 }
819 }
820
821 /* If the target is memory, storing any naturally aligned field can be
822 done with a simple store.  On targets with fast unaligned memory
823 access, any naturally sized, unit-aligned field can be stored directly.  */
824 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
825 {
826 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
827 if (reverse)
828 value = flip_storage_order (fieldmode, value);
829 emit_move_insn (op0, value);
830 return true;
831 }
832
833 /* Make sure we are playing with integral modes. Pun with subregs
834 if we aren't. This must come after the entire register case above,
835 since that case is valid for any mode. The following cases are only
836 valid for integral modes. */
837 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
838 scalar_int_mode imode;
839 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
840 {
841 if (MEM_P (op0))
842 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
843 0, MEM_SIZE (op0));
844 else
845 op0 = gen_lowpart (op0_mode.require (), op0);
846 }
847
848 /* Storing an lsb-aligned field in a register
849 can be done with a movstrict instruction. */
850
851 if (!MEM_P (op0)
852 && !reverse
853 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
854 && bitsize == GET_MODE_BITSIZE (fieldmode)
855 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
856 {
857 struct expand_operand ops[2];
858 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
859 rtx arg0 = op0;
860 unsigned HOST_WIDE_INT subreg_off;
861
862 if (GET_CODE (arg0) == SUBREG)
863 {
864 /* Else we've got some float mode source being extracted into
865 a different float mode destination -- this combination of
866 subregs results in Severe Tire Damage. */
867 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
868 || GET_MODE_CLASS (fieldmode) == MODE_INT
869 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
870 arg0 = SUBREG_REG (arg0);
871 }
872
873 subreg_off = bitnum / BITS_PER_UNIT;
874 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
875 {
876 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
877
878 create_fixed_operand (&ops[0], arg0);
879 /* Shrink the source operand to FIELDMODE. */
880 create_convert_operand_to (&ops[1], value, fieldmode, false);
881 if (maybe_expand_insn (icode, 2, ops))
882 return true;
883 }
884 }
885
886 /* Handle fields bigger than a word. */
887
888 if (bitsize > BITS_PER_WORD)
889 {
890 /* Here we transfer the words of the field
891 in the order least significant first.
892 This is because the most significant word is the one which may
893 be less than full.
894 However, only do that if the value is not BLKmode. */
895
896 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
897 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
898 unsigned int i;
899 rtx_insn *last;
900
901 /* This is the mode we must force value to, so that there will be enough
902 subwords to extract. Note that fieldmode will often (always?) be
903 VOIDmode, because that is what store_field uses to indicate that this
904 is a bit field, but passing VOIDmode to operand_subword_force
905 is not allowed. */
906 fieldmode = GET_MODE (value);
907 if (fieldmode == VOIDmode)
908 fieldmode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
909
910 last = get_last_insn ();
911 for (i = 0; i < nwords; i++)
912 {
913 /* If I is 0, use the low-order word in both field and target;
914 if I is 1, use the next to lowest word; and so on. */
915 unsigned int wordnum = (backwards
916 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
917 - i - 1
918 : i);
919 unsigned int bit_offset = (backwards ^ reverse
920 ? MAX ((int) bitsize - ((int) i + 1)
921 * BITS_PER_WORD,
922 0)
923 : (int) i * BITS_PER_WORD);
924 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
925 unsigned HOST_WIDE_INT new_bitsize =
926 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
927
928 /* If the remaining chunk is less than a full word, make sure
929 that on big-endian machines the higher-order bits of the word
930 are used.  */
931 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
932 value_word = simplify_expand_binop (word_mode, lshr_optab,
933 value_word,
934 GEN_INT (BITS_PER_WORD
935 - new_bitsize),
936 NULL_RTX, true,
937 OPTAB_LIB_WIDEN);
938
939 if (!store_bit_field_1 (op0, new_bitsize,
940 bitnum + bit_offset,
941 bitregion_start, bitregion_end,
942 word_mode,
943 value_word, reverse, fallback_p))
944 {
945 delete_insns_since (last);
946 return false;
947 }
948 }
949 return true;
950 }
951
952 /* If VALUE has a floating-point or complex mode, access it as an
953 integer of the corresponding size. This can occur on a machine
954 with 64 bit registers that uses SFmode for float. It can also
955 occur for unaligned float or complex fields. */
956 orig_value = value;
957 scalar_int_mode value_mode;
958 if (GET_MODE (value) == VOIDmode)
959 /* By this point we've dealt with values that are bigger than a word,
960 so word_mode is a conservatively correct choice. */
961 value_mode = word_mode;
962 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
963 {
964 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
965 value = gen_reg_rtx (value_mode);
966 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
967 }
968
969 /* If OP0 is a multi-word register, narrow it to the affected word.
970 If the region spans two words, defer to store_split_bit_field.
971 Don't do this if op0 is a single hard register wider than a word,
972 such as a float or vector register.  */
973 if (!MEM_P (op0)
974 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
975 && (!REG_P (op0)
976 || !HARD_REGISTER_P (op0)
977 || HARD_REGNO_NREGS (REGNO (op0), op0_mode.require ()) != 1))
978 {
979 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
980 {
981 if (!fallback_p)
982 return false;
983
984 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
985 bitregion_start, bitregion_end,
986 value, value_mode, reverse);
987 return true;
988 }
989 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
990 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
991 gcc_assert (op0);
992 op0_mode = word_mode;
993 bitnum %= BITS_PER_WORD;
994 }
995
996 /* From here on we can assume that the field to be stored in fits
997 within a word. If the destination is a register, it too fits
998 in a word. */
999
1000 extraction_insn insv;
1001 if (!MEM_P (op0)
1002 && !reverse
1003 && get_best_reg_extraction_insn (&insv, EP_insv,
1004 GET_MODE_BITSIZE (op0_mode.require ()),
1005 fieldmode)
1006 && store_bit_field_using_insv (&insv, op0, op0_mode,
1007 bitsize, bitnum, value, value_mode))
1008 return true;
1009
1010 /* If OP0 is a memory, try copying it to a register and seeing if a
1011 cheap register alternative is available. */
1012 if (MEM_P (op0) && !reverse)
1013 {
1014 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1015 fieldmode)
1016 && store_bit_field_using_insv (&insv, op0, op0_mode,
1017 bitsize, bitnum, value, value_mode))
1018 return true;
1019
1020 rtx_insn *last = get_last_insn ();
1021
1022 /* Try loading part of OP0 into a register, inserting the bitfield
1023 into that, and then copying the result back to OP0. */
1024 unsigned HOST_WIDE_INT bitpos;
1025 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1026 bitregion_start, bitregion_end,
1027 fieldmode, &bitpos);
1028 if (xop0)
1029 {
1030 rtx tempreg = copy_to_reg (xop0);
1031 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1032 bitregion_start, bitregion_end,
1033 fieldmode, orig_value, reverse, false))
1034 {
1035 emit_move_insn (xop0, tempreg);
1036 return true;
1037 }
1038 delete_insns_since (last);
1039 }
1040 }
1041
1042 if (!fallback_p)
1043 return false;
1044
1045 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1046 bitregion_end, value, value_mode, reverse);
1047 return true;
1048 }
1049
1050 /* Generate code to store value from rtx VALUE
1051 into a bit-field within structure STR_RTX
1052 containing BITSIZE bits starting at bit BITNUM.
1053
1054 BITREGION_START is bitpos of the first bitfield in this region.
1055 BITREGION_END is the bitpos of the ending bitfield in this region.
1056 These two fields are 0 if the C++ memory model does not apply,
1057 or we are not interested in keeping track of bitfield regions.
1058
1059 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1060
1061 If REVERSE is true, the store is to be done in reverse order. */
1062
1063 void
1064 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1065 unsigned HOST_WIDE_INT bitnum,
1066 unsigned HOST_WIDE_INT bitregion_start,
1067 unsigned HOST_WIDE_INT bitregion_end,
1068 machine_mode fieldmode,
1069 rtx value, bool reverse)
1070 {
1071 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1072 scalar_int_mode int_mode;
1073 if (is_a <scalar_int_mode> (fieldmode, &int_mode)
1074 && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode,
1075 bitregion_start, bitregion_end))
1076 {
1077 /* Storing a full word can be done with a simple store.
1078 We know here that the field can be accessed with a single
1079 instruction.  For targets that support unaligned memory
1080 accesses, an unaligned access may be necessary.  */
1081 if (bitsize == GET_MODE_BITSIZE (int_mode))
1082 {
1083 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1084 bitnum / BITS_PER_UNIT);
1085 if (reverse)
1086 value = flip_storage_order (int_mode, value);
1087 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1088 emit_move_insn (str_rtx, value);
1089 }
1090 else
1091 {
1092 rtx temp;
1093
1094 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
1095 &bitnum);
1096 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
1097 temp = copy_to_reg (str_rtx);
1098 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1099 int_mode, value, reverse, true))
1100 gcc_unreachable ();
1101
1102 emit_move_insn (str_rtx, temp);
1103 }
1104
1105 return;
1106 }
1107
1108 /* Under the C++0x memory model, we must not touch bits outside the
1109 bit region. Adjust the address to start at the beginning of the
1110 bit region. */
1111 if (MEM_P (str_rtx) && bitregion_start > 0)
1112 {
1113 scalar_int_mode best_mode;
1114 machine_mode addr_mode = VOIDmode;
1115 HOST_WIDE_INT offset, size;
1116
1117 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1118
1119 offset = bitregion_start / BITS_PER_UNIT;
1120 bitnum -= bitregion_start;
1121 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1122 bitregion_end -= bitregion_start;
1123 bitregion_start = 0;
1124 if (get_best_mode (bitsize, bitnum,
1125 bitregion_start, bitregion_end,
1126 MEM_ALIGN (str_rtx), INT_MAX,
1127 MEM_VOLATILE_P (str_rtx), &best_mode))
1128 addr_mode = best_mode;
1129 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1130 offset, size);
1131 }
1132
1133 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1134 bitregion_start, bitregion_end,
1135 fieldmode, value, reverse, true))
1136 gcc_unreachable ();
1137 }
1138 \f
1139 /* Use shifts and boolean operations to store VALUE into a bit field of
1140 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1141 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1142 the mode of VALUE.
1143
1144 If REVERSE is true, the store is to be done in reverse order. */
1145
1146 static void
1147 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1148 unsigned HOST_WIDE_INT bitsize,
1149 unsigned HOST_WIDE_INT bitnum,
1150 unsigned HOST_WIDE_INT bitregion_start,
1151 unsigned HOST_WIDE_INT bitregion_end,
1152 rtx value, scalar_int_mode value_mode, bool reverse)
1153 {
1154 /* There is a case not handled here:
1155 a structure with a known alignment of just a halfword
1156 and a field split across two aligned halfwords within the structure.
1157 Or likewise a structure with a known alignment of just a byte
1158 and a field split across two bytes.
1159 Such cases are not supposed to be able to occur. */
1160
1161 scalar_int_mode best_mode;
1162 if (MEM_P (op0))
1163 {
1164 unsigned int max_bitsize = BITS_PER_WORD;
1165 scalar_int_mode imode;
1166 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1167 max_bitsize = GET_MODE_BITSIZE (imode);
1168
1169 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1170 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1171 &best_mode))
1172 {
1173 /* The only way this should occur is if the field spans word
1174 boundaries. */
1175 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1176 bitregion_start, bitregion_end,
1177 value, value_mode, reverse);
1178 return;
1179 }
1180
1181 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1182 }
1183 else
1184 best_mode = op0_mode.require ();
1185
1186 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1187 value, value_mode, reverse);
1188 }
1189
1190 /* Helper function for store_fixed_bit_field; it always stores the
1191 bit field using MODE, which is the mode of OP0.  The other
1192 arguments are as for store_fixed_bit_field.  */
1193
1194 static void
1195 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1196 unsigned HOST_WIDE_INT bitsize,
1197 unsigned HOST_WIDE_INT bitnum,
1198 rtx value, scalar_int_mode value_mode, bool reverse)
1199 {
1200 rtx temp;
1201 int all_zero = 0;
1202 int all_one = 0;
1203
1204 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1205 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1206
1207 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1208 /* BITNUM is the distance between our msb
1209 and that of the containing datum.
1210 Convert it to the distance from the lsb. */
1211 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1212
1213 /* Now BITNUM is always the distance between our lsb
1214 and that of OP0. */
1215
1216 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1217 we must first convert its mode to MODE. */
1218
1219 if (CONST_INT_P (value))
1220 {
1221 unsigned HOST_WIDE_INT v = UINTVAL (value);
1222
1223 if (bitsize < HOST_BITS_PER_WIDE_INT)
1224 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1225
1226 if (v == 0)
1227 all_zero = 1;
1228 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1229 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1230 || (bitsize == HOST_BITS_PER_WIDE_INT
1231 && v == HOST_WIDE_INT_M1U))
1232 all_one = 1;
1233
1234 value = lshift_value (mode, v, bitnum);
1235 }
1236 else
1237 {
1238 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1239 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1240
1241 if (value_mode != mode)
1242 value = convert_to_mode (mode, value, 1);
1243
1244 if (must_and)
1245 value = expand_binop (mode, and_optab, value,
1246 mask_rtx (mode, 0, bitsize, 0),
1247 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1248 if (bitnum > 0)
1249 value = expand_shift (LSHIFT_EXPR, mode, value,
1250 bitnum, NULL_RTX, 1);
1251 }
1252
1253 if (reverse)
1254 value = flip_storage_order (mode, value);
1255
1256 /* Now clear the chosen bits in OP0,
1257 except that if VALUE is -1 we need not bother. */
1258 /* We keep the intermediates in registers to allow CSE to combine
1259 consecutive bitfield assignments. */
1260
1261 temp = force_reg (mode, op0);
1262
1263 if (! all_one)
1264 {
1265 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1266 if (reverse)
1267 mask = flip_storage_order (mode, mask);
1268 temp = expand_binop (mode, and_optab, temp, mask,
1269 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1270 temp = force_reg (mode, temp);
1271 }
1272
1273 /* Now logical-or VALUE into OP0, unless it is zero. */
1274
1275 if (! all_zero)
1276 {
1277 temp = expand_binop (mode, ior_optab, temp, value,
1278 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1279 temp = force_reg (mode, temp);
1280 }
1281
1282 if (op0 != temp)
1283 {
1284 op0 = copy_rtx (op0);
1285 emit_move_insn (op0, temp);
1286 }
1287 }
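
/* A minimal sketch of the read-modify-write performed above, written as
   plain C on a 32-bit word.  store_bits is a hypothetical helper, not
   part of this file, and it assumes BITSIZE < 32:

     unsigned int
     store_bits (unsigned int word, unsigned int value,
                 unsigned int bitnum, unsigned int bitsize)
     {
       unsigned int mask = ((1u << bitsize) - 1) << bitnum;   /* mask_rtx   */
       return (word & ~mask) | ((value << bitnum) & mask);    /* and + ior  */
     }

   e.g. store_bits (0xffffffff, 0x5, 4, 3) == 0xffffffdf: bits 4..6 are
   cleared and then set to 101.  The code above additionally handles the
   all-zero / all-one constant shortcuts and reverse storage order.  */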
1288 \f
1289 /* Store a bit field that is split across multiple accessible memory objects.
1290
1291 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1292 BITSIZE is the field width; BITPOS the position of its first bit
1293 (within the word).
1294 VALUE is the value to store, which has mode VALUE_MODE.
1295 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1296 a BLKmode MEM.
1297
1298 If REVERSE is true, the store is to be done in reverse order.
1299
1300 This does not yet handle fields wider than BITS_PER_WORD. */
1301
1302 static void
1303 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1304 unsigned HOST_WIDE_INT bitsize,
1305 unsigned HOST_WIDE_INT bitpos,
1306 unsigned HOST_WIDE_INT bitregion_start,
1307 unsigned HOST_WIDE_INT bitregion_end,
1308 rtx value, scalar_int_mode value_mode, bool reverse)
1309 {
1310 unsigned int unit, total_bits, bitsdone = 0;
1311
1312 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle
1313 that much at a time.  */
1314 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1315 unit = BITS_PER_WORD;
1316 else
1317 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1318
1319 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1320 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1321 again, and we will mutually recurse forever. */
1322 if (MEM_P (op0) && op0_mode.exists ())
1323 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1324
1325 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1326 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1327 that VALUE might be a floating-point constant. */
1328 if (CONSTANT_P (value) && !CONST_INT_P (value))
1329 {
1330 rtx word = gen_lowpart_common (word_mode, value);
1331
1332 if (word && (value != word))
1333 value = word;
1334 else
1335 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1336 value_mode = word_mode;
1337 }
1338
1339 total_bits = GET_MODE_BITSIZE (value_mode);
1340
1341 while (bitsdone < bitsize)
1342 {
1343 unsigned HOST_WIDE_INT thissize;
1344 unsigned HOST_WIDE_INT thispos;
1345 unsigned HOST_WIDE_INT offset;
1346 rtx part;
1347
1348 offset = (bitpos + bitsdone) / unit;
1349 thispos = (bitpos + bitsdone) % unit;
1350
1351 /* When the region of bytes we can touch is restricted, decrease
1352 UNIT close to the end of the region as needed. If op0 is a REG
1353 or SUBREG of REG, don't do this, as there can't be data races
1354 on a register and we can expand shorter code in some cases. */
1355 if (bitregion_end
1356 && unit > BITS_PER_UNIT
1357 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1358 && !REG_P (op0)
1359 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1360 {
1361 unit = unit / 2;
1362 continue;
1363 }
1364
1365 /* THISSIZE must not overrun a word boundary. Otherwise,
1366 store_fixed_bit_field will call us again, and we will mutually
1367 recurse forever. */
1368 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1369 thissize = MIN (thissize, unit - thispos);
1370
1371 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1372 {
1373 /* Fetch successively less significant portions. */
1374 if (CONST_INT_P (value))
1375 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1376 >> (bitsize - bitsdone - thissize))
1377 & ((HOST_WIDE_INT_1 << thissize) - 1));
1378 /* Likewise, but the source is little-endian. */
1379 else if (reverse)
1380 part = extract_fixed_bit_field (word_mode, value, value_mode,
1381 thissize,
1382 bitsize - bitsdone - thissize,
1383 NULL_RTX, 1, false);
1384 else
1385 /* The args are chosen so that the last part includes the
1386 lsb. Give extract_bit_field the value it needs (with
1387 endianness compensation) to fetch the piece we want. */
1388 part = extract_fixed_bit_field (word_mode, value, value_mode,
1389 thissize,
1390 total_bits - bitsize + bitsdone,
1391 NULL_RTX, 1, false);
1392 }
1393 else
1394 {
1395 /* Fetch successively more significant portions. */
1396 if (CONST_INT_P (value))
1397 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1398 >> bitsdone)
1399 & ((HOST_WIDE_INT_1 << thissize) - 1));
1400 /* Likewise, but the source is big-endian. */
1401 else if (reverse)
1402 part = extract_fixed_bit_field (word_mode, value, value_mode,
1403 thissize,
1404 total_bits - bitsdone - thissize,
1405 NULL_RTX, 1, false);
1406 else
1407 part = extract_fixed_bit_field (word_mode, value, value_mode,
1408 thissize, bitsdone, NULL_RTX,
1409 1, false);
1410 }
1411
1412 /* If OP0 is a register, then handle OFFSET here. */
1413 rtx op0_piece = op0;
1414 opt_scalar_int_mode op0_piece_mode = op0_mode;
1415 if (SUBREG_P (op0) || REG_P (op0))
1416 {
1417 scalar_int_mode imode;
1418 if (op0_mode.exists (&imode)
1419 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1420 {
1421 if (offset)
1422 op0_piece = const0_rtx;
1423 }
1424 else
1425 {
1426 op0_piece = operand_subword_force (op0,
1427 offset * unit / BITS_PER_WORD,
1428 GET_MODE (op0));
1429 op0_piece_mode = word_mode;
1430 }
1431 offset &= BITS_PER_WORD / unit - 1;
1432 }
1433
1434 /* OFFSET is in UNITs, and UNIT is in bits.  If OP0_PIECE is
1435 const0_rtx, it is just an out-of-bounds access: ignore it.  */
1436 if (op0_piece != const0_rtx)
1437 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1438 offset * unit + thispos, bitregion_start,
1439 bitregion_end, part, word_mode, reverse);
1440 bitsdone += thissize;
1441 }
1442 }
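
/* Worked example (32-bit words, little-endian): storing a 16-bit field
   at BITPOS 24 of a register pair takes two iterations.  The first
   stores the low 8 bits of VALUE at bits 24..31 of word 0 (THISSIZE is
   limited by UNIT - THISPOS = 8); the second stores the next 8 bits at
   bits 0..7 of word 1.  */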
1443 \f
1444 /* A subroutine of extract_bit_field_1 that converts return value X
1445 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1446 to extract_bit_field. */
1447
1448 static rtx
1449 convert_extracted_bit_field (rtx x, machine_mode mode,
1450 machine_mode tmode, bool unsignedp)
1451 {
1452 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1453 return x;
1454
1455 /* If TMODE is not a scalar integral mode, first convert to the
1456 integer mode of that size and then access the result as a TMODE
1457 value via a SUBREG.  */
1458 if (!SCALAR_INT_MODE_P (tmode))
1459 {
1460 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1461 x = convert_to_mode (int_mode, x, unsignedp);
1462 x = force_reg (int_mode, x);
1463 return gen_lowpart (tmode, x);
1464 }
1465
1466 return convert_to_mode (tmode, x, unsignedp);
1467 }
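
/* Example: if TMODE is SFmode, the extracted value is first converted to
   the integer mode of the same size (typically SImode), forced into a
   register, and then reinterpreted as SFmode via gen_lowpart; for an
   integral TMODE a plain convert_to_mode suffices.  */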
1468
1469 /* Try to use an ext(z)v pattern to extract a field from OP0.
1470 Return the extracted value on success, otherwise return null.
1471 EXTV describes the extraction instruction to use. If OP0_MODE
1472 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1473 The other arguments are as for extract_bit_field. */
1474
1475 static rtx
1476 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1477 opt_scalar_int_mode op0_mode,
1478 unsigned HOST_WIDE_INT bitsize,
1479 unsigned HOST_WIDE_INT bitnum,
1480 int unsignedp, rtx target,
1481 machine_mode mode, machine_mode tmode)
1482 {
1483 struct expand_operand ops[4];
1484 rtx spec_target = target;
1485 rtx spec_target_subreg = 0;
1486 scalar_int_mode ext_mode = extv->field_mode;
1487 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1488
1489 if (bitsize == 0 || unit < bitsize)
1490 return NULL_RTX;
1491
1492 if (MEM_P (op0))
1493 /* Get a reference to the first byte of the field. */
1494 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1495 &bitnum);
1496 else
1497 {
1498 /* Convert from counting within OP0 to counting in EXT_MODE. */
1499 if (BYTES_BIG_ENDIAN)
1500 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1501
1502 /* If op0 is a register, we need it in EXT_MODE to make it
1503 acceptable to the format of ext(z)v. */
1504 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1505 return NULL_RTX;
1506 if (REG_P (op0) && op0_mode.require () != ext_mode)
1507 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1508 }
1509
1510 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1511 "backwards" from the size of the unit we are extracting from.
1512 Otherwise, we count bits from the most significant on a
1513 BYTES/BITS_BIG_ENDIAN machine. */
1514
1515 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1516 bitnum = unit - bitsize - bitnum;
1517
1518 if (target == 0)
1519 target = spec_target = gen_reg_rtx (tmode);
1520
1521 if (GET_MODE (target) != ext_mode)
1522 {
1523 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1524 between the mode of the extraction (word_mode) and the target
1525 mode. Instead, create a temporary and use convert_move to set
1526 the target. */
1527 if (REG_P (target)
1528 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1529 {
1530 target = gen_lowpart (ext_mode, target);
1531 if (GET_MODE_PRECISION (ext_mode)
1532 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1533 spec_target_subreg = target;
1534 }
1535 else
1536 target = gen_reg_rtx (ext_mode);
1537 }
1538
1539 create_output_operand (&ops[0], target, ext_mode);
1540 create_fixed_operand (&ops[1], op0);
1541 create_integer_operand (&ops[2], bitsize);
1542 create_integer_operand (&ops[3], bitnum);
1543 if (maybe_expand_insn (extv->icode, 4, ops))
1544 {
1545 target = ops[0].value;
1546 if (target == spec_target)
1547 return target;
1548 if (target == spec_target_subreg)
1549 return spec_target;
1550 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1551 }
1552 return NULL_RTX;
1553 }
1554
1555 /* A subroutine of extract_bit_field, with the same arguments.
1556 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1557 if we can find no other means of implementing the operation.
1558 If FALLBACK_P is false, return NULL instead.  */
1559
1560 static rtx
1561 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1562 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1563 machine_mode mode, machine_mode tmode,
1564 bool reverse, bool fallback_p, rtx *alt_rtl)
1565 {
1566 rtx op0 = str_rtx;
1567 machine_mode mode1;
1568
1569 if (tmode == VOIDmode)
1570 tmode = mode;
1571
1572 while (GET_CODE (op0) == SUBREG)
1573 {
1574 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1575 op0 = SUBREG_REG (op0);
1576 }
1577
1578 /* If we have an out-of-bounds access to a register, just return an
1579 uninitialized register of the required mode. This can occur if the
1580 source code contains an out-of-bounds access to a small array. */
1581 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1582 return gen_reg_rtx (tmode);
1583
1584 if (REG_P (op0)
1585 && mode == GET_MODE (op0)
1586 && bitnum == 0
1587 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1588 {
1589 if (reverse)
1590 op0 = flip_storage_order (mode, op0);
1591 /* We're trying to extract a full register from itself. */
1592 return op0;
1593 }
1594
1595 /* First check for vector-from-vector extractions.  */
1596 if (VECTOR_MODE_P (GET_MODE (op0))
1597 && !MEM_P (op0)
1598 && VECTOR_MODE_P (tmode)
1599 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (tmode))
1600 {
1601 machine_mode new_mode = GET_MODE (op0);
1602 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1603 {
1604 new_mode = mode_for_vector (GET_MODE_INNER (tmode),
1605 GET_MODE_BITSIZE (GET_MODE (op0))
1606 / GET_MODE_UNIT_BITSIZE (tmode));
1607 if (!VECTOR_MODE_P (new_mode)
1608 || GET_MODE_SIZE (new_mode) != GET_MODE_SIZE (GET_MODE (op0))
1609 || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1610 || !targetm.vector_mode_supported_p (new_mode))
1611 new_mode = VOIDmode;
1612 }
1613 if (new_mode != VOIDmode
1614 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1615 != CODE_FOR_nothing)
1616 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (tmode)
1617 == bitnum / GET_MODE_BITSIZE (tmode)))
1618 {
1619 struct expand_operand ops[3];
1620 machine_mode outermode = new_mode;
1621 machine_mode innermode = tmode;
1622 enum insn_code icode
1623 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1624 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1625
1626 if (new_mode != GET_MODE (op0))
1627 op0 = gen_lowpart (new_mode, op0);
1628 create_output_operand (&ops[0], target, innermode);
1629 ops[0].target = 1;
1630 create_input_operand (&ops[1], op0, outermode);
1631 create_integer_operand (&ops[2], pos);
1632 if (maybe_expand_insn (icode, 3, ops))
1633 {
1634 if (alt_rtl && ops[0].target)
1635 *alt_rtl = target;
1636 target = ops[0].value;
1637 if (GET_MODE (target) != mode)
1638 return gen_lowpart (tmode, target);
1639 return target;
1640 }
1641 }
1642 }
1643
1644 /* See if we can get a better vector mode before extracting. */
1645 if (VECTOR_MODE_P (GET_MODE (op0))
1646 && !MEM_P (op0)
1647 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1648 {
1649 machine_mode new_mode;
1650
1651 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1652 new_mode = MIN_MODE_VECTOR_FLOAT;
1653 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1654 new_mode = MIN_MODE_VECTOR_FRACT;
1655 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1656 new_mode = MIN_MODE_VECTOR_UFRACT;
1657 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1658 new_mode = MIN_MODE_VECTOR_ACCUM;
1659 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1660 new_mode = MIN_MODE_VECTOR_UACCUM;
1661 else
1662 new_mode = MIN_MODE_VECTOR_INT;
1663
1664 FOR_EACH_MODE_FROM (new_mode, new_mode)
1665 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1666 && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1667 && targetm.vector_mode_supported_p (new_mode))
1668 break;
1669 if (new_mode != VOIDmode)
1670 op0 = gen_lowpart (new_mode, op0);
1671 }
1672
1673 /* Use vec_extract patterns for extracting parts of vectors whenever
1674 available. */
1675 if (VECTOR_MODE_P (GET_MODE (op0))
1676 && !MEM_P (op0)
1677 && (convert_optab_handler (vec_extract_optab, GET_MODE (op0),
1678 GET_MODE_INNER (GET_MODE (op0)))
1679 != CODE_FOR_nothing)
1680 && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1681 == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1682 {
1683 struct expand_operand ops[3];
1684 machine_mode outermode = GET_MODE (op0);
1685 machine_mode innermode = GET_MODE_INNER (outermode);
1686 enum insn_code icode
1687 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1688 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1689
1690 create_output_operand (&ops[0], target, innermode);
1691 ops[0].target = 1;
1692 create_input_operand (&ops[1], op0, outermode);
1693 create_integer_operand (&ops[2], pos);
1694 if (maybe_expand_insn (icode, 3, ops))
1695 {
1696 if (alt_rtl && ops[0].target)
1697 *alt_rtl = target;
1698 target = ops[0].value;
1699 if (GET_MODE (target) != mode)
1700 return gen_lowpart (tmode, target);
1701 return target;
1702 }
1703 }
1704
1705 /* Make sure we are playing with integral modes. Pun with subregs
1706 if we aren't. */
1707 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1708 scalar_int_mode imode;
1709 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1710 {
1711 if (MEM_P (op0))
1712 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1713 0, MEM_SIZE (op0));
1714 else if (op0_mode.exists (&imode))
1715 {
1716 op0 = gen_lowpart (imode, op0);
1717
1718 /* If we got a SUBREG, force it into a register since we
1719 aren't going to be able to do another SUBREG on it. */
1720 if (GET_CODE (op0) == SUBREG)
1721 op0 = force_reg (imode, op0);
1722 }
1723 else
1724 {
1725 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1726 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1727 emit_move_insn (mem, op0);
1728 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1729 }
1730 }
1731
1732 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1733 If that's wrong, the solution is to test for it and set TARGET to 0
1734 if needed. */
1735
1736 /* Get the mode of the field to use for atomic access or subreg
1737 conversion. */
1738 mode1 = mode;
1739 if (SCALAR_INT_MODE_P (tmode))
1740 {
1741 machine_mode try_mode = mode_for_size (bitsize,
1742 GET_MODE_CLASS (tmode), 0);
1743 if (try_mode != BLKmode)
1744 mode1 = try_mode;
1745 }
1746 gcc_assert (mode1 != BLKmode);
1747
1748 /* Extraction of a full MODE1 value can be done with a subreg as long
1749 as the least significant bit of the value is the least significant
1750 bit of either OP0 or a word of OP0. */
1751 if (!MEM_P (op0)
1752 && !reverse
1753 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
1754 && bitsize == GET_MODE_BITSIZE (mode1)
1755 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, op0_mode.require ()))
1756 {
1757 rtx sub = simplify_gen_subreg (mode1, op0, op0_mode.require (),
1758 bitnum / BITS_PER_UNIT);
1759 if (sub)
1760 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1761 }
1762
1763 /* Extraction of a full MODE1 value can be done with a load as long as
1764 the field is on a byte boundary and is sufficiently aligned. */
1765 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1766 {
1767 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1768 if (reverse)
1769 op0 = flip_storage_order (mode1, op0);
1770 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1771 }
1772
1773 /* Handle fields bigger than a word. */
1774
1775 if (bitsize > BITS_PER_WORD)
1776 {
1777 /* Here we transfer the words of the field
1778 in the order least significant first.
1779 This is because the most significant word is the one which may
1780 be less than full. */
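 /* For illustration (hypothetical sizes): with 32-bit words, an 80-bit
 field needs nwords == 3; words 0 and 1 of the field are full and
 word 2 carries only 16 bits. Copying the low-order word first means
 the possibly-partial word is handled last, and when WORDS_BIG_ENDIAN
 the destination word index is mirrored so each piece still lands in
 the right word of TARGET. */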
1781
1782 const bool backwards = WORDS_BIG_ENDIAN;
1783 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1784 unsigned int i;
1785 rtx_insn *last;
1786
1787 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1788 target = gen_reg_rtx (mode);
1789
1790 /* In case we're about to clobber a base register or something
1791 (see gcc.c-torture/execute/20040625-1.c). */
1792 if (reg_mentioned_p (target, str_rtx))
1793 target = gen_reg_rtx (mode);
1794
1795 /* Indicate for flow that the entire target reg is being set. */
1796 emit_clobber (target);
1797
1798 last = get_last_insn ();
1799 for (i = 0; i < nwords; i++)
1800 {
1801 /* If I is 0, use the low-order word in both field and target;
1802 if I is 1, use the next to lowest word; and so on. */
1803 /* Word number in TARGET to use. */
1804 unsigned int wordnum
1805 = (backwards
1806 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1807 : i);
1808 /* Offset from start of field in OP0. */
1809 unsigned int bit_offset = (backwards ^ reverse
1810 ? MAX ((int) bitsize - ((int) i + 1)
1811 * BITS_PER_WORD,
1812 0)
1813 : (int) i * BITS_PER_WORD);
1814 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1815 rtx result_part
1816 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1817 bitsize - i * BITS_PER_WORD),
1818 bitnum + bit_offset, 1, target_part,
1819 mode, word_mode, reverse, fallback_p, NULL);
1820
1821 gcc_assert (target_part);
1822 if (!result_part)
1823 {
1824 delete_insns_since (last);
1825 return NULL;
1826 }
1827
1828 if (result_part != target_part)
1829 emit_move_insn (target_part, result_part);
1830 }
1831
1832 if (unsignedp)
1833 {
1834 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1835 need to be zero'd out. */
1836 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1837 {
1838 unsigned int i, total_words;
1839
1840 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1841 for (i = nwords; i < total_words; i++)
1842 emit_move_insn
1843 (operand_subword (target,
1844 backwards ? total_words - i - 1 : i,
1845 1, VOIDmode),
1846 const0_rtx);
1847 }
1848 return target;
1849 }
1850
1851 /* Signed bit field: sign-extend with two arithmetic shifts. */
1852 target = expand_shift (LSHIFT_EXPR, mode, target,
1853 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1854 return expand_shift (RSHIFT_EXPR, mode, target,
1855 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1856 }
1857
1858 /* If OP0 is a multi-word register, narrow it to the affected word.
1859 If the region spans two words, defer to extract_split_bit_field. */
1860 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1861 {
1862 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1863 {
1864 if (!fallback_p)
1865 return NULL_RTX;
1866 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1867 unsignedp, reverse);
1868 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1869 }
1870 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1871 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1872 op0_mode = word_mode;
1873 bitnum %= BITS_PER_WORD;
1874 }
1875
1876 /* From here on we know the desired field is smaller than a word.
1877 If OP0 is a register, it too fits within a word. */
1878 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1879 extraction_insn extv;
1880 if (!MEM_P (op0)
1881 && !reverse
1882 /* ??? We could limit the structure size to the part of OP0 that
1883 contains the field, with appropriate checks for endianness
1884 and TRULY_NOOP_TRUNCATION. */
1885 && get_best_reg_extraction_insn (&extv, pattern,
1886 GET_MODE_BITSIZE (op0_mode.require ()),
1887 tmode))
1888 {
1889 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1890 bitsize, bitnum,
1891 unsignedp, target, mode,
1892 tmode);
1893 if (result)
1894 return result;
1895 }
1896
1897 /* If OP0 is a memory, try copying it to a register and seeing if a
1898 cheap register alternative is available. */
1899 if (MEM_P (op0) && !reverse)
1900 {
1901 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1902 tmode))
1903 {
1904 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1905 bitsize, bitnum,
1906 unsignedp, target, mode,
1907 tmode);
1908 if (result)
1909 return result;
1910 }
1911
1912 rtx_insn *last = get_last_insn ();
1913
1914 /* Try loading part of OP0 into a register and extracting the
1915 bitfield from that. */
1916 unsigned HOST_WIDE_INT bitpos;
1917 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1918 0, 0, tmode, &bitpos);
1919 if (xop0)
1920 {
1921 xop0 = copy_to_reg (xop0);
1922 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1923 unsignedp, target,
1924 mode, tmode, reverse, false, NULL);
1925 if (result)
1926 return result;
1927 delete_insns_since (last);
1928 }
1929 }
1930
1931 if (!fallback_p)
1932 return NULL;
1933
1934 /* Find a correspondingly-sized integer field, so we can apply
1935 shifts and masks to it. */
1936 scalar_int_mode int_mode;
1937 if (!int_mode_for_mode (tmode).exists (&int_mode))
1938 /* If this fails, we should probably push op0 out to memory and then
1939 do a load. */
1940 int_mode = int_mode_for_mode (mode).require ();
1941
1942 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
1943 bitnum, target, unsignedp, reverse);
1944
1945 /* Complex values must be reversed piecewise, so we need to undo the global
1946 reversal, convert to the complex mode and reverse again. */
1947 if (reverse && COMPLEX_MODE_P (tmode))
1948 {
1949 target = flip_storage_order (int_mode, target);
1950 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1951 target = flip_storage_order (tmode, target);
1952 }
1953 else
1954 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1955
1956 return target;
1957 }
1958
1959 /* Generate code to extract a bit-field from STR_RTX
1960 containing BITSIZE bits, starting at BITNUM,
1961 and put it in TARGET if possible (if TARGET is nonzero).
1962 Regardless of TARGET, we return the rtx for where the value is placed.
1963
1964 STR_RTX is the structure containing the field (a REG or MEM).
1965 UNSIGNEDP is nonzero if this is an unsigned bit field.
1966 MODE is the natural mode of the field value once extracted.
1967 TMODE is the mode the caller would like the value to have;
1968 but the value may be returned with type MODE instead.
1969
1970 If REVERSE is true, the extraction is to be done in reverse order.
1971
1972 If a TARGET is specified and we can store in it at no extra cost,
1973 we do so, and return TARGET.
1974 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1975 if they are equally easy. */
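 /* For illustration only (hypothetical caller, names are placeholders):
 assuming OP0 is a SImode REG, a caller wanting the unsigned 6-bit
 field that starts at bit 3 might write something like

 rtx field = extract_bit_field (op0, 6, 3, 1, NULL_RTX,
 QImode, QImode, false, NULL);

 and let this routine choose between a subreg, a load, an ext*v
 pattern or the shift-and-mask fallback. */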
1976
1977 rtx
1978 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1979 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1980 machine_mode mode, machine_mode tmode, bool reverse,
1981 rtx *alt_rtl)
1982 {
1983 machine_mode mode1;
1984
1985 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1986 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1987 mode1 = GET_MODE (str_rtx);
1988 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1989 mode1 = GET_MODE (target);
1990 else
1991 mode1 = tmode;
1992
1993 scalar_int_mode int_mode;
1994 if (is_a <scalar_int_mode> (mode1, &int_mode)
1995 && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode, 0, 0))
1996 {
1997 /* Extraction of a full INT_MODE value can be done with a simple load.
1998 We know here that the field can be accessed with one single
1999 instruction. For targets that support unaligned memory,
2000 an unaligned access may be necessary. */
2001 if (bitsize == GET_MODE_BITSIZE (int_mode))
2002 {
2003 rtx result = adjust_bitfield_address (str_rtx, int_mode,
2004 bitnum / BITS_PER_UNIT);
2005 if (reverse)
2006 result = flip_storage_order (int_mode, result);
2007 gcc_assert (bitnum % BITS_PER_UNIT == 0);
2008 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2009 }
2010
2011 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
2012 &bitnum);
2013 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
2014 str_rtx = copy_to_reg (str_rtx);
2015 }
2016
2017 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2018 target, mode, tmode, reverse, true, alt_rtl);
2019 }
2020 \f
2021 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2022 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2023 otherwise OP0 is a BLKmode MEM.
2024
2025 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2026 If REVERSE is true, the extraction is to be done in reverse order.
2027
2028 If TARGET is nonzero, attempts to store the value there
2029 and return TARGET, but this is not guaranteed.
2030 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2031
2032 static rtx
2033 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2034 opt_scalar_int_mode op0_mode,
2035 unsigned HOST_WIDE_INT bitsize,
2036 unsigned HOST_WIDE_INT bitnum, rtx target,
2037 int unsignedp, bool reverse)
2038 {
2039 scalar_int_mode mode;
2040 if (MEM_P (op0))
2041 {
2042 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2043 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2044 /* The only way this should occur is if the field spans word
2045 boundaries. */
2046 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2047 unsignedp, reverse);
2048
2049 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2050 }
2051 else
2052 mode = op0_mode.require ();
2053
2054 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2055 target, unsignedp, reverse);
2056 }
2057
2058 /* Helper function for extract_fixed_bit_field, extracts
2059 the bit field always using MODE, which is the mode of OP0.
2060 The other arguments are as for extract_fixed_bit_field. */
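 /* Worked example (illustrative values): with MODE == SImode,
 BITSIZE == 5, BITNUM == 3 and UNSIGNEDP set, the field of
 OP0 == 0x1ab is obtained as

 (0x1ab >> 3) & 0x1f == 0x15

 i.e. a logical right shift followed by an AND with mask_rtx; the
 mask is skipped when the field already reaches the mode's msb.
 For a signed field the code below instead picks the narrowest
 integer mode holding BITNUM + BITSIZE bits and sign-extends with a
 left shift followed by an arithmetic right shift. */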
2061
2062 static rtx
2063 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2064 unsigned HOST_WIDE_INT bitsize,
2065 unsigned HOST_WIDE_INT bitnum, rtx target,
2066 int unsignedp, bool reverse)
2067 {
2068 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2069 for invalid input, such as extracting the equivalent of f5 from
2070 gcc.dg/pr48335-2.c. */
2071
2072 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2073 /* BITNUM is the distance between our msb and that of OP0.
2074 Convert it to the distance from the lsb. */
2075 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2076
2077 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2078 We have reduced the big-endian case to the little-endian case. */
2079 if (reverse)
2080 op0 = flip_storage_order (mode, op0);
2081
2082 if (unsignedp)
2083 {
2084 if (bitnum)
2085 {
2086 /* If the field does not already start at the lsb,
2087 shift it so it does. */
2088 /* Maybe propagate the target for the shift. */
2089 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2090 if (tmode != mode)
2091 subtarget = 0;
2092 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2093 }
2094 /* Convert the value to the desired mode. TMODE must also be a
2095 scalar integer for this conversion to make sense, since we
2096 shouldn't reinterpret the bits. */
2097 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2098 if (mode != new_mode)
2099 op0 = convert_to_mode (new_mode, op0, 1);
2100
2101 /* Unless the msb of the field used to be the msb when we shifted,
2102 mask out the upper bits. */
2103
2104 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2105 return expand_binop (new_mode, and_optab, op0,
2106 mask_rtx (new_mode, 0, bitsize, 0),
2107 target, 1, OPTAB_LIB_WIDEN);
2108 return op0;
2109 }
2110
2111 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2112 then arithmetic-shift its lsb to the lsb of the word. */
2113 op0 = force_reg (mode, op0);
2114
2115 /* Find the narrowest integer mode that contains the field. */
2116
2117 opt_scalar_int_mode mode_iter;
2118 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2119 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2120 break;
2121
2122 mode = mode_iter.require ();
2123 op0 = convert_to_mode (mode, op0, 0);
2124
2125 if (mode != tmode)
2126 target = 0;
2127
2128 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2129 {
2130 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2131 /* Maybe propagate the target for the shift. */
2132 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2133 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2134 }
2135
2136 return expand_shift (RSHIFT_EXPR, mode, op0,
2137 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2138 }
2139
2140 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2141 VALUE << BITPOS. */
2142
2143 static rtx
2144 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2145 int bitpos)
2146 {
2147 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2148 }
2149 \f
2150 /* Extract a bit field that is split across two words
2151 and return an RTX for the result.
2152
2153 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2154 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2155 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2156 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2157 a BLKmode MEM.
2158
2159 If REVERSE is true, the extraction is to be done in reverse order. */
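 /* For illustration (hypothetical values, little-endian target): with
 32-bit words, a 10-bit field at BITPOS == 28 crosses the word
 boundary. The loop below first extracts the 4 bits that fit in
 word 0 (at position 28), then the remaining 6 bits from word 1 (at
 position 0), and IORs the second part, shifted left by 4, into the
 result before any final sign extension. */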
2160
2161 static rtx
2162 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2163 unsigned HOST_WIDE_INT bitsize,
2164 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2165 bool reverse)
2166 {
2167 unsigned int unit;
2168 unsigned int bitsdone = 0;
2169 rtx result = NULL_RTX;
2170 int first = 1;
2171
2172 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2173 much at a time. */
2174 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2175 unit = BITS_PER_WORD;
2176 else
2177 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2178
2179 while (bitsdone < bitsize)
2180 {
2181 unsigned HOST_WIDE_INT thissize;
2182 rtx part;
2183 unsigned HOST_WIDE_INT thispos;
2184 unsigned HOST_WIDE_INT offset;
2185
2186 offset = (bitpos + bitsdone) / unit;
2187 thispos = (bitpos + bitsdone) % unit;
2188
2189 /* THISSIZE must not overrun a word boundary. Otherwise,
2190 extract_fixed_bit_field will call us again, and we will mutually
2191 recurse forever. */
2192 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2193 thissize = MIN (thissize, unit - thispos);
2194
2195 /* If OP0 is a register, then handle OFFSET here. */
2196 rtx op0_piece = op0;
2197 opt_scalar_int_mode op0_piece_mode = op0_mode;
2198 if (SUBREG_P (op0) || REG_P (op0))
2199 {
2200 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2201 op0_piece_mode = word_mode;
2202 offset = 0;
2203 }
2204
2205 /* Extract the parts in bit-counting order,
2206 whose meaning is determined by BYTES_PER_UNIT.
2207 OFFSET is in UNITs, and UNIT is in bits. */
2208 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2209 thissize, offset * unit + thispos,
2210 0, 1, reverse);
2211 bitsdone += thissize;
2212
2213 /* Shift this part into place for the result. */
2214 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2215 {
2216 if (bitsize != bitsdone)
2217 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2218 bitsize - bitsdone, 0, 1);
2219 }
2220 else
2221 {
2222 if (bitsdone != thissize)
2223 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2224 bitsdone - thissize, 0, 1);
2225 }
2226
2227 if (first)
2228 result = part;
2229 else
2230 /* Combine the parts with bitwise or. This works
2231 because we extracted each part as an unsigned bit field. */
2232 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2233 OPTAB_LIB_WIDEN);
2234
2235 first = 0;
2236 }
2237
2238 /* Unsigned bit field: we are done. */
2239 if (unsignedp)
2240 return result;
2241 /* Signed bit field: sign-extend with two arithmetic shifts. */
2242 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2243 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2244 return expand_shift (RSHIFT_EXPR, word_mode, result,
2245 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2246 }
2247 \f
2248 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2249 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2250 MODE, fill the upper bits with zeros. Fail if the layout of either
2251 mode is unknown (as for CC modes) or if the extraction would involve
2252 unprofitable mode punning. Return the value on success, otherwise
2253 return null.
2254
2255 This is different from gen_lowpart* in these respects:
2256
2257 - the returned value must always be considered an rvalue
2258
2259 - when MODE is wider than SRC_MODE, the extraction involves
2260 a zero extension
2261
2262 - when MODE is smaller than SRC_MODE, the extraction involves
2263 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2264
2265 In other words, this routine performs a computation, whereas the
2266 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2267 operations. */
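 /* For example, extract_low_bits (SImode, DImode, x) yields the low 32
 bits of X as an SImode rvalue (a truncation), while
 extract_low_bits (DImode, SImode, x) yields X zero-extended to
 DImode; both return NULL_RTX if a CC mode is involved or the
 required mode punning is not considered profitable. */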
2268
2269 rtx
2270 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2271 {
2272 scalar_int_mode int_mode, src_int_mode;
2273
2274 if (mode == src_mode)
2275 return src;
2276
2277 if (CONSTANT_P (src))
2278 {
2279 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2280 fails, it will happily create (subreg (symbol_ref)) or similar
2281 invalid SUBREGs. */
2282 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2283 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2284 if (ret)
2285 return ret;
2286
2287 if (GET_MODE (src) == VOIDmode
2288 || !validate_subreg (mode, src_mode, src, byte))
2289 return NULL_RTX;
2290
2291 src = force_reg (GET_MODE (src), src);
2292 return gen_rtx_SUBREG (mode, src, byte);
2293 }
2294
2295 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2296 return NULL_RTX;
2297
2298 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2299 && MODES_TIEABLE_P (mode, src_mode))
2300 {
2301 rtx x = gen_lowpart_common (mode, src);
2302 if (x)
2303 return x;
2304 }
2305
2306 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2307 || !int_mode_for_mode (mode).exists (&int_mode))
2308 return NULL_RTX;
2309
2310 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2311 return NULL_RTX;
2312 if (!MODES_TIEABLE_P (int_mode, mode))
2313 return NULL_RTX;
2314
2315 src = gen_lowpart (src_int_mode, src);
2316 src = convert_modes (int_mode, src_int_mode, src, true);
2317 src = gen_lowpart (mode, src);
2318 return src;
2319 }
2320 \f
2321 /* Add INC into TARGET. */
2322
2323 void
2324 expand_inc (rtx target, rtx inc)
2325 {
2326 rtx value = expand_binop (GET_MODE (target), add_optab,
2327 target, inc,
2328 target, 0, OPTAB_LIB_WIDEN);
2329 if (value != target)
2330 emit_move_insn (target, value);
2331 }
2332
2333 /* Subtract DEC from TARGET. */
2334
2335 void
2336 expand_dec (rtx target, rtx dec)
2337 {
2338 rtx value = expand_binop (GET_MODE (target), sub_optab,
2339 target, dec,
2340 target, 0, OPTAB_LIB_WIDEN);
2341 if (value != target)
2342 emit_move_insn (target, value);
2343 }
2344 \f
2345 /* Output a shift instruction for expression code CODE,
2346 with SHIFTED being the rtx for the value to shift,
2347 and AMOUNT the rtx for the amount to shift by.
2348 Store the result in the rtx TARGET, if that is convenient.
2349 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2350 Return the rtx for where the value is.
2351 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2352 in which case 0 is returned. */
2353
2354 static rtx
2355 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2356 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2357 {
2358 rtx op1, temp = 0;
2359 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2360 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2361 optab lshift_optab = ashl_optab;
2362 optab rshift_arith_optab = ashr_optab;
2363 optab rshift_uns_optab = lshr_optab;
2364 optab lrotate_optab = rotl_optab;
2365 optab rrotate_optab = rotr_optab;
2366 machine_mode op1_mode;
2367 machine_mode scalar_mode = mode;
2368 int attempt;
2369 bool speed = optimize_insn_for_speed_p ();
2370
2371 if (VECTOR_MODE_P (mode))
2372 scalar_mode = GET_MODE_INNER (mode);
2373 op1 = amount;
2374 op1_mode = GET_MODE (op1);
2375
2376 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2377 shift amount is a vector, use the vector/vector shift patterns. */
2378 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2379 {
2380 lshift_optab = vashl_optab;
2381 rshift_arith_optab = vashr_optab;
2382 rshift_uns_optab = vlshr_optab;
2383 lrotate_optab = vrotl_optab;
2384 rrotate_optab = vrotr_optab;
2385 }
2386
2387 /* We used to detect shift counts computed by NEGATE_EXPR
2388 and shift in the other direction, but that does not work
2389 on all machines. */
2390
2391 if (SHIFT_COUNT_TRUNCATED)
2392 {
2393 if (CONST_INT_P (op1)
2394 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2395 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2396 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2397 % GET_MODE_BITSIZE (scalar_mode));
2398 else if (GET_CODE (op1) == SUBREG
2399 && subreg_lowpart_p (op1)
2400 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2401 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2402 op1 = SUBREG_REG (op1);
2403 }
2404
2405 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2406 prefer left rotation; if op1 is from bitsize / 2 + 1 to
2407 bitsize - 1, use the other direction of rotate with an amount of
2408 1 .. bitsize / 2 - 1 instead. */
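 /* For example, in a 32-bit mode a left rotate by 24 becomes a right
 rotate by 8, and a right rotate by 20 becomes a left rotate by 12. */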
2409 if (rotate
2410 && CONST_INT_P (op1)
2411 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2412 GET_MODE_BITSIZE (scalar_mode) - 1))
2413 {
2414 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2415 left = !left;
2416 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2417 }
2418
2419 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2420 Note that this is not the case for bigger values. For instance a rotation
2421 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2422 0x04030201 (bswapsi). */
2423 if (rotate
2424 && CONST_INT_P (op1)
2425 && INTVAL (op1) == BITS_PER_UNIT
2426 && GET_MODE_SIZE (scalar_mode) == 2
2427 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2428 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2429 unsignedp);
2430
2431 if (op1 == const0_rtx)
2432 return shifted;
2433
2434 /* Check whether it's cheaper to implement a left shift by a constant
2435 bit count by a sequence of additions. */
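 /* For example, X << 3 can be done as three self-additions:
 T = X + X; T = T + T; T = T + T. The loop below emits one such
 doubling for each of the INTVAL (op1) steps when the cost tables say
 that is cheaper than the shift itself. */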
2436 if (code == LSHIFT_EXPR
2437 && CONST_INT_P (op1)
2438 && INTVAL (op1) > 0
2439 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2440 && INTVAL (op1) < MAX_BITS_PER_WORD
2441 && (shift_cost (speed, mode, INTVAL (op1))
2442 > INTVAL (op1) * add_cost (speed, mode))
2443 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2444 {
2445 int i;
2446 for (i = 0; i < INTVAL (op1); i++)
2447 {
2448 temp = force_reg (mode, shifted);
2449 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2450 unsignedp, OPTAB_LIB_WIDEN);
2451 }
2452 return shifted;
2453 }
2454
2455 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2456 {
2457 enum optab_methods methods;
2458
2459 if (attempt == 0)
2460 methods = OPTAB_DIRECT;
2461 else if (attempt == 1)
2462 methods = OPTAB_WIDEN;
2463 else
2464 methods = OPTAB_LIB_WIDEN;
2465
2466 if (rotate)
2467 {
2468 /* Widening does not work for rotation. */
2469 if (methods == OPTAB_WIDEN)
2470 continue;
2471 else if (methods == OPTAB_LIB_WIDEN)
2472 {
2473 /* If we have been unable to open-code this by a rotation,
2474 do it as the IOR of two shifts. I.e., to rotate A
2475 by N bits, compute
2476 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2477 where C is the bitsize of A.
2478
2479 It is theoretically possible that the target machine might
2480 not be able to perform either shift and hence we would
2481 be making two libcalls rather than just the one for the
2482 shift (similarly if IOR could not be done). We will allow
2483 this extremely unlikely lossage to avoid complicating the
2484 code below. */
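 /* For example, with C == 32 and N == 8 this computes
 (A << 8) | ((unsigned) A >> 24); masking the negated amount with
 C - 1 keeps the right-shift amount in range (and yields 0, not C,
 when N is 0). */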
2485
2486 rtx subtarget = target == shifted ? 0 : target;
2487 rtx new_amount, other_amount;
2488 rtx temp1;
2489
2490 new_amount = op1;
2491 if (op1 == const0_rtx)
2492 return shifted;
2493 else if (CONST_INT_P (op1))
2494 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2495 - INTVAL (op1));
2496 else
2497 {
2498 other_amount
2499 = simplify_gen_unary (NEG, GET_MODE (op1),
2500 op1, GET_MODE (op1));
2501 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2502 other_amount
2503 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2504 gen_int_mode (mask, GET_MODE (op1)));
2505 }
2506
2507 shifted = force_reg (mode, shifted);
2508
2509 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2510 mode, shifted, new_amount, 0, 1);
2511 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2512 mode, shifted, other_amount,
2513 subtarget, 1);
2514 return expand_binop (mode, ior_optab, temp, temp1, target,
2515 unsignedp, methods);
2516 }
2517
2518 temp = expand_binop (mode,
2519 left ? lrotate_optab : rrotate_optab,
2520 shifted, op1, target, unsignedp, methods);
2521 }
2522 else if (unsignedp)
2523 temp = expand_binop (mode,
2524 left ? lshift_optab : rshift_uns_optab,
2525 shifted, op1, target, unsignedp, methods);
2526
2527 /* Do arithmetic shifts.
2528 Also, if we are going to widen the operand, we can just as well
2529 use an arithmetic right-shift instead of a logical one. */
2530 if (temp == 0 && ! rotate
2531 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2532 {
2533 enum optab_methods methods1 = methods;
2534
2535 /* If trying to widen a log shift to an arithmetic shift,
2536 don't accept an arithmetic shift of the same size. */
2537 if (unsignedp)
2538 methods1 = OPTAB_MUST_WIDEN;
2539
2540 /* Arithmetic shift */
2541
2542 temp = expand_binop (mode,
2543 left ? lshift_optab : rshift_arith_optab,
2544 shifted, op1, target, unsignedp, methods1);
2545 }
2546
2547 /* We used to try extzv here for logical right shifts, but that was
2548 only useful for one machine, the VAX, and caused poor code
2549 generation there for lshrdi3, so the code was deleted and a
2550 define_expand for lshrsi3 was added to vax.md. */
2551 }
2552
2553 gcc_assert (temp != NULL_RTX || may_fail);
2554 return temp;
2555 }
2556
2557 /* Output a shift instruction for expression code CODE,
2558 with SHIFTED being the rtx for the value to shift,
2559 and AMOUNT the amount to shift by.
2560 Store the result in the rtx TARGET, if that is convenient.
2561 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2562 Return the rtx for where the value is. */
2563
2564 rtx
2565 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2566 int amount, rtx target, int unsignedp)
2567 {
2568 return expand_shift_1 (code, mode,
2569 shifted, GEN_INT (amount), target, unsignedp);
2570 }
2571
2572 /* Likewise, but return 0 if that cannot be done. */
2573
2574 static rtx
2575 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2576 int amount, rtx target, int unsignedp)
2577 {
2578 return expand_shift_1 (code, mode,
2579 shifted, GEN_INT (amount), target, unsignedp, true);
2580 }
2581
2582 /* Output a shift instruction for expression code CODE,
2583 with SHIFTED being the rtx for the value to shift,
2584 and AMOUNT the tree for the amount to shift by.
2585 Store the result in the rtx TARGET, if that is convenient.
2586 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2587 Return the rtx for where the value is. */
2588
2589 rtx
2590 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2591 tree amount, rtx target, int unsignedp)
2592 {
2593 return expand_shift_1 (code, mode,
2594 shifted, expand_normal (amount), target, unsignedp);
2595 }
2596
2597 \f
2598 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2599 const struct mult_cost *, machine_mode mode);
2600 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2601 const struct algorithm *, enum mult_variant);
2602 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2603 static rtx extract_high_half (scalar_int_mode, rtx);
2604 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2605 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2606 int, int);
2607 /* Compute and return the best algorithm for multiplying by T.
2608 The algorithm must cost less than cost_limit.
2609 If retval.cost >= COST_LIMIT, no algorithm was found and all
2610 other fields of the returned struct are undefined.
2611 MODE is the machine mode of the multiplication. */
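 /* For illustration (one possible outcome, not the only one): for
 T == 10 the search below can find the factor D == 5 == 2^2 + 1:
 synthesize X*2 with a single shift and then form X*10 as
 (X*2) + ((X*2) << 2) with one shift-and-add (alg_add_factor).
 Whether that beats the even/shift path, X*10 == ((X << 2) + X) << 1,
 depends on the add and shift costs recorded for the target. */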
2612
2613 static void
2614 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2615 const struct mult_cost *cost_limit, machine_mode mode)
2616 {
2617 int m;
2618 struct algorithm *alg_in, *best_alg;
2619 struct mult_cost best_cost;
2620 struct mult_cost new_limit;
2621 int op_cost, op_latency;
2622 unsigned HOST_WIDE_INT orig_t = t;
2623 unsigned HOST_WIDE_INT q;
2624 int maxm, hash_index;
2625 bool cache_hit = false;
2626 enum alg_code cache_alg = alg_zero;
2627 bool speed = optimize_insn_for_speed_p ();
2628 scalar_int_mode imode;
2629 struct alg_hash_entry *entry_ptr;
2630
2631 /* Indicate that no algorithm is yet found. If no algorithm
2632 is found, this value will be returned and indicate failure. */
2633 alg_out->cost.cost = cost_limit->cost + 1;
2634 alg_out->cost.latency = cost_limit->latency + 1;
2635
2636 if (cost_limit->cost < 0
2637 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2638 return;
2639
2640 /* Be prepared for vector modes. */
2641 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2642
2643 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2644
2645 /* Restrict the bits of "t" to the multiplication's mode. */
2646 t &= GET_MODE_MASK (imode);
2647
2648 /* t == 1 can be done in zero cost. */
2649 if (t == 1)
2650 {
2651 alg_out->ops = 1;
2652 alg_out->cost.cost = 0;
2653 alg_out->cost.latency = 0;
2654 alg_out->op[0] = alg_m;
2655 return;
2656 }
2657
2658 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2659 fail now. */
2660 if (t == 0)
2661 {
2662 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2663 return;
2664 else
2665 {
2666 alg_out->ops = 1;
2667 alg_out->cost.cost = zero_cost (speed);
2668 alg_out->cost.latency = zero_cost (speed);
2669 alg_out->op[0] = alg_zero;
2670 return;
2671 }
2672 }
2673
2674 /* We'll be needing a couple extra algorithm structures now. */
2675
2676 alg_in = XALLOCA (struct algorithm);
2677 best_alg = XALLOCA (struct algorithm);
2678 best_cost = *cost_limit;
2679
2680 /* Compute the hash index. */
2681 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2682
2683 /* See if we already know what to do for T. */
2684 entry_ptr = alg_hash_entry_ptr (hash_index);
2685 if (entry_ptr->t == t
2686 && entry_ptr->mode == mode
2687 && entry_ptr->speed == speed
2688 && entry_ptr->alg != alg_unknown)
2689 {
2690 cache_alg = entry_ptr->alg;
2691
2692 if (cache_alg == alg_impossible)
2693 {
2694 /* The cache tells us that it's impossible to synthesize
2695 multiplication by T within entry_ptr->cost. */
2696 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2697 /* COST_LIMIT is at least as restrictive as the one
2698 recorded in the hash table, in which case we have no
2699 hope of synthesizing a multiplication. Just
2700 return. */
2701 return;
2702
2703 /* If we get here, COST_LIMIT is less restrictive than the
2704 one recorded in the hash table, so we may be able to
2705 synthesize a multiplication. Proceed as if we didn't
2706 have the cache entry. */
2707 }
2708 else
2709 {
2710 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2711 /* The cached algorithm shows that this multiplication
2712 requires more cost than COST_LIMIT. Just return. This
2713 way, we don't clobber this cache entry with
2714 alg_impossible but retain useful information. */
2715 return;
2716
2717 cache_hit = true;
2718
2719 switch (cache_alg)
2720 {
2721 case alg_shift:
2722 goto do_alg_shift;
2723
2724 case alg_add_t_m2:
2725 case alg_sub_t_m2:
2726 goto do_alg_addsub_t_m2;
2727
2728 case alg_add_factor:
2729 case alg_sub_factor:
2730 goto do_alg_addsub_factor;
2731
2732 case alg_add_t2_m:
2733 goto do_alg_add_t2_m;
2734
2735 case alg_sub_t2_m:
2736 goto do_alg_sub_t2_m;
2737
2738 default:
2739 gcc_unreachable ();
2740 }
2741 }
2742 }
2743
2744 /* If we have a group of zero bits at the low-order part of T, try
2745 multiplying by the remaining bits and then doing a shift. */
2746
2747 if ((t & 1) == 0)
2748 {
2749 do_alg_shift:
2750 m = ctz_or_zero (t); /* m = number of low zero bits */
2751 if (m < maxm)
2752 {
2753 q = t >> m;
2754 /* The function expand_shift will choose between a shift and
2755 a sequence of additions, so the observed cost is given as
2756 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2757 op_cost = m * add_cost (speed, mode);
2758 if (shift_cost (speed, mode, m) < op_cost)
2759 op_cost = shift_cost (speed, mode, m);
2760 new_limit.cost = best_cost.cost - op_cost;
2761 new_limit.latency = best_cost.latency - op_cost;
2762 synth_mult (alg_in, q, &new_limit, mode);
2763
2764 alg_in->cost.cost += op_cost;
2765 alg_in->cost.latency += op_cost;
2766 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2767 {
2768 best_cost = alg_in->cost;
2769 std::swap (alg_in, best_alg);
2770 best_alg->log[best_alg->ops] = m;
2771 best_alg->op[best_alg->ops] = alg_shift;
2772 }
2773
2774 /* See if treating ORIG_T as a signed number yields a better
2775 sequence. Try this sequence only for a negative ORIG_T
2776 as it would be useless for a non-negative ORIG_T. */
2777 if ((HOST_WIDE_INT) orig_t < 0)
2778 {
2779 /* Shift ORIG_T as follows because a right shift of a
2780 negative-valued signed type is implementation
2781 defined. */
2782 q = ~(~orig_t >> m);
2783 /* The function expand_shift will choose between a shift
2784 and a sequence of additions, so the observed cost is
2785 given as MIN (m * add_cost(speed, mode),
2786 shift_cost(speed, mode, m)). */
2787 op_cost = m * add_cost (speed, mode);
2788 if (shift_cost (speed, mode, m) < op_cost)
2789 op_cost = shift_cost (speed, mode, m);
2790 new_limit.cost = best_cost.cost - op_cost;
2791 new_limit.latency = best_cost.latency - op_cost;
2792 synth_mult (alg_in, q, &new_limit, mode);
2793
2794 alg_in->cost.cost += op_cost;
2795 alg_in->cost.latency += op_cost;
2796 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2797 {
2798 best_cost = alg_in->cost;
2799 std::swap (alg_in, best_alg);
2800 best_alg->log[best_alg->ops] = m;
2801 best_alg->op[best_alg->ops] = alg_shift;
2802 }
2803 }
2804 }
2805 if (cache_hit)
2806 goto done;
2807 }
2808
2809 /* If we have an odd number, add or subtract one. */
2810 if ((t & 1) != 0)
2811 {
2812 unsigned HOST_WIDE_INT w;
2813
2814 do_alg_addsub_t_m2:
2815 for (w = 1; (w & t) != 0; w <<= 1)
2816 ;
2817 /* If T was -1, then W will be zero after the loop. This is another
2818 case where T ends with ...111. Handling this by computing (T + 1) and
2819 subtracting 1 produces slightly better code and results in algorithm
2820 selection much faster than treating it like the ...0111 case
2821 below. */
2822 if (w == 0
2823 || (w > 2
2824 /* Reject the case where t is 3.
2825 Thus we prefer addition in that case. */
2826 && t != 3))
2827 {
2828 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2829
2830 op_cost = add_cost (speed, mode);
2831 new_limit.cost = best_cost.cost - op_cost;
2832 new_limit.latency = best_cost.latency - op_cost;
2833 synth_mult (alg_in, t + 1, &new_limit, mode);
2834
2835 alg_in->cost.cost += op_cost;
2836 alg_in->cost.latency += op_cost;
2837 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2838 {
2839 best_cost = alg_in->cost;
2840 std::swap (alg_in, best_alg);
2841 best_alg->log[best_alg->ops] = 0;
2842 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2843 }
2844 }
2845 else
2846 {
2847 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2848
2849 op_cost = add_cost (speed, mode);
2850 new_limit.cost = best_cost.cost - op_cost;
2851 new_limit.latency = best_cost.latency - op_cost;
2852 synth_mult (alg_in, t - 1, &new_limit, mode);
2853
2854 alg_in->cost.cost += op_cost;
2855 alg_in->cost.latency += op_cost;
2856 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2857 {
2858 best_cost = alg_in->cost;
2859 std::swap (alg_in, best_alg);
2860 best_alg->log[best_alg->ops] = 0;
2861 best_alg->op[best_alg->ops] = alg_add_t_m2;
2862 }
2863 }
2864
2865 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2866 quickly with a - a * n for some appropriate constant n. */
2867 m = exact_log2 (-orig_t + 1);
2868 if (m >= 0 && m < maxm)
2869 {
2870 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2871 /* If the target has a cheap shift-and-subtract insn use
2872 that in preference to a shift insn followed by a sub insn.
2873 Assume that the shift-and-sub is "atomic" with a latency
2874 equal to its cost, otherwise assume that on superscalar
2875 hardware the shift may be executed concurrently with the
2876 earlier steps in the algorithm. */
2877 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2878 {
2879 op_cost = shiftsub1_cost (speed, mode, m);
2880 op_latency = op_cost;
2881 }
2882 else
2883 op_latency = add_cost (speed, mode);
2884
2885 new_limit.cost = best_cost.cost - op_cost;
2886 new_limit.latency = best_cost.latency - op_latency;
2887 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2888 &new_limit, mode);
2889
2890 alg_in->cost.cost += op_cost;
2891 alg_in->cost.latency += op_latency;
2892 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2893 {
2894 best_cost = alg_in->cost;
2895 std::swap (alg_in, best_alg);
2896 best_alg->log[best_alg->ops] = m;
2897 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2898 }
2899 }
2900
2901 if (cache_hit)
2902 goto done;
2903 }
2904
2905 /* Look for factors of t of the form
2906 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2907 If we find such a factor, we can multiply by t using an algorithm that
2908 multiplies by q, shift the result by m and add/subtract it to itself.
2909
2910 We search for large factors first and loop down, even if large factors
2911 are less probable than small; if we find a large factor we will find a
2912 good sequence quickly, and therefore be able to prune (by decreasing
2913 COST_LIMIT) the search. */
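 /* For example (illustrative values), T == 45 has the factor
 D == 9 == 2^3 + 1 (M == 3): the recursive call synthesizes X*5 as
 (X << 2) + X, and the shift-and-add step then forms X*45 as
 (X*5) + ((X*5) << 3). */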
2914
2915 do_alg_addsub_factor:
2916 for (m = floor_log2 (t - 1); m >= 2; m--)
2917 {
2918 unsigned HOST_WIDE_INT d;
2919
2920 d = (HOST_WIDE_INT_1U << m) + 1;
2921 if (t % d == 0 && t > d && m < maxm
2922 && (!cache_hit || cache_alg == alg_add_factor))
2923 {
2924 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2925 if (shiftadd_cost (speed, mode, m) <= op_cost)
2926 op_cost = shiftadd_cost (speed, mode, m);
2927
2928 op_latency = op_cost;
2929
2930
2931 new_limit.cost = best_cost.cost - op_cost;
2932 new_limit.latency = best_cost.latency - op_latency;
2933 synth_mult (alg_in, t / d, &new_limit, mode);
2934
2935 alg_in->cost.cost += op_cost;
2936 alg_in->cost.latency += op_latency;
2937 if (alg_in->cost.latency < op_cost)
2938 alg_in->cost.latency = op_cost;
2939 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2940 {
2941 best_cost = alg_in->cost;
2942 std::swap (alg_in, best_alg);
2943 best_alg->log[best_alg->ops] = m;
2944 best_alg->op[best_alg->ops] = alg_add_factor;
2945 }
2946 /* Other factors will have been taken care of in the recursion. */
2947 break;
2948 }
2949
2950 d = (HOST_WIDE_INT_1U << m) - 1;
2951 if (t % d == 0 && t > d && m < maxm
2952 && (!cache_hit || cache_alg == alg_sub_factor))
2953 {
2954 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2955 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2956 op_cost = shiftsub0_cost (speed, mode, m);
2957
2958 op_latency = op_cost;
2959
2960 new_limit.cost = best_cost.cost - op_cost;
2961 new_limit.latency = best_cost.latency - op_latency;
2962 synth_mult (alg_in, t / d, &new_limit, mode);
2963
2964 alg_in->cost.cost += op_cost;
2965 alg_in->cost.latency += op_latency;
2966 if (alg_in->cost.latency < op_cost)
2967 alg_in->cost.latency = op_cost;
2968 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2969 {
2970 best_cost = alg_in->cost;
2971 std::swap (alg_in, best_alg);
2972 best_alg->log[best_alg->ops] = m;
2973 best_alg->op[best_alg->ops] = alg_sub_factor;
2974 }
2975 break;
2976 }
2977 }
2978 if (cache_hit)
2979 goto done;
2980
2981 /* Try shift-and-add (load effective address) instructions,
2982 i.e. do a*3, a*5, a*9. */
2983 if ((t & 1) != 0)
2984 {
2985 do_alg_add_t2_m:
2986 q = t - 1;
2987 m = ctz_hwi (q);
2988 if (q && m < maxm)
2989 {
2990 op_cost = shiftadd_cost (speed, mode, m);
2991 new_limit.cost = best_cost.cost - op_cost;
2992 new_limit.latency = best_cost.latency - op_cost;
2993 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2994
2995 alg_in->cost.cost += op_cost;
2996 alg_in->cost.latency += op_cost;
2997 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2998 {
2999 best_cost = alg_in->cost;
3000 std::swap (alg_in, best_alg);
3001 best_alg->log[best_alg->ops] = m;
3002 best_alg->op[best_alg->ops] = alg_add_t2_m;
3003 }
3004 }
3005 if (cache_hit)
3006 goto done;
3007
3008 do_alg_sub_t2_m:
3009 q = t + 1;
3010 m = ctz_hwi (q);
3011 if (q && m < maxm)
3012 {
3013 op_cost = shiftsub0_cost (speed, mode, m);
3014 new_limit.cost = best_cost.cost - op_cost;
3015 new_limit.latency = best_cost.latency - op_cost;
3016 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3017
3018 alg_in->cost.cost += op_cost;
3019 alg_in->cost.latency += op_cost;
3020 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3021 {
3022 best_cost = alg_in->cost;
3023 std::swap (alg_in, best_alg);
3024 best_alg->log[best_alg->ops] = m;
3025 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3026 }
3027 }
3028 if (cache_hit)
3029 goto done;
3030 }
3031
3032 done:
3033 /* If best_cost has not decreased, we have not found any algorithm. */
3034 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3035 {
3036 /* We failed to find an algorithm. Record alg_impossible for
3037 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3038 we are asked to find an algorithm for T within the same or
3039 lower COST_LIMIT, we can immediately return to the
3040 caller. */
3041 entry_ptr->t = t;
3042 entry_ptr->mode = mode;
3043 entry_ptr->speed = speed;
3044 entry_ptr->alg = alg_impossible;
3045 entry_ptr->cost = *cost_limit;
3046 return;
3047 }
3048
3049 /* Cache the result. */
3050 if (!cache_hit)
3051 {
3052 entry_ptr->t = t;
3053 entry_ptr->mode = mode;
3054 entry_ptr->speed = speed;
3055 entry_ptr->alg = best_alg->op[best_alg->ops];
3056 entry_ptr->cost.cost = best_cost.cost;
3057 entry_ptr->cost.latency = best_cost.latency;
3058 }
3059
3060 /* If we are getting a too long sequence for `struct algorithm'
3061 to record, make this search fail. */
3062 if (best_alg->ops == MAX_BITS_PER_WORD)
3063 return;
3064
3065 /* Copy the algorithm from temporary space to the space at alg_out.
3066 We avoid using structure assignment because the majority of
3067 best_alg is normally undefined, and this is a critical function. */
3068 alg_out->ops = best_alg->ops + 1;
3069 alg_out->cost = best_cost;
3070 memcpy (alg_out->op, best_alg->op,
3071 alg_out->ops * sizeof *alg_out->op);
3072 memcpy (alg_out->log, best_alg->log,
3073 alg_out->ops * sizeof *alg_out->log);
3074 }
3075 \f
3076 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3077 Try three variations:
3078
3079 - a shift/add sequence based on VAL itself
3080 - a shift/add sequence based on -VAL, followed by a negation
3081 - a shift/add sequence based on VAL - 1, followed by an addition.
3082
3083 Return true if the cheapest of these cost less than MULT_COST,
3084 describing the algorithm in *ALG and final fixup in *VARIANT. */
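 /* For illustration: for VAL == -5 the negate_variant can win, i.e.
 synthesize X*5 as (X << 2) + X and then negate the result. The
 add_variant instead synthesizes VAL - 1 and finishes with a single
 addition of the original operand, which helps when VAL - 1 is much
 simpler to synthesize than VAL itself. */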
3085
3086 bool
3087 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3088 struct algorithm *alg, enum mult_variant *variant,
3089 int mult_cost)
3090 {
3091 struct algorithm alg2;
3092 struct mult_cost limit;
3093 int op_cost;
3094 bool speed = optimize_insn_for_speed_p ();
3095
3096 /* Fail quickly for impossible bounds. */
3097 if (mult_cost < 0)
3098 return false;
3099
3100 /* Ensure that mult_cost provides a reasonable upper bound.
3101 Any constant multiplication can be performed with less
3102 than 2 * bits additions. */
3103 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3104 if (mult_cost > op_cost)
3105 mult_cost = op_cost;
3106
3107 *variant = basic_variant;
3108 limit.cost = mult_cost;
3109 limit.latency = mult_cost;
3110 synth_mult (alg, val, &limit, mode);
3111
3112 /* This works only if the inverted value actually fits in an
3113 `unsigned int'. */
3114 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3115 {
3116 op_cost = neg_cost (speed, mode);
3117 if (MULT_COST_LESS (&alg->cost, mult_cost))
3118 {
3119 limit.cost = alg->cost.cost - op_cost;
3120 limit.latency = alg->cost.latency - op_cost;
3121 }
3122 else
3123 {
3124 limit.cost = mult_cost - op_cost;
3125 limit.latency = mult_cost - op_cost;
3126 }
3127
3128 synth_mult (&alg2, -val, &limit, mode);
3129 alg2.cost.cost += op_cost;
3130 alg2.cost.latency += op_cost;
3131 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3132 *alg = alg2, *variant = negate_variant;
3133 }
3134
3135 /* This proves very useful for division-by-constant. */
3136 op_cost = add_cost (speed, mode);
3137 if (MULT_COST_LESS (&alg->cost, mult_cost))
3138 {
3139 limit.cost = alg->cost.cost - op_cost;
3140 limit.latency = alg->cost.latency - op_cost;
3141 }
3142 else
3143 {
3144 limit.cost = mult_cost - op_cost;
3145 limit.latency = mult_cost - op_cost;
3146 }
3147
3148 synth_mult (&alg2, val - 1, &limit, mode);
3149 alg2.cost.cost += op_cost;
3150 alg2.cost.latency += op_cost;
3151 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3152 *alg = alg2, *variant = add_variant;
3153
3154 return MULT_COST_LESS (&alg->cost, mult_cost);
3155 }
3156
3157 /* A subroutine of expand_mult, used for constant multiplications.
3158 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3159 convenient. Use the shift/add sequence described by ALG and apply
3160 the final fixup specified by VARIANT. */
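 /* Worked example (one possible ALG, illustrative only): for VAL == 9
 with ALG == { alg_m, alg_shift (log 3), alg_add_t_m2 (log 0) },
 ACCUM starts as OP0 (val_so_far 1), is shifted left by 3
 (val_so_far 8) and then has OP0 added to it (val_so_far 9); the
 assertion at the end checks that val_so_far matches VAL in the
 result mode. */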
3161
3162 static rtx
3163 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3164 rtx target, const struct algorithm *alg,
3165 enum mult_variant variant)
3166 {
3167 unsigned HOST_WIDE_INT val_so_far;
3168 rtx_insn *insn;
3169 rtx accum, tem;
3170 int opno;
3171 machine_mode nmode;
3172
3173 /* Avoid referencing memory over and over and invalid sharing
3174 on SUBREGs. */
3175 op0 = force_reg (mode, op0);
3176
3177 /* ACCUM starts out either as OP0 or as a zero, depending on
3178 the first operation. */
3179
3180 if (alg->op[0] == alg_zero)
3181 {
3182 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3183 val_so_far = 0;
3184 }
3185 else if (alg->op[0] == alg_m)
3186 {
3187 accum = copy_to_mode_reg (mode, op0);
3188 val_so_far = 1;
3189 }
3190 else
3191 gcc_unreachable ();
3192
3193 for (opno = 1; opno < alg->ops; opno++)
3194 {
3195 int log = alg->log[opno];
3196 rtx shift_subtarget = optimize ? 0 : accum;
3197 rtx add_target
3198 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3199 && !optimize)
3200 ? target : 0;
3201 rtx accum_target = optimize ? 0 : accum;
3202 rtx accum_inner;
3203
3204 switch (alg->op[opno])
3205 {
3206 case alg_shift:
3207 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3208 /* REG_EQUAL note will be attached to the following insn. */
3209 emit_move_insn (accum, tem);
3210 val_so_far <<= log;
3211 break;
3212
3213 case alg_add_t_m2:
3214 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3215 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3216 add_target ? add_target : accum_target);
3217 val_so_far += HOST_WIDE_INT_1U << log;
3218 break;
3219
3220 case alg_sub_t_m2:
3221 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3222 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3223 add_target ? add_target : accum_target);
3224 val_so_far -= HOST_WIDE_INT_1U << log;
3225 break;
3226
3227 case alg_add_t2_m:
3228 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3229 log, shift_subtarget, 0);
3230 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3231 add_target ? add_target : accum_target);
3232 val_so_far = (val_so_far << log) + 1;
3233 break;
3234
3235 case alg_sub_t2_m:
3236 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3237 log, shift_subtarget, 0);
3238 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3239 add_target ? add_target : accum_target);
3240 val_so_far = (val_so_far << log) - 1;
3241 break;
3242
3243 case alg_add_factor:
3244 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3245 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3246 add_target ? add_target : accum_target);
3247 val_so_far += val_so_far << log;
3248 break;
3249
3250 case alg_sub_factor:
3251 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3252 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3253 (add_target
3254 ? add_target : (optimize ? 0 : tem)));
3255 val_so_far = (val_so_far << log) - val_so_far;
3256 break;
3257
3258 default:
3259 gcc_unreachable ();
3260 }
3261
3262 if (SCALAR_INT_MODE_P (mode))
3263 {
3264 /* Write a REG_EQUAL note on the last insn so that we can cse
3265 multiplication sequences. Note that if ACCUM is a SUBREG,
3266 we've set the inner register and must properly indicate that. */
3267 tem = op0, nmode = mode;
3268 accum_inner = accum;
3269 if (GET_CODE (accum) == SUBREG)
3270 {
3271 accum_inner = SUBREG_REG (accum);
3272 nmode = GET_MODE (accum_inner);
3273 tem = gen_lowpart (nmode, op0);
3274 }
3275
3276 insn = get_last_insn ();
3277 set_dst_reg_note (insn, REG_EQUAL,
3278 gen_rtx_MULT (nmode, tem,
3279 gen_int_mode (val_so_far, nmode)),
3280 accum_inner);
3281 }
3282 }
3283
3284 if (variant == negate_variant)
3285 {
3286 val_so_far = -val_so_far;
3287 accum = expand_unop (mode, neg_optab, accum, target, 0);
3288 }
3289 else if (variant == add_variant)
3290 {
3291 val_so_far = val_so_far + 1;
3292 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3293 }
3294
3295 /* Compare only the bits of val and val_so_far that are significant
3296 in the result mode, to avoid sign-/zero-extension confusion. */
3297 nmode = GET_MODE_INNER (mode);
3298 val &= GET_MODE_MASK (nmode);
3299 val_so_far &= GET_MODE_MASK (nmode);
3300 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3301
3302 return accum;
3303 }
3304
3305 /* Perform a multiplication and return an rtx for the result.
3306 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3307 TARGET is a suggestion for where to store the result (an rtx).
3308
3309 We check specially for a constant integer as OP1.
3310 If you want this check for OP0 as well, then before calling
3311 you should swap the two operands if OP0 would be constant. */
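 /* For illustration only (hypothetical caller, x is a placeholder
 SImode rtx): expanding X * 9 might come down to

 expand_mult (SImode, x, GEN_INT (9), NULL_RTX, 1);

 which goes through synth_mult/expand_mult_const and typically emits
 a shift-and-add sequence rather than a mult insn when the cost
 tables say that is cheaper. */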
3312
3313 rtx
3314 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3315 int unsignedp)
3316 {
3317 enum mult_variant variant;
3318 struct algorithm algorithm;
3319 rtx scalar_op1;
3320 int max_cost;
3321 bool speed = optimize_insn_for_speed_p ();
3322 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3323
3324 if (CONSTANT_P (op0))
3325 std::swap (op0, op1);
3326
3327 /* For vectors, there are several simplifications that can be made if
3328 all elements of the vector constant are identical. */
3329 scalar_op1 = unwrap_const_vec_duplicate (op1);
3330
3331 if (INTEGRAL_MODE_P (mode))
3332 {
3333 rtx fake_reg;
3334 HOST_WIDE_INT coeff;
3335 bool is_neg;
3336 int mode_bitsize;
3337
3338 if (op1 == CONST0_RTX (mode))
3339 return op1;
3340 if (op1 == CONST1_RTX (mode))
3341 return op0;
3342 if (op1 == CONSTM1_RTX (mode))
3343 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3344 op0, target, 0);
3345
3346 if (do_trapv)
3347 goto skip_synth;
3348
3349 /* If mode is an integer vector mode, check if the backend supports
3350 vector lshift (by scalar or vector) at all. If not, we can't use
3351 a synthesized multiply. */
3352 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3353 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3354 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3355 goto skip_synth;
3356
3357 /* These are the operations that are potentially turned into
3358 a sequence of shifts and additions. */
3359 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3360
3361 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3362 less than or equal in size to `unsigned int' this doesn't matter.
3363 If the mode is larger than `unsigned int', then synth_mult works
3364 only if the constant value exactly fits in an `unsigned int' without
3365 any truncation. This means that multiplying by negative values does
3366 not work; results are off by 2^32 on a 32-bit machine. */
3367 if (CONST_INT_P (scalar_op1))
3368 {
3369 coeff = INTVAL (scalar_op1);
3370 is_neg = coeff < 0;
3371 }
3372 #if TARGET_SUPPORTS_WIDE_INT
3373 else if (CONST_WIDE_INT_P (scalar_op1))
3374 #else
3375 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3376 #endif
3377 {
3378 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3379 /* Perfect power of 2 (other than 1, which is handled above). */
3380 if (shift > 0)
3381 return expand_shift (LSHIFT_EXPR, mode, op0,
3382 shift, target, unsignedp);
3383 else
3384 goto skip_synth;
3385 }
3386 else
3387 goto skip_synth;
3388
3389 /* We used to test optimize here, on the grounds that it's better to
3390 produce a smaller program when -O is not used. But this causes
3391 such a terrible slowdown sometimes that it seems better to always
3392 use synth_mult. */
3393
3394 /* Special case powers of two. */
3395 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3396 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3397 return expand_shift (LSHIFT_EXPR, mode, op0,
3398 floor_log2 (coeff), target, unsignedp);
3399
3400 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3401
3402 /* Attempt to handle multiplication of DImode values by negative
3403 coefficients, by performing the multiplication by a positive
3404 multiplier and then inverting the result. */
3405 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3406 {
3407 /* It's safe to use -coeff even for INT_MIN, as the
3408 result is interpreted as an unsigned coefficient.
3409 Exclude cost of op0 from max_cost to match the cost
3410 calculation of the synth_mult. */
3411 coeff = -(unsigned HOST_WIDE_INT) coeff;
3412 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3413 mode, speed)
3414 - neg_cost (speed, mode));
3415 if (max_cost <= 0)
3416 goto skip_synth;
3417
3418 /* Special case powers of two. */
3419 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3420 {
3421 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3422 floor_log2 (coeff), target, unsignedp);
3423 return expand_unop (mode, neg_optab, temp, target, 0);
3424 }
3425
3426 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3427 max_cost))
3428 {
3429 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3430 &algorithm, variant);
3431 return expand_unop (mode, neg_optab, temp, target, 0);
3432 }
3433 goto skip_synth;
3434 }
3435
3436 /* Exclude cost of op0 from max_cost to match the cost
3437 calculation of the synth_mult. */
3438 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3439 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3440 return expand_mult_const (mode, op0, coeff, target,
3441 &algorithm, variant);
3442 }
3443 skip_synth:
3444
3445 /* Expand x*2.0 as x+x. */
3446 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3447 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3448 {
3449 op0 = force_reg (GET_MODE (op0), op0);
3450 return expand_binop (mode, add_optab, op0, op0,
3451 target, unsignedp, OPTAB_LIB_WIDEN);
3452 }
3453
3454 /* This used to use umul_optab if unsigned, but for non-widening multiply
3455 there is no difference between signed and unsigned. */
3456 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3457 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3458 gcc_assert (op0);
3459 return op0;
3460 }
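
/* Illustrative sketch (not part of GCC; the helper name is made up): the
   negative-coefficient handling above multiplies by the absolute value and
   negates the result, e.g. x * -8 becomes -(x << 3); in 32-bit unsigned
   arithmetic the identity holds modulo 2^32.  */
#if 0
#include <stdint.h>

static uint32_t
example_mult_by_minus_8 (uint32_t x)
{
  uint32_t positive_product = x << 3;   /* x * 8 */
  return 0u - positive_product;         /* x * -8 modulo 2^32 */
}
#endif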
3461
3462 /* Return a cost estimate for multiplying a register by the given
3463 COEFFicient in the given MODE and SPEED. */
3464
3465 int
3466 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3467 {
3468 int max_cost;
3469 struct algorithm algorithm;
3470 enum mult_variant variant;
3471
3472 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3473 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3474 mode, speed);
3475 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3476 return algorithm.cost.cost;
3477 else
3478 return max_cost;
3479 }
3480
3481 /* Perform a widening multiplication and return an rtx for the result.
3482 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3483 TARGET is a suggestion for where to store the result (an rtx).
3484 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3485 or smul_widen_optab.
3486
3487 We check specially for a constant integer as OP1, comparing the
3488 cost of a widening multiply against the cost of a sequence of shifts
3489 and adds. */
3490
3491 rtx
3492 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3493 int unsignedp, optab this_optab)
3494 {
3495 bool speed = optimize_insn_for_speed_p ();
3496 rtx cop1;
3497
3498 if (CONST_INT_P (op1)
3499 && GET_MODE (op0) != VOIDmode
3500 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3501 this_optab == umul_widen_optab))
3502 && CONST_INT_P (cop1)
3503 && (INTVAL (cop1) >= 0
3504 || HWI_COMPUTABLE_MODE_P (mode)))
3505 {
3506 HOST_WIDE_INT coeff = INTVAL (cop1);
3507 int max_cost;
3508 enum mult_variant variant;
3509 struct algorithm algorithm;
3510
3511 if (coeff == 0)
3512 return CONST0_RTX (mode);
3513
3514 /* Special case powers of two. */
3515 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3516 {
3517 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3518 return expand_shift (LSHIFT_EXPR, mode, op0,
3519 floor_log2 (coeff), target, unsignedp);
3520 }
3521
3522 /* Exclude cost of op0 from max_cost to match the cost
3523 calculation of the synth_mult. */
3524 max_cost = mul_widen_cost (speed, mode);
3525 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3526 max_cost))
3527 {
3528 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3529 return expand_mult_const (mode, op0, coeff, target,
3530 &algorithm, variant);
3531 }
3532 }
3533 return expand_binop (mode, this_optab, op0, op1, target,
3534 unsignedp, OPTAB_LIB_WIDEN);
3535 }
3536 \f
3537 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3538 replace division by D, and put the least significant N bits of the result
3539 in *MULTIPLIER_PTR and return the most significant bit.
3540
3541 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3542 needed precision is in PRECISION (should be <= N).
3543
3544 PRECISION should be as small as possible so this function can choose
3545 a multiplier more freely.
3546
3547 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3548 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3549
3550 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3551 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3552
3553 unsigned HOST_WIDE_INT
3554 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3555 unsigned HOST_WIDE_INT *multiplier_ptr,
3556 int *post_shift_ptr, int *lgup_ptr)
3557 {
3558 int lgup, post_shift;
3559 int pow, pow2;
3560
3561 /* lgup = ceil(log2(divisor)); */
3562 lgup = ceil_log2 (d);
3563
3564 gcc_assert (lgup <= n);
3565
3566 pow = n + lgup;
3567 pow2 = n + lgup - precision;
3568
3569 /* mlow = 2^(N + lgup)/d */
3570 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3571 wide_int mlow = wi::udiv_trunc (val, d);
3572
3573 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3574 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3575 wide_int mhigh = wi::udiv_trunc (val, d);
3576
3577 /* If precision == N, then mlow, mhigh exceed 2^N
3578 (but they do not exceed 2^(N+1)). */
3579
3580 /* Reduce to lowest terms. */
3581 for (post_shift = lgup; post_shift > 0; post_shift--)
3582 {
3583 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3584 HOST_BITS_PER_WIDE_INT);
3585 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3586 HOST_BITS_PER_WIDE_INT);
3587 if (ml_lo >= mh_lo)
3588 break;
3589
3590 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3591 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3592 }
3593
3594 *post_shift_ptr = post_shift;
3595 *lgup_ptr = lgup;
3596 if (n < HOST_BITS_PER_WIDE_INT)
3597 {
3598 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3599 *multiplier_ptr = mhigh.to_uhwi () & mask;
3600 return mhigh.to_uhwi () >= mask;
3601 }
3602 else
3603 {
3604 *multiplier_ptr = mhigh.to_uhwi ();
3605 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3606 }
3607 }
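
/* Illustrative sketch (not part of GCC; the helper name is made up): for
   d = 3 with n = precision = 32, the search above yields the familiar
   "magic" multiplier 0xAAAAAAAB with a post-shift of 1, so the quotient is
   the 64-bit product shifted right by 33 bits.  */
#if 0
#include <stdint.h>

static uint32_t
example_udiv_by_3 (uint32_t x)
{
  /* x / 3 == (x * 0xAAAAAAAB) >> 33 for every uint32_t x.  */
  return (uint32_t) (((uint64_t) x * 0xAAAAAAABu) >> 33);
}
#endif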
3608
3609 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3610 congruent to 1 (mod 2**N). */
3611
3612 static unsigned HOST_WIDE_INT
3613 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3614 {
3615 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3616
3617 /* The algorithm notes that the choice y = x satisfies
3618 x*y == 1 mod 2^3, since x is assumed odd.
3619 Each iteration doubles the number of bits of significance in y. */
3620
3621 unsigned HOST_WIDE_INT mask;
3622 unsigned HOST_WIDE_INT y = x;
3623 int nbit = 3;
3624
3625 mask = (n == HOST_BITS_PER_WIDE_INT
3626 ? HOST_WIDE_INT_M1U
3627 : (HOST_WIDE_INT_1U << n) - 1);
3628
3629 while (nbit < n)
3630 {
3631 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3632 nbit *= 2;
3633 }
3634 return y;
3635 }
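
/* Illustrative sketch (not part of GCC; the helper name is made up): the
   Newton iteration above specialised to 32 bits.  y = x is already an
   inverse of an odd x modulo 2^3, and each step doubles the number of
   correct low bits, so four steps suffice; e.g. the inverse of 7 is
   0xB6DB6DB7, and 7 * 0xB6DB6DB7 == 1 modulo 2^32.  */
#if 0
#include <stdint.h>

static uint32_t
example_invert_mod2_32 (uint32_t x)     /* X must be odd.  */
{
  uint32_t y = x;
  for (int nbit = 3; nbit < 32; nbit *= 2)
    y = y * (2 - x * y);                /* unsigned arithmetic is mod 2^32 */
  return y;
}
#endif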
3636
3637 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3638 done with the wrong signedness. ADJ_OPERAND is already the high half of the
3639 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3640 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3641 become signed.
3642
3643 The result is put in TARGET if that is convenient.
3644
3645 MODE is the mode of operation. */
3646
3647 rtx
3648 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3649 rtx op1, rtx target, int unsignedp)
3650 {
3651 rtx tem;
3652 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3653
3654 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3655 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3656 tem = expand_and (mode, tem, op1, NULL_RTX);
3657 adj_operand
3658 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3659 adj_operand);
3660
3661 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3662 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3663 tem = expand_and (mode, tem, op0, NULL_RTX);
3664 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3665 target);
3666
3667 return target;
3668 }
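
/* Illustrative sketch (not part of GCC; the helper name is made up): the
   adjustment above in 16-bit arithmetic.  The low halves of the signed and
   unsigned products agree, and the high halves differ by the other operand
   for each negative input, so the unsigned high part can be recovered from
   the signed one by two conditional additions (and vice versa with
   subtractions).  */
#if 0
#include <stdint.h>

static uint16_t
example_umulhi16_from_smulhi16 (int16_t x, int16_t y)
{
  /* Bits 31..16 of the signed 32-bit product, taken with a logical shift.  */
  uint16_t signed_high = (uint16_t) ((uint32_t) ((int32_t) x * y) >> 16);
  uint16_t adj0 = x < 0 ? (uint16_t) y : 0;     /* (x >> 15) & y above */
  uint16_t adj1 = y < 0 ? (uint16_t) x : 0;     /* (y >> 15) & x above */
  return (uint16_t) (signed_high + adj0 + adj1);
}
#endif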
3669
3670 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3671
3672 static rtx
3673 extract_high_half (scalar_int_mode mode, rtx op)
3674 {
3675 machine_mode wider_mode;
3676
3677 if (mode == word_mode)
3678 return gen_highpart (mode, op);
3679
3680 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3681
3682 wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3683 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3684 GET_MODE_BITSIZE (mode), 0, 1);
3685 return convert_modes (mode, wider_mode, op, 0);
3686 }
3687
3688 /* Like expmed_mult_highpart, but only consider using a multiplication
3689 optab. OP1 is an rtx for the constant operand. */
3690
3691 static rtx
3692 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3693 rtx target, int unsignedp, int max_cost)
3694 {
3695 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3696 machine_mode wider_mode;
3697 optab moptab;
3698 rtx tem;
3699 int size;
3700 bool speed = optimize_insn_for_speed_p ();
3701
3702 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3703
3704 wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3705 size = GET_MODE_BITSIZE (mode);
3706
3707 /* Firstly, try using a multiplication insn that only generates the needed
3708 high part of the product, and in the sign flavor of unsignedp. */
3709 if (mul_highpart_cost (speed, mode) < max_cost)
3710 {
3711 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3712 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3713 unsignedp, OPTAB_DIRECT);
3714 if (tem)
3715 return tem;
3716 }
3717
3718 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3719 Need to adjust the result after the multiplication. */
3720 if (size - 1 < BITS_PER_WORD
3721 && (mul_highpart_cost (speed, mode)
3722 + 2 * shift_cost (speed, mode, size-1)
3723 + 4 * add_cost (speed, mode) < max_cost))
3724 {
3725 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3726 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3727 unsignedp, OPTAB_DIRECT);
3728 if (tem)
3729 /* We used the wrong signedness. Adjust the result. */
3730 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3731 tem, unsignedp);
3732 }
3733
3734 /* Try widening multiplication. */
3735 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3736 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3737 && mul_widen_cost (speed, wider_mode) < max_cost)
3738 {
3739 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3740 unsignedp, OPTAB_WIDEN);
3741 if (tem)
3742 return extract_high_half (mode, tem);
3743 }
3744
3745 /* Try widening the mode and performing a non-widening multiplication. */
3746 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3747 && size - 1 < BITS_PER_WORD
3748 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3749 < max_cost))
3750 {
3751 rtx_insn *insns;
3752 rtx wop0, wop1;
3753
3754 /* We need to widen the operands, for example to ensure the
3755 constant multiplier is correctly sign or zero extended.
3756 Use a sequence to clean up any instructions emitted by
3757 the conversions if things don't work out. */
3758 start_sequence ();
3759 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3760 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3761 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3762 unsignedp, OPTAB_WIDEN);
3763 insns = get_insns ();
3764 end_sequence ();
3765
3766 if (tem)
3767 {
3768 emit_insn (insns);
3769 return extract_high_half (mode, tem);
3770 }
3771 }
3772
3773 /* Try widening multiplication of opposite signedness, and adjust. */
3774 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3775 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3776 && size - 1 < BITS_PER_WORD
3777 && (mul_widen_cost (speed, wider_mode)
3778 + 2 * shift_cost (speed, mode, size-1)
3779 + 4 * add_cost (speed, mode) < max_cost))
3780 {
3781 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3782 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3783 if (tem != 0)
3784 {
3785 tem = extract_high_half (mode, tem);
3786 /* We used the wrong signedness. Adjust the result. */
3787 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3788 target, unsignedp);
3789 }
3790 }
3791
3792 return 0;
3793 }
3794
3795 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3796 putting the high half of the result in TARGET if that is convenient,
3797 and return where the result is. If the operation cannot be performed,
3798 0 is returned.
3799
3800 MODE is the mode of operation and result.
3801
3802 UNSIGNEDP nonzero means unsigned multiply.
3803
3804 MAX_COST is the total allowed cost for the expanded RTL. */
3805
3806 static rtx
3807 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3808 rtx target, int unsignedp, int max_cost)
3809 {
3810 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3811 unsigned HOST_WIDE_INT cnst1;
3812 int extra_cost;
3813 bool sign_adjust = false;
3814 enum mult_variant variant;
3815 struct algorithm alg;
3816 rtx tem;
3817 bool speed = optimize_insn_for_speed_p ();
3818
3819 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3820 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3821 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3822
3823 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3824
3825 /* We can't optimize modes wider than BITS_PER_WORD.
3826 ??? We might be able to perform double-word arithmetic if
3827 mode == word_mode, however all the cost calculations in
3828 synth_mult etc. assume single-word operations. */
3829 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3830 return expmed_mult_highpart_optab (mode, op0, op1, target,
3831 unsignedp, max_cost);
3832
3833 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3834
3835 /* Check whether we try to multiply by a negative constant. */
3836 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3837 {
3838 sign_adjust = true;
3839 extra_cost += add_cost (speed, mode);
3840 }
3841
3842 /* See whether shift/add multiplication is cheap enough. */
3843 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3844 max_cost - extra_cost))
3845 {
3846 /* See whether the specialized multiplication optabs are
3847 cheaper than the shift/add version. */
3848 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3849 alg.cost.cost + extra_cost);
3850 if (tem)
3851 return tem;
3852
3853 tem = convert_to_mode (wider_mode, op0, unsignedp);
3854 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3855 tem = extract_high_half (mode, tem);
3856
3857 /* Adjust result for signedness. */
3858 if (sign_adjust)
3859 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3860
3861 return tem;
3862 }
3863 return expmed_mult_highpart_optab (mode, op0, op1, target,
3864 unsignedp, max_cost);
3865 }
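
/* Worked example (added for illustration, not in the original): for a
   16-bit mode, a constant with its sign bit set is, read as signed, its
   unsigned value minus 2^16, so the signed high part equals the unsigned
   high part minus the other operand; e.g. high16 (100 * 0xFFFB) = 99 and
   99 - 100 = -1 = high16 (100 * -5), which is what the sign_adjust
   subtraction above computes.  */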
3866
3867
3868 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3869
3870 static rtx
3871 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3872 {
3873 rtx result, temp, shift;
3874 rtx_code_label *label;
3875 int logd;
3876 int prec = GET_MODE_PRECISION (mode);
3877
3878 logd = floor_log2 (d);
3879 result = gen_reg_rtx (mode);
3880
3881 /* Avoid conditional branches when they're expensive. */
3882 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3883 && optimize_insn_for_speed_p ())
3884 {
3885 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3886 mode, 0, -1);
3887 if (signmask)
3888 {
3889 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3890 signmask = force_reg (mode, signmask);
3891 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3892
3893 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3894 which instruction sequence to use. If logical right shifts
3895 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3896 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3897
3898 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3899 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3900 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3901 > COSTS_N_INSNS (2)))
3902 {
3903 temp = expand_binop (mode, xor_optab, op0, signmask,
3904 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3905 temp = expand_binop (mode, sub_optab, temp, signmask,
3906 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3907 temp = expand_binop (mode, and_optab, temp,
3908 gen_int_mode (masklow, mode),
3909 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3910 temp = expand_binop (mode, xor_optab, temp, signmask,
3911 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3912 temp = expand_binop (mode, sub_optab, temp, signmask,
3913 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3914 }
3915 else
3916 {
3917 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3918 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3919 signmask = force_reg (mode, signmask);
3920
3921 temp = expand_binop (mode, add_optab, op0, signmask,
3922 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3923 temp = expand_binop (mode, and_optab, temp,
3924 gen_int_mode (masklow, mode),
3925 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3926 temp = expand_binop (mode, sub_optab, temp, signmask,
3927 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3928 }
3929 return temp;
3930 }
3931 }
3932
3933 /* Mask contains the mode's signbit and the significant bits of the
3934 modulus. By including the signbit in the operation, many targets
3935 can avoid an explicit compare operation in the following comparison
3936 against zero. */
3937 wide_int mask = wi::mask (logd, false, prec);
3938 mask = wi::set_bit (mask, prec - 1);
3939
3940 temp = expand_binop (mode, and_optab, op0,
3941 immed_wide_int_const (mask, mode),
3942 result, 1, OPTAB_LIB_WIDEN);
3943 if (temp != result)
3944 emit_move_insn (result, temp);
3945
3946 label = gen_label_rtx ();
3947 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3948
3949 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3950 0, OPTAB_LIB_WIDEN);
3951
3952 mask = wi::mask (logd, true, prec);
3953 temp = expand_binop (mode, ior_optab, temp,
3954 immed_wide_int_const (mask, mode),
3955 result, 1, OPTAB_LIB_WIDEN);
3956 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3957 0, OPTAB_LIB_WIDEN);
3958 if (temp != result)
3959 emit_move_insn (result, temp);
3960 emit_label (label);
3961 return result;
3962 }
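
/* Illustrative sketch (not part of GCC; the helper name is made up): the
   add/mask/subtract sequence above for a 32-bit truncating remainder by
   2^k.  The bias is 2^k - 1 for negative inputs and 0 otherwise, which
   makes the masking round toward zero, e.g. -5 % 4 == ((-5 + 3) & 3) - 3
   == -1.  */
#if 0
#include <stdint.h>

static int32_t
example_srem_pow2 (int32_t x, int k)            /* 0 < k < 32 */
{
  uint32_t signmask = x < 0 ? 0xFFFFFFFFu : 0;  /* the store-flag result */
  uint32_t bias = signmask >> (32 - k);         /* 2^k - 1 if x < 0, else 0 */
  uint32_t rem = (((uint32_t) x + bias) & ((1u << k) - 1)) - bias;
  return (int32_t) rem;
}
#endif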
3963
3964 /* Expand signed division of OP0 by a power of two D in mode MODE.
3965 This routine is only called for positive values of D. */
3966
3967 static rtx
3968 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3969 {
3970 rtx temp;
3971 rtx_code_label *label;
3972 int logd;
3973
3974 logd = floor_log2 (d);
3975
3976 if (d == 2
3977 && BRANCH_COST (optimize_insn_for_speed_p (),
3978 false) >= 1)
3979 {
3980 temp = gen_reg_rtx (mode);
3981 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3982 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3983 0, OPTAB_LIB_WIDEN);
3984 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3985 }
3986
3987 if (HAVE_conditional_move
3988 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3989 {
3990 rtx temp2;
3991
3992 start_sequence ();
3993 temp2 = copy_to_mode_reg (mode, op0);
3994 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3995 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3996 temp = force_reg (mode, temp);
3997
3998 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3999 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4000 mode, temp, temp2, mode, 0);
4001 if (temp2)
4002 {
4003 rtx_insn *seq = get_insns ();
4004 end_sequence ();
4005 emit_insn (seq);
4006 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4007 }
4008 end_sequence ();
4009 }
4010
4011 if (BRANCH_COST (optimize_insn_for_speed_p (),
4012 false) >= 2)
4013 {
4014 int ushift = GET_MODE_BITSIZE (mode) - logd;
4015
4016 temp = gen_reg_rtx (mode);
4017 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4018 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4019 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4020 > COSTS_N_INSNS (1))
4021 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
4022 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4023 else
4024 temp = expand_shift (RSHIFT_EXPR, mode, temp,
4025 ushift, NULL_RTX, 1);
4026 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4027 0, OPTAB_LIB_WIDEN);
4028 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4029 }
4030
4031 label = gen_label_rtx ();
4032 temp = copy_to_mode_reg (mode, op0);
4033 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4034 expand_inc (temp, gen_int_mode (d - 1, mode));
4035 emit_label (label);
4036 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4037 }
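
/* Illustrative sketch (not part of GCC; the helper name is made up): the
   branch-free sequence above for a 32-bit truncating division by 2^k.
   Adding 2^k - 1 to negative dividends before the arithmetic shift turns
   floor rounding into rounding toward zero, e.g. -7 / 4 becomes
   (-7 + 3) >> 2 == -1.  Assumes the usual two's-complement conversion and
   arithmetic right shift of negative values.  */
#if 0
#include <stdint.h>

static int32_t
example_sdiv_pow2 (int32_t x, int k)            /* 0 < k < 32 */
{
  uint32_t signmask = x < 0 ? 0xFFFFFFFFu : 0;  /* the store-flag result */
  uint32_t bias = signmask >> (32 - k);         /* 2^k - 1 if x < 0, else 0 */
  return (int32_t) ((uint32_t) x + bias) >> k;  /* arithmetic shift */
}
#endif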
4038 \f
4039 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4040 if that is convenient, and returning where the result is.
4041 You may request either the quotient or the remainder as the result;
4042 specify REM_FLAG nonzero to get the remainder.
4043
4044 CODE is the expression code for which kind of division this is;
4045 it controls how rounding is done. MODE is the machine mode to use.
4046 UNSIGNEDP nonzero means do unsigned division. */
4047
4048 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4049 and then correct it by or'ing in missing high bits
4050 if result of ANDI is nonzero.
4051 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4052 This could optimize to a bfexts instruction.
4053 But C doesn't use these operations, so their optimizations are
4054 left for later. */
4055 /* ??? For modulo, we don't actually need the highpart of the first product,
4056 the low part will do nicely. And for small divisors, the second multiply
4057 can also be a low-part only multiply or even be completely left out.
4058 E.g. to calculate the remainder of a division by 3 with a 32 bit
4059 multiply, multiply with 0x55555556 and extract the upper two bits;
4060 the result is exact for inputs up to 0x1fffffff.
4061 The input range can be reduced by using cross-sum rules.
4062 For odd divisors >= 3, the following table gives right shift counts
4063 so that if a number is shifted by an integer multiple of the given
4064 amount, the remainder stays the same:
4065 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4066 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4067 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4068 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4069 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4070
4071 Cross-sum rules for even numbers can be derived by leaving as many bits
4072 to the right alone as the divisor has zeros to the right.
4073 E.g. if x is an unsigned 32 bit number:
4074 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4075 */
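
/* Illustrative sketch (not part of GCC; the helper name is made up): the
   remainder-by-3 trick from the comment above, written out.  Per that
   comment it is exact for inputs up to 0x1fffffff.  */
#if 0
#include <stdint.h>

static uint32_t
example_rem3_by_mult (uint32_t x)     /* x <= 0x1fffffff */
{
  /* Multiply by 0x55555556 and keep the top two bits of the low half.  */
  return (x * 0x55555556u) >> 30;
}
#endif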
4076
4077 rtx
4078 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4079 rtx op0, rtx op1, rtx target, int unsignedp)
4080 {
4081 machine_mode compute_mode;
4082 rtx tquotient;
4083 rtx quotient = 0, remainder = 0;
4084 rtx_insn *last;
4085 rtx_insn *insn;
4086 optab optab1, optab2;
4087 int op1_is_constant, op1_is_pow2 = 0;
4088 int max_cost, extra_cost;
4089 static HOST_WIDE_INT last_div_const = 0;
4090 bool speed = optimize_insn_for_speed_p ();
4091
4092 op1_is_constant = CONST_INT_P (op1);
4093 if (op1_is_constant)
4094 {
4095 wide_int ext_op1 = rtx_mode_t (op1, mode);
4096 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4097 || (! unsignedp
4098 && wi::popcount (wi::neg (ext_op1)) == 1));
4099 }
4100
4101 /*
4102 This is the structure of expand_divmod:
4103
4104 First comes code to fix up the operands so we can perform the operations
4105 correctly and efficiently.
4106
4107 Second comes a switch statement with code specific for each rounding mode.
4108 For some special operands this code emits all RTL for the desired
4109 operation, for other cases, it generates only a quotient and stores it in
4110 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4111 to indicate that it has not done anything.
4112
4113 Last comes code that finishes the operation. If QUOTIENT is set and
4114 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4115 QUOTIENT is not set, it is computed using trunc rounding.
4116
4117 We try to generate special code for division and remainder when OP1 is a
4118 constant. If |OP1| = 2**n we can use shifts and some other fast
4119 operations. For other values of OP1, we compute a carefully selected
4120 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4121 by m.
4122
4123 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4124 half of the product. Different strategies for generating the product are
4125 implemented in expmed_mult_highpart.
4126
4127 If what we actually want is the remainder, we generate that by another
4128 by-constant multiplication and a subtraction. */
4129
4130 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4131 code below will malfunction if we are, so check here and handle
4132 the special case if so. */
4133 if (op1 == const1_rtx)
4134 return rem_flag ? const0_rtx : op0;
4135
4136 /* When dividing by -1, we could get an overflow.
4137 negv_optab can handle overflows. */
4138 if (! unsignedp && op1 == constm1_rtx)
4139 {
4140 if (rem_flag)
4141 return const0_rtx;
4142 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4143 ? negv_optab : neg_optab, op0, target, 0);
4144 }
4145
4146 if (target
4147 /* Don't use the function value register as a target
4148 since we have to read it as well as write it,
4149 and function-inlining gets confused by this. */
4150 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4151 /* Don't clobber an operand while doing a multi-step calculation. */
4152 || ((rem_flag || op1_is_constant)
4153 && (reg_mentioned_p (target, op0)
4154 || (MEM_P (op0) && MEM_P (target))))
4155 || reg_mentioned_p (target, op1)
4156 || (MEM_P (op1) && MEM_P (target))))
4157 target = 0;
4158
4159 /* Get the mode in which to perform this computation. Normally it will
4160 be MODE, but sometimes we can't do the desired operation in MODE.
4161 If so, pick a wider mode in which we can do the operation. Convert
4162 to that mode at the start to avoid repeated conversions.
4163
4164 First see what operations we need. These depend on the expression
4165 we are evaluating. (We assume that divxx3 insns exist under the
4166 same conditions as modxx3 insns, and that these insns don't normally
4167 fail. If these assumptions are not correct, we may generate less
4168 efficient code in some cases.)
4169
4170 Then see if we find a mode in which we can open-code that operation
4171 (either a division, modulus, or shift). Finally, check for the smallest
4172 mode for which we can do the operation with a library call. */
4173
4174 /* We might want to refine this now that we have division-by-constant
4175 optimization. Since expmed_mult_highpart tries so many variants, it is
4176 not straightforward to generalize this. Maybe we should make an array
4177 of possible modes in init_expmed? Save this for GCC 2.7. */
4178
4179 optab1 = (op1_is_pow2
4180 ? (unsignedp ? lshr_optab : ashr_optab)
4181 : (unsignedp ? udiv_optab : sdiv_optab));
4182 optab2 = (op1_is_pow2 ? optab1
4183 : (unsignedp ? udivmod_optab : sdivmod_optab));
4184
4185 FOR_EACH_MODE_FROM (compute_mode, mode)
4186 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4187 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4188 break;
4189
4190 if (compute_mode == VOIDmode)
4191 FOR_EACH_MODE_FROM (compute_mode, mode)
4192 if (optab_libfunc (optab1, compute_mode)
4193 || optab_libfunc (optab2, compute_mode))
4194 break;
4195
4196 /* If we still couldn't find a mode, use MODE, but expand_binop will
4197 probably die. */
4198 if (compute_mode == VOIDmode)
4199 compute_mode = mode;
4200
4201 if (target && GET_MODE (target) == compute_mode)
4202 tquotient = target;
4203 else
4204 tquotient = gen_reg_rtx (compute_mode);
4205
4206 #if 0
4207 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4208 (mode), and thereby get better code when OP1 is a constant. Do that
4209 later. It will require going over all usages of SIZE below. */
4210 size = GET_MODE_BITSIZE (mode);
4211 #endif
4212
4213 /* Only deduct something for a REM if the last divide done was
4214 for a different constant. Then set the constant of the last
4215 divide. */
4216 max_cost = (unsignedp
4217 ? udiv_cost (speed, compute_mode)
4218 : sdiv_cost (speed, compute_mode));
4219 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4220 && INTVAL (op1) == last_div_const))
4221 max_cost -= (mul_cost (speed, compute_mode)
4222 + add_cost (speed, compute_mode));
4223
4224 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4225
4226 /* Now convert to the best mode to use. */
4227 if (compute_mode != mode)
4228 {
4229 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4230 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4231
4232 /* convert_modes may have placed op1 into a register, so we
4233 must recompute the following. */
4234 op1_is_constant = CONST_INT_P (op1);
4235 if (op1_is_constant)
4236 {
4237 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4238 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4239 || (! unsignedp
4240 && wi::popcount (wi::neg (ext_op1)) == 1));
4241 }
4242 else
4243 op1_is_pow2 = 0;
4244 }
4245
4246 /* If one of the operands is a volatile MEM, copy it into a register. */
4247
4248 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4249 op0 = force_reg (compute_mode, op0);
4250 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4251 op1 = force_reg (compute_mode, op1);
4252
4253 /* If we need the remainder or if OP1 is constant, we need to
4254 put OP0 in a register in case it has any queued subexpressions. */
4255 if (rem_flag || op1_is_constant)
4256 op0 = force_reg (compute_mode, op0);
4257
4258 last = get_last_insn ();
4259
4260 /* Promote floor rounding to trunc rounding for unsigned operations. */
4261 if (unsignedp)
4262 {
4263 if (code == FLOOR_DIV_EXPR)
4264 code = TRUNC_DIV_EXPR;
4265 if (code == FLOOR_MOD_EXPR)
4266 code = TRUNC_MOD_EXPR;
4267 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4268 code = TRUNC_DIV_EXPR;
4269 }
4270
4271 if (op1 != const0_rtx)
4272 switch (code)
4273 {
4274 case TRUNC_MOD_EXPR:
4275 case TRUNC_DIV_EXPR:
4276 if (op1_is_constant)
4277 {
4278 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4279 int size = GET_MODE_BITSIZE (int_mode);
4280 if (unsignedp)
4281 {
4282 unsigned HOST_WIDE_INT mh, ml;
4283 int pre_shift, post_shift;
4284 int dummy;
4285 wide_int wd = rtx_mode_t (op1, int_mode);
4286 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4287
4288 if (wi::popcount (wd) == 1)
4289 {
4290 pre_shift = floor_log2 (d);
4291 if (rem_flag)
4292 {
4293 unsigned HOST_WIDE_INT mask
4294 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4295 remainder
4296 = expand_binop (int_mode, and_optab, op0,
4297 gen_int_mode (mask, int_mode),
4298 remainder, 1,
4299 OPTAB_LIB_WIDEN);
4300 if (remainder)
4301 return gen_lowpart (mode, remainder);
4302 }
4303 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4304 pre_shift, tquotient, 1);
4305 }
4306 else if (size <= HOST_BITS_PER_WIDE_INT)
4307 {
4308 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4309 {
4310 /* Most significant bit of divisor is set; emit an scc
4311 insn. */
4312 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4313 int_mode, 1, 1);
4314 }
4315 else
4316 {
4317 /* Find a suitable multiplier and right shift count
4318 instead of multiplying with D. */
4319
4320 mh = choose_multiplier (d, size, size,
4321 &ml, &post_shift, &dummy);
4322
4323 /* If the suggested multiplier is more than SIZE bits,
4324 we can do better for even divisors, using an
4325 initial right shift. */
4326 if (mh != 0 && (d & 1) == 0)
4327 {
4328 pre_shift = ctz_or_zero (d);
4329 mh = choose_multiplier (d >> pre_shift, size,
4330 size - pre_shift,
4331 &ml, &post_shift, &dummy);
4332 gcc_assert (!mh);
4333 }
4334 else
4335 pre_shift = 0;
4336
4337 if (mh != 0)
4338 {
4339 rtx t1, t2, t3, t4;
4340
4341 if (post_shift - 1 >= BITS_PER_WORD)
4342 goto fail1;
4343
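/* The sequence below is the usual fixup for a multiplier that needs
   SIZE + 1 bits: t1 = high part of op0 * ml, t2 = op0 - t1,
   t3 = t2 >> 1, t4 = t1 + t3, and the quotient is
   t4 >> (post_shift - 1).  */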
4344 extra_cost
4345 = (shift_cost (speed, int_mode, post_shift - 1)
4346 + shift_cost (speed, int_mode, 1)
4347 + 2 * add_cost (speed, int_mode));
4348 t1 = expmed_mult_highpart
4349 (int_mode, op0, gen_int_mode (ml, int_mode),
4350 NULL_RTX, 1, max_cost - extra_cost);
4351 if (t1 == 0)
4352 goto fail1;
4353 t2 = force_operand (gen_rtx_MINUS (int_mode,
4354 op0, t1),
4355 NULL_RTX);
4356 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4357 t2, 1, NULL_RTX, 1);
4358 t4 = force_operand (gen_rtx_PLUS (int_mode,
4359 t1, t3),
4360 NULL_RTX);
4361 quotient = expand_shift
4362 (RSHIFT_EXPR, int_mode, t4,
4363 post_shift - 1, tquotient, 1);
4364 }
4365 else
4366 {
4367 rtx t1, t2;
4368
4369 if (pre_shift >= BITS_PER_WORD
4370 || post_shift >= BITS_PER_WORD)
4371 goto fail1;
4372
4373 t1 = expand_shift
4374 (RSHIFT_EXPR, int_mode, op0,
4375 pre_shift, NULL_RTX, 1);
4376 extra_cost
4377 = (shift_cost (speed, int_mode, pre_shift)
4378 + shift_cost (speed, int_mode, post_shift));
4379 t2 = expmed_mult_highpart
4380 (int_mode, t1,
4381 gen_int_mode (ml, int_mode),
4382 NULL_RTX, 1, max_cost - extra_cost);
4383 if (t2 == 0)
4384 goto fail1;
4385 quotient = expand_shift
4386 (RSHIFT_EXPR, int_mode, t2,
4387 post_shift, tquotient, 1);
4388 }
4389 }
4390 }
4391 else /* Mode too wide to use tricky code */
4392 break;
4393
4394 insn = get_last_insn ();
4395 if (insn != last)
4396 set_dst_reg_note (insn, REG_EQUAL,
4397 gen_rtx_UDIV (int_mode, op0, op1),
4398 quotient);
4399 }
4400 else /* TRUNC_DIV, signed */
4401 {
4402 unsigned HOST_WIDE_INT ml;
4403 int lgup, post_shift;
4404 rtx mlr;
4405 HOST_WIDE_INT d = INTVAL (op1);
4406 unsigned HOST_WIDE_INT abs_d;
4407
4408 /* Since d might be INT_MIN, we have to cast to
4409 unsigned HOST_WIDE_INT before negating to avoid
4410 undefined signed overflow. */
4411 abs_d = (d >= 0
4412 ? (unsigned HOST_WIDE_INT) d
4413 : - (unsigned HOST_WIDE_INT) d);
4414
4415 /* n rem d = n rem -d */
4416 if (rem_flag && d < 0)
4417 {
4418 d = abs_d;
4419 op1 = gen_int_mode (abs_d, int_mode);
4420 }
4421
4422 if (d == 1)
4423 quotient = op0;
4424 else if (d == -1)
4425 quotient = expand_unop (int_mode, neg_optab, op0,
4426 tquotient, 0);
4427 else if (size <= HOST_BITS_PER_WIDE_INT
4428 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4429 {
4430 /* This case is not handled correctly below. */
4431 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4432 int_mode, 1, 1);
4433 if (quotient == 0)
4434 goto fail1;
4435 }
4436 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4437 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4438 && (rem_flag
4439 ? smod_pow2_cheap (speed, int_mode)
4440 : sdiv_pow2_cheap (speed, int_mode))
4441 /* We assume that the cheap metric is true if the
4442 optab has an expander for this mode. */
4443 && ((optab_handler ((rem_flag ? smod_optab
4444 : sdiv_optab),
4445 int_mode)
4446 != CODE_FOR_nothing)
4447 || (optab_handler (sdivmod_optab, int_mode)
4448 != CODE_FOR_nothing)))
4449 ;
4450 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4451 && (size <= HOST_BITS_PER_WIDE_INT
4452 || abs_d != (unsigned HOST_WIDE_INT) d))
4453 {
4454 if (rem_flag)
4455 {
4456 remainder = expand_smod_pow2 (int_mode, op0, d);
4457 if (remainder)
4458 return gen_lowpart (mode, remainder);
4459 }
4460
4461 if (sdiv_pow2_cheap (speed, int_mode)
4462 && ((optab_handler (sdiv_optab, int_mode)
4463 != CODE_FOR_nothing)
4464 || (optab_handler (sdivmod_optab, int_mode)
4465 != CODE_FOR_nothing)))
4466 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4467 int_mode, op0,
4468 gen_int_mode (abs_d,
4469 int_mode),
4470 NULL_RTX, 0);
4471 else
4472 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4473
4474 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4475 negate the quotient. */
4476 if (d < 0)
4477 {
4478 insn = get_last_insn ();
4479 if (insn != last
4480 && abs_d < (HOST_WIDE_INT_1U
4481 << (HOST_BITS_PER_WIDE_INT - 1)))
4482 set_dst_reg_note (insn, REG_EQUAL,
4483 gen_rtx_DIV (int_mode, op0,
4484 gen_int_mode
4485 (abs_d,
4486 int_mode)),
4487 quotient);
4488
4489 quotient = expand_unop (int_mode, neg_optab,
4490 quotient, quotient, 0);
4491 }
4492 }
4493 else if (size <= HOST_BITS_PER_WIDE_INT)
4494 {
4495 choose_multiplier (abs_d, size, size - 1,
4496 &ml, &post_shift, &lgup);
4497 if (ml < HOST_WIDE_INT_1U << (size - 1))
4498 {
4499 rtx t1, t2, t3;
4500
4501 if (post_shift >= BITS_PER_WORD
4502 || size - 1 >= BITS_PER_WORD)
4503 goto fail1;
4504
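/* The signed analogue: t1 = high part of op0 * ml, t2 = t1 >> post_shift
   (arithmetic), t3 = op0 >> (size - 1) (-1 for negative op0, else 0), and
   the quotient is t2 - t3, or t3 - t2 when the divisor is negative.  */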
4505 extra_cost = (shift_cost (speed, int_mode, post_shift)
4506 + shift_cost (speed, int_mode, size - 1)
4507 + add_cost (speed, int_mode));
4508 t1 = expmed_mult_highpart
4509 (int_mode, op0, gen_int_mode (ml, int_mode),
4510 NULL_RTX, 0, max_cost - extra_cost);
4511 if (t1 == 0)
4512 goto fail1;
4513 t2 = expand_shift
4514 (RSHIFT_EXPR, int_mode, t1,
4515 post_shift, NULL_RTX, 0);
4516 t3 = expand_shift
4517 (RSHIFT_EXPR, int_mode, op0,
4518 size - 1, NULL_RTX, 0);
4519 if (d < 0)
4520 quotient
4521 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4522 tquotient);
4523 else
4524 quotient
4525 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4526 tquotient);
4527 }
4528 else
4529 {
4530 rtx t1, t2, t3, t4;
4531
4532 if (post_shift >= BITS_PER_WORD
4533 || size - 1 >= BITS_PER_WORD)
4534 goto fail1;
4535
4536 ml |= HOST_WIDE_INT_M1U << (size - 1);
4537 mlr = gen_int_mode (ml, int_mode);
4538 extra_cost = (shift_cost (speed, int_mode, post_shift)
4539 + shift_cost (speed, int_mode, size - 1)
4540 + 2 * add_cost (speed, int_mode));
4541 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4542 NULL_RTX, 0,
4543 max_cost - extra_cost);
4544 if (t1 == 0)
4545 goto fail1;
4546 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4547 NULL_RTX);
4548 t3 = expand_shift
4549 (RSHIFT_EXPR, int_mode, t2,
4550 post_shift, NULL_RTX, 0);
4551 t4 = expand_shift
4552 (RSHIFT_EXPR, int_mode, op0,
4553 size - 1, NULL_RTX, 0);
4554 if (d < 0)
4555 quotient
4556 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4557 tquotient);
4558 else
4559 quotient
4560 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4561 tquotient);
4562 }
4563 }
4564 else /* Mode too wide to use tricky code */
4565 break;
4566
4567 insn = get_last_insn ();
4568 if (insn != last)
4569 set_dst_reg_note (insn, REG_EQUAL,
4570 gen_rtx_DIV (int_mode, op0, op1),
4571 quotient);
4572 }
4573 break;
4574 }
4575 fail1:
4576 delete_insns_since (last);
4577 break;
4578
4579 case FLOOR_DIV_EXPR:
4580 case FLOOR_MOD_EXPR:
4581 /* We will come here only for signed operations. */
4582 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4583 {
4584 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4585 int size = GET_MODE_BITSIZE (int_mode);
4586 unsigned HOST_WIDE_INT mh, ml;
4587 int pre_shift, lgup, post_shift;
4588 HOST_WIDE_INT d = INTVAL (op1);
4589
4590 if (d > 0)
4591 {
4592 /* We could just as easily deal with negative constants here,
4593 but it does not seem worth the trouble for GCC 2.6. */
4594 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4595 {
4596 pre_shift = floor_log2 (d);
4597 if (rem_flag)
4598 {
4599 unsigned HOST_WIDE_INT mask
4600 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4601 remainder = expand_binop
4602 (int_mode, and_optab, op0,
4603 gen_int_mode (mask, int_mode),
4604 remainder, 0, OPTAB_LIB_WIDEN);
4605 if (remainder)
4606 return gen_lowpart (mode, remainder);
4607 }
4608 quotient = expand_shift
4609 (RSHIFT_EXPR, int_mode, op0,
4610 pre_shift, tquotient, 0);
4611 }
4612 else
4613 {
4614 rtx t1, t2, t3, t4;
4615
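/* Floor division by a positive constant: t1 = op0 >> (size - 1) is the
   sign mask, t2 = op0 ^ t1 is op0 for op0 >= 0 and -op0 - 1 otherwise,
   t4 is the truncating quotient t2 / d computed by the multiply-high
   sequence, and t4 ^ t1 is then the floor quotient of op0 / d for either
   sign of op0.  */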
4616 mh = choose_multiplier (d, size, size - 1,
4617 &ml, &post_shift, &lgup);
4618 gcc_assert (!mh);
4619
4620 if (post_shift < BITS_PER_WORD
4621 && size - 1 < BITS_PER_WORD)
4622 {
4623 t1 = expand_shift
4624 (RSHIFT_EXPR, int_mode, op0,
4625 size - 1, NULL_RTX, 0);
4626 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4627 NULL_RTX, 0, OPTAB_WIDEN);
4628 extra_cost = (shift_cost (speed, int_mode, post_shift)
4629 + shift_cost (speed, int_mode, size - 1)
4630 + 2 * add_cost (speed, int_mode));
4631 t3 = expmed_mult_highpart
4632 (int_mode, t2, gen_int_mode (ml, int_mode),
4633 NULL_RTX, 1, max_cost - extra_cost);
4634 if (t3 != 0)
4635 {
4636 t4 = expand_shift
4637 (RSHIFT_EXPR, int_mode, t3,
4638 post_shift, NULL_RTX, 1);
4639 quotient = expand_binop (int_mode, xor_optab,
4640 t4, t1, tquotient, 0,
4641 OPTAB_WIDEN);
4642 }
4643 }
4644 }
4645 }
4646 else
4647 {
4648 rtx nsign, t1, t2, t3, t4;
4649 t1 = force_operand (gen_rtx_PLUS (int_mode,
4650 op0, constm1_rtx), NULL_RTX);
4651 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4652 0, OPTAB_WIDEN);
4653 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4654 size - 1, NULL_RTX, 0);
4655 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4656 NULL_RTX);
4657 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4658 NULL_RTX, 0);
4659 if (t4)
4660 {
4661 rtx t5;
4662 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4663 NULL_RTX, 0);
4664 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4665 tquotient);
4666 }
4667 }
4668 }
4669
4670 if (quotient != 0)
4671 break;
4672 delete_insns_since (last);
4673
4674 /* Try using an instruction that produces both the quotient and
4675 remainder, using truncation. We can easily compensate the quotient
4676 or remainder to get floor rounding, once we have the remainder.
4677 Notice that we compute also the final remainder value here,
4678 and return the result right away. */
4679 if (target == 0 || GET_MODE (target) != compute_mode)
4680 target = gen_reg_rtx (compute_mode);
4681
4682 if (rem_flag)
4683 {
4684 remainder
4685 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4686 quotient = gen_reg_rtx (compute_mode);
4687 }
4688 else
4689 {
4690 quotient
4691 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4692 remainder = gen_reg_rtx (compute_mode);
4693 }
4694
4695 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4696 quotient, remainder, 0))
4697 {
4698 /* This could be computed with a branch-less sequence.
4699 Save that for later. */
4700 rtx tem;
4701 rtx_code_label *label = gen_label_rtx ();
4702 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4703 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4704 NULL_RTX, 0, OPTAB_WIDEN);
4705 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4706 expand_dec (quotient, const1_rtx);
4707 expand_inc (remainder, op1);
4708 emit_label (label);
4709 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4710 }
4711
4712 /* No luck with division elimination or divmod. Have to do it
4713 by conditionally adjusting op0 *and* the result. */
4714 {
4715 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4716 rtx adjusted_op0;
4717 rtx tem;
4718
4719 quotient = gen_reg_rtx (compute_mode);
4720 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4721 label1 = gen_label_rtx ();
4722 label2 = gen_label_rtx ();
4723 label3 = gen_label_rtx ();
4724 label4 = gen_label_rtx ();
4725 label5 = gen_label_rtx ();
4726 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4727 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4728 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4729 quotient, 0, OPTAB_LIB_WIDEN);
4730 if (tem != quotient)
4731 emit_move_insn (quotient, tem);
4732 emit_jump_insn (targetm.gen_jump (label5));
4733 emit_barrier ();
4734 emit_label (label1);
4735 expand_inc (adjusted_op0, const1_rtx);
4736 emit_jump_insn (targetm.gen_jump (label4));
4737 emit_barrier ();
4738 emit_label (label2);
4739 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4740 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4741 quotient, 0, OPTAB_LIB_WIDEN);
4742 if (tem != quotient)
4743 emit_move_insn (quotient, tem);
4744 emit_jump_insn (targetm.gen_jump (label5));
4745 emit_barrier ();
4746 emit_label (label3);
4747 expand_dec (adjusted_op0, const1_rtx);
4748 emit_label (label4);
4749 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4750 quotient, 0, OPTAB_LIB_WIDEN);
4751 if (tem != quotient)
4752 emit_move_insn (quotient, tem);
4753 expand_dec (quotient, const1_rtx);
4754 emit_label (label5);
4755 }
4756 break;
4757
4758 case CEIL_DIV_EXPR:
4759 case CEIL_MOD_EXPR:
4760 if (unsignedp)
4761 {
4762 if (op1_is_constant
4763 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4764 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4765 || INTVAL (op1) >= 0))
4766 {
4767 scalar_int_mode int_mode
4768 = as_a <scalar_int_mode> (compute_mode);
4769 rtx t1, t2, t3;
4770 unsigned HOST_WIDE_INT d = INTVAL (op1);
4771 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4772 floor_log2 (d), tquotient, 1);
4773 t2 = expand_binop (int_mode, and_optab, op0,
4774 gen_int_mode (d - 1, int_mode),
4775 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4776 t3 = gen_reg_rtx (int_mode);
4777 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4778 if (t3 == 0)
4779 {
4780 rtx_code_label *lab;
4781 lab = gen_label_rtx ();
4782 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4783 expand_inc (t1, const1_rtx);
4784 emit_label (lab);
4785 quotient = t1;
4786 }
4787 else
4788 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4789 tquotient);
4790 break;
4791 }
4792
4793 /* Try using an instruction that produces both the quotient and
4794 remainder, using truncation. We can easily compensate the
4795 quotient or remainder to get ceiling rounding, once we have the
4796 remainder. Notice that we compute also the final remainder
4797 value here, and return the result right away. */
4798 if (target == 0 || GET_MODE (target) != compute_mode)
4799 target = gen_reg_rtx (compute_mode);
4800
4801 if (rem_flag)
4802 {
4803 remainder = (REG_P (target)
4804 ? target : gen_reg_rtx (compute_mode));
4805 quotient = gen_reg_rtx (compute_mode);
4806 }
4807 else
4808 {
4809 quotient = (REG_P (target)
4810 ? target : gen_reg_rtx (compute_mode));
4811 remainder = gen_reg_rtx (compute_mode);
4812 }
4813
4814 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4815 remainder, 1))
4816 {
4817 /* This could be computed with a branch-less sequence.
4818 Save that for later. */
4819 rtx_code_label *label = gen_label_rtx ();
4820 do_cmp_and_jump (remainder, const0_rtx, EQ,
4821 compute_mode, label);
4822 expand_inc (quotient, const1_rtx);
4823 expand_dec (remainder, op1);
4824 emit_label (label);
4825 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4826 }
4827
4828 /* No luck with division elimination or divmod. Have to do it
4829 by conditionally adjusting op0 *and* the result. */
4830 {
4831 rtx_code_label *label1, *label2;
4832 rtx adjusted_op0, tem;
4833
4834 quotient = gen_reg_rtx (compute_mode);
4835 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4836 label1 = gen_label_rtx ();
4837 label2 = gen_label_rtx ();
4838 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4839 compute_mode, label1);
4840 emit_move_insn (quotient, const0_rtx);
4841 emit_jump_insn (targetm.gen_jump (label2));
4842 emit_barrier ();
4843 emit_label (label1);
4844 expand_dec (adjusted_op0, const1_rtx);
4845 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4846 quotient, 1, OPTAB_LIB_WIDEN);
4847 if (tem != quotient)
4848 emit_move_insn (quotient, tem);
4849 expand_inc (quotient, const1_rtx);
4850 emit_label (label2);
4851 }
4852 }
4853 else /* signed */
4854 {
4855 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4856 && INTVAL (op1) >= 0)
4857 {
4858 /* This is extremely similar to the code for the unsigned case
4859 above. For 2.7 we should merge these variants, but for
4860 2.6.1 I don't want to touch the code for unsigned since that
4861 get used in C. The signed case will only be used by other
4862 languages (Ada). */
4863
4864 rtx t1, t2, t3;
4865 unsigned HOST_WIDE_INT d = INTVAL (op1);
4866 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4867 floor_log2 (d), tquotient, 0);
4868 t2 = expand_binop (compute_mode, and_optab, op0,
4869 gen_int_mode (d - 1, compute_mode),
4870 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4871 t3 = gen_reg_rtx (compute_mode);
4872 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4873 compute_mode, 1, 1);
4874 if (t3 == 0)
4875 {
4876 rtx_code_label *lab;
4877 lab = gen_label_rtx ();
4878 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4879 expand_inc (t1, const1_rtx);
4880 emit_label (lab);
4881 quotient = t1;
4882 }
4883 else
4884 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4885 t1, t3),
4886 tquotient);
4887 break;
4888 }
4889
4890 /* Try using an instruction that produces both the quotient and
4891 remainder, using truncation. We can easily compensate the
4892 quotient or remainder to get ceiling rounding, once we have the
4893 remainder. Notice that we compute also the final remainder
4894 value here, and return the result right away. */
4895 if (target == 0 || GET_MODE (target) != compute_mode)
4896 target = gen_reg_rtx (compute_mode);
4897 if (rem_flag)
4898 {
4899 remainder = (REG_P (target)
4900 ? target : gen_reg_rtx (compute_mode));
4901 quotient = gen_reg_rtx (compute_mode);
4902 }
4903 else
4904 {
4905 quotient = (REG_P (target)
4906 ? target : gen_reg_rtx (compute_mode));
4907 remainder = gen_reg_rtx (compute_mode);
4908 }
4909
4910 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4911 remainder, 0))
4912 {
4913 /* This could be computed with a branch-less sequence.
4914 Save that for later. */
4915 rtx tem;
4916 rtx_code_label *label = gen_label_rtx ();
4917 do_cmp_and_jump (remainder, const0_rtx, EQ,
4918 compute_mode, label);
4919 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4920 NULL_RTX, 0, OPTAB_WIDEN);
4921 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4922 expand_inc (quotient, const1_rtx);
4923 expand_dec (remainder, op1);
4924 emit_label (label);
4925 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4926 }
4927
4928 /* No luck with division elimination or divmod. Have to do it
4929 by conditionally adjusting op0 *and* the result. */
4930 {
4931 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4932 rtx adjusted_op0;
4933 rtx tem;
4934
4935 quotient = gen_reg_rtx (compute_mode);
4936 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4937 label1 = gen_label_rtx ();
4938 label2 = gen_label_rtx ();
4939 label3 = gen_label_rtx ();
4940 label4 = gen_label_rtx ();
4941 label5 = gen_label_rtx ();
4942 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4943 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4944 compute_mode, label1);
4945 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4946 quotient, 0, OPTAB_LIB_WIDEN);
4947 if (tem != quotient)
4948 emit_move_insn (quotient, tem);
4949 emit_jump_insn (targetm.gen_jump (label5));
4950 emit_barrier ();
4951 emit_label (label1);
4952 expand_dec (adjusted_op0, const1_rtx);
4953 emit_jump_insn (targetm.gen_jump (label4));
4954 emit_barrier ();
4955 emit_label (label2);
4956 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4957 compute_mode, label3);
4958 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4959 quotient, 0, OPTAB_LIB_WIDEN);
4960 if (tem != quotient)
4961 emit_move_insn (quotient, tem);
4962 emit_jump_insn (targetm.gen_jump (label5));
4963 emit_barrier ();
4964 emit_label (label3);
4965 expand_inc (adjusted_op0, const1_rtx);
4966 emit_label (label4);
4967 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4968 quotient, 0, OPTAB_LIB_WIDEN);
4969 if (tem != quotient)
4970 emit_move_insn (quotient, tem);
4971 expand_inc (quotient, const1_rtx);
4972 emit_label (label5);
4973 }
4974 }
4975 break;
4976
4977 case EXACT_DIV_EXPR:
4978 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4979 {
4980 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4981 int size = GET_MODE_BITSIZE (int_mode);
4982 HOST_WIDE_INT d = INTVAL (op1);
4983 unsigned HOST_WIDE_INT ml;
4984 int pre_shift;
4985 rtx t1;
4986
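/* An exact division does not need the high part of the product: shift out
   D's trailing zero bits, then multiply by the inverse of the odd part
   modulo 2**SIZE (see invert_mod2n above); this reproduces the quotient
   whenever OP0 is a multiple of D.  */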
4987 pre_shift = ctz_or_zero (d);
4988 ml = invert_mod2n (d >> pre_shift, size);
4989 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4990 pre_shift, NULL_RTX, unsignedp);
4991 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
4992 NULL_RTX, 1);
4993
4994 insn = get_last_insn ();
4995 set_dst_reg_note (insn, REG_EQUAL,
4996 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4997 int_mode, op0, op1),
4998 quotient);
4999 }
5000 break;
5001
5002 case ROUND_DIV_EXPR:
5003 case ROUND_MOD_EXPR:
5004 if (unsignedp)
5005 {
5006 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5007 rtx tem;
5008 rtx_code_label *label;
5009 label = gen_label_rtx ();
5010 quotient = gen_reg_rtx (int_mode);
5011 remainder = gen_reg_rtx (int_mode);
5012 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5013 {
5014 rtx tem;
5015 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5016 quotient, 1, OPTAB_LIB_WIDEN);
5017 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5018 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5019 remainder, 1, OPTAB_LIB_WIDEN);
5020 }
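/* REMAINDER is in [0, OP1); bump the truncating quotient when
   REMAINDER > (OP1 - 1) / 2, i.e. when it is at least half of OP1.  */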
5021 tem = plus_constant (int_mode, op1, -1);
5022 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5023 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5024 expand_inc (quotient, const1_rtx);
5025 expand_dec (remainder, op1);
5026 emit_label (label);
5027 }
5028 else
5029 {
5030 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5031 int size = GET_MODE_BITSIZE (int_mode);
5032 rtx abs_rem, abs_op1, tem, mask;
5033 rtx_code_label *label;
5034 label = gen_label_rtx ();
5035 quotient = gen_reg_rtx (int_mode);
5036 remainder = gen_reg_rtx (int_mode);
5037 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5038 {
5039 rtx tem;
5040 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5041 quotient, 0, OPTAB_LIB_WIDEN);
5042 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5043 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5044 remainder, 0, OPTAB_LIB_WIDEN);
5045 }
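/* MASK below is 0 when OP0 and OP1 have the same sign (the quotient
   is non-negative) and -1 otherwise, so (X ^ MASK) - MASK
   conditionally negates X.  When 2 * |remainder| >= |OP1| the
   quotient is therefore moved one step away from zero and OP1 is
   added to or subtracted from the remainder accordingly, without
   branching on the operands' signs.  */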
5046 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5047 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5048 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5049 1, NULL_RTX, 1);
5050 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5051 tem = expand_binop (int_mode, xor_optab, op0, op1,
5052 NULL_RTX, 0, OPTAB_WIDEN);
5053 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5054 size - 1, NULL_RTX, 0);
5055 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5056 NULL_RTX, 0, OPTAB_WIDEN);
5057 tem = expand_binop (int_mode, sub_optab, tem, mask,
5058 NULL_RTX, 0, OPTAB_WIDEN);
5059 expand_inc (quotient, tem);
5060 tem = expand_binop (int_mode, xor_optab, mask, op1,
5061 NULL_RTX, 0, OPTAB_WIDEN);
5062 tem = expand_binop (int_mode, sub_optab, tem, mask,
5063 NULL_RTX, 0, OPTAB_WIDEN);
5064 expand_dec (remainder, tem);
5065 emit_label (label);
5066 }
5067 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5068
5069 default:
5070 gcc_unreachable ();
5071 }
5072
5073 if (quotient == 0)
5074 {
5075 if (target && GET_MODE (target) != compute_mode)
5076 target = 0;
5077
5078 if (rem_flag)
5079 {
5080 /* Try to produce the remainder without producing the quotient.
5081 If we seem to have a divmod pattern that does not require widening,
5082 don't try widening here. We should really have a WIDEN argument
5083 to expand_twoval_binop, since what we'd really like to do here is
5084 1) try a mod insn in compute_mode
5085 2) try a divmod insn in compute_mode
5086 3) try a div insn in compute_mode and multiply-subtract to get
5087 remainder
5088 4) try the same things with widening allowed. */
5089 remainder
5090 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5091 op0, op1, target,
5092 unsignedp,
5093 ((optab_handler (optab2, compute_mode)
5094 != CODE_FOR_nothing)
5095 ? OPTAB_DIRECT : OPTAB_WIDEN));
5096 if (remainder == 0)
5097 {
5098 /* No luck there. Can we do remainder and divide at once
5099 without a library call? */
5100 remainder = gen_reg_rtx (compute_mode);
5101 if (! expand_twoval_binop ((unsignedp
5102 ? udivmod_optab
5103 : sdivmod_optab),
5104 op0, op1,
5105 NULL_RTX, remainder, unsignedp))
5106 remainder = 0;
5107 }
5108
5109 if (remainder)
5110 return gen_lowpart (mode, remainder);
5111 }
5112
5113 /* Produce the quotient. Try a quotient insn, but not a library call.
5114 If we have a divmod in this mode, use it in preference to widening
5115 the div (for this test we assume it will not fail). Note that optab2
5116 is set to whichever of the two optabs the call below will use. */
5117 quotient
5118 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5119 op0, op1, rem_flag ? NULL_RTX : target,
5120 unsignedp,
5121 ((optab_handler (optab2, compute_mode)
5122 != CODE_FOR_nothing)
5123 ? OPTAB_DIRECT : OPTAB_WIDEN));
5124
5125 if (quotient == 0)
5126 {
5127 /* No luck there. Try a quotient-and-remainder insn,
5128 keeping the quotient alone. */
5129 quotient = gen_reg_rtx (compute_mode);
5130 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5131 op0, op1,
5132 quotient, NULL_RTX, unsignedp))
5133 {
5134 quotient = 0;
5135 if (! rem_flag)
5136 /* Still no luck. If we are not computing the remainder,
5137 use a library call for the quotient. */
5138 quotient = sign_expand_binop (compute_mode,
5139 udiv_optab, sdiv_optab,
5140 op0, op1, target,
5141 unsignedp, OPTAB_LIB_WIDEN);
5142 }
5143 }
5144 }
5145
5146 if (rem_flag)
5147 {
5148 if (target && GET_MODE (target) != compute_mode)
5149 target = 0;
5150
5151 if (quotient == 0)
5152 {
5153 /* No divide instruction either. Use library for remainder. */
5154 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5155 op0, op1, target,
5156 unsignedp, OPTAB_LIB_WIDEN);
5157 /* No remainder function. Try a quotient-and-remainder
5158 function, keeping the remainder. */
5159 if (!remainder)
5160 {
5161 remainder = gen_reg_rtx (compute_mode);
5162 if (!expand_twoval_binop_libfunc
5163 (unsignedp ? udivmod_optab : sdivmod_optab,
5164 op0, op1,
5165 NULL_RTX, remainder,
5166 unsignedp ? UMOD : MOD))
5167 remainder = NULL_RTX;
5168 }
5169 }
5170 else
5171 {
5172 /* We divided. Now finish doing X - Y * (X / Y). */
5173 remainder = expand_mult (compute_mode, quotient, op1,
5174 NULL_RTX, unsignedp);
5175 remainder = expand_binop (compute_mode, sub_optab, op0,
5176 remainder, target, unsignedp,
5177 OPTAB_LIB_WIDEN);
5178 }
5179 }
5180
5181 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5182 }
5183 \f
5184 /* Return a tree node with data type TYPE, describing the value of X.
5185 Usually this is a VAR_DECL, if there is no obvious better choice.
5186 X may be an expression; however, we only support those expressions
5187 generated by loop.c. */
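/* For example, (plus:SI (reg:SI 100) (const_int 4)) becomes, roughly,
   PLUS_EXPR <V, 4> where V is a synthesized VAR_DECL whose rtl is
   (reg:SI 100).  */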
5188
5189 tree
5190 make_tree (tree type, rtx x)
5191 {
5192 tree t;
5193
5194 switch (GET_CODE (x))
5195 {
5196 case CONST_INT:
5197 case CONST_WIDE_INT:
5198 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5199 return t;
5200
5201 case CONST_DOUBLE:
5202 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5203 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5204 t = wide_int_to_tree (type,
5205 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5206 HOST_BITS_PER_WIDE_INT * 2));
5207 else
5208 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5209
5210 return t;
5211
5212 case CONST_VECTOR:
5213 {
5214 int units = CONST_VECTOR_NUNITS (x);
5215 tree itype = TREE_TYPE (type);
5216 tree *elts;
5217 int i;
5218
5219 /* Build a tree with vector elements. */
5220 elts = XALLOCAVEC (tree, units);
5221 for (i = units - 1; i >= 0; --i)
5222 {
5223 rtx elt = CONST_VECTOR_ELT (x, i);
5224 elts[i] = make_tree (itype, elt);
5225 }
5226
5227 return build_vector (type, elts);
5228 }
5229
5230 case PLUS:
5231 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5232 make_tree (type, XEXP (x, 1)));
5233
5234 case MINUS:
5235 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5236 make_tree (type, XEXP (x, 1)));
5237
5238 case NEG:
5239 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5240
5241 case MULT:
5242 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5243 make_tree (type, XEXP (x, 1)));
5244
5245 case ASHIFT:
5246 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5247 make_tree (type, XEXP (x, 1)));
5248
5249 case LSHIFTRT:
5250 t = unsigned_type_for (type);
5251 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5252 make_tree (t, XEXP (x, 0)),
5253 make_tree (type, XEXP (x, 1))));
5254
5255 case ASHIFTRT:
5256 t = signed_type_for (type);
5257 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5258 make_tree (t, XEXP (x, 0)),
5259 make_tree (type, XEXP (x, 1))));
5260
5261 case DIV:
5262 if (TREE_CODE (type) != REAL_TYPE)
5263 t = signed_type_for (type);
5264 else
5265 t = type;
5266
5267 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5268 make_tree (t, XEXP (x, 0)),
5269 make_tree (t, XEXP (x, 1))));
5270 case UDIV:
5271 t = unsigned_type_for (type);
5272 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5273 make_tree (t, XEXP (x, 0)),
5274 make_tree (t, XEXP (x, 1))));
5275
5276 case SIGN_EXTEND:
5277 case ZERO_EXTEND:
5278 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5279 GET_CODE (x) == ZERO_EXTEND);
5280 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5281
5282 case CONST:
5283 return make_tree (type, XEXP (x, 0));
5284
5285 case SYMBOL_REF:
5286 t = SYMBOL_REF_DECL (x);
5287 if (t)
5288 return fold_convert (type, build_fold_addr_expr (t));
5289 /* fall through. */
5290
5291 default:
5292 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5293
5294 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5295 address mode to pointer mode. */
5296 if (POINTER_TYPE_P (type))
5297 x = convert_memory_address_addr_space
5298 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5299
5300 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5301 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5302 t->decl_with_rtl.rtl = x;
5303
5304 return t;
5305 }
5306 }
5307 \f
5308 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5309 and returning TARGET.
5310
5311 If TARGET is 0, a pseudo-register or constant is returned. */
5312
5313 rtx
5314 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5315 {
5316 rtx tem = 0;
5317
5318 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5319 tem = simplify_binary_operation (AND, mode, op0, op1);
5320 if (tem == 0)
5321 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5322
5323 if (target == 0)
5324 target = tem;
5325 else if (tem != target)
5326 emit_move_insn (target, tem);
5327 return target;
5328 }
5329
5330 /* Helper function for emit_store_flag. */
5331 rtx
5332 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5333 machine_mode mode, machine_mode compare_mode,
5334 int unsignedp, rtx x, rtx y, int normalizep,
5335 machine_mode target_mode)
5336 {
5337 struct expand_operand ops[4];
5338 rtx op0, comparison, subtarget;
5339 rtx_insn *last;
5340 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5341 scalar_int_mode int_target_mode;
5342
5343 last = get_last_insn ();
5344 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5345 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5346 if (!x || !y)
5347 {
5348 delete_insns_since (last);
5349 return NULL_RTX;
5350 }
5351
5352 if (target_mode == VOIDmode)
5353 int_target_mode = result_mode;
5354 else
5355 int_target_mode = as_a <scalar_int_mode> (target_mode);
5356 if (!target)
5357 target = gen_reg_rtx (int_target_mode);
5358
5359 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5360
5361 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5362 create_fixed_operand (&ops[1], comparison);
5363 create_fixed_operand (&ops[2], x);
5364 create_fixed_operand (&ops[3], y);
5365 if (!maybe_expand_insn (icode, 4, ops))
5366 {
5367 delete_insns_since (last);
5368 return NULL_RTX;
5369 }
5370 subtarget = ops[0].value;
5371
5372 /* If we are converting to a wider mode, first convert to
5373 INT_TARGET_MODE, then normalize. This produces better combining
5374 opportunities on machines that have a SIGN_EXTRACT when we are
5375 testing a single bit. This mostly benefits the 68k.
5376
5377 If STORE_FLAG_VALUE does not have the sign bit set when
5378 interpreted in MODE, we can do this conversion as unsigned, which
5379 is usually more efficient. */
5380 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode))
5381 {
5382 convert_move (target, subtarget,
5383 val_signbit_known_clear_p (result_mode,
5384 STORE_FLAG_VALUE));
5385 op0 = target;
5386 result_mode = int_target_mode;
5387 }
5388 else
5389 op0 = subtarget;
5390
5391 /* If we want to keep subexpressions around, don't reuse our last
5392 target. */
5393 if (optimize)
5394 subtarget = 0;
5395
5396 /* Now normalize to the proper value in MODE. Sometimes we don't
5397 have to do anything. */
5398 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5399 ;
5400 /* STORE_FLAG_VALUE might be the most negative number, so write
5401 the comparison this way to avoid a compile-time warning. */
5402 else if (- normalizep == STORE_FLAG_VALUE)
5403 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5404
5405 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5406 it hard to use a value of just the sign bit due to ANSI integer
5407 constant typing rules. */
5408 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5409 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5410 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5411 normalizep == 1);
5412 else
5413 {
5414 gcc_assert (STORE_FLAG_VALUE & 1);
5415
5416 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5417 if (normalizep == -1)
5418 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5419 }
5420
5421 /* If we were converting to a smaller mode, do the conversion now. */
5422 if (int_target_mode != result_mode)
5423 {
5424 convert_move (target, op0, 0);
5425 return target;
5426 }
5427 else
5428 return op0;
5429 }
5430
5431
5432 /* A subroutine of emit_store_flag only including "tricks" that do not
5433 need a recursive call. These are kept separate to avoid infinite
5434 loops. */
5435
5436 static rtx
5437 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5438 machine_mode mode, int unsignedp, int normalizep,
5439 machine_mode target_mode)
5440 {
5441 rtx subtarget;
5442 enum insn_code icode;
5443 machine_mode compare_mode;
5444 enum mode_class mclass;
5445 enum rtx_code scode;
5446
5447 if (unsignedp)
5448 code = unsigned_condition (code);
5449 scode = swap_condition (code);
5450
5451 /* If one operand is constant, make it the second one. Only do this
5452 if the other operand is not constant as well. */
5453
5454 if (swap_commutative_operands_p (op0, op1))
5455 {
5456 std::swap (op0, op1);
5457 code = swap_condition (code);
5458 }
5459
5460 if (mode == VOIDmode)
5461 mode = GET_MODE (op0);
5462
5463 /* For some comparisons with 1 and -1, we can convert them to
5464 comparisons with zero. This will often produce more opportunities for
5465 store-flag insns. */
5466
5467 switch (code)
5468 {
5469 case LT:
5470 if (op1 == const1_rtx)
5471 op1 = const0_rtx, code = LE;
5472 break;
5473 case LE:
5474 if (op1 == constm1_rtx)
5475 op1 = const0_rtx, code = LT;
5476 break;
5477 case GE:
5478 if (op1 == const1_rtx)
5479 op1 = const0_rtx, code = GT;
5480 break;
5481 case GT:
5482 if (op1 == constm1_rtx)
5483 op1 = const0_rtx, code = GE;
5484 break;
5485 case GEU:
5486 if (op1 == const1_rtx)
5487 op1 = const0_rtx, code = NE;
5488 break;
5489 case LTU:
5490 if (op1 == const1_rtx)
5491 op1 = const0_rtx, code = EQ;
5492 break;
5493 default:
5494 break;
5495 }
5496
5497 /* If we are comparing a double-word integer with zero or -1, we can
5498 convert the comparison into one involving a single word. */
5499 scalar_int_mode int_mode;
5500 if (is_int_mode (mode, &int_mode)
5501 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5502 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5503 {
5504 rtx tem;
5505 if ((code == EQ || code == NE)
5506 && (op1 == const0_rtx || op1 == constm1_rtx))
5507 {
5508 rtx op00, op01;
5509
5510 /* Do a logical OR or AND of the two words and compare the
5511 result. */
5512 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5513 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5514 tem = expand_binop (word_mode,
5515 op1 == const0_rtx ? ior_optab : and_optab,
5516 op00, op01, NULL_RTX, unsignedp,
5517 OPTAB_DIRECT);
5518
5519 if (tem != 0)
5520 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5521 unsignedp, normalizep);
5522 }
5523 else if ((code == LT || code == GE) && op1 == const0_rtx)
5524 {
5525 rtx op0h;
5526
5527 /* If testing the sign bit, can just test on high word. */
5528 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5529 subreg_highpart_offset (word_mode,
5530 int_mode));
5531 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5532 unsignedp, normalizep);
5533 }
5534 else
5535 tem = NULL_RTX;
5536
5537 if (tem)
5538 {
5539 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5540 return tem;
5541 if (!target)
5542 target = gen_reg_rtx (target_mode);
5543
5544 convert_move (target, tem,
5545 !val_signbit_known_set_p (word_mode,
5546 (normalizep ? normalizep
5547 : STORE_FLAG_VALUE)));
5548 return target;
5549 }
5550 }
5551
5552 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5553 complement of A (for GE) and shifting the sign bit to the low bit. */
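/* For a 32-bit A and STORE_FLAG_VALUE == 1 this is just
   "(unsigned) A >> 31" for A < 0 and "(unsigned) ~A >> 31" for
   A >= 0; an arithmetic shift is used instead when a 0/-1 result is
   wanted.  */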
5554 if (op1 == const0_rtx && (code == LT || code == GE)
5555 && is_int_mode (mode, &int_mode)
5556 && (normalizep || STORE_FLAG_VALUE == 1
5557 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5558 {
5559 scalar_int_mode int_target_mode;
5560 subtarget = target;
5561
5562 if (!target)
5563 int_target_mode = int_mode;
5564 else
5565 {
5566 /* If the result is to be wider than OP0, it is best to convert it
5567 first. If it is to be narrower, it is *incorrect* to convert it
5568 first. */
5569 int_target_mode = as_a <scalar_int_mode> (target_mode);
5570 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5571 {
5572 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5573 int_mode = int_target_mode;
5574 }
5575 }
5576
5577 if (int_target_mode != int_mode)
5578 subtarget = 0;
5579
5580 if (code == GE)
5581 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5582 ((STORE_FLAG_VALUE == 1 || normalizep)
5583 ? 0 : subtarget), 0);
5584
5585 if (STORE_FLAG_VALUE == 1 || normalizep)
5586 /* If we are supposed to produce a 0/1 value, we want to do
5587 a logical shift from the sign bit to the low-order bit; for
5588 a -1/0 value, we do an arithmetic shift. */
5589 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5590 GET_MODE_BITSIZE (int_mode) - 1,
5591 subtarget, normalizep != -1);
5592
5593 if (int_mode != int_target_mode)
5594 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5595
5596 return op0;
5597 }
5598
5599 mclass = GET_MODE_CLASS (mode);
5600 FOR_EACH_MODE_FROM (compare_mode, mode)
5601 {
5602 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5603 icode = optab_handler (cstore_optab, optab_mode);
5604 if (icode != CODE_FOR_nothing)
5605 {
5606 do_pending_stack_adjust ();
5607 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5608 unsignedp, op0, op1, normalizep, target_mode);
5609 if (tem)
5610 return tem;
5611
5612 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5613 {
5614 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5615 unsignedp, op1, op0, normalizep, target_mode);
5616 if (tem)
5617 return tem;
5618 }
5619 break;
5620 }
5621 }
5622
5623 return 0;
5624 }
5625
5626 /* Subroutine of emit_store_flag that handles cases in which the operands
5627 are scalar integers. SUBTARGET is the target to use for temporary
5628 operations and TRUEVAL is the value to store when the condition is
5629 true. All other arguments are as for emit_store_flag. */
5630
5631 rtx
5632 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5633 rtx op1, scalar_int_mode mode, int unsignedp,
5634 int normalizep, rtx trueval)
5635 {
5636 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5637 rtx_insn *last = get_last_insn ();
5638 rtx tem;
5639
5640 /* If this is an equality comparison of integers, we can try to exclusive-or
5641 (or subtract) the two operands and use a recursive call to try the
5642 comparison with zero. Don't do any of these cases if branches are
5643 very cheap. */
5644
5645 if ((code == EQ || code == NE) && op1 != const0_rtx)
5646 {
5647 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5648 OPTAB_WIDEN);
5649
5650 if (tem == 0)
5651 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5652 OPTAB_WIDEN);
5653 if (tem != 0)
5654 tem = emit_store_flag (target, code, tem, const0_rtx,
5655 mode, unsignedp, normalizep);
5656 if (tem != 0)
5657 return tem;
5658
5659 delete_insns_since (last);
5660 }
5661
5662 /* For integer comparisons, try the reverse comparison. However, for
5663 small X, and if we'd have to extend anyway, implementing "X != 0"
5664 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5665 rtx_code rcode = reverse_condition (code);
5666 if (can_compare_p (rcode, mode, ccp_store_flag)
5667 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5668 && code == NE
5669 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5670 && op1 == const0_rtx))
5671 {
5672 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5673 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5674
5675 /* Again, for the reverse comparison, use either an addition or an XOR. */
5676 if (want_add
5677 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5678 optimize_insn_for_speed_p ()) == 0)
5679 {
5680 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5681 STORE_FLAG_VALUE, target_mode);
5682 if (tem != 0)
5683 tem = expand_binop (target_mode, add_optab, tem,
5684 gen_int_mode (normalizep, target_mode),
5685 target, 0, OPTAB_WIDEN);
5686 }
5687 else if (!want_add
5688 && rtx_cost (trueval, mode, XOR, 1,
5689 optimize_insn_for_speed_p ()) == 0)
5690 {
5691 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5692 normalizep, target_mode);
5693 if (tem != 0)
5694 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5695 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5696 }
5697
5698 if (tem != 0)
5699 return tem;
5700 delete_insns_since (last);
5701 }
5702
5703 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5704 the constant zero. Reject all other comparisons at this point. Only
5705 do LE and GT if branches are expensive since they are expensive on
5706 2-operand machines. */
5707
5708 if (op1 != const0_rtx
5709 || (code != EQ && code != NE
5710 && (BRANCH_COST (optimize_insn_for_speed_p (),
5711 false) <= 1 || (code != LE && code != GT))))
5712 return 0;
5713
5714 /* Try to put the result of the comparison in the sign bit. Assume we can't
5715 do the necessary operation below. */
5716
5717 tem = 0;
5718
5719 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5720 the sign bit set. */
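/* E.g. for A == 0 this gives 0 | -1 == -1 (sign bit set), while for
   A == 3 it gives 3 | 2 == 3 (sign bit clear); a negative A keeps its
   own sign bit through the IOR.  */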
5721
5722 if (code == LE)
5723 {
5724 /* This is destructive, so SUBTARGET can't be OP0. */
5725 if (rtx_equal_p (subtarget, op0))
5726 subtarget = 0;
5727
5728 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5729 OPTAB_WIDEN);
5730 if (tem)
5731 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5732 OPTAB_WIDEN);
5733 }
5734
5735 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5736 number of bits in the mode of OP0, minus one. */
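/* E.g. with 8-bit A == 3 this is (3 >> 7) - 3 == -3 (sign bit set,
   so A > 0), while A == -3 gives (-1) - (-3) == 2 and A == 0 gives 0
   (sign bit clear).  */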
5737
5738 if (code == GT)
5739 {
5740 if (rtx_equal_p (subtarget, op0))
5741 subtarget = 0;
5742
5743 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5744 GET_MODE_BITSIZE (mode) - 1,
5745 subtarget, 0);
5746 if (tem)
5747 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5748 OPTAB_WIDEN);
5749 }
5750
5751 if (code == EQ || code == NE)
5752 {
5753 /* For EQ or NE, one way to do the comparison is to apply an operation
5754 that converts the operand into a positive number if it is nonzero
5755 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5756 for NE we negate. This puts the result in the sign bit. Then we
5757 normalize with a shift, if needed.
5758
5759 Two operations that can do the above actions are ABS and FFS, so try
5760 them. If that doesn't work, and MODE is smaller than a full word,
5761 we can use zero-extension to the wider mode (an unsigned conversion)
5762 as the operation. */
5763
5764 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5765 that is compensated by the subsequent overflow when subtracting
5766 one / negating. */
5767
5768 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5769 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5770 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5771 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5772 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5773 {
5774 tem = convert_modes (word_mode, mode, op0, 1);
5775 mode = word_mode;
5776 }
5777
5778 if (tem != 0)
5779 {
5780 if (code == EQ)
5781 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5782 0, OPTAB_WIDEN);
5783 else
5784 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5785 }
5786
5787 /* If we couldn't do it that way, for NE we can "or" the two's complement
5788 of the value with itself. For EQ, we take the one's complement of
5789 that "or", which is an extra insn, so we only handle EQ if branches
5790 are expensive. */
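/* (-A) | A has the sign bit set exactly when A is nonzero: for A != 0
   either A or -A is negative, and for A == 0 both are zero.  */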
5791
5792 if (tem == 0
5793 && (code == NE
5794 || BRANCH_COST (optimize_insn_for_speed_p (),
5795 false) > 1))
5796 {
5797 if (rtx_equal_p (subtarget, op0))
5798 subtarget = 0;
5799
5800 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5801 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5802 OPTAB_WIDEN);
5803
5804 if (tem && code == EQ)
5805 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5806 }
5807 }
5808
5809 if (tem && normalizep)
5810 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5811 GET_MODE_BITSIZE (mode) - 1,
5812 subtarget, normalizep == 1);
5813
5814 if (tem)
5815 {
5816 if (!target)
5817 ;
5818 else if (GET_MODE (tem) != target_mode)
5819 {
5820 convert_move (target, tem, 0);
5821 tem = target;
5822 }
5823 else if (!subtarget)
5824 {
5825 emit_move_insn (target, tem);
5826 tem = target;
5827 }
5828 }
5829 else
5830 delete_insns_since (last);
5831
5832 return tem;
5833 }
5834
5835 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5836 and storing in TARGET. Normally return TARGET.
5837 Return 0 if that cannot be done.
5838
5839 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5840 it is VOIDmode, they cannot both be CONST_INT.
5841
5842 UNSIGNEDP is for the case where we have to widen the operands
5843 to perform the operation. It says to use zero-extension.
5844
5845 NORMALIZEP is 1 if we should convert the result to be either zero
5846 or one. NORMALIZEP is -1 if we should convert the result to be
5847 either zero or -1. If NORMALIZEP is zero, the result will be left
5848 "raw" out of the scc insn. */
5849
5850 rtx
5851 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5852 machine_mode mode, int unsignedp, int normalizep)
5853 {
5854 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5855 enum rtx_code rcode;
5856 rtx subtarget;
5857 rtx tem, trueval;
5858 rtx_insn *last;
5859
5860 /* If we compare constants, we shouldn't use a store-flag operation,
5861 but a constant load. We can get there via the vanilla route that
5862 usually generates a compare-branch sequence, but will in this case
5863 fold the comparison to a constant, and thus elide the branch. */
5864 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5865 return NULL_RTX;
5866
5867 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5868 target_mode);
5869 if (tem)
5870 return tem;
5871
5872 /* If we reached here, we can't do this with a scc insn, however there
5873 are some comparisons that can be done in other ways. Don't do any
5874 of these cases if branches are very cheap. */
5875 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5876 return 0;
5877
5878 /* See what we need to return. We can only return a 1, -1, or the
5879 sign bit. */
5880
5881 if (normalizep == 0)
5882 {
5883 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5884 normalizep = STORE_FLAG_VALUE;
5885
5886 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5887 ;
5888 else
5889 return 0;
5890 }
5891
5892 last = get_last_insn ();
5893
5894 /* If optimizing, use different pseudo registers for each insn, instead
5895 of reusing the same pseudo. This leads to better CSE, but slows
5896 down the compiler, since there are more pseudos. */
5897 subtarget = (!optimize
5898 && (target_mode == mode)) ? target : NULL_RTX;
5899 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5900
5901 /* For floating-point comparisons, try the reverse comparison or try
5902 changing the "orderedness" of the comparison. */
5903 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5904 {
5905 enum rtx_code first_code;
5906 bool and_them;
5907
5908 rcode = reverse_condition_maybe_unordered (code);
5909 if (can_compare_p (rcode, mode, ccp_store_flag)
5910 && (code == ORDERED || code == UNORDERED
5911 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5912 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5913 {
5914 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5915 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5916
5917 /* For the reverse comparison, use either an addition or a XOR. */
5918 if (want_add
5919 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5920 optimize_insn_for_speed_p ()) == 0)
5921 {
5922 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5923 STORE_FLAG_VALUE, target_mode);
5924 if (tem)
5925 return expand_binop (target_mode, add_optab, tem,
5926 gen_int_mode (normalizep, target_mode),
5927 target, 0, OPTAB_WIDEN);
5928 }
5929 else if (!want_add
5930 && rtx_cost (trueval, mode, XOR, 1,
5931 optimize_insn_for_speed_p ()) == 0)
5932 {
5933 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5934 normalizep, target_mode);
5935 if (tem)
5936 return expand_binop (target_mode, xor_optab, tem, trueval,
5937 target, INTVAL (trueval) >= 0,
5938 OPTAB_WIDEN);
5939 }
5940 }
5941
5942 delete_insns_since (last);
5943
5944 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5945 if (code == ORDERED || code == UNORDERED)
5946 return 0;
5947
5948 and_them = split_comparison (code, mode, &first_code, &code);
5949
5950 /* If there are no NaNs, the first comparison should always fall through.
5951 Effectively change the comparison to the other one. */
5952 if (!HONOR_NANS (mode))
5953 {
5954 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5955 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5956 target_mode);
5957 }
5958
5959 if (!HAVE_conditional_move)
5960 return 0;
5961
5962 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5963 conditional move. */
5964 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5965 normalizep, target_mode);
5966 if (tem == 0)
5967 return 0;
5968
5969 if (and_them)
5970 tem = emit_conditional_move (target, code, op0, op1, mode,
5971 tem, const0_rtx, GET_MODE (tem), 0);
5972 else
5973 tem = emit_conditional_move (target, code, op0, op1, mode,
5974 trueval, tem, GET_MODE (tem), 0);
5975
5976 if (tem == 0)
5977 delete_insns_since (last);
5978 return tem;
5979 }
5980
5981 /* The remaining tricks only apply to integer comparisons. */
5982
5983 scalar_int_mode int_mode;
5984 if (is_int_mode (mode, &int_mode))
5985 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
5986 unsignedp, normalizep, trueval);
5987
5988 return 0;
5989 }
5990
5991 /* Like emit_store_flag, but always succeeds. */
5992
5993 rtx
5994 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5995 machine_mode mode, int unsignedp, int normalizep)
5996 {
5997 rtx tem;
5998 rtx_code_label *label;
5999 rtx trueval, falseval;
6000
6001 /* First see if emit_store_flag can do the job. */
6002 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6003 if (tem != 0)
6004 return tem;
6005
6006 if (!target)
6007 target = gen_reg_rtx (word_mode);
6008
6009 /* If this failed, we have to do this with set/compare/jump/set code.
6010 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
6011 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6012 if (code == NE
6013 && GET_MODE_CLASS (mode) == MODE_INT
6014 && REG_P (target)
6015 && op0 == target
6016 && op1 == const0_rtx)
6017 {
6018 label = gen_label_rtx ();
6019 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6020 NULL_RTX, NULL, label,
6021 profile_probability::uninitialized ());
6022 emit_move_insn (target, trueval);
6023 emit_label (label);
6024 return target;
6025 }
6026
6027 if (!REG_P (target)
6028 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6029 target = gen_reg_rtx (GET_MODE (target));
6030
6031 /* Jump in the right direction if the target cannot implement CODE
6032 but can jump on its reverse condition. */
6033 falseval = const0_rtx;
6034 if (! can_compare_p (code, mode, ccp_jump)
6035 && (! FLOAT_MODE_P (mode)
6036 || code == ORDERED || code == UNORDERED
6037 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6038 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6039 {
6040 enum rtx_code rcode;
6041 if (FLOAT_MODE_P (mode))
6042 rcode = reverse_condition_maybe_unordered (code);
6043 else
6044 rcode = reverse_condition (code);
6045
6046 /* Canonicalize to UNORDERED for the libcall. */
6047 if (can_compare_p (rcode, mode, ccp_jump)
6048 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6049 {
6050 falseval = trueval;
6051 trueval = const0_rtx;
6052 code = rcode;
6053 }
6054 }
6055
6056 emit_move_insn (target, trueval);
6057 label = gen_label_rtx ();
6058 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6059 label, profile_probability::uninitialized ());
6060
6061 emit_move_insn (target, falseval);
6062 emit_label (label);
6063
6064 return target;
6065 }
6066 \f
6067 /* Perform possibly multi-word comparison and conditional jump to LABEL
6068 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
6069 now a thin wrapper around do_compare_rtx_and_jump. */
6070
6071 static void
6072 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6073 rtx_code_label *label)
6074 {
6075 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6076 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6077 NULL, label, profile_probability::uninitialized ());
6078 }