1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2020 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Work around tree-optimization/91825. */
22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "predict.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "expmed.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "dojump.h"
42 #include "explow.h"
43 #include "expr.h"
44 #include "langhooks.h"
45 #include "tree-vector-builder.h"
46
47 struct target_expmed default_target_expmed;
48 #if SWITCHABLE_TARGET
49 struct target_expmed *this_target_expmed = &default_target_expmed;
50 #endif
51
52 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
53 unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 poly_uint64, poly_uint64,
56 machine_mode, rtx, bool, bool);
57 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
58 unsigned HOST_WIDE_INT,
59 unsigned HOST_WIDE_INT,
60 poly_uint64, poly_uint64,
61 rtx, scalar_int_mode, bool);
62 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
63 unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT,
65 rtx, scalar_int_mode, bool);
66 static void store_split_bit_field (rtx, opt_scalar_int_mode,
67 unsigned HOST_WIDE_INT,
68 unsigned HOST_WIDE_INT,
69 poly_uint64, poly_uint64,
70 rtx, scalar_int_mode, bool);
71 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
72 unsigned HOST_WIDE_INT,
73 unsigned HOST_WIDE_INT, int, rtx,
74 machine_mode, machine_mode, bool, bool);
75 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
76 unsigned HOST_WIDE_INT,
77 unsigned HOST_WIDE_INT, rtx, int, bool);
78 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
79 unsigned HOST_WIDE_INT,
80 unsigned HOST_WIDE_INT, rtx, int, bool);
81 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
82 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
83 unsigned HOST_WIDE_INT,
84 unsigned HOST_WIDE_INT, int, bool);
85 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
86 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
87 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
88
89 /* Return a constant integer mask value of mode MODE with BITSIZE ones
90 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
91 The mask is truncated if necessary to the width of mode MODE. The
92 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
93
94 static inline rtx
95 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
96 {
97 return immed_wide_int_const
98 (wi::shifted_mask (bitpos, bitsize, complement,
99 GET_MODE_PRECISION (mode)), mode);
100 }
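/* An illustrative sketch (values invented for the example, not
   compiled code): with BITPOS = 8 and BITSIZE = 4 in SImode,

     mask_rtx (SImode, 8, 4, false)  ==> (const_int 0x00000f00)
     mask_rtx (SImode, 8, 4, true)   ==> (const_int 0xfffff0ff)

   i.e. four one bits starting at bit 8, or the complement of that.  */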
101
102 /* Test whether a value is zero or a power of two. */
103 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
104 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
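/* This works because X & (X - 1) clears the lowest set bit of X, so the
   result is zero iff at most one bit was set.  Worked examples (shown
   for illustration only):

     x = 8:  8 & 7 == 0        -> true (power of two)
     x = 6:  6 & 5 == 4        -> false
     x = 0:  0 & (0 - 1) == 0  -> true (zero)  */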
105
106 struct init_expmed_rtl
107 {
108 rtx reg;
109 rtx plus;
110 rtx neg;
111 rtx mult;
112 rtx sdiv;
113 rtx udiv;
114 rtx sdiv_32;
115 rtx smod_32;
116 rtx wide_mult;
117 rtx wide_lshr;
118 rtx wide_trunc;
119 rtx shift;
120 rtx shift_mult;
121 rtx shift_add;
122 rtx shift_sub0;
123 rtx shift_sub1;
124 rtx zext;
125 rtx trunc;
126
127 rtx pow2[MAX_BITS_PER_WORD];
128 rtx cint[MAX_BITS_PER_WORD];
129 };
130
131 static void
132 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
133 scalar_int_mode from_mode, bool speed)
134 {
135 int to_size, from_size;
136 rtx which;
137
138 to_size = GET_MODE_PRECISION (to_mode);
139 from_size = GET_MODE_PRECISION (from_mode);
140
141 /* Most partial integers have a precision less than the "full"
142 integer they require for storage. In case one doesn't, for
143 comparison purposes here, reduce its bit size by one. */
145 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
146 && pow2p_hwi (to_size))
147 to_size --;
148 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
149 && pow2p_hwi (from_size))
150 from_size --;
151
152 /* Assume cost of zero-extend and sign-extend is the same. */
153 which = (to_size < from_size ? all->trunc : all->zext);
154
155 PUT_MODE (all->reg, from_mode);
156 set_convert_cost (to_mode, from_mode, speed,
157 set_src_cost (which, to_mode, speed));
158 }
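/* For example (an illustrative sketch): a 24-bit partial mode keeps
   to_size = 24, whereas a hypothetical partial mode whose reported
   precision is a full power of two, say 32, is reduced to 31 above so
   that it still compares as narrower than the true 32-bit mode.  */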
159
160 static void
161 init_expmed_one_mode (struct init_expmed_rtl *all,
162 machine_mode mode, int speed)
163 {
164 int m, n, mode_bitsize;
165 machine_mode mode_from;
166
167 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
168
169 PUT_MODE (all->reg, mode);
170 PUT_MODE (all->plus, mode);
171 PUT_MODE (all->neg, mode);
172 PUT_MODE (all->mult, mode);
173 PUT_MODE (all->sdiv, mode);
174 PUT_MODE (all->udiv, mode);
175 PUT_MODE (all->sdiv_32, mode);
176 PUT_MODE (all->smod_32, mode);
177 PUT_MODE (all->wide_trunc, mode);
178 PUT_MODE (all->shift, mode);
179 PUT_MODE (all->shift_mult, mode);
180 PUT_MODE (all->shift_add, mode);
181 PUT_MODE (all->shift_sub0, mode);
182 PUT_MODE (all->shift_sub1, mode);
183 PUT_MODE (all->zext, mode);
184 PUT_MODE (all->trunc, mode);
185
186 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
187 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
188 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
189 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
190 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
191
192 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
193 <= 2 * add_cost (speed, mode)));
194 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
195 <= 4 * add_cost (speed, mode)));
196
197 set_shift_cost (speed, mode, 0, 0);
198 {
199 int cost = add_cost (speed, mode);
200 set_shiftadd_cost (speed, mode, 0, cost);
201 set_shiftsub0_cost (speed, mode, 0, cost);
202 set_shiftsub1_cost (speed, mode, 0, cost);
203 }
204
205 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
206 for (m = 1; m < n; m++)
207 {
208 XEXP (all->shift, 1) = all->cint[m];
209 XEXP (all->shift_mult, 1) = all->pow2[m];
210
211 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
212 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
213 speed));
214 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
215 speed));
216 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
217 speed));
218 }
219
220 scalar_int_mode int_mode_to;
221 if (is_a <scalar_int_mode> (mode, &int_mode_to))
222 {
223 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
224 mode_from = (machine_mode)(mode_from + 1))
225 init_expmed_one_conv (all, int_mode_to,
226 as_a <scalar_int_mode> (mode_from), speed);
227
228 scalar_int_mode wider_mode;
229 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
230 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
231 {
232 PUT_MODE (all->zext, wider_mode);
233 PUT_MODE (all->wide_mult, wider_mode);
234 PUT_MODE (all->wide_lshr, wider_mode);
235 XEXP (all->wide_lshr, 1)
236 = gen_int_shift_amount (wider_mode, mode_bitsize);
237
238 set_mul_widen_cost (speed, wider_mode,
239 set_src_cost (all->wide_mult, wider_mode, speed));
240 set_mul_highpart_cost (speed, int_mode_to,
241 set_src_cost (all->wide_trunc,
242 int_mode_to, speed));
243 }
244 }
245 }
246
247 void
248 init_expmed (void)
249 {
250 struct init_expmed_rtl all;
251 machine_mode mode = QImode;
252 int m, speed;
253
254 memset (&all, 0, sizeof all);
255 for (m = 1; m < MAX_BITS_PER_WORD; m++)
256 {
257 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
258 all.cint[m] = GEN_INT (m);
259 }
260
261 /* Avoid using hard regs in ways which may be unsupported. */
262 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
263 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
264 all.neg = gen_rtx_NEG (mode, all.reg);
265 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
266 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
267 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
268 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
269 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
270 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
271 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
272 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
273 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
274 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
275 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
276 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
277 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
278 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
279 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
280
281 for (speed = 0; speed < 2; speed++)
282 {
283 crtl->maybe_hot_insn_p = speed;
284 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
285
286 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
287 mode = (machine_mode)(mode + 1))
288 init_expmed_one_mode (&all, mode, speed);
289
290 if (MIN_MODE_PARTIAL_INT != VOIDmode)
291 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
292 mode = (machine_mode)(mode + 1))
293 init_expmed_one_mode (&all, mode, speed);
294
295 if (MIN_MODE_VECTOR_INT != VOIDmode)
296 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
297 mode = (machine_mode)(mode + 1))
298 init_expmed_one_mode (&all, mode, speed);
299 }
300
301 if (alg_hash_used_p ())
302 {
303 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
304 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
305 }
306 else
307 set_alg_hash_used_p (true);
308 default_rtl_profile ();
309
310 ggc_free (all.trunc);
311 ggc_free (all.shift_sub1);
312 ggc_free (all.shift_sub0);
313 ggc_free (all.shift_add);
314 ggc_free (all.shift_mult);
315 ggc_free (all.shift);
316 ggc_free (all.wide_trunc);
317 ggc_free (all.wide_lshr);
318 ggc_free (all.wide_mult);
319 ggc_free (all.zext);
320 ggc_free (all.smod_32);
321 ggc_free (all.sdiv_32);
322 ggc_free (all.udiv);
323 ggc_free (all.sdiv);
324 ggc_free (all.mult);
325 ggc_free (all.neg);
326 ggc_free (all.plus);
327 ggc_free (all.reg);
328 }
329
330 /* Return an rtx representing minus the value of X.
331 MODE is the intended mode of the result,
332 useful if X is a CONST_INT. */
333
334 rtx
335 negate_rtx (machine_mode mode, rtx x)
336 {
337 rtx result = simplify_unary_operation (NEG, mode, x, mode);
338
339 if (result == 0)
340 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
341
342 return result;
343 }
344
345 /* Whether reverse storage order is supported on the target. */
346 static int reverse_storage_order_supported = -1;
347
348 /* Check whether reverse storage order is supported on the target. */
349
350 static void
351 check_reverse_storage_order_support (void)
352 {
353 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
354 {
355 reverse_storage_order_supported = 0;
356 sorry ("reverse scalar storage order");
357 }
358 else
359 reverse_storage_order_supported = 1;
360 }
361
362 /* Whether reverse FP storage order is supported on the target. */
363 static int reverse_float_storage_order_supported = -1;
364
365 /* Check whether reverse FP storage order is supported on the target. */
366
367 static void
368 check_reverse_float_storage_order_support (void)
369 {
370 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
371 {
372 reverse_float_storage_order_supported = 0;
373 sorry ("reverse floating-point scalar storage order");
374 }
375 else
376 reverse_float_storage_order_supported = 1;
377 }
378
379 /* Return an rtx representing value of X with reverse storage order.
380 MODE is the intended mode of the result,
381 useful if X is a CONST_INT. */
382
383 rtx
384 flip_storage_order (machine_mode mode, rtx x)
385 {
386 scalar_int_mode int_mode;
387 rtx result;
388
389 if (mode == QImode)
390 return x;
391
392 if (COMPLEX_MODE_P (mode))
393 {
394 rtx real = read_complex_part (x, false);
395 rtx imag = read_complex_part (x, true);
396
397 real = flip_storage_order (GET_MODE_INNER (mode), real);
398 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
399
400 return gen_rtx_CONCAT (mode, real, imag);
401 }
402
403 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
404 check_reverse_storage_order_support ();
405
406 if (!is_a <scalar_int_mode> (mode, &int_mode))
407 {
408 if (FLOAT_MODE_P (mode)
409 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
410 check_reverse_float_storage_order_support ();
411
412 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
413 {
414 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
415 return x;
416 }
417 x = gen_lowpart (int_mode, x);
418 }
419
420 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
421 if (result == 0)
422 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
423
424 if (int_mode != mode)
425 result = gen_lowpart (mode, result);
426
427 return result;
428 }
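/* As an illustrative sketch (not compiled code), flipping the storage
   order of a 32-bit scalar is just a byte swap:

     flip_storage_order (SImode, (const_int 0x12345678))
       ==> (const_int 0x78563412)

   Complex values are handled by flipping each part separately, and
   non-integral scalars are first punned to an integer mode of the
   same precision.  */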
429
430 /* If MODE is set, adjust bitfield memory MEM so that it points to the
431 first unit of mode MODE that contains a bitfield of size BITSIZE at
432 bit position BITNUM. If MODE is not set, return a BLKmode reference
433 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
434 of the field within the new memory. */
435
436 static rtx
437 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
438 unsigned HOST_WIDE_INT bitsize,
439 unsigned HOST_WIDE_INT bitnum,
440 unsigned HOST_WIDE_INT *new_bitnum)
441 {
442 scalar_int_mode imode;
443 if (mode.exists (&imode))
444 {
445 unsigned int unit = GET_MODE_BITSIZE (imode);
446 *new_bitnum = bitnum % unit;
447 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
448 return adjust_bitfield_address (mem, imode, offset);
449 }
450 else
451 {
452 *new_bitnum = bitnum % BITS_PER_UNIT;
453 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
454 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
455 / BITS_PER_UNIT);
456 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
457 }
458 }
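/* A worked example (illustrative values only): for a bitfield of
   BITSIZE = 7 starting at BITNUM = 20 with MODE = HImode (16 bits),

     *new_bitnum = 20 % 16 = 4
     offset      = (20 - 4) / 8 = 2 bytes

   so the result is an HImode reference two bytes into MEM, with the
   field starting at bit 4 of that unit.  */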
459
460 /* The caller wants to perform insertion or extraction PATTERN on a
461 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
462 BITREGION_START and BITREGION_END are as for store_bit_field
463 and FIELDMODE is the natural mode of the field.
464
465 Search for a mode that is compatible with the memory access
466 restrictions and (where applicable) with a register insertion or
467 extraction. Return the new memory on success, storing the adjusted
468 bit position in *NEW_BITNUM. Return null otherwise. */
469
470 static rtx
471 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
472 rtx op0, HOST_WIDE_INT bitsize,
473 HOST_WIDE_INT bitnum,
474 poly_uint64 bitregion_start,
475 poly_uint64 bitregion_end,
476 machine_mode fieldmode,
477 unsigned HOST_WIDE_INT *new_bitnum)
478 {
479 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
480 bitregion_end, MEM_ALIGN (op0),
481 MEM_VOLATILE_P (op0));
482 scalar_int_mode best_mode;
483 if (iter.next_mode (&best_mode))
484 {
485 /* We can use a memory in BEST_MODE. See whether this is true for
486 any wider modes. All other things being equal, we prefer to
487 use the widest mode possible because it tends to expose more
488 CSE opportunities. */
489 if (!iter.prefer_smaller_modes ())
490 {
491 /* Limit the search to the mode required by the corresponding
492 register insertion or extraction instruction, if any. */
493 scalar_int_mode limit_mode = word_mode;
494 extraction_insn insn;
495 if (get_best_reg_extraction_insn (&insn, pattern,
496 GET_MODE_BITSIZE (best_mode),
497 fieldmode))
498 limit_mode = insn.field_mode;
499
500 scalar_int_mode wider_mode;
501 while (iter.next_mode (&wider_mode)
502 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
503 best_mode = wider_mode;
504 }
505 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
506 new_bitnum);
507 }
508 return NULL_RTX;
509 }
510
511 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
512 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
513 offset is then BITNUM / BITS_PER_UNIT. */
514
515 static bool
516 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
517 machine_mode struct_mode)
518 {
519 poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
520 if (BYTES_BIG_ENDIAN)
521 return (multiple_p (bitnum, BITS_PER_UNIT)
522 && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
523 || multiple_p (bitnum + bitsize,
524 regsize * BITS_PER_UNIT)));
525 else
526 return multiple_p (bitnum, regsize * BITS_PER_UNIT);
527 }
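/* For instance (illustrative, assuming 4-byte registers so that
   REGSIZE = 4): on a little-endian target a field is a lowpart iff
   BITNUM is a multiple of 32, while on a big-endian target an SImode
   field at bits [32, 64) of a DImode structure qualifies because
   BITNUM + BITSIZE reaches the end of the structure.  */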
528
529 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
530 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
531 Return false if the access would touch memory outside the range
532 BITREGION_START to BITREGION_END for conformance to the C++ memory
533 model. */
534
535 static bool
536 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
537 unsigned HOST_WIDE_INT bitnum,
538 scalar_int_mode fieldmode,
539 poly_uint64 bitregion_start,
540 poly_uint64 bitregion_end)
541 {
542 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
543
544 /* -fstrict-volatile-bitfields must be enabled and we must have a
545 volatile MEM. */
546 if (!MEM_P (op0)
547 || !MEM_VOLATILE_P (op0)
548 || flag_strict_volatile_bitfields <= 0)
549 return false;
550
551 /* The bit size must not be larger than the field mode, and
552 the field mode must not be larger than a word. */
553 if (bitsize > modesize || modesize > BITS_PER_WORD)
554 return false;
555
556 /* Check for cases of unaligned fields that must be split. */
557 if (bitnum % modesize + bitsize > modesize)
558 return false;
559
560 /* The memory must be sufficiently aligned for a MODESIZE access.
561 This condition guarantees that the memory access will not
562 touch anything after the end of the structure. */
563 if (MEM_ALIGN (op0) < modesize)
564 return false;
565
566 /* Check for cases where the C++ memory model applies. */
567 if (maybe_ne (bitregion_end, 0U)
568 && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
569 || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
570 bitregion_end)))
571 return false;
572
573 return true;
574 }
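/* An illustrative sketch: a volatile 8-bit field at bit 8 of a
   32-bit-aligned word accessed with FIELDMODE = SImode passes all of
   the checks above, so a single SImode access is forced.  The same
   field at bit 28 would be rejected because it straddles an SImode
   boundary (28 % 32 + 8 > 32).  */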
575
576 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
577 bit number BITNUM can be treated as a simple value of mode MODE.
578 Store the byte offset in *BYTENUM if so. */
579
580 static bool
581 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
582 machine_mode mode, poly_uint64 *bytenum)
583 {
584 return (MEM_P (op0)
585 && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
586 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
587 && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
588 || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
589 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
590 }
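/* For example (values for illustration only): a 32-bit field at
   BITNUM = 64 in a sufficiently aligned MEM is simply an SImode value
   at byte offset 8, so the caller can use an ordinary load or store
   instead of shift-and-mask sequences.  */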
591 \f
592 /* Try to use instruction INSV to store VALUE into a field of OP0.
593 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
594 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
595 are as for store_bit_field. */
596
597 static bool
598 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
599 opt_scalar_int_mode op0_mode,
600 unsigned HOST_WIDE_INT bitsize,
601 unsigned HOST_WIDE_INT bitnum,
602 rtx value, scalar_int_mode value_mode)
603 {
604 class expand_operand ops[4];
605 rtx value1;
606 rtx xop0 = op0;
607 rtx_insn *last = get_last_insn ();
608 bool copy_back = false;
609
610 scalar_int_mode op_mode = insv->field_mode;
611 unsigned int unit = GET_MODE_BITSIZE (op_mode);
612 if (bitsize == 0 || bitsize > unit)
613 return false;
614
615 if (MEM_P (xop0))
616 /* Get a reference to the first byte of the field. */
617 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
618 &bitnum);
619 else
620 {
621 /* Convert from counting within OP0 to counting in OP_MODE. */
622 if (BYTES_BIG_ENDIAN)
623 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
624
625 /* If xop0 is a register, we need it in OP_MODE
626 to make it acceptable to the format of insv. */
627 if (GET_CODE (xop0) == SUBREG)
628 /* We can't just change the mode, because this might clobber op0,
629 and we will need the original value of op0 if insv fails. */
630 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
631 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
632 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
633 }
634
635 /* If the destination is a paradoxical subreg such that we need a
636 truncate to the inner mode, perform the insertion on a temporary and
637 truncate the result to the original destination. Note that we can't
638 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
639 X) 0)) is (reg:N X). */
640 if (GET_CODE (xop0) == SUBREG
641 && REG_P (SUBREG_REG (xop0))
642 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
643 op_mode))
644 {
645 rtx tem = gen_reg_rtx (op_mode);
646 emit_move_insn (tem, xop0);
647 xop0 = tem;
648 copy_back = true;
649 }
650
651 /* There is a similar overflow check at the start of store_bit_field_1,
652 but it only handles the case where the field lies completely
653 outside the register. There are also cases where the field lies
654 partially in the register, and we need to adjust BITSIZE for that
655 partial overflow. Without this fix, pr48335-2.c on big-endian
656 would break on architectures that have a bit-insert instruction,
657 such as arm, aarch64, etc. */
658 if (bitsize + bitnum > unit && bitnum < unit)
659 {
660 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
661 "destination object, data truncated into %wu-bit",
662 bitsize, unit - bitnum);
663 bitsize = unit - bitnum;
664 }
665
666 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
667 "backwards" from the size of the unit we are inserting into.
668 Otherwise, we count bits from the most significant on a
669 BYTES/BITS_BIG_ENDIAN machine. */
670
671 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
672 bitnum = unit - bitsize - bitnum;
673
674 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
675 value1 = value;
676 if (value_mode != op_mode)
677 {
678 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
679 {
680 rtx tmp;
681 /* Optimization: Don't bother really extending VALUE
682 if it has all the bits we will actually use. However,
683 if we must narrow it, be sure we do it correctly. */
684
685 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
686 {
687 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
688 if (! tmp)
689 tmp = simplify_gen_subreg (op_mode,
690 force_reg (value_mode, value1),
691 value_mode, 0);
692 }
693 else
694 {
695 tmp = gen_lowpart_if_possible (op_mode, value1);
696 if (! tmp)
697 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
698 }
699 value1 = tmp;
700 }
701 else if (CONST_INT_P (value))
702 value1 = gen_int_mode (INTVAL (value), op_mode);
703 else
704 /* Parse phase is supposed to make VALUE's data type
705 match that of the component reference, which is a type
706 at least as wide as the field; so VALUE should have
707 a mode that corresponds to that type. */
708 gcc_assert (CONSTANT_P (value));
709 }
710
711 create_fixed_operand (&ops[0], xop0);
712 create_integer_operand (&ops[1], bitsize);
713 create_integer_operand (&ops[2], bitnum);
714 create_input_operand (&ops[3], value1, op_mode);
715 if (maybe_expand_insn (insv->icode, 4, ops))
716 {
717 if (copy_back)
718 convert_move (op0, xop0, true);
719 return true;
720 }
721 delete_insns_since (last);
722 return false;
723 }
724
725 /* A subroutine of store_bit_field, with the same arguments. Return true
726 if the operation could be implemented.
727
728 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
729 no other way of implementing the operation. If FALLBACK_P is false,
730 return false instead. */
731
732 static bool
733 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
734 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
735 machine_mode fieldmode,
736 rtx value, bool reverse, bool fallback_p)
737 {
738 rtx op0 = str_rtx;
739
740 while (GET_CODE (op0) == SUBREG)
741 {
742 bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
743 op0 = SUBREG_REG (op0);
744 }
745
746 /* No action is needed if the target is a register and if the field
747 lies completely outside that register. This can occur if the source
748 code contains an out-of-bounds access to a small array. */
749 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
750 return true;
751
752 /* Use vec_set patterns for inserting parts of vectors whenever
753 available. */
754 machine_mode outermode = GET_MODE (op0);
755 scalar_mode innermode = GET_MODE_INNER (outermode);
756 poly_uint64 pos;
757 if (VECTOR_MODE_P (outermode)
758 && !MEM_P (op0)
759 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
760 && fieldmode == innermode
761 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
762 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
763 {
764 class expand_operand ops[3];
765 enum insn_code icode = optab_handler (vec_set_optab, outermode);
766
767 create_fixed_operand (&ops[0], op0);
768 create_input_operand (&ops[1], value, innermode);
769 create_integer_operand (&ops[2], pos);
770 if (maybe_expand_insn (icode, 3, ops))
771 return true;
772 }
773
774 /* If the target is a register, then overwriting the entire object or
775 storing a full-word or multi-word field can be done with just a SUBREG. */
776 if (!MEM_P (op0)
777 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
778 {
779 /* Use the subreg machinery either to narrow OP0 to the required
780 words or to cope with mode punning between equal-sized modes.
781 In the latter case, use subreg on the rhs, not the lhs. */
782 rtx sub;
783 HOST_WIDE_INT regnum;
784 poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
785 if (known_eq (bitnum, 0U)
786 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
787 {
788 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
789 if (sub)
790 {
791 if (reverse)
792 sub = flip_storage_order (GET_MODE (op0), sub);
793 emit_move_insn (op0, sub);
794 return true;
795 }
796 }
797 else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
798 && multiple_p (bitsize, regsize * BITS_PER_UNIT))
799 {
800 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
801 regnum * regsize);
802 if (sub)
803 {
804 if (reverse)
805 value = flip_storage_order (fieldmode, value);
806 emit_move_insn (sub, value);
807 return true;
808 }
809 }
810 }
811
812 /* If the target is memory, storing any naturally aligned field can be
813 done with a simple store. For targets that support fast unaligned
814 memory access, any naturally sized, unit aligned field can be done directly. */
815 poly_uint64 bytenum;
816 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
817 {
818 op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
819 if (reverse)
820 value = flip_storage_order (fieldmode, value);
821 emit_move_insn (op0, value);
822 return true;
823 }
824
825 /* It's possible we'll need to handle other cases here for
826 polynomial bitnum and bitsize. */
827
828 /* From here on we need to be looking at a fixed-size insertion. */
829 unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
830 unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
831
832 /* Make sure we are playing with integral modes. Pun with subregs
833 if we aren't. This must come after the entire register case above,
834 since that case is valid for any mode. The following cases are only
835 valid for integral modes. */
836 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
837 scalar_int_mode imode;
838 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
839 {
840 if (MEM_P (op0))
841 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
842 0, MEM_SIZE (op0));
843 else if (!op0_mode.exists ())
844 {
845 if (ibitnum == 0
846 && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
847 && MEM_P (value)
848 && !reverse)
849 {
850 value = adjust_address (value, GET_MODE (op0), 0);
851 emit_move_insn (op0, value);
852 return true;
853 }
854 if (!fallback_p)
855 return false;
856 rtx temp = assign_stack_temp (GET_MODE (op0),
857 GET_MODE_SIZE (GET_MODE (op0)));
858 emit_move_insn (temp, op0);
859 store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
860 reverse, fallback_p);
861 emit_move_insn (op0, temp);
862 return true;
863 }
864 else
865 op0 = gen_lowpart (op0_mode.require (), op0);
866 }
867
868 return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
869 bitregion_start, bitregion_end,
870 fieldmode, value, reverse, fallback_p);
871 }
872
873 /* Subroutine of store_bit_field_1, with the same arguments, except
874 that BITSIZE and BITNUM are constant. Handle cases specific to
875 integral modes. If OP0_MODE is defined, it is the mode of OP0,
876 otherwise OP0 is a BLKmode MEM. */
877
878 static bool
879 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
880 unsigned HOST_WIDE_INT bitsize,
881 unsigned HOST_WIDE_INT bitnum,
882 poly_uint64 bitregion_start,
883 poly_uint64 bitregion_end,
884 machine_mode fieldmode,
885 rtx value, bool reverse, bool fallback_p)
886 {
887 /* Storing an lsb-aligned field in a register
888 can be done with a movstrict instruction. */
889
890 if (!MEM_P (op0)
891 && !reverse
892 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
893 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
894 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
895 {
896 class expand_operand ops[2];
897 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
898 rtx arg0 = op0;
899 unsigned HOST_WIDE_INT subreg_off;
900
901 if (GET_CODE (arg0) == SUBREG)
902 {
903 /* Else we've got some float mode source being extracted into
904 a different float mode destination -- this combination of
905 subregs results in Severe Tire Damage. */
906 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
907 || GET_MODE_CLASS (fieldmode) == MODE_INT
908 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
909 arg0 = SUBREG_REG (arg0);
910 }
911
912 subreg_off = bitnum / BITS_PER_UNIT;
913 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
914 {
915 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
916
917 create_fixed_operand (&ops[0], arg0);
918 /* Shrink the source operand to FIELDMODE. */
919 create_convert_operand_to (&ops[1], value, fieldmode, false);
920 if (maybe_expand_insn (icode, 2, ops))
921 return true;
922 }
923 }
924
925 /* Handle fields bigger than a word. */
926
927 if (bitsize > BITS_PER_WORD)
928 {
929 /* Here we transfer the words of the field
930 in the order least significant first.
931 This is because the most significant word is the one which may
932 be less than full.
933 However, only do that if the value is not BLKmode. */
934
935 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
936 const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
937 rtx_insn *last;
938
939 /* This is the mode we must force value to, so that there will be enough
940 subwords to extract. Note that fieldmode will often (always?) be
941 VOIDmode, because that is what store_field uses to indicate that this
942 is a bit field, but passing VOIDmode to operand_subword_force
943 is not allowed.
944
945 The mode must be fixed-size, since insertions into variable-sized
946 objects are meant to be handled before calling this function. */
947 fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
948 if (value_mode == VOIDmode)
949 value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
950
951 last = get_last_insn ();
952 for (int i = 0; i < nwords; i++)
953 {
954 /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
955 except maybe for the last iteration. */
956 const unsigned HOST_WIDE_INT new_bitsize
957 = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
958 /* Bit offset from the starting bit number in the target. */
959 const unsigned int bit_offset
960 = backwards ^ reverse
961 ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
962 : i * BITS_PER_WORD;
963 /* Starting word number in the value. */
964 const unsigned int wordnum
965 = backwards
966 ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
967 : i;
968 /* The chunk of the value in word_mode. We use bit-field extraction
969 in BLKmode to handle unaligned memory references and to shift the
970 last chunk right on big-endian machines if need be. */
971 rtx value_word
972 = fieldmode == BLKmode
973 ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
974 1, NULL_RTX, word_mode, word_mode, false,
975 NULL)
976 : operand_subword_force (value, wordnum, value_mode);
977
978 if (!store_bit_field_1 (op0, new_bitsize,
979 bitnum + bit_offset,
980 bitregion_start, bitregion_end,
981 word_mode,
982 value_word, reverse, fallback_p))
983 {
984 delete_insns_since (last);
985 return false;
986 }
987 }
988 return true;
989 }
990
991 /* If VALUE has a floating-point or complex mode, access it as an
992 integer of the corresponding size. This can occur on a machine
993 with 64 bit registers that uses SFmode for float. It can also
994 occur for unaligned float or complex fields. */
995 rtx orig_value = value;
996 scalar_int_mode value_mode;
997 if (GET_MODE (value) == VOIDmode)
998 /* By this point we've dealt with values that are bigger than a word,
999 so word_mode is a conservatively correct choice. */
1000 value_mode = word_mode;
1001 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1002 {
1003 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1004 value = gen_reg_rtx (value_mode);
1005 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1006 }
1007
1008 /* If OP0 is a multi-word register, narrow it to the affected word.
1009 If the region spans two words, defer to store_split_bit_field.
1010 Don't do this if op0 is a single hard register wider than word
1011 such as a float or vector register. */
1012 if (!MEM_P (op0)
1013 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1014 && (!REG_P (op0)
1015 || !HARD_REGISTER_P (op0)
1016 || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1017 {
1018 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1019 {
1020 if (!fallback_p)
1021 return false;
1022
1023 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1024 bitregion_start, bitregion_end,
1025 value, value_mode, reverse);
1026 return true;
1027 }
1028 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1029 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1030 gcc_assert (op0);
1031 op0_mode = word_mode;
1032 bitnum %= BITS_PER_WORD;
1033 }
1034
1035 /* From here on we can assume that the field to be stored fits
1036 within a word. If the destination is a register, it too fits
1037 in a word. */
1038
1039 extraction_insn insv;
1040 if (!MEM_P (op0)
1041 && !reverse
1042 && get_best_reg_extraction_insn (&insv, EP_insv,
1043 GET_MODE_BITSIZE (op0_mode.require ()),
1044 fieldmode)
1045 && store_bit_field_using_insv (&insv, op0, op0_mode,
1046 bitsize, bitnum, value, value_mode))
1047 return true;
1048
1049 /* If OP0 is a memory, try copying it to a register and seeing if a
1050 cheap register alternative is available. */
1051 if (MEM_P (op0) && !reverse)
1052 {
1053 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1054 fieldmode)
1055 && store_bit_field_using_insv (&insv, op0, op0_mode,
1056 bitsize, bitnum, value, value_mode))
1057 return true;
1058
1059 rtx_insn *last = get_last_insn ();
1060
1061 /* Try loading part of OP0 into a register, inserting the bitfield
1062 into that, and then copying the result back to OP0. */
1063 unsigned HOST_WIDE_INT bitpos;
1064 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1065 bitregion_start, bitregion_end,
1066 fieldmode, &bitpos);
1067 if (xop0)
1068 {
1069 rtx tempreg = copy_to_reg (xop0);
1070 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1071 bitregion_start, bitregion_end,
1072 fieldmode, orig_value, reverse, false))
1073 {
1074 emit_move_insn (xop0, tempreg);
1075 return true;
1076 }
1077 delete_insns_since (last);
1078 }
1079 }
1080
1081 if (!fallback_p)
1082 return false;
1083
1084 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1085 bitregion_end, value, value_mode, reverse);
1086 return true;
1087 }
1088
1089 /* Generate code to store value from rtx VALUE
1090 into a bit-field within structure STR_RTX
1091 containing BITSIZE bits starting at bit BITNUM.
1092
1093 BITREGION_START is bitpos of the first bitfield in this region.
1094 BITREGION_END is the bitpos of the ending bitfield in this region.
1095 These two fields are 0 if the C++ memory model does not apply,
1096 or if we are not interested in keeping track of bitfield regions.
1097
1098 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1099
1100 If REVERSE is true, the store is to be done in reverse order. */
1101
1102 void
1103 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1104 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1105 machine_mode fieldmode,
1106 rtx value, bool reverse)
1107 {
1108 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1109 unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1110 scalar_int_mode int_mode;
1111 if (bitsize.is_constant (&ibitsize)
1112 && bitnum.is_constant (&ibitnum)
1113 && is_a <scalar_int_mode> (fieldmode, &int_mode)
1114 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1115 bitregion_start, bitregion_end))
1116 {
1117 /* Storing of a full word can be done with a simple store.
1118 We know here that the field can be accessed with one single
1119 instruction. For targets that support unaligned memory,
1120 an unaligned access may be necessary. */
1121 if (ibitsize == GET_MODE_BITSIZE (int_mode))
1122 {
1123 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1124 ibitnum / BITS_PER_UNIT);
1125 if (reverse)
1126 value = flip_storage_order (int_mode, value);
1127 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1128 emit_move_insn (str_rtx, value);
1129 }
1130 else
1131 {
1132 rtx temp;
1133
1134 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1135 ibitnum, &ibitnum);
1136 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1137 temp = copy_to_reg (str_rtx);
1138 if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1139 int_mode, value, reverse, true))
1140 gcc_unreachable ();
1141
1142 emit_move_insn (str_rtx, temp);
1143 }
1144
1145 return;
1146 }
1147
1148 /* Under the C++0x memory model, we must not touch bits outside the
1149 bit region. Adjust the address to start at the beginning of the
1150 bit region. */
1151 if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1152 {
1153 scalar_int_mode best_mode;
1154 machine_mode addr_mode = VOIDmode;
1155
1156 poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1157 bitnum -= bitregion_start;
1158 poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1159 bitregion_end -= bitregion_start;
1160 bitregion_start = 0;
1161 if (bitsize.is_constant (&ibitsize)
1162 && bitnum.is_constant (&ibitnum)
1163 && get_best_mode (ibitsize, ibitnum,
1164 bitregion_start, bitregion_end,
1165 MEM_ALIGN (str_rtx), INT_MAX,
1166 MEM_VOLATILE_P (str_rtx), &best_mode))
1167 addr_mode = best_mode;
1168 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1169 offset, size);
1170 }
1171
1172 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1173 bitregion_start, bitregion_end,
1174 fieldmode, value, reverse, true))
1175 gcc_unreachable ();
1176 }
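/* A hypothetical caller-side sketch (names and values invented for
   illustration): storing the constant 5 into a field "unsigned a : 3"
   that starts at bit 2 of a byte-aligned object STR would expand
   roughly as

     store_bit_field (str, 3, 2, 0, 0, QImode, GEN_INT (5), false);

   where the zero bitregion bounds mean that no C++ memory model
   restrictions apply.  */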
1177 \f
1178 /* Use shifts and boolean operations to store VALUE into a bit field of
1179 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1180 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1181 the mode of VALUE.
1182
1183 If REVERSE is true, the store is to be done in reverse order. */
1184
1185 static void
1186 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1187 unsigned HOST_WIDE_INT bitsize,
1188 unsigned HOST_WIDE_INT bitnum,
1189 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1190 rtx value, scalar_int_mode value_mode, bool reverse)
1191 {
1192 /* There is a case not handled here:
1193 a structure with a known alignment of just a halfword
1194 and a field split across two aligned halfwords within the structure.
1195 Or likewise a structure with a known alignment of just a byte
1196 and a field split across two bytes.
1197 Such cases are not supposed to be able to occur. */
1198
1199 scalar_int_mode best_mode;
1200 if (MEM_P (op0))
1201 {
1202 unsigned int max_bitsize = BITS_PER_WORD;
1203 scalar_int_mode imode;
1204 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1205 max_bitsize = GET_MODE_BITSIZE (imode);
1206
1207 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1208 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1209 &best_mode))
1210 {
1211 /* The only way this should occur is if the field spans word
1212 boundaries. */
1213 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1214 bitregion_start, bitregion_end,
1215 value, value_mode, reverse);
1216 return;
1217 }
1218
1219 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1220 }
1221 else
1222 best_mode = op0_mode.require ();
1223
1224 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1225 value, value_mode, reverse);
1226 }
1227
1228 /* Helper function for store_fixed_bit_field, stores
1229 the bit field always using MODE, which is the mode of OP0. The other
1230 arguments are as for store_fixed_bit_field. */
1231
1232 static void
1233 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1234 unsigned HOST_WIDE_INT bitsize,
1235 unsigned HOST_WIDE_INT bitnum,
1236 rtx value, scalar_int_mode value_mode, bool reverse)
1237 {
1238 rtx temp;
1239 int all_zero = 0;
1240 int all_one = 0;
1241
1242 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1243 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1244
1245 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1246 /* BITNUM is the distance between our msb
1247 and that of the containing datum.
1248 Convert it to the distance from the lsb. */
1249 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1250
1251 /* Now BITNUM is always the distance between our lsb
1252 and that of OP0. */
1253
1254 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1255 we must first convert its mode to MODE. */
1256
1257 if (CONST_INT_P (value))
1258 {
1259 unsigned HOST_WIDE_INT v = UINTVAL (value);
1260
1261 if (bitsize < HOST_BITS_PER_WIDE_INT)
1262 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1263
1264 if (v == 0)
1265 all_zero = 1;
1266 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1267 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1268 || (bitsize == HOST_BITS_PER_WIDE_INT
1269 && v == HOST_WIDE_INT_M1U))
1270 all_one = 1;
1271
1272 value = lshift_value (mode, v, bitnum);
1273 }
1274 else
1275 {
1276 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1277 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1278
1279 if (value_mode != mode)
1280 value = convert_to_mode (mode, value, 1);
1281
1282 if (must_and)
1283 value = expand_binop (mode, and_optab, value,
1284 mask_rtx (mode, 0, bitsize, 0),
1285 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1286 if (bitnum > 0)
1287 value = expand_shift (LSHIFT_EXPR, mode, value,
1288 bitnum, NULL_RTX, 1);
1289 }
1290
1291 if (reverse)
1292 value = flip_storage_order (mode, value);
1293
1294 /* Now clear the chosen bits in OP0,
1295 except that if VALUE is -1 we need not bother. */
1296 /* We keep the intermediates in registers to allow CSE to combine
1297 consecutive bitfield assignments. */
1298
1299 temp = force_reg (mode, op0);
1300
1301 if (! all_one)
1302 {
1303 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1304 if (reverse)
1305 mask = flip_storage_order (mode, mask);
1306 temp = expand_binop (mode, and_optab, temp, mask,
1307 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1308 temp = force_reg (mode, temp);
1309 }
1310
1311 /* Now logical-or VALUE into OP0, unless it is zero. */
1312
1313 if (! all_zero)
1314 {
1315 temp = expand_binop (mode, ior_optab, temp, value,
1316 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1317 temp = force_reg (mode, temp);
1318 }
1319
1320 if (op0 != temp)
1321 {
1322 op0 = copy_rtx (op0);
1323 emit_move_insn (op0, temp);
1324 }
1325 }
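/* The net effect is the classical clear-then-or sequence.  A worked
   example with an 8-bit OP0, BITSIZE = 3, BITNUM = 4 and VALUE = 5
   (all values invented for the sketch):

     mask = 0b01110000, so  ~mask = 0b10001111
     temp = (op0 & 0b10001111) | (5 << 4)

   turning op0 = 0xff into 0xdf.  */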
1326 \f
1327 /* Store a bit field that is split across multiple accessible memory objects.
1328
1329 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1330 BITSIZE is the field width; BITPOS the position of its first bit
1331 (within the word).
1332 VALUE is the value to store, which has mode VALUE_MODE.
1333 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1334 a BLKmode MEM.
1335
1336 If REVERSE is true, the store is to be done in reverse order.
1337
1338 This does not yet handle fields wider than BITS_PER_WORD. */
1339
1340 static void
1341 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1342 unsigned HOST_WIDE_INT bitsize,
1343 unsigned HOST_WIDE_INT bitpos,
1344 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1345 rtx value, scalar_int_mode value_mode, bool reverse)
1346 {
1347 unsigned int unit, total_bits, bitsdone = 0;
1348
1349 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle
1350 that much at a time. */
1351 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1352 unit = BITS_PER_WORD;
1353 else
1354 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1355
1356 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1357 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1358 again, and we will mutually recurse forever. */
1359 if (MEM_P (op0) && op0_mode.exists ())
1360 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1361
1362 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1363 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1364 that VALUE might be a floating-point constant. */
1365 if (CONSTANT_P (value) && !CONST_INT_P (value))
1366 {
1367 rtx word = gen_lowpart_common (word_mode, value);
1368
1369 if (word && (value != word))
1370 value = word;
1371 else
1372 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1373 value_mode = word_mode;
1374 }
1375
1376 total_bits = GET_MODE_BITSIZE (value_mode);
1377
1378 while (bitsdone < bitsize)
1379 {
1380 unsigned HOST_WIDE_INT thissize;
1381 unsigned HOST_WIDE_INT thispos;
1382 unsigned HOST_WIDE_INT offset;
1383 rtx part;
1384
1385 offset = (bitpos + bitsdone) / unit;
1386 thispos = (bitpos + bitsdone) % unit;
1387
1388 /* When the region of bytes we can touch is restricted, decrease
1389 UNIT close to the end of the region as needed. If op0 is a REG
1390 or SUBREG of REG, don't do this, as there can't be data races
1391 on a register and we can expand shorter code in some cases. */
1392 if (maybe_ne (bitregion_end, 0U)
1393 && unit > BITS_PER_UNIT
1394 && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1395 && !REG_P (op0)
1396 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1397 {
1398 unit = unit / 2;
1399 continue;
1400 }
1401
1402 /* THISSIZE must not overrun a word boundary. Otherwise,
1403 store_fixed_bit_field will call us again, and we will mutually
1404 recurse forever. */
1405 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1406 thissize = MIN (thissize, unit - thispos);
1407
1408 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1409 {
1410 /* Fetch successively less significant portions. */
1411 if (CONST_INT_P (value))
1412 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1413 >> (bitsize - bitsdone - thissize))
1414 & ((HOST_WIDE_INT_1 << thissize) - 1));
1415 /* Likewise, but the source is little-endian. */
1416 else if (reverse)
1417 part = extract_fixed_bit_field (word_mode, value, value_mode,
1418 thissize,
1419 bitsize - bitsdone - thissize,
1420 NULL_RTX, 1, false);
1421 else
1422 /* The args are chosen so that the last part includes the
1423 lsb. Give extract_bit_field the value it needs (with
1424 endianness compensation) to fetch the piece we want. */
1425 part = extract_fixed_bit_field (word_mode, value, value_mode,
1426 thissize,
1427 total_bits - bitsize + bitsdone,
1428 NULL_RTX, 1, false);
1429 }
1430 else
1431 {
1432 /* Fetch successively more significant portions. */
1433 if (CONST_INT_P (value))
1434 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1435 >> bitsdone)
1436 & ((HOST_WIDE_INT_1 << thissize) - 1));
1437 /* Likewise, but the source is big-endian. */
1438 else if (reverse)
1439 part = extract_fixed_bit_field (word_mode, value, value_mode,
1440 thissize,
1441 total_bits - bitsdone - thissize,
1442 NULL_RTX, 1, false);
1443 else
1444 part = extract_fixed_bit_field (word_mode, value, value_mode,
1445 thissize, bitsdone, NULL_RTX,
1446 1, false);
1447 }
1448
1449 /* If OP0 is a register, then handle OFFSET here. */
1450 rtx op0_piece = op0;
1451 opt_scalar_int_mode op0_piece_mode = op0_mode;
1452 if (SUBREG_P (op0) || REG_P (op0))
1453 {
1454 scalar_int_mode imode;
1455 if (op0_mode.exists (&imode)
1456 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1457 {
1458 if (offset)
1459 op0_piece = const0_rtx;
1460 }
1461 else
1462 {
1463 op0_piece = operand_subword_force (op0,
1464 offset * unit / BITS_PER_WORD,
1465 GET_MODE (op0));
1466 op0_piece_mode = word_mode;
1467 }
1468 offset &= BITS_PER_WORD / unit - 1;
1469 }
1470
1471 /* OFFSET is in UNITs, and UNIT is in bits. If OP0_PIECE is const0_rtx,
1472 it is just an out-of-bounds access. Ignore it. */
1473 if (op0_piece != const0_rtx)
1474 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1475 offset * unit + thispos, bitregion_start,
1476 bitregion_end, part, word_mode, reverse);
1477 bitsdone += thissize;
1478 }
1479 }
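/* A worked example (illustrative only, assuming 32-bit words): storing
   a 10-bit field at BITPOS = 28 takes two iterations, one for the
   4 bits at bits [28, 32) of word 0 and one for the remaining 6 bits
   at bits [0, 6) of word 1, each delegated to store_fixed_bit_field.  */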
1480 \f
1481 /* A subroutine of extract_bit_field_1 that converts return value X
1482 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1483 to extract_bit_field. */
1484
1485 static rtx
1486 convert_extracted_bit_field (rtx x, machine_mode mode,
1487 machine_mode tmode, bool unsignedp)
1488 {
1489 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1490 return x;
1491
1492 /* If TMODE is not a scalar integral mode, first convert to the
1493 integer mode of that size and then access the result as TMODE
1494 via a SUBREG. */
1495 if (!SCALAR_INT_MODE_P (tmode))
1496 {
1497 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1498 x = convert_to_mode (int_mode, x, unsignedp);
1499 x = force_reg (int_mode, x);
1500 return gen_lowpart (tmode, x);
1501 }
1502
1503 return convert_to_mode (tmode, x, unsignedp);
1504 }
1505
1506 /* Try to use an ext(z)v pattern to extract a field from OP0.
1507 Return the extracted value on success, otherwise return null.
1508 EXTV describes the extraction instruction to use. If OP0_MODE
1509 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1510 The other arguments are as for extract_bit_field. */
1511
1512 static rtx
1513 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1514 opt_scalar_int_mode op0_mode,
1515 unsigned HOST_WIDE_INT bitsize,
1516 unsigned HOST_WIDE_INT bitnum,
1517 int unsignedp, rtx target,
1518 machine_mode mode, machine_mode tmode)
1519 {
1520 class expand_operand ops[4];
1521 rtx spec_target = target;
1522 rtx spec_target_subreg = 0;
1523 scalar_int_mode ext_mode = extv->field_mode;
1524 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1525
1526 if (bitsize == 0 || unit < bitsize)
1527 return NULL_RTX;
1528
1529 if (MEM_P (op0))
1530 /* Get a reference to the first byte of the field. */
1531 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1532 &bitnum);
1533 else
1534 {
1535 /* Convert from counting within OP0 to counting in EXT_MODE. */
1536 if (BYTES_BIG_ENDIAN)
1537 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1538
1539 /* If op0 is a register, we need it in EXT_MODE to make it
1540 acceptable to the format of ext(z)v. */
1541 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1542 return NULL_RTX;
1543 if (REG_P (op0) && op0_mode.require () != ext_mode)
1544 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1545 }
1546
1547 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1548 "backwards" from the size of the unit we are extracting from.
1549 Otherwise, we count bits from the most significant on a
1550 BYTES/BITS_BIG_ENDIAN machine. */
1551
1552 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1553 bitnum = unit - bitsize - bitnum;
1554
1555 if (target == 0)
1556 target = spec_target = gen_reg_rtx (tmode);
1557
1558 if (GET_MODE (target) != ext_mode)
1559 {
1560 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1561 between the mode of the extraction (EXT_MODE) and the target
1562 mode. Instead, create a temporary and use convert_move to set
1563 the target. */
1564 if (REG_P (target)
1565 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1566 {
1567 target = gen_lowpart (ext_mode, target);
1568 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1569 spec_target_subreg = target;
1570 }
1571 else
1572 target = gen_reg_rtx (ext_mode);
1573 }
1574
1575 create_output_operand (&ops[0], target, ext_mode);
1576 create_fixed_operand (&ops[1], op0);
1577 create_integer_operand (&ops[2], bitsize);
1578 create_integer_operand (&ops[3], bitnum);
1579 if (maybe_expand_insn (extv->icode, 4, ops))
1580 {
1581 target = ops[0].value;
1582 if (target == spec_target)
1583 return target;
1584 if (target == spec_target_subreg)
1585 return spec_target;
1586 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1587 }
1588 return NULL_RTX;
1589 }
1590
1591 /* See whether it would be valid to extract the part of OP0 described
1592 by BITNUM and BITSIZE into a value of mode MODE using a subreg
1593 operation. Return the subreg if so, otherwise return null. */
1594
1595 static rtx
1596 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1597 poly_uint64 bitsize, poly_uint64 bitnum)
1598 {
1599 poly_uint64 bytenum;
1600 if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1601 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1602 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1603 && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
1604 return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
1605 return NULL_RTX;
1606 }
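/* For instance (an illustrative sketch): on a little-endian target,
   extracting bits [0, 32) of a DImode register as SImode satisfies all
   of the conditions above and yields (subreg:SI (reg:DI ...) 0), so no
   shift or mask instructions are needed.  */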
1607
1608 /* A subroutine of extract_bit_field, with the same arguments.
1609 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1610 if we can find no other means of implementing the operation.
1611 If FALLBACK_P is false, return NULL instead. */
1612
1613 static rtx
1614 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1615 int unsignedp, rtx target, machine_mode mode,
1616 machine_mode tmode, bool reverse, bool fallback_p,
1617 rtx *alt_rtl)
1618 {
1619 rtx op0 = str_rtx;
1620 machine_mode mode1;
1621
1622 if (tmode == VOIDmode)
1623 tmode = mode;
1624
1625 while (GET_CODE (op0) == SUBREG)
1626 {
1627 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1628 op0 = SUBREG_REG (op0);
1629 }
1630
1631 /* If we have an out-of-bounds access to a register, just return an
1632 uninitialized register of the required mode. This can occur if the
1633 source code contains an out-of-bounds access to a small array. */
1634 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1635 return gen_reg_rtx (tmode);
1636
1637 if (REG_P (op0)
1638 && mode == GET_MODE (op0)
1639 && known_eq (bitnum, 0U)
1640 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1641 {
1642 if (reverse)
1643 op0 = flip_storage_order (mode, op0);
1644 /* We're trying to extract a full register from itself. */
1645 return op0;
1646 }
1647
1648 /* First try to check for vector from vector extractions. */
1649 if (VECTOR_MODE_P (GET_MODE (op0))
1650 && !MEM_P (op0)
1651 && VECTOR_MODE_P (tmode)
1652 && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1653 && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1654 {
1655 machine_mode new_mode = GET_MODE (op0);
1656 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1657 {
1658 scalar_mode inner_mode = GET_MODE_INNER (tmode);
1659 poly_uint64 nunits;
1660 if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1661 GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1662 || !related_vector_mode (tmode, inner_mode,
1663 nunits).exists (&new_mode)
1664 || maybe_ne (GET_MODE_SIZE (new_mode),
1665 GET_MODE_SIZE (GET_MODE (op0))))
1666 new_mode = VOIDmode;
1667 }
1668 poly_uint64 pos;
1669 if (new_mode != VOIDmode
1670 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1671 != CODE_FOR_nothing)
1672 && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1673 {
1674 class expand_operand ops[3];
1675 machine_mode outermode = new_mode;
1676 machine_mode innermode = tmode;
1677 enum insn_code icode
1678 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1679
1680 if (new_mode != GET_MODE (op0))
1681 op0 = gen_lowpart (new_mode, op0);
1682 create_output_operand (&ops[0], target, innermode);
1683 ops[0].target = 1;
1684 create_input_operand (&ops[1], op0, outermode);
1685 create_integer_operand (&ops[2], pos);
1686 if (maybe_expand_insn (icode, 3, ops))
1687 {
1688 if (alt_rtl && ops[0].target)
1689 *alt_rtl = target;
1690 target = ops[0].value;
1691 if (GET_MODE (target) != mode)
1692 return gen_lowpart (tmode, target);
1693 return target;
1694 }
1695 }
1696 }
1697
1698 /* See if we can get a better vector mode before extracting. */
1699 if (VECTOR_MODE_P (GET_MODE (op0))
1700 && !MEM_P (op0)
1701 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1702 {
1703 machine_mode new_mode;
1704
1705 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1706 new_mode = MIN_MODE_VECTOR_FLOAT;
1707 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1708 new_mode = MIN_MODE_VECTOR_FRACT;
1709 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1710 new_mode = MIN_MODE_VECTOR_UFRACT;
1711 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1712 new_mode = MIN_MODE_VECTOR_ACCUM;
1713 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1714 new_mode = MIN_MODE_VECTOR_UACCUM;
1715 else
1716 new_mode = MIN_MODE_VECTOR_INT;
1717
1718 FOR_EACH_MODE_FROM (new_mode, new_mode)
1719 if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1720 && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1721 && targetm.vector_mode_supported_p (new_mode))
1722 break;
1723 if (new_mode != VOIDmode)
1724 op0 = gen_lowpart (new_mode, op0);
1725 }
1726
1727 /* Use vec_extract patterns for extracting parts of vectors whenever
1728 available. If that fails, see whether the current modes and bitregion
1729 give a natural subreg. */
1730 machine_mode outermode = GET_MODE (op0);
1731 if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1732 {
1733 scalar_mode innermode = GET_MODE_INNER (outermode);
1734 enum insn_code icode
1735 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1736 poly_uint64 pos;
1737 if (icode != CODE_FOR_nothing
1738 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1739 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1740 {
1741 class expand_operand ops[3];
1742
1743 create_output_operand (&ops[0], target, innermode);
1744 ops[0].target = 1;
1745 create_input_operand (&ops[1], op0, outermode);
1746 create_integer_operand (&ops[2], pos);
1747 if (maybe_expand_insn (icode, 3, ops))
1748 {
1749 if (alt_rtl && ops[0].target)
1750 *alt_rtl = target;
1751 target = ops[0].value;
1752 if (GET_MODE (target) != mode)
1753 return gen_lowpart (tmode, target);
1754 return target;
1755 }
1756 }
1757 /* Using subregs is useful if we're extracting one register vector
1758 from a multi-register vector. extract_bit_field_as_subreg checks
1759 for valid bitsize and bitnum, so we don't need to do that here. */
1760 if (VECTOR_MODE_P (mode))
1761 {
1762 rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
1763 if (sub)
1764 return sub;
1765 }
1766 }
1767
1768 /* Make sure we are playing with integral modes. Pun with subregs
1769 if we aren't. */
1770 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1771 scalar_int_mode imode;
1772 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1773 {
1774 if (MEM_P (op0))
1775 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1776 0, MEM_SIZE (op0));
1777 else if (op0_mode.exists (&imode))
1778 {
1779 op0 = gen_lowpart (imode, op0);
1780
1781 /* If we got a SUBREG, force it into a register since we
1782 aren't going to be able to do another SUBREG on it. */
1783 if (GET_CODE (op0) == SUBREG)
1784 op0 = force_reg (imode, op0);
1785 }
1786 else
1787 {
1788 poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1789 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1790 emit_move_insn (mem, op0);
1791 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1792 }
1793 }
1794
1795 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1796 If that's wrong, the solution is to test for it and set TARGET to 0
1797 if needed. */
1798
1799 /* Get the mode of the field to use for atomic access or subreg
1800 conversion. */
1801 if (!SCALAR_INT_MODE_P (tmode)
1802 || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1803 mode1 = mode;
1804 gcc_assert (mode1 != BLKmode);
1805
1806 /* Extraction of a full MODE1 value can be done with a subreg as long
1807 as the least significant bit of the value is the least significant
1808 bit of either OP0 or a word of OP0. */
1809 if (!MEM_P (op0) && !reverse)
1810 {
1811 rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
1812 if (sub)
1813 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1814 }
1815
1816 /* Extraction of a full MODE1 value can be done with a load as long as
1817 the field is on a byte boundary and is sufficiently aligned. */
1818 poly_uint64 bytenum;
1819 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1820 {
1821 op0 = adjust_bitfield_address (op0, mode1, bytenum);
1822 if (reverse)
1823 op0 = flip_storage_order (mode1, op0);
1824 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1825 }
1826
1827 /* If we have a memory source and a non-constant bit offset, restrict
1828 the memory to the referenced bytes. This is a worst-case fallback
1829 but is useful for things like vector booleans. */
1830 if (MEM_P (op0) && !bitnum.is_constant ())
1831 {
1832 bytenum = bits_to_bytes_round_down (bitnum);
1833 bitnum = num_trailing_bits (bitnum);
1834 poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1835 op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1836 op0_mode = opt_scalar_int_mode ();
1837 }
1838
1839 /* It's possible we'll need to handle other cases here for
1840 polynomial bitnum and bitsize. */
1841
1842 /* From here on we need to be looking at a fixed-size extraction. */
1843 return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1844 bitnum.to_constant (), unsignedp,
1845 target, mode, tmode, reverse, fallback_p);
1846 }
1847
1848 /* Subroutine of extract_bit_field_1, with the same arguments, except
1849 that BITSIZE and BITNUM are constant. Handle cases specific to
1850 integral modes. If OP0_MODE is defined, it is the mode of OP0,
1851 otherwise OP0 is a BLKmode MEM. */
1852
1853 static rtx
1854 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1855 unsigned HOST_WIDE_INT bitsize,
1856 unsigned HOST_WIDE_INT bitnum, int unsignedp,
1857 rtx target, machine_mode mode, machine_mode tmode,
1858 bool reverse, bool fallback_p)
1859 {
1860 /* Handle fields bigger than a word. */
1861
1862 if (bitsize > BITS_PER_WORD)
1863 {
1864 /* Here we transfer the words of the field
1865 in the order least significant first.
1866 This is because the most significant word is the one which may
1867 be less than full. */
1868
1869 const bool backwards = WORDS_BIG_ENDIAN;
1870 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1871 unsigned int i;
1872 rtx_insn *last;
1873
1874 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1875 target = gen_reg_rtx (mode);
1876
1877 /* In case we're about to clobber a base register or something
1878 (see gcc.c-torture/execute/20040625-1.c). */
1879 if (reg_mentioned_p (target, op0))
1880 target = gen_reg_rtx (mode);
1881
1882 /* Indicate for flow that the entire target reg is being set. */
1883 emit_clobber (target);
1884
1885 /* The mode must be fixed-size, since extract_bit_field_1 handles
1886 extractions from variable-sized objects before calling this
1887 function. */
1888 unsigned int target_size
1889 = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1890 last = get_last_insn ();
1891 for (i = 0; i < nwords; i++)
1892 {
1893 /* If I is 0, use the low-order word in both field and target;
1894 if I is 1, use the next to lowest word; and so on. */
1895 /* Word number in TARGET to use. */
1896 unsigned int wordnum
1897 = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1898 /* Offset from start of field in OP0. */
1899 unsigned int bit_offset = (backwards ^ reverse
1900 ? MAX ((int) bitsize - ((int) i + 1)
1901 * BITS_PER_WORD,
1902 0)
1903 : (int) i * BITS_PER_WORD);
1904 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1905 rtx result_part
1906 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1907 bitsize - i * BITS_PER_WORD),
1908 bitnum + bit_offset, 1, target_part,
1909 mode, word_mode, reverse, fallback_p, NULL);
1910
1911 gcc_assert (target_part);
1912 if (!result_part)
1913 {
1914 delete_insns_since (last);
1915 return NULL;
1916 }
1917
1918 if (result_part != target_part)
1919 emit_move_insn (target_part, result_part);
1920 }
1921
1922 if (unsignedp)
1923 {
1924 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1925 need to be zeroed out. */
1926 if (target_size > nwords * UNITS_PER_WORD)
1927 {
1928 unsigned int i, total_words;
1929
1930 total_words = target_size / UNITS_PER_WORD;
1931 for (i = nwords; i < total_words; i++)
1932 emit_move_insn
1933 (operand_subword (target,
1934 backwards ? total_words - i - 1 : i,
1935 1, VOIDmode),
1936 const0_rtx);
1937 }
1938 return target;
1939 }
1940
1941 /* Signed bit field: sign-extend with two arithmetic shifts. */
1942 target = expand_shift (LSHIFT_EXPR, mode, target,
1943 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1944 return expand_shift (RSHIFT_EXPR, mode, target,
1945 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1946 }
1947
1948 /* If OP0 is a multi-word register, narrow it to the affected word.
1949 If the region spans two words, defer to extract_split_bit_field. */
1950 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1951 {
1952 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1953 {
1954 if (!fallback_p)
1955 return NULL_RTX;
1956 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1957 unsignedp, reverse);
1958 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1959 }
1960 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1961 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1962 op0_mode = word_mode;
1963 bitnum %= BITS_PER_WORD;
1964 }
1965
1966 /* From here on we know the desired field is smaller than a word.
1967 If OP0 is a register, it too fits within a word. */
1968 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1969 extraction_insn extv;
1970 if (!MEM_P (op0)
1971 && !reverse
1972 /* ??? We could limit the structure size to the part of OP0 that
1973 contains the field, with appropriate checks for endianness
1974 and TARGET_TRULY_NOOP_TRUNCATION. */
1975 && get_best_reg_extraction_insn (&extv, pattern,
1976 GET_MODE_BITSIZE (op0_mode.require ()),
1977 tmode))
1978 {
1979 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1980 bitsize, bitnum,
1981 unsignedp, target, mode,
1982 tmode);
1983 if (result)
1984 return result;
1985 }
1986
1987 /* If OP0 is a memory, try copying it to a register and seeing if a
1988 cheap register alternative is available. */
1989 if (MEM_P (op0) && !reverse)
1990 {
1991 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1992 tmode))
1993 {
1994 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1995 bitsize, bitnum,
1996 unsignedp, target, mode,
1997 tmode);
1998 if (result)
1999 return result;
2000 }
2001
2002 rtx_insn *last = get_last_insn ();
2003
2004 /* Try loading part of OP0 into a register and extracting the
2005 bitfield from that. */
2006 unsigned HOST_WIDE_INT bitpos;
2007 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2008 0, 0, tmode, &bitpos);
2009 if (xop0)
2010 {
2011 xop0 = copy_to_reg (xop0);
2012 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2013 unsignedp, target,
2014 mode, tmode, reverse, false, NULL);
2015 if (result)
2016 return result;
2017 delete_insns_since (last);
2018 }
2019 }
2020
2021 if (!fallback_p)
2022 return NULL;
2023
2024 /* Find a correspondingly-sized integer field, so we can apply
2025 shifts and masks to it. */
2026 scalar_int_mode int_mode;
2027 if (!int_mode_for_mode (tmode).exists (&int_mode))
2028 /* If this fails, we should probably push op0 out to memory and then
2029 do a load. */
2030 int_mode = int_mode_for_mode (mode).require ();
2031
2032 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2033 bitnum, target, unsignedp, reverse);
2034
2035 /* Complex values must be reversed piecewise, so we need to undo the global
2036 reversal, convert to the complex mode and reverse again. */
2037 if (reverse && COMPLEX_MODE_P (tmode))
2038 {
2039 target = flip_storage_order (int_mode, target);
2040 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2041 target = flip_storage_order (tmode, target);
2042 }
2043 else
2044 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2045
2046 return target;
2047 }
2048
2049 /* Generate code to extract a bit-field from STR_RTX
2050 containing BITSIZE bits, starting at BITNUM,
2051 and put it in TARGET if possible (if TARGET is nonzero).
2052 Regardless of TARGET, we return the rtx for where the value is placed.
2053
2054 STR_RTX is the structure containing the byte (a REG or MEM).
2055 UNSIGNEDP is nonzero if this is an unsigned bit field.
2056 MODE is the natural mode of the field value once extracted.
2057 TMODE is the mode the caller would like the value to have;
2058 but the value may be returned with type MODE instead.
2059
2060 If REVERSE is true, the extraction is to be done in reverse order.
2061
2062 If a TARGET is specified and we can store in it at no extra cost,
2063 we do so, and return TARGET.
2064 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2065 if they are equally easy.
2066
2067 If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2068 then *ALT_RTL is set to TARGET (before legitimization). */
2069
2070 rtx
2071 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2072 int unsignedp, rtx target, machine_mode mode,
2073 machine_mode tmode, bool reverse, rtx *alt_rtl)
2074 {
2075 machine_mode mode1;
2076
2077 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
2078 if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2079 mode1 = GET_MODE (str_rtx);
2080 else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2081 mode1 = GET_MODE (target);
2082 else
2083 mode1 = tmode;
2084
2085 unsigned HOST_WIDE_INT ibitsize, ibitnum;
2086 scalar_int_mode int_mode;
2087 if (bitsize.is_constant (&ibitsize)
2088 && bitnum.is_constant (&ibitnum)
2089 && is_a <scalar_int_mode> (mode1, &int_mode)
2090 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2091 int_mode, 0, 0))
2092 {
2093 /* Extraction of a full INT_MODE value can be done with a simple load.
2094 We know here that the field can be accessed with a single
2095 instruction. For targets that support unaligned memory,
2096 an unaligned access may be necessary. */
2097 if (ibitsize == GET_MODE_BITSIZE (int_mode))
2098 {
2099 rtx result = adjust_bitfield_address (str_rtx, int_mode,
2100 ibitnum / BITS_PER_UNIT);
2101 if (reverse)
2102 result = flip_storage_order (int_mode, result);
2103 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2104 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2105 }
2106
2107 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2108 &ibitnum);
2109 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2110 str_rtx = copy_to_reg (str_rtx);
2111 return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2112 target, mode, tmode, reverse, true, alt_rtl);
2113 }
2114
2115 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2116 target, mode, tmode, reverse, true, alt_rtl);
2117 }
2118 \f
2119 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2120 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2121 otherwise OP0 is a BLKmode MEM.
2122
2123 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2124 If REVERSE is true, the extraction is to be done in reverse order.
2125
2126 If TARGET is nonzero, attempts to store the value there
2127 and return TARGET, but this is not guaranteed.
2128 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2129
2130 static rtx
2131 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2132 opt_scalar_int_mode op0_mode,
2133 unsigned HOST_WIDE_INT bitsize,
2134 unsigned HOST_WIDE_INT bitnum, rtx target,
2135 int unsignedp, bool reverse)
2136 {
2137 scalar_int_mode mode;
2138 if (MEM_P (op0))
2139 {
2140 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2141 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2142 /* The only way this should occur is if the field spans word
2143 boundaries. */
2144 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2145 unsignedp, reverse);
2146
2147 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2148 }
2149 else
2150 mode = op0_mode.require ();
2151
2152 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2153 target, unsignedp, reverse);
2154 }
2155
2156 /* Helper function for extract_fixed_bit_field, extracts
2157 the bit field always using MODE, which is the mode of OP0.
2158 The other arguments are as for extract_fixed_bit_field. */
2159
2160 static rtx
2161 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2162 unsigned HOST_WIDE_INT bitsize,
2163 unsigned HOST_WIDE_INT bitnum, rtx target,
2164 int unsignedp, bool reverse)
2165 {
2166 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2167 for invalid input, such as extracting the equivalent of f5 from
2168 gcc.dg/pr48335-2.c. */
2169
2170 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2171 /* BITNUM is the distance between our msb and that of OP0.
2172 Convert it to the distance from the lsb. */
2173 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2174
2175 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2176 We have reduced the big-endian case to the little-endian case. */
2177 if (reverse)
2178 op0 = flip_storage_order (mode, op0);
2179
2180 if (unsignedp)
2181 {
2182 if (bitnum)
2183 {
2184 /* If the field does not already start at the lsb,
2185 shift it so it does. */
2186 /* Maybe propagate the target for the shift. */
2187 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2188 if (tmode != mode)
2189 subtarget = 0;
2190 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2191 }
2192 /* Convert the value to the desired mode. TMODE must also be a
2193 scalar integer for this conversion to make sense, since we
2194 shouldn't reinterpret the bits. */
2195 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2196 if (mode != new_mode)
2197 op0 = convert_to_mode (new_mode, op0, 1);
2198
2199 /* Unless the msb of the field used to be the msb when we shifted,
2200 mask out the upper bits. */
2201
2202 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2203 return expand_binop (new_mode, and_optab, op0,
2204 mask_rtx (new_mode, 0, bitsize, 0),
2205 target, 1, OPTAB_LIB_WIDEN);
2206 return op0;
2207 }
2208
2209 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2210 then arithmetic-shift its lsb to the lsb of the word. */
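/* For example, a signed 4-bit field at bit 3 fits in QImode: shift left
by 8 - (4 + 3) == 1 so the field's msb becomes the mode's msb, then
arithmetic-shift right by 8 - 4 == 4 to sign-extend the field into the
low 4 bits. */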
2211 op0 = force_reg (mode, op0);
2212
2213 /* Find the narrowest integer mode that contains the field. */
2214
2215 opt_scalar_int_mode mode_iter;
2216 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2217 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2218 break;
2219
2220 mode = mode_iter.require ();
2221 op0 = convert_to_mode (mode, op0, 0);
2222
2223 if (mode != tmode)
2224 target = 0;
2225
2226 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2227 {
2228 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2229 /* Maybe propagate the target for the shift. */
2230 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2231 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2232 }
2233
2234 return expand_shift (RSHIFT_EXPR, mode, op0,
2235 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2236 }
2237
2238 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2239 VALUE << BITPOS. */
2240
2241 static rtx
2242 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2243 int bitpos)
2244 {
2245 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2246 }
2247 \f
2248 /* Extract a bit field that is split across two words
2249 and return an RTX for the result.
2250
2251 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2252 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2253 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2254 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2255 a BLKmode MEM.
2256
2257 If REVERSE is true, the extraction is to be done in reverse order. */
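/* For example, with 32-bit words on a little-endian target, a 10-bit
field at BITPOS 28 is read as two unsigned pieces (bits 28..31 of the
first word and bits 0..5 of the second), which are then shifted into
position and combined with IOR. */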
2258
2259 static rtx
2260 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2261 unsigned HOST_WIDE_INT bitsize,
2262 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2263 bool reverse)
2264 {
2265 unsigned int unit;
2266 unsigned int bitsdone = 0;
2267 rtx result = NULL_RTX;
2268 int first = 1;
2269
2270 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2271 much at a time. */
2272 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2273 unit = BITS_PER_WORD;
2274 else
2275 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2276
2277 while (bitsdone < bitsize)
2278 {
2279 unsigned HOST_WIDE_INT thissize;
2280 rtx part;
2281 unsigned HOST_WIDE_INT thispos;
2282 unsigned HOST_WIDE_INT offset;
2283
2284 offset = (bitpos + bitsdone) / unit;
2285 thispos = (bitpos + bitsdone) % unit;
2286
2287 /* THISSIZE must not overrun a word boundary. Otherwise,
2288 extract_fixed_bit_field will call us again, and we will mutually
2289 recurse forever. */
2290 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2291 thissize = MIN (thissize, unit - thispos);
2292
2293 /* If OP0 is a register, then handle OFFSET here. */
2294 rtx op0_piece = op0;
2295 opt_scalar_int_mode op0_piece_mode = op0_mode;
2296 if (SUBREG_P (op0) || REG_P (op0))
2297 {
2298 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2299 op0_piece_mode = word_mode;
2300 offset = 0;
2301 }
2302
2303 /* Extract the parts in bit-counting order,
2304 whose meaning is determined by BYTES_BIG_ENDIAN (and REVERSE).
2305 OFFSET is in UNITs, and UNIT is in bits. */
2306 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2307 thissize, offset * unit + thispos,
2308 0, 1, reverse);
2309 bitsdone += thissize;
2310
2311 /* Shift this part into place for the result. */
2312 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2313 {
2314 if (bitsize != bitsdone)
2315 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2316 bitsize - bitsdone, 0, 1);
2317 }
2318 else
2319 {
2320 if (bitsdone != thissize)
2321 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2322 bitsdone - thissize, 0, 1);
2323 }
2324
2325 if (first)
2326 result = part;
2327 else
2328 /* Combine the parts with bitwise or. This works
2329 because we extracted each part as an unsigned bit field. */
2330 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2331 OPTAB_LIB_WIDEN);
2332
2333 first = 0;
2334 }
2335
2336 /* Unsigned bit field: we are done. */
2337 if (unsignedp)
2338 return result;
2339 /* Signed bit field: sign-extend with two arithmetic shifts. */
2340 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2341 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2342 return expand_shift (RSHIFT_EXPR, word_mode, result,
2343 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2344 }
2345 \f
2346 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2347 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2348 MODE, fill the upper bits with zeros. Fail if the layout of either
2349 mode is unknown (as for CC modes) or if the extraction would involve
2350 unprofitable mode punning. Return the value on success, otherwise
2351 return null.
2352
2353 This is different from gen_lowpart* in these respects:
2354
2355 - the returned value must always be considered an rvalue
2356
2357 - when MODE is wider than SRC_MODE, the extraction involves
2358 a zero extension
2359
2360 - when MODE is smaller than SRC_MODE, the extraction involves
2361 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2362
2363 In other words, this routine performs a computation, whereas the
2364 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2365 operations. */
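/* For example, extracting SImode from a DImode value is a truncation
(subject to TARGET_TRULY_NOOP_TRUNCATION), while extracting DImode from
an SImode value zero-extends the upper 32 bits of the result. */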
2366
2367 rtx
2368 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2369 {
2370 scalar_int_mode int_mode, src_int_mode;
2371
2372 if (mode == src_mode)
2373 return src;
2374
2375 if (CONSTANT_P (src))
2376 {
2377 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2378 fails, it will happily create (subreg (symbol_ref)) or similar
2379 invalid SUBREGs. */
2380 poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2381 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2382 if (ret)
2383 return ret;
2384
2385 if (GET_MODE (src) == VOIDmode
2386 || !validate_subreg (mode, src_mode, src, byte))
2387 return NULL_RTX;
2388
2389 src = force_reg (GET_MODE (src), src);
2390 return gen_rtx_SUBREG (mode, src, byte);
2391 }
2392
2393 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2394 return NULL_RTX;
2395
2396 if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2397 && targetm.modes_tieable_p (mode, src_mode))
2398 {
2399 rtx x = gen_lowpart_common (mode, src);
2400 if (x)
2401 return x;
2402 }
2403
2404 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2405 || !int_mode_for_mode (mode).exists (&int_mode))
2406 return NULL_RTX;
2407
2408 if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2409 return NULL_RTX;
2410 if (!targetm.modes_tieable_p (int_mode, mode))
2411 return NULL_RTX;
2412
2413 src = gen_lowpart (src_int_mode, src);
2414 if (!validate_subreg (int_mode, src_int_mode, src,
2415 subreg_lowpart_offset (int_mode, src_int_mode)))
2416 return NULL_RTX;
2417
2418 src = convert_modes (int_mode, src_int_mode, src, true);
2419 src = gen_lowpart (mode, src);
2420 return src;
2421 }
2422 \f
2423 /* Add INC into TARGET. */
2424
2425 void
2426 expand_inc (rtx target, rtx inc)
2427 {
2428 rtx value = expand_binop (GET_MODE (target), add_optab,
2429 target, inc,
2430 target, 0, OPTAB_LIB_WIDEN);
2431 if (value != target)
2432 emit_move_insn (target, value);
2433 }
2434
2435 /* Subtract DEC from TARGET. */
2436
2437 void
2438 expand_dec (rtx target, rtx dec)
2439 {
2440 rtx value = expand_binop (GET_MODE (target), sub_optab,
2441 target, dec,
2442 target, 0, OPTAB_LIB_WIDEN);
2443 if (value != target)
2444 emit_move_insn (target, value);
2445 }
2446 \f
2447 /* Output a shift instruction for expression code CODE,
2448 with SHIFTED being the rtx for the value to shift,
2449 and AMOUNT the rtx for the amount to shift by.
2450 Store the result in the rtx TARGET, if that is convenient.
2451 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2452 Return the rtx for where the value is.
2453 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2454 in which case 0 is returned. */
2455
2456 static rtx
2457 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2458 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2459 {
2460 rtx op1, temp = 0;
2461 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2462 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2463 optab lshift_optab = ashl_optab;
2464 optab rshift_arith_optab = ashr_optab;
2465 optab rshift_uns_optab = lshr_optab;
2466 optab lrotate_optab = rotl_optab;
2467 optab rrotate_optab = rotr_optab;
2468 machine_mode op1_mode;
2469 scalar_mode scalar_mode = GET_MODE_INNER (mode);
2470 int attempt;
2471 bool speed = optimize_insn_for_speed_p ();
2472
2473 op1 = amount;
2474 op1_mode = GET_MODE (op1);
2475
2476 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2477 shift amount is a vector, use the vector/vector shift patterns. */
2478 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2479 {
2480 lshift_optab = vashl_optab;
2481 rshift_arith_optab = vashr_optab;
2482 rshift_uns_optab = vlshr_optab;
2483 lrotate_optab = vrotl_optab;
2484 rrotate_optab = vrotr_optab;
2485 }
2486
2487 /* We previously detected shift counts computed by NEGATE_EXPR
2488 and shifted in the other direction, but that does not work
2489 on all machines. */
2490
2491 if (SHIFT_COUNT_TRUNCATED)
2492 {
2493 if (CONST_INT_P (op1)
2494 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2495 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2496 op1 = gen_int_shift_amount (mode,
2497 (unsigned HOST_WIDE_INT) INTVAL (op1)
2498 % GET_MODE_BITSIZE (scalar_mode));
2499 else if (GET_CODE (op1) == SUBREG
2500 && subreg_lowpart_p (op1)
2501 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2502 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2503 op1 = SUBREG_REG (op1);
2504 }
2505
2506 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2507 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2508 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2509 amount instead. */
2510 if (rotate
2511 && CONST_INT_P (op1)
2512 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2513 GET_MODE_BITSIZE (scalar_mode) - 1))
2514 {
2515 op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2516 - INTVAL (op1)));
2517 left = !left;
2518 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2519 }
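/* For example, in SImode a rotate left by 24 becomes a rotate right by
32 - 24 == 8, keeping constant rotate counts in 1 .. bitsize / 2. */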
2520
2521 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2522 Note that this is not the case for bigger values. For instance a rotation
2523 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2524 0x04030201 (bswapsi). */
2525 if (rotate
2526 && CONST_INT_P (op1)
2527 && INTVAL (op1) == BITS_PER_UNIT
2528 && GET_MODE_SIZE (scalar_mode) == 2
2529 && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2530 return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2531
2532 if (op1 == const0_rtx)
2533 return shifted;
2534
2535 /* Check whether it's cheaper to implement a left shift by a constant
2536 bit count by a sequence of additions. */
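/* E.g. X << 2 can be synthesized as T = X + X; T = T + T; which wins
when two additions cost less than one shift by 2 on this target. */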
2537 if (code == LSHIFT_EXPR
2538 && CONST_INT_P (op1)
2539 && INTVAL (op1) > 0
2540 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2541 && INTVAL (op1) < MAX_BITS_PER_WORD
2542 && (shift_cost (speed, mode, INTVAL (op1))
2543 > INTVAL (op1) * add_cost (speed, mode))
2544 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2545 {
2546 int i;
2547 for (i = 0; i < INTVAL (op1); i++)
2548 {
2549 temp = force_reg (mode, shifted);
2550 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2551 unsignedp, OPTAB_LIB_WIDEN);
2552 }
2553 return shifted;
2554 }
2555
2556 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2557 {
2558 enum optab_methods methods;
2559
2560 if (attempt == 0)
2561 methods = OPTAB_DIRECT;
2562 else if (attempt == 1)
2563 methods = OPTAB_WIDEN;
2564 else
2565 methods = OPTAB_LIB_WIDEN;
2566
2567 if (rotate)
2568 {
2569 /* Widening does not work for rotation. */
2570 if (methods == OPTAB_WIDEN)
2571 continue;
2572 else if (methods == OPTAB_LIB_WIDEN)
2573 {
2574 /* If we have been unable to open-code this by a rotation,
2575 do it as the IOR of two shifts. I.e., to rotate A
2576 by N bits, compute
2577 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2578 where C is the bitsize of A.
2579
2580 It is theoretically possible that the target machine might
2581 not be able to perform either shift and hence we would
2582 be making two libcalls rather than just the one for the
2583 shift (similarly if IOR could not be done). We will allow
2584 this extremely unlikely lossage to avoid complicating the
2585 code below. */
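/* For example, rotating A left by a variable amount N in SImode expands
to (A << N) | ((unsigned) A >> ((-N) & 31)); masking the negated count
keeps the right-shift amount in range even when N is 0. */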
2586
2587 rtx subtarget = target == shifted ? 0 : target;
2588 rtx new_amount, other_amount;
2589 rtx temp1;
2590
2591 new_amount = op1;
2592 if (op1 == const0_rtx)
2593 return shifted;
2594 else if (CONST_INT_P (op1))
2595 other_amount = gen_int_shift_amount
2596 (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2597 else
2598 {
2599 other_amount
2600 = simplify_gen_unary (NEG, GET_MODE (op1),
2601 op1, GET_MODE (op1));
2602 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2603 other_amount
2604 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2605 gen_int_mode (mask, GET_MODE (op1)));
2606 }
2607
2608 shifted = force_reg (mode, shifted);
2609
2610 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2611 mode, shifted, new_amount, 0, 1);
2612 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2613 mode, shifted, other_amount,
2614 subtarget, 1);
2615 return expand_binop (mode, ior_optab, temp, temp1, target,
2616 unsignedp, methods);
2617 }
2618
2619 temp = expand_binop (mode,
2620 left ? lrotate_optab : rrotate_optab,
2621 shifted, op1, target, unsignedp, methods);
2622 }
2623 else if (unsignedp)
2624 temp = expand_binop (mode,
2625 left ? lshift_optab : rshift_uns_optab,
2626 shifted, op1, target, unsignedp, methods);
2627
2628 /* Do arithmetic shifts.
2629 Also, if we are going to widen the operand, we can just as well
2630 use an arithmetic right-shift instead of a logical one. */
2631 if (temp == 0 && ! rotate
2632 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2633 {
2634 enum optab_methods methods1 = methods;
2635
2636 /* If trying to widen a log shift to an arithmetic shift,
2637 don't accept an arithmetic shift of the same size. */
2638 if (unsignedp)
2639 methods1 = OPTAB_MUST_WIDEN;
2640
2641 /* Arithmetic shift */
2642
2643 temp = expand_binop (mode,
2644 left ? lshift_optab : rshift_arith_optab,
2645 shifted, op1, target, unsignedp, methods1);
2646 }
2647
2648 /* We used to try extzv here for logical right shifts, but that was
2649 only useful for one machine, the VAX, and caused poor code
2650 generation there for lshrdi3, so the code was deleted and a
2651 define_expand for lshrsi3 was added to vax.md. */
2652 }
2653
2654 gcc_assert (temp != NULL_RTX || may_fail);
2655 return temp;
2656 }
2657
2658 /* Output a shift instruction for expression code CODE,
2659 with SHIFTED being the rtx for the value to shift,
2660 and AMOUNT the amount to shift by.
2661 Store the result in the rtx TARGET, if that is convenient.
2662 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2663 Return the rtx for where the value is. */
2664
2665 rtx
2666 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2667 poly_int64 amount, rtx target, int unsignedp)
2668 {
2669 return expand_shift_1 (code, mode, shifted,
2670 gen_int_shift_amount (mode, amount),
2671 target, unsignedp);
2672 }
2673
2674 /* Likewise, but return 0 if that cannot be done. */
2675
2676 static rtx
2677 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2678 int amount, rtx target, int unsignedp)
2679 {
2680 return expand_shift_1 (code, mode,
2681 shifted, GEN_INT (amount), target, unsignedp, true);
2682 }
2683
2684 /* Output a shift instruction for expression code CODE,
2685 with SHIFTED being the rtx for the value to shift,
2686 and AMOUNT the tree for the amount to shift by.
2687 Store the result in the rtx TARGET, if that is convenient.
2688 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2689 Return the rtx for where the value is. */
2690
2691 rtx
2692 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2693 tree amount, rtx target, int unsignedp)
2694 {
2695 return expand_shift_1 (code, mode,
2696 shifted, expand_normal (amount), target, unsignedp);
2697 }
2698
2699 \f
2700 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2701 const struct mult_cost *, machine_mode mode);
2702 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2703 const struct algorithm *, enum mult_variant);
2704 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2705 static rtx extract_high_half (scalar_int_mode, rtx);
2706 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2707 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2708 int, int);
2709 /* Compute and return the best algorithm for multiplying by T.
2710 The algorithm must cost less than COST_LIMIT.
2711 If retval.cost >= COST_LIMIT, no algorithm was found and all
2712 other fields of the returned struct are undefined.
2713 MODE is the machine mode of the multiplication. */
2714
2715 static void
2716 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2717 const struct mult_cost *cost_limit, machine_mode mode)
2718 {
2719 int m;
2720 struct algorithm *alg_in, *best_alg;
2721 struct mult_cost best_cost;
2722 struct mult_cost new_limit;
2723 int op_cost, op_latency;
2724 unsigned HOST_WIDE_INT orig_t = t;
2725 unsigned HOST_WIDE_INT q;
2726 int maxm, hash_index;
2727 bool cache_hit = false;
2728 enum alg_code cache_alg = alg_zero;
2729 bool speed = optimize_insn_for_speed_p ();
2730 scalar_int_mode imode;
2731 struct alg_hash_entry *entry_ptr;
2732
2733 /* Indicate that no algorithm is yet found. If no algorithm
2734 is found, this value will be returned and indicate failure. */
2735 alg_out->cost.cost = cost_limit->cost + 1;
2736 alg_out->cost.latency = cost_limit->latency + 1;
2737
2738 if (cost_limit->cost < 0
2739 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2740 return;
2741
2742 /* Be prepared for vector modes. */
2743 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2744
2745 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2746
2747 /* Restrict the bits of "t" to the multiplication's mode. */
2748 t &= GET_MODE_MASK (imode);
2749
2750 /* t == 1 can be done at zero cost. */
2751 if (t == 1)
2752 {
2753 alg_out->ops = 1;
2754 alg_out->cost.cost = 0;
2755 alg_out->cost.latency = 0;
2756 alg_out->op[0] = alg_m;
2757 return;
2758 }
2759
2760 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2761 fail now. */
2762 if (t == 0)
2763 {
2764 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2765 return;
2766 else
2767 {
2768 alg_out->ops = 1;
2769 alg_out->cost.cost = zero_cost (speed);
2770 alg_out->cost.latency = zero_cost (speed);
2771 alg_out->op[0] = alg_zero;
2772 return;
2773 }
2774 }
2775
2776 /* We'll be needing a couple extra algorithm structures now. */
2777
2778 alg_in = XALLOCA (struct algorithm);
2779 best_alg = XALLOCA (struct algorithm);
2780 best_cost = *cost_limit;
2781
2782 /* Compute the hash index. */
2783 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2784
2785 /* See if we already know what to do for T. */
2786 entry_ptr = alg_hash_entry_ptr (hash_index);
2787 if (entry_ptr->t == t
2788 && entry_ptr->mode == mode
2789 && entry_ptr->speed == speed
2790 && entry_ptr->alg != alg_unknown)
2791 {
2792 cache_alg = entry_ptr->alg;
2793
2794 if (cache_alg == alg_impossible)
2795 {
2796 /* The cache tells us that it's impossible to synthesize
2797 multiplication by T within entry_ptr->cost. */
2798 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2799 /* COST_LIMIT is at least as restrictive as the one
2800 recorded in the hash table, in which case we have no
2801 hope of synthesizing a multiplication. Just
2802 return. */
2803 return;
2804
2805 /* If we get here, COST_LIMIT is less restrictive than the
2806 one recorded in the hash table, so we may be able to
2807 synthesize a multiplication. Proceed as if we didn't
2808 have the cache entry. */
2809 }
2810 else
2811 {
2812 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2813 /* The cached algorithm shows that this multiplication
2814 requires more cost than COST_LIMIT. Just return. This
2815 way, we don't clobber this cache entry with
2816 alg_impossible but retain useful information. */
2817 return;
2818
2819 cache_hit = true;
2820
2821 switch (cache_alg)
2822 {
2823 case alg_shift:
2824 goto do_alg_shift;
2825
2826 case alg_add_t_m2:
2827 case alg_sub_t_m2:
2828 goto do_alg_addsub_t_m2;
2829
2830 case alg_add_factor:
2831 case alg_sub_factor:
2832 goto do_alg_addsub_factor;
2833
2834 case alg_add_t2_m:
2835 goto do_alg_add_t2_m;
2836
2837 case alg_sub_t2_m:
2838 goto do_alg_sub_t2_m;
2839
2840 default:
2841 gcc_unreachable ();
2842 }
2843 }
2844 }
2845
2846 /* If we have a group of zero bits at the low-order part of T, try
2847 multiplying by the remaining bits and then doing a shift. */
2848
2849 if ((t & 1) == 0)
2850 {
2851 do_alg_shift:
2852 m = ctz_or_zero (t); /* m = number of low zero bits */
2853 if (m < maxm)
2854 {
2855 q = t >> m;
2856 /* The function expand_shift will choose between a shift and
2857 a sequence of additions, so the observed cost is given as
2858 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2859 op_cost = m * add_cost (speed, mode);
2860 if (shift_cost (speed, mode, m) < op_cost)
2861 op_cost = shift_cost (speed, mode, m);
2862 new_limit.cost = best_cost.cost - op_cost;
2863 new_limit.latency = best_cost.latency - op_cost;
2864 synth_mult (alg_in, q, &new_limit, mode);
2865
2866 alg_in->cost.cost += op_cost;
2867 alg_in->cost.latency += op_cost;
2868 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2869 {
2870 best_cost = alg_in->cost;
2871 std::swap (alg_in, best_alg);
2872 best_alg->log[best_alg->ops] = m;
2873 best_alg->op[best_alg->ops] = alg_shift;
2874 }
2875
2876 /* See if treating ORIG_T as a signed number yields a better
2877 sequence. Try this sequence only for a negative ORIG_T
2878 as it would be useless for a non-negative ORIG_T. */
2879 if ((HOST_WIDE_INT) orig_t < 0)
2880 {
2881 /* Shift ORIG_T as follows because a right shift of a
2882 negative-valued signed type is implementation
2883 defined. */
2884 q = ~(~orig_t >> m);
2885 /* The function expand_shift will choose between a shift
2886 and a sequence of additions, so the observed cost is
2887 given as MIN (m * add_cost(speed, mode),
2888 shift_cost(speed, mode, m)). */
2889 op_cost = m * add_cost (speed, mode);
2890 if (shift_cost (speed, mode, m) < op_cost)
2891 op_cost = shift_cost (speed, mode, m);
2892 new_limit.cost = best_cost.cost - op_cost;
2893 new_limit.latency = best_cost.latency - op_cost;
2894 synth_mult (alg_in, q, &new_limit, mode);
2895
2896 alg_in->cost.cost += op_cost;
2897 alg_in->cost.latency += op_cost;
2898 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2899 {
2900 best_cost = alg_in->cost;
2901 std::swap (alg_in, best_alg);
2902 best_alg->log[best_alg->ops] = m;
2903 best_alg->op[best_alg->ops] = alg_shift;
2904 }
2905 }
2906 }
2907 if (cache_hit)
2908 goto done;
2909 }
2910
2911 /* If we have an odd number, add or subtract one. */
2912 if ((t & 1) != 0)
2913 {
2914 unsigned HOST_WIDE_INT w;
2915
2916 do_alg_addsub_t_m2:
2917 for (w = 1; (w & t) != 0; w <<= 1)
2918 ;
2919 /* If T was -1, then W will be zero after the loop. This is another
2920 case where T ends with ...111. Multiplying by (T + 1) and
2921 subtracting the operand produces slightly better code and makes
2922 algorithm selection much faster than treating it like the ...0111 case
2923 below. */
2924 if (w == 0
2925 || (w > 2
2926 /* Reject the case where t is 3.
2927 Thus we prefer addition in that case. */
2928 && t != 3))
2929 {
2930 /* T ends with ...111. Multiply by (T + 1) and subtract the operand. */
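/* E.g. T == 7: synthesize X * 8 with a single shift, then
X * 7 == (X << 3) - X. */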
2931
2932 op_cost = add_cost (speed, mode);
2933 new_limit.cost = best_cost.cost - op_cost;
2934 new_limit.latency = best_cost.latency - op_cost;
2935 synth_mult (alg_in, t + 1, &new_limit, mode);
2936
2937 alg_in->cost.cost += op_cost;
2938 alg_in->cost.latency += op_cost;
2939 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2940 {
2941 best_cost = alg_in->cost;
2942 std::swap (alg_in, best_alg);
2943 best_alg->log[best_alg->ops] = 0;
2944 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2945 }
2946 }
2947 else
2948 {
2949 /* T ends with ...01 or ...011. Multiply by (T - 1) and add the operand. */
2950
2951 op_cost = add_cost (speed, mode);
2952 new_limit.cost = best_cost.cost - op_cost;
2953 new_limit.latency = best_cost.latency - op_cost;
2954 synth_mult (alg_in, t - 1, &new_limit, mode);
2955
2956 alg_in->cost.cost += op_cost;
2957 alg_in->cost.latency += op_cost;
2958 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2959 {
2960 best_cost = alg_in->cost;
2961 std::swap (alg_in, best_alg);
2962 best_alg->log[best_alg->ops] = 0;
2963 best_alg->op[best_alg->ops] = alg_add_t_m2;
2964 }
2965 }
2966
2967 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2968 quickly with a - a * n for some appropriate constant n. */
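/* E.g. ORIG_T == -7: -ORIG_T + 1 == 8 == 2**3, so
X * -7 == X - (X << 3). */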
2969 m = exact_log2 (-orig_t + 1);
2970 if (m >= 0 && m < maxm)
2971 {
2972 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2973 /* If the target has a cheap shift-and-subtract insn use
2974 that in preference to a shift insn followed by a sub insn.
2975 Assume that the shift-and-sub is "atomic" with a latency
2976 equal to its cost; otherwise assume that on superscalar
2977 hardware the shift may be executed concurrently with the
2978 earlier steps in the algorithm. */
2979 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2980 {
2981 op_cost = shiftsub1_cost (speed, mode, m);
2982 op_latency = op_cost;
2983 }
2984 else
2985 op_latency = add_cost (speed, mode);
2986
2987 new_limit.cost = best_cost.cost - op_cost;
2988 new_limit.latency = best_cost.latency - op_latency;
2989 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2990 &new_limit, mode);
2991
2992 alg_in->cost.cost += op_cost;
2993 alg_in->cost.latency += op_latency;
2994 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2995 {
2996 best_cost = alg_in->cost;
2997 std::swap (alg_in, best_alg);
2998 best_alg->log[best_alg->ops] = m;
2999 best_alg->op[best_alg->ops] = alg_sub_t_m2;
3000 }
3001 }
3002
3003 if (cache_hit)
3004 goto done;
3005 }
3006
3007 /* Look for factors of t of the form
3008 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3009 If we find such a factor, we can multiply by t using an algorithm that
3010 multiplies by q, shift the result by m and add/subtract it to itself.
3011
3012 We search for large factors first and loop down, even if large factors
3013 are less probable than small; if we find a large factor we will find a
3014 good sequence quickly, and therefore be able to prune (by decreasing
3015 COST_LIMIT) the search. */
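/* For instance, T == 45 factors as (2**3 + 1) * (2**2 + 1), so X * 45
can be synthesized with two shift-and-add steps:
A = (X << 2) + X; result = (A << 3) + A. */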
3016
3017 do_alg_addsub_factor:
3018 for (m = floor_log2 (t - 1); m >= 2; m--)
3019 {
3020 unsigned HOST_WIDE_INT d;
3021
3022 d = (HOST_WIDE_INT_1U << m) + 1;
3023 if (t % d == 0 && t > d && m < maxm
3024 && (!cache_hit || cache_alg == alg_add_factor))
3025 {
3026 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3027 if (shiftadd_cost (speed, mode, m) <= op_cost)
3028 op_cost = shiftadd_cost (speed, mode, m);
3029
3030 op_latency = op_cost;
3031
3032
3033 new_limit.cost = best_cost.cost - op_cost;
3034 new_limit.latency = best_cost.latency - op_latency;
3035 synth_mult (alg_in, t / d, &new_limit, mode);
3036
3037 alg_in->cost.cost += op_cost;
3038 alg_in->cost.latency += op_latency;
3039 if (alg_in->cost.latency < op_cost)
3040 alg_in->cost.latency = op_cost;
3041 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3042 {
3043 best_cost = alg_in->cost;
3044 std::swap (alg_in, best_alg);
3045 best_alg->log[best_alg->ops] = m;
3046 best_alg->op[best_alg->ops] = alg_add_factor;
3047 }
3048 /* Other factors will have been taken care of in the recursion. */
3049 break;
3050 }
3051
3052 d = (HOST_WIDE_INT_1U << m) - 1;
3053 if (t % d == 0 && t > d && m < maxm
3054 && (!cache_hit || cache_alg == alg_sub_factor))
3055 {
3056 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3057 if (shiftsub0_cost (speed, mode, m) <= op_cost)
3058 op_cost = shiftsub0_cost (speed, mode, m);
3059
3060 op_latency = op_cost;
3061
3062 new_limit.cost = best_cost.cost - op_cost;
3063 new_limit.latency = best_cost.latency - op_latency;
3064 synth_mult (alg_in, t / d, &new_limit, mode);
3065
3066 alg_in->cost.cost += op_cost;
3067 alg_in->cost.latency += op_latency;
3068 if (alg_in->cost.latency < op_cost)
3069 alg_in->cost.latency = op_cost;
3070 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3071 {
3072 best_cost = alg_in->cost;
3073 std::swap (alg_in, best_alg);
3074 best_alg->log[best_alg->ops] = m;
3075 best_alg->op[best_alg->ops] = alg_sub_factor;
3076 }
3077 break;
3078 }
3079 }
3080 if (cache_hit)
3081 goto done;
3082
3083 /* Try shift-and-add (load effective address) instructions,
3084 i.e. do a*3, a*5, a*9. */
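/* On x86, for instance, each of a * 3, a * 5 and a * 9 maps to a single
lea instruction, e.g. a * 5 == (a << 2) + a. */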
3085 if ((t & 1) != 0)
3086 {
3087 do_alg_add_t2_m:
3088 q = t - 1;
3089 m = ctz_hwi (q);
3090 if (q && m < maxm)
3091 {
3092 op_cost = shiftadd_cost (speed, mode, m);
3093 new_limit.cost = best_cost.cost - op_cost;
3094 new_limit.latency = best_cost.latency - op_cost;
3095 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3096
3097 alg_in->cost.cost += op_cost;
3098 alg_in->cost.latency += op_cost;
3099 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3100 {
3101 best_cost = alg_in->cost;
3102 std::swap (alg_in, best_alg);
3103 best_alg->log[best_alg->ops] = m;
3104 best_alg->op[best_alg->ops] = alg_add_t2_m;
3105 }
3106 }
3107 if (cache_hit)
3108 goto done;
3109
3110 do_alg_sub_t2_m:
3111 q = t + 1;
3112 m = ctz_hwi (q);
3113 if (q && m < maxm)
3114 {
3115 op_cost = shiftsub0_cost (speed, mode, m);
3116 new_limit.cost = best_cost.cost - op_cost;
3117 new_limit.latency = best_cost.latency - op_cost;
3118 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3119
3120 alg_in->cost.cost += op_cost;
3121 alg_in->cost.latency += op_cost;
3122 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3123 {
3124 best_cost = alg_in->cost;
3125 std::swap (alg_in, best_alg);
3126 best_alg->log[best_alg->ops] = m;
3127 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3128 }
3129 }
3130 if (cache_hit)
3131 goto done;
3132 }
3133
3134 done:
3135 /* If best_cost has not decreased, we have not found any algorithm. */
3136 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3137 {
3138 /* We failed to find an algorithm. Record alg_impossible for
3139 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3140 we are asked to find an algorithm for T within the same or
3141 lower COST_LIMIT, we can immediately return to the
3142 caller. */
3143 entry_ptr->t = t;
3144 entry_ptr->mode = mode;
3145 entry_ptr->speed = speed;
3146 entry_ptr->alg = alg_impossible;
3147 entry_ptr->cost = *cost_limit;
3148 return;
3149 }
3150
3151 /* Cache the result. */
3152 if (!cache_hit)
3153 {
3154 entry_ptr->t = t;
3155 entry_ptr->mode = mode;
3156 entry_ptr->speed = speed;
3157 entry_ptr->alg = best_alg->op[best_alg->ops];
3158 entry_ptr->cost.cost = best_cost.cost;
3159 entry_ptr->cost.latency = best_cost.latency;
3160 }
3161
3162 /* If the sequence is too long for `struct algorithm'
3163 to record, make this search fail. */
3164 if (best_alg->ops == MAX_BITS_PER_WORD)
3165 return;
3166
3167 /* Copy the algorithm from temporary space to the space at alg_out.
3168 We avoid using structure assignment because the majority of
3169 best_alg is normally undefined, and this is a critical function. */
3170 alg_out->ops = best_alg->ops + 1;
3171 alg_out->cost = best_cost;
3172 memcpy (alg_out->op, best_alg->op,
3173 alg_out->ops * sizeof *alg_out->op);
3174 memcpy (alg_out->log, best_alg->log,
3175 alg_out->ops * sizeof *alg_out->log);
3176 }
3177 \f
3178 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3179 Try three variations:
3180
3181 - a shift/add sequence based on VAL itself
3182 - a shift/add sequence based on -VAL, followed by a negation
3183 - a shift/add sequence based on VAL - 1, followed by an addition.
3184
3185 Return true if the cheapest of these cost less than MULT_COST,
3186 describing the algorithm in *ALG and final fixup in *VARIANT. */
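/* For example, VAL == 15 might be synthesized directly as (X << 4) - X,
via negate_variant as -(X * -15), or via add_variant as X * 14 + X,
whichever synth_mult finds cheapest within MULT_COST. */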
3187
3188 bool
3189 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3190 struct algorithm *alg, enum mult_variant *variant,
3191 int mult_cost)
3192 {
3193 struct algorithm alg2;
3194 struct mult_cost limit;
3195 int op_cost;
3196 bool speed = optimize_insn_for_speed_p ();
3197
3198 /* Fail quickly for impossible bounds. */
3199 if (mult_cost < 0)
3200 return false;
3201
3202 /* Ensure that mult_cost provides a reasonable upper bound.
3203 Any constant multiplication can be performed with fewer
3204 than 2 * bits additions. */
3205 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3206 if (mult_cost > op_cost)
3207 mult_cost = op_cost;
3208
3209 *variant = basic_variant;
3210 limit.cost = mult_cost;
3211 limit.latency = mult_cost;
3212 synth_mult (alg, val, &limit, mode);
3213
3214 /* This works only if the inverted value actually fits in an
3215 `unsigned int'. */
3216 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3217 {
3218 op_cost = neg_cost (speed, mode);
3219 if (MULT_COST_LESS (&alg->cost, mult_cost))
3220 {
3221 limit.cost = alg->cost.cost - op_cost;
3222 limit.latency = alg->cost.latency - op_cost;
3223 }
3224 else
3225 {
3226 limit.cost = mult_cost - op_cost;
3227 limit.latency = mult_cost - op_cost;
3228 }
3229
3230 synth_mult (&alg2, -val, &limit, mode);
3231 alg2.cost.cost += op_cost;
3232 alg2.cost.latency += op_cost;
3233 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3234 *alg = alg2, *variant = negate_variant;
3235 }
3236
3237 /* This proves very useful for division-by-constant. */
3238 op_cost = add_cost (speed, mode);
3239 if (MULT_COST_LESS (&alg->cost, mult_cost))
3240 {
3241 limit.cost = alg->cost.cost - op_cost;
3242 limit.latency = alg->cost.latency - op_cost;
3243 }
3244 else
3245 {
3246 limit.cost = mult_cost - op_cost;
3247 limit.latency = mult_cost - op_cost;
3248 }
3249
3250 synth_mult (&alg2, val - 1, &limit, mode);
3251 alg2.cost.cost += op_cost;
3252 alg2.cost.latency += op_cost;
3253 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3254 *alg = alg2, *variant = add_variant;
3255
3256 return MULT_COST_LESS (&alg->cost, mult_cost);
3257 }
3258
3259 /* A subroutine of expand_mult, used for constant multiplications.
3260 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3261 convenient. Use the shift/add sequence described by ALG and apply
3262 the final fixup specified by VARIANT. */
3263
3264 static rtx
3265 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3266 rtx target, const struct algorithm *alg,
3267 enum mult_variant variant)
3268 {
3269 unsigned HOST_WIDE_INT val_so_far;
3270 rtx_insn *insn;
3271 rtx accum, tem;
3272 int opno;
3273 machine_mode nmode;
3274
3275 /* Avoid referencing memory over and over and invalid sharing
3276 on SUBREGs. */
3277 op0 = force_reg (mode, op0);
3278
3279 /* ACCUM starts out either as OP0 or as a zero, depending on
3280 the first operation. */
3281
3282 if (alg->op[0] == alg_zero)
3283 {
3284 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3285 val_so_far = 0;
3286 }
3287 else if (alg->op[0] == alg_m)
3288 {
3289 accum = copy_to_mode_reg (mode, op0);
3290 val_so_far = 1;
3291 }
3292 else
3293 gcc_unreachable ();
3294
3295 for (opno = 1; opno < alg->ops; opno++)
3296 {
3297 int log = alg->log[opno];
3298 rtx shift_subtarget = optimize ? 0 : accum;
3299 rtx add_target
3300 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3301 && !optimize)
3302 ? target : 0;
3303 rtx accum_target = optimize ? 0 : accum;
3304 rtx accum_inner;
3305
3306 switch (alg->op[opno])
3307 {
3308 case alg_shift:
3309 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3310 /* REG_EQUAL note will be attached to the following insn. */
3311 emit_move_insn (accum, tem);
3312 val_so_far <<= log;
3313 break;
3314
3315 case alg_add_t_m2:
3316 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3317 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3318 add_target ? add_target : accum_target);
3319 val_so_far += HOST_WIDE_INT_1U << log;
3320 break;
3321
3322 case alg_sub_t_m2:
3323 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3324 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3325 add_target ? add_target : accum_target);
3326 val_so_far -= HOST_WIDE_INT_1U << log;
3327 break;
3328
3329 case alg_add_t2_m:
3330 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3331 log, shift_subtarget, 0);
3332 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3333 add_target ? add_target : accum_target);
3334 val_so_far = (val_so_far << log) + 1;
3335 break;
3336
3337 case alg_sub_t2_m:
3338 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3339 log, shift_subtarget, 0);
3340 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3341 add_target ? add_target : accum_target);
3342 val_so_far = (val_so_far << log) - 1;
3343 break;
3344
3345 case alg_add_factor:
3346 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3347 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3348 add_target ? add_target : accum_target);
3349 val_so_far += val_so_far << log;
3350 break;
3351
3352 case alg_sub_factor:
3353 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3354 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3355 (add_target
3356 ? add_target : (optimize ? 0 : tem)));
3357 val_so_far = (val_so_far << log) - val_so_far;
3358 break;
3359
3360 default:
3361 gcc_unreachable ();
3362 }
3363
3364 if (SCALAR_INT_MODE_P (mode))
3365 {
3366 /* Write a REG_EQUAL note on the last insn so that we can cse
3367 multiplication sequences. Note that if ACCUM is a SUBREG,
3368 we've set the inner register and must properly indicate that. */
3369 tem = op0, nmode = mode;
3370 accum_inner = accum;
3371 if (GET_CODE (accum) == SUBREG)
3372 {
3373 accum_inner = SUBREG_REG (accum);
3374 nmode = GET_MODE (accum_inner);
3375 tem = gen_lowpart (nmode, op0);
3376 }
3377
3378 /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3379 In that case, only the low bits of accum would be guaranteed to
3380 be equal to the content of the REG_EQUAL note, the upper bits
3381 can be anything. */
3382 if (!paradoxical_subreg_p (tem))
3383 {
3384 insn = get_last_insn ();
3385 wide_int wval_so_far
3386 = wi::uhwi (val_so_far,
3387 GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3388 rtx c = immed_wide_int_const (wval_so_far, nmode);
3389 set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3390 accum_inner);
3391 }
3392 }
3393 }
3394
3395 if (variant == negate_variant)
3396 {
3397 val_so_far = -val_so_far;
3398 accum = expand_unop (mode, neg_optab, accum, target, 0);
3399 }
3400 else if (variant == add_variant)
3401 {
3402 val_so_far = val_so_far + 1;
3403 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3404 }
3405
3406 /* Compare only the bits of val and val_so_far that are significant
3407 in the result mode, to avoid sign-/zero-extension confusion. */
3408 nmode = GET_MODE_INNER (mode);
3409 val &= GET_MODE_MASK (nmode);
3410 val_so_far &= GET_MODE_MASK (nmode);
3411 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3412
3413 return accum;
3414 }
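
/* Editorial example, not part of the original source: one possible
   walk through the loop above for VAL = 10, assuming the algorithm
   { alg_m, alg_add_t_m2 (log 2), alg_shift (log 1) }:

     accum = op0;                  val_so_far = 1
     accum = accum + (op0 << 2);   val_so_far = 1 + 4 = 5
     accum = accum << 1;           val_so_far = 5 << 1 = 10

   The final assertion then checks that val_so_far equals VAL within
   the mask of the result mode.  */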
3415
3416 /* Perform a multiplication and return an rtx for the result.
3417 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3418 TARGET is a suggestion for where to store the result (an rtx).
3419
3420 We check specially for a constant integer as OP1.
3421 If you want this check for OP0 as well, then before calling
3422 you should swap the two operands if OP0 would be constant. */
3423
3424 rtx
3425 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3426 int unsignedp, bool no_libcall)
3427 {
3428 enum mult_variant variant;
3429 struct algorithm algorithm;
3430 rtx scalar_op1;
3431 int max_cost;
3432 bool speed = optimize_insn_for_speed_p ();
3433 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3434
3435 if (CONSTANT_P (op0))
3436 std::swap (op0, op1);
3437
3438 /* For vectors, there are several simplifications that can be made if
3439 all elements of the vector constant are identical. */
3440 scalar_op1 = unwrap_const_vec_duplicate (op1);
3441
3442 if (INTEGRAL_MODE_P (mode))
3443 {
3444 rtx fake_reg;
3445 HOST_WIDE_INT coeff;
3446 bool is_neg;
3447 int mode_bitsize;
3448
3449 if (op1 == CONST0_RTX (mode))
3450 return op1;
3451 if (op1 == CONST1_RTX (mode))
3452 return op0;
3453 if (op1 == CONSTM1_RTX (mode))
3454 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3455 op0, target, 0);
3456
3457 if (do_trapv)
3458 goto skip_synth;
3459
3460 /* If mode is integer vector mode, check if the backend supports
3461 vector lshift (by scalar or vector) at all. If not, we can't use
3462 synthesized multiply.
3463 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3464 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3465 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3466 goto skip_synth;
3467
3468 /* These are the operations that are potentially turned into
3469 a sequence of shifts and additions. */
3470 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3471
3472 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3473 less than or equal in size to `unsigned int' this doesn't matter.
3474 If the mode is larger than `unsigned int', then synth_mult works
3475 only if the constant value exactly fits in an `unsigned int' without
3476 any truncation. This means that multiplying by negative values does
3477 not work; results are off by 2^32 on a 32-bit machine. */
3478 if (CONST_INT_P (scalar_op1))
3479 {
3480 coeff = INTVAL (scalar_op1);
3481 is_neg = coeff < 0;
3482 }
3483 #if TARGET_SUPPORTS_WIDE_INT
3484 else if (CONST_WIDE_INT_P (scalar_op1))
3485 #else
3486 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3487 #endif
3488 {
3489 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3490 /* Perfect power of 2 (other than 1, which is handled above). */
3491 if (shift > 0)
3492 return expand_shift (LSHIFT_EXPR, mode, op0,
3493 shift, target, unsignedp);
3494 else
3495 goto skip_synth;
3496 }
3497 else
3498 goto skip_synth;
3499
3500 /* We used to test optimize here, on the grounds that it's better to
3501 produce a smaller program when -O is not used. But this causes
3502 such a terrible slowdown sometimes that it seems better to always
3503 use synth_mult. */
3504
3505 /* Special case powers of two. */
3506 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3507 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3508 return expand_shift (LSHIFT_EXPR, mode, op0,
3509 floor_log2 (coeff), target, unsignedp);
3510
3511 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3512
3513 /* Attempt to handle multiplication of DImode values by negative
3514 coefficients, by performing the multiplication by a positive
3515 multiplier and then inverting the result. */
3516 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3517 {
3518 /* It's safe to use -coeff even for INT_MIN, as the
3519 result is interpreted as an unsigned coefficient.
3520 Exclude cost of op0 from max_cost to match the cost
3521 calculation of the synth_mult. */
3522 coeff = -(unsigned HOST_WIDE_INT) coeff;
3523 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3524 mode, speed)
3525 - neg_cost (speed, mode));
3526 if (max_cost <= 0)
3527 goto skip_synth;
3528
3529 /* Special case powers of two. */
3530 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3531 {
3532 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3533 floor_log2 (coeff), target, unsignedp);
3534 return expand_unop (mode, neg_optab, temp, target, 0);
3535 }
3536
3537 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3538 max_cost))
3539 {
3540 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3541 &algorithm, variant);
3542 return expand_unop (mode, neg_optab, temp, target, 0);
3543 }
3544 goto skip_synth;
3545 }
3546
3547 /* Exclude cost of op0 from max_cost to match the cost
3548 calculation of the synth_mult. */
3549 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3550 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3551 return expand_mult_const (mode, op0, coeff, target,
3552 &algorithm, variant);
3553 }
3554 skip_synth:
3555
3556 /* Expand x*2.0 as x+x. */
3557 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3558 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3559 {
3560 op0 = force_reg (GET_MODE (op0), op0);
3561 return expand_binop (mode, add_optab, op0, op0,
3562 target, unsignedp,
3563 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3564 }
3565
3566 /* This used to use umul_optab if unsigned, but for non-widening multiply
3567 there is no difference between signed and unsigned. */
3568 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3569 op0, op1, target, unsignedp,
3570 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3571 gcc_assert (op0 || no_libcall);
3572 return op0;
3573 }
3574
3575 /* Return a cost estimate for multiplying a register by the given
3576 COEFFicient in the given MODE and SPEED. */
3577
3578 int
3579 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3580 {
3581 int max_cost;
3582 struct algorithm algorithm;
3583 enum mult_variant variant;
3584
3585 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3586 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3587 mode, speed);
3588 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3589 return algorithm.cost.cost;
3590 else
3591 return max_cost;
3592 }
3593
3594 /* Perform a widening multiplication and return an rtx for the result.
3595 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3596 TARGET is a suggestion for where to store the result (an rtx).
3597 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3598 or smul_widen_optab.
3599
3600 We check specially for a constant integer as OP1, comparing the
3601 cost of a widening multiply against the cost of a sequence of shifts
3602 and adds. */
3603
3604 rtx
3605 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3606 int unsignedp, optab this_optab)
3607 {
3608 bool speed = optimize_insn_for_speed_p ();
3609 rtx cop1;
3610
3611 if (CONST_INT_P (op1)
3612 && GET_MODE (op0) != VOIDmode
3613 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3614 this_optab == umul_widen_optab))
3615 && CONST_INT_P (cop1)
3616 && (INTVAL (cop1) >= 0
3617 || HWI_COMPUTABLE_MODE_P (mode)))
3618 {
3619 HOST_WIDE_INT coeff = INTVAL (cop1);
3620 int max_cost;
3621 enum mult_variant variant;
3622 struct algorithm algorithm;
3623
3624 if (coeff == 0)
3625 return CONST0_RTX (mode);
3626
3627 /* Special case powers of two. */
3628 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3629 {
3630 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3631 return expand_shift (LSHIFT_EXPR, mode, op0,
3632 floor_log2 (coeff), target, unsignedp);
3633 }
3634
3635 /* Exclude cost of op0 from max_cost to match the cost
3636 calculation of the synth_mult. */
3637 max_cost = mul_widen_cost (speed, mode);
3638 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3639 max_cost))
3640 {
3641 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3642 return expand_mult_const (mode, op0, coeff, target,
3643 &algorithm, variant);
3644 }
3645 }
3646 return expand_binop (mode, this_optab, op0, op1, target,
3647 unsignedp, OPTAB_LIB_WIDEN);
3648 }
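
/* Editorial example, not part of the original source: for a widening
   multiply of a 16-bit X by the constant 12 into a 32-bit mode, the
   shift/add path above behaves like

     w = (uint32_t) x;          convert_to_mode
     r = ((w << 1) + w) << 2;   3*w, then *4, i.e. 12*w

   when that beats mul_widen_cost.  */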
3649 \f
3650 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3651 replace division by D, and put the least significant N bits of the result
3652 in *MULTIPLIER_PTR and return the most significant bit.
3653
3654 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3655 needed precision is in PRECISION (should be <= N).
3656
3657 PRECISION should be as small as possible so this function can choose
3658 multiplier more freely.
3659
3660 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3661 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3662
3663 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3664 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3665
3666 unsigned HOST_WIDE_INT
3667 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3668 unsigned HOST_WIDE_INT *multiplier_ptr,
3669 int *post_shift_ptr, int *lgup_ptr)
3670 {
3671 int lgup, post_shift;
3672 int pow, pow2;
3673
3674 /* lgup = ceil(log2(divisor)); */
3675 lgup = ceil_log2 (d);
3676
3677 gcc_assert (lgup <= n);
3678
3679 pow = n + lgup;
3680 pow2 = n + lgup - precision;
3681
3682 /* mlow = 2^(N + lgup)/d */
3683 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3684 wide_int mlow = wi::udiv_trunc (val, d);
3685
3686 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3687 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3688 wide_int mhigh = wi::udiv_trunc (val, d);
3689
3690 /* If precision == N, then mlow, mhigh exceed 2^N
3691 (but they do not exceed 2^(N+1)). */
3692
3693 /* Reduce to lowest terms. */
3694 for (post_shift = lgup; post_shift > 0; post_shift--)
3695 {
3696 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3697 HOST_BITS_PER_WIDE_INT);
3698 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3699 HOST_BITS_PER_WIDE_INT);
3700 if (ml_lo >= mh_lo)
3701 break;
3702
3703 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3704 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3705 }
3706
3707 *post_shift_ptr = post_shift;
3708 *lgup_ptr = lgup;
3709 if (n < HOST_BITS_PER_WIDE_INT)
3710 {
3711 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3712 *multiplier_ptr = mhigh.to_uhwi () & mask;
3713 return mhigh.to_uhwi () > mask;
3714 }
3715 else
3716 {
3717 *multiplier_ptr = mhigh.to_uhwi ();
3718 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3719 }
3720 }
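
/* Editorial worked example, not part of the original source, with
   N = PRECISION = 32: for D = 5, lgup = 3, mlow = 2^35/5 and
   mhigh = (2^35 + 2^3)/5, and the reduction loop above settles on
   the well-known multiplier 0xCCCCCCCD with *POST_SHIFT_PTR = 2 and
   a zero returned high bit, i.e.

     x / 5 == (uint32_t) (((uint64_t) x * 0xCCCCCCCD) >> 32) >> 2

   for every 32-bit unsigned x.  */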
3721
3722 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3723 congruent to 1 (mod 2**N). */
3724
3725 static unsigned HOST_WIDE_INT
3726 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3727 {
3728 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3729
3730 /* The algorithm notes that the choice y = x satisfies
3731 x*y == 1 mod 2^3, since x is assumed odd.
3732 Each iteration doubles the number of bits of significance in y. */
3733
3734 unsigned HOST_WIDE_INT mask;
3735 unsigned HOST_WIDE_INT y = x;
3736 int nbit = 3;
3737
3738 mask = (n == HOST_BITS_PER_WIDE_INT
3739 ? HOST_WIDE_INT_M1U
3740 : (HOST_WIDE_INT_1U << n) - 1);
3741
3742 while (nbit < n)
3743 {
3744 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3745 nbit *= 2;
3746 }
3747 return y;
3748 }
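
/* Editorial worked example, not part of the original source: for
   x = 7 and n = 8, y starts at 7 (7*7 = 49 == 1 mod 8), and the
   iteration y = y*(2 - x*y) mod 256 converges to y = 183; indeed
   7 * 183 = 1281 = 5*256 + 1, so 183 is the inverse of 7 mod 2^8.  */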
3749
3750 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3751 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3752 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3753 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3754 become signed.
3755
3756 The result is put in TARGET if that is convenient.
3757
3758 MODE is the mode of operation. */
3759
3760 rtx
3761 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3762 rtx op1, rtx target, int unsignedp)
3763 {
3764 rtx tem;
3765 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3766
3767 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3768 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3769 tem = expand_and (mode, tem, op1, NULL_RTX);
3770 adj_operand
3771 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3772 adj_operand);
3773
3774 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3775 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3776 tem = expand_and (mode, tem, op0, NULL_RTX);
3777 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3778 target);
3779
3780 return target;
3781 }
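
/* Editorial note, not part of the original source: the adjustment
   above implements the identity between the two high parts.  If X
   and Y are N-bit values, their unsigned interpretations are
   X + 2^N*[X < 0] and Y + 2^N*[Y < 0], hence

     uhigh (X, Y) == shigh (X, Y) + [X < 0]*Y + [Y < 0]*X  (mod 2^N).

   Each `(op >> (N-1)) & other' term above computes one correction
   product without a branch, and ADJ_CODE selects the direction of
   the fixup.  */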
3782
3783 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3784
3785 static rtx
3786 extract_high_half (scalar_int_mode mode, rtx op)
3787 {
3788 if (mode == word_mode)
3789 return gen_highpart (mode, op);
3790
3791 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3792
3793 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3794 GET_MODE_BITSIZE (mode), 0, 1);
3795 return convert_modes (mode, wider_mode, op, 0);
3796 }
3797
3798 /* Like expmed_mult_highpart, but only consider using a multiplication
3799 optab. OP1 is an rtx for the constant operand. */
3800
3801 static rtx
3802 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3803 rtx target, int unsignedp, int max_cost)
3804 {
3805 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3806 optab moptab;
3807 rtx tem;
3808 int size;
3809 bool speed = optimize_insn_for_speed_p ();
3810
3811 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3812
3813 size = GET_MODE_BITSIZE (mode);
3814
3815 /* Firstly, try using a multiplication insn that only generates the needed
3816 high part of the product, and in the sign flavor of unsignedp. */
3817 if (mul_highpart_cost (speed, mode) < max_cost)
3818 {
3819 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3820 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3821 unsignedp, OPTAB_DIRECT);
3822 if (tem)
3823 return tem;
3824 }
3825
3826 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3827 Need to adjust the result after the multiplication. */
3828 if (size - 1 < BITS_PER_WORD
3829 && (mul_highpart_cost (speed, mode)
3830 + 2 * shift_cost (speed, mode, size-1)
3831 + 4 * add_cost (speed, mode) < max_cost))
3832 {
3833 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3834 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3835 unsignedp, OPTAB_DIRECT);
3836 if (tem)
3837 /* We used the wrong signedness. Adjust the result. */
3838 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3839 tem, unsignedp);
3840 }
3841
3842 /* Try widening multiplication. */
3843 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3844 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3845 && mul_widen_cost (speed, wider_mode) < max_cost)
3846 {
3847 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3848 unsignedp, OPTAB_WIDEN);
3849 if (tem)
3850 return extract_high_half (mode, tem);
3851 }
3852
3853 /* Try widening the mode and performing a non-widening multiplication. */
3854 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3855 && size - 1 < BITS_PER_WORD
3856 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3857 < max_cost))
3858 {
3859 rtx_insn *insns;
3860 rtx wop0, wop1;
3861
3862 /* We need to widen the operands, for example to ensure the
3863 constant multiplier is correctly sign or zero extended.
3864 Use a sequence to clean up any instructions emitted by
3865 the conversions if things don't work out. */
3866 start_sequence ();
3867 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3868 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3869 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3870 unsignedp, OPTAB_WIDEN);
3871 insns = get_insns ();
3872 end_sequence ();
3873
3874 if (tem)
3875 {
3876 emit_insn (insns);
3877 return extract_high_half (mode, tem);
3878 }
3879 }
3880
3881 /* Try widening multiplication of opposite signedness, and adjust. */
3882 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3883 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3884 && size - 1 < BITS_PER_WORD
3885 && (mul_widen_cost (speed, wider_mode)
3886 + 2 * shift_cost (speed, mode, size-1)
3887 + 4 * add_cost (speed, mode) < max_cost))
3888 {
3889 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3890 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3891 if (tem != 0)
3892 {
3893 tem = extract_high_half (mode, tem);
3894 /* We used the wrong signedness. Adjust the result. */
3895 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3896 target, unsignedp);
3897 }
3898 }
3899
3900 return 0;
3901 }
3902
3903 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3904 putting the high half of the result in TARGET if that is convenient,
3905 and return where the result is. If the operation cannot be performed,
3906 0 is returned.
3907
3908 MODE is the mode of operation and result.
3909
3910 UNSIGNEDP nonzero means unsigned multiply.
3911
3912 MAX_COST is the total allowed cost for the expanded RTL. */
3913
3914 static rtx
3915 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3916 rtx target, int unsignedp, int max_cost)
3917 {
3918 unsigned HOST_WIDE_INT cnst1;
3919 int extra_cost;
3920 bool sign_adjust = false;
3921 enum mult_variant variant;
3922 struct algorithm alg;
3923 rtx tem;
3924 bool speed = optimize_insn_for_speed_p ();
3925
3926 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3927 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3928
3929 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3930
3931 /* We can't optimize modes wider than BITS_PER_WORD.
3932 ??? We might be able to perform double-word arithmetic if
3933 mode == word_mode; however, all the cost calculations in
3934 synth_mult etc. assume single-word operations. */
3935 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3936 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3937 return expmed_mult_highpart_optab (mode, op0, op1, target,
3938 unsignedp, max_cost);
3939
3940 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3941
3942 /* Check whether we try to multiply by a negative constant. */
3943 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3944 {
3945 sign_adjust = true;
3946 extra_cost += add_cost (speed, mode);
3947 }
3948
3949 /* See whether shift/add multiplication is cheap enough. */
3950 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3951 max_cost - extra_cost))
3952 {
3953 /* See whether the specialized multiplication optabs are
3954 cheaper than the shift/add version. */
3955 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3956 alg.cost.cost + extra_cost);
3957 if (tem)
3958 return tem;
3959
3960 tem = convert_to_mode (wider_mode, op0, unsignedp);
3961 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3962 tem = extract_high_half (mode, tem);
3963
3964 /* Adjust result for signedness. */
3965 if (sign_adjust)
3966 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3967
3968 return tem;
3969 }
3970 return expmed_mult_highpart_optab (mode, op0, op1, target,
3971 unsignedp, max_cost);
3972 }
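
/* Editorial note, not part of the original source: the SIGN_ADJUST
   fixup above uses the fact that CNST1 is the unsigned reading of a
   negative constant C, i.e. CNST1 = C + 2^N, so

     high (x * CNST1) == high (x * C) + x,

   and subtracting OP0 from the unsigned-style result recovers the
   signed high part.  */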
3973
3974
3975 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3976
3977 static rtx
3978 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3979 {
3980 rtx result, temp, shift;
3981 rtx_code_label *label;
3982 int logd;
3983 int prec = GET_MODE_PRECISION (mode);
3984
3985 logd = floor_log2 (d);
3986 result = gen_reg_rtx (mode);
3987
3988 /* Avoid conditional branches when they're expensive. */
3989 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3990 && optimize_insn_for_speed_p ())
3991 {
3992 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3993 mode, 0, -1);
3994 if (signmask)
3995 {
3996 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3997 signmask = force_reg (mode, signmask);
3998 shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
3999
4000 /* Use the rtx_cost of a LSHIFTRT instruction to determine
4001 which instruction sequence to use. If logical right shifts
4002 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
4003 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
4004
4005 temp = gen_rtx_LSHIFTRT (mode, result, shift);
4006 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4007 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4008 > COSTS_N_INSNS (2)))
4009 {
4010 temp = expand_binop (mode, xor_optab, op0, signmask,
4011 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4012 temp = expand_binop (mode, sub_optab, temp, signmask,
4013 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4014 temp = expand_binop (mode, and_optab, temp,
4015 gen_int_mode (masklow, mode),
4016 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4017 temp = expand_binop (mode, xor_optab, temp, signmask,
4018 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4019 temp = expand_binop (mode, sub_optab, temp, signmask,
4020 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4021 }
4022 else
4023 {
4024 signmask = expand_binop (mode, lshr_optab, signmask, shift,
4025 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4026 signmask = force_reg (mode, signmask);
4027
4028 temp = expand_binop (mode, add_optab, op0, signmask,
4029 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4030 temp = expand_binop (mode, and_optab, temp,
4031 gen_int_mode (masklow, mode),
4032 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4033 temp = expand_binop (mode, sub_optab, temp, signmask,
4034 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4035 }
4036 return temp;
4037 }
4038 }
4039
4040 /* Mask contains the mode's signbit and the significant bits of the
4041 modulus. By including the signbit in the operation, many targets
4042 can avoid an explicit compare operation in the following comparison
4043 against zero. */
4044 wide_int mask = wi::mask (logd, false, prec);
4045 mask = wi::set_bit (mask, prec - 1);
4046
4047 temp = expand_binop (mode, and_optab, op0,
4048 immed_wide_int_const (mask, mode),
4049 result, 1, OPTAB_LIB_WIDEN);
4050 if (temp != result)
4051 emit_move_insn (result, temp);
4052
4053 label = gen_label_rtx ();
4054 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4055
4056 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4057 0, OPTAB_LIB_WIDEN);
4058
4059 mask = wi::mask (logd, true, prec);
4060 temp = expand_binop (mode, ior_optab, temp,
4061 immed_wide_int_const (mask, mode),
4062 result, 1, OPTAB_LIB_WIDEN);
4063 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4064 0, OPTAB_LIB_WIDEN);
4065 if (temp != result)
4066 emit_move_insn (result, temp);
4067 emit_label (label);
4068 return result;
4069 }
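
/* Editorial worked example, not part of the original source, for the
   branching path above with an 8-bit mode and D = 4 (logd = 2): the
   mask is 0x83 (sign bit plus the two low bits).  For OP0 = -5
   (0xFB), temp = 0xFB & 0x83 = 0x83, which is negative, so the
   fixup computes ((0x83 - 1) | 0xFC) + 1 = 0xFF = -1, matching C's
   truncating semantics: -5 % 4 == -1.  */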
4070
4071 /* Expand signed division of OP0 by a power of two D in mode MODE.
4072 This routine is only called for positive values of D. */
4073
4074 static rtx
4075 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4076 {
4077 rtx temp;
4078 rtx_code_label *label;
4079 int logd;
4080
4081 logd = floor_log2 (d);
4082
4083 if (d == 2
4084 && BRANCH_COST (optimize_insn_for_speed_p (),
4085 false) >= 1)
4086 {
4087 temp = gen_reg_rtx (mode);
4088 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4089 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4090 0, OPTAB_LIB_WIDEN);
4091 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4092 }
4093
4094 if (HAVE_conditional_move
4095 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4096 {
4097 rtx temp2;
4098
4099 start_sequence ();
4100 temp2 = copy_to_mode_reg (mode, op0);
4101 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4102 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4103 temp = force_reg (mode, temp);
4104
4105 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
4106 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4107 mode, temp, temp2, mode, 0);
4108 if (temp2)
4109 {
4110 rtx_insn *seq = get_insns ();
4111 end_sequence ();
4112 emit_insn (seq);
4113 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4114 }
4115 end_sequence ();
4116 }
4117
4118 if (BRANCH_COST (optimize_insn_for_speed_p (),
4119 false) >= 2)
4120 {
4121 int ushift = GET_MODE_BITSIZE (mode) - logd;
4122
4123 temp = gen_reg_rtx (mode);
4124 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4125 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4126 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4127 > COSTS_N_INSNS (1))
4128 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
4129 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4130 else
4131 temp = expand_shift (RSHIFT_EXPR, mode, temp,
4132 ushift, NULL_RTX, 1);
4133 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4134 0, OPTAB_LIB_WIDEN);
4135 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4136 }
4137
4138 label = gen_label_rtx ();
4139 temp = copy_to_mode_reg (mode, op0);
4140 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4141 expand_inc (temp, gen_int_mode (d - 1, mode));
4142 emit_label (label);
4143 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4144 }
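
/* Editorial note, not part of the original source: the store-flag
   paths above are variants of the classic branchless signed division
   by 2^k with arithmetic shifts,

     q = (x + ((x >> (N-1)) & (2^k - 1))) >> k,

   e.g. for x = -7 and k = 2: (-7 + 3) >> 2 = -4 >> 2 = -1, matching
   truncating division.  */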
4145 \f
4146 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4147 if that is convenient, and returning where the result is.
4148 You may request either the quotient or the remainder as the result;
4149 specify REM_FLAG nonzero to get the remainder.
4150
4151 CODE is the expression code for which kind of division this is;
4152 it controls how rounding is done. MODE is the machine mode to use.
4153 UNSIGNEDP nonzero means do unsigned division. */
4154
4155 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4156 and then correct it by or'ing in missing high bits
4157 if result of ANDI is nonzero.
4158 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4159 This could optimize to a bfexts instruction.
4160 But C doesn't use these operations, so their optimizations are
4161 left for later. */
4162 /* ??? For modulo, we don't actually need the highpart of the first product,
4163 the low part will do nicely. And for small divisors, the second multiply
4164 can also be a low-part only multiply or even be completely left out.
4165 E.g. to calculate the remainder of a division by 3 with a 32 bit
4166 multiply, multiply with 0x55555556 and extract the upper two bits;
4167 the result is exact for inputs up to 0x1fffffff.
4168 The input range can be reduced by using cross-sum rules.
4169 For odd divisors >= 3, the following table gives right shift counts
4170 so that if a number is shifted by an integer multiple of the given
4171 amount, the remainder stays the same:
4172 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4173 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4174 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4175 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4176 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4177
4178 Cross-sum rules for even numbers can be derived by leaving as many bits
4179 to the right alone as the divisor has zeros to the right.
4180 E.g. if x is an unsigned 32 bit number:
4181 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4182 */
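
/* Editorial sketch, not part of the original source, of the
   remainder-by-3 trick described above: the top two bits of the low
   32-bit product carry the remainder.  */
#if 0
static unsigned int
urem3_sketch (unsigned int x)	/* Exact for x <= 0x1fffffff.  */
{
  return (x * 0x55555556u) >> 30;
}
#endif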
4183
4184 rtx
4185 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4186 rtx op0, rtx op1, rtx target, int unsignedp)
4187 {
4188 machine_mode compute_mode;
4189 rtx tquotient;
4190 rtx quotient = 0, remainder = 0;
4191 rtx_insn *last;
4192 rtx_insn *insn;
4193 optab optab1, optab2;
4194 int op1_is_constant, op1_is_pow2 = 0;
4195 int max_cost, extra_cost;
4196 static HOST_WIDE_INT last_div_const = 0;
4197 bool speed = optimize_insn_for_speed_p ();
4198
4199 op1_is_constant = CONST_INT_P (op1);
4200 if (op1_is_constant)
4201 {
4202 wide_int ext_op1 = rtx_mode_t (op1, mode);
4203 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4204 || (! unsignedp
4205 && wi::popcount (wi::neg (ext_op1)) == 1));
4206 }
4207
4208 /*
4209 This is the structure of expand_divmod:
4210
4211 First comes code to fix up the operands so we can perform the operations
4212 correctly and efficiently.
4213
4214 Second comes a switch statement with code specific for each rounding mode.
4215 For some special operands this code emits all RTL for the desired
4216 operation, for other cases, it generates only a quotient and stores it in
4217 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4218 to indicate that it has not done anything.
4219
4220 Last comes code that finishes the operation. If QUOTIENT is set and
4221 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4222 QUOTIENT is not set, it is computed using trunc rounding.
4223
4224 We try to generate special code for division and remainder when OP1 is a
4225 constant. If |OP1| = 2**n we can use shifts and some other fast
4226 operations. For other values of OP1, we compute a carefully selected
4227 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4228 by m.
4229
4230 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4231 half of the product. Different strategies for generating the product are
4232 implemented in expmed_mult_highpart.
4233
4234 If what we actually want is the remainder, we generate that by another
4235 by-constant multiplication and a subtraction. */
4236
4237 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4238 code below will malfunction if we are, so check here and handle
4239 the special case if so. */
4240 if (op1 == const1_rtx)
4241 return rem_flag ? const0_rtx : op0;
4242
4243 /* When dividing by -1, we could get an overflow.
4244 negv_optab can handle overflows. */
4245 if (! unsignedp && op1 == constm1_rtx)
4246 {
4247 if (rem_flag)
4248 return const0_rtx;
4249 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4250 ? negv_optab : neg_optab, op0, target, 0);
4251 }
4252
4253 if (target
4254 /* Don't use the function value register as a target
4255 since we have to read it as well as write it,
4256 and function-inlining gets confused by this. */
4257 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4258 /* Don't clobber an operand while doing a multi-step calculation. */
4259 || ((rem_flag || op1_is_constant)
4260 && (reg_mentioned_p (target, op0)
4261 || (MEM_P (op0) && MEM_P (target))))
4262 || reg_mentioned_p (target, op1)
4263 || (MEM_P (op1) && MEM_P (target))))
4264 target = 0;
4265
4266 /* Get the mode in which to perform this computation. Normally it will
4267 be MODE, but sometimes we can't do the desired operation in MODE.
4268 If so, pick a wider mode in which we can do the operation. Convert
4269 to that mode at the start to avoid repeated conversions.
4270
4271 First see what operations we need. These depend on the expression
4272 we are evaluating. (We assume that divxx3 insns exist under the
4273 same conditions that modxx3 insns and that these insns don't normally
4274 fail. If these assumptions are not correct, we may generate less
4275 efficient code in some cases.)
4276
4277 Then see if we find a mode in which we can open-code that operation
4278 (either a division, modulus, or shift). Finally, check for the smallest
4279 mode for which we can do the operation with a library call. */
4280
4281 /* We might want to refine this now that we have division-by-constant
4282 optimization. Since expmed_mult_highpart tries so many variants, it is
4283 not straightforward to generalize this. Maybe we should make an array
4284 of possible modes in init_expmed? Save this for GCC 2.7. */
4285
4286 optab1 = (op1_is_pow2
4287 ? (unsignedp ? lshr_optab : ashr_optab)
4288 : (unsignedp ? udiv_optab : sdiv_optab));
4289 optab2 = (op1_is_pow2 ? optab1
4290 : (unsignedp ? udivmod_optab : sdivmod_optab));
4291
4292 FOR_EACH_MODE_FROM (compute_mode, mode)
4293 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4294 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4295 break;
4296
4297 if (compute_mode == VOIDmode)
4298 FOR_EACH_MODE_FROM (compute_mode, mode)
4299 if (optab_libfunc (optab1, compute_mode)
4300 || optab_libfunc (optab2, compute_mode))
4301 break;
4302
4303 /* If we still couldn't find a mode, use MODE, but expand_binop will
4304 probably die. */
4305 if (compute_mode == VOIDmode)
4306 compute_mode = mode;
4307
4308 if (target && GET_MODE (target) == compute_mode)
4309 tquotient = target;
4310 else
4311 tquotient = gen_reg_rtx (compute_mode);
4312
4313 #if 0
4314 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4315 (mode), and thereby get better code when OP1 is a constant. Do that
4316 later. It will require going over all usages of SIZE below. */
4317 size = GET_MODE_BITSIZE (mode);
4318 #endif
4319
4320 /* Only deduct something for a REM if the last divide done was
4321 for a different constant. Then set the constant of the last
4322 divide. */
4323 max_cost = (unsignedp
4324 ? udiv_cost (speed, compute_mode)
4325 : sdiv_cost (speed, compute_mode));
4326 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4327 && INTVAL (op1) == last_div_const))
4328 max_cost -= (mul_cost (speed, compute_mode)
4329 + add_cost (speed, compute_mode));
4330
4331 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4332
4333 /* Now convert to the best mode to use. */
4334 if (compute_mode != mode)
4335 {
4336 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4337 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4338
4339 /* convert_modes may have placed op1 into a register, so we
4340 must recompute the following. */
4341 op1_is_constant = CONST_INT_P (op1);
4342 if (op1_is_constant)
4343 {
4344 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4345 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4346 || (! unsignedp
4347 && wi::popcount (wi::neg (ext_op1)) == 1));
4348 }
4349 else
4350 op1_is_pow2 = 0;
4351 }
4352
4353 /* If one of the operands is a volatile MEM, copy it into a register. */
4354
4355 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4356 op0 = force_reg (compute_mode, op0);
4357 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4358 op1 = force_reg (compute_mode, op1);
4359
4360 /* If we need the remainder or if OP1 is constant, we need to
4361 put OP0 in a register in case it has any queued subexpressions. */
4362 if (rem_flag || op1_is_constant)
4363 op0 = force_reg (compute_mode, op0);
4364
4365 last = get_last_insn ();
4366
4367 /* Promote floor rounding to trunc rounding for unsigned operations. */
4368 if (unsignedp)
4369 {
4370 if (code == FLOOR_DIV_EXPR)
4371 code = TRUNC_DIV_EXPR;
4372 if (code == FLOOR_MOD_EXPR)
4373 code = TRUNC_MOD_EXPR;
4374 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4375 code = TRUNC_DIV_EXPR;
4376 }
4377
4378 if (op1 != const0_rtx)
4379 switch (code)
4380 {
4381 case TRUNC_MOD_EXPR:
4382 case TRUNC_DIV_EXPR:
4383 if (op1_is_constant)
4384 {
4385 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4386 int size = GET_MODE_BITSIZE (int_mode);
4387 if (unsignedp)
4388 {
4389 unsigned HOST_WIDE_INT mh, ml;
4390 int pre_shift, post_shift;
4391 int dummy;
4392 wide_int wd = rtx_mode_t (op1, int_mode);
4393 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4394
4395 if (wi::popcount (wd) == 1)
4396 {
4397 pre_shift = floor_log2 (d);
4398 if (rem_flag)
4399 {
4400 unsigned HOST_WIDE_INT mask
4401 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4402 remainder
4403 = expand_binop (int_mode, and_optab, op0,
4404 gen_int_mode (mask, int_mode),
4405 remainder, 1,
4406 OPTAB_LIB_WIDEN);
4407 if (remainder)
4408 return gen_lowpart (mode, remainder);
4409 }
4410 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4411 pre_shift, tquotient, 1);
4412 }
4413 else if (size <= HOST_BITS_PER_WIDE_INT)
4414 {
4415 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4416 {
4417 /* Most significant bit of divisor is set; emit an scc
4418 insn. */
4419 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4420 int_mode, 1, 1);
4421 }
4422 else
4423 {
4424 /* Find a suitable multiplier and right shift count
4425 instead of multiplying with D. */
4426
4427 mh = choose_multiplier (d, size, size,
4428 &ml, &post_shift, &dummy);
4429
4430 /* If the suggested multiplier is more than SIZE bits,
4431 we can do better for even divisors, using an
4432 initial right shift. */
4433 if (mh != 0 && (d & 1) == 0)
4434 {
4435 pre_shift = ctz_or_zero (d);
4436 mh = choose_multiplier (d >> pre_shift, size,
4437 size - pre_shift,
4438 &ml, &post_shift, &dummy);
4439 gcc_assert (!mh);
4440 }
4441 else
4442 pre_shift = 0;
4443
4444 if (mh != 0)
4445 {
4446 rtx t1, t2, t3, t4;
4447
4448 if (post_shift - 1 >= BITS_PER_WORD)
4449 goto fail1;
4450
4451 extra_cost
4452 = (shift_cost (speed, int_mode, post_shift - 1)
4453 + shift_cost (speed, int_mode, 1)
4454 + 2 * add_cost (speed, int_mode));
4455 t1 = expmed_mult_highpart
4456 (int_mode, op0, gen_int_mode (ml, int_mode),
4457 NULL_RTX, 1, max_cost - extra_cost);
4458 if (t1 == 0)
4459 goto fail1;
4460 t2 = force_operand (gen_rtx_MINUS (int_mode,
4461 op0, t1),
4462 NULL_RTX);
4463 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4464 t2, 1, NULL_RTX, 1);
4465 t4 = force_operand (gen_rtx_PLUS (int_mode,
4466 t1, t3),
4467 NULL_RTX);
4468 quotient = expand_shift
4469 (RSHIFT_EXPR, int_mode, t4,
4470 post_shift - 1, tquotient, 1);
4471 }
4472 else
4473 {
4474 rtx t1, t2;
4475
4476 if (pre_shift >= BITS_PER_WORD
4477 || post_shift >= BITS_PER_WORD)
4478 goto fail1;
4479
4480 t1 = expand_shift
4481 (RSHIFT_EXPR, int_mode, op0,
4482 pre_shift, NULL_RTX, 1);
4483 extra_cost
4484 = (shift_cost (speed, int_mode, pre_shift)
4485 + shift_cost (speed, int_mode, post_shift));
4486 t2 = expmed_mult_highpart
4487 (int_mode, t1,
4488 gen_int_mode (ml, int_mode),
4489 NULL_RTX, 1, max_cost - extra_cost);
4490 if (t2 == 0)
4491 goto fail1;
4492 quotient = expand_shift
4493 (RSHIFT_EXPR, int_mode, t2,
4494 post_shift, tquotient, 1);
4495 }
4496 }
4497 }
4498 else /* Mode too wide to use tricky code */
4499 break;
4500
4501 insn = get_last_insn ();
4502 if (insn != last)
4503 set_dst_reg_note (insn, REG_EQUAL,
4504 gen_rtx_UDIV (int_mode, op0, op1),
4505 quotient);
4506 }
4507 else /* TRUNC_DIV, signed */
4508 {
4509 unsigned HOST_WIDE_INT ml;
4510 int lgup, post_shift;
4511 rtx mlr;
4512 HOST_WIDE_INT d = INTVAL (op1);
4513 unsigned HOST_WIDE_INT abs_d;
4514
4515 /* Not prepared to handle division/remainder by
4516 0xffffffffffffffff8000000000000000 etc. */
4517 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4518 break;
4519
4520 /* Since d might be INT_MIN, we have to cast to
4521 unsigned HOST_WIDE_INT before negating to avoid
4522 undefined signed overflow. */
4523 abs_d = (d >= 0
4524 ? (unsigned HOST_WIDE_INT) d
4525 : - (unsigned HOST_WIDE_INT) d);
4526
4527 /* n rem d = n rem -d */
4528 if (rem_flag && d < 0)
4529 {
4530 d = abs_d;
4531 op1 = gen_int_mode (abs_d, int_mode);
4532 }
4533
4534 if (d == 1)
4535 quotient = op0;
4536 else if (d == -1)
4537 quotient = expand_unop (int_mode, neg_optab, op0,
4538 tquotient, 0);
4539 else if (size <= HOST_BITS_PER_WIDE_INT
4540 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4541 {
4542 /* This case is not handled correctly below. */
4543 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4544 int_mode, 1, 1);
4545 if (quotient == 0)
4546 goto fail1;
4547 }
4548 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4549 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4550 && (rem_flag
4551 ? smod_pow2_cheap (speed, int_mode)
4552 : sdiv_pow2_cheap (speed, int_mode))
4553 /* We assume that the cheap metric is true if the
4554 optab has an expander for this mode. */
4555 && ((optab_handler ((rem_flag ? smod_optab
4556 : sdiv_optab),
4557 int_mode)
4558 != CODE_FOR_nothing)
4559 || (optab_handler (sdivmod_optab, int_mode)
4560 != CODE_FOR_nothing)))
4561 ;
4562 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4563 {
4564 if (rem_flag)
4565 {
4566 remainder = expand_smod_pow2 (int_mode, op0, d);
4567 if (remainder)
4568 return gen_lowpart (mode, remainder);
4569 }
4570
4571 if (sdiv_pow2_cheap (speed, int_mode)
4572 && ((optab_handler (sdiv_optab, int_mode)
4573 != CODE_FOR_nothing)
4574 || (optab_handler (sdivmod_optab, int_mode)
4575 != CODE_FOR_nothing)))
4576 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4577 int_mode, op0,
4578 gen_int_mode (abs_d,
4579 int_mode),
4580 NULL_RTX, 0);
4581 else
4582 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4583
4584 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4585 negate the quotient. */
4586 if (d < 0)
4587 {
4588 insn = get_last_insn ();
4589 if (insn != last
4590 && abs_d < (HOST_WIDE_INT_1U
4591 << (HOST_BITS_PER_WIDE_INT - 1)))
4592 set_dst_reg_note (insn, REG_EQUAL,
4593 gen_rtx_DIV (int_mode, op0,
4594 gen_int_mode
4595 (abs_d,
4596 int_mode)),
4597 quotient);
4598
4599 quotient = expand_unop (int_mode, neg_optab,
4600 quotient, quotient, 0);
4601 }
4602 }
4603 else if (size <= HOST_BITS_PER_WIDE_INT)
4604 {
4605 choose_multiplier (abs_d, size, size - 1,
4606 &ml, &post_shift, &lgup);
4607 if (ml < HOST_WIDE_INT_1U << (size - 1))
4608 {
4609 rtx t1, t2, t3;
4610
4611 if (post_shift >= BITS_PER_WORD
4612 || size - 1 >= BITS_PER_WORD)
4613 goto fail1;
4614
4615 extra_cost = (shift_cost (speed, int_mode, post_shift)
4616 + shift_cost (speed, int_mode, size - 1)
4617 + add_cost (speed, int_mode));
4618 t1 = expmed_mult_highpart
4619 (int_mode, op0, gen_int_mode (ml, int_mode),
4620 NULL_RTX, 0, max_cost - extra_cost);
4621 if (t1 == 0)
4622 goto fail1;
4623 t2 = expand_shift
4624 (RSHIFT_EXPR, int_mode, t1,
4625 post_shift, NULL_RTX, 0);
4626 t3 = expand_shift
4627 (RSHIFT_EXPR, int_mode, op0,
4628 size - 1, NULL_RTX, 0);
4629 if (d < 0)
4630 quotient
4631 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4632 tquotient);
4633 else
4634 quotient
4635 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4636 tquotient);
4637 }
4638 else
4639 {
4640 rtx t1, t2, t3, t4;
4641
4642 if (post_shift >= BITS_PER_WORD
4643 || size - 1 >= BITS_PER_WORD)
4644 goto fail1;
4645
4646 ml |= HOST_WIDE_INT_M1U << (size - 1);
4647 mlr = gen_int_mode (ml, int_mode);
4648 extra_cost = (shift_cost (speed, int_mode, post_shift)
4649 + shift_cost (speed, int_mode, size - 1)
4650 + 2 * add_cost (speed, int_mode));
4651 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4652 NULL_RTX, 0,
4653 max_cost - extra_cost);
4654 if (t1 == 0)
4655 goto fail1;
4656 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4657 NULL_RTX);
4658 t3 = expand_shift
4659 (RSHIFT_EXPR, int_mode, t2,
4660 post_shift, NULL_RTX, 0);
4661 t4 = expand_shift
4662 (RSHIFT_EXPR, int_mode, op0,
4663 size - 1, NULL_RTX, 0);
4664 if (d < 0)
4665 quotient
4666 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4667 tquotient);
4668 else
4669 quotient
4670 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4671 tquotient);
4672 }
4673 }
4674 else /* Mode too wide to use tricky code */
4675 break;
4676
4677 insn = get_last_insn ();
4678 if (insn != last)
4679 set_dst_reg_note (insn, REG_EQUAL,
4680 gen_rtx_DIV (int_mode, op0, op1),
4681 quotient);
4682 }
4683 break;
4684 }
4685 fail1:
4686 delete_insns_since (last);
4687 break;
4688
4689 case FLOOR_DIV_EXPR:
4690 case FLOOR_MOD_EXPR:
4691 /* We will come here only for signed operations. */
4692 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4693 {
4694 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4695 int size = GET_MODE_BITSIZE (int_mode);
4696 unsigned HOST_WIDE_INT mh, ml;
4697 int pre_shift, lgup, post_shift;
4698 HOST_WIDE_INT d = INTVAL (op1);
4699
4700 if (d > 0)
4701 {
4702 /* We could just as easily deal with negative constants here,
4703 but it does not seem worth the trouble for GCC 2.6. */
4704 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4705 {
4706 pre_shift = floor_log2 (d);
4707 if (rem_flag)
4708 {
4709 unsigned HOST_WIDE_INT mask
4710 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4711 remainder = expand_binop
4712 (int_mode, and_optab, op0,
4713 gen_int_mode (mask, int_mode),
4714 remainder, 0, OPTAB_LIB_WIDEN);
4715 if (remainder)
4716 return gen_lowpart (mode, remainder);
4717 }
4718 quotient = expand_shift
4719 (RSHIFT_EXPR, int_mode, op0,
4720 pre_shift, tquotient, 0);
4721 }
4722 else
4723 {
4724 rtx t1, t2, t3, t4;
4725
4726 mh = choose_multiplier (d, size, size - 1,
4727 &ml, &post_shift, &lgup);
4728 gcc_assert (!mh);
4729
4730 if (post_shift < BITS_PER_WORD
4731 && size - 1 < BITS_PER_WORD)
4732 {
4733 t1 = expand_shift
4734 (RSHIFT_EXPR, int_mode, op0,
4735 size - 1, NULL_RTX, 0);
4736 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4737 NULL_RTX, 0, OPTAB_WIDEN);
4738 extra_cost = (shift_cost (speed, int_mode, post_shift)
4739 + shift_cost (speed, int_mode, size - 1)
4740 + 2 * add_cost (speed, int_mode));
4741 t3 = expmed_mult_highpart
4742 (int_mode, t2, gen_int_mode (ml, int_mode),
4743 NULL_RTX, 1, max_cost - extra_cost);
4744 if (t3 != 0)
4745 {
4746 t4 = expand_shift
4747 (RSHIFT_EXPR, int_mode, t3,
4748 post_shift, NULL_RTX, 1);
4749 quotient = expand_binop (int_mode, xor_optab,
4750 t4, t1, tquotient, 0,
4751 OPTAB_WIDEN);
4752 }
4753 }
4754 }
4755 }
4756 else
4757 {
4758 rtx nsign, t1, t2, t3, t4;
4759 t1 = force_operand (gen_rtx_PLUS (int_mode,
4760 op0, constm1_rtx), NULL_RTX);
4761 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4762 0, OPTAB_WIDEN);
4763 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4764 size - 1, NULL_RTX, 0);
4765 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4766 NULL_RTX);
4767 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4768 NULL_RTX, 0);
4769 if (t4)
4770 {
4771 rtx t5;
4772 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4773 NULL_RTX, 0);
4774 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4775 tquotient);
4776 }
4777 }
4778 }
4779
4780 if (quotient != 0)
4781 break;
4782 delete_insns_since (last);
4783
4784 /* Try using an instruction that produces both the quotient and
4785 remainder, using truncation. We can easily compensate the quotient
4786 or remainder to get floor rounding, once we have the remainder.
4787 Notice that we compute also the final remainder value here,
4788 and return the result right away. */
4789 if (target == 0 || GET_MODE (target) != compute_mode)
4790 target = gen_reg_rtx (compute_mode);
4791
4792 if (rem_flag)
4793 {
4794 remainder
4795 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4796 quotient = gen_reg_rtx (compute_mode);
4797 }
4798 else
4799 {
4800 quotient
4801 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4802 remainder = gen_reg_rtx (compute_mode);
4803 }
4804
4805 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4806 quotient, remainder, 0))
4807 {
4808 /* This could be computed with a branch-less sequence.
4809 Save that for later. */
4810 rtx tem;
4811 rtx_code_label *label = gen_label_rtx ();
4812 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4813 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4814 NULL_RTX, 0, OPTAB_WIDEN);
4815 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4816 expand_dec (quotient, const1_rtx);
4817 expand_inc (remainder, op1);
4818 emit_label (label);
4819 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4820 }
4821
4822 /* No luck with division elimination or divmod. Have to do it
4823 by conditionally adjusting op0 *and* the result. */
4824 {
4825 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4826 rtx adjusted_op0;
4827 rtx tem;
4828
4829 quotient = gen_reg_rtx (compute_mode);
4830 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4831 label1 = gen_label_rtx ();
4832 label2 = gen_label_rtx ();
4833 label3 = gen_label_rtx ();
4834 label4 = gen_label_rtx ();
4835 label5 = gen_label_rtx ();
4836 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4837 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4838 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4839 quotient, 0, OPTAB_LIB_WIDEN);
4840 if (tem != quotient)
4841 emit_move_insn (quotient, tem);
4842 emit_jump_insn (targetm.gen_jump (label5));
4843 emit_barrier ();
4844 emit_label (label1);
4845 expand_inc (adjusted_op0, const1_rtx);
4846 emit_jump_insn (targetm.gen_jump (label4));
4847 emit_barrier ();
4848 emit_label (label2);
4849 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4850 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4851 quotient, 0, OPTAB_LIB_WIDEN);
4852 if (tem != quotient)
4853 emit_move_insn (quotient, tem);
4854 emit_jump_insn (targetm.gen_jump (label5));
4855 emit_barrier ();
4856 emit_label (label3);
4857 expand_dec (adjusted_op0, const1_rtx);
4858 emit_label (label4);
4859 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4860 quotient, 0, OPTAB_LIB_WIDEN);
4861 if (tem != quotient)
4862 emit_move_insn (quotient, tem);
4863 expand_dec (quotient, const1_rtx);
4864 emit_label (label5);
4865 }
4866 break;
4867
4868 case CEIL_DIV_EXPR:
4869 case CEIL_MOD_EXPR:
4870 if (unsignedp)
4871 {
4872 if (op1_is_constant
4873 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4874 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4875 || INTVAL (op1) >= 0))
4876 {
4877 scalar_int_mode int_mode
4878 = as_a <scalar_int_mode> (compute_mode);
4879 rtx t1, t2, t3;
4880 unsigned HOST_WIDE_INT d = INTVAL (op1);
4881 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4882 floor_log2 (d), tquotient, 1);
4883 t2 = expand_binop (int_mode, and_optab, op0,
4884 gen_int_mode (d - 1, int_mode),
4885 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4886 t3 = gen_reg_rtx (int_mode);
4887 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4888 if (t3 == 0)
4889 {
4890 rtx_code_label *lab;
4891 lab = gen_label_rtx ();
4892 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4893 expand_inc (t1, const1_rtx);
4894 emit_label (lab);
4895 quotient = t1;
4896 }
4897 else
4898 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4899 tquotient);
4900 break;
4901 }
4902
4903 /* Try using an instruction that produces both the quotient and
4904 remainder, using truncation. We can easily compensate the
4905 quotient or remainder to get ceiling rounding, once we have the
4906 remainder. Notice that we compute also the final remainder
4907 value here, and return the result right away. */
4908 if (target == 0 || GET_MODE (target) != compute_mode)
4909 target = gen_reg_rtx (compute_mode);
4910
4911 if (rem_flag)
4912 {
4913 remainder = (REG_P (target)
4914 ? target : gen_reg_rtx (compute_mode));
4915 quotient = gen_reg_rtx (compute_mode);
4916 }
4917 else
4918 {
4919 quotient = (REG_P (target)
4920 ? target : gen_reg_rtx (compute_mode));
4921 remainder = gen_reg_rtx (compute_mode);
4922 }
4923
4924 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4925 remainder, 1))
4926 {
4927 /* This could be computed with a branch-less sequence.
4928 Save that for later. */
4929 rtx_code_label *label = gen_label_rtx ();
4930 do_cmp_and_jump (remainder, const0_rtx, EQ,
4931 compute_mode, label);
4932 expand_inc (quotient, const1_rtx);
4933 expand_dec (remainder, op1);
4934 emit_label (label);
4935 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4936 }
4937
4938 /* No luck with division elimination or divmod. Have to do it
4939 by conditionally adjusting op0 *and* the result. */
4940 {
4941 rtx_code_label *label1, *label2;
4942 rtx adjusted_op0, tem;
4943
4944 quotient = gen_reg_rtx (compute_mode);
4945 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4946 label1 = gen_label_rtx ();
4947 label2 = gen_label_rtx ();
4948 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4949 compute_mode, label1);
4950 emit_move_insn (quotient, const0_rtx);
4951 emit_jump_insn (targetm.gen_jump (label2));
4952 emit_barrier ();
4953 emit_label (label1);
4954 expand_dec (adjusted_op0, const1_rtx);
4955 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4956 quotient, 1, OPTAB_LIB_WIDEN);
4957 if (tem != quotient)
4958 emit_move_insn (quotient, tem);
4959 expand_inc (quotient, const1_rtx);
4960 emit_label (label2);
4961 }
4962 }
4963 else /* signed */
4964 {
4965 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4966 && INTVAL (op1) >= 0)
4967 {
4968 /* This is extremely similar to the code for the unsigned case
4969 above. For 2.7 we should merge these variants, but for
4970 2.6.1 I don't want to touch the code for unsigned since that
4971 gets used in C. The signed case will only be used by other
4972 languages (Ada). */
4973
4974 rtx t1, t2, t3;
4975 unsigned HOST_WIDE_INT d = INTVAL (op1);
4976 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4977 floor_log2 (d), tquotient, 0);
4978 t2 = expand_binop (compute_mode, and_optab, op0,
4979 gen_int_mode (d - 1, compute_mode),
4980 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4981 t3 = gen_reg_rtx (compute_mode);
4982 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4983 compute_mode, 1, 1);
4984 if (t3 == 0)
4985 {
4986 rtx_code_label *lab;
4987 lab = gen_label_rtx ();
4988 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4989 expand_inc (t1, const1_rtx);
4990 emit_label (lab);
4991 quotient = t1;
4992 }
4993 else
4994 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4995 t1, t3),
4996 tquotient);
4997 break;
4998 }
4999
5000 /* Try using an instruction that produces both the quotient and
5001 remainder, using truncation. We can easily compensate the
5002 quotient or remainder to get ceiling rounding, once we have the
5003 remainder. Notice that we also compute the final remainder
5004 value here, and return the result right away. */
5005 if (target == 0 || GET_MODE (target) != compute_mode)
5006 target = gen_reg_rtx (compute_mode);
5007 if (rem_flag)
5008 {
5009 remainder = (REG_P (target)
5010 ? target : gen_reg_rtx (compute_mode));
5011 quotient = gen_reg_rtx (compute_mode);
5012 }
5013 else
5014 {
5015 quotient = (REG_P (target)
5016 ? target : gen_reg_rtx (compute_mode));
5017 remainder = gen_reg_rtx (compute_mode);
5018 }
5019
5020 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5021 remainder, 0))
5022 {
5023 /* This could be computed with a branch-less sequence.
5024 Save that for later. */
5025 rtx tem;
5026 rtx_code_label *label = gen_label_rtx ();
5027 do_cmp_and_jump (remainder, const0_rtx, EQ,
5028 compute_mode, label);
5029 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5030 NULL_RTX, 0, OPTAB_WIDEN);
5031 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5032 expand_inc (quotient, const1_rtx);
5033 expand_dec (remainder, op1);
5034 emit_label (label);
5035 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5036 }
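/* Here the adjustment fires only when the division is inexact *and*
   the true quotient is positive, i.e. the operands have the same
   sign; the XOR supplies that sign test.  Sketch, relying on C99
   truncating division:

     int
     ceil_sdiv (int x, int y, int *rem)
     {
       int q = x / y, r = x % y;    // rounds toward zero
       if (r != 0 && (x ^ y) >= 0)  // inexact, true quotient positive
         {
           q += 1;
           r -= y;
         }
       *rem = r;
       return q;
     }
*/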
5037
5038 /* No luck with division elimination or divmod. Have to do it
5039 by conditionally adjusting op0 *and* the result. */
5040 {
5041 rtx_code_label *label1, *label2, *label3, *label4, *label5;
5042 rtx adjusted_op0;
5043 rtx tem;
5044
5045 quotient = gen_reg_rtx (compute_mode);
5046 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5047 label1 = gen_label_rtx ();
5048 label2 = gen_label_rtx ();
5049 label3 = gen_label_rtx ();
5050 label4 = gen_label_rtx ();
5051 label5 = gen_label_rtx ();
5052 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5053 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5054 compute_mode, label1);
5055 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5056 quotient, 0, OPTAB_LIB_WIDEN);
5057 if (tem != quotient)
5058 emit_move_insn (quotient, tem);
5059 emit_jump_insn (targetm.gen_jump (label5));
5060 emit_barrier ();
5061 emit_label (label1);
5062 expand_dec (adjusted_op0, const1_rtx);
5063 emit_jump_insn (targetm.gen_jump (label4));
5064 emit_barrier ();
5065 emit_label (label2);
5066 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5067 compute_mode, label3);
5068 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5069 quotient, 0, OPTAB_LIB_WIDEN);
5070 if (tem != quotient)
5071 emit_move_insn (quotient, tem);
5072 emit_jump_insn (targetm.gen_jump (label5));
5073 emit_barrier ();
5074 emit_label (label3);
5075 expand_inc (adjusted_op0, const1_rtx);
5076 emit_label (label4);
5077 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5078 quotient, 0, OPTAB_LIB_WIDEN);
5079 if (tem != quotient)
5080 emit_move_insn (quotient, tem);
5081 expand_inc (quotient, const1_rtx);
5082 emit_label (label5);
5083 }
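/* The five-label sequence above is equivalent to this sketch
   (hypothetical helper; y != 0, and x == INT_MIN with y == -1 would
   overflow under any scheme):

     int
     ceil_sdiv_adjust (int x, int y)
     {
       if (y >= 0 ? x > 0 : x < 0)  // true quotient is positive
         return (x - (y >= 0 ? 1 : -1)) / y + 1;  // step x toward zero
       return x / y;                // truncation already rounds up
     }
*/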
5084 }
5085 break;
5086
5087 case EXACT_DIV_EXPR:
5088 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5089 {
5090 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5091 int size = GET_MODE_BITSIZE (int_mode);
5092 HOST_WIDE_INT d = INTVAL (op1);
5093 unsigned HOST_WIDE_INT ml;
5094 int pre_shift;
5095 rtx t1;
5096
5097 pre_shift = ctz_or_zero (d);
5098 ml = invert_mod2n (d >> pre_shift, size);
5099 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5100 pre_shift, NULL_RTX, unsignedp);
5101 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5102 NULL_RTX, 1);
5103
5104 insn = get_last_insn ();
5105 set_dst_reg_note (insn, REG_EQUAL,
5106 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5107 int_mode, op0, op1),
5108 quotient);
5109 }
5110 break;
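/* invert_mod2n computes the multiplicative inverse of an odd number
   modulo 2^N.  One classic way to obtain such an inverse (a sketch,
   not necessarily the algorithm invert_mod2n uses) is Newton's
   iteration, which doubles the number of correct low-order bits per
   step:

     static unsigned int
     inverse_mod_2_32 (unsigned int d)  // D must be odd
     {
       unsigned int x = d;              // d*d == 1 mod 8: 3 good bits
       for (int i = 0; i < 4; i++)      // 3 -> 6 -> 12 -> 24 -> 48 bits
         x *= 2 - d * x;
       return x;                        // d * x == 1 (mod 2^32)
     }

   If x is an exact multiple of d == e << shift with e odd, then
   x / d == (x >> shift) * inverse_mod_2_32 (e) modulo 2^32, which is
   the shift-and-multiply sequence emitted above.  */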
5111
5112 case ROUND_DIV_EXPR:
5113 case ROUND_MOD_EXPR:
5114 if (unsignedp)
5115 {
5116 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5117 rtx tem;
5118 rtx_code_label *label;
5119 label = gen_label_rtx ();
5120 quotient = gen_reg_rtx (int_mode);
5121 remainder = gen_reg_rtx (int_mode);
5122 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5123 {
5124 rtx tem;
5125 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5126 quotient, 1, OPTAB_LIB_WIDEN);
5127 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5128 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5129 remainder, 1, OPTAB_LIB_WIDEN);
5130 }
5131 tem = plus_constant (int_mode, op1, -1);
5132 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5133 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5134 expand_inc (quotient, const1_rtx);
5135 expand_dec (remainder, op1);
5136 emit_label (label);
5137 }
5138 else
5139 {
5140 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5141 int size = GET_MODE_BITSIZE (int_mode);
5142 rtx abs_rem, abs_op1, tem, mask;
5143 rtx_code_label *label;
5144 label = gen_label_rtx ();
5145 quotient = gen_reg_rtx (int_mode);
5146 remainder = gen_reg_rtx (int_mode);
5147 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5148 {
5149 rtx tem;
5150 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5151 quotient, 0, OPTAB_LIB_WIDEN);
5152 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5153 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5154 remainder, 0, OPTAB_LIB_WIDEN);
5155 }
5156 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5157 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5158 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5159 1, NULL_RTX, 1);
5160 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5161 tem = expand_binop (int_mode, xor_optab, op0, op1,
5162 NULL_RTX, 0, OPTAB_WIDEN);
5163 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5164 size - 1, NULL_RTX, 0);
5165 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5166 NULL_RTX, 0, OPTAB_WIDEN);
5167 tem = expand_binop (int_mode, sub_optab, tem, mask,
5168 NULL_RTX, 0, OPTAB_WIDEN);
5169 expand_inc (quotient, tem);
5170 tem = expand_binop (int_mode, xor_optab, mask, op1,
5171 NULL_RTX, 0, OPTAB_WIDEN);
5172 tem = expand_binop (int_mode, sub_optab, tem, mask,
5173 NULL_RTX, 0, OPTAB_WIDEN);
5174 expand_dec (remainder, tem);
5175 emit_label (label);
5176 }
5177 return gen_lowpart (mode, rem_flag ? remainder : quotient);
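/* Both branches reduce round-to-nearest (ties away from zero) to a
   truncating division plus a compensation.  Hypothetical sketches,
   assuming 32-bit int and arithmetic right shift of negative values:

     unsigned int
     round_udiv (unsigned int x, unsigned int y)
     {
       unsigned int q = x / y, r = x % y;
       return q + (r > (y - 1) / 2);  // bump if past the midpoint
     }

     int
     round_sdiv (int x, int y)  // branchless signed form
     {
       int q = x / y, r = x % y;
       int mask = (x ^ y) >> 31;  // 0 or -1: sign of true quotient
       unsigned int ar = r < 0 ? -(unsigned int) r : (unsigned int) r;
       unsigned int ay = y < 0 ? -(unsigned int) y : (unsigned int) y;
       if (2u * ar >= ay)
         q += (mask ^ 1) - mask;  // +1 or -1, away from zero
       return q;
     }
*/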
5178
5179 default:
5180 gcc_unreachable ();
5181 }
5182
5183 if (quotient == 0)
5184 {
5185 if (target && GET_MODE (target) != compute_mode)
5186 target = 0;
5187
5188 if (rem_flag)
5189 {
5190 /* Try to produce the remainder without producing the quotient.
5191 If we seem to have a divmod pattern that does not require widening,
5192 don't try widening here. We should really have a WIDEN argument
5193 to expand_twoval_binop, since what we'd really like to do here is
5194 1) try a mod insn in compute_mode
5195 2) try a divmod insn in compute_mode
5196 3) try a div insn in compute_mode and multiply-subtract to get
5197 remainder
5198 4) try the same things with widening allowed. */
5199 remainder
5200 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5201 op0, op1, target,
5202 unsignedp,
5203 ((optab_handler (optab2, compute_mode)
5204 != CODE_FOR_nothing)
5205 ? OPTAB_DIRECT : OPTAB_WIDEN));
5206 if (remainder == 0)
5207 {
5208 /* No luck there. Can we do remainder and divide at once
5209 without a library call? */
5210 remainder = gen_reg_rtx (compute_mode);
5211 if (! expand_twoval_binop ((unsignedp
5212 ? udivmod_optab
5213 : sdivmod_optab),
5214 op0, op1,
5215 NULL_RTX, remainder, unsignedp))
5216 remainder = 0;
5217 }
5218
5219 if (remainder)
5220 return gen_lowpart (mode, remainder);
5221 }
5222
5223 /* Produce the quotient. Try a quotient insn, but not a library call.
5224 If we have a divmod in this mode, use it in preference to widening
5225 the div (for this test we assume it will not fail). Note that optab2
5226 is set to whichever of the two optabs the call below will use. */
5227 quotient
5228 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5229 op0, op1, rem_flag ? NULL_RTX : target,
5230 unsignedp,
5231 ((optab_handler (optab2, compute_mode)
5232 != CODE_FOR_nothing)
5233 ? OPTAB_DIRECT : OPTAB_WIDEN));
5234
5235 if (quotient == 0)
5236 {
5237 /* No luck there. Try a quotient-and-remainder insn,
5238 keeping the quotient alone. */
5239 quotient = gen_reg_rtx (compute_mode);
5240 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5241 op0, op1,
5242 quotient, NULL_RTX, unsignedp))
5243 {
5244 quotient = 0;
5245 if (! rem_flag)
5246 /* Still no luck. If we are not computing the remainder,
5247 use a library call for the quotient. */
5248 quotient = sign_expand_binop (compute_mode,
5249 udiv_optab, sdiv_optab,
5250 op0, op1, target,
5251 unsignedp, OPTAB_LIB_WIDEN);
5252 }
5253 }
5254 }
5255
5256 if (rem_flag)
5257 {
5258 if (target && GET_MODE (target) != compute_mode)
5259 target = 0;
5260
5261 if (quotient == 0)
5262 {
5263 /* No divide instruction either. Use library for remainder. */
5264 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5265 op0, op1, target,
5266 unsignedp, OPTAB_LIB_WIDEN);
5267 /* No remainder function. Try a quotient-and-remainder
5268 function, keeping the remainder. */
5269 if (!remainder)
5270 {
5271 remainder = gen_reg_rtx (compute_mode);
5272 if (!expand_twoval_binop_libfunc
5273 (unsignedp ? udivmod_optab : sdivmod_optab,
5274 op0, op1,
5275 NULL_RTX, remainder,
5276 unsignedp ? UMOD : MOD))
5277 remainder = NULL_RTX;
5278 }
5279 }
5280 else
5281 {
5282 /* We divided. Now finish doing X - Y * (X / Y). */
5283 remainder = expand_mult (compute_mode, quotient, op1,
5284 NULL_RTX, unsignedp);
5285 remainder = expand_binop (compute_mode, sub_optab, op0,
5286 remainder, target, unsignedp,
5287 OPTAB_LIB_WIDEN);
5288 }
5289 }
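/* In C terms this is the standard identity, valid for both the signed
   and the unsigned case given truncating division:

     r = x - y * (x / y);
*/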
5290
5291 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5292 }
5293 \f
5294 /* Return a tree node with data type TYPE, describing the value of X.
5295 Usually this is a VAR_DECL, if there is no obvious better choice.
5296 X may be an expression; however, we only support those expressions
5297 generated by loop.c. */
5298
5299 tree
5300 make_tree (tree type, rtx x)
5301 {
5302 tree t;
5303
5304 switch (GET_CODE (x))
5305 {
5306 case CONST_INT:
5307 case CONST_WIDE_INT:
5308 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5309 return t;
5310
5311 case CONST_DOUBLE:
5312 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5313 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5314 t = wide_int_to_tree (type,
5315 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5316 HOST_BITS_PER_WIDE_INT * 2));
5317 else
5318 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5319
5320 return t;
5321
5322 case CONST_VECTOR:
5323 {
5324 unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5325 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5326 tree itype = TREE_TYPE (type);
5327
5328 /* Build a tree with vector elements. */
5329 tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5330 unsigned int count = elts.encoded_nelts ();
5331 for (unsigned int i = 0; i < count; ++i)
5332 {
5333 rtx elt = CONST_VECTOR_ELT (x, i);
5334 elts.quick_push (make_tree (itype, elt));
5335 }
5336
5337 return elts.build ();
5338 }
5339
5340 case PLUS:
5341 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5342 make_tree (type, XEXP (x, 1)));
5343
5344 case MINUS:
5345 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5346 make_tree (type, XEXP (x, 1)));
5347
5348 case NEG:
5349 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5350
5351 case MULT:
5352 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5353 make_tree (type, XEXP (x, 1)));
5354
5355 case ASHIFT:
5356 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5357 make_tree (type, XEXP (x, 1)));
5358
5359 case LSHIFTRT:
5360 t = unsigned_type_for (type);
5361 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5362 make_tree (t, XEXP (x, 0)),
5363 make_tree (type, XEXP (x, 1))));
5364
5365 case ASHIFTRT:
5366 t = signed_type_for (type);
5367 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5368 make_tree (t, XEXP (x, 0)),
5369 make_tree (type, XEXP (x, 1))));
5370
5371 case DIV:
5372 if (TREE_CODE (type) != REAL_TYPE)
5373 t = signed_type_for (type);
5374 else
5375 t = type;
5376
5377 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5378 make_tree (t, XEXP (x, 0)),
5379 make_tree (t, XEXP (x, 1))));
5380 case UDIV:
5381 t = unsigned_type_for (type);
5382 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5383 make_tree (t, XEXP (x, 0)),
5384 make_tree (t, XEXP (x, 1))));
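/* The casts above mirror the usual C idiom: logical shifts and
   unsigned division must be carried out in an unsigned type,
   arithmetic shifts and signed division in a signed type, with the
   result converted back.  For instance, (lshiftrt:SI x y)
   corresponds to the sketch:

     int
     lshiftrt_si (int x, int y)
     {
       return (int) ((unsigned int) x >> y);  // zero-filling shift
     }
*/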
5385
5386 case SIGN_EXTEND:
5387 case ZERO_EXTEND:
5388 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5389 GET_CODE (x) == ZERO_EXTEND);
5390 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5391
5392 case CONST:
5393 return make_tree (type, XEXP (x, 0));
5394
5395 case SYMBOL_REF:
5396 t = SYMBOL_REF_DECL (x);
5397 if (t)
5398 return fold_convert (type, build_fold_addr_expr (t));
5399 /* fall through. */
5400
5401 default:
5402 if (CONST_POLY_INT_P (x))
5403 return wide_int_to_tree (type, const_poly_int_value (x));
5404
5405 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5406
5407 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5408 address mode to pointer mode. */
5409 if (POINTER_TYPE_P (type))
5410 x = convert_memory_address_addr_space
5411 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5412
5413 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5414 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5415 t->decl_with_rtl.rtl = x;
5416
5417 return t;
5418 }
5419 }
5420 \f
5421 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5422 and returning TARGET.
5423
5424 If TARGET is 0, a pseudo-register or constant is returned. */
5425
5426 rtx
5427 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5428 {
5429 rtx tem = 0;
5430
5431 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5432 tem = simplify_binary_operation (AND, mode, op0, op1);
5433 if (tem == 0)
5434 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5435
5436 if (target == 0)
5437 target = tem;
5438 else if (tem != target)
5439 emit_move_insn (target, tem);
5440 return target;
5441 }
5442
5443 /* Helper function for emit_store_flag. */
5444 rtx
5445 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5446 machine_mode mode, machine_mode compare_mode,
5447 int unsignedp, rtx x, rtx y, int normalizep,
5448 machine_mode target_mode)
5449 {
5450 class expand_operand ops[4];
5451 rtx op0, comparison, subtarget;
5452 rtx_insn *last;
5453 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5454 scalar_int_mode int_target_mode;
5455
5456 last = get_last_insn ();
5457 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5458 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5459 if (!x || !y)
5460 {
5461 delete_insns_since (last);
5462 return NULL_RTX;
5463 }
5464
5465 if (target_mode == VOIDmode)
5466 int_target_mode = result_mode;
5467 else
5468 int_target_mode = as_a <scalar_int_mode> (target_mode);
5469 if (!target)
5470 target = gen_reg_rtx (int_target_mode);
5471
5472 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5473
5474 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5475 create_fixed_operand (&ops[1], comparison);
5476 create_fixed_operand (&ops[2], x);
5477 create_fixed_operand (&ops[3], y);
5478 if (!maybe_expand_insn (icode, 4, ops))
5479 {
5480 delete_insns_since (last);
5481 return NULL_RTX;
5482 }
5483 subtarget = ops[0].value;
5484
5485 /* If we are converting to a wider mode, first convert to
5486 INT_TARGET_MODE, then normalize. This produces better combining
5487 opportunities on machines that have a SIGN_EXTRACT when we are
5488 testing a single bit. This mostly benefits the 68k.
5489
5490 If STORE_FLAG_VALUE does not have the sign bit set when
5491 interpreted in MODE, we can do this conversion as unsigned, which
5492 is usually more efficient. */
5493 if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5494 {
5495 gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5496 || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5497
5498 bool unsignedp = (STORE_FLAG_VALUE >= 0);
5499 convert_move (target, subtarget, unsignedp);
5500
5501 op0 = target;
5502 result_mode = int_target_mode;
5503 }
5504 else
5505 op0 = subtarget;
5506
5507 /* If we want to keep subexpressions around, don't reuse our last
5508 target. */
5509 if (optimize)
5510 subtarget = 0;
5511
5512 /* Now normalize to the proper value in MODE. Sometimes we don't
5513 have to do anything. */
5514 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5515 ;
5516 /* STORE_FLAG_VALUE might be the most negative number, so write
5517 the comparison this way to avoid a compile-time warning. */
5518 else if (- normalizep == STORE_FLAG_VALUE)
5519 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5520
5521 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5522 it hard to use a value of just the sign bit due to ANSI integer
5523 constant typing rules. */
5524 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5525 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5526 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5527 normalizep == 1);
5528 else
5529 {
5530 gcc_assert (STORE_FLAG_VALUE & 1);
5531
5532 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5533 if (normalizep == -1)
5534 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5535 }
5536
5537 /* If we were converting to a smaller mode, do the conversion now. */
5538 if (int_target_mode != result_mode)
5539 {
5540 convert_move (target, op0, 0);
5541 return target;
5542 }
5543 else
5544 return op0;
5545 }
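/* A concrete picture of the normalization above: if the raw flag
   value F is 0 or a value with only the sign bit set
   (STORE_FLAG_VALUE == 0x80000000 on a 32-bit machine, say), then,
   assuming arithmetic right shifts of negative values:

     unsigned int as01 = (unsigned int) f >> 31;  // NORMALIZEP == 1
     int as0m1 = f >> 31;                         // NORMALIZEP == -1

   while a machine with STORE_FLAG_VALUE == 1 needs no work for
   NORMALIZEP == 1 and only a negation for NORMALIZEP == -1.  */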
5546
5547
5548 /* A subroutine of emit_store_flag only including "tricks" that do not
5549 need a recursive call. These are kept separate to avoid infinite
5550 loops. */
5551
5552 static rtx
5553 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5554 machine_mode mode, int unsignedp, int normalizep,
5555 machine_mode target_mode)
5556 {
5557 rtx subtarget;
5558 enum insn_code icode;
5559 machine_mode compare_mode;
5560 enum mode_class mclass;
5561 enum rtx_code scode;
5562
5563 if (unsignedp)
5564 code = unsigned_condition (code);
5565 scode = swap_condition (code);
5566
5567 /* If one operand is constant, make it the second one. Only do this
5568 if the other operand is not constant as well. */
5569
5570 if (swap_commutative_operands_p (op0, op1))
5571 {
5572 std::swap (op0, op1);
5573 code = swap_condition (code);
5574 }
5575
5576 if (mode == VOIDmode)
5577 mode = GET_MODE (op0);
5578
5579 if (CONST_SCALAR_INT_P (op1))
5580 canonicalize_comparison (mode, &code, &op1);
5581
5582 /* For some comparisons with 1 and -1, we can convert this to
5583 comparisons with zero. This will often produce more opportunities for
5584 store-flag insns. */
5585
5586 switch (code)
5587 {
5588 case LT:
5589 if (op1 == const1_rtx)
5590 op1 = const0_rtx, code = LE;
5591 break;
5592 case LE:
5593 if (op1 == constm1_rtx)
5594 op1 = const0_rtx, code = LT;
5595 break;
5596 case GE:
5597 if (op1 == const1_rtx)
5598 op1 = const0_rtx, code = GT;
5599 break;
5600 case GT:
5601 if (op1 == constm1_rtx)
5602 op1 = const0_rtx, code = GE;
5603 break;
5604 case GEU:
5605 if (op1 == const1_rtx)
5606 op1 = const0_rtx, code = NE;
5607 break;
5608 case LTU:
5609 if (op1 == const1_rtx)
5610 op1 = const0_rtx, code = EQ;
5611 break;
5612 default:
5613 break;
5614 }
5615
5616 /* If we are comparing a double-word integer with zero or -1, we can
5617 convert the comparison into one involving a single word. */
5618 scalar_int_mode int_mode;
5619 if (is_int_mode (mode, &int_mode)
5620 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5621 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5622 {
5623 rtx tem;
5624 if ((code == EQ || code == NE)
5625 && (op1 == const0_rtx || op1 == constm1_rtx))
5626 {
5627 rtx op00, op01;
5628
5629 /* Do a logical OR or AND of the two words and compare the
5630 result. */
5631 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5632 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5633 tem = expand_binop (word_mode,
5634 op1 == const0_rtx ? ior_optab : and_optab,
5635 op00, op01, NULL_RTX, unsignedp,
5636 OPTAB_DIRECT);
5637
5638 if (tem != 0)
5639 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5640 unsignedp, normalizep);
5641 }
5642 else if ((code == LT || code == GE) && op1 == const0_rtx)
5643 {
5644 rtx op0h;
5645
5646 /* If testing the sign bit, can just test on high word. */
5647 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5648 subreg_highpart_offset (word_mode,
5649 int_mode));
5650 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5651 unsignedp, normalizep);
5652 }
5653 else
5654 tem = NULL_RTX;
5655
5656 if (tem)
5657 {
5658 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5659 return tem;
5660 if (!target)
5661 target = gen_reg_rtx (target_mode);
5662
5663 convert_move (target, tem,
5664 !val_signbit_known_set_p (word_mode,
5665 (normalizep ? normalizep
5666 : STORE_FLAG_VALUE)));
5667 return target;
5668 }
5669 }
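/* On a 32-bit-word target the double-word reductions above look like
   this for a 64-bit value split into words LO and HI:

     int eq0  = (lo | hi) == 0;            // x == 0
     int eqm1 = (lo & hi) == 0xffffffffu;  // x == -1: both words all-ones
     int neg  = (int) hi < 0;              // x < 0: sign word only
*/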
5670
5671 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5672 complement of A (for GE) and shifting the sign bit to the low bit. */
5673 if (op1 == const0_rtx && (code == LT || code == GE)
5674 && is_int_mode (mode, &int_mode)
5675 && (normalizep || STORE_FLAG_VALUE == 1
5676 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5677 {
5678 scalar_int_mode int_target_mode;
5679 subtarget = target;
5680
5681 if (!target)
5682 int_target_mode = int_mode;
5683 else
5684 {
5685 /* If the result is to be wider than OP0, it is best to convert it
5686 first. If it is to be narrower, it is *incorrect* to convert it
5687 first. */
5688 int_target_mode = as_a <scalar_int_mode> (target_mode);
5689 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5690 {
5691 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5692 int_mode = int_target_mode;
5693 }
5694 }
5695
5696 if (int_target_mode != int_mode)
5697 subtarget = 0;
5698
5699 if (code == GE)
5700 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5701 ((STORE_FLAG_VALUE == 1 || normalizep)
5702 ? 0 : subtarget), 0);
5703
5704 if (STORE_FLAG_VALUE == 1 || normalizep)
5705 /* If we are supposed to produce a 0/1 value, we want to do
5706 a logical shift from the sign bit to the low-order bit; for
5707 a -1/0 value, we do an arithmetic shift. */
5708 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5709 GET_MODE_BITSIZE (int_mode) - 1,
5710 subtarget, normalizep != -1);
5711
5712 if (int_mode != int_target_mode)
5713 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5714
5715 return op0;
5716 }
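/* In C terms, for a 32-bit mode in two's complement:

     unsigned int lt0 = (unsigned int) x >> 31;   // 1 iff x < 0
     unsigned int ge0 = (unsigned int) ~x >> 31;  // 1 iff x >= 0

   and an arithmetic shift by 31 yields the 0 / -1 forms instead.  */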
5717
5718 mclass = GET_MODE_CLASS (mode);
5719 FOR_EACH_MODE_FROM (compare_mode, mode)
5720 {
5721 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5722 icode = optab_handler (cstore_optab, optab_mode);
5723 if (icode != CODE_FOR_nothing)
5724 {
5725 do_pending_stack_adjust ();
5726 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5727 unsignedp, op0, op1, normalizep, target_mode);
5728 if (tem)
5729 return tem;
5730
5731 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5732 {
5733 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5734 unsignedp, op1, op0, normalizep, target_mode);
5735 if (tem)
5736 return tem;
5737 }
5738 break;
5739 }
5740 }
5741
5742 return 0;
5743 }
5744
5745 /* Subroutine of emit_store_flag that handles cases in which the operands
5746 are scalar integers. SUBTARGET is the target to use for temporary
5747 operations and TRUEVAL is the value to store when the condition is
5748 true. All other arguments are as for emit_store_flag. */
5749
5750 rtx
5751 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5752 rtx op1, scalar_int_mode mode, int unsignedp,
5753 int normalizep, rtx trueval)
5754 {
5755 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5756 rtx_insn *last = get_last_insn ();
5757
5758 /* If this is an equality comparison of integers, we can try to exclusive-or
5759 (or subtract) the two operands and use a recursive call to try the
5760 comparison with zero. Don't do any of these cases if branches are
5761 very cheap. */
5762
5763 if ((code == EQ || code == NE) && op1 != const0_rtx)
5764 {
5765 rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5766 OPTAB_WIDEN);
5767
5768 if (tem == 0)
5769 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5770 OPTAB_WIDEN);
5771 if (tem != 0)
5772 tem = emit_store_flag (target, code, tem, const0_rtx,
5773 mode, unsignedp, normalizep);
5774 if (tem != 0)
5775 return tem;
5776
5777 delete_insns_since (last);
5778 }
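/* This rests on the identities

     x == y  <==>  (x ^ y) == 0  <==>  (x - y) == 0

   so one XOR or subtraction converts any equality test into a test
   against zero, for which store-flag patterns are more common.  */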
5779
5780 /* For integer comparisons, try the reverse comparison. However, for
5781 small X, if we would have to extend it anyway, implementing "X != 0"
5782 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5783 rtx_code rcode = reverse_condition (code);
5784 if (can_compare_p (rcode, mode, ccp_store_flag)
5785 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5786 && code == NE
5787 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5788 && op1 == const0_rtx))
5789 {
5790 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5791 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5792
5793 /* Again, for the reverse comparison, use either an addition or an XOR. */
5794 if (want_add
5795 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5796 optimize_insn_for_speed_p ()) == 0)
5797 {
5798 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5799 STORE_FLAG_VALUE, target_mode);
5800 if (tem != 0)
5801 tem = expand_binop (target_mode, add_optab, tem,
5802 gen_int_mode (normalizep, target_mode),
5803 target, 0, OPTAB_WIDEN);
5804 if (tem != 0)
5805 return tem;
5806 }
5807 else if (!want_add
5808 && rtx_cost (trueval, mode, XOR, 1,
5809 optimize_insn_for_speed_p ()) == 0)
5810 {
5811 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5812 normalizep, target_mode);
5813 if (tem != 0)
5814 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5815 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5816 if (tem != 0)
5817 return tem;
5818 }
5819
5820 delete_insns_since (last);
5821 }
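/* For example, with STORE_FLAG_VALUE == 1 and NORMALIZEP == -1, the
   reversed comparison gives the negation of the wanted condition as
   0 or 1, and one addition or XOR finishes the job:

     int r = !(x < y);      // reversed condition
     int add_form = r - 1;  // -1 when x < y, else 0
     int xor_form = r ^ 1;  //  1 when x < y, else 0
*/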
5822
5823 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5824 the constant zero. Reject all other comparisons at this point. Only
5825 do LE and GT if branches are expensive, since these comparisons are
5826 costly on 2-operand machines. */
5827
5828 if (op1 != const0_rtx
5829 || (code != EQ && code != NE
5830 && (BRANCH_COST (optimize_insn_for_speed_p (),
5831 false) <= 1 || (code != LE && code != GT))))
5832 return 0;
5833
5834 /* Try to put the result of the comparison in the sign bit. Assume we can't
5835 do the necessary operation below. */
5836
5837 rtx tem = 0;
5838
5839 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5840 the sign bit set. */
5841
5842 if (code == LE)
5843 {
5844 /* This is destructive, so SUBTARGET can't be OP0. */
5845 if (rtx_equal_p (subtarget, op0))
5846 subtarget = 0;
5847
5848 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5849 OPTAB_WIDEN);
5850 if (tem)
5851 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5852 OPTAB_WIDEN);
5853 }
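/* Why (A | (A - 1)) works: if A > 0, both A and A - 1 are
   nonnegative, so the OR has a clear sign bit; if A == 0, A - 1
   wraps to all-ones; if A < 0, A itself supplies the sign bit.
   In unsigned C arithmetic, for a 32-bit mode:

     unsigned int ux = (unsigned int) x;
     unsigned int le0 = (ux | (ux - 1)) >> 31;  // 1 iff x <= 0
*/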
5854
5855 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5856 number of bits in the mode of OP0, minus one. */
5857
5858 if (code == GT)
5859 {
5860 if (rtx_equal_p (subtarget, op0))
5861 subtarget = 0;
5862
5863 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5864 GET_MODE_BITSIZE (mode) - 1,
5865 subtarget, 0);
5866 if (tem)
5867 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5868 OPTAB_WIDEN);
5869 }
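/* And for A > 0: the arithmetic shift smears the sign bit across the
   word, so the subtraction below computes -A for A >= 0 and -1 - A
   for A < 0, and its sign bit is set exactly when A is positive
   (sketch assumes 32-bit int and arithmetic right shift):

     int t = (x >> 31) - x;
     unsigned int gt0 = (unsigned int) t >> 31;  // 1 iff x > 0
*/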
5870
5871 if (code == EQ || code == NE)
5872 {
5873 /* For EQ or NE, one way to do the comparison is to apply an operation
5874 that converts the operand into a positive number if it is nonzero
5875 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5876 for NE we negate. This puts the result in the sign bit. Then we
5877 normalize with a shift, if needed.
5878
5879 Two operations that can do the above actions are ABS and FFS, so try
5880 them. If that doesn't work, and MODE is smaller than a full word,
5881 we can use zero-extension to the wider mode (an unsigned conversion)
5882 as the operation. */
5883
5884 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5885 that is compensated by the subsequent overflow when subtracting
5886 one / negating. */
5887
5888 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5889 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5890 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5891 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5892 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5893 {
5894 tem = convert_modes (word_mode, mode, op0, 1);
5895 mode = word_mode;
5896 }
5897
5898 if (tem != 0)
5899 {
5900 if (code == EQ)
5901 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5902 0, OPTAB_WIDEN);
5903 else
5904 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5905 }
5906
5907 /* If we couldn't do it that way, for NE we can "or" the two's complement
5908 of the value with itself. For EQ, we take the one's complement of
5909 that "or", which is an extra insn, so we only handle EQ if branches
5910 are expensive. */
5911
5912 if (tem == 0
5913 && (code == NE
5914 || BRANCH_COST (optimize_insn_for_speed_p (),
5915 false) > 1))
5916 {
5917 if (rtx_equal_p (subtarget, op0))
5918 subtarget = 0;
5919
5920 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5921 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5922 OPTAB_WIDEN);
5923
5924 if (tem && code == EQ)
5925 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5926 }
5927 }
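/* Collected sketches of the reductions above, in 32-bit wrapping
   arithmetic with a hypothetical two's complement iabs (the INT_MIN
   overflow it inherits is harmless here, as the comment notes):

     unsigned int eq0 = (unsigned int) (iabs (x) - 1) >> 31;  // x == 0
     unsigned int ne0 = (unsigned int) -iabs (x) >> 31;       // x != 0
     // without abs or ffs: x | -x has its sign bit set iff x != 0
     unsigned int ux = (unsigned int) x;
     unsigned int ne0_alt = (ux | (0u - ux)) >> 31;
*/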
5928
5929 if (tem && normalizep)
5930 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5931 GET_MODE_BITSIZE (mode) - 1,
5932 subtarget, normalizep == 1);
5933
5934 if (tem)
5935 {
5936 if (!target)
5937 ;
5938 else if (GET_MODE (tem) != target_mode)
5939 {
5940 convert_move (target, tem, 0);
5941 tem = target;
5942 }
5943 else if (!subtarget)
5944 {
5945 emit_move_insn (target, tem);
5946 tem = target;
5947 }
5948 }
5949 else
5950 delete_insns_since (last);
5951
5952 return tem;
5953 }
5954
5955 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5956 and storing in TARGET. Normally return TARGET.
5957 Return 0 if that cannot be done.
5958
5959 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5960 it is VOIDmode, they cannot both be CONST_INT.
5961
5962 UNSIGNEDP is for the case where we have to widen the operands
5963 to perform the operation. It says to use zero-extension.
5964
5965 NORMALIZEP is 1 if we should convert the result to be either zero
5966 or one. NORMALIZEP is -1 if we should convert the result to be
5967 either zero or -1. If NORMALIZEP is zero, the result will be left
5968 "raw" out of the scc insn. */
5969
5970 rtx
5971 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5972 machine_mode mode, int unsignedp, int normalizep)
5973 {
5974 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5975 enum rtx_code rcode;
5976 rtx subtarget;
5977 rtx tem, trueval;
5978 rtx_insn *last;
5979
5980 /* If we compare constants, we shouldn't use a store-flag operation,
5981 but a constant load. We can get there via the vanilla route that
5982 usually generates a compare-branch sequence, but will in this case
5983 fold the comparison to a constant, and thus elide the branch. */
5984 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5985 return NULL_RTX;
5986
5987 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5988 target_mode);
5989 if (tem)
5990 return tem;
5991
5992 /* If we reached here, we can't do this with a scc insn, however there
5993 are some comparisons that can be done in other ways. Don't do any
5994 of these cases if branches are very cheap. */
5995 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5996 return 0;
5997
5998 /* See what we need to return. We can only return a 1, -1, or the
5999 sign bit. */
6000
6001 if (normalizep == 0)
6002 {
6003 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6004 normalizep = STORE_FLAG_VALUE;
6005
6006 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6007 ;
6008 else
6009 return 0;
6010 }
6011
6012 last = get_last_insn ();
6013
6014 /* If optimizing, use different pseudo registers for each insn, instead
6015 of reusing the same pseudo. This leads to better CSE, but slows
6016 down the compiler, since there are more pseudos. */
6017 subtarget = (!optimize
6018 && (target_mode == mode)) ? target : NULL_RTX;
6019 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6020
6021 /* For floating-point comparisons, try the reverse comparison or try
6022 changing the "orderedness" of the comparison. */
6023 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6024 {
6025 enum rtx_code first_code;
6026 bool and_them;
6027
6028 rcode = reverse_condition_maybe_unordered (code);
6029 if (can_compare_p (rcode, mode, ccp_store_flag)
6030 && (code == ORDERED || code == UNORDERED
6031 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6032 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6033 {
6034 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6035 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6036
6037 /* For the reverse comparison, use either an addition or an XOR. */
6038 if (want_add
6039 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6040 optimize_insn_for_speed_p ()) == 0)
6041 {
6042 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6043 STORE_FLAG_VALUE, target_mode);
6044 if (tem)
6045 return expand_binop (target_mode, add_optab, tem,
6046 gen_int_mode (normalizep, target_mode),
6047 target, 0, OPTAB_WIDEN);
6048 }
6049 else if (!want_add
6050 && rtx_cost (trueval, mode, XOR, 1,
6051 optimize_insn_for_speed_p ()) == 0)
6052 {
6053 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6054 normalizep, target_mode);
6055 if (tem)
6056 return expand_binop (target_mode, xor_optab, tem, trueval,
6057 target, INTVAL (trueval) >= 0,
6058 OPTAB_WIDEN);
6059 }
6060 }
6061
6062 delete_insns_since (last);
6063
6064 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
6065 if (code == ORDERED || code == UNORDERED)
6066 return 0;
6067
6068 and_them = split_comparison (code, mode, &first_code, &code);
6069
6070 /* If there are no NaNs, the first comparison should always fall through.
6071 Effectively change the comparison to the other one. */
6072 if (!HONOR_NANS (mode))
6073 {
6074 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6075 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6076 target_mode);
6077 }
6078
6079 if (!HAVE_conditional_move)
6080 return 0;
6081
6082 /* Do not turn a trapping comparison into a non-trapping one. */
6083 if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6084 && flag_trapping_math)
6085 return 0;
6086
6087 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6088 conditional move. */
6089 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6090 normalizep, target_mode);
6091 if (tem == 0)
6092 return 0;
6093
6094 if (and_them)
6095 tem = emit_conditional_move (target, code, op0, op1, mode,
6096 tem, const0_rtx, GET_MODE (tem), 0);
6097 else
6098 tem = emit_conditional_move (target, code, op0, op1, mode,
6099 trueval, tem, GET_MODE (tem), 0);
6100
6101 if (tem == 0)
6102 delete_insns_since (last);
6103 return tem;
6104 }
6105
6106 /* The remaining tricks only apply to integer comparisons. */
6107
6108 scalar_int_mode int_mode;
6109 if (is_int_mode (mode, &int_mode))
6110 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6111 unsignedp, normalizep, trueval);
6112
6113 return 0;
6114 }
6115
6116 /* Like emit_store_flag, but always succeeds. */
6117
6118 rtx
6119 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6120 machine_mode mode, int unsignedp, int normalizep)
6121 {
6122 rtx tem;
6123 rtx_code_label *label;
6124 rtx trueval, falseval;
6125
6126 /* First see if emit_store_flag can do the job. */
6127 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6128 if (tem != 0)
6129 return tem;
6130
6131 /* If one operand is constant, make it the second one. Only do this
6132 if the other operand is not constant as well. */
6133 if (swap_commutative_operands_p (op0, op1))
6134 {
6135 std::swap (op0, op1);
6136 code = swap_condition (code);
6137 }
6138
6139 if (mode == VOIDmode)
6140 mode = GET_MODE (op0);
6141
6142 if (!target)
6143 target = gen_reg_rtx (word_mode);
6144
6145 /* If this failed, we have to do this with set/compare/jump/set code.
6146 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
6147 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6148 if (code == NE
6149 && GET_MODE_CLASS (mode) == MODE_INT
6150 && REG_P (target)
6151 && op0 == target
6152 && op1 == const0_rtx)
6153 {
6154 label = gen_label_rtx ();
6155 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6156 NULL_RTX, NULL, label,
6157 profile_probability::uninitialized ());
6158 emit_move_insn (target, trueval);
6159 emit_label (label);
6160 return target;
6161 }
6162
6163 if (!REG_P (target)
6164 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6165 target = gen_reg_rtx (GET_MODE (target));
6166
6167 /* Jump in the right direction if the target cannot implement CODE
6168 but can jump on its reverse condition. */
6169 falseval = const0_rtx;
6170 if (! can_compare_p (code, mode, ccp_jump)
6171 && (! FLOAT_MODE_P (mode)
6172 || code == ORDERED || code == UNORDERED
6173 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6174 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6175 {
6176 enum rtx_code rcode;
6177 if (FLOAT_MODE_P (mode))
6178 rcode = reverse_condition_maybe_unordered (code);
6179 else
6180 rcode = reverse_condition (code);
6181
6182 /* Canonicalize to UNORDERED for the libcall. */
6183 if (can_compare_p (rcode, mode, ccp_jump)
6184 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6185 {
6186 falseval = trueval;
6187 trueval = const0_rtx;
6188 code = rcode;
6189 }
6190 }
6191
6192 emit_move_insn (target, trueval);
6193 label = gen_label_rtx ();
6194 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6195 label, profile_probability::uninitialized ());
6196
6197 emit_move_insn (target, falseval);
6198 emit_label (label);
6199
6200 return target;
6201 }
6202
6203 /* Helper function for canonicalize_cmp_for_target. Swap between inclusive
6204 and exclusive ranges in order to create an equivalent comparison. See
6205 canonicalize_cmp_for_target for the possible cases. */
6206
6207 static enum rtx_code
6208 equivalent_cmp_code (enum rtx_code code)
6209 {
6210 switch (code)
6211 {
6212 case GT:
6213 return GE;
6214 case GE:
6215 return GT;
6216 case LT:
6217 return LE;
6218 case LE:
6219 return LT;
6220 case GTU:
6221 return GEU;
6222 case GEU:
6223 return GTU;
6224 case LTU:
6225 return LEU;
6226 case LEU:
6227 return LTU;
6228
6229 default:
6230 return code;
6231 }
6232 }
6233
6234 /* Choose the more appropriate immediate in scalar integer comparisons. The
6235 purpose of this is to end up with an immediate which can be loaded into a
6236 register in fewer moves, if possible.
6237
6238 For each integer comparison there exists an equivalent choice:
6239 i) a > b or a >= b + 1
6240 ii) a <= b or a < b + 1
6241 iii) a >= b or a > b - 1
6242 iv) a < b or a <= b - 1
6243
6244 MODE is the mode of the first operand.
6245 CODE points to the comparison code.
6246 IMM points to the rtx containing the immediate. *IMM must satisfy
6247 CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6248 on exit. */
6249
6250 void
6251 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6252 {
6253 if (!SCALAR_INT_MODE_P (mode))
6254 return;
6255
6256 int to_add = 0;
6257 enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6258
6259 /* Extract the immediate value from the rtx. */
6260 wide_int imm_val = rtx_mode_t (*imm, mode);
6261
6262 if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6263 to_add = 1;
6264 else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6265 to_add = -1;
6266 else
6267 return;
6268
6269 /* Check for overflow/underflow in the case of signed values and
6270 wrapping around in the case of unsigned values. If either occurs,
6271 cancel the optimization. */
6272 wi::overflow_type overflow = wi::OVF_NONE;
6273 wide_int imm_modif;
6274
6275 if (to_add == 1)
6276 imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6277 else
6278 imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6279
6280 if (overflow)
6281 return;
6282
6283 /* The following creates a pseudo; if we cannot do that, bail out. */
6284 if (!can_create_pseudo_p ())
6285 return;
6286
6287 rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6288 rtx new_imm = immed_wide_int_const (imm_modif, mode);
6289
6290 rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6291 rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6292
6293 /* Update the immediate and the code. */
6294 if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6295 {
6296 *code = equivalent_cmp_code (*code);
6297 *imm = new_imm;
6298 }
6299 }
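/* For example, on a target where 0x1000000 is cheaper to load than
   0xffffff (a hypothetical cost situation), this rewrites

     x <= 0xffffff  (LEU)   as   x < 0x1000000  (LTU)

   which is valid because, for unsigned values, a <= b and a < b + 1
   agree whenever b + 1 does not wrap; the wi::add/wi::sub overflow
   check above rejects exactly the wrapping cases.  */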
6300
6301
6302 \f
6303 /* Perform possibly multi-word comparison and conditional jump to LABEL
6304 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
6305 now a thin wrapper around do_compare_rtx_and_jump. */
6306
6307 static void
6308 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6309 rtx_code_label *label)
6310 {
6311 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6312 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6313 NULL, label, profile_probability::uninitialized ());
6314 }