gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2021 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* Work around tree-optimization/91825.  */
  22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "backend.h"
  28 #include "target.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "predict.h"
  32 #include "memmodel.h"
  33 #include "tm_p.h"
  34 #include "optabs.h"
  35 #include "expmed.h"
  36 #include "regs.h"
  37 #include "emit-rtl.h"
  38 #include "diagnostic-core.h"
  39 #include "fold-const.h"
  40 #include "stor-layout.h"
  41 #include "dojump.h"
  42 #include "explow.h"
  43 #include "expr.h"
  44 #include "langhooks.h"
  45 #include "tree-vector-builder.h"
  46
     /* Default instance of the expmed target state.  When SWITCHABLE_TARGET
        is nonzero, this_target_expmed is additionally defined and starts
        out pointing at this default instance.  */
  47 struct target_expmed default_target_expmed;
  48 #if SWITCHABLE_TARGET
  49 struct target_expmed *this_target_expmed = &default_target_expmed;
  50 #endif
  51
     /* Forward declarations of file-local (static) helpers: the bit-field
        store/extract workers, lshift_value, do_cmp_and_jump and the
        power-of-two signed division/modulus expanders.
        NOTE(review): their definitions are expected further down in this
        file; not all of them are visible from this excerpt.  */
  52 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
  53                                       unsigned HOST_WIDE_INT,
  54                                       unsigned HOST_WIDE_INT,
  55                                       poly_uint64, poly_uint64,
  56                                       machine_mode, rtx, bool, bool);
  57 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
  58                                    unsigned HOST_WIDE_INT,
  59                                    unsigned HOST_WIDE_INT,
  60                                    poly_uint64, poly_uint64,
  61                                    rtx, scalar_int_mode, bool);
  62 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
  63                                      unsigned HOST_WIDE_INT,
  64                                      unsigned HOST_WIDE_INT,
  65                                      rtx, scalar_int_mode, bool);
  66 static void store_split_bit_field (rtx, opt_scalar_int_mode,
  67                                    unsigned HOST_WIDE_INT,
  68                                    unsigned HOST_WIDE_INT,
  69                                    poly_uint64, poly_uint64,
  70                                    rtx, scalar_int_mode, bool);
  71 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
  72                                        unsigned HOST_WIDE_INT,
  73                                        unsigned HOST_WIDE_INT, int, rtx,
  74                                        machine_mode, machine_mode, bool, bool);
  75 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
  76                                     unsigned HOST_WIDE_INT,
  77                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  78 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
  79                                       unsigned HOST_WIDE_INT,
  80                                       unsigned HOST_WIDE_INT, rtx, int, bool);
  81 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  82 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
  83                                     unsigned HOST_WIDE_INT,
  84                                     unsigned HOST_WIDE_INT, int, bool);
  85 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  86 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
  87 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
  88
  89 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  90    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  91    The mask is truncated if necessary to the width of mode MODE.  The
  92    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  93
  94 static inline rtx
  95 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
  96 {
  97   return immed_wide_int_const
  98     (wi::shifted_mask (bitpos, bitsize, complement,
  99                        GET_MODE_PRECISION (mode)), mode);
 100 }
 101
 102 /* Test whether a value is zero of a power of two.  */
 103 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
 104   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
 105
 106 struct init_expmed_rtl
 107 {
 108   rtx reg;
 109   rtx plus;
 110   rtx neg;
 111   rtx mult;
 112   rtx sdiv;
 113   rtx udiv;
 114   rtx sdiv_32;
 115   rtx smod_32;
 116   rtx wide_mult;
 117   rtx wide_lshr;
 118   rtx wide_trunc;
 119   rtx shift;
 120   rtx shift_mult;
 121   rtx shift_add;
 122   rtx shift_sub0;
 123   rtx shift_sub1;
 124   rtx zext;
 125   rtx trunc;
 126
 127   rtx pow2[MAX_BITS_PER_WORD];
 128   rtx cint[MAX_BITS_PER_WORD];
 129 };
 130
 131 static void
 132 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
 133                       scalar_int_mode from_mode, bool speed)
 134 {
 135   int to_size, from_size;
 136   rtx which;
 137
 138   to_size = GET_MODE_PRECISION (to_mode);
 139   from_size = GET_MODE_PRECISION (from_mode);
 140
 141   /* Most partial integers have a precision less than the "full"
 142      integer it requires for storage.  In case one doesn't, for
 143      comparison purposes here, reduce the bit size by one in that
 144      case.  */
 145   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 146       && pow2p_hwi (to_size))
 147     to_size --;
 148   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 149       && pow2p_hwi (from_size))
 150     from_size --;
 151
 152   /* Assume cost of zero-extend and sign-extend is the same.  */
 153   which = (to_size < from_size ? all->trunc : all->zext);
 154
 155   PUT_MODE (all->reg, from_mode);
 156   set_convert_cost (to_mode, from_mode, speed,
 157                     set_src_cost (which, to_mode, speed));
 158   /* Restore all->reg's mode.  */
 159   PUT_MODE (all->reg, to_mode);
 160 }
 161
 162 static void
 163 init_expmed_one_mode (struct init_expmed_rtl *all,
 164                       machine_mode mode, int speed)
 165 {
 166   int m, n, mode_bitsize;
 167   machine_mode mode_from;
 168
 169   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 170
 171   PUT_MODE (all->reg, mode);
 172   PUT_MODE (all->plus, mode);
 173   PUT_MODE (all->neg, mode);
 174   PUT_MODE (all->mult, mode);
 175   PUT_MODE (all->sdiv, mode);
 176   PUT_MODE (all->udiv, mode);
 177   PUT_MODE (all->sdiv_32, mode);
 178   PUT_MODE (all->smod_32, mode);
 179   PUT_MODE (all->wide_trunc, mode);
 180   PUT_MODE (all->shift, mode);
 181   PUT_MODE (all->shift_mult, mode);
 182   PUT_MODE (all->shift_add, mode);
 183   PUT_MODE (all->shift_sub0, mode);
 184   PUT_MODE (all->shift_sub1, mode);
 185   PUT_MODE (all->zext, mode);
 186   PUT_MODE (all->trunc, mode);
 187
 188   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 189   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 190   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 191   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 192   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 193
 194   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 195                                      <= 2 * add_cost (speed, mode)));
 196   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 197                                      <= 4 * add_cost (speed, mode)));
 198
 199   set_shift_cost (speed, mode, 0, 0);
 200   {
 201     int cost = add_cost (speed, mode);
 202     set_shiftadd_cost (speed, mode, 0, cost);
 203     set_shiftsub0_cost (speed, mode, 0, cost);
 204     set_shiftsub1_cost (speed, mode, 0, cost);
 205   }
 206
 207   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 208   for (m = 1; m < n; m++)
 209     {
 210       XEXP (all->shift, 1) = all->cint[m];
 211       XEXP (all->shift_mult, 1) = all->pow2[m];
 212
 213       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 214       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 215                                                        speed));
 216       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 217                                                         speed));
 218       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 219                                                         speed));
 220     }
 221
 222   scalar_int_mode int_mode_to;
 223   if (is_a <scalar_int_mode> (mode, &int_mode_to))
 224     {
 225       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 226            mode_from = (machine_mode)(mode_from + 1))
 227         init_expmed_one_conv (all, int_mode_to,
 228                               as_a <scalar_int_mode> (mode_from), speed);
 229
 230       scalar_int_mode wider_mode;
 231       if (GET_MODE_CLASS (int_mode_to) == MODE_INT
 232           && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
 233         {
 234           PUT_MODE (all->reg, mode);
 235           PUT_MODE (all->zext, wider_mode);
 236           PUT_MODE (all->wide_mult, wider_mode);
 237           PUT_MODE (all->wide_lshr, wider_mode);
 238           XEXP (all->wide_lshr, 1)
 239             = gen_int_shift_amount (wider_mode, mode_bitsize);
 240
 241           set_mul_widen_cost (speed, wider_mode,
 242                               set_src_cost (all->wide_mult, wider_mode, speed));
 243           set_mul_highpart_cost (speed, int_mode_to,
 244                                  set_src_cost (all->wide_trunc,
 245                                                int_mode_to, speed));
 246         }
 247     }
 248 }
 249
 250 void
 251 init_expmed (void)
 252 {
 253   struct init_expmed_rtl all;
 254   machine_mode mode = QImode;
 255   int m, speed;
 256
 257   memset (&all, 0, sizeof all);
 258   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 259     {
 260       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
 261       all.cint[m] = GEN_INT (m);
 262     }
 263
 264   /* Avoid using hard regs in ways which may be unsupported.  */
 265   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 266   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 267   all.neg = gen_rtx_NEG (mode, all.reg);
 268   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 269   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 270   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 271   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 272   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 273   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 274   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 275   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 276   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 277   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 278   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 279   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 280   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 281   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 282   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 283
 284   for (speed = 0; speed < 2; speed++)
 285     {
 286       crtl->maybe_hot_insn_p = speed;
 287       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 288
 289       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 290            mode = (machine_mode)(mode + 1))
 291         init_expmed_one_mode (&all, mode, speed);
 292
 293       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 294         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 295              mode = (machine_mode)(mode + 1))
 296           init_expmed_one_mode (&all, mode, speed);
 297
 298       if (MIN_MODE_VECTOR_INT != VOIDmode)
 299         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 300              mode = (machine_mode)(mode + 1))
 301           init_expmed_one_mode (&all, mode, speed);
 302     }
 303
 304   if (alg_hash_used_p ())
 305     {
 306       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 307       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 308     }
 309   else
 310     set_alg_hash_used_p (true);
 311   default_rtl_profile ();
 312
 313   ggc_free (all.trunc);
 314   ggc_free (all.shift_sub1);
 315   ggc_free (all.shift_sub0);
 316   ggc_free (all.shift_add);
 317   ggc_free (all.shift_mult);
 318   ggc_free (all.shift);
 319   ggc_free (all.wide_trunc);
 320   ggc_free (all.wide_lshr);
 321   ggc_free (all.wide_mult);
 322   ggc_free (all.zext);
 323   ggc_free (all.smod_32);
 324   ggc_free (all.sdiv_32);
 325   ggc_free (all.udiv);
 326   ggc_free (all.sdiv);
 327   ggc_free (all.mult);
 328   ggc_free (all.neg);
 329   ggc_free (all.plus);
 330   ggc_free (all.reg);
 331 }
 332
 333 /* Return an rtx representing minus the value of X.
 334    MODE is the intended mode of the result,
 335    useful if X is a CONST_INT.  */
 336
 337 rtx
 338 negate_rtx (machine_mode mode, rtx x)
 339 {
 340   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 341
 342   if (result == 0)
 343     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 344
 345   return result;
 346 }
 347
 348 /* Whether reverse storage order is supported on the target.  */
 349 static int reverse_storage_order_supported = -1;
 350
 351 /* Check whether reverse storage order is supported on the target.  */
 352
 353 static void
 354 check_reverse_storage_order_support (void)
 355 {
 356   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 357     {
 358       reverse_storage_order_supported = 0;
 359       sorry ("reverse scalar storage order");
 360     }
 361   else
 362     reverse_storage_order_supported = 1;
 363 }
 364
 365 /* Whether reverse FP storage order is supported on the target.  */
 366 static int reverse_float_storage_order_supported = -1;
 367
 368 /* Check whether reverse FP storage order is supported on the target.  */
 369
 370 static void
 371 check_reverse_float_storage_order_support (void)
 372 {
 373   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 374     {
 375       reverse_float_storage_order_supported = 0;
 376       sorry ("reverse floating-point scalar storage order");
 377     }
 378   else
 379     reverse_float_storage_order_supported = 1;
 380 }
 381
 382 /* Return an rtx representing value of X with reverse storage order.
 383    MODE is the intended mode of the result,
 384    useful if X is a CONST_INT.  */
 385
 386 rtx
 387 flip_storage_order (machine_mode mode, rtx x)
 388 {
 389   scalar_int_mode int_mode;
 390   rtx result;
 391
 392   if (mode == QImode)
 393     return x;
 394
 395   if (COMPLEX_MODE_P (mode))
 396     {
 397       rtx real = read_complex_part (x, false);
 398       rtx imag = read_complex_part (x, true);
 399
 400       real = flip_storage_order (GET_MODE_INNER (mode), real);
 401       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 402
 403       return gen_rtx_CONCAT (mode, real, imag);
 404     }
 405
 406   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
 407     check_reverse_storage_order_support ();
 408
 409   if (!is_a <scalar_int_mode> (mode, &int_mode))
 410     {
 411       if (FLOAT_MODE_P (mode)
 412           && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
 413         check_reverse_float_storage_order_support ();
 414
 415       if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
 416           || !targetm.scalar_mode_supported_p (int_mode))
 417         {
 418           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 419           return x;
 420         }
 421       x = gen_lowpart (int_mode, x);
 422     }
 423
 424   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 425   if (result == 0)
 426     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 427
 428   if (int_mode != mode)
 429     result = gen_lowpart (mode, result);
 430
 431   return result;
 432 }
 433
 434 /* If MODE is set, adjust bitfield memory MEM so that it points to the
 435    first unit of mode MODE that contains a bitfield of size BITSIZE at
 436    bit position BITNUM.  If MODE is not set, return a BLKmode reference
 437    to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
 438    of the field within the new memory.  */
 439
 440 static rtx
 441 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
 442                       unsigned HOST_WIDE_INT bitsize,
 443                       unsigned HOST_WIDE_INT bitnum,
 444                       unsigned HOST_WIDE_INT *new_bitnum)
 445 {
 446   scalar_int_mode imode;
 447   if (mode.exists (&imode))
 448     {
 449       unsigned int unit = GET_MODE_BITSIZE (imode);
 450       *new_bitnum = bitnum % unit;
 451       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 452       return adjust_bitfield_address (mem, imode, offset);
 453     }
 454   else
 455     {
 456       *new_bitnum = bitnum % BITS_PER_UNIT;
 457       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 458       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 459                             / BITS_PER_UNIT);
 460       return adjust_bitfield_address_size (mem, BLKmode, offset, size);
 461     }
 462 }
 463
 464 /* The caller wants to perform insertion or extraction PATTERN on a
 465    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 466    BITREGION_START and BITREGION_END are as for store_bit_field
 467    and FIELDMODE is the natural mode of the field.
 468
 469    Search for a mode that is compatible with the memory access
 470    restrictions and (where applicable) with a register insertion or
 471    extraction.  Return the new memory on success, storing the adjusted
 472    bit position in *NEW_BITNUM.  Return null otherwise.  */
 473
 474 static rtx
 475 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 476                               rtx op0, HOST_WIDE_INT bitsize,
 477                               HOST_WIDE_INT bitnum,
 478                               poly_uint64 bitregion_start,
 479                               poly_uint64 bitregion_end,
 480                               machine_mode fieldmode,
 481                               unsigned HOST_WIDE_INT *new_bitnum)
 482 {
 483   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 484                                 bitregion_end, MEM_ALIGN (op0),
 485                                 MEM_VOLATILE_P (op0));
 486   scalar_int_mode best_mode;
 487   if (iter.next_mode (&best_mode))
 488     {
 489       /* We can use a memory in BEST_MODE.  See whether this is true for
 490          any wider modes.  All other things being equal, we prefer to
 491          use the widest mode possible because it tends to expose more
 492          CSE opportunities.  */
 493       if (!iter.prefer_smaller_modes ())
 494         {
 495           /* Limit the search to the mode required by the corresponding
 496              register insertion or extraction instruction, if any.  */
 497           scalar_int_mode limit_mode = word_mode;
 498           extraction_insn insn;
 499           if (get_best_reg_extraction_insn (&insn, pattern,
 500                                             GET_MODE_BITSIZE (best_mode),
 501                                             fieldmode))
 502             limit_mode = insn.field_mode;
 503
 504           scalar_int_mode wider_mode;
 505           while (iter.next_mode (&wider_mode)
 506                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 507             best_mode = wider_mode;
 508         }
 509       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 510                                    new_bitnum);
 511     }
 512   return NULL_RTX;
 513 }
 514
 515 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 516    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 517    offset is then BITNUM / BITS_PER_UNIT.  */
 518
 519 static bool
 520 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
 521                      machine_mode struct_mode)
 522 {
 523   poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
 524   if (BYTES_BIG_ENDIAN)
 525     return (multiple_p (bitnum, BITS_PER_UNIT)
 526             && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
 527                 || multiple_p (bitnum + bitsize,
 528                                regsize * BITS_PER_UNIT)));
 529   else
 530     return multiple_p (bitnum, regsize * BITS_PER_UNIT);
 531 }
 532
 533 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 534    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 535    Return false if the access would touch memory outside the range
 536    BITREGION_START to BITREGION_END for conformance to the C++ memory
 537    model.  */
 538
 539 static bool
 540 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 541                             unsigned HOST_WIDE_INT bitnum,
 542                             scalar_int_mode fieldmode,
 543                             poly_uint64 bitregion_start,
 544                             poly_uint64 bitregion_end)
 545 {
 546   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 547
 548   /* -fstrict-volatile-bitfields must be enabled and we must have a
 549      volatile MEM.  */
 550   if (!MEM_P (op0)
 551       || !MEM_VOLATILE_P (op0)
 552       || flag_strict_volatile_bitfields <= 0)
 553     return false;
 554
 555   /* The bit size must not be larger than the field mode, and
 556      the field mode must not be larger than a word.  */
 557   if (bitsize > modesize || modesize > BITS_PER_WORD)
 558     return false;
 559
 560   /* Check for cases of unaligned fields that must be split.  */
 561   if (bitnum % modesize + bitsize > modesize)
 562     return false;
 563
 564   /* The memory must be sufficiently aligned for a MODESIZE access.
 565      This condition guarantees, that the memory access will not
 566      touch anything after the end of the structure.  */
 567   if (MEM_ALIGN (op0) < modesize)
 568     return false;
 569
 570   /* Check for cases where the C++ memory model applies.  */
 571   if (maybe_ne (bitregion_end, 0U)
 572       && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
 573           || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
 574                        bitregion_end)))
 575     return false;
 576
 577   return true;
 578 }
 579
 580 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 581    bit number BITNUM can be treated as a simple value of mode MODE.
 582    Store the byte offset in *BYTENUM if so.  */
 583
 584 static bool
 585 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
 586                        machine_mode mode, poly_uint64 *bytenum)
 587 {
 588   return (MEM_P (op0)
 589           && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
 590           && known_eq (bitsize, GET_MODE_BITSIZE (mode))
 591           && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
 592               || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
 593                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 594 }
 595 \f
 596 /* Try to use instruction INSV to store VALUE into a field of OP0.
 597    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
 598    BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
 599    are as for store_bit_field.  */
 600
 601 static bool
 602 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 603                             opt_scalar_int_mode op0_mode,
 604                             unsigned HOST_WIDE_INT bitsize,
 605                             unsigned HOST_WIDE_INT bitnum,
 606                             rtx value, scalar_int_mode value_mode)
 607 {
 608   class expand_operand ops[4];
 609   rtx value1;
 610   rtx xop0 = op0;
 611   rtx_insn *last = get_last_insn ();
 612   bool copy_back = false;
 613
 614   scalar_int_mode op_mode = insv->field_mode;
 615   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 616   if (bitsize == 0 || bitsize > unit)
 617     return false;
 618
 619   if (MEM_P (xop0))
 620     /* Get a reference to the first byte of the field.  */
 621     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 622                                  &bitnum);
 623   else
 624     {
 625       /* Convert from counting within OP0 to counting in OP_MODE.  */
 626       if (BYTES_BIG_ENDIAN)
 627         bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
 628
 629       /* If xop0 is a register, we need it in OP_MODE
 630          to make it acceptable to the format of insv.  */
 631       if (GET_CODE (xop0) == SUBREG)
 632         {
 633           /* If such a SUBREG can't be created, give up.  */
 634           if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
 635                                 SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
 636             return false;
 637           /* We can't just change the mode, because this might clobber op0,
 638              and we will need the original value of op0 if insv fails.  */
 639           xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
 640                                  SUBREG_BYTE (xop0));
 641         }
 642       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 643         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 644     }
 645
 646   /* If the destination is a paradoxical subreg such that we need a
 647      truncate to the inner mode, perform the insertion on a temporary and
 648      truncate the result to the original destination.  Note that we can't
 649      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 650      X) 0)) is (reg:N X).  */
 651   if (GET_CODE (xop0) == SUBREG
 652       && REG_P (SUBREG_REG (xop0))
 653       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 654                                          op_mode))
 655     {
 656       rtx tem = gen_reg_rtx (op_mode);
 657       emit_move_insn (tem, xop0);
 658       xop0 = tem;
 659       copy_back = true;
 660     }
 661
 662   /* There are similar overflow check at the start of store_bit_field_1,
 663      but that only check the situation where the field lies completely
 664      outside the register, while there do have situation where the field
 665      lies partialy in the register, we need to adjust bitsize for this
 666      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
 667      will broken on those arch support bit insert instruction, like arm, aarch64
 668      etc.  */
 669   if (bitsize + bitnum > unit && bitnum < unit)
 670     {
 671       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 672                "destination object, data truncated into %wu-bit",
 673                bitsize, unit - bitnum);
 674       bitsize = unit - bitnum;
 675     }
 676
 677   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 678      "backwards" from the size of the unit we are inserting into.
 679      Otherwise, we count bits from the most significant on a
 680      BYTES/BITS_BIG_ENDIAN machine.  */
 681
 682   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 683     bitnum = unit - bitsize - bitnum;
 684
 685   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 686   value1 = value;
 687   if (value_mode != op_mode)
 688     {
 689       if (GET_MODE_BITSIZE (value_mode) >= bitsize)
 690         {
 691           rtx tmp;
 692           /* Optimization: Don't bother really extending VALUE
 693              if it has all the bits we will actually use.  However,
 694              if we must narrow it, be sure we do it correctly.  */
 695
 696           if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
 697             {
 698               tmp = simplify_subreg (op_mode, value1, value_mode, 0);
 699               if (! tmp)
 700                 tmp = simplify_gen_subreg (op_mode,
 701                                            force_reg (value_mode, value1),
 702                                            value_mode, 0);
 703             }
 704           else
 705             {
 706               tmp = gen_lowpart_if_possible (op_mode, value1);
 707               if (! tmp)
 708                 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
 709             }
 710           value1 = tmp;
 711         }
 712       else if (CONST_INT_P (value))
 713         value1 = gen_int_mode (INTVAL (value), op_mode);
 714       else
 715         /* Parse phase is supposed to make VALUE's data type
 716            match that of the component reference, which is a type
 717            at least as wide as the field; so VALUE should have
 718            a mode that corresponds to that type.  */
 719         gcc_assert (CONSTANT_P (value));
 720     }
 721
 722   create_fixed_operand (&ops[0], xop0);
 723   create_integer_operand (&ops[1], bitsize);
 724   create_integer_operand (&ops[2], bitnum);
 725   create_input_operand (&ops[3], value1, op_mode);
 726   if (maybe_expand_insn (insv->icode, 4, ops))
 727     {
 728       if (copy_back)
 729         convert_move (op0, xop0, true);
 730       return true;
 731     }
 732   delete_insns_since (last);
 733   return false;
 734 }
 735
 736 /* A subroutine of store_bit_field, with the same arguments.  Return true
 737    if the operation could be implemented.
 738
 739    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 740    no other way of implementing the operation.  If FALLBACK_P is false,
 741    return false instead.

        The strategies below are tried in order:
        - a store that lies wholly outside a register target is dropped;
        - a vec_set pattern, for a whole element of a non-memory vector;
        - a plain move through a SUBREG, for a non-memory target when
          BITSIZE equals the size of FIELDMODE and the field fits in the
          target's mode;
        - a plain memory move, when simple_mem_bitfield_p allows it;
        - otherwise BITSIZE and BITNUM must be constants; OP0 is punned
          to an integer mode (through a stack temporary if a non-memory
          OP0 has no integer mode and FALLBACK_P is true) and
          store_integral_bit_field finishes the job.  */
 742
 743 static bool
 744 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
 745                    poly_uint64 bitregion_start, poly_uint64 bitregion_end,
 746                    machine_mode fieldmode,
 747                    rtx value, bool reverse, bool fallback_p)
 748 {
 749   rtx op0 = str_rtx;
 750
       /* Strip SUBREGs from the destination, folding their offsets into
          BITNUM so that OP0 is the innermost object.  */
 751   while (GET_CODE (op0) == SUBREG)
 752     {
 753       bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
 754       op0 = SUBREG_REG (op0);
 755     }
 756
 757   /* No action is needed if the target is a register and if the field
 758      lies completely outside that register.  This can occur if the source
 759      code contains an out-of-bounds access to a small array.  */
 760   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
 761     return true;
 762
 763   /* Use vec_set patterns for inserting parts of vectors whenever
 764      available.  */
 765   machine_mode outermode = GET_MODE (op0);
 766   scalar_mode innermode = GET_MODE_INNER (outermode);
 767   poly_uint64 pos;
 768   if (VECTOR_MODE_P (outermode)
 769       && !MEM_P (op0)
 770       && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
 771       && fieldmode == innermode
 772       && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
 773       && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
 774     {
 775       class expand_operand ops[3];
 776       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 777
 778       create_fixed_operand (&ops[0], op0);
 779       create_input_operand (&ops[1], value, innermode);
 780       create_integer_operand (&ops[2], pos);
 781       if (maybe_expand_insn (icode, 3, ops))
 782         return true;
 783     }
 784
 785   /* If the target is a register, overwriting the entire object, or storing
 786      a full-word or multi-word field can be done with just a SUBREG.  */
 787   if (!MEM_P (op0)
 788       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
 789     {
 790       /* Use the subreg machinery either to narrow OP0 to the required
 791          words or to cope with mode punning between equal-sized modes.
 792          In the latter case, use subreg on the rhs side, not lhs.  */
 793       rtx sub;
 794       HOST_WIDE_INT regnum;
 795       poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
 796       if (known_eq (bitnum, 0U)
 797           && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
 798         {
 799           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 800           if (sub)
 801             {
 802               if (reverse)
 803                 sub = flip_storage_order (GET_MODE (op0), sub);
 804               emit_move_insn (op0, sub);
 805               return true;
 806             }
 807         }
 808       else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
 809                && multiple_p (bitsize, regsize * BITS_PER_UNIT)
                    /* This path is only meant to narrow OP0.  If the field
                       is wider than OP0 (an out-of-bounds store), the
                       SUBREG below would be paradoxical, so leave such
                       stores to the word-by-word code further down.  */
                    && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize))
 810         {
 811           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 812                                      regnum * regsize);
 813           if (sub)
 814             {
 815               if (reverse)
 816                 value = flip_storage_order (fieldmode, value);
 817               emit_move_insn (sub, value);
 818               return true;
 819             }
 820         }
 821     }
 822
 823   /* If the target is memory, storing any naturally aligned field can be
 824      done with a simple store.  For targets that support fast unaligned
 825      memory, any naturally sized, unit aligned field can be done directly.  */
 826   poly_uint64 bytenum;
 827   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
 828     {
 829       op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
 830       if (reverse)
 831         value = flip_storage_order (fieldmode, value);
 832       emit_move_insn (op0, value);
 833       return true;
 834     }
 835
 836   /* It's possible we'll need to handle other cases here for
 837      polynomial bitnum and bitsize.  */
 838
 839   /* From here on we need to be looking at a fixed-size insertion.  */
 840   unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
 841   unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
 842
 843   /* Make sure we are playing with integral modes.  Pun with subregs
 844      if we aren't.  This must come after the entire register case above,
 845      since that case is valid for any mode.  The following cases are only
 846      valid for integral modes.  */
 847   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
 848   scalar_int_mode imode;
 849   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
 850     {
 851       if (MEM_P (op0))
 852         op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
 853                                             0, MEM_SIZE (op0));
 854       else if (!op0_mode.exists ())
 855         {
               /* A non-memory OP0 with no integer mode of its size.  A
                  whole-object store from memory is done by re-typing the
                  source MEM to OP0's mode.  */
 856           if (ibitnum == 0
 857               && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
 858               && MEM_P (value)
 859               && !reverse)
 860             {
 861               value = adjust_address (value, GET_MODE (op0), 0);
 862               emit_move_insn (op0, value);
 863               return true;
 864             }
 865           if (!fallback_p)
 866             return false;
               /* Copy OP0 to a stack temporary, store the field into that
                  MEM and copy the result back.  FALLBACK_P is true here;
                  the result of the recursive call is not checked.  */
 867           rtx temp = assign_stack_temp (GET_MODE (op0),
 868                                         GET_MODE_SIZE (GET_MODE (op0)));
 869           emit_move_insn (temp, op0);
 870           store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
 871                              reverse, fallback_p);
 872           emit_move_insn (op0, temp);
 873           return true;
 874         }
 875       else
 876         op0 = gen_lowpart (op0_mode.require (), op0);
 877     }
 878
 879   return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
 880                                    bitregion_start, bitregion_end,
 881                                    fieldmode, value, reverse, fallback_p);
 882 }
 883
 884 /* Subroutine of store_bit_field_1, with the same arguments, except
 885    that BITSIZE and BITNUM are constant.  Handle cases specific to
 886    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
 887    otherwise OP0 is a BLKmode MEM.

        Return true if the store was implemented.  The `return false'
        paths below are reached only when FALLBACK_P is false or when a
        recursive store_bit_field_1 call fails.  */
 888
 889 static bool
 890 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
 891                           unsigned HOST_WIDE_INT bitsize,
 892                           unsigned HOST_WIDE_INT bitnum,
 893                           poly_uint64 bitregion_start,
 894                           poly_uint64 bitregion_end,
 895                           machine_mode fieldmode,
 896                           rtx value, bool reverse, bool fallback_p)
 897 {
 898   /* Storing an lsb-aligned field in a register
 899      can be done with a movstrict instruction.  */
 900
 901   if (!MEM_P (op0)
 902       && !reverse
 903       && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
 904       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
 905       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 906     {
 907       class expand_operand ops[2];
 908       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 909       rtx arg0 = op0;
 910       unsigned HOST_WIDE_INT subreg_off;
 911
 912       if (GET_CODE (arg0) == SUBREG)
 913         {
 914           /* Else we've got some float mode source being extracted into
 915              a different float mode destination -- this combination of
 916              subregs results in Severe Tire Damage.  */
 917           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 918                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 919                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 920           arg0 = SUBREG_REG (arg0);
 921         }
 922
 923       subreg_off = bitnum / BITS_PER_UNIT;
 924       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)
 925           /* STRICT_LOW_PART must have a non-paradoxical subreg as
 926              operand.  */
 927           && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0)))
 928         {
 929           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 930
 931           create_fixed_operand (&ops[0], arg0);
 932           /* Shrink the source operand to FIELDMODE.  */
 933           create_convert_operand_to (&ops[1], value, fieldmode, false);
 934           if (maybe_expand_insn (icode, 2, ops))
 935             return true;
 936         }
 937     }
 938
 939   /* Handle fields bigger than a word.  */
 940
 941   if (bitsize > BITS_PER_WORD)
 942     {
 943       /* Here we transfer the words of the field
 944          in the order least significant first.
 945          This is because the most significant word is the one which may
 946          be less than full.
 947          However, only do that if the value is not BLKmode.  */
 948
 949       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 950       const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 951       rtx_insn *last;
 952
 953       /* This is the mode we must force value to, so that there will be enough
 954          subwords to extract.  Note that fieldmode will often (always?) be
 955          VOIDmode, because that is what store_field uses to indicate that this
 956          is a bit field, but passing VOIDmode to operand_subword_force
 957          is not allowed.
 958
 959          The mode must be fixed-size, since insertions into variable-sized
 960          objects are meant to be handled before calling this function.  */
 961       fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
 962       if (value_mode == VOIDmode)
 963         value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
 964
           /* Remember the current end of the insn stream so that a failed
              attempt can be undone as a whole.  */
 965       last = get_last_insn ();
 966       for (int i = 0; i < nwords; i++)
 967         {
 968           /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
 969              except maybe for the last iteration.  */
 970           const unsigned HOST_WIDE_INT new_bitsize
 971             = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 972           /* Bit offset from the starting bit number in the target.  */
 973           const unsigned int bit_offset
 974             = backwards ^ reverse
 975               ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
 976               : i * BITS_PER_WORD;
 977           /* Starting word number in the value.  */
 978           const unsigned int wordnum
 979             = backwards
 980               ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
 981               : i;
 982           /* The chunk of the value in word_mode.  We use bit-field extraction
 983               in BLKmode to handle unaligned memory references and to shift the
 984               last chunk right on big-endian machines if need be.  */
 985           rtx value_word
 986             = fieldmode == BLKmode
 987               ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
 988                                    1, NULL_RTX, word_mode, word_mode, false,
 989                                    NULL)
 990               : operand_subword_force (value, wordnum, value_mode);
 991
               /* Store this word-sized piece; on failure, delete every insn
                  emitted since LAST so no partial store is left behind.  */
 992           if (!store_bit_field_1 (op0, new_bitsize,
 993                                   bitnum + bit_offset,
 994                                   bitregion_start, bitregion_end,
 995                                   word_mode,
 996                                   value_word, reverse, fallback_p))
 997             {
 998               delete_insns_since (last);
 999               return false;
1000             }
1001         }
1002       return true;
1003     }
1004
1005   /* If VALUE has a floating-point or complex mode, access it as an
1006      integer of the corresponding size.  This can occur on a machine
1007      with 64 bit registers that uses SFmode for float.  It can also
1008      occur for unaligned float or complex fields.  */
1009   rtx orig_value = value;
1010   scalar_int_mode value_mode;
1011   if (GET_MODE (value) == VOIDmode)
1012     /* By this point we've dealt with values that are bigger than a word,
1013        so word_mode is a conservatively correct choice.  */
1014     value_mode = word_mode;
1015   else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1016     {
1017       value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1018       value = gen_reg_rtx (value_mode);
1019       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1020     }
1021
1022   /* If OP0 is a multi-word register, narrow it to the affected word.
1023      If the region spans two words, defer to store_split_bit_field.
1024      Don't do this if op0 is a single hard register wider than word
1025      such as a float or vector register.  */
1026   if (!MEM_P (op0)
1027       && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1028       && (!REG_P (op0)
1029           || !HARD_REGISTER_P (op0)
1030           || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1031     {
           /* The field straddles a word boundary of OP0.  */
1032       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1033         {
1034           if (!fallback_p)
1035             return false;
1036
1037           store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1038                                  bitregion_start, bitregion_end,
1039                                  value, value_mode, reverse);
1040           return true;
1041         }
           /* The field lies within one word: replace OP0 by that word and
              make BITNUM relative to it.  */
1042       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1043                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1044       gcc_assert (op0);
1045       op0_mode = word_mode;
1046       bitnum %= BITS_PER_WORD;
1047     }
1048
1049   /* From here on we can assume that the field to be stored in fits
1050      within a word.  If the destination is a register, it too fits
1051      in a word.  */
1052
       /* For a non-memory destination, try an insv pattern first.  This is
          not attempted when REVERSE is set.  */
1053   extraction_insn insv;
1054   if (!MEM_P (op0)
1055       && !reverse
1056       && get_best_reg_extraction_insn (&insv, EP_insv,
1057                                        GET_MODE_BITSIZE (op0_mode.require ()),
1058                                        fieldmode)
1059       && store_bit_field_using_insv (&insv, op0, op0_mode,
1060                                      bitsize, bitnum, value, value_mode))
1061     return true;
1062
1063   /* If OP0 is a memory, try copying it to a register and seeing if a
1064      cheap register alternative is available.  */
1065   if (MEM_P (op0) && !reverse)
1066     {
1067       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1068                                         fieldmode)
1069           && store_bit_field_using_insv (&insv, op0, op0_mode,
1070                                          bitsize, bitnum, value, value_mode))
1071         return true;
1072
1073       rtx_insn *last = get_last_insn ();
1074
1075       /* Try loading part of OP0 into a register, inserting the bitfield
1076          into that, and then copying the result back to OP0.  */
1077       unsigned HOST_WIDE_INT bitpos;
1078       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1079                                                bitregion_start, bitregion_end,
1080                                                fieldmode, &bitpos);
1081       if (xop0)
1082         {
1083           rtx tempreg = copy_to_reg (xop0);
               /* FALLBACK_P is false in this call, so the shift-and-mask
                  fallback is not used for the register copy.  ORIG_VALUE
                  (VALUE as it was before the integer-mode punning above)
                  is passed together with FIELDMODE.  If the attempt fails,
                  the insns emitted since LAST are deleted.  */
1084           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1085                                  bitregion_start, bitregion_end,
1086                                  fieldmode, orig_value, reverse, false))
1087             {
1088               emit_move_insn (xop0, tempreg);
1089               return true;
1090             }
1091           delete_insns_since (last);
1092         }
1093     }
1094
1095   if (!fallback_p)
1096     return false;
1097
       /* Nothing cheaper worked: fall back to store_fixed_bit_field.  */
1098   store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1099                          bitregion_end, value, value_mode, reverse);
1100   return true;
1101 }
1102
1103 /* Generate code to store value from rtx VALUE
1104    into a bit-field within structure STR_RTX
1105    containing BITSIZE bits starting at bit BITNUM.
1106
1107    BITREGION_START is bitpos of the first bitfield in this region.
1108    BITREGION_END is the bitpos of the ending bitfield in this region.
1109    These two fields are 0, if the C++ memory model does not apply,
1110    or we are not interested in keeping track of bitfield regions.
1111
1112    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1113
1114    If REVERSE is true, the store is to be done in reverse order.

        Nothing is returned: the calls to store_bit_field_1 below pass
        FALLBACK_P as true and a false result is treated as
        gcc_unreachable.  */
1115
1116 void
1117 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1118                  poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1119                  machine_mode fieldmode,
1120                  rtx value, bool reverse)
1121 {
1122   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1123   unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1124   scalar_int_mode int_mode;
1125   if (bitsize.is_constant (&ibitsize)
1126       && bitnum.is_constant (&ibitnum)
1127       && is_a <scalar_int_mode> (fieldmode, &int_mode)
1128       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1129                                      bitregion_start, bitregion_end))
1130     {
1131       /* Storing of a full word can be done with a simple store.
1132          We know here that the field can be accessed with one single
1133          instruction.  For targets that support unaligned memory,
1134          an unaligned access may be necessary.  */
1135       if (ibitsize == GET_MODE_BITSIZE (int_mode))
1136         {
1137           str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1138                                              ibitnum / BITS_PER_UNIT);
1139           if (reverse)
1140             value = flip_storage_order (int_mode, value);
1141           gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1142           emit_move_insn (str_rtx, value);
1143         }
1144       else
1145         {
1146           rtx temp;
1147
               /* Narrow STR_RTX to a single INT_MODE access (IBITNUM is
                  updated by narrow_bit_field_mem), copy it to a register,
                  insert the field there, and write the register back with
                  one move.  */
1148           str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1149                                           ibitnum, &ibitnum);
1150           gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1151           temp = copy_to_reg (str_rtx);
1152           if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1153                                   int_mode, value, reverse, true))
1154             gcc_unreachable ();
1155
1156           emit_move_insn (str_rtx, temp);
1157         }
1158
1159       return;
1160     }
1161
1162   /* Under the C++11 memory model, we must not touch bits outside the
1163      bit region.  Adjust the address to start at the beginning of the
1164      bit region.  */
1165   if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1166     {
1167       scalar_int_mode best_mode;
1168       machine_mode addr_mode = VOIDmode;
1169
           /* OFFSET is BITREGION_START converted to bytes (exact_div
              presumably requires it to be a whole number of bytes).
              BITNUM and BITREGION_END are then made relative to the start
              of the region, and SIZE is the number of bytes, rounded up,
              from the region start to the end of the field.  */
1170       poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1171       bitnum -= bitregion_start;
1172       poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1173       bitregion_end -= bitregion_start;
1174       bitregion_start = 0;
           /* ADDR_MODE stays VOIDmode unless BITSIZE and BITNUM are
              constant and get_best_mode finds a mode.  */
1175       if (bitsize.is_constant (&ibitsize)
1176           && bitnum.is_constant (&ibitnum)
1177           && get_best_mode (ibitsize, ibitnum,
1178                             bitregion_start, bitregion_end,
1179                             MEM_ALIGN (str_rtx), INT_MAX,
1180                             MEM_VOLATILE_P (str_rtx), &best_mode))
1181         addr_mode = best_mode;
1182       str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1183                                               offset, size);
1184     }
1185
1186   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1187                           bitregion_start, bitregion_end,
1188                           fieldmode, value, reverse, true))
1189     gcc_unreachable ();
1190 }
1191 \f
1192 /* Store VALUE, whose mode is VALUE_MODE, into the bit field of width
1193    BITSIZE that starts at bit BITNUM of OP0, using shifts and boolean
1194    operations.  If OP0_MODE is defined, it is the mode of OP0, otherwise
1195    OP0 is a BLKmode MEM.
1196
1197    If REVERSE is true, the store is to be done in reverse order.  */
1198
1199 static void
1200 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1201                        unsigned HOST_WIDE_INT bitsize,
1202                        unsigned HOST_WIDE_INT bitnum,
1203                        poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1204                        rtx value, scalar_int_mode value_mode, bool reverse)
1205 {
1206   /* Not handled here: a structure whose known alignment is only a
1207      halfword with a field split across two aligned halfwords, or one
1208      whose known alignment is only a byte with a field split across
1209      two bytes.  Such cases are not supposed to be able to occur.  */
1210
1211   /* Anything that is not a MEM is operated on directly in its own mode.  */
1212   if (!MEM_P (op0))
1213     {
1214       store_fixed_bit_field_1 (op0, op0_mode.require (), bitsize, bitnum,
1215                                value, value_mode, reverse);
1216       return;
1217     }
1218
1219   /* Cap the access width at BITS_PER_WORD, or at OP0's mode if narrower.  */
1220   unsigned int width_limit = BITS_PER_WORD;
1221   scalar_int_mode mem_mode;
1222   if (op0_mode.exists (&mem_mode) && GET_MODE_BITSIZE (mem_mode) < width_limit)
1223     width_limit = GET_MODE_BITSIZE (mem_mode);
1224
1225   scalar_int_mode access_mode;
1226   if (get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1227                      MEM_ALIGN (op0), width_limit, MEM_VOLATILE_P (op0),
1228                      &access_mode))
1229     {
1230       op0 = narrow_bit_field_mem (op0, access_mode, bitsize, bitnum, &bitnum);
1231       store_fixed_bit_field_1 (op0, access_mode, bitsize, bitnum,
1232                                value, value_mode, reverse);
1233     }
1234   else
1235     /* The only way this should occur is if the field spans word
1236        boundaries.  */
1237     store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1238                            bitregion_start, bitregion_end,
1239                            value, value_mode, reverse);
1240 }
1241
1242 /* Helper function for store_fixed_bit_field, stores
1243    the bit field always using MODE, which is the mode of OP0.  The other
1244    arguments are as for store_fixed_bit_field.

        The sequence emitted is: position VALUE at the field (masking it
        to BITSIZE bits when needed), clear the field in a register copy
        of OP0, OR the two together and move the result back to OP0.
        A constant VALUE that is all zeros or all ones within the field
        lets the OR step or the clearing step be skipped.  */
1245
1246 static void
1247 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1248                          unsigned HOST_WIDE_INT bitsize,
1249                          unsigned HOST_WIDE_INT bitnum,
1250                          rtx value, scalar_int_mode value_mode, bool reverse)
1251 {
1252   rtx temp;
       /* Set when VALUE is a CONST_INT whose low BITSIZE bits are all
          zeros, respectively all ones.  */
1253   int all_zero = 0;
1254   int all_one = 0;
1255
1256   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1257      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1258
1259   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1260     /* BITNUM is the distance between our msb
1261        and that of the containing datum.
1262        Convert it to the distance from the lsb.  */
1263     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1264
1265   /* Now BITNUM is always the distance between our lsb
1266      and that of OP0.  */
1267
1268   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1269      we must first convert its mode to MODE.  */
1270
1271   if (CONST_INT_P (value))
1272     {
1273       unsigned HOST_WIDE_INT v = UINTVAL (value);
1274
           /* Keep only the low BITSIZE bits of the constant.  */
1275       if (bitsize < HOST_BITS_PER_WIDE_INT)
1276         v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1277
1278       if (v == 0)
1279         all_zero = 1;
1280       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1281                 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1282                || (bitsize == HOST_BITS_PER_WIDE_INT
1283                    && v == HOST_WIDE_INT_M1U))
1284         all_one = 1;
1285
1286       value = lshift_value (mode, v, bitnum);
1287     }
1288   else
1289     {
           /* VALUE must be masked to BITSIZE bits unless VALUE_MODE is
              exactly BITSIZE bits wide or the field ends at the top bit
              of MODE (presumably because the unsigned conversion, resp.
              the left shift, then leaves no stray bits above the
              field).  */
1290       int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1291                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1292
1293       if (value_mode != mode)
1294         value = convert_to_mode (mode, value, 1);
1295
1296       if (must_and)
1297         value = expand_binop (mode, and_optab, value,
1298                               mask_rtx (mode, 0, bitsize, 0),
1299                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1300       if (bitnum > 0)
1301         value = expand_shift (LSHIFT_EXPR, mode, value,
1302                               bitnum, NULL_RTX, 1);
1303     }
1304
1305   if (reverse)
1306     value = flip_storage_order (mode, value);
1307
1308   /* Now clear the chosen bits in OP0,
1309      except that if VALUE is all ones within the field (ALL_ONE)
        we need not bother.  */
1310   /* We keep the intermediates in registers to allow CSE to combine
1311      consecutive bitfield assignments.  */
1312
1313   temp = force_reg (mode, op0);
1314
1315   if (! all_one)
1316     {
           /* The mask is built with the same BITNUM and BITSIZE as the
              field, and is flipped like VALUE when REVERSE is set.  */
1317       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1318       if (reverse)
1319         mask = flip_storage_order (mode, mask);
1320       temp = expand_binop (mode, and_optab, temp, mask,
1321                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1322       temp = force_reg (mode, temp);
1323     }
1324
1325   /* Now logical-or VALUE into OP0, unless it is zero.  */
1326
1327   if (! all_zero)
1328     {
1329       temp = expand_binop (mode, ior_optab, temp, value,
1330                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1331       temp = force_reg (mode, temp);
1332     }
1333
       /* Write the combined value back unless TEMP is OP0 itself.  */
1334   if (op0 != temp)
1335     {
1336       op0 = copy_rtx (op0);
1337       emit_move_insn (op0, temp);
1338     }
1339 }
1340 \f
1341 /* Store a bit field that is split across multiple accessible memory objects.
1342
1343    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1344    BITSIZE is the field width; BITPOS the position of its first bit
1345    (within the word).
1346    VALUE is the value to store, which has mode VALUE_MODE.
1347    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1348    a BLKmode MEM.
1349
1350    If REVERSE is true, the store is to be done in reverse order.
1351
1352    This does not yet handle fields wider than BITS_PER_WORD.

        The field is stored piece by piece: each iteration of the loop
        below extracts up to UNIT bits of VALUE and hands them to
        store_fixed_bit_field, until BITSIZE bits have been stored.  */
1353
1354 static void
1355 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1356                        unsigned HOST_WIDE_INT bitsize,
1357                        unsigned HOST_WIDE_INT bitpos,
1358                        poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1359                        rtx value, scalar_int_mode value_mode, bool reverse)
1360 {
       /* UNIT is the piece granularity in bits, TOTAL_BITS the width of
          VALUE_MODE and BITSDONE the number of bits stored so far.  */
1361   unsigned int unit, total_bits, bitsdone = 0;
1362
1363   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1364      much at a time.  */
1365   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1366     unit = BITS_PER_WORD;
1367   else
1368     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1369
1370   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1371      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1372      again, and we will mutually recurse forever.  */
1373   if (MEM_P (op0) && op0_mode.exists ())
1374     unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1375
1376   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1377      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1378      that VALUE might be a floating-point constant.  */
1379   if (CONSTANT_P (value) && !CONST_INT_P (value))
1380     {
1381       rtx word = gen_lowpart_common (word_mode, value);
1382
1383       if (word && (value != word))
1384         value = word;
1385       else
1386         value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1387       value_mode = word_mode;
1388     }
1389
1390   total_bits = GET_MODE_BITSIZE (value_mode);
1391
1392   while (bitsdone < bitsize)
1393     {
1394       unsigned HOST_WIDE_INT thissize;
1395       unsigned HOST_WIDE_INT thispos;
1396       unsigned HOST_WIDE_INT offset;
1397       rtx part;
1398
           /* OFFSET is the index of the UNIT that holds the next bit to
              store, THISPOS that bit's position within the UNIT.  */
1399       offset = (bitpos + bitsdone) / unit;
1400       thispos = (bitpos + bitsdone) % unit;
1401
1402       /* When region of bytes we can touch is restricted, decrease
1403          UNIT close to the end of the region as needed.  If op0 is a REG
1404          or SUBREG of REG, don't do this, as there can't be data races
1405          on a register and we can expand shorter code in some cases.  */
1406       if (maybe_ne (bitregion_end, 0U)
1407           && unit > BITS_PER_UNIT
1408           && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1409           && !REG_P (op0)
1410           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1411         {
               /* Retry this iteration with a UNIT half the size; OFFSET
                  and THISPOS are recomputed at the top of the loop.  */
1412           unit = unit / 2;
1413           continue;
1414         }
1415
1416       /* THISSIZE must not overrun a word boundary.  Otherwise,
1417          store_fixed_bit_field will call us again, and we will mutually
1418          recurse forever.  */
1419       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1420       thissize = MIN (thissize, unit - thispos);
1421
1422       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1423         {
1424           /* Fetch successively less significant portions.  */
1425           if (CONST_INT_P (value))
1426             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1427                              >> (bitsize - bitsdone - thissize))
1428                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1429           /* Likewise, but the source is little-endian.  */
1430           else if (reverse)
1431             part = extract_fixed_bit_field (word_mode, value, value_mode,
1432                                             thissize,
1433                                             bitsize - bitsdone - thissize,
1434                                             NULL_RTX, 1, false);
1435           else
1436             /* The args are chosen so that the last part includes the
1437                lsb.  Give extract_bit_field the value it needs (with
1438                endianness compensation) to fetch the piece we want.  */
1439             part = extract_fixed_bit_field (word_mode, value, value_mode,
1440                                             thissize,
1441                                             total_bits - bitsize + bitsdone,
1442                                             NULL_RTX, 1, false);
1443         }
1444       else
1445         {
1446           /* Fetch successively more significant portions.  */
1447           if (CONST_INT_P (value))
1448             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1449                              >> bitsdone)
1450                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1451           /* Likewise, but the source is big-endian.  */
1452           else if (reverse)
1453             part = extract_fixed_bit_field (word_mode, value, value_mode,
1454                                             thissize,
1455                                             total_bits - bitsdone - thissize,
1456                                             NULL_RTX, 1, false);
1457           else
1458             part = extract_fixed_bit_field (word_mode, value, value_mode,
1459                                             thissize, bitsdone, NULL_RTX,
1460                                             1, false);
1461         }
1462
1463       /* If OP0 is a register, then handle OFFSET here.  */
1464       rtx op0_piece = op0;
1465       opt_scalar_int_mode op0_piece_mode = op0_mode;
1466       if (SUBREG_P (op0) || REG_P (op0))
1467         {
1468           scalar_int_mode imode;
1469           if (op0_mode.exists (&imode)
1470               && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1471             {
                   /* OP0 is narrower than a word, so a nonzero OFFSET is
                      outside it; const0_rtx marks the piece as one to
                      skip below.  */
1472               if (offset)
1473                 op0_piece = const0_rtx;
1474             }
1475           else
1476             {
1477               op0_piece = operand_subword_force (op0,
1478                                                  offset * unit / BITS_PER_WORD,
1479                                                  GET_MODE (op0));
1480               op0_piece_mode = word_mode;
1481             }
               /* Reduce OFFSET to the index of the UNIT within the chosen
                  word (the mask assumes BITS_PER_WORD / UNIT is a power
                  of two).  */
1482           offset &= BITS_PER_WORD / unit - 1;
1483         }
1484
1485       /* OFFSET is in UNITs, and UNIT is in bits.  If OP0_PIECE is
1486          const0_rtx, it is just an out-of-bounds access.  Ignore it.  */
1487       if (op0_piece != const0_rtx)
1488         store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1489                                offset * unit + thispos, bitregion_start,
1490                                bitregion_end, part, word_mode, reverse);
1491       bitsdone += thissize;
1492     }
1493 }
1494 \f
1495 /* A subroutine of extract_bit_field_1 that converts return value X
1496    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1497    to extract_bit_field.  */
1498
1499 static rtx
1500 convert_extracted_bit_field (rtx x, machine_mode mode,
1501                              machine_mode tmode, bool unsignedp)
1502 {
1503   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1504     return x;
1505
1506   /* If the x mode is not a scalar integral, first convert to the
1507      integer mode of that size and then access it as a floating-point
1508      value via a SUBREG.  */
1509   if (!SCALAR_INT_MODE_P (tmode))
1510     {
1511       scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1512       x = convert_to_mode (int_mode, x, unsignedp);
1513       x = force_reg (int_mode, x);
1514       return gen_lowpart (tmode, x);
1515     }
1516
1517   return convert_to_mode (tmode, x, unsignedp);
1518 }
1519
1520 /* Try to use an ext(z)v pattern to extract a field from OP0.
1521    Return the extracted value on success, otherwise return null.
1522    EXTV describes the extraction instruction to use.  If OP0_MODE
1523    is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1524    The other arguments are as for extract_bit_field.  */
1525
1526 static rtx
1527 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1528                               opt_scalar_int_mode op0_mode,
1529                               unsigned HOST_WIDE_INT bitsize,
1530                               unsigned HOST_WIDE_INT bitnum,
1531                               int unsignedp, rtx target,
1532                               machine_mode mode, machine_mode tmode)
1533 {
1534   class expand_operand ops[4];
1535   rtx spec_target = target;
1536   rtx spec_target_subreg = 0;
1537   scalar_int_mode ext_mode = extv->field_mode;
1538   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1539
1540   if (bitsize == 0 || unit < bitsize)
1541     return NULL_RTX;
1542
1543   if (MEM_P (op0))
1544     /* Get a reference to the first byte of the field.  */
1545     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1546                                 &bitnum);
1547   else
1548     {
1549       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1550       if (BYTES_BIG_ENDIAN)
1551         bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1552
1553       /* If op0 is a register, we need it in EXT_MODE to make it
1554          acceptable to the format of ext(z)v.  */
1555       if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1556         return NULL_RTX;
1557       if (REG_P (op0) && op0_mode.require () != ext_mode)
1558         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1559     }
1560
1561   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1562      "backwards" from the size of the unit we are extracting from.
1563      Otherwise, we count bits from the most significant on a
1564      BYTES/BITS_BIG_ENDIAN machine.  */
1565
1566   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1567     bitnum = unit - bitsize - bitnum;
1568
1569   if (target == 0)
1570     target = spec_target = gen_reg_rtx (tmode);
1571
1572   if (GET_MODE (target) != ext_mode)
1573     {
1574       rtx temp;
1575       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1576          between the mode of the extraction (word_mode) and the target
1577          mode.  Instead, create a temporary and use convert_move to set
1578          the target.  */
1579       if (REG_P (target)
1580           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)
1581           && (temp = gen_lowpart_if_possible (ext_mode, target)))
1582         {
1583           target = temp;
1584           if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1585             spec_target_subreg = target;
1586         }
1587       else
1588         target = gen_reg_rtx (ext_mode);
1589     }
1590
1591   create_output_operand (&ops[0], target, ext_mode);
1592   create_fixed_operand (&ops[1], op0);
1593   create_integer_operand (&ops[2], bitsize);
1594   create_integer_operand (&ops[3], bitnum);
1595   if (maybe_expand_insn (extv->icode, 4, ops))
1596     {
1597       target = ops[0].value;
1598       if (target == spec_target)
1599         return target;
1600       if (target == spec_target_subreg)
1601         return spec_target;
1602       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1603     }
1604   return NULL_RTX;
1605 }
1606
1607 /* See whether it would be valid to extract the part of OP0 described
1608    by BITNUM and BITSIZE into a value of mode MODE using a subreg
1609    operation.  Return the subreg if so, otherwise return null.  */
1610
1611 static rtx
1612 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1613                              poly_uint64 bitsize, poly_uint64 bitnum)
1614 {
1615   poly_uint64 bytenum;
1616   if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1617       && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1618       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1619       && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
1620     return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
1621   return NULL_RTX;
1622 }
1623
/* A subroutine of extract_bit_field, with the same arguments.
   If FALLBACK_P is true, fall back to extract_fixed_bit_field
   if we can find no other means of implementing the operation.
   If FALLBACK_P is false, return NULL instead.  (FALLBACK_P is simply
   forwarded to extract_integral_bit_field, which implements that
   policy.)

   The strategies below are tried in order:
     - trivial cases on registers (out-of-bounds access, whole register);
     - vec_extract of a narrower vector from a wider vector;
     - vec_extract of a single element, or a subreg of a vector;
     - a subreg of an integer-punned register, or a simple memory load;
     - extract_integral_bit_field for everything else.

   If ALT_RTL is nonnull, it is only written on the vec_extract paths,
   where it receives the TARGET that was offered to the pattern.  */

static rtx
extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
                     int unsignedp, rtx target, machine_mode mode,
                     machine_mode tmode, bool reverse, bool fallback_p,
                     rtx *alt_rtl)
{
  rtx op0 = str_rtx;
  machine_mode mode1;

  /* With no preferred mode, the natural mode of the field is used.  */
  if (tmode == VOIDmode)
    tmode = mode;

  /* Look through SUBREGs, folding their byte offsets into BITNUM so
     that BITNUM counts from the start of the innermost object.  */
  while (GET_CODE (op0) == SUBREG)
    {
      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* If we have an out-of-bounds access to a register, just return an
     uninitialized register of the required mode.  This can occur if the
     source code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
    return gen_reg_rtx (tmode);

  if (REG_P (op0)
      && mode == GET_MODE (op0)
      && known_eq (bitnum, 0U)
      && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
    {
      if (reverse)
        op0 = flip_storage_order (mode, op0);
      /* We're trying to extract a full register from itself.  */
      return op0;
    }

  /* First try to check for vector from vector extractions.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && VECTOR_MODE_P (tmode)
      && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
      && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
    {
      machine_mode new_mode = GET_MODE (op0);
      /* If the element modes differ, look for a vector mode of the same
         total size as OP0 whose elements have TMODE's element mode.
         NEW_MODE becomes VOIDmode if there is no such mode.  */
      if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
        {
          scalar_mode inner_mode = GET_MODE_INNER (tmode);
          poly_uint64 nunits;
          if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
                           GET_MODE_UNIT_BITSIZE (tmode), &nunits)
              || !related_vector_mode (tmode, inner_mode,
                                       nunits).exists (&new_mode)
              || maybe_ne (GET_MODE_SIZE (new_mode),
                           GET_MODE_SIZE (GET_MODE (op0))))
            new_mode = VOIDmode;
        }
      /* POS is the index of the requested subvector, counted in units
         of TMODE; BITNUM must be an exact multiple of TMODE's size.  */
      poly_uint64 pos;
      if (new_mode != VOIDmode
          && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
              != CODE_FOR_nothing)
          && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
        {
          class expand_operand ops[3];
          machine_mode outermode = new_mode;
          machine_mode innermode = tmode;
          enum insn_code icode
            = convert_optab_handler (vec_extract_optab, outermode, innermode);

          if (new_mode != GET_MODE (op0))
            op0 = gen_lowpart (new_mode, op0);
          create_output_operand (&ops[0], target, innermode);
          /* This flag is tested again after expansion, before TARGET is
             reported through ALT_RTL.  */
          ops[0].target = 1;
          create_input_operand (&ops[1], op0, outermode);
          create_integer_operand (&ops[2], pos);
          if (maybe_expand_insn (icode, 3, ops))
            {
              if (alt_rtl && ops[0].target)
                *alt_rtl = target;
              target = ops[0].value;
              if (GET_MODE (target) != mode)
                return gen_lowpart (tmode, target);
              return target;
            }
        }
    }

  /* See if we can get a better vector mode before extracting.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && GET_MODE_INNER (GET_MODE (op0)) != tmode)
    {
      machine_mode new_mode;

      /* Start the search at the first vector mode of the class that
         corresponds to TMODE's mode class.  */
      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
        new_mode = MIN_MODE_VECTOR_FLOAT;
      else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
        new_mode = MIN_MODE_VECTOR_FRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
        new_mode = MIN_MODE_VECTOR_UFRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
        new_mode = MIN_MODE_VECTOR_ACCUM;
      else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
        new_mode = MIN_MODE_VECTOR_UACCUM;
      else
        new_mode = MIN_MODE_VECTOR_INT;

      /* Pick the first supported vector mode that is as big as OP0 and
         whose elements are as big as TMODE.  The check below treats
         NEW_MODE == VOIDmode as "nothing found".  */
      FOR_EACH_MODE_FROM (new_mode, new_mode)
        if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
            && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
            && targetm.vector_mode_supported_p (new_mode))
          break;
      if (new_mode != VOIDmode)
        op0 = gen_lowpart (new_mode, op0);
    }

  /* Use vec_extract patterns for extracting parts of vectors whenever
     available.  If that fails, see whether the current modes and bitregion
     give a natural subreg.  */
  machine_mode outermode = GET_MODE (op0);
  if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
    {
      scalar_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode
        = convert_optab_handler (vec_extract_optab, outermode, innermode);
      /* POS is the index of the requested element; the field must be
         exactly one element wide and start on an element boundary.  */
      poly_uint64 pos;
      if (icode != CODE_FOR_nothing
          && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
          && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
        {
          class expand_operand ops[3];

          create_output_operand (&ops[0], target, innermode);
          /* As above: tested after expansion before setting ALT_RTL.  */
          ops[0].target = 1;
          create_input_operand (&ops[1], op0, outermode);
          create_integer_operand (&ops[2], pos);
          if (maybe_expand_insn (icode, 3, ops))
            {
              if (alt_rtl && ops[0].target)
                *alt_rtl = target;
              target = ops[0].value;
              if (GET_MODE (target) != mode)
                return gen_lowpart (tmode, target);
              return target;
            }
        }
      /* Using subregs is useful if we're extracting one register vector
         from a multi-register vector.  extract_bit_field_as_subreg checks
         for valid bitsize and bitnum, so we don't need to do that here.  */
      if (VECTOR_MODE_P (mode))
        {
          rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
          if (sub)
            return sub;
        }
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
  scalar_int_mode imode;
  if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
    {
      if (MEM_P (op0))
        /* Re-type the memory reference: the integer mode if there is
           one, otherwise BLKmode.  */
        op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
                                            0, MEM_SIZE (op0));
      else if (op0_mode.exists (&imode))
        {
          op0 = gen_lowpart (imode, op0);

          /* If we got a SUBREG, force it into a register since we
             aren't going to be able to do another SUBREG on it.  */
          if (GET_CODE (op0) == SUBREG)
            op0 = force_reg (imode, op0);
        }
      else
        {
          /* A non-memory OP0 with no integer mode of its size: spill it
             to a stack temporary and continue with a BLKmode reference
             to that memory.  */
          poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
          rtx mem = assign_stack_temp (GET_MODE (op0), size);
          emit_move_insn (mem, op0);
          op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
        }
    }

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* Get the mode of the field to use for atomic access or subreg
     conversion.  */
  if (!SCALAR_INT_MODE_P (tmode)
      || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
    mode1 = mode;
  gcc_assert (mode1 != BLKmode);

  /* Extraction of a full MODE1 value can be done with a subreg as long
     as the least significant bit of the value is the least significant
     bit of either OP0 or a word of OP0.  */
  if (!MEM_P (op0) && !reverse)
    {
      rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
      if (sub)
        return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
    }

  /* Extraction of a full MODE1 value can be done with a load as long as
     the field is on a byte boundary and is sufficiently aligned.  */
  poly_uint64 bytenum;
  if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
    {
      op0 = adjust_bitfield_address (op0, mode1, bytenum);
      if (reverse)
        op0 = flip_storage_order (mode1, op0);
      return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
    }

  /* If we have a memory source and a non-constant bit offset, restrict
     the memory to the referenced bytes.  This is a worst-case fallback
     but is useful for things like vector booleans.  */
  if (MEM_P (op0) && !bitnum.is_constant ())
    {
      bytenum = bits_to_bytes_round_down (bitnum);
      bitnum = num_trailing_bits (bitnum);
      poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
      op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
      /* OP0 is now a BLKmode MEM, so it no longer has an integer mode.  */
      op0_mode = opt_scalar_int_mode ();
    }

  /* It's possible we'll need to handle other cases here for
     polynomial bitnum and bitsize.  */

  /* From here on we need to be looking at a fixed-size extraction.  */
  return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
                                     bitnum.to_constant (), unsignedp,
                                     target, mode, tmode, reverse, fallback_p);
}
1863
1864 /* Subroutine of extract_bit_field_1, with the same arguments, except
1865    that BITSIZE and BITNUM are constant.  Handle cases specific to
1866    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
1867    otherwise OP0 is a BLKmode MEM.  */
1868
1869 static rtx
1870 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1871                             unsigned HOST_WIDE_INT bitsize,
1872                             unsigned HOST_WIDE_INT bitnum, int unsignedp,
1873                             rtx target, machine_mode mode, machine_mode tmode,
1874                             bool reverse, bool fallback_p)
1875 {
1876   /* Handle fields bigger than a word.  */
1877
1878   if (bitsize > BITS_PER_WORD)
1879     {
1880       /* Here we transfer the words of the field
1881          in the order least significant first.
1882          This is because the most significant word is the one which may
1883          be less than full.  */
1884
1885       const bool backwards = WORDS_BIG_ENDIAN;
1886       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1887       unsigned int i;
1888       rtx_insn *last;
1889
1890       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1891         target = gen_reg_rtx (mode);
1892
1893       /* In case we're about to clobber a base register or something
1894          (see gcc.c-torture/execute/20040625-1.c).   */
1895       if (reg_mentioned_p (target, op0))
1896         target = gen_reg_rtx (mode);
1897
1898       /* Indicate for flow that the entire target reg is being set.  */
1899       emit_clobber (target);
1900
1901       /* The mode must be fixed-size, since extract_bit_field_1 handles
1902          extractions from variable-sized objects before calling this
1903          function.  */
1904       unsigned int target_size
1905         = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1906       last = get_last_insn ();
1907       for (i = 0; i < nwords; i++)
1908         {
1909           /* If I is 0, use the low-order word in both field and target;
1910              if I is 1, use the next to lowest word; and so on.  */
1911           /* Word number in TARGET to use.  */
1912           unsigned int wordnum
1913             = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1914           /* Offset from start of field in OP0.  */
1915           unsigned int bit_offset = (backwards ^ reverse
1916                                      ? MAX ((int) bitsize - ((int) i + 1)
1917                                             * BITS_PER_WORD,
1918                                             0)
1919                                      : (int) i * BITS_PER_WORD);
1920           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1921           rtx result_part
1922             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1923                                              bitsize - i * BITS_PER_WORD),
1924                                    bitnum + bit_offset, 1, target_part,
1925                                    mode, word_mode, reverse, fallback_p, NULL);
1926
1927           gcc_assert (target_part);
1928           if (!result_part)
1929             {
1930               delete_insns_since (last);
1931               return NULL;
1932             }
1933
1934           if (result_part != target_part)
1935             emit_move_insn (target_part, result_part);
1936         }
1937
1938       if (unsignedp)
1939         {
1940           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1941              need to be zero'd out.  */
1942           if (target_size > nwords * UNITS_PER_WORD)
1943             {
1944               unsigned int i, total_words;
1945
1946               total_words = target_size / UNITS_PER_WORD;
1947               for (i = nwords; i < total_words; i++)
1948                 emit_move_insn
1949                   (operand_subword (target,
1950                                     backwards ? total_words - i - 1 : i,
1951                                     1, VOIDmode),
1952                    const0_rtx);
1953             }
1954           return target;
1955         }
1956
1957       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1958       target = expand_shift (LSHIFT_EXPR, mode, target,
1959                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1960       return expand_shift (RSHIFT_EXPR, mode, target,
1961                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1962     }
1963
1964   /* If OP0 is a multi-word register, narrow it to the affected word.
1965      If the region spans two words, defer to extract_split_bit_field.  */
1966   if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1967     {
1968       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1969         {
1970           if (!fallback_p)
1971             return NULL_RTX;
1972           target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1973                                             unsignedp, reverse);
1974           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1975         }
1976       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1977                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1978       op0_mode = word_mode;
1979       bitnum %= BITS_PER_WORD;
1980     }
1981
1982   /* From here on we know the desired field is smaller than a word.
1983      If OP0 is a register, it too fits within a word.  */
1984   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1985   extraction_insn extv;
1986   if (!MEM_P (op0)
1987       && !reverse
1988       /* ??? We could limit the structure size to the part of OP0 that
1989          contains the field, with appropriate checks for endianness
1990          and TARGET_TRULY_NOOP_TRUNCATION.  */
1991       && get_best_reg_extraction_insn (&extv, pattern,
1992                                        GET_MODE_BITSIZE (op0_mode.require ()),
1993                                        tmode))
1994     {
1995       rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1996                                                  bitsize, bitnum,
1997                                                  unsignedp, target, mode,
1998                                                  tmode);
1999       if (result)
2000         return result;
2001     }
2002
2003   /* If OP0 is a memory, try copying it to a register and seeing if a
2004      cheap register alternative is available.  */
2005   if (MEM_P (op0) & !reverse)
2006     {
2007       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2008                                         tmode))
2009         {
2010           rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2011                                                      bitsize, bitnum,
2012                                                      unsignedp, target, mode,
2013                                                      tmode);
2014           if (result)
2015             return result;
2016         }
2017
2018       rtx_insn *last = get_last_insn ();
2019
2020       /* Try loading part of OP0 into a register and extracting the
2021          bitfield from that.  */
2022       unsigned HOST_WIDE_INT bitpos;
2023       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2024                                                0, 0, tmode, &bitpos);
2025       if (xop0)
2026         {
2027           xop0 = copy_to_reg (xop0);
2028           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2029                                             unsignedp, target,
2030                                             mode, tmode, reverse, false, NULL);
2031           if (result)
2032             return result;
2033           delete_insns_since (last);
2034         }
2035     }
2036
2037   if (!fallback_p)
2038     return NULL;
2039
2040   /* Find a correspondingly-sized integer field, so we can apply
2041      shifts and masks to it.  */
2042   scalar_int_mode int_mode;
2043   if (!int_mode_for_mode (tmode).exists (&int_mode))
2044     /* If this fails, we should probably push op0 out to memory and then
2045        do a load.  */
2046     int_mode = int_mode_for_mode (mode).require ();
2047
2048   target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2049                                     bitnum, target, unsignedp, reverse);
2050
2051   /* Complex values must be reversed piecewise, so we need to undo the global
2052      reversal, convert to the complex mode and reverse again.  */
2053   if (reverse && COMPLEX_MODE_P (tmode))
2054     {
2055       target = flip_storage_order (int_mode, target);
2056       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2057       target = flip_storage_order (tmode, target);
2058     }
2059   else
2060     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2061
2062   return target;
2063 }
2064
2065 /* Generate code to extract a byte-field from STR_RTX
2066    containing BITSIZE bits, starting at BITNUM,
2067    and put it in TARGET if possible (if TARGET is nonzero).
2068    Regardless of TARGET, we return the rtx for where the value is placed.
2069
2070    STR_RTX is the structure containing the byte (a REG or MEM).
2071    UNSIGNEDP is nonzero if this is an unsigned bit field.
2072    MODE is the natural mode of the field value once extracted.
2073    TMODE is the mode the caller would like the value to have;
2074    but the value may be returned with type MODE instead.
2075
2076    If REVERSE is true, the extraction is to be done in reverse order.
2077
2078    If a TARGET is specified and we can store in it at no extra cost,
2079    we do so, and return TARGET.
2080    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2081    if they are equally easy.
2082
2083    If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2084    then *ALT_RTL is set to TARGET (before legitimziation).  */
2085
2086 rtx
2087 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2088                    int unsignedp, rtx target, machine_mode mode,
2089                    machine_mode tmode, bool reverse, rtx *alt_rtl)
2090 {
2091   machine_mode mode1;
2092
2093   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
2094   if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2095     mode1 = GET_MODE (str_rtx);
2096   else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2097     mode1 = GET_MODE (target);
2098   else
2099     mode1 = tmode;
2100
2101   unsigned HOST_WIDE_INT ibitsize, ibitnum;
2102   scalar_int_mode int_mode;
2103   if (bitsize.is_constant (&ibitsize)
2104       && bitnum.is_constant (&ibitnum)
2105       && is_a <scalar_int_mode> (mode1, &int_mode)
2106       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2107                                      int_mode, 0, 0))
2108     {
2109       /* Extraction of a full INT_MODE value can be done with a simple load.
2110          We know here that the field can be accessed with one single
2111          instruction.  For targets that support unaligned memory,
2112          an unaligned access may be necessary.  */
2113       if (ibitsize == GET_MODE_BITSIZE (int_mode))
2114         {
2115           rtx result = adjust_bitfield_address (str_rtx, int_mode,
2116                                                 ibitnum / BITS_PER_UNIT);
2117           if (reverse)
2118             result = flip_storage_order (int_mode, result);
2119           gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2120           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2121         }
2122
2123       str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2124                                       &ibitnum);
2125       gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2126       str_rtx = copy_to_reg (str_rtx);
2127       return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2128                                   target, mode, tmode, reverse, true, alt_rtl);
2129     }
2130
2131   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2132                               target, mode, tmode, reverse, true, alt_rtl);
2133 }
2134 \f
2135 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2136    from bit BITNUM of OP0.  If OP0_MODE is defined, it is the mode of OP0,
2137    otherwise OP0 is a BLKmode MEM.
2138
2139    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2140    If REVERSE is true, the extraction is to be done in reverse order.
2141
2142    If TARGET is nonzero, attempts to store the value there
2143    and return TARGET, but this is not guaranteed.
2144    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
2145
2146 static rtx
2147 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2148                          opt_scalar_int_mode op0_mode,
2149                          unsigned HOST_WIDE_INT bitsize,
2150                          unsigned HOST_WIDE_INT bitnum, rtx target,
2151                          int unsignedp, bool reverse)
2152 {
2153   scalar_int_mode mode;
2154   if (MEM_P (op0))
2155     {
2156       if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2157                           BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2158         /* The only way this should occur is if the field spans word
2159            boundaries.  */
2160         return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2161                                         unsignedp, reverse);
2162
2163       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2164     }
2165   else
2166     mode = op0_mode.require ();
2167
2168   return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2169                                     target, unsignedp, reverse);
2170 }
2171
2172 /* Helper function for extract_fixed_bit_field, extracts
2173    the bit field always using MODE, which is the mode of OP0.
2174    The other arguments are as for extract_fixed_bit_field.  */
2175
2176 static rtx
2177 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2178                            unsigned HOST_WIDE_INT bitsize,
2179                            unsigned HOST_WIDE_INT bitnum, rtx target,
2180                            int unsignedp, bool reverse)
2181 {
2182   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2183      for invalid input, such as extract equivalent of f5 from
2184      gcc.dg/pr48335-2.c.  */
2185
2186   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2187     /* BITNUM is the distance between our msb and that of OP0.
2188        Convert it to the distance from the lsb.  */
2189     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2190
2191   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2192      We have reduced the big-endian case to the little-endian case.  */
2193   if (reverse)
2194     op0 = flip_storage_order (mode, op0);
2195
2196   if (unsignedp)
2197     {
2198       if (bitnum)
2199         {
2200           /* If the field does not already start at the lsb,
2201              shift it so it does.  */
2202           /* Maybe propagate the target for the shift.  */
2203           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2204           if (tmode != mode)
2205             subtarget = 0;
2206           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2207         }
2208       /* Convert the value to the desired mode.  TMODE must also be a
2209          scalar integer for this conversion to make sense, since we
2210          shouldn't reinterpret the bits.  */
2211       scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2212       if (mode != new_mode)
2213         op0 = convert_to_mode (new_mode, op0, 1);
2214
2215       /* Unless the msb of the field used to be the msb when we shifted,
2216          mask out the upper bits.  */
2217
2218       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2219         return expand_binop (new_mode, and_optab, op0,
2220                              mask_rtx (new_mode, 0, bitsize, 0),
2221                              target, 1, OPTAB_LIB_WIDEN);
2222       return op0;
2223     }
2224
2225   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2226      then arithmetic-shift its lsb to the lsb of the word.  */
2227   op0 = force_reg (mode, op0);
2228
2229   /* Find the narrowest integer mode that contains the field.  */
2230
2231   opt_scalar_int_mode mode_iter;
2232   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2233     if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2234       break;
2235
2236   mode = mode_iter.require ();
2237   op0 = convert_to_mode (mode, op0, 0);
2238
2239   if (mode != tmode)
2240     target = 0;
2241
2242   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2243     {
2244       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2245       /* Maybe propagate the target for the shift.  */
2246       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2247       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2248     }
2249
2250   return expand_shift (RSHIFT_EXPR, mode, op0,
2251                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2252 }
2253
2254 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2255    VALUE << BITPOS.  */
2256
2257 static rtx
2258 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2259               int bitpos)
2260 {
2261   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2262 }
2263 \f
2264 /* Extract a bit field that is split across two words
2265    and return an RTX for the result.
2266
2267    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2268    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2269    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2270    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2271    a BLKmode MEM.
2272
2273    If REVERSE is true, the extraction is to be done in reverse order.  */
2274
2275 static rtx
2276 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2277                          unsigned HOST_WIDE_INT bitsize,
2278                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2279                          bool reverse)
2280 {
2281   unsigned int unit;
2282   unsigned int bitsdone = 0;
2283   rtx result = NULL_RTX;
2284   int first = 1;
2285
2286   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2287      much at a time.  */
2288   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2289     unit = BITS_PER_WORD;
2290   else
2291     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2292
2293   while (bitsdone < bitsize)
2294     {
2295       unsigned HOST_WIDE_INT thissize;
2296       rtx part;
2297       unsigned HOST_WIDE_INT thispos;
2298       unsigned HOST_WIDE_INT offset;
2299
2300       offset = (bitpos + bitsdone) / unit;
2301       thispos = (bitpos + bitsdone) % unit;
2302
2303       /* THISSIZE must not overrun a word boundary.  Otherwise,
2304          extract_fixed_bit_field will call us again, and we will mutually
2305          recurse forever.  */
2306       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2307       thissize = MIN (thissize, unit - thispos);
2308
2309       /* If OP0 is a register, then handle OFFSET here.  */
2310       rtx op0_piece = op0;
2311       opt_scalar_int_mode op0_piece_mode = op0_mode;
2312       if (SUBREG_P (op0) || REG_P (op0))
2313         {
2314           op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2315           op0_piece_mode = word_mode;
2316           offset = 0;
2317         }
2318
2319       /* Extract the parts in bit-counting order,
2320          whose meaning is determined by BYTES_PER_UNIT.
2321          OFFSET is in UNITs, and UNIT is in bits.  */
2322       part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2323                                       thissize, offset * unit + thispos,
2324                                       0, 1, reverse);
2325       bitsdone += thissize;
2326
2327       /* Shift this part into place for the result.  */
2328       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2329         {
2330           if (bitsize != bitsdone)
2331             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2332                                  bitsize - bitsdone, 0, 1);
2333         }
2334       else
2335         {
2336           if (bitsdone != thissize)
2337             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2338                                  bitsdone - thissize, 0, 1);
2339         }
2340
2341       if (first)
2342         result = part;
2343       else
2344         /* Combine the parts with bitwise or.  This works
2345            because we extracted each part as an unsigned bit field.  */
2346         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2347                                OPTAB_LIB_WIDEN);
2348
2349       first = 0;
2350     }
2351
2352   /* Unsigned bit field: we are done.  */
2353   if (unsignedp)
2354     return result;
2355   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2356   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2357                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2358   return expand_shift (RSHIFT_EXPR, word_mode, result,
2359                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2360 }
2361 \f
2362 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2363    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2364    MODE, fill the upper bits with zeros.  Fail if the layout of either
2365    mode is unknown (as for CC modes) or if the extraction would involve
2366    unprofitable mode punning.  Return the value on success, otherwise
2367    return null.
2368
2369    This is different from gen_lowpart* in these respects:
2370
2371      - the returned value must always be considered an rvalue
2372
2373      - when MODE is wider than SRC_MODE, the extraction involves
2374        a zero extension
2375
2376      - when MODE is smaller than SRC_MODE, the extraction involves
2377        a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2378
2379    In other words, this routine performs a computation, whereas the
2380    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2381    operations.  */
2382
2383 rtx
2384 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2385 {
2386   scalar_int_mode int_mode, src_int_mode;
2387
2388   if (mode == src_mode)
2389     return src;
2390
2391   if (CONSTANT_P (src))
2392     {
2393       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2394          fails, it will happily create (subreg (symbol_ref)) or similar
2395          invalid SUBREGs.  */
2396       poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2397       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2398       if (ret)
2399         return ret;
2400
2401       if (GET_MODE (src) == VOIDmode
2402           || !validate_subreg (mode, src_mode, src, byte))
2403         return NULL_RTX;
2404
2405       src = force_reg (GET_MODE (src), src);
2406       return gen_rtx_SUBREG (mode, src, byte);
2407     }
2408
2409   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2410     return NULL_RTX;
2411
2412   if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2413       && targetm.modes_tieable_p (mode, src_mode))
2414     {
2415       rtx x = gen_lowpart_common (mode, src);
2416       if (x)
2417         return x;
2418     }
2419
2420   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2421       || !int_mode_for_mode (mode).exists (&int_mode))
2422     return NULL_RTX;
2423
2424   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2425     return NULL_RTX;
2426   if (!targetm.modes_tieable_p (int_mode, mode))
2427     return NULL_RTX;
2428
2429   src = gen_lowpart (src_int_mode, src);
2430   if (!validate_subreg (int_mode, src_int_mode, src,
2431                         subreg_lowpart_offset (int_mode, src_int_mode)))
2432     return NULL_RTX;
2433
2434   src = convert_modes (int_mode, src_int_mode, src, true);
2435   src = gen_lowpart (mode, src);
2436   return src;
2437 }
2438 \f
2439 /* Add INC into TARGET.  */
2440
2441 void
2442 expand_inc (rtx target, rtx inc)
2443 {
2444   rtx value = expand_binop (GET_MODE (target), add_optab,
2445                             target, inc,
2446                             target, 0, OPTAB_LIB_WIDEN);
2447   if (value != target)
2448     emit_move_insn (target, value);
2449 }
2450
2451 /* Subtract DEC from TARGET.  */
2452
2453 void
2454 expand_dec (rtx target, rtx dec)
2455 {
2456   rtx value = expand_binop (GET_MODE (target), sub_optab,
2457                             target, dec,
2458                             target, 0, OPTAB_LIB_WIDEN);
2459   if (value != target)
2460     emit_move_insn (target, value);
2461 }
2462 \f
2463 /* Output a shift instruction for expression code CODE,
2464    with SHIFTED being the rtx for the value to shift,
2465    and AMOUNT the rtx for the amount to shift by.
2466    Store the result in the rtx TARGET, if that is convenient.
2467    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2468    Return the rtx for where the value is.
2469    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2470    in which case 0 is returned.  */
2471
2472 static rtx
2473 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2474                 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2475 {
2476   rtx op1, temp = 0;
2477   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2478   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2479   optab lshift_optab = ashl_optab;
2480   optab rshift_arith_optab = ashr_optab;
2481   optab rshift_uns_optab = lshr_optab;
2482   optab lrotate_optab = rotl_optab;
2483   optab rrotate_optab = rotr_optab;
2484   machine_mode op1_mode;
2485   scalar_mode scalar_mode = GET_MODE_INNER (mode);
2486   int attempt;
2487   bool speed = optimize_insn_for_speed_p ();
2488
2489   op1 = amount;
2490   op1_mode = GET_MODE (op1);
2491
2492   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2493      shift amount is a vector, use the vector/vector shift patterns.  */
2494   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2495     {
2496       lshift_optab = vashl_optab;
2497       rshift_arith_optab = vashr_optab;
2498       rshift_uns_optab = vlshr_optab;
2499       lrotate_optab = vrotl_optab;
2500       rrotate_optab = vrotr_optab;
2501     }
2502
2503   /* Previously detected shift-counts computed by NEGATE_EXPR
2504      and shifted in the other direction; but that does not work
2505      on all machines.  */
2506
2507   if (SHIFT_COUNT_TRUNCATED)
2508     {
2509       if (CONST_INT_P (op1)
2510           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2511               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2512         op1 = gen_int_shift_amount (mode,
2513                                     (unsigned HOST_WIDE_INT) INTVAL (op1)
2514                                     % GET_MODE_BITSIZE (scalar_mode));
2515       else if (GET_CODE (op1) == SUBREG
2516                && subreg_lowpart_p (op1)
2517                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2518                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2519         op1 = SUBREG_REG (op1);
2520     }
2521
2522   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2523      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2524      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2525      amount instead.  */
2526   if (rotate
2527       && CONST_INT_P (op1)
2528       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2529                    GET_MODE_BITSIZE (scalar_mode) - 1))
2530     {
2531       op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2532                                          - INTVAL (op1)));
2533       left = !left;
2534       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2535     }
2536
2537   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2538      Note that this is not the case for bigger values.  For instance a rotation
2539      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2540      0x04030201 (bswapsi).  */
2541   if (rotate
2542       && CONST_INT_P (op1)
2543       && INTVAL (op1) == BITS_PER_UNIT
2544       && GET_MODE_SIZE (scalar_mode) == 2
2545       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2546     return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2547
2548   if (op1 == const0_rtx)
2549     return shifted;
2550
2551   /* Check whether its cheaper to implement a left shift by a constant
2552      bit count by a sequence of additions.  */
2553   if (code == LSHIFT_EXPR
2554       && CONST_INT_P (op1)
2555       && INTVAL (op1) > 0
2556       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2557       && INTVAL (op1) < MAX_BITS_PER_WORD
2558       && (shift_cost (speed, mode, INTVAL (op1))
2559           > INTVAL (op1) * add_cost (speed, mode))
2560       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2561     {
2562       int i;
2563       for (i = 0; i < INTVAL (op1); i++)
2564         {
2565           temp = force_reg (mode, shifted);
2566           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2567                                   unsignedp, OPTAB_LIB_WIDEN);
2568         }
2569       return shifted;
2570     }
2571
2572   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2573     {
2574       enum optab_methods methods;
2575
2576       if (attempt == 0)
2577         methods = OPTAB_DIRECT;
2578       else if (attempt == 1)
2579         methods = OPTAB_WIDEN;
2580       else
2581         methods = OPTAB_LIB_WIDEN;
2582
2583       if (rotate)
2584         {
2585           /* Widening does not work for rotation.  */
2586           if (methods == OPTAB_WIDEN)
2587             continue;
2588           else if (methods == OPTAB_LIB_WIDEN)
2589             {
2590               /* If we have been unable to open-code this by a rotation,
2591                  do it as the IOR of two shifts.  I.e., to rotate A
2592                  by N bits, compute
2593                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2594                  where C is the bitsize of A.
2595
2596                  It is theoretically possible that the target machine might
2597                  not be able to perform either shift and hence we would
2598                  be making two libcalls rather than just the one for the
2599                  shift (similarly if IOR could not be done).  We will allow
2600                  this extremely unlikely lossage to avoid complicating the
2601                  code below.  */
2602
2603               rtx subtarget = target == shifted ? 0 : target;
2604               rtx new_amount, other_amount;
2605               rtx temp1;
2606
2607               new_amount = op1;
2608               if (op1 == const0_rtx)
2609                 return shifted;
2610               else if (CONST_INT_P (op1))
2611                 other_amount = gen_int_shift_amount
2612                   (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2613               else
2614                 {
2615                   other_amount
2616                     = simplify_gen_unary (NEG, GET_MODE (op1),
2617                                           op1, GET_MODE (op1));
2618                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2619                   other_amount
2620                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2621                                            gen_int_mode (mask, GET_MODE (op1)));
2622                 }
2623
2624               shifted = force_reg (mode, shifted);
2625
2626               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2627                                      mode, shifted, new_amount, 0, 1);
2628               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2629                                       mode, shifted, other_amount,
2630                                       subtarget, 1);
2631               return expand_binop (mode, ior_optab, temp, temp1, target,
2632                                    unsignedp, methods);
2633             }
2634
2635           temp = expand_binop (mode,
2636                                left ? lrotate_optab : rrotate_optab,
2637                                shifted, op1, target, unsignedp, methods);
2638         }
2639       else if (unsignedp)
2640         temp = expand_binop (mode,
2641                              left ? lshift_optab : rshift_uns_optab,
2642                              shifted, op1, target, unsignedp, methods);
2643
2644       /* Do arithmetic shifts.
2645          Also, if we are going to widen the operand, we can just as well
2646          use an arithmetic right-shift instead of a logical one.  */
2647       if (temp == 0 && ! rotate
2648           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2649         {
2650           enum optab_methods methods1 = methods;
2651
2652           /* If trying to widen a log shift to an arithmetic shift,
2653              don't accept an arithmetic shift of the same size.  */
2654           if (unsignedp)
2655             methods1 = OPTAB_MUST_WIDEN;
2656
2657           /* Arithmetic shift */
2658
2659           temp = expand_binop (mode,
2660                                left ? lshift_optab : rshift_arith_optab,
2661                                shifted, op1, target, unsignedp, methods1);
2662         }
2663
2664       /* We used to try extzv here for logical right shifts, but that was
2665          only useful for one machine, the VAX, and caused poor code
2666          generation there for lshrdi3, so the code was deleted and a
2667          define_expand for lshrsi3 was added to vax.md.  */
2668     }
2669
2670   gcc_assert (temp != NULL_RTX || may_fail);
2671   return temp;
2672 }
2673
2674 /* Output a shift instruction for expression code CODE,
2675    with SHIFTED being the rtx for the value to shift,
2676    and AMOUNT the amount to shift by.
2677    Store the result in the rtx TARGET, if that is convenient.
2678    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2679    Return the rtx for where the value is.  */
2680
2681 rtx
2682 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2683               poly_int64 amount, rtx target, int unsignedp)
2684 {
2685   return expand_shift_1 (code, mode, shifted,
2686                          gen_int_shift_amount (mode, amount),
2687                          target, unsignedp);
2688 }
2689
2690 /* Likewise, but return 0 if that cannot be done.  */
2691
2692 static rtx
2693 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2694                     int amount, rtx target, int unsignedp)
2695 {
2696   return expand_shift_1 (code, mode,
2697                          shifted, GEN_INT (amount), target, unsignedp, true);
2698 }
2699
2700 /* Output a shift instruction for expression code CODE,
2701    with SHIFTED being the rtx for the value to shift,
2702    and AMOUNT the tree for the amount to shift by.
2703    Store the result in the rtx TARGET, if that is convenient.
2704    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2705    Return the rtx for where the value is.  */
2706
2707 rtx
2708 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2709                        tree amount, rtx target, int unsignedp)
2710 {
2711   return expand_shift_1 (code, mode,
2712                          shifted, expand_normal (amount), target, unsignedp);
2713 }
2714
2715 \f
2716 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2717                         const struct mult_cost *, machine_mode mode);
2718 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2719                               const struct algorithm *, enum mult_variant);
2720 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2721 static rtx extract_high_half (scalar_int_mode, rtx);
2722 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2723 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2724                                        int, int);
2725 /* Compute and return the best algorithm for multiplying by T.
2726    The algorithm must cost less than COST_LIMIT.
2727    If ALG_OUT->cost >= COST_LIMIT, no algorithm was found and all
2728    other fields of *ALG_OUT are undefined.
2729    MODE is the machine mode of the multiplication.  */
2730
2731 static void
2732 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2733             const struct mult_cost *cost_limit, machine_mode mode)
2734 {
2735   int m;
2736   struct algorithm *alg_in, *best_alg;
2737   struct mult_cost best_cost;
2738   struct mult_cost new_limit;
2739   int op_cost, op_latency;
2740   unsigned HOST_WIDE_INT orig_t = t;
2741   unsigned HOST_WIDE_INT q;
2742   int maxm, hash_index;
2743   bool cache_hit = false;
2744   enum alg_code cache_alg = alg_zero;
2745   bool speed = optimize_insn_for_speed_p ();
2746   scalar_int_mode imode;
2747   struct alg_hash_entry *entry_ptr;
2748
2749   /* Indicate that no algorithm is yet found.  If no algorithm
2750      is found, this value will be returned and indicate failure.  */
2751   alg_out->cost.cost = cost_limit->cost + 1;
2752   alg_out->cost.latency = cost_limit->latency + 1;
2753
2754   if (cost_limit->cost < 0
2755       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2756     return;
2757
2758   /* Be prepared for vector modes.  */
2759   imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2760
2761   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2762
2763   /* Restrict the bits of "t" to the multiplication's mode.  */
2764   t &= GET_MODE_MASK (imode);
2765
2766   /* t == 1 can be done in zero cost.  */
2767   if (t == 1)
2768     {
2769       alg_out->ops = 1;
2770       alg_out->cost.cost = 0;
2771       alg_out->cost.latency = 0;
2772       alg_out->op[0] = alg_m;
2773       return;
2774     }
2775
2776   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2777      fail now.  */
2778   if (t == 0)
2779     {
2780       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2781         return;
2782       else
2783         {
2784           alg_out->ops = 1;
2785           alg_out->cost.cost = zero_cost (speed);
2786           alg_out->cost.latency = zero_cost (speed);
2787           alg_out->op[0] = alg_zero;
2788           return;
2789         }
2790     }
2791
2792   /* We'll be needing a couple extra algorithm structures now.  */
2793
2794   alg_in = XALLOCA (struct algorithm);
2795   best_alg = XALLOCA (struct algorithm);
2796   best_cost = *cost_limit;
2797
2798   /* Compute the hash index.  */
2799   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2800
2801   /* See if we already know what to do for T.  */
2802   entry_ptr = alg_hash_entry_ptr (hash_index);
2803   if (entry_ptr->t == t
2804       && entry_ptr->mode == mode
2805       && entry_ptr->speed == speed
2806       && entry_ptr->alg != alg_unknown)
2807     {
2808       cache_alg = entry_ptr->alg;
2809
2810       if (cache_alg == alg_impossible)
2811         {
2812           /* The cache tells us that it's impossible to synthesize
2813              multiplication by T within entry_ptr->cost.  */
2814           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2815             /* COST_LIMIT is at least as restrictive as the one
2816                recorded in the hash table, in which case we have no
2817                hope of synthesizing a multiplication.  Just
2818                return.  */
2819             return;
2820
2821           /* If we get here, COST_LIMIT is less restrictive than the
2822              one recorded in the hash table, so we may be able to
2823              synthesize a multiplication.  Proceed as if we didn't
2824              have the cache entry.  */
2825         }
2826       else
2827         {
2828           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2829             /* The cached algorithm shows that this multiplication
2830                requires more cost than COST_LIMIT.  Just return.  This
2831                way, we don't clobber this cache entry with
2832                alg_impossible but retain useful information.  */
2833             return;
2834
2835           cache_hit = true;
2836
2837           switch (cache_alg)
2838             {
2839             case alg_shift:
2840               goto do_alg_shift;
2841
2842             case alg_add_t_m2:
2843             case alg_sub_t_m2:
2844               goto do_alg_addsub_t_m2;
2845
2846             case alg_add_factor:
2847             case alg_sub_factor:
2848               goto do_alg_addsub_factor;
2849
2850             case alg_add_t2_m:
2851               goto do_alg_add_t2_m;
2852
2853             case alg_sub_t2_m:
2854               goto do_alg_sub_t2_m;
2855
2856             default:
2857               gcc_unreachable ();
2858             }
2859         }
2860     }
2861
2862   /* If we have a group of zero bits at the low-order part of T, try
2863      multiplying by the remaining bits and then doing a shift.  */
2864
2865   if ((t & 1) == 0)
2866     {
2867     do_alg_shift:
2868       m = ctz_or_zero (t); /* m = number of low zero bits */
2869       if (m < maxm)
2870         {
2871           q = t >> m;
2872           /* The function expand_shift will choose between a shift and
2873              a sequence of additions, so the observed cost is given as
2874              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2875           op_cost = m * add_cost (speed, mode);
2876           if (shift_cost (speed, mode, m) < op_cost)
2877             op_cost = shift_cost (speed, mode, m);
2878           new_limit.cost = best_cost.cost - op_cost;
2879           new_limit.latency = best_cost.latency - op_cost;
2880           synth_mult (alg_in, q, &new_limit, mode);
2881
2882           alg_in->cost.cost += op_cost;
2883           alg_in->cost.latency += op_cost;
2884           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2885             {
2886               best_cost = alg_in->cost;
2887               std::swap (alg_in, best_alg);
2888               best_alg->log[best_alg->ops] = m;
2889               best_alg->op[best_alg->ops] = alg_shift;
2890             }
2891
2892           /* See if treating ORIG_T as a signed number yields a better
2893              sequence.  Try this sequence only for a negative ORIG_T
2894              as it would be useless for a non-negative ORIG_T.  */
2895           if ((HOST_WIDE_INT) orig_t < 0)
2896             {
2897               /* Shift ORIG_T as follows because a right shift of a
2898                  negative-valued signed type is implementation
2899                  defined.  */
2900               q = ~(~orig_t >> m);
2901               /* The function expand_shift will choose between a shift
2902                  and a sequence of additions, so the observed cost is
2903                  given as MIN (m * add_cost(speed, mode),
2904                  shift_cost(speed, mode, m)).  */
2905               op_cost = m * add_cost (speed, mode);
2906               if (shift_cost (speed, mode, m) < op_cost)
2907                 op_cost = shift_cost (speed, mode, m);
2908               new_limit.cost = best_cost.cost - op_cost;
2909               new_limit.latency = best_cost.latency - op_cost;
2910               synth_mult (alg_in, q, &new_limit, mode);
2911
2912               alg_in->cost.cost += op_cost;
2913               alg_in->cost.latency += op_cost;
2914               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2915                 {
2916                   best_cost = alg_in->cost;
2917                   std::swap (alg_in, best_alg);
2918                   best_alg->log[best_alg->ops] = m;
2919                   best_alg->op[best_alg->ops] = alg_shift;
2920                 }
2921             }
2922         }
2923       if (cache_hit)
2924         goto done;
2925     }
2926
2927   /* If we have an odd number, add or subtract one.  */
2928   if ((t & 1) != 0)
2929     {
2930       unsigned HOST_WIDE_INT w;
2931
2932     do_alg_addsub_t_m2:
2933       for (w = 1; (w & t) != 0; w <<= 1)
2934         ;
2935       /* If T was -1, then W will be zero after the loop.  This is another
2936          case where T ends with ...111.  Handling this with (T + 1) and
2937          subtract 1 produces slightly better code and results in algorithm
2938          selection much faster than treating it like the ...0111 case
2939          below.  */
2940       if (w == 0
2941           || (w > 2
2942               /* Reject the case where t is 3.
2943                  Thus we prefer addition in that case.  */
2944               && t != 3))
2945         {
2946           /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2947
2948           op_cost = add_cost (speed, mode);
2949           new_limit.cost = best_cost.cost - op_cost;
2950           new_limit.latency = best_cost.latency - op_cost;
2951           synth_mult (alg_in, t + 1, &new_limit, mode);
2952
2953           alg_in->cost.cost += op_cost;
2954           alg_in->cost.latency += op_cost;
2955           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2956             {
2957               best_cost = alg_in->cost;
2958               std::swap (alg_in, best_alg);
2959               best_alg->log[best_alg->ops] = 0;
2960               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2961             }
2962         }
2963       else
2964         {
2965           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2966
2967           op_cost = add_cost (speed, mode);
2968           new_limit.cost = best_cost.cost - op_cost;
2969           new_limit.latency = best_cost.latency - op_cost;
2970           synth_mult (alg_in, t - 1, &new_limit, mode);
2971
2972           alg_in->cost.cost += op_cost;
2973           alg_in->cost.latency += op_cost;
2974           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2975             {
2976               best_cost = alg_in->cost;
2977               std::swap (alg_in, best_alg);
2978               best_alg->log[best_alg->ops] = 0;
2979               best_alg->op[best_alg->ops] = alg_add_t_m2;
2980             }
2981         }
2982
2983       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2984          quickly with a - a * n for some appropriate constant n.  */
2985       m = exact_log2 (-orig_t + 1);
2986       if (m >= 0 && m < maxm)
2987         {
2988           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2989           /* If the target has a cheap shift-and-subtract insn use
2990              that in preference to a shift insn followed by a sub insn.
2991              Assume that the shift-and-sub is "atomic" with a latency
2992              equal to its cost, otherwise assume that on superscalar
2993              hardware the shift may be executed concurrently with the
2994              earlier steps in the algorithm.  */
2995           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2996             {
2997               op_cost = shiftsub1_cost (speed, mode, m);
2998               op_latency = op_cost;
2999             }
3000           else
3001             op_latency = add_cost (speed, mode);
3002
3003           new_limit.cost = best_cost.cost - op_cost;
3004           new_limit.latency = best_cost.latency - op_latency;
3005           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
3006                       &new_limit, mode);
3007
3008           alg_in->cost.cost += op_cost;
3009           alg_in->cost.latency += op_latency;
3010           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3011             {
3012               best_cost = alg_in->cost;
3013               std::swap (alg_in, best_alg);
3014               best_alg->log[best_alg->ops] = m;
3015               best_alg->op[best_alg->ops] = alg_sub_t_m2;
3016             }
3017         }
3018
3019       if (cache_hit)
3020         goto done;
3021     }
3022
3023   /* Look for factors of t of the form
3024      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3025      If we find such a factor, we can multiply by t using an algorithm that
3026      multiplies by q, shift the result by m and add/subtract it to itself.
3027
3028      We search for large factors first and loop down, even if large factors
3029      are less probable than small; if we find a large factor we will find a
3030      good sequence quickly, and therefore be able to prune (by decreasing
3031      COST_LIMIT) the search.  */
3032
3033  do_alg_addsub_factor:
3034   for (m = floor_log2 (t - 1); m >= 2; m--)
3035     {
3036       unsigned HOST_WIDE_INT d;
3037
3038       d = (HOST_WIDE_INT_1U << m) + 1;
3039       if (t % d == 0 && t > d && m < maxm
3040           && (!cache_hit || cache_alg == alg_add_factor))
3041         {
3042           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3043           if (shiftadd_cost (speed, mode, m) <= op_cost)
3044             op_cost = shiftadd_cost (speed, mode, m);
3045
3046           op_latency = op_cost;
3047
3048
3049           new_limit.cost = best_cost.cost - op_cost;
3050           new_limit.latency = best_cost.latency - op_latency;
3051           synth_mult (alg_in, t / d, &new_limit, mode);
3052
3053           alg_in->cost.cost += op_cost;
3054           alg_in->cost.latency += op_latency;
3055           if (alg_in->cost.latency < op_cost)
3056             alg_in->cost.latency = op_cost;
3057           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3058             {
3059               best_cost = alg_in->cost;
3060               std::swap (alg_in, best_alg);
3061               best_alg->log[best_alg->ops] = m;
3062               best_alg->op[best_alg->ops] = alg_add_factor;
3063             }
3064           /* Other factors will have been taken care of in the recursion.  */
3065           break;
3066         }
3067
3068       d = (HOST_WIDE_INT_1U << m) - 1;
3069       if (t % d == 0 && t > d && m < maxm
3070           && (!cache_hit || cache_alg == alg_sub_factor))
3071         {
3072           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3073           if (shiftsub0_cost (speed, mode, m) <= op_cost)
3074             op_cost = shiftsub0_cost (speed, mode, m);
3075
3076           op_latency = op_cost;
3077
3078           new_limit.cost = best_cost.cost - op_cost;
3079           new_limit.latency = best_cost.latency - op_latency;
3080           synth_mult (alg_in, t / d, &new_limit, mode);
3081
3082           alg_in->cost.cost += op_cost;
3083           alg_in->cost.latency += op_latency;
3084           if (alg_in->cost.latency < op_cost)
3085             alg_in->cost.latency = op_cost;
3086           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3087             {
3088               best_cost = alg_in->cost;
3089               std::swap (alg_in, best_alg);
3090               best_alg->log[best_alg->ops] = m;
3091               best_alg->op[best_alg->ops] = alg_sub_factor;
3092             }
3093           break;
3094         }
3095     }
3096   if (cache_hit)
3097     goto done;
3098
3099   /* Try shift-and-add (load effective address) instructions,
3100      i.e. do a*3, a*5, a*9.  */
3101   if ((t & 1) != 0)
3102     {
3103     do_alg_add_t2_m:
3104       q = t - 1;
3105       m = ctz_hwi (q);
3106       if (q && m < maxm)
3107         {
3108           op_cost = shiftadd_cost (speed, mode, m);
3109           new_limit.cost = best_cost.cost - op_cost;
3110           new_limit.latency = best_cost.latency - op_cost;
3111           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3112
3113           alg_in->cost.cost += op_cost;
3114           alg_in->cost.latency += op_cost;
3115           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3116             {
3117               best_cost = alg_in->cost;
3118               std::swap (alg_in, best_alg);
3119               best_alg->log[best_alg->ops] = m;
3120               best_alg->op[best_alg->ops] = alg_add_t2_m;
3121             }
3122         }
3123       if (cache_hit)
3124         goto done;
3125
3126     do_alg_sub_t2_m:
3127       q = t + 1;
3128       m = ctz_hwi (q);
3129       if (q && m < maxm)
3130         {
3131           op_cost = shiftsub0_cost (speed, mode, m);
3132           new_limit.cost = best_cost.cost - op_cost;
3133           new_limit.latency = best_cost.latency - op_cost;
3134           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3135
3136           alg_in->cost.cost += op_cost;
3137           alg_in->cost.latency += op_cost;
3138           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3139             {
3140               best_cost = alg_in->cost;
3141               std::swap (alg_in, best_alg);
3142               best_alg->log[best_alg->ops] = m;
3143               best_alg->op[best_alg->ops] = alg_sub_t2_m;
3144             }
3145         }
3146       if (cache_hit)
3147         goto done;
3148     }
3149
3150  done:
3151   /* If best_cost has not decreased, we have not found any algorithm.  */
3152   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3153     {
3154       /* We failed to find an algorithm.  Record alg_impossible for
3155          this case (that is, <T, MODE, COST_LIMIT>) so that next time
3156          we are asked to find an algorithm for T within the same or
3157          lower COST_LIMIT, we can immediately return to the
3158          caller.  */
3159       entry_ptr->t = t;
3160       entry_ptr->mode = mode;
3161       entry_ptr->speed = speed;
3162       entry_ptr->alg = alg_impossible;
3163       entry_ptr->cost = *cost_limit;
3164       return;
3165     }
3166
3167   /* Cache the result.  */
3168   if (!cache_hit)
3169     {
3170       entry_ptr->t = t;
3171       entry_ptr->mode = mode;
3172       entry_ptr->speed = speed;
3173       entry_ptr->alg = best_alg->op[best_alg->ops];
3174       entry_ptr->cost.cost = best_cost.cost;
3175       entry_ptr->cost.latency = best_cost.latency;
3176     }
3177
3178   /* If we are getting a too long sequence for `struct algorithm'
3179      to record, make this search fail.  */
3180   if (best_alg->ops == MAX_BITS_PER_WORD)
3181     return;
3182
3183   /* Copy the algorithm from temporary space to the space at alg_out.
3184      We avoid using structure assignment because the majority of
3185      best_alg is normally undefined, and this is a critical function.  */
3186   alg_out->ops = best_alg->ops + 1;
3187   alg_out->cost = best_cost;
3188   memcpy (alg_out->op, best_alg->op,
3189           alg_out->ops * sizeof *alg_out->op);
3190   memcpy (alg_out->log, best_alg->log,
3191           alg_out->ops * sizeof *alg_out->log);
3192 }
3193 \f
3194 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3195    Three candidate sequences are costed with synth_mult:
3196
3197        - a shift/add sequence based on VAL itself (basic_variant)
3198        - a shift/add sequence based on -VAL, followed by a negation
3199          (negate_variant)
3200        - a shift/add sequence based on VAL - 1, followed by an addition
3201          (add_variant).
3202
3203    -VAL and VAL - 1 are formed in unsigned arithmetic, so they wrap
3204    rather than overflow when VAL is the most negative HOST_WIDE_INT.
3205
3206    Return true if the cheapest of these costs less than MULT_COST,
3207    describing the algorithm in *ALG and the final fixup in *VARIANT.
3208    A negative MULT_COST fails at once without writing *ALG or *VARIANT.  */
3209
3210 bool
3211 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3212                      struct algorithm *alg, enum mult_variant *variant,
3213                      int mult_cost)
3214 {
3215   struct algorithm alg2;
3216   struct mult_cost limit;
3217   int op_cost;
3218   bool speed = optimize_insn_for_speed_p ();
3219   bool have_alg;
3220   /* VAL reinterpreted as unsigned, so that negating or decrementing it
3221      is well defined for every input.  */
3222   unsigned HOST_WIDE_INT uval = val;
3223
3224   /* Fail quickly for impossible bounds.  */
3225   if (mult_cost < 0)
3226     return false;
3227
3228   /* Ensure that mult_cost provides a reasonable upper bound.
3229      Any constant multiplication can be performed with less
3230      than 2 * bits additions.  */
3231   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3232   if (mult_cost > op_cost)
3233     mult_cost = op_cost;
3234
3235   /* Basic variant: synthesize VAL itself within the whole budget.  */
3236   *variant = basic_variant;
3237   limit.cost = mult_cost;
3238   limit.latency = mult_cost;
3239   synth_mult (alg, val, &limit, mode);
3240
3241   /* Negate variant.  This works only if the inverted value actually
3242      fits in an `unsigned int'.  */
3243   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3244     {
3245       /* Budget: the best cost so far if MULT_COST_LESS ranks it below
3246          MULT_COST, else MULT_COST; less the cost of the negation.  */
3247       op_cost = neg_cost (speed, mode);
3248       have_alg = MULT_COST_LESS (&alg->cost, mult_cost);
3249       limit.cost = (have_alg ? alg->cost.cost : mult_cost) - op_cost;
3250       limit.latency = (have_alg ? alg->cost.latency : mult_cost) - op_cost;
3251
3252       synth_mult (&alg2, -uval, &limit, mode);
3253       alg2.cost.cost += op_cost;
3254       alg2.cost.latency += op_cost;
3255       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3256         *alg = alg2, *variant = negate_variant;
3257     }
3258
3259   /* Add variant.  This proves very useful for division-by-constant.
3260      The budget is formed as above, less the cost of the addition.  */
3261   op_cost = add_cost (speed, mode);
3262   have_alg = MULT_COST_LESS (&alg->cost, mult_cost);
3263   limit.cost = (have_alg ? alg->cost.cost : mult_cost) - op_cost;
3264   limit.latency = (have_alg ? alg->cost.latency : mult_cost) - op_cost;
3265
3266   synth_mult (&alg2, uval - 1, &limit, mode);
3267   alg2.cost.cost += op_cost;
3268   alg2.cost.latency += op_cost;
3269   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3270     *alg = alg2, *variant = add_variant;
3271
3272   return MULT_COST_LESS (&alg->cost, mult_cost);
3273 }
3274
3275 /* A subroutine of expand_mult, used for constant multiplications.
3276    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3277    convenient.  Use the shift/add sequence described by ALG and apply
3278    the final fixup specified by VARIANT.

        ALG->op[0] must be alg_zero or alg_m.  VAL itself is not used to
        generate code; it only cross-checks the result: the multiplier
        accumulated while expanding ALG and VARIANT must equal VAL in the
        bits of MODE's inner mode, otherwise the final assertion fails.
        Return the rtx holding the product.  */
3279
3280 static rtx
3281 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3282                    rtx target, const struct algorithm *alg,
3283                    enum mult_variant variant)
3284 {
       /* The multiplier synthesized so far; it is updated in step with
          ACCUM and finally checked against VAL.  */
3285   unsigned HOST_WIDE_INT val_so_far;
3286   rtx_insn *insn;
3287   rtx accum, tem;
3288   int opno;
3289   machine_mode nmode;
3290
3291   /* Avoid referencing memory over and over and invalid sharing
3292      on SUBREGs.  */
3293   op0 = force_reg (mode, op0);
3294
3295   /* ACCUM starts out either as OP0 or as a zero, depending on
3296      the first operation.  */
3297
3298   if (alg->op[0] == alg_zero)
3299     {
3300       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3301       val_so_far = 0;
3302     }
3303   else if (alg->op[0] == alg_m)
3304     {
3305       accum = copy_to_mode_reg (mode, op0);
3306       val_so_far = 1;
3307     }
3308   else
3309     gcc_unreachable ();
3310
       /* Expand the remaining steps in order; step OPNO combines ACCUM
          and/or OP0 using the shift count ALG->log[OPNO].  */
3311   for (opno = 1; opno < alg->ops; opno++)
3312     {
3313       int log = alg->log[opno];
           /* Destination hints.  When optimizing, none is given.  Otherwise
              shifts and adds are steered into ACCUM, except that the last
              step goes to TARGET when TARGET is nonnull and no add_variant
              fixup follows.  */
3314       rtx shift_subtarget = optimize ? 0 : accum;
3315       rtx add_target
3316         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3317            && !optimize)
3318           ? target : 0;
3319       rtx accum_target = optimize ? 0 : accum;
3320       rtx accum_inner;
3321
3322       switch (alg->op[opno])
3323         {
3324         case alg_shift:
               /* ACCUM = ACCUM << LOG.  */
3325           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3326           /* REG_EQUAL note will be attached to the following insn.  */
3327           emit_move_insn (accum, tem);
3328           val_so_far <<= log;
3329           break;
3330
3331         case alg_add_t_m2:
               /* ACCUM = ACCUM + (OP0 << LOG).  */
3332           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3333           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3334                                  add_target ? add_target : accum_target);
3335           val_so_far += HOST_WIDE_INT_1U << log;
3336           break;
3337
3338         case alg_sub_t_m2:
               /* ACCUM = ACCUM - (OP0 << LOG).  */
3339           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3340           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3341                                  add_target ? add_target : accum_target);
3342           val_so_far -= HOST_WIDE_INT_1U << log;
3343           break;
3344
3345         case alg_add_t2_m:
               /* ACCUM = (ACCUM << LOG) + OP0.  */
3346           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3347                                 log, shift_subtarget, 0);
3348           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3349                                  add_target ? add_target : accum_target);
3350           val_so_far = (val_so_far << log) + 1;
3351           break;
3352
3353         case alg_sub_t2_m:
               /* ACCUM = (ACCUM << LOG) - OP0.  */
3354           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3355                                 log, shift_subtarget, 0);
3356           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3357                                  add_target ? add_target : accum_target);
3358           val_so_far = (val_so_far << log) - 1;
3359           break;
3360
3361         case alg_add_factor:
               /* ACCUM = ACCUM + (ACCUM << LOG).  */
3362           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3363           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3364                                  add_target ? add_target : accum_target);
3365           val_so_far += val_so_far << log;
3366           break;
3367
3368         case alg_sub_factor:
               /* ACCUM = (ACCUM << LOG) - ACCUM.  */
3369           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3370           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3371                                  (add_target
3372                                   ? add_target : (optimize ? 0 : tem)));
3373           val_so_far = (val_so_far << log) - val_so_far;
3374           break;
3375
3376         default:
3377           gcc_unreachable ();
3378         }
3379
3380       if (SCALAR_INT_MODE_P (mode))
3381         {
3382           /* Write a REG_EQUAL note on the last insn so that we can cse
3383              multiplication sequences.  Note that if ACCUM is a SUBREG,
3384              we've set the inner register and must properly indicate that.  */
3385           tem = op0, nmode = mode;
3386           accum_inner = accum;
3387           if (GET_CODE (accum) == SUBREG)
3388             {
3389               accum_inner = SUBREG_REG (accum);
3390               nmode = GET_MODE (accum_inner);
3391               tem = gen_lowpart (nmode, op0);
3392             }
3393
3394           /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3395              In that case, only the low bits of accum would be guaranteed to
3396              be equal to the content of the REG_EQUAL note, the upper bits
3397              can be anything.  */
3398           if (!paradoxical_subreg_p (tem))
3399             {
3400               insn = get_last_insn ();
3401               wide_int wval_so_far
3402                 = wi::uhwi (val_so_far,
3403                             GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3404               rtx c = immed_wide_int_const (wval_so_far, nmode);
3405               set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3406                                 accum_inner);
3407             }
3408         }
3409     }
3410
       /* Apply the final fixup requested by VARIANT, mirroring it in
          VAL_SO_FAR; basic_variant needs none.  */
3411   if (variant == negate_variant)
3412     {
3413       val_so_far = -val_so_far;
3414       accum = expand_unop (mode, neg_optab, accum, target, 0);
3415     }
3416   else if (variant == add_variant)
3417     {
3418       val_so_far = val_so_far + 1;
3419       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3420     }
3421
3422   /* Compare only the bits of val and val_so_far that are significant
3423      in the result mode, to avoid sign-/zero-extension confusion.  */
3424   nmode = GET_MODE_INNER (mode);
3425   val &= GET_MODE_MASK (nmode);
3426   val_so_far &= GET_MODE_MASK (nmode);
3427   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3428
3429   return accum;
3430 }
3431
3432 /* Perform a multiplication and return an rtx for the result.
3433    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3434    TARGET is a suggestion for where to store the result (an rtx).
3435
3436    We check specially for a constant integer as OP1.
3437    If you want this check for OP0 as well, then before calling
3438    you should swap the two operands if OP0 would be constant.
        Note that the function itself already swaps the operands on entry
        when OP0 is CONSTANT_P.

        UNSIGNEDP is forwarded to the shift and optab expanders.  When
        flag_trapv is set, MODE is a scalar integer mode and UNSIGNEDP is
        zero, negv_optab/smulv_optab are used in place of
        neg_optab/smul_optab and no shift/add sequence is synthesized.

        If NO_LIBCALL, the final expand_binop uses OPTAB_WIDEN rather than
        OPTAB_LIB_WIDEN, and only in that case is a null result tolerated.  */
3439
3440 rtx
3441 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3442              int unsignedp, bool no_libcall)
3443 {
3444   enum mult_variant variant;
3445   struct algorithm algorithm;
3446   rtx scalar_op1;
3447   int max_cost;
3448   bool speed = optimize_insn_for_speed_p ();
3449   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3450
       /* Put a constant operand second so that the checks below see it
          as OP1.  */
3451   if (CONSTANT_P (op0))
3452     std::swap (op0, op1);
3453
3454   /* For vectors, there are several simplifications that can be made if
3455      all elements of the vector constant are identical.  */
3456   scalar_op1 = unwrap_const_vec_duplicate (op1);
3457
3458   if (INTEGRAL_MODE_P (mode))
3459     {
3460       rtx fake_reg;
3461       HOST_WIDE_INT coeff;
3462       bool is_neg;
3463       int mode_bitsize;
3464
           /* Multiplication by 0, 1 and -1 needs no multiply at all.  */
3465       if (op1 == CONST0_RTX (mode))
3466         return op1;
3467       if (op1 == CONST1_RTX (mode))
3468         return op0;
3469       if (op1 == CONSTM1_RTX (mode))
3470         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3471                             op0, target, 0);
3472
3473       if (do_trapv)
3474         goto skip_synth;
3475
3476       /* If mode is integer vector mode, check if the backend supports
3477          vector lshift (by scalar or vector) at all.  If not, we can't use
3478          synthesized multiply.  */
3479       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3480           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3481           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3482         goto skip_synth;
3483
3484       /* These are the operations that are potentially turned into
3485          a sequence of shifts and additions.  */
3486       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3487
3488       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3489          less than or equal in size to `unsigned int' this doesn't matter.
3490          If the mode is larger than `unsigned int', then synth_mult works
3491          only if the constant value exactly fits in an `unsigned int' without
3492          any truncation.  This means that multiplying by negative values does
3493          not work; results are off by 2^32 on a 32 bit machine.  */
3494       if (CONST_INT_P (scalar_op1))
3495         {
3496           coeff = INTVAL (scalar_op1);
3497           is_neg = coeff < 0;
3498         }
3499 #if TARGET_SUPPORTS_WIDE_INT
3500       else if (CONST_WIDE_INT_P (scalar_op1))
3501 #else
3502       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3503 #endif
3504         {
3505           int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3506           /* Perfect power of 2 (other than 1, which is handled above).  */
3507           if (shift > 0)
3508             return expand_shift (LSHIFT_EXPR, mode, op0,
3509                                  shift, target, unsignedp);
3510           else
3511             goto skip_synth;
3512         }
3513       else
3514         goto skip_synth;
3515
3516       /* We used to test optimize here, on the grounds that it's better to
3517          produce a smaller program when -O is not used.  But this causes
3518          such a terrible slowdown sometimes that it seems better to always
3519          use synth_mult.  */
3520
3521       /* Special case powers of two.  */
3522       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3523           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3524         return expand_shift (LSHIFT_EXPR, mode, op0,
3525                              floor_log2 (coeff), target, unsignedp);
3526
           /* Placeholder register used only to build a MULT rtx whose cost
              can be queried.  */
3527       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3528
3529       /* Attempt to handle multiplication of DImode values by negative
3530          coefficients, by performing the multiplication by a positive
3531          multiplier and then inverting the result.  */
3532       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3533         {
3534           /* It's safe to use -coeff even for INT_MIN, as the
3535              result is interpreted as an unsigned coefficient.
3536              Exclude cost of op0 from max_cost to match the cost
3537              calculation of the synth_mult.  */
3538           coeff = -(unsigned HOST_WIDE_INT) coeff;
3539           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3540                                     mode, speed)
3541                       - neg_cost (speed, mode));
3542           if (max_cost <= 0)
3543             goto skip_synth;
3544
3545           /* Special case powers of two.  */
3546           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3547             {
3548               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3549                                        floor_log2 (coeff), target, unsignedp);
3550               return expand_unop (mode, neg_optab, temp, target, 0);
3551             }
3552
3553           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3554                                    max_cost))
3555             {
3556               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3557                                             &algorithm, variant);
3558               return expand_unop (mode, neg_optab, temp, target, 0);
3559             }
3560           goto skip_synth;
3561         }
3562
3563       /* Exclude cost of op0 from max_cost to match the cost
3564          calculation of the synth_mult.  */
3565       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3566       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3567         return expand_mult_const (mode, op0, coeff, target,
3568                                   &algorithm, variant);
3569     }
     /* Reached whenever no shift/add sequence was emitted above.  */
3570  skip_synth:
3571
3572   /* Expand x*2.0 as x+x.  */
3573   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3574       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3575     {
3576       op0 = force_reg (GET_MODE (op0), op0);
3577       return expand_binop (mode, add_optab, op0, op0,
3578                            target, unsignedp,
3579                            no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3580     }
3581
3582   /* This used to use umul_optab if unsigned, but for non-widening multiply
3583      there is no difference between signed and unsigned.  */
3584   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3585                       op0, op1, target, unsignedp,
3586                       no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3587   gcc_assert (op0 || no_libcall);
3588   return op0;
3589 }
3590
3591 /* Estimate the cost of multiplying a register by COEFF in MODE.  The
3592    result is the cost of the sequence picked by choose_mult_variant when
3593    it succeeds, and otherwise the cost that set_src_cost assigns, for
3594    SPEED, to a MULT of a register by itself.  */
3595
3596 int
3597 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3598 {
3599   struct algorithm synth_alg;
3600   enum mult_variant fixup;
3601
3602   /* Price a plain MULT whose operands are one placeholder register.  */
3603   rtx dummy = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3604   int insn_cost = set_src_cost (gen_rtx_MULT (mode, dummy, dummy), mode, speed);
3605   if (!choose_mult_variant (mode, coeff, &synth_alg, &fixup, insn_cost))
3606     return insn_cost;
3607   return synth_alg.cost.cost;
3608 }
3609
3610 /* Expand a widening multiplication of OP0 by OP1 and return an rtx for
3611    the product.  MODE is the mode of the value and TARGET a suggestion
3612    for where to store it.  THIS_OPTAB, the optab used when no shortcut
3613    applies, must be either umul_widen_optab or smul_widen_optab.
3614
3615    A constant integer OP1 is checked for specially, comparing the cost of
3616    a widening multiply against that of a sequence of shifts and adds.  */
3617
3618 rtx
3619 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3620                       int unsignedp, optab this_optab)
3621 {
3622   bool speed = optimize_insn_for_speed_p ();
3623   rtx cop1 = NULL_RTX;
3624   bool const_ok = false;
3625
3626   /* OP1 qualifies if it is still a CONST_INT once converted to MODE and
3627      is either nonnegative or in an HWI-computable mode.  */
3628   if (CONST_INT_P (op1) && GET_MODE (op0) != VOIDmode)
3629     {
3630       cop1 = convert_modes (mode, GET_MODE (op0), op1,
3631                             this_optab == umul_widen_optab);
3632       if (cop1 && CONST_INT_P (cop1))
3633         const_ok = INTVAL (cop1) >= 0 || HWI_COMPUTABLE_MODE_P (mode);
3634     }
3635
3636   if (const_ok)
3637     {
3638       enum mult_variant variant;
3639       struct algorithm algorithm;
3640       HOST_WIDE_INT coeff = INTVAL (cop1);
3641
3642       if (coeff == 0)
3643         return CONST0_RTX (mode);
3644
3645       /* A power of two needs only a shift of the converted OP0.  */
3646       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3647         {
3648           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3649           return expand_shift (LSHIFT_EXPR, mode, op0,
3650                                floor_log2 (coeff), target, unsignedp);
3651         }
3652
3653       /* Use a shift/add sequence if it is cheaper than mul_widen_cost.  */
3654       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3655                                mul_widen_cost (speed, mode)))
3656         {
3657           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3658           return expand_mult_const (mode, op0, coeff, target,
3659                                     &algorithm, variant);
3660         }
3661     }
3662   return expand_binop (mode, this_optab, op0, op1, target,
3663                        unsignedp, OPTAB_LIB_WIDEN);
3664 }
3665 \f
3666 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3667    replace division by D, and put the least significant N bits of the result
3668    in *MULTIPLIER_PTR and return the most significant bit.
3669
3670    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3671    needed precision is in PRECISION (should be <= N).
3672
3673    PRECISION should be as small as possible so this function can choose
3674    multiplier more freely.
3675
3676    The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3677    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3678
3679    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3680    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.

        The function asserts that the rounded-up logarithm of D does not
        exceed N.  */
3681
3682 unsigned HOST_WIDE_INT
3683 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3684                    unsigned HOST_WIDE_INT *multiplier_ptr,
3685                    int *post_shift_ptr, int *lgup_ptr)
3686 {
3687   int lgup, post_shift;
       /* Bit positions of the two powers of two that form the dividends
          of MLOW and MHIGH below.  */
3688   int pow, pow2;
3689
3690   /* lgup = ceil(log2(divisor)); */
3691   lgup = ceil_log2 (d);
3692
3693   gcc_assert (lgup <= n);
3694
3695   pow = n + lgup;
3696   pow2 = n + lgup - precision;
3697
3698   /* mlow = 2^(N + lgup)/d */
3699   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3700   wide_int mlow = wi::udiv_trunc (val, d);
3701
3702   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3703   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3704   wide_int mhigh = wi::udiv_trunc (val, d);
3705
3706   /* If precision == N, then mlow, mhigh exceed 2^N
3707      (but they do not exceed 2^(N+1)).  */
3708
3709   /* Reduce to lowest terms.  Each iteration replaces MLOW and MHIGH by
          the HOST_BITS_PER_WIDE_INT bits extracted from them starting at
          bit 1, and lowers POST_SHIFT by one; it stops as soon as the
          reduced MLOW would no longer be below the reduced MHIGH.  */
3710   for (post_shift = lgup; post_shift > 0; post_shift--)
3711     {
3712       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3713                                                        HOST_BITS_PER_WIDE_INT);
3714       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3715                                                        HOST_BITS_PER_WIDE_INT);
3716       if (ml_lo >= mh_lo)
3717         break;
3718
3719       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3720       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3721     }
3722
3723   *post_shift_ptr = post_shift;
3724   *lgup_ptr = lgup;
       /* Split MHIGH: its low N bits go to *MULTIPLIER_PTR and the return
          value tells whether anything remains above them.  */
3725   if (n < HOST_BITS_PER_WIDE_INT)
3726     {
3727       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3728       *multiplier_ptr = mhigh.to_uhwi () & mask;
3729       return mhigh.to_uhwi () > mask;
3730     }
3731   else
3732     {
           /* Here the low HOST_WIDE_INT of MHIGH is the multiplier and the
              single bit extracted at position HOST_BITS_PER_WIDE_INT is
              returned.  */
3733       *multiplier_ptr = mhigh.to_uhwi ();
3734       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3735     }
3736 }
3737
3738 /* Return Y such that X * Y is congruent to 1 modulo 2**N, i.e. the
3739    inverse of X mod 2**N.  X is assumed to be odd.  */
3740
3741 static unsigned HOST_WIDE_INT
3742 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3743 {
3744   unsigned HOST_WIDE_INT inverse = x;
3745   unsigned HOST_WIDE_INT low_n_bits;
3746
3747   /* Build the mask of the low N bits without shifting by the full
3748      width of a HOST_WIDE_INT.  */
3749   if (n == HOST_BITS_PER_WIDE_INT)
3750     low_n_bits = HOST_WIDE_INT_M1U;
3751   else
3752     low_n_bits = (HOST_WIDE_INT_1U << n) - 1;
3753
3754   /* Because X is odd, Y = X already satisfies X * Y == 1 (mod 2**3).
3755      Each step Y <- Y * (2 - X * Y) doubles the number of correct low
3756      bits, so keep going until at least N bits are good.  The starting
3757      value is returned unmasked when N <= 3.  */
3758   for (int good_bits = 3; good_bits < n; good_bits *= 2)
3759     {
3760       /* Reduce modulo 2**N.  */
3761       inverse = (inverse * (2 - x * inverse)) & low_n_bits;
3762     }
3763   return inverse;
3764 }
3765
3766 /* Emit code to correct ADJ_OPERAND, the high half of the product
3767    OP0 x OP1, after a multiplication of the wrong signedness flavor.
3768    If UNSIGNEDP is nonzero, the signed product is adjusted to become
3769    unsigned; if UNSIGNEDP is zero, the unsigned product is adjusted to
3770    become signed.
3771
3772    Two terms are applied in turn, by addition if UNSIGNEDP and by
3773    subtraction otherwise: (OP0 >> (bitsize - 1)) & OP1, then
3774    (OP1 >> (bitsize - 1)) & OP0.  MODE is the mode of operation and the
3775    result is put in TARGET if that is convenient.  */
3776
3777 rtx
3778 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3779                              rtx op1, rtx target, int unsignedp)
3780 {
3781   const enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3782   const int top_bit = GET_MODE_BITSIZE (mode) - 1;
3783   rtx term;
3784   rtx combined;
3785
3786   /* First term: OP1 masked by OP0 shifted down by TOP_BIT.  */
3787   term = expand_shift (RSHIFT_EXPR, mode, op0, top_bit, NULL_RTX, 0);
3788   term = expand_and (mode, term, op1, NULL_RTX);
3789   combined = gen_rtx_fmt_ee (adj_code, mode, adj_operand, term);
3790   adj_operand = force_operand (combined, adj_operand);
3791
3792   /* Second term: OP0 masked by OP1 shifted down by TOP_BIT.  */
3793   term = expand_shift (RSHIFT_EXPR, mode, op1, top_bit, NULL_RTX, 0);
3794   term = expand_and (mode, term, op0, NULL_RTX);
3795   combined = gen_rtx_fmt_ee (adj_code, mode, adj_operand, term);
3796   return force_operand (combined, target);
3797 }
3798
3799 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3800
3801 static rtx
3802 extract_high_half (scalar_int_mode mode, rtx op)
3803 {
3804   if (mode == word_mode)
3805     return gen_highpart (mode, op);
3806
3807   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3808
3809   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3810                      GET_MODE_BITSIZE (mode), 0, 1);
3811   return convert_modes (mode, wider_mode, op, 0);
3812 }
3813
3814 /* Like expmed_mult_highpart, but only consider using a multiplication
3815    optab.  OP1 is an rtx for the constant operand.  */
3816
3817 static rtx
3818 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3819                             rtx target, int unsignedp, int max_cost)
3820 {
3821   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3822   optab moptab;
3823   rtx tem;
3824   int size;
3825   bool speed = optimize_insn_for_speed_p ();
3826
3827   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3828
3829   size = GET_MODE_BITSIZE (mode);
3830
3831   /* Firstly, try using a multiplication insn that only generates the needed
3832      high part of the product, and in the sign flavor of unsignedp.  */
3833   if (mul_highpart_cost (speed, mode) < max_cost)
3834     {
3835       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3836       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3837                           unsignedp, OPTAB_DIRECT);
3838       if (tem)
3839         return tem;
3840     }
3841
3842   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3843      Need to adjust the result after the multiplication.  */
3844   if (size - 1 < BITS_PER_WORD
3845       && (mul_highpart_cost (speed, mode)
3846           + 2 * shift_cost (speed, mode, size-1)
3847           + 4 * add_cost (speed, mode) < max_cost))
3848     {
3849       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3850       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3851                           unsignedp, OPTAB_DIRECT);
3852       if (tem)
3853         /* We used the wrong signedness.  Adjust the result.  */
3854         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3855                                             tem, unsignedp);
3856     }
3857
3858   /* Try widening multiplication.  */
3859   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3860   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3861       && mul_widen_cost (speed, wider_mode) < max_cost)
3862     {
3863       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3864                           unsignedp, OPTAB_WIDEN);
3865       if (tem)
3866         return extract_high_half (mode, tem);
3867     }
3868
3869   /* Try widening the mode and perform a non-widening multiplication.  */
3870   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3871       && size - 1 < BITS_PER_WORD
3872       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3873           < max_cost))
3874     {
3875       rtx_insn *insns;
3876       rtx wop0, wop1;
3877
3878       /* We need to widen the operands, for example to ensure the
3879          constant multiplier is correctly sign or zero extended.
3880          Use a sequence to clean-up any instructions emitted by
3881          the conversions if things don't work out.  */
3882       start_sequence ();
3883       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3884       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3885       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3886                           unsignedp, OPTAB_WIDEN);
3887       insns = get_insns ();
3888       end_sequence ();
3889
3890       if (tem)
3891         {
3892           emit_insn (insns);
3893           return extract_high_half (mode, tem);
3894         }
3895     }
3896
3897   /* Try widening multiplication of opposite signedness, and adjust.  */
3898   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3899   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3900       && size - 1 < BITS_PER_WORD
3901       && (mul_widen_cost (speed, wider_mode)
3902           + 2 * shift_cost (speed, mode, size-1)
3903           + 4 * add_cost (speed, mode) < max_cost))
3904     {
3905       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3906                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3907       if (tem != 0)
3908         {
3909           tem = extract_high_half (mode, tem);
3910           /* We used the wrong signedness.  Adjust the result.  */
3911           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3912                                               target, unsignedp);
3913         }
3914     }
3915
3916   return 0;
3917 }
3918
3919 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3920    putting the high half of the result in TARGET if that is convenient,
3921    and return where the result is.  If the operation cannot be performed,
3922    0 is returned.
3923
3924    MODE is the mode of operation and result.
3925
3926    UNSIGNEDP nonzero means unsigned multiply.
3927
3928    MAX_COST is the total allowed cost for the expanded RTL.  */
3929
3930 static rtx
3931 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3932                       rtx target, int unsignedp, int max_cost)
3933 {
3934   unsigned HOST_WIDE_INT cnst1;
3935   int extra_cost;
3936   bool sign_adjust = false;
3937   enum mult_variant variant;
3938   struct algorithm alg;
3939   rtx tem;
3940   bool speed = optimize_insn_for_speed_p ();
3941
3942   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3943   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3944
3945   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3946
3947   /* We can't optimize modes wider than BITS_PER_WORD.
3948      ??? We might be able to perform double-word arithmetic if
3949      mode == word_mode, however all the cost calculations in
3950      synth_mult etc. assume single-word operations.  */
3951   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3952   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3953     return expmed_mult_highpart_optab (mode, op0, op1, target,
3954                                        unsignedp, max_cost);
3955
3956   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3957
3958   /* Check whether we try to multiply by a negative constant.  */
3959   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3960     {
3961       sign_adjust = true;
3962       extra_cost += add_cost (speed, mode);
3963     }
3964
3965   /* See whether shift/add multiplication is cheap enough.  */
3966   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3967                            max_cost - extra_cost))
3968     {
3969       /* See whether the specialized multiplication optabs are
3970          cheaper than the shift/add version.  */
3971       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3972                                         alg.cost.cost + extra_cost);
3973       if (tem)
3974         return tem;
3975
3976       tem = convert_to_mode (wider_mode, op0, unsignedp);
3977       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3978       tem = extract_high_half (mode, tem);
3979
3980       /* Adjust result for signedness.  */
3981       if (sign_adjust)
3982         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3983
3984       return tem;
3985     }
3986   return expmed_mult_highpart_optab (mode, op0, op1, target,
3987                                      unsignedp, max_cost);
3988 }
3989
3990
3991 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3992
3993 static rtx
3994 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3995 {
3996   rtx result, temp, shift;
3997   rtx_code_label *label;
3998   int logd;
3999   int prec = GET_MODE_PRECISION (mode);
4000
4001   logd = floor_log2 (d);
4002   result = gen_reg_rtx (mode);
4003
4004   /* Avoid conditional branches when they're expensive.  */
4005   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
4006       && optimize_insn_for_speed_p ())
4007     {
4008       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
4009                                       mode, 0, -1);
4010       if (signmask)
4011         {
4012           HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4013           signmask = force_reg (mode, signmask);
4014           shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4015
4016           /* Use the rtx_cost of a LSHIFTRT instruction to determine
4017              which instruction sequence to use.  If logical right shifts
4018              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
4019              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
4020
4021           temp = gen_rtx_LSHIFTRT (mode, result, shift);
4022           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4023               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4024                   > COSTS_N_INSNS (2)))
4025             {
4026               temp = expand_binop (mode, xor_optab, op0, signmask,
4027                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4028               temp = expand_binop (mode, sub_optab, temp, signmask,
4029                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4030               temp = expand_binop (mode, and_optab, temp,
4031                                    gen_int_mode (masklow, mode),
4032                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4033               temp = expand_binop (mode, xor_optab, temp, signmask,
4034                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4035               temp = expand_binop (mode, sub_optab, temp, signmask,
4036                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4037             }
4038           else
4039             {
4040               signmask = expand_binop (mode, lshr_optab, signmask, shift,
4041                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
4042               signmask = force_reg (mode, signmask);
4043
4044               temp = expand_binop (mode, add_optab, op0, signmask,
4045                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4046               temp = expand_binop (mode, and_optab, temp,
4047                                    gen_int_mode (masklow, mode),
4048                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4049               temp = expand_binop (mode, sub_optab, temp, signmask,
4050                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4051             }
4052           return temp;
4053         }
4054     }
4055
4056   /* Mask contains the mode's signbit and the significant bits of the
4057      modulus.  By including the signbit in the operation, many targets
4058      can avoid an explicit compare operation in the following comparison
4059      against zero.  */
4060   wide_int mask = wi::mask (logd, false, prec);
4061   mask = wi::set_bit (mask, prec - 1);
4062
4063   temp = expand_binop (mode, and_optab, op0,
4064                        immed_wide_int_const (mask, mode),
4065                        result, 1, OPTAB_LIB_WIDEN);
4066   if (temp != result)
4067     emit_move_insn (result, temp);
4068
4069   label = gen_label_rtx ();
4070   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4071
4072   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4073                        0, OPTAB_LIB_WIDEN);
4074
4075   mask = wi::mask (logd, true, prec);
4076   temp = expand_binop (mode, ior_optab, temp,
4077                        immed_wide_int_const (mask, mode),
4078                        result, 1, OPTAB_LIB_WIDEN);
4079   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4080                        0, OPTAB_LIB_WIDEN);
4081   if (temp != result)
4082     emit_move_insn (result, temp);
4083   emit_label (label);
4084   return result;
4085 }
4086
4087 /* Expand signed division of OP0 by a power of two D in mode MODE.
4088    This routine is only called for positive values of D.  */
4089
4090 static rtx
4091 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4092 {
4093   rtx temp;
4094   rtx_code_label *label;
4095   int logd;
4096
4097   logd = floor_log2 (d);
4098
4099   if (d == 2
4100       && BRANCH_COST (optimize_insn_for_speed_p (),
4101                       false) >= 1)
4102     {
4103       temp = gen_reg_rtx (mode);
4104       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4105       if (temp != NULL_RTX)
4106         {
4107           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4108                                0, OPTAB_LIB_WIDEN);
4109           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4110         }
4111     }
4112
4113   if (HAVE_conditional_move
4114       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4115     {
4116       rtx temp2;
4117
4118       start_sequence ();
4119       temp2 = copy_to_mode_reg (mode, op0);
4120       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4121                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
4122       temp = force_reg (mode, temp);
4123
4124       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
4125       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4126                                      mode, temp, temp2, mode, 0);
4127       if (temp2)
4128         {
4129           rtx_insn *seq = get_insns ();
4130           end_sequence ();
4131           emit_insn (seq);
4132           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4133         }
4134       end_sequence ();
4135     }
4136
4137   if (BRANCH_COST (optimize_insn_for_speed_p (),
4138                    false) >= 2)
4139     {
4140       int ushift = GET_MODE_BITSIZE (mode) - logd;
4141
4142       temp = gen_reg_rtx (mode);
4143       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4144       if (temp != NULL_RTX)
4145         {
4146           if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4147               || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4148               > COSTS_N_INSNS (1))
4149             temp = expand_binop (mode, and_optab, temp,
4150                                  gen_int_mode (d - 1, mode),
4151                                  NULL_RTX, 0, OPTAB_LIB_WIDEN);
4152           else
4153             temp = expand_shift (RSHIFT_EXPR, mode, temp,
4154                                  ushift, NULL_RTX, 1);
4155           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4156                                0, OPTAB_LIB_WIDEN);
4157           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4158         }
4159     }
4160
4161   label = gen_label_rtx ();
4162   temp = copy_to_mode_reg (mode, op0);
4163   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4164   expand_inc (temp, gen_int_mode (d - 1, mode));
4165   emit_label (label);
4166   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4167 }
4168 \f
4169 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4170    if that is convenient, and returning where the result is.
4171    You may request either the quotient or the remainder as the result;
4172    specify REM_FLAG nonzero to get the remainder.
4173
4174    CODE is the expression code for which kind of division this is;
4175    it controls how rounding is done.  MODE is the machine mode to use.
4176    UNSIGNEDP nonzero means do unsigned division.  */
4177
4178 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4179    and then correct it by or'ing in missing high bits
4180    if result of ANDI is nonzero.
4181    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4182    This could optimize to a bfexts instruction.
4183    But C doesn't use these operations, so their optimizations are
4184    left for later.  */
4185 /* ??? For modulo, we don't actually need the highpart of the first product,
4186    the low part will do nicely.  And for small divisors, the second multiply
4187    can also be a low-part only multiply or even be completely left out.
4188    E.g. to calculate the remainder of a division by 3 with a 32 bit
4189    multiply, multiply with 0x55555556 and extract the upper two bits;
4190    the result is exact for inputs up to 0x1fffffff.
4191    The input range can be reduced by using cross-sum rules.
4192    For odd divisors >= 3, the following table gives right shift counts
4193    so that if a number is shifted by an integer multiple of the given
4194    amount, the remainder stays the same:
4195    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4196    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4197    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4198    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4199    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4200
4201    Cross-sum rules for even numbers can be derived by leaving as many bits
4202    to the right alone as the divisor has zeros to the right.
4203    E.g. if x is an unsigned 32 bit number:
4204    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4205    */
4206
4207 rtx
4208 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4209                rtx op0, rtx op1, rtx target, int unsignedp,
4210                enum optab_methods methods)
4211 {
4212   machine_mode compute_mode;
4213   rtx tquotient;
4214   rtx quotient = 0, remainder = 0;
4215   rtx_insn *last;
4216   rtx_insn *insn;
4217   optab optab1, optab2;
4218   int op1_is_constant, op1_is_pow2 = 0;
4219   int max_cost, extra_cost;
4220   static HOST_WIDE_INT last_div_const = 0;
4221   bool speed = optimize_insn_for_speed_p ();
4222
4223   op1_is_constant = CONST_INT_P (op1);
4224   if (op1_is_constant)
4225     {
4226       wide_int ext_op1 = rtx_mode_t (op1, mode);
4227       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4228                      || (! unsignedp
4229                          && wi::popcount (wi::neg (ext_op1)) == 1));
4230     }
4231
4232   /*
4233      This is the structure of expand_divmod:
4234
4235      First comes code to fix up the operands so we can perform the operations
4236      correctly and efficiently.
4237
4238      Second comes a switch statement with code specific for each rounding mode.
4239      For some special operands this code emits all RTL for the desired
4240      operation, for other cases, it generates only a quotient and stores it in
4241      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4242      to indicate that it has not done anything.
4243
4244      Last comes code that finishes the operation.  If QUOTIENT is set and
4245      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4246      QUOTIENT is not set, it is computed using trunc rounding.
4247
4248      We try to generate special code for division and remainder when OP1 is a
4249      constant.  If |OP1| = 2**n we can use shifts and some other fast
4250      operations.  For other values of OP1, we compute a carefully selected
4251      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4252      by m.
4253
4254      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4255      half of the product.  Different strategies for generating the product are
4256      implemented in expmed_mult_highpart.
4257
4258      If what we actually want is the remainder, we generate that by another
4259      by-constant multiplication and a subtraction.  */
4260
4261   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4262      code below will malfunction if we are, so check here and handle
4263      the special case if so.  */
4264   if (op1 == const1_rtx)
4265     return rem_flag ? const0_rtx : op0;
4266
4267     /* When dividing by -1, we could get an overflow.
4268      negv_optab can handle overflows.  */
4269   if (! unsignedp && op1 == constm1_rtx)
4270     {
4271       if (rem_flag)
4272         return const0_rtx;
4273       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4274                           ? negv_optab : neg_optab, op0, target, 0);
4275     }
4276
4277   if (target
4278       /* Don't use the function value register as a target
4279          since we have to read it as well as write it,
4280          and function-inlining gets confused by this.  */
4281       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4282           /* Don't clobber an operand while doing a multi-step calculation.  */
4283           || ((rem_flag || op1_is_constant)
4284               && (reg_mentioned_p (target, op0)
4285                   || (MEM_P (op0) && MEM_P (target))))
4286           || reg_mentioned_p (target, op1)
4287           || (MEM_P (op1) && MEM_P (target))))
4288     target = 0;
4289
4290   /* Get the mode in which to perform this computation.  Normally it will
4291      be MODE, but sometimes we can't do the desired operation in MODE.
4292      If so, pick a wider mode in which we can do the operation.  Convert
4293      to that mode at the start to avoid repeated conversions.
4294
4295      First see what operations we need.  These depend on the expression
4296      we are evaluating.  (We assume that divxx3 insns exist under the
4297      same conditions that modxx3 insns and that these insns don't normally
4298      fail.  If these assumptions are not correct, we may generate less
4299      efficient code in some cases.)
4300
4301      Then see if we find a mode in which we can open-code that operation
4302      (either a division, modulus, or shift).  Finally, check for the smallest
4303      mode for which we can do the operation with a library call.  */
4304
4305   /* We might want to refine this now that we have division-by-constant
4306      optimization.  Since expmed_mult_highpart tries so many variants, it is
4307      not straightforward to generalize this.  Maybe we should make an array
4308      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4309
4310   optab1 = (op1_is_pow2
4311             ? (unsignedp ? lshr_optab : ashr_optab)
4312             : (unsignedp ? udiv_optab : sdiv_optab));
4313   optab2 = (op1_is_pow2 ? optab1
4314             : (unsignedp ? udivmod_optab : sdivmod_optab));
4315
4316   if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN)
4317     {
4318       FOR_EACH_MODE_FROM (compute_mode, mode)
4319       if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4320           || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4321         break;
4322
4323       if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN)
4324         FOR_EACH_MODE_FROM (compute_mode, mode)
4325           if (optab_libfunc (optab1, compute_mode)
4326               || optab_libfunc (optab2, compute_mode))
4327             break;
4328     }
4329   else
4330     compute_mode = mode;
4331
4332   /* If we still couldn't find a mode, use MODE, but expand_binop will
4333      probably die.  */
4334   if (compute_mode == VOIDmode)
4335     compute_mode = mode;
4336
4337   if (target && GET_MODE (target) == compute_mode)
4338     tquotient = target;
4339   else
4340     tquotient = gen_reg_rtx (compute_mode);
4341
4342 #if 0
4343   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4344      (mode), and thereby get better code when OP1 is a constant.  Do that
4345      later.  It will require going over all usages of SIZE below.  */
4346   size = GET_MODE_BITSIZE (mode);
4347 #endif
4348
4349   /* Only deduct something for a REM if the last divide done was
4350      for a different constant.   Then set the constant of the last
4351      divide.  */
4352   max_cost = (unsignedp
4353               ? udiv_cost (speed, compute_mode)
4354               : sdiv_cost (speed, compute_mode));
4355   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4356                      && INTVAL (op1) == last_div_const))
4357     max_cost -= (mul_cost (speed, compute_mode)
4358                  + add_cost (speed, compute_mode));
4359
4360   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4361
4362   /* Now convert to the best mode to use.  */
4363   if (compute_mode != mode)
4364     {
4365       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4366       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4367
4368       /* convert_modes may have placed op1 into a register, so we
4369          must recompute the following.  */
4370       op1_is_constant = CONST_INT_P (op1);
4371       if (op1_is_constant)
4372         {
4373           wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4374           op1_is_pow2 = (wi::popcount (ext_op1) == 1
4375                          || (! unsignedp
4376                              && wi::popcount (wi::neg (ext_op1)) == 1));
4377         }
4378       else
4379         op1_is_pow2 = 0;
4380     }
4381
4382   /* If one of the operands is a volatile MEM, copy it into a register.  */
4383
4384   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4385     op0 = force_reg (compute_mode, op0);
4386   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4387     op1 = force_reg (compute_mode, op1);
4388
4389   /* If we need the remainder or if OP1 is constant, we need to
4390      put OP0 in a register in case it has any queued subexpressions.  */
4391   if (rem_flag || op1_is_constant)
4392     op0 = force_reg (compute_mode, op0);
4393
4394   last = get_last_insn ();
4395
4396   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4397   if (unsignedp)
4398     {
4399       if (code == FLOOR_DIV_EXPR)
4400         code = TRUNC_DIV_EXPR;
4401       if (code == FLOOR_MOD_EXPR)
4402         code = TRUNC_MOD_EXPR;
4403       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4404         code = TRUNC_DIV_EXPR;
4405     }
4406
4407   if (op1 != const0_rtx)
4408     switch (code)
4409       {
4410       case TRUNC_MOD_EXPR:
4411       case TRUNC_DIV_EXPR:
4412         if (op1_is_constant)
4413           {
4414             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4415             int size = GET_MODE_BITSIZE (int_mode);
4416             if (unsignedp)
4417               {
4418                 unsigned HOST_WIDE_INT mh, ml;
4419                 int pre_shift, post_shift;
4420                 int dummy;
4421                 wide_int wd = rtx_mode_t (op1, int_mode);
4422                 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4423
4424                 if (wi::popcount (wd) == 1)
4425                   {
4426                     pre_shift = floor_log2 (d);
4427                     if (rem_flag)
4428                       {
4429                         unsigned HOST_WIDE_INT mask
4430                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4431                         remainder
4432                           = expand_binop (int_mode, and_optab, op0,
4433                                           gen_int_mode (mask, int_mode),
4434                                           remainder, 1, methods);
4435                         if (remainder)
4436                           return gen_lowpart (mode, remainder);
4437                       }
4438                     quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4439                                              pre_shift, tquotient, 1);
4440                   }
4441                 else if (size <= HOST_BITS_PER_WIDE_INT)
4442                   {
4443                     if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4444                       {
4445                         /* Most significant bit of divisor is set; emit an scc
4446                            insn.  */
4447                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4448                                                           int_mode, 1, 1);
4449                       }
4450                     else
4451                       {
4452                         /* Find a suitable multiplier and right shift count
4453                            instead of multiplying with D.  */
4454
4455                         mh = choose_multiplier (d, size, size,
4456                                                 &ml, &post_shift, &dummy);
4457
4458                         /* If the suggested multiplier is more than SIZE bits,
4459                            we can do better for even divisors, using an
4460                            initial right shift.  */
4461                         if (mh != 0 && (d & 1) == 0)
4462                           {
4463                             pre_shift = ctz_or_zero (d);
4464                             mh = choose_multiplier (d >> pre_shift, size,
4465                                                     size - pre_shift,
4466                                                     &ml, &post_shift, &dummy);
4467                             gcc_assert (!mh);
4468                           }
4469                         else
4470                           pre_shift = 0;
4471
4472                         if (mh != 0)
4473                           {
4474                             rtx t1, t2, t3, t4;
4475
4476                             if (post_shift - 1 >= BITS_PER_WORD)
4477                               goto fail1;
4478
4479                             extra_cost
4480                               = (shift_cost (speed, int_mode, post_shift - 1)
4481                                  + shift_cost (speed, int_mode, 1)
4482                                  + 2 * add_cost (speed, int_mode));
4483                             t1 = expmed_mult_highpart
4484                               (int_mode, op0, gen_int_mode (ml, int_mode),
4485                                NULL_RTX, 1, max_cost - extra_cost);
4486                             if (t1 == 0)
4487                               goto fail1;
4488                             t2 = force_operand (gen_rtx_MINUS (int_mode,
4489                                                                op0, t1),
4490                                                 NULL_RTX);
4491                             t3 = expand_shift (RSHIFT_EXPR, int_mode,
4492                                                t2, 1, NULL_RTX, 1);
4493                             t4 = force_operand (gen_rtx_PLUS (int_mode,
4494                                                               t1, t3),
4495                                                 NULL_RTX);
4496                             quotient = expand_shift
4497                               (RSHIFT_EXPR, int_mode, t4,
4498                                post_shift - 1, tquotient, 1);
4499                           }
4500                         else
4501                           {
4502                             rtx t1, t2;
4503
4504                             if (pre_shift >= BITS_PER_WORD
4505                                 || post_shift >= BITS_PER_WORD)
4506                               goto fail1;
4507
4508                             t1 = expand_shift
4509                               (RSHIFT_EXPR, int_mode, op0,
4510                                pre_shift, NULL_RTX, 1);
4511                             extra_cost
4512                               = (shift_cost (speed, int_mode, pre_shift)
4513                                  + shift_cost (speed, int_mode, post_shift));
4514                             t2 = expmed_mult_highpart
4515                               (int_mode, t1,
4516                                gen_int_mode (ml, int_mode),
4517                                NULL_RTX, 1, max_cost - extra_cost);
4518                             if (t2 == 0)
4519                               goto fail1;
4520                             quotient = expand_shift
4521                               (RSHIFT_EXPR, int_mode, t2,
4522                                post_shift, tquotient, 1);
4523                           }
4524                       }
4525                   }
4526                 else            /* Too wide mode to use tricky code */
4527                   break;
4528
4529                 insn = get_last_insn ();
4530                 if (insn != last)
4531                   set_dst_reg_note (insn, REG_EQUAL,
4532                                     gen_rtx_UDIV (int_mode, op0, op1),
4533                                     quotient);
4534               }
4535             else                /* TRUNC_DIV, signed */
4536               {
4537                 unsigned HOST_WIDE_INT ml;
4538                 int lgup, post_shift;
4539                 rtx mlr;
4540                 HOST_WIDE_INT d = INTVAL (op1);
4541                 unsigned HOST_WIDE_INT abs_d;
4542
4543                 /* Not prepared to handle division/remainder by
4544                    0xffffffffffffffff8000000000000000 etc.  */
4545                 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4546                   break;
4547
4548                 /* Since d might be INT_MIN, we have to cast to
4549                    unsigned HOST_WIDE_INT before negating to avoid
4550                    undefined signed overflow.  */
4551                 abs_d = (d >= 0
4552                          ? (unsigned HOST_WIDE_INT) d
4553                          : - (unsigned HOST_WIDE_INT) d);
4554
4555                 /* n rem d = n rem -d */
4556                 if (rem_flag && d < 0)
4557                   {
4558                     d = abs_d;
4559                     op1 = gen_int_mode (abs_d, int_mode);
4560                   }
4561
4562                 if (d == 1)
4563                   quotient = op0;
4564                 else if (d == -1)
4565                   quotient = expand_unop (int_mode, neg_optab, op0,
4566                                           tquotient, 0);
4567                 else if (size <= HOST_BITS_PER_WIDE_INT
4568                          && abs_d == HOST_WIDE_INT_1U << (size - 1))
4569                   {
4570                     /* This case is not handled correctly below.  */
4571                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4572                                                 int_mode, 1, 1);
4573                     if (quotient == 0)
4574                       goto fail1;
4575                   }
4576                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4577                          && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4578                          && (rem_flag
4579                              ? smod_pow2_cheap (speed, int_mode)
4580                              : sdiv_pow2_cheap (speed, int_mode))
4581                          /* We assume that cheap metric is true if the
4582                             optab has an expander for this mode.  */
4583                          && ((optab_handler ((rem_flag ? smod_optab
4584                                               : sdiv_optab),
4585                                              int_mode)
4586                               != CODE_FOR_nothing)
4587                              || (optab_handler (sdivmod_optab, int_mode)
4588                                  != CODE_FOR_nothing)))
4589                   ;
4590                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4591                   {
4592                     if (rem_flag)
4593                       {
4594                         remainder = expand_smod_pow2 (int_mode, op0, d);
4595                         if (remainder)
4596                           return gen_lowpart (mode, remainder);
4597                       }
4598
4599                     if (sdiv_pow2_cheap (speed, int_mode)
4600                         && ((optab_handler (sdiv_optab, int_mode)
4601                              != CODE_FOR_nothing)
4602                             || (optab_handler (sdivmod_optab, int_mode)
4603                                 != CODE_FOR_nothing)))
4604                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4605                                                 int_mode, op0,
4606                                                 gen_int_mode (abs_d,
4607                                                               int_mode),
4608                                                 NULL_RTX, 0);
4609                     else
4610                       quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4611
4612                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4613                        negate the quotient.  */
4614                     if (d < 0)
4615                       {
4616                         insn = get_last_insn ();
4617                         if (insn != last
4618                             && abs_d < (HOST_WIDE_INT_1U
4619                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4620                           set_dst_reg_note (insn, REG_EQUAL,
4621                                             gen_rtx_DIV (int_mode, op0,
4622                                                          gen_int_mode
4623                                                            (abs_d,
4624                                                             int_mode)),
4625                                             quotient);
4626
4627                         quotient = expand_unop (int_mode, neg_optab,
4628                                                 quotient, quotient, 0);
4629                       }
4630                   }
4631                 else if (size <= HOST_BITS_PER_WIDE_INT)
4632                   {
4633                     choose_multiplier (abs_d, size, size - 1,
4634                                        &ml, &post_shift, &lgup);
4635                     if (ml < HOST_WIDE_INT_1U << (size - 1))
4636                       {
4637                         rtx t1, t2, t3;
4638
4639                         if (post_shift >= BITS_PER_WORD
4640                             || size - 1 >= BITS_PER_WORD)
4641                           goto fail1;
4642
4643                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4644                                       + shift_cost (speed, int_mode, size - 1)
4645                                       + add_cost (speed, int_mode));
4646                         t1 = expmed_mult_highpart
4647                           (int_mode, op0, gen_int_mode (ml, int_mode),
4648                            NULL_RTX, 0, max_cost - extra_cost);
4649                         if (t1 == 0)
4650                           goto fail1;
4651                         t2 = expand_shift
4652                           (RSHIFT_EXPR, int_mode, t1,
4653                            post_shift, NULL_RTX, 0);
4654                         t3 = expand_shift
4655                           (RSHIFT_EXPR, int_mode, op0,
4656                            size - 1, NULL_RTX, 0);
4657                         if (d < 0)
4658                           quotient
4659                             = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4660                                              tquotient);
4661                         else
4662                           quotient
4663                             = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4664                                              tquotient);
4665                       }
4666                     else
4667                       {
4668                         rtx t1, t2, t3, t4;
4669
4670                         if (post_shift >= BITS_PER_WORD
4671                             || size - 1 >= BITS_PER_WORD)
4672                           goto fail1;
4673
4674                         ml |= HOST_WIDE_INT_M1U << (size - 1);
4675                         mlr = gen_int_mode (ml, int_mode);
4676                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4677                                       + shift_cost (speed, int_mode, size - 1)
4678                                       + 2 * add_cost (speed, int_mode));
4679                         t1 = expmed_mult_highpart (int_mode, op0, mlr,
4680                                                    NULL_RTX, 0,
4681                                                    max_cost - extra_cost);
4682                         if (t1 == 0)
4683                           goto fail1;
4684                         t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4685                                             NULL_RTX);
4686                         t3 = expand_shift
4687                           (RSHIFT_EXPR, int_mode, t2,
4688                            post_shift, NULL_RTX, 0);
4689                         t4 = expand_shift
4690                           (RSHIFT_EXPR, int_mode, op0,
4691                            size - 1, NULL_RTX, 0);
4692                         if (d < 0)
4693                           quotient
4694                             = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4695                                              tquotient);
4696                         else
4697                           quotient
4698                             = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4699                                              tquotient);
4700                       }
4701                   }
4702                 else            /* Too wide mode to use tricky code */
4703                   break;
4704
4705                 insn = get_last_insn ();
4706                 if (insn != last)
4707                   set_dst_reg_note (insn, REG_EQUAL,
4708                                     gen_rtx_DIV (int_mode, op0, op1),
4709                                     quotient);
4710               }
4711             break;
4712           }
4713       fail1:
4714         delete_insns_since (last);
4715         break;
4716
4717       case FLOOR_DIV_EXPR:
4718       case FLOOR_MOD_EXPR:
4719       /* We will come here only for signed operations.  */
4720         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4721           {
4722             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4723             int size = GET_MODE_BITSIZE (int_mode);
4724             unsigned HOST_WIDE_INT mh, ml;
4725             int pre_shift, lgup, post_shift;
4726             HOST_WIDE_INT d = INTVAL (op1);
4727
4728             if (d > 0)
4729               {
4730                 /* We could just as easily deal with negative constants here,
4731                    but it does not seem worth the trouble for GCC 2.6.  */
4732                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4733                   {
4734                     pre_shift = floor_log2 (d);
4735                     if (rem_flag)
4736                       {
4737                         unsigned HOST_WIDE_INT mask
4738                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4739                         remainder = expand_binop
4740                           (int_mode, and_optab, op0,
4741                            gen_int_mode (mask, int_mode),
4742                            remainder, 0, methods);
4743                         if (remainder)
4744                           return gen_lowpart (mode, remainder);
4745                       }
4746                     quotient = expand_shift
4747                       (RSHIFT_EXPR, int_mode, op0,
4748                        pre_shift, tquotient, 0);
4749                   }
4750                 else
4751                   {
4752                     rtx t1, t2, t3, t4;
4753
4754                     mh = choose_multiplier (d, size, size - 1,
4755                                             &ml, &post_shift, &lgup);
4756                     gcc_assert (!mh);
4757
4758                     if (post_shift < BITS_PER_WORD
4759                         && size - 1 < BITS_PER_WORD)
4760                       {
4761                         t1 = expand_shift
4762                           (RSHIFT_EXPR, int_mode, op0,
4763                            size - 1, NULL_RTX, 0);
4764                         t2 = expand_binop (int_mode, xor_optab, op0, t1,
4765                                            NULL_RTX, 0, OPTAB_WIDEN);
4766                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4767                                       + shift_cost (speed, int_mode, size - 1)
4768                                       + 2 * add_cost (speed, int_mode));
4769                         t3 = expmed_mult_highpart
4770                           (int_mode, t2, gen_int_mode (ml, int_mode),
4771                            NULL_RTX, 1, max_cost - extra_cost);
4772                         if (t3 != 0)
4773                           {
4774                             t4 = expand_shift
4775                               (RSHIFT_EXPR, int_mode, t3,
4776                                post_shift, NULL_RTX, 1);
4777                             quotient = expand_binop (int_mode, xor_optab,
4778                                                      t4, t1, tquotient, 0,
4779                                                      OPTAB_WIDEN);
4780                           }
4781                       }
4782                   }
4783               }
4784             else
4785               {
4786                 rtx nsign, t1, t2, t3, t4;
4787                 t1 = force_operand (gen_rtx_PLUS (int_mode,
4788                                                   op0, constm1_rtx), NULL_RTX);
4789                 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4790                                    0, OPTAB_WIDEN);
4791                 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4792                                       size - 1, NULL_RTX, 0);
4793                 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4794                                     NULL_RTX);
4795                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4796                                     NULL_RTX, 0);
4797                 if (t4)
4798                   {
4799                     rtx t5;
4800                     t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4801                                       NULL_RTX, 0);
4802                     quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4803                                               tquotient);
4804                   }
4805               }
4806           }
4807
4808         if (quotient != 0)
4809           break;
4810         delete_insns_since (last);
4811
4812         /* Try using an instruction that produces both the quotient and
4813            remainder, using truncation.  We can easily compensate the quotient
4814            or remainder to get floor rounding, once we have the remainder.
4815            Notice that we compute also the final remainder value here,
4816            and return the result right away.  */
4817         if (target == 0 || GET_MODE (target) != compute_mode)
4818           target = gen_reg_rtx (compute_mode);
4819
4820         if (rem_flag)
4821           {
4822             remainder
4823               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4824             quotient = gen_reg_rtx (compute_mode);
4825           }
4826         else
4827           {
4828             quotient
4829               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4830             remainder = gen_reg_rtx (compute_mode);
4831           }
4832
4833         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4834                                  quotient, remainder, 0))
4835           {
4836             /* This could be computed with a branch-less sequence.
4837                Save that for later.  */
4838             rtx tem;
4839             rtx_code_label *label = gen_label_rtx ();
4840             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4841             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4842                                 NULL_RTX, 0, OPTAB_WIDEN);
4843             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4844             expand_dec (quotient, const1_rtx);
4845             expand_inc (remainder, op1);
4846             emit_label (label);
4847             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4848           }
4849
4850         /* No luck with division elimination or divmod.  Have to do it
4851            by conditionally adjusting op0 *and* the result.  */
4852         {
4853           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4854           rtx adjusted_op0;
4855           rtx tem;
4856
4857           quotient = gen_reg_rtx (compute_mode);
4858           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4859           label1 = gen_label_rtx ();
4860           label2 = gen_label_rtx ();
4861           label3 = gen_label_rtx ();
4862           label4 = gen_label_rtx ();
4863           label5 = gen_label_rtx ();
4864           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4865           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4866           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4867                               quotient, 0, methods);
4868           if (tem != quotient)
4869             emit_move_insn (quotient, tem);
4870           emit_jump_insn (targetm.gen_jump (label5));
4871           emit_barrier ();
4872           emit_label (label1);
4873           expand_inc (adjusted_op0, const1_rtx);
4874           emit_jump_insn (targetm.gen_jump (label4));
4875           emit_barrier ();
4876           emit_label (label2);
4877           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4878           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4879                               quotient, 0, methods);
4880           if (tem != quotient)
4881             emit_move_insn (quotient, tem);
4882           emit_jump_insn (targetm.gen_jump (label5));
4883           emit_barrier ();
4884           emit_label (label3);
4885           expand_dec (adjusted_op0, const1_rtx);
4886           emit_label (label4);
4887           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4888                               quotient, 0, methods);
4889           if (tem != quotient)
4890             emit_move_insn (quotient, tem);
4891           expand_dec (quotient, const1_rtx);
4892           emit_label (label5);
4893         }
4894         break;
4895
4896       case CEIL_DIV_EXPR:
4897       case CEIL_MOD_EXPR:
4898         if (unsignedp)
4899           {
4900             if (op1_is_constant
4901                 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4902                 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4903                     || INTVAL (op1) >= 0))
4904               {
4905                 scalar_int_mode int_mode
4906                   = as_a <scalar_int_mode> (compute_mode);
4907                 rtx t1, t2, t3;
4908                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4909                 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4910                                    floor_log2 (d), tquotient, 1);
4911                 t2 = expand_binop (int_mode, and_optab, op0,
4912                                    gen_int_mode (d - 1, int_mode),
4913                                    NULL_RTX, 1, methods);
4914                 t3 = gen_reg_rtx (int_mode);
4915                 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4916                 if (t3 == 0)
4917                   {
4918                     rtx_code_label *lab;
4919                     lab = gen_label_rtx ();
4920                     do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4921                     expand_inc (t1, const1_rtx);
4922                     emit_label (lab);
4923                     quotient = t1;
4924                   }
4925                 else
4926                   quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4927                                             tquotient);
4928                 break;
4929               }
4930
4931             /* Try using an instruction that produces both the quotient and
4932                remainder, using truncation.  We can easily compensate the
4933                quotient or remainder to get ceiling rounding, once we have the
4934                remainder.  Notice that we compute also the final remainder
4935                value here, and return the result right away.  */
4936             if (target == 0 || GET_MODE (target) != compute_mode)
4937               target = gen_reg_rtx (compute_mode);
4938
4939             if (rem_flag)
4940               {
4941                 remainder = (REG_P (target)
4942                              ? target : gen_reg_rtx (compute_mode));
4943                 quotient = gen_reg_rtx (compute_mode);
4944               }
4945             else
4946               {
4947                 quotient = (REG_P (target)
4948                             ? target : gen_reg_rtx (compute_mode));
4949                 remainder = gen_reg_rtx (compute_mode);
4950               }
4951
4952             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4953                                      remainder, 1))
4954               {
4955                 /* This could be computed with a branch-less sequence.
4956                    Save that for later.  */
4957                 rtx_code_label *label = gen_label_rtx ();
4958                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4959                                  compute_mode, label);
4960                 expand_inc (quotient, const1_rtx);
4961                 expand_dec (remainder, op1);
4962                 emit_label (label);
4963                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4964               }
4965
4966             /* No luck with division elimination or divmod.  Have to do it
4967                by conditionally adjusting op0 *and* the result.  */
4968             {
4969               rtx_code_label *label1, *label2;
4970               rtx adjusted_op0, tem;
4971
4972               quotient = gen_reg_rtx (compute_mode);
4973               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4974               label1 = gen_label_rtx ();
4975               label2 = gen_label_rtx ();
4976               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4977                                compute_mode, label1);
4978               emit_move_insn  (quotient, const0_rtx);
4979               emit_jump_insn (targetm.gen_jump (label2));
4980               emit_barrier ();
4981               emit_label (label1);
4982               expand_dec (adjusted_op0, const1_rtx);
4983               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4984                                   quotient, 1, methods);
4985               if (tem != quotient)
4986                 emit_move_insn (quotient, tem);
4987               expand_inc (quotient, const1_rtx);
4988               emit_label (label2);
4989             }
4990           }
4991         else /* signed */
4992           {
4993             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4994                 && INTVAL (op1) >= 0)
4995               {
4996                 /* This is extremely similar to the code for the unsigned case
4997                    above.  For 2.7 we should merge these variants, but for
4998                    2.6.1 I don't want to touch the code for unsigned since that
4999                    gets used in C.  The signed case will only be used by other
5000                    languages (Ada).  */
5001
5002                 rtx t1, t2, t3;
5003                 unsigned HOST_WIDE_INT d = INTVAL (op1);
5004                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
5005                                    floor_log2 (d), tquotient, 0);
5006                 t2 = expand_binop (compute_mode, and_optab, op0,
5007                                    gen_int_mode (d - 1, compute_mode),
5008                                    NULL_RTX, 1, methods);
5009                 t3 = gen_reg_rtx (compute_mode);
5010                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
5011                                       compute_mode, 1, 1);
5012                 if (t3 == 0)
5013                   {
5014                     rtx_code_label *lab;
5015                     lab = gen_label_rtx ();
5016                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
5017                     expand_inc (t1, const1_rtx);
5018                     emit_label (lab);
5019                     quotient = t1;
5020                   }
5021                 else
5022                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
5023                                                           t1, t3),
5024                                             tquotient);
5025                 break;
5026               }
5027
5028             /* Try using an instruction that produces both the quotient and
5029                remainder, using truncation.  We can easily compensate the
5030                quotient or remainder to get ceiling rounding, once we have the
5031                remainder.  Notice that we compute also the final remainder
5032                value here, and return the result right away.  */
5033             if (target == 0 || GET_MODE (target) != compute_mode)
5034               target = gen_reg_rtx (compute_mode);
5035             if (rem_flag)
5036               {
5037                 remainder= (REG_P (target)
5038                             ? target : gen_reg_rtx (compute_mode));
5039                 quotient = gen_reg_rtx (compute_mode);
5040               }
5041             else
5042               {
5043                 quotient = (REG_P (target)
5044                             ? target : gen_reg_rtx (compute_mode));
5045                 remainder = gen_reg_rtx (compute_mode);
5046               }
5047
5048             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5049                                      remainder, 0))
5050               {
5051                 /* This could be computed with a branch-less sequence.
5052                    Save that for later.  */
5053                 rtx tem;
5054                 rtx_code_label *label = gen_label_rtx ();
5055                 do_cmp_and_jump (remainder, const0_rtx, EQ,
5056                                  compute_mode, label);
5057                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5058                                     NULL_RTX, 0, OPTAB_WIDEN);
5059                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5060                 expand_inc (quotient, const1_rtx);
5061                 expand_dec (remainder, op1);
5062                 emit_label (label);
5063                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5064               }
5065
5066             /* No luck with division elimination or divmod.  Have to do it
5067                by conditionally adjusting op0 *and* the result.  */
5068             {
5069               rtx_code_label *label1, *label2, *label3, *label4, *label5;
5070               rtx adjusted_op0;
5071               rtx tem;
5072
5073               quotient = gen_reg_rtx (compute_mode);
5074               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5075               label1 = gen_label_rtx ();
5076               label2 = gen_label_rtx ();
5077               label3 = gen_label_rtx ();
5078               label4 = gen_label_rtx ();
5079               label5 = gen_label_rtx ();
5080               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5081               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5082                                compute_mode, label1);
5083               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5084                                   quotient, 0, methods);
5085               if (tem != quotient)
5086                 emit_move_insn (quotient, tem);
5087               emit_jump_insn (targetm.gen_jump (label5));
5088               emit_barrier ();
5089               emit_label (label1);
5090               expand_dec (adjusted_op0, const1_rtx);
5091               emit_jump_insn (targetm.gen_jump (label4));
5092               emit_barrier ();
5093               emit_label (label2);
5094               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5095                                compute_mode, label3);
5096               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5097                                   quotient, 0, methods);
5098               if (tem != quotient)
5099                 emit_move_insn (quotient, tem);
5100               emit_jump_insn (targetm.gen_jump (label5));
5101               emit_barrier ();
5102               emit_label (label3);
5103               expand_inc (adjusted_op0, const1_rtx);
5104               emit_label (label4);
5105               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5106                                   quotient, 0, methods);
5107               if (tem != quotient)
5108                 emit_move_insn (quotient, tem);
5109               expand_inc (quotient, const1_rtx);
5110               emit_label (label5);
5111             }
5112           }
5113         break;
5114
5115       case EXACT_DIV_EXPR:
5116         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5117           {
5118             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5119             int size = GET_MODE_BITSIZE (int_mode);
5120             HOST_WIDE_INT d = INTVAL (op1);
5121             unsigned HOST_WIDE_INT ml;
5122             int pre_shift;
5123             rtx t1;
5124
5125             pre_shift = ctz_or_zero (d);
5126             ml = invert_mod2n (d >> pre_shift, size);
5127             t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5128                                pre_shift, NULL_RTX, unsignedp);
5129             quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5130                                     NULL_RTX, 1);
5131
5132             insn = get_last_insn ();
5133             set_dst_reg_note (insn, REG_EQUAL,
5134                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5135                                               int_mode, op0, op1),
5136                               quotient);
5137           }
5138         break;
5139
5140       case ROUND_DIV_EXPR:
5141       case ROUND_MOD_EXPR:
5142         if (unsignedp)
5143           {
5144             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5145             rtx tem;
5146             rtx_code_label *label;
5147             label = gen_label_rtx ();
5148             quotient = gen_reg_rtx (int_mode);
5149             remainder = gen_reg_rtx (int_mode);
5150             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5151               {
5152                 rtx tem;
5153                 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5154                                          quotient, 1, methods);
5155                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5156                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5157                                           remainder, 1, methods);
5158               }
5159             tem = plus_constant (int_mode, op1, -1);
5160             tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5161             do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5162             expand_inc (quotient, const1_rtx);
5163             expand_dec (remainder, op1);
5164             emit_label (label);
5165           }
5166         else
5167           {
5168             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5169             int size = GET_MODE_BITSIZE (int_mode);
5170             rtx abs_rem, abs_op1, tem, mask;
5171             rtx_code_label *label;
5172             label = gen_label_rtx ();
5173             quotient = gen_reg_rtx (int_mode);
5174             remainder = gen_reg_rtx (int_mode);
5175             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5176               {
5177                 rtx tem;
5178                 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5179                                          quotient, 0, methods);
5180                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5181                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5182                                           remainder, 0, methods);
5183               }
5184             abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5185             abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5186             tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5187                                 1, NULL_RTX, 1);
5188             do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5189             tem = expand_binop (int_mode, xor_optab, op0, op1,
5190                                 NULL_RTX, 0, OPTAB_WIDEN);
5191             mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5192                                  size - 1, NULL_RTX, 0);
5193             tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5194                                 NULL_RTX, 0, OPTAB_WIDEN);
5195             tem = expand_binop (int_mode, sub_optab, tem, mask,
5196                                 NULL_RTX, 0, OPTAB_WIDEN);
5197             expand_inc (quotient, tem);
5198             tem = expand_binop (int_mode, xor_optab, mask, op1,
5199                                 NULL_RTX, 0, OPTAB_WIDEN);
5200             tem = expand_binop (int_mode, sub_optab, tem, mask,
5201                                 NULL_RTX, 0, OPTAB_WIDEN);
5202             expand_dec (remainder, tem);
5203             emit_label (label);
5204           }
5205         return gen_lowpart (mode, rem_flag ? remainder : quotient);
5206
5207       default:
5208         gcc_unreachable ();
5209       }
5210
5211   if (quotient == 0)
5212     {
5213       if (target && GET_MODE (target) != compute_mode)
5214         target = 0;
5215
5216       if (rem_flag)
5217         {
5218           /* Try to produce the remainder without producing the quotient.
5219              If we seem to have a divmod pattern that does not require widening,
5220              don't try widening here.  We should really have a WIDEN argument
5221              to expand_twoval_binop, since what we'd really like to do here is
5222              1) try a mod insn in compute_mode
5223              2) try a divmod insn in compute_mode
5224              3) try a div insn in compute_mode and multiply-subtract to get
5225                 remainder
5226              4) try the same things with widening allowed.  */
5227           remainder
5228             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5229                                  op0, op1, target,
5230                                  unsignedp,
5231                                  ((optab_handler (optab2, compute_mode)
5232                                    != CODE_FOR_nothing)
5233                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
5234           if (remainder == 0)
5235             {
5236               /* No luck there.  Can we do remainder and divide at once
5237                  without a library call?  */
5238               remainder = gen_reg_rtx (compute_mode);
5239               if (! expand_twoval_binop ((unsignedp
5240                                           ? udivmod_optab
5241                                           : sdivmod_optab),
5242                                          op0, op1,
5243                                          NULL_RTX, remainder, unsignedp))
5244                 remainder = 0;
5245             }
5246
5247           if (remainder)
5248             return gen_lowpart (mode, remainder);
5249         }
5250
5251       /* Produce the quotient.  Try a quotient insn, but not a library call.
5252          If we have a divmod in this mode, use it in preference to widening
5253          the div (for this test we assume it will not fail). Note that optab2
5254          is set to the one of the two optabs that the call below will use.  */
5255       quotient
5256         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5257                              op0, op1, rem_flag ? NULL_RTX : target,
5258                              unsignedp,
5259                              ((optab_handler (optab2, compute_mode)
5260                                != CODE_FOR_nothing)
5261                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5262
5263       if (quotient == 0)
5264         {
5265           /* No luck there.  Try a quotient-and-remainder insn,
5266              keeping the quotient alone.  */
5267           quotient = gen_reg_rtx (compute_mode);
5268           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5269                                      op0, op1,
5270                                      quotient, NULL_RTX, unsignedp))
5271             {
5272               quotient = 0;
5273               if (! rem_flag)
5274                 /* Still no luck.  If we are not computing the remainder,
5275                    use a library call for the quotient.  */
5276                 quotient = sign_expand_binop (compute_mode,
5277                                               udiv_optab, sdiv_optab,
5278                                               op0, op1, target,
5279                                               unsignedp, methods);
5280             }
5281         }
5282     }
5283
5284   if (rem_flag)
5285     {
5286       if (target && GET_MODE (target) != compute_mode)
5287         target = 0;
5288
5289       if (quotient == 0)
5290         {
5291           /* No divide instruction either.  Use library for remainder.  */
5292           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5293                                          op0, op1, target,
5294                                          unsignedp, methods);
5295           /* No remainder function.  Try a quotient-and-remainder
5296              function, keeping the remainder.  */
5297           if (!remainder
5298               && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN))
5299             {
5300               remainder = gen_reg_rtx (compute_mode);
5301               if (!expand_twoval_binop_libfunc
5302                   (unsignedp ? udivmod_optab : sdivmod_optab,
5303                    op0, op1,
5304                    NULL_RTX, remainder,
5305                    unsignedp ? UMOD : MOD))
5306                 remainder = NULL_RTX;
5307             }
5308         }
5309       else
5310         {
5311           /* We divided.  Now finish doing X - Y * (X / Y).  */
5312           remainder = expand_mult (compute_mode, quotient, op1,
5313                                    NULL_RTX, unsignedp);
5314           remainder = expand_binop (compute_mode, sub_optab, op0,
5315                                     remainder, target, unsignedp,
5316                                     methods);
5317         }
5318     }
5319
5320   if (methods != OPTAB_LIB_WIDEN
5321       && (rem_flag ? remainder : quotient) == NULL_RTX)
5322     return NULL_RTX;
5323
5324   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5325 }
5326 \f
5327 /* Return a tree node with data type TYPE, describing the value of X.
5328    Usually this is an VAR_DECL, if there is no obvious better choice.
5329    X may be an expression, however we only support those expressions
5330    generated by loop.c.  */
5331
5332 tree
5333 make_tree (tree type, rtx x)
5334 {
5335   tree t;
5336
5337   switch (GET_CODE (x))
5338     {
5339     case CONST_INT:
5340     case CONST_WIDE_INT:
5341       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5342       return t;
5343
5344     case CONST_DOUBLE:
5345       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5346       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5347         t = wide_int_to_tree (type,
5348                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5349                                                     HOST_BITS_PER_WIDE_INT * 2));
5350       else
5351         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5352
5353       return t;
5354
5355     case CONST_VECTOR:
5356       {
5357         unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5358         unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5359         tree itype = TREE_TYPE (type);
5360
5361         /* Build a tree with vector elements.  */
5362         tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5363         unsigned int count = elts.encoded_nelts ();
5364         for (unsigned int i = 0; i < count; ++i)
5365           {
5366             rtx elt = CONST_VECTOR_ELT (x, i);
5367             elts.quick_push (make_tree (itype, elt));
5368           }
5369
5370         return elts.build ();
5371       }
5372
5373     case PLUS:
5374       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5375                           make_tree (type, XEXP (x, 1)));
5376
5377     case MINUS:
5378       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5379                           make_tree (type, XEXP (x, 1)));
5380
5381     case NEG:
5382       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5383
5384     case MULT:
5385       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5386                           make_tree (type, XEXP (x, 1)));
5387
5388     case ASHIFT:
5389       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5390                           make_tree (type, XEXP (x, 1)));
5391
5392     case LSHIFTRT:
5393       t = unsigned_type_for (type);
5394       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5395                                          make_tree (t, XEXP (x, 0)),
5396                                          make_tree (type, XEXP (x, 1))));
5397
5398     case ASHIFTRT:
5399       t = signed_type_for (type);
5400       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5401                                          make_tree (t, XEXP (x, 0)),
5402                                          make_tree (type, XEXP (x, 1))));
5403
5404     case DIV:
5405       if (TREE_CODE (type) != REAL_TYPE)
5406         t = signed_type_for (type);
5407       else
5408         t = type;
5409
5410       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5411                                          make_tree (t, XEXP (x, 0)),
5412                                          make_tree (t, XEXP (x, 1))));
5413     case UDIV:
5414       t = unsigned_type_for (type);
5415       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5416                                          make_tree (t, XEXP (x, 0)),
5417                                          make_tree (t, XEXP (x, 1))));
5418
5419     case SIGN_EXTEND:
5420     case ZERO_EXTEND:
5421       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5422                                           GET_CODE (x) == ZERO_EXTEND);
5423       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5424
5425     case CONST:
5426       return make_tree (type, XEXP (x, 0));
5427
5428     case SYMBOL_REF:
5429       t = SYMBOL_REF_DECL (x);
5430       if (t)
5431         return fold_convert (type, build_fold_addr_expr (t));
5432       /* fall through.  */
5433
5434     default:
5435       if (CONST_POLY_INT_P (x))
5436         return wide_int_to_tree (t, const_poly_int_value (x));
5437
5438       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5439
5440       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5441          address mode to pointer mode.  */
5442       if (POINTER_TYPE_P (type))
5443         x = convert_memory_address_addr_space
5444           (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5445
5446       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5447          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5448       t->decl_with_rtl.rtl = x;
5449
5450       return t;
5451     }
5452 }
5453 \f
5454 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5455    and returning TARGET.
5456
5457    If TARGET is 0, a pseudo-register or constant is returned.  */
5458
5459 rtx
5460 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5461 {
5462   rtx tem = 0;
5463
5464   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5465     tem = simplify_binary_operation (AND, mode, op0, op1);
5466   if (tem == 0)
5467     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5468
5469   if (target == 0)
5470     target = tem;
5471   else if (tem != target)
5472     emit_move_insn (target, tem);
5473   return target;
5474 }
5475
5476 /* Helper function for emit_store_flag.  */
5477 rtx
5478 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5479              machine_mode mode, machine_mode compare_mode,
5480              int unsignedp, rtx x, rtx y, int normalizep,
5481              machine_mode target_mode)
5482 {
5483   class expand_operand ops[4];
5484   rtx op0, comparison, subtarget;
5485   rtx_insn *last;
5486   scalar_int_mode result_mode = targetm.cstore_mode (icode);
5487   scalar_int_mode int_target_mode;
5488
5489   last = get_last_insn ();
5490   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5491   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5492   if (!x || !y)
5493     {
5494       delete_insns_since (last);
5495       return NULL_RTX;
5496     }
5497
5498   if (target_mode == VOIDmode)
5499     int_target_mode = result_mode;
5500   else
5501     int_target_mode = as_a <scalar_int_mode> (target_mode);
5502   if (!target)
5503     target = gen_reg_rtx (int_target_mode);
5504
5505   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5506
5507   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5508   create_fixed_operand (&ops[1], comparison);
5509   create_fixed_operand (&ops[2], x);
5510   create_fixed_operand (&ops[3], y);
5511   if (!maybe_expand_insn (icode, 4, ops))
5512     {
5513       delete_insns_since (last);
5514       return NULL_RTX;
5515     }
5516   subtarget = ops[0].value;
5517
5518   /* If we are converting to a wider mode, first convert to
5519      INT_TARGET_MODE, then normalize.  This produces better combining
5520      opportunities on machines that have a SIGN_EXTRACT when we are
5521      testing a single bit.  This mostly benefits the 68k.
5522
5523      If STORE_FLAG_VALUE does not have the sign bit set when
5524      interpreted in MODE, we can do this conversion as unsigned, which
5525      is usually more efficient.  */
5526   if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5527     {
5528       gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5529                   || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5530
5531       bool unsignedp = (STORE_FLAG_VALUE >= 0);
5532       convert_move (target, subtarget, unsignedp);
5533
5534       op0 = target;
5535       result_mode = int_target_mode;
5536     }
5537   else
5538     op0 = subtarget;
5539
5540   /* If we want to keep subexpressions around, don't reuse our last
5541      target.  */
5542   if (optimize)
5543     subtarget = 0;
5544
5545   /* Now normalize to the proper value in MODE.  Sometimes we don't
5546      have to do anything.  */
5547   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5548     ;
5549   /* STORE_FLAG_VALUE might be the most negative number, so write
5550      the comparison this way to avoid a compiler-time warning.  */
5551   else if (- normalizep == STORE_FLAG_VALUE)
5552     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5553
5554   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5555      it hard to use a value of just the sign bit due to ANSI integer
5556      constant typing rules.  */
5557   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5558     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5559                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5560                         normalizep == 1);
5561   else
5562     {
5563       gcc_assert (STORE_FLAG_VALUE & 1);
5564
5565       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5566       if (normalizep == -1)
5567         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5568     }
5569
5570   /* If we were converting to a smaller mode, do the conversion now.  */
5571   if (int_target_mode != result_mode)
5572     {
5573       convert_move (target, op0, 0);
5574       return target;
5575     }
5576   else
5577     return op0;
5578 }
5579
5580
5581 /* A subroutine of emit_store_flag only including "tricks" that do not
5582    need a recursive call.  These are kept separate to avoid infinite
5583    loops.  */
5584
5585 static rtx
5586 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5587                    machine_mode mode, int unsignedp, int normalizep,
5588                    machine_mode target_mode)
5589 {
5590   rtx subtarget;
5591   enum insn_code icode;
5592   machine_mode compare_mode;
5593   enum mode_class mclass;
5594   enum rtx_code scode;
5595
5596   if (unsignedp)
5597     code = unsigned_condition (code);
5598   scode = swap_condition (code);
5599
5600   /* If one operand is constant, make it the second one.  Only do this
5601      if the other operand is not constant as well.  */
5602
5603   if (swap_commutative_operands_p (op0, op1))
5604     {
5605       std::swap (op0, op1);
5606       code = swap_condition (code);
5607     }
5608
5609   if (mode == VOIDmode)
5610     mode = GET_MODE (op0);
5611
5612   if (CONST_SCALAR_INT_P (op1))
5613     canonicalize_comparison (mode, &code, &op1);
5614
5615   /* For some comparisons with 1 and -1, we can convert this to
5616      comparisons with zero.  This will often produce more opportunities for
5617      store-flag insns.  */
5618
5619   switch (code)
5620     {
5621     case LT:
5622       if (op1 == const1_rtx)
5623         op1 = const0_rtx, code = LE;
5624       break;
5625     case LE:
5626       if (op1 == constm1_rtx)
5627         op1 = const0_rtx, code = LT;
5628       break;
5629     case GE:
5630       if (op1 == const1_rtx)
5631         op1 = const0_rtx, code = GT;
5632       break;
5633     case GT:
5634       if (op1 == constm1_rtx)
5635         op1 = const0_rtx, code = GE;
5636       break;
5637     case GEU:
5638       if (op1 == const1_rtx)
5639         op1 = const0_rtx, code = NE;
5640       break;
5641     case LTU:
5642       if (op1 == const1_rtx)
5643         op1 = const0_rtx, code = EQ;
5644       break;
5645     default:
5646       break;
5647     }
5648
5649   /* If we are comparing a double-word integer with zero or -1, we can
5650      convert the comparison into one involving a single word.  */
5651   scalar_int_mode int_mode;
5652   if (is_int_mode (mode, &int_mode)
5653       && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5654       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5655     {
5656       rtx tem;
5657       if ((code == EQ || code == NE)
5658           && (op1 == const0_rtx || op1 == constm1_rtx))
5659         {
5660           rtx op00, op01;
5661
5662           /* Do a logical OR or AND of the two words and compare the
5663              result.  */
5664           op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5665           op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5666           tem = expand_binop (word_mode,
5667                               op1 == const0_rtx ? ior_optab : and_optab,
5668                               op00, op01, NULL_RTX, unsignedp,
5669                               OPTAB_DIRECT);
5670
5671           if (tem != 0)
5672             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5673                                    unsignedp, normalizep);
5674         }
5675       else if ((code == LT || code == GE) && op1 == const0_rtx)
5676         {
5677           rtx op0h;
5678
5679           /* If testing the sign bit, can just test on high word.  */
5680           op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5681                                       subreg_highpart_offset (word_mode,
5682                                                               int_mode));
5683           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5684                                  unsignedp, normalizep);
5685         }
5686       else
5687         tem = NULL_RTX;
5688
5689       if (tem)
5690         {
5691           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5692             return tem;
5693           if (!target)
5694             target = gen_reg_rtx (target_mode);
5695
5696           convert_move (target, tem,
5697                         !val_signbit_known_set_p (word_mode,
5698                                                   (normalizep ? normalizep
5699                                                    : STORE_FLAG_VALUE)));
5700           return target;
5701         }
5702     }
5703
5704   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5705      complement of A (for GE) and shifting the sign bit to the low bit.  */
5706   if (op1 == const0_rtx && (code == LT || code == GE)
5707       && is_int_mode (mode, &int_mode)
5708       && (normalizep || STORE_FLAG_VALUE == 1
5709           || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5710     {
5711       scalar_int_mode int_target_mode;
5712       subtarget = target;
5713
5714       if (!target)
5715         int_target_mode = int_mode;
5716       else
5717         {
5718           /* If the result is to be wider than OP0, it is best to convert it
5719              first.  If it is to be narrower, it is *incorrect* to convert it
5720              first.  */
5721           int_target_mode = as_a <scalar_int_mode> (target_mode);
5722           if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5723             {
5724               op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5725               int_mode = int_target_mode;
5726             }
5727         }
5728
5729       if (int_target_mode != int_mode)
5730         subtarget = 0;
5731
5732       if (code == GE)
5733         op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5734                            ((STORE_FLAG_VALUE == 1 || normalizep)
5735                             ? 0 : subtarget), 0);
5736
5737       if (STORE_FLAG_VALUE == 1 || normalizep)
5738         /* If we are supposed to produce a 0/1 value, we want to do
5739            a logical shift from the sign bit to the low-order bit; for
5740            a -1/0 value, we do an arithmetic shift.  */
5741         op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5742                             GET_MODE_BITSIZE (int_mode) - 1,
5743                             subtarget, normalizep != -1);
5744
5745       if (int_mode != int_target_mode)
5746         op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5747
5748       return op0;
5749     }
5750
5751   mclass = GET_MODE_CLASS (mode);
5752   FOR_EACH_MODE_FROM (compare_mode, mode)
5753     {
5754      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5755      icode = optab_handler (cstore_optab, optab_mode);
5756      if (icode != CODE_FOR_nothing)
5757         {
5758           do_pending_stack_adjust ();
5759           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5760                                  unsignedp, op0, op1, normalizep, target_mode);
5761           if (tem)
5762             return tem;
5763
5764           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5765             {
5766               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5767                                  unsignedp, op1, op0, normalizep, target_mode);
5768               if (tem)
5769                 return tem;
5770             }
5771           break;
5772         }
5773     }
5774
5775   return 0;
5776 }
5777
5778 /* Subroutine of emit_store_flag that handles cases in which the operands
5779    are scalar integers.  SUBTARGET is the target to use for temporary
5780    operations and TRUEVAL is the value to store when the condition is
5781    true.  All other arguments are as for emit_store_flag.  */
5782
5783 rtx
5784 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5785                      rtx op1, scalar_int_mode mode, int unsignedp,
5786                      int normalizep, rtx trueval)
5787 {
5788   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5789   rtx_insn *last = get_last_insn ();
5790
5791   /* If this is an equality comparison of integers, we can try to exclusive-or
5792      (or subtract) the two operands and use a recursive call to try the
5793      comparison with zero.  Don't do any of these cases if branches are
5794      very cheap.  */
5795
5796   if ((code == EQ || code == NE) && op1 != const0_rtx)
5797     {
5798       rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5799                               OPTAB_WIDEN);
5800
5801       if (tem == 0)
5802         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5803                             OPTAB_WIDEN);
5804       if (tem != 0)
5805         tem = emit_store_flag (target, code, tem, const0_rtx,
5806                                mode, unsignedp, normalizep);
5807       if (tem != 0)
5808         return tem;
5809
5810       delete_insns_since (last);
5811     }
5812
5813   /* For integer comparisons, try the reverse comparison.  However, for
5814      small X and if we'd have anyway to extend, implementing "X != 0"
5815      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5816   rtx_code rcode = reverse_condition (code);
5817   if (can_compare_p (rcode, mode, ccp_store_flag)
5818       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5819             && code == NE
5820             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5821             && op1 == const0_rtx))
5822     {
5823       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5824                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5825
5826       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5827       if (want_add
5828           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5829                        optimize_insn_for_speed_p ()) == 0)
5830         {
5831           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5832                                        STORE_FLAG_VALUE, target_mode);
5833           if (tem != 0)
5834             tem = expand_binop (target_mode, add_optab, tem,
5835                                 gen_int_mode (normalizep, target_mode),
5836                                 target, 0, OPTAB_WIDEN);
5837           if (tem != 0)
5838             return tem;
5839         }
5840       else if (!want_add
5841                && rtx_cost (trueval, mode, XOR, 1,
5842                             optimize_insn_for_speed_p ()) == 0)
5843         {
5844           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5845                                        normalizep, target_mode);
5846           if (tem != 0)
5847             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5848                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5849           if (tem != 0)
5850             return tem;
5851         }
5852
5853       delete_insns_since (last);
5854     }
5855
5856   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5857      the constant zero.  Reject all other comparisons at this point.  Only
5858      do LE and GT if branches are expensive since they are expensive on
5859      2-operand machines.  */
5860
5861   if (op1 != const0_rtx
5862       || (code != EQ && code != NE
5863           && (BRANCH_COST (optimize_insn_for_speed_p (),
5864                            false) <= 1 || (code != LE && code != GT))))
5865     return 0;
5866
5867   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5868      do the necessary operation below.  */
5869
5870   rtx tem = 0;
5871
5872   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5873      the sign bit set.  */
5874
5875   if (code == LE)
5876     {
5877       /* This is destructive, so SUBTARGET can't be OP0.  */
5878       if (rtx_equal_p (subtarget, op0))
5879         subtarget = 0;
5880
5881       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5882                           OPTAB_WIDEN);
5883       if (tem)
5884         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5885                             OPTAB_WIDEN);
5886     }
5887
5888   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5889      number of bits in the mode of OP0, minus one.  */
5890
5891   if (code == GT)
5892     {
5893       if (rtx_equal_p (subtarget, op0))
5894         subtarget = 0;
5895
5896       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5897                                 GET_MODE_BITSIZE (mode) - 1,
5898                                 subtarget, 0);
5899       if (tem)
5900         tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5901                             OPTAB_WIDEN);
5902     }
5903
5904   if (code == EQ || code == NE)
5905     {
5906       /* For EQ or NE, one way to do the comparison is to apply an operation
5907          that converts the operand into a positive number if it is nonzero
5908          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5909          for NE we negate.  This puts the result in the sign bit.  Then we
5910          normalize with a shift, if needed.
5911
5912          Two operations that can do the above actions are ABS and FFS, so try
5913          them.  If that doesn't work, and MODE is smaller than a full word,
5914          we can use zero-extension to the wider mode (an unsigned conversion)
5915          as the operation.  */
5916
5917       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5918          that is compensated by the subsequent overflow when subtracting
5919          one / negating.  */
5920
5921       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5922         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5923       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5924         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5925       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5926         {
5927           tem = convert_modes (word_mode, mode, op0, 1);
5928           mode = word_mode;
5929         }
5930
5931       if (tem != 0)
5932         {
5933           if (code == EQ)
5934             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5935                                 0, OPTAB_WIDEN);
5936           else
5937             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5938         }
5939
5940       /* If we couldn't do it that way, for NE we can "or" the two's complement
5941          of the value with itself.  For EQ, we take the one's complement of
5942          that "or", which is an extra insn, so we only handle EQ if branches
5943          are expensive.  */
5944
5945       if (tem == 0
5946           && (code == NE
5947               || BRANCH_COST (optimize_insn_for_speed_p (),
5948                               false) > 1))
5949         {
5950           if (rtx_equal_p (subtarget, op0))
5951             subtarget = 0;
5952
5953           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5954           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5955                               OPTAB_WIDEN);
5956
5957           if (tem && code == EQ)
5958             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5959         }
5960     }
5961
5962   if (tem && normalizep)
5963     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5964                               GET_MODE_BITSIZE (mode) - 1,
5965                               subtarget, normalizep == 1);
5966
5967   if (tem)
5968     {
5969       if (!target)
5970         ;
5971       else if (GET_MODE (tem) != target_mode)
5972         {
5973           convert_move (target, tem, 0);
5974           tem = target;
5975         }
5976       else if (!subtarget)
5977         {
5978           emit_move_insn (target, tem);
5979           tem = target;
5980         }
5981     }
5982   else
5983     delete_insns_since (last);
5984
5985   return tem;
5986 }
5987
5988 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5989    and storing in TARGET.  Normally return TARGET.
5990    Return 0 if that cannot be done, or if both operands are constant.
5991
5992    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5993    it is VOIDmode, they cannot both be CONST_INT.
5994
5995    UNSIGNEDP is for the case where we have to widen the operands
5996    to perform the operation.  It says to use zero-extension.
5997
5998    NORMALIZEP is 1 if we should convert the result to be either zero
5999    or one.  NORMALIZEP is -1 if we should convert the result to be
6000    either zero or -1.  If NORMALIZEP is zero, the result will be left
6001    "raw" out of the scc insn.  */
6002
6003 rtx
6004 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
6005                  machine_mode mode, int unsignedp, int normalizep)
6006 {
6007   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
6008   enum rtx_code rcode;
6009   rtx subtarget;
6010   rtx tem, trueval;
6011   rtx_insn *last;
6012
6013   /* If we compare constants, we shouldn't use a store-flag operation,
6014      but a constant load.  We can get there via the vanilla route that
6015      usually generates a compare-branch sequence, but will in this case
6016      fold the comparison to a constant, and thus elide the branch.  */
6017   if (CONSTANT_P (op0) && CONSTANT_P (op1))
6018     return NULL_RTX;
6019
6020   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
6021                            target_mode);  /* Direct scc attempt.  */
6022   if (tem)
6023     return tem;
6024
6025   /* If we reached here, we can't do this with a scc insn, however there
6026      are some comparisons that can be done in other ways.  Don't do any
6027      of these cases if branches are very cheap.  */
6028   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6029     return 0;
6030
6031   /* See what we need to return.  We can only return a 1, -1, or the
6032      sign bit.  */
6033
6034   if (normalizep == 0)
6035     {
6036       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6037         normalizep = STORE_FLAG_VALUE;  /* Raw value is already 1 or -1.  */
6038
6039       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6040         ;  /* The raw result is the sign bit; leave NORMALIZEP at zero.  */
6041       else
6042         return 0;
6043     }
6044
6045   last = get_last_insn ();  /* Rollback point for delete_insns_since.  */
6046
6047   /* If optimizing, use different pseudo registers for each insn, instead
6048      of reusing the same pseudo.  This leads to better CSE, but slows
6049      down the compiler, since there are more pseudos.  */
6050   subtarget = (!optimize
6051                && (target_mode == mode)) ? target : NULL_RTX;
6052   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6053
6054   /* For floating-point comparisons, try the reverse comparison or try
6055      changing the "orderedness" of the comparison.  */
6056   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6057     {
6058       enum rtx_code first_code;
6059       bool and_them;
6060
6061       rcode = reverse_condition_maybe_unordered (code);
6062       if (can_compare_p (rcode, mode, ccp_store_flag)
6063           && (code == ORDERED || code == UNORDERED
6064               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6065               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6066         {
6067           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6068                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6069
6070           /* For the reverse comparison, use either an addition or a XOR.  */
6071           if (want_add
6072               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6073                            optimize_insn_for_speed_p ()) == 0)
6074             {
6075               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6076                                        STORE_FLAG_VALUE, target_mode);
6077               if (tem)
6078                 return expand_binop (target_mode, add_optab, tem,
6079                                      gen_int_mode (normalizep, target_mode),
6080                                      target, 0, OPTAB_WIDEN);
6081             }
6082           else if (!want_add
6083                    && rtx_cost (trueval, mode, XOR, 1,
6084                                 optimize_insn_for_speed_p ()) == 0)
6085             {
6086               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6087                                        normalizep, target_mode);
6088               if (tem)
6089                 return expand_binop (target_mode, xor_optab, tem, trueval,
6090                                      target, INTVAL (trueval) >= 0,
6091                                      OPTAB_WIDEN);
6092             }
6093         }
6094
6095       delete_insns_since (last);
6096
6097       /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
6098       if (code == ORDERED || code == UNORDERED)
6099         return 0;
6100
6101       and_them = split_comparison (code, mode, &first_code, &code);
6102
6103       /* If there are no NaNs, the first comparison should always fall through.
6104          Effectively change the comparison to the other one.  */
6105       if (!HONOR_NANS (mode))
6106         {
6107           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6108           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6109                                     target_mode);
6110         }
6111
6112       if (!HAVE_conditional_move)
6113         return 0;  /* The split form below needs a conditional move.  */
6114
6115       /* Do not turn a trapping comparison into a non-trapping one.  */
6116       if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6117           && flag_trapping_math)
6118         return 0;
6119
6120       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6121          conditional move.  */
6122       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6123                                normalizep, target_mode);
6124       if (tem == 0)
6125         return 0;
6126
6127       if (and_them)
6128         tem = emit_conditional_move (target, code, op0, op1, mode,
6129                                      tem, const0_rtx, GET_MODE (tem), 0);
6130       else
6131         tem = emit_conditional_move (target, code, op0, op1, mode,
6132                                      trueval, tem, GET_MODE (tem), 0);
6133
6134       if (tem == 0)
6135         delete_insns_since (last);
6136       return tem;
6137     }
6138
6139   /* The remaining tricks only apply to integer comparisons.  */
6140
6141   scalar_int_mode int_mode;
6142   if (is_int_mode (mode, &int_mode))
6143     return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6144                                 unsignedp, normalizep, trueval);
6145
6146   return 0;  /* Neither a float nor an integer mode: nothing to try.  */
6147 }
6148
6149 /* Like emit_store_flag, but never fails: when no store-flag sequence can
6150    be generated, fall back to explicit set/compare/jump/set code.  */
6151
6152 rtx
6153 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6154                        machine_mode mode, int unsignedp, int normalizep)
6155 {
6156   /* Use the branchless expansion whenever emit_store_flag provides one.  */
6157   if (rtx flag = emit_store_flag (target, code, op0, op1, mode, unsignedp,
6158                                   normalizep))
6159     return flag;
6160
6161   /* Canonicalize operand order so that a lone constant ends up second.  */
6162   if (swap_commutative_operands_p (op0, op1))
6163     {
6164       code = swap_condition (code);
6165       std::swap (op0, op1);
6166     }
6167
6168   if (mode == VOIDmode)
6169     mode = GET_MODE (op0);
6170   if (target == NULL_RTX)
6171     target = gen_reg_rtx (word_mode);
6172
6173   /* Value stored for a true comparison: NORMALIZEP if nonzero, else one.  */
6174   rtx set_val = const1_rtx;
6175   if (normalizep != 0)
6176     set_val = GEN_INT (normalizep);
6177
6178   /* Special case foo != 0 with foo already living in TARGET: leave the
6179      register alone when it is zero and overwrite it otherwise.  */
6180   const bool nonzero_in_place = (code == NE
6181                                  && GET_MODE_CLASS (mode) == MODE_INT
6182                                  && REG_P (target)
6183                                  && op0 == target && op1 == const0_rtx);
6184   if (nonzero_in_place)
6185     {
6186       rtx_code_label *skip = gen_label_rtx ();
6187       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6188                                NULL_RTX, NULL, skip,
6189                                profile_probability::uninitialized ());
6190       emit_move_insn (target, set_val);
6191       emit_label (skip);
6192       return target;
6193     }
6194
6195   /* The general sequence writes TARGET before comparing, so use a fresh
6196      pseudo unless TARGET is a register that neither operand mentions.  */
6197   if (!REG_P (target)
6198       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6199     target = gen_reg_rtx (GET_MODE (target));
6200
6201   /* TAKEN_VAL is stored first and survives when the jump is taken;
6202      FALLTHRU_VAL overwrites it otherwise.  If the target cannot jump on
6203      CODE but can jump on its reverse, reverse CODE and swap the values.  */
6204   rtx taken_val = set_val;
6205   rtx fallthru_val = const0_rtx;
6206   if (!can_compare_p (code, mode, ccp_jump))
6207     {
6208       const bool is_float = FLOAT_MODE_P (mode);
6209       if (!is_float
6210           || code == ORDERED || code == UNORDERED
6211           || (!HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6212           || (!HONOR_SNANS (mode) && (code == EQ || code == NE)))
6213         {
6214           enum rtx_code rev = (is_float
6215                                ? reverse_condition_maybe_unordered (code)
6216                                : reverse_condition (code));
6217           /* Canonicalize to UNORDERED for the libcall.  */
6218           if (can_compare_p (rev, mode, ccp_jump)
6219               || (code == ORDERED && !can_compare_p (ORDERED, mode, ccp_jump)))
6220             {
6221               std::swap (taken_val, fallthru_val);
6222               code = rev;
6223             }
6224         }
6225     }
6226
6227   emit_move_insn (target, taken_val);
6228   rtx_code_label *done = gen_label_rtx ();
6229   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6230                            done, profile_probability::uninitialized ());
6231   emit_move_insn (target, fallthru_val);
6232   emit_label (done);
6233   return target;
6234 }
6235
6236 /* Helper for canonicalize_comparison.  Map a comparison code to its
6237    counterpart with the opposite inclusiveness (exclusive <-> inclusive
6238    range), keeping direction and signedness, e.g. GT <-> GE and
6239    LTU <-> LEU.  Any other code is returned unchanged.  See
6240    canonicalize_comparison for the possible cases.  */
6241
6242 static enum rtx_code
6243 equivalent_cmp_code (enum rtx_code code)
6244 {
6245   /* Each row holds the exclusive and inclusive form of one ordering.  */
6246   static const enum rtx_code pairs[][2] =
6247     {
6248       { GT, GE },
6249       { LT, LE },
6250       { GTU, GEU },
6251       { LTU, LEU }
6252     };
6253   const unsigned int n_pairs = sizeof pairs / sizeof pairs[0];
6254
6255   for (unsigned int i = 0; i < n_pairs; i++)
6256     {
6257       if (code == pairs[i][0])
6258         return pairs[i][1];
6259       if (code == pairs[i][1])
6260         return pairs[i][0];
6261     }
6262
6263   /* No partner: hand the code back untouched.  */
6264   return code;
6265 }
6266
6267 /* Choose the more appropriate immediate in scalar integer comparisons.
6268    The purpose of this is to end up with an immediate which can be loaded
6269    into a register in fewer moves, if possible.
6270
6271    For each integer comparison there exists an equivalent choice:
6272      i)   a >  b or a >= b + 1
6273      ii)  a <= b or a <  b + 1
6274      iii) a >= b or a >  b - 1
6275      iv)  a <  b or a <= b - 1
6276
6277    MODE is the mode of the first operand.
6278    CODE points to the comparison code; it is updated together with *IMM.
6279    IMM points to the rtx containing the immediate.  *IMM must satisfy
6280    CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6281    on exit.  */
6282
6283 void
6284 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6285 {
6286   if (!SCALAR_INT_MODE_P (mode))
6287     return;
6288
6289   const enum rtx_code cmp = *code;
6290   const enum signop sign = unsigned_condition_p (cmp) ? UNSIGNED : SIGNED;
6291
6292   /* Read the current immediate as a wide integer of MODE.  */
6293   wide_int cur_val = rtx_mode_t (*imm, mode);
6294
6295   /* Cases i) and ii) step the immediate up by one, cases iii) and iv)
6296      step it down by one; any other code has no alternative form.  */
6297   const bool step_up = (cmp == GT || cmp == GTU
6298                         || cmp == LE || cmp == LEU);
6299   const bool step_down = (cmp == GE || cmp == GEU
6300                           || cmp == LT || cmp == LTU);
6301   if (!step_up && !step_down)
6302     return;
6303
6304   /* Compute the alternative immediate.  Cancel the optimization on
6305      overflow/underflow of signed values and on wrap-around of unsigned
6306      values.  */
6307   wi::overflow_type ovf = wi::OVF_NONE;
6308   wide_int alt_val = (step_up
6309                       ? wi::add (cur_val, 1, sign, &ovf)
6310                       : wi::sub (cur_val, 1, sign, &ovf));
6311   if (ovf)
6312     return;
6313
6314   /* The following creates a pseudo; if we cannot do that, bail out.  */
6315   if (!can_create_pseudo_p ())
6316     return;
6317
6318   /* Build a register load of each candidate so the two can be costed.  */
6319   rtx scratch = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6320   rtx alt_imm = immed_wide_int_const (alt_val, mode);
6321   rtx_insn *load_cur = gen_move_insn (scratch, *imm);
6322   rtx_insn *load_alt = gen_move_insn (scratch, alt_imm);
6323
6324   /* Switch to the alternative form only when it is strictly cheaper.  */
6325   const int cost_cur = insn_cost (load_cur, true);
6326   const int cost_alt = insn_cost (load_alt, true);
6327   if (cost_alt < cost_cur)
6328     {
6329       *code = equivalent_cmp_code (cmp);
6330       *imm = alt_imm;
6331     }
6332 }
6333
6334
6335 \f
6336 /* Compare ARG1 and ARG2 (possibly multi-word, both of mode MODE) and jump
6337    to LABEL if ARG1 OP ARG2; thin wrapper around do_compare_rtx_and_jump.  */
6338
6339 static void
6340 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6341                  rtx_code_label *label)
6342 {
6343   /* Only the unsigned orderings request an unsigned comparison.  */
6344   const bool is_unsigned = (op == LTU || op == LEU || op == GTU || op == GEU);
6345   do_compare_rtx_and_jump (arg1, arg2, op, is_unsigned, mode, NULL_RTX, NULL,
6346                            label, profile_probability::uninitialized ());
6347 }