gcc/expmed.cc

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2024 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* Work around tree-optimization/91825.  */
  22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "backend.h"
  28 #include "target.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "predict.h"
  32 #include "memmodel.h"
  33 #include "tm_p.h"
  34 #include "optabs.h"
  35 #include "expmed.h"
  36 #include "regs.h"
  37 #include "emit-rtl.h"
  38 #include "diagnostic-core.h"
  39 #include "fold-const.h"
  40 #include "stor-layout.h"
  41 #include "dojump.h"
  42 #include "explow.h"
  43 #include "expr.h"
  44 #include "langhooks.h"
  45 #include "tree-vector-builder.h"
  46 #include "recog.h"
  47
   48 struct target_expmed default_target_expmed;  /* Default instance of the expmed target state.  */
   49 #if SWITCHABLE_TARGET
   50 struct target_expmed *this_target_expmed = &default_target_expmed;  /* Starts out pointing at the default instance.  */
   51 #endif
  52
  53 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
  54                                       unsigned HOST_WIDE_INT,
  55                                       unsigned HOST_WIDE_INT,
  56                                       poly_uint64, poly_uint64,
  57                                       machine_mode, rtx, bool, bool);
  58 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
  59                                    unsigned HOST_WIDE_INT,
  60                                    unsigned HOST_WIDE_INT,
  61                                    poly_uint64, poly_uint64,
  62                                    rtx, scalar_int_mode, bool);
  63 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
  64                                      unsigned HOST_WIDE_INT,
  65                                      unsigned HOST_WIDE_INT,
  66                                      rtx, scalar_int_mode, bool);
  67 static void store_split_bit_field (rtx, opt_scalar_int_mode,
  68                                    unsigned HOST_WIDE_INT,
  69                                    unsigned HOST_WIDE_INT,
  70                                    poly_uint64, poly_uint64,
  71                                    rtx, scalar_int_mode, bool);
  72 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
  73                                        unsigned HOST_WIDE_INT,
  74                                        unsigned HOST_WIDE_INT, int, rtx,
  75                                        machine_mode, machine_mode, bool, bool);
  76 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
  77                                     unsigned HOST_WIDE_INT,
  78                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  79 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
  80                                       unsigned HOST_WIDE_INT,
  81                                       unsigned HOST_WIDE_INT, rtx, int, bool);
  82 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  83 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
  84                                     unsigned HOST_WIDE_INT,
  85                                     unsigned HOST_WIDE_INT, int, bool);
  86 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  87 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
  88 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
  89
  90 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  91    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  92    The mask is truncated if necessary to the width of mode MODE.  The
  93    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  94
  95 static inline rtx
  96 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
  97 {
  98   return immed_wide_int_const
  99     (wi::shifted_mask (bitpos, bitsize, complement,
 100                        GET_MODE_PRECISION (mode)), mode);
 101 }
 102
  103 /* Test whether a value is zero or a power of two.  X is evaluated twice.  */
  104 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  105   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
 106
  107 struct init_expmed_rtl  /* Scratch rtxes built by init_expmed and re-moded per mode to query costs.  */
  108 {
  109   rtx reg;         /* Raw register used as the operand of every expression below.  */
  110   rtx plus;        /* (plus reg reg)  */
  111   rtx neg;         /* (neg reg)  */
  112   rtx mult;        /* (mult reg reg)  */
  113   rtx sdiv;        /* (div reg reg)  */
  114   rtx udiv;        /* (udiv reg reg)  */
  115   rtx sdiv_32;     /* (div reg pow2[5]), i.e. signed division by 32.  */
  116   rtx smod_32;     /* (mod reg pow2[5]), i.e. signed remainder by 32.  */
  117   rtx wide_mult;   /* (mult zext zext)  */
  118   rtx wide_lshr;   /* (lshiftrt wide_mult N); N is set per mode in init_expmed_one_mode.  */
  119   rtx wide_trunc;  /* (truncate wide_lshr)  */
  120   rtx shift;       /* (ashift reg N); N is replaced by cint[m] for each shift count.  */
  121   rtx shift_mult;  /* (mult reg N); N is replaced by pow2[m] for each shift count.  */
  122   rtx shift_add;   /* (plus shift_mult reg)  */
  123   rtx shift_sub0;  /* (minus shift_mult reg)  */
  124   rtx shift_sub1;  /* (minus reg shift_mult)  */
  125   rtx zext;        /* (zero_extend reg)  */
  126   rtx trunc;       /* (truncate reg)  */
  127
  128   rtx pow2[MAX_BITS_PER_WORD];  /* pow2[m] is the constant 1 << m; init_expmed leaves index 0 null.  */
  129   rtx cint[MAX_BITS_PER_WORD];  /* cint[m] is the constant m; init_expmed leaves index 0 null.  */
  130 };
 131
 132 static void
 133 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
 134                       scalar_int_mode from_mode, bool speed)
 135 {
 136   int to_size, from_size;
 137   rtx which;
 138
 139   to_size = GET_MODE_PRECISION (to_mode);
 140   from_size = GET_MODE_PRECISION (from_mode);
 141
 142   /* Most partial integers have a precision less than the "full"
 143      integer it requires for storage.  In case one doesn't, for
 144      comparison purposes here, reduce the bit size by one in that
 145      case.  */
 146   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 147       && pow2p_hwi (to_size))
 148     to_size --;
 149   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 150       && pow2p_hwi (from_size))
 151     from_size --;
 152
 153   /* Assume cost of zero-extend and sign-extend is the same.  */
 154   which = (to_size < from_size ? all->trunc : all->zext);
 155
 156   PUT_MODE (all->reg, from_mode);
 157   set_convert_cost (to_mode, from_mode, speed,
 158                     set_src_cost (which, to_mode, speed));
 159   /* Restore all->reg's mode.  */
 160   PUT_MODE (all->reg, to_mode);
 161 }
 162
 163 static void
 164 init_expmed_one_mode (struct init_expmed_rtl *all,
 165                       machine_mode mode, int speed)
 166 {
 167   int m, n, mode_bitsize;
 168   machine_mode mode_from;
 169
 170   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 171
 172   PUT_MODE (all->reg, mode);
 173   PUT_MODE (all->plus, mode);
 174   PUT_MODE (all->neg, mode);
 175   PUT_MODE (all->mult, mode);
 176   PUT_MODE (all->sdiv, mode);
 177   PUT_MODE (all->udiv, mode);
 178   PUT_MODE (all->sdiv_32, mode);
 179   PUT_MODE (all->smod_32, mode);
 180   PUT_MODE (all->wide_trunc, mode);
 181   PUT_MODE (all->shift, mode);
 182   PUT_MODE (all->shift_mult, mode);
 183   PUT_MODE (all->shift_add, mode);
 184   PUT_MODE (all->shift_sub0, mode);
 185   PUT_MODE (all->shift_sub1, mode);
 186   PUT_MODE (all->zext, mode);
 187   PUT_MODE (all->trunc, mode);
 188
 189   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 190   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 191   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 192   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 193   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 194
 195   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 196                                      <= 2 * add_cost (speed, mode)));
 197   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 198                                      <= 4 * add_cost (speed, mode)));
 199
 200   set_shift_cost (speed, mode, 0, 0);
 201   {
 202     int cost = add_cost (speed, mode);
 203     set_shiftadd_cost (speed, mode, 0, cost);
 204     set_shiftsub0_cost (speed, mode, 0, cost);
 205     set_shiftsub1_cost (speed, mode, 0, cost);
 206   }
 207
 208   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 209   for (m = 1; m < n; m++)
 210     {
 211       XEXP (all->shift, 1) = all->cint[m];
 212       XEXP (all->shift_mult, 1) = all->pow2[m];
 213
 214       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 215       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 216                                                        speed));
 217       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 218                                                         speed));
 219       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 220                                                         speed));
 221     }
 222
 223   scalar_int_mode int_mode_to;
 224   if (is_a <scalar_int_mode> (mode, &int_mode_to))
 225     {
 226       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 227            mode_from = (machine_mode)(mode_from + 1))
 228         init_expmed_one_conv (all, int_mode_to,
 229                               as_a <scalar_int_mode> (mode_from), speed);
 230
 231       scalar_int_mode wider_mode;
 232       if (GET_MODE_CLASS (int_mode_to) == MODE_INT
 233           && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
 234         {
 235           PUT_MODE (all->reg, mode);
 236           PUT_MODE (all->zext, wider_mode);
 237           PUT_MODE (all->wide_mult, wider_mode);
 238           PUT_MODE (all->wide_lshr, wider_mode);
 239           XEXP (all->wide_lshr, 1)
 240             = gen_int_shift_amount (wider_mode, mode_bitsize);
 241
 242           set_mul_widen_cost (speed, wider_mode,
 243                               set_src_cost (all->wide_mult, wider_mode, speed));
 244           set_mul_highpart_cost (speed, int_mode_to,
 245                                  set_src_cost (all->wide_trunc,
 246                                                int_mode_to, speed));
 247         }
 248     }
 249 }
 250
 251 void
 252 init_expmed (void)
 253 {
 254   struct init_expmed_rtl all;
 255   machine_mode mode = QImode;
 256   int m, speed;
 257
 258   memset (&all, 0, sizeof all);
 259   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 260     {
 261       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
 262       all.cint[m] = GEN_INT (m);
 263     }
 264
 265   /* Avoid using hard regs in ways which may be unsupported.  */
 266   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 267   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 268   all.neg = gen_rtx_NEG (mode, all.reg);
 269   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 270   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 271   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 272   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 273   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 274   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 275   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 276   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 277   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 278   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 279   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 280   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 281   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 282   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 283   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 284
 285   for (speed = 0; speed < 2; speed++)
 286     {
 287       crtl->maybe_hot_insn_p = speed;
 288       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 289
 290       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 291            mode = (machine_mode)(mode + 1))
 292         init_expmed_one_mode (&all, mode, speed);
 293
 294       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 295         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 296              mode = (machine_mode)(mode + 1))
 297           init_expmed_one_mode (&all, mode, speed);
 298
 299       if (MIN_MODE_VECTOR_INT != VOIDmode)
 300         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 301              mode = (machine_mode)(mode + 1))
 302           init_expmed_one_mode (&all, mode, speed);
 303     }
 304
 305   if (alg_hash_used_p ())
 306     {
 307       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 308       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 309     }
 310   else
 311     set_alg_hash_used_p (true);
 312   default_rtl_profile ();
 313
 314   ggc_free (all.trunc);
 315   ggc_free (all.shift_sub1);
 316   ggc_free (all.shift_sub0);
 317   ggc_free (all.shift_add);
 318   ggc_free (all.shift_mult);
 319   ggc_free (all.shift);
 320   ggc_free (all.wide_trunc);
 321   ggc_free (all.wide_lshr);
 322   ggc_free (all.wide_mult);
 323   ggc_free (all.zext);
 324   ggc_free (all.smod_32);
 325   ggc_free (all.sdiv_32);
 326   ggc_free (all.udiv);
 327   ggc_free (all.sdiv);
 328   ggc_free (all.mult);
 329   ggc_free (all.neg);
 330   ggc_free (all.plus);
 331   ggc_free (all.reg);
 332 }
 333
 334 /* Return an rtx representing minus the value of X.
 335    MODE is the intended mode of the result,
 336    useful if X is a CONST_INT.  */
 337
 338 rtx
 339 negate_rtx (machine_mode mode, rtx x)
 340 {
 341   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 342
 343   if (result == 0)
 344     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 345
 346   return result;
 347 }
 348
 349 /* Whether reverse storage order is supported on the target.  */
 350 static int reverse_storage_order_supported = -1;
 351
 352 /* Check whether reverse storage order is supported on the target.  */
 353
 354 static void
 355 check_reverse_storage_order_support (void)
 356 {
 357   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 358     {
 359       reverse_storage_order_supported = 0;
 360       sorry ("reverse scalar storage order");
 361     }
 362   else
 363     reverse_storage_order_supported = 1;
 364 }
 365
 366 /* Whether reverse FP storage order is supported on the target.  */
 367 static int reverse_float_storage_order_supported = -1;
 368
 369 /* Check whether reverse FP storage order is supported on the target.  */
 370
 371 static void
 372 check_reverse_float_storage_order_support (void)
 373 {
 374   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 375     {
 376       reverse_float_storage_order_supported = 0;
 377       sorry ("reverse floating-point scalar storage order");
 378     }
 379   else
 380     reverse_float_storage_order_supported = 1;
 381 }
 382
 383 /* Return an rtx representing value of X with reverse storage order.
 384    MODE is the intended mode of the result,
 385    useful if X is a CONST_INT.  */
 386
 387 rtx
 388 flip_storage_order (machine_mode mode, rtx x)
 389 {
 390   scalar_int_mode int_mode;
 391   rtx result;
 392
 393   if (mode == QImode)
 394     return x;
 395
 396   if (COMPLEX_MODE_P (mode))
 397     {
 398       rtx real = read_complex_part (x, false);
 399       rtx imag = read_complex_part (x, true);
 400
 401       real = flip_storage_order (GET_MODE_INNER (mode), real);
 402       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 403
 404       return gen_rtx_CONCAT (mode, real, imag);
 405     }
 406
 407   if (UNLIKELY (reverse_storage_order_supported < 0))
 408     check_reverse_storage_order_support ();
 409
 410   if (!is_a <scalar_int_mode> (mode, &int_mode))
 411     {
 412       if (FLOAT_MODE_P (mode)
 413           && UNLIKELY (reverse_float_storage_order_supported < 0))
 414         check_reverse_float_storage_order_support ();
 415
 416       if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
 417           || !targetm.scalar_mode_supported_p (int_mode))
 418         {
 419           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 420           return x;
 421         }
 422       x = gen_lowpart (int_mode, x);
 423     }
 424
 425   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 426   if (result == 0)
 427     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 428
 429   if (int_mode != mode)
 430     result = gen_lowpart (mode, result);
 431
 432   return result;
 433 }
 434
 435 /* If MODE is set, adjust bitfield memory MEM so that it points to the
 436    first unit of mode MODE that contains a bitfield of size BITSIZE at
 437    bit position BITNUM.  If MODE is not set, return a BLKmode reference
 438    to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
 439    of the field within the new memory.  */
 440
 441 static rtx
 442 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
 443                       unsigned HOST_WIDE_INT bitsize,
 444                       unsigned HOST_WIDE_INT bitnum,
 445                       unsigned HOST_WIDE_INT *new_bitnum)
 446 {
 447   scalar_int_mode imode;
 448   if (mode.exists (&imode))
 449     {
 450       unsigned int unit = GET_MODE_BITSIZE (imode);
 451       *new_bitnum = bitnum % unit;
 452       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 453       return adjust_bitfield_address (mem, imode, offset);
 454     }
 455   else
 456     {
 457       *new_bitnum = bitnum % BITS_PER_UNIT;
 458       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 459       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 460                             / BITS_PER_UNIT);
 461       return adjust_bitfield_address_size (mem, BLKmode, offset, size);
 462     }
 463 }
 464
 465 /* The caller wants to perform insertion or extraction PATTERN on a
 466    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 467    BITREGION_START and BITREGION_END are as for store_bit_field
 468    and FIELDMODE is the natural mode of the field.
 469
 470    Search for a mode that is compatible with the memory access
 471    restrictions and (where applicable) with a register insertion or
 472    extraction.  Return the new memory on success, storing the adjusted
 473    bit position in *NEW_BITNUM.  Return null otherwise.  */
 474
 475 static rtx
 476 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 477                               rtx op0, HOST_WIDE_INT bitsize,
 478                               HOST_WIDE_INT bitnum,
 479                               poly_uint64 bitregion_start,
 480                               poly_uint64 bitregion_end,
 481                               machine_mode fieldmode,
 482                               unsigned HOST_WIDE_INT *new_bitnum)
 483 {
 484   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 485                                 bitregion_end, MEM_ALIGN (op0),
 486                                 MEM_VOLATILE_P (op0));
 487   scalar_int_mode best_mode;
 488   if (iter.next_mode (&best_mode))
 489     {
 490       /* We can use a memory in BEST_MODE.  See whether this is true for
 491          any wider modes.  All other things being equal, we prefer to
 492          use the widest mode possible because it tends to expose more
 493          CSE opportunities.  */
 494       if (!iter.prefer_smaller_modes ())
 495         {
 496           /* Limit the search to the mode required by the corresponding
 497              register insertion or extraction instruction, if any.  */
 498           scalar_int_mode limit_mode = word_mode;
 499           extraction_insn insn;
 500           if (get_best_reg_extraction_insn (&insn, pattern,
 501                                             GET_MODE_BITSIZE (best_mode),
 502                                             fieldmode))
 503             limit_mode = insn.field_mode;
 504
 505           scalar_int_mode wider_mode;
 506           while (iter.next_mode (&wider_mode)
 507                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 508             best_mode = wider_mode;
 509         }
 510       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 511                                    new_bitnum);
 512     }
 513   return NULL_RTX;
 514 }
 515
 516 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 517    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 518    offset is then BITNUM / BITS_PER_UNIT.  */
 519
 520 static bool
 521 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
 522                      machine_mode struct_mode)
 523 {
 524   poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
 525   if (BYTES_BIG_ENDIAN)
 526     return (multiple_p (bitnum, BITS_PER_UNIT)
 527             && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
 528                 || multiple_p (bitnum + bitsize,
 529                                regsize * BITS_PER_UNIT)));
 530   else
 531     return multiple_p (bitnum, regsize * BITS_PER_UNIT);
 532 }
 533
 534 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 535    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 536    Return false if the access would touch memory outside the range
 537    BITREGION_START to BITREGION_END for conformance to the C++ memory
 538    model.  */
 539
 540 static bool
 541 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 542                             unsigned HOST_WIDE_INT bitnum,
 543                             scalar_int_mode fieldmode,
 544                             poly_uint64 bitregion_start,
 545                             poly_uint64 bitregion_end)
 546 {
 547   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 548
 549   /* -fstrict-volatile-bitfields must be enabled and we must have a
 550      volatile MEM.  */
 551   if (!MEM_P (op0)
 552       || !MEM_VOLATILE_P (op0)
 553       || flag_strict_volatile_bitfields <= 0)
 554     return false;
 555
 556   /* The bit size must not be larger than the field mode, and
 557      the field mode must not be larger than a word.  */
 558   if (bitsize > modesize || modesize > BITS_PER_WORD)
 559     return false;
 560
 561   /* Check for cases of unaligned fields that must be split.  */
 562   if (bitnum % modesize + bitsize > modesize)
 563     return false;
 564
 565   /* The memory must be sufficiently aligned for a MODESIZE access.
 566      This condition guarantees, that the memory access will not
 567      touch anything after the end of the structure.  */
 568   if (MEM_ALIGN (op0) < modesize)
 569     return false;
 570
 571   /* Check for cases where the C++ memory model applies.  */
 572   if (maybe_ne (bitregion_end, 0U)
 573       && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
 574           || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
 575                        bitregion_end)))
 576     return false;
 577
 578   return true;
 579 }
 580
 581 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 582    bit number BITNUM can be treated as a simple value of mode MODE.
 583    Store the byte offset in *BYTENUM if so.  */
 584
 585 static bool
 586 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
 587                        machine_mode mode, poly_uint64 *bytenum)
 588 {
 589   return (MEM_P (op0)
 590           && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
 591           && known_eq (bitsize, GET_MODE_BITSIZE (mode))
 592           && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
 593               || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
 594                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 595 }
 596 \f
  597 /* Try to use instruction INSV to store VALUE into a field of OP0.
  598    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
  599    BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
  600    are as for store_bit_field.  Return true on success; on failure delete any insns emitted here and return false.  */
  601
  602 static bool
  603 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
  604                             opt_scalar_int_mode op0_mode,
  605                             unsigned HOST_WIDE_INT bitsize,
  606                             unsigned HOST_WIDE_INT bitnum,
  607                             rtx value, scalar_int_mode value_mode)
  608 {
  609   class expand_operand ops[4];
  610   rtx value1;
  611   rtx xop0 = op0;
  612   rtx_insn *last = get_last_insn ();  /* Rollback point if INSV cannot be used.  */
  613   bool copy_back = false;  /* True if we insert into a temporary that must be copied to OP0.  */
  614
  615   scalar_int_mode op_mode = insv->field_mode;
  616   unsigned int unit = GET_MODE_BITSIZE (op_mode);
  617   if (bitsize == 0 || bitsize > unit)  /* Empty field, or wider than the insn's field mode.  */
  618     return false;
  619
  620   if (MEM_P (xop0))
  621     /* Get a reference to the first byte of the field.  */
  622     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
  623                                  &bitnum);
  624   else
  625     {
  626       /* Convert from counting within OP0 to counting in OP_MODE.  */
  627       if (BYTES_BIG_ENDIAN)
  628         bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
  629
  630       /* If xop0 is a register, we need it in OP_MODE
  631          to make it acceptable to the format of insv.  */
  632       if (GET_CODE (xop0) == SUBREG)
  633         {
  634           /* If such a SUBREG can't be created, give up.  */
  635           if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
  636                                 SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
  637             return false;
  638           /* We can't just change the mode, because this might clobber op0,
  639              and we will need the original value of op0 if insv fails.  */
  640           xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
  641                                  SUBREG_BYTE (xop0));
  642         }
  643       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
  644         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
  645     }
  646
  647   /* If the destination is a paradoxical subreg such that we need a
  648      truncate to the inner mode, perform the insertion on a temporary and
  649      truncate the result to the original destination.  Note that we can't
  650      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
  651      X) 0)) is (reg:N X).  */
  652   if (GET_CODE (xop0) == SUBREG
  653       && REG_P (SUBREG_REG (xop0))
  654       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
  655                                          op_mode))
  656     {
  657       rtx tem = gen_reg_rtx (op_mode);
  658       emit_move_insn (tem, xop0);
  659       xop0 = tem;
  660       copy_back = true;
  661     }
  662
  663   /* There is a similar overflow check at the start of store_bit_field_1,
  664      but that only checks the situation where the field lies completely
  665      outside the register.  There are also situations where the field
  666      lies partially in the register, and we need to adjust BITSIZE for this
  667      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
  668      will be broken on those arches that support bit insert instructions,
  669      like arm, aarch64 etc.  */
  670   if (bitsize + bitnum > unit && bitnum < unit)
  671     {
  672       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
  673                "destination object, data truncated into %wu-bit",
  674                bitsize, unit - bitnum);
  675       bitsize = unit - bitnum;
  676     }
  677
  678   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
  679      "backwards" from the size of the unit we are inserting into.
  680      Otherwise, we count bits from the most significant on a
  681      BYTES/BITS_BIG_ENDIAN machine.  */
  682
  683   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
  684     bitnum = unit - bitsize - bitnum;
  685
  686   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  687   value1 = value;
  688   if (value_mode != op_mode)
  689     {
  690       if (GET_MODE_BITSIZE (value_mode) >= bitsize)
  691         {
  692           rtx tmp;
  693           /* Optimization: Don't bother really extending VALUE
  694              if it has all the bits we will actually use.  However,
  695              if we must narrow it, be sure we do it correctly.  */
  696
  697           if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
  698             {
  699               tmp = simplify_subreg (op_mode, value1, value_mode, 0);
  700               if (! tmp)
  701                 tmp = simplify_gen_subreg (op_mode,
  702                                            force_reg (value_mode, value1),
  703                                            value_mode, 0);
  704             }
  705           else
  706             {
  707               if (targetm.mode_rep_extended (op_mode, value_mode) != UNKNOWN)
  708                 tmp = simplify_gen_unary (TRUNCATE, op_mode,
  709                                           value1, value_mode);
  710               else
  711                 {
  712                   tmp = gen_lowpart_if_possible (op_mode, value1);
  713                   if (! tmp)
  714                     tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
  715                 }
  716             }
  717           value1 = tmp;
  718         }
  719       else if (CONST_INT_P (value))
  720         value1 = gen_int_mode (INTVAL (value), op_mode);
  721       else
  722         /* Parse phase is supposed to make VALUE's data type
  723            match that of the component reference, which is a type
  724            at least as wide as the field; so VALUE should have
  725            a mode that corresponds to that type.  */
  726         gcc_assert (CONSTANT_P (value));
  727     }
  728
  729   create_fixed_operand (&ops[0], xop0);  /* Operand 0: the destination.  */
  730   create_integer_operand (&ops[1], bitsize);  /* Operand 1: the field width.  */
  731   create_integer_operand (&ops[2], bitnum);  /* Operand 2: the field position.  */
  732   create_input_operand (&ops[3], value1, op_mode);  /* Operand 3: the value, in OP_MODE.  */
  733   if (maybe_expand_insn (insv->icode, 4, ops))
  734     {
  735       if (copy_back)
  736         convert_move (op0, xop0, true);  /* Write the temporary back to the real destination.  */
  737       return true;
  738     }
  739   delete_insns_since (last);  /* Discard everything emitted since entry.  */
  740   return false;
  741 }
 742
 743 /* A subroutine of store_bit_field, with the same arguments.  Return true
 744    if the operation could be implemented.
 745
 746    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 747    no other way of implementing the operation.  If FALLBACK_P is false,
 748    return false instead.
 749
  750    If UNDEFINED_P is true then STR_RTX is undefined and may be set using
  751    a subreg instead.  */
 752
 753 static bool
 754 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
 755                    poly_uint64 bitregion_start, poly_uint64 bitregion_end,
 756                    machine_mode fieldmode,
 757                    rtx value, bool reverse, bool fallback_p, bool undefined_p)
 758 {
 759   rtx op0 = str_rtx;
 760
 761   while (GET_CODE (op0) == SUBREG)
 762     {
 763       bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
 764       op0 = SUBREG_REG (op0);
 765     }
 766
 767   /* No action is needed if the target is a register and if the field
 768      lies completely outside that register.  This can occur if the source
 769      code contains an out-of-bounds access to a small array.  */
 770   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
 771     return true;
 772
 773   /* Use vec_set patterns for inserting parts of vectors whenever
 774      available.  */
 775   machine_mode outermode = GET_MODE (op0);
 776   scalar_mode innermode = GET_MODE_INNER (outermode);
 777   poly_uint64 pos;
 778   if (VECTOR_MODE_P (outermode)
 779       && !MEM_P (op0)
 780       && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
 781       && fieldmode == innermode
 782       && known_eq (bitsize, GET_MODE_PRECISION (innermode))
 783       && multiple_p (bitnum, GET_MODE_PRECISION (innermode), &pos))
 784     {
 785       class expand_operand ops[3];
 786       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 787
 788       create_fixed_operand (&ops[0], op0);
 789       create_input_operand (&ops[1], value, innermode);
 790       create_integer_operand (&ops[2], pos);
 791       if (maybe_expand_insn (icode, 3, ops))
 792         return true;
 793     }
 794
 795   /* If the target is a register, overwriting the entire object, or storing
 796      a full-word or multi-word field can be done with just a SUBREG.  */
 797   if (!MEM_P (op0)
 798       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
 799     {
 800       /* Use the subreg machinery either to narrow OP0 to the required
 801          words or to cope with mode punning between equal-sized modes.
 802          In the latter case, use subreg on the rhs side, not lhs.  */
 803       rtx sub;
 804       poly_uint64 bytenum;
 805       poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
 806       if (known_eq (bitnum, 0U)
 807           && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
 808         {
 809           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 810           if (sub)
 811             {
 812               if (reverse)
 813                 sub = flip_storage_order (GET_MODE (op0), sub);
 814               emit_move_insn (op0, sub);
 815               return true;
 816             }
 817         }
 818       else if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
 819                && (undefined_p
 820                    || (multiple_p (bitnum, regsize * BITS_PER_UNIT)
 821                        && multiple_p (bitsize, regsize * BITS_PER_UNIT)))
 822                && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize))
 823         {
 824           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), bytenum);
 825           if (sub)
 826             {
 827               if (reverse)
 828                 value = flip_storage_order (fieldmode, value);
 829               emit_move_insn (sub, value);
 830               return true;
 831             }
 832         }
 833     }
 834
 835   /* If the target is memory, storing any naturally aligned field can be
 836      done with a simple store.  For targets that support fast unaligned
 837      memory, any naturally sized, unit aligned field can be done directly.  */
 838   poly_uint64 bytenum;
 839   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
 840     {
 841       op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
 842       if (reverse)
 843         value = flip_storage_order (fieldmode, value);
 844       emit_move_insn (op0, value);
 845       return true;
 846     }
 847
 848   /* It's possible we'll need to handle other cases here for
 849      polynomial bitnum and bitsize.  */
 850
 851   /* From here on we need to be looking at a fixed-size insertion.  */
 852   unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
 853   unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
 854
 855   /* Make sure we are playing with integral modes.  Pun with subregs
 856      if we aren't.  This must come after the entire register case above,
 857      since that case is valid for any mode.  The following cases are only
 858      valid for integral modes.  */
 859   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
 860   scalar_int_mode imode;
 861   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
 862     {
 863       if (MEM_P (op0))
 864         op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
 865                                             0, MEM_SIZE (op0));
 866       else if (!op0_mode.exists ())
 867         {
 868           if (ibitnum == 0
 869               && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
 870               && MEM_P (value)
 871               && !reverse)
 872             {
 873               value = adjust_address (value, GET_MODE (op0), 0);
 874               emit_move_insn (op0, value);
 875               return true;
 876             }
 877           if (!fallback_p)
 878             return false;
 879           rtx temp = assign_stack_temp (GET_MODE (op0),
 880                                         GET_MODE_SIZE (GET_MODE (op0)));
 881           emit_move_insn (temp, op0);
 882           store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
 883                              reverse, fallback_p, undefined_p);
 884           emit_move_insn (op0, temp);
 885           return true;
 886         }
 887       else
 888         op0 = gen_lowpart (op0_mode.require (), op0);
 889     }
 890
 891   return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
 892                                    bitregion_start, bitregion_end,
 893                                    fieldmode, value, reverse, fallback_p);
 894 }
 895
 896 /* Subroutine of store_bit_field_1, with the same arguments, except
 897    that BITSIZE and BITNUM are constant.  Handle cases specific to
 898    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
 899    otherwise OP0 is a BLKmode MEM.

        There is no UNDEFINED_P argument here; the nested calls to
        store_bit_field_1 below pass false for it.

        Return true if the store was emitted.  False is returned only when
        FALLBACK_P is false and no cheaper method applied, or when storing
        one word of a multi-word field failed (in which case the insns
        emitted for the earlier words are deleted).  */
 900
 901 static bool
 902 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
 903                           unsigned HOST_WIDE_INT bitsize,
 904                           unsigned HOST_WIDE_INT bitnum,
 905                           poly_uint64 bitregion_start,
 906                           poly_uint64 bitregion_end,
 907                           machine_mode fieldmode,
 908                           rtx value, bool reverse, bool fallback_p)
 909 {
 910   /* Storing an lsb-aligned field in a register
 911      can be done with a movstrict instruction.  */
 912
 913   if (!MEM_P (op0)
 914       && !reverse
 915       && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
 916       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
 917       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 918     {
 919       class expand_operand ops[2];
 920       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 921       rtx arg0 = op0;
 922       unsigned HOST_WIDE_INT subreg_off;
 923
 924       if (GET_CODE (arg0) == SUBREG)
 925         {
 926           /* Else we've got some float mode source being extracted into
 927              a different float mode destination -- this combination of
 928              subregs results in Severe Tire Damage.  */
 929           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 930                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 931                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 932           arg0 = SUBREG_REG (arg0);
 933         }
 934
           /* BITNUM expressed in bytes, used as the SUBREG offset into
              ARG0.  */
 935       subreg_off = bitnum / BITS_PER_UNIT;
 936       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)
 937           /* STRICT_LOW_PART must have a non-paradoxical subreg as
 938              operand.  */
 939           && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0)))
 940         {
 941           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 942
 943           create_fixed_operand (&ops[0], arg0);
 944           /* Shrink the source operand to FIELDMODE.  */
 945           create_convert_operand_to (&ops[1], value, fieldmode, false);
 946           if (maybe_expand_insn (icode, 2, ops))
 947             return true;
 948         }
 949     }
 950
 951   /* Handle fields bigger than a word.  */
 952
 953   if (bitsize > BITS_PER_WORD)
 954     {
 955       /* Here we transfer the words of the field
 956          in the order least significant first.
 957          This is because the most significant word is the one which may
 958          be less than full.
 959          However, only do that if the value is not BLKmode.  */
 960
 961       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 962       const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 963       rtx_insn *last;
 964
 965       /* This is the mode we must force value to, so that there will be enough
 966          subwords to extract.  Note that fieldmode will often (always?) be
 967          VOIDmode, because that is what store_field uses to indicate that this
 968          is a bit field, but passing VOIDmode to operand_subword_force
 969          is not allowed.
 970
 971          The mode must be fixed-size, since insertions into variable-sized
 972          objects are meant to be handled before calling this function.  */
 973       fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
 974       if (value_mode == VOIDmode)
 975         value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
 976
           /* Remember where we are in the insn stream so that the stores
              for earlier words can be discarded if a later word fails.  */
 977       last = get_last_insn ();
 978       for (int i = 0; i < nwords; i++)
 979         {
 980           /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
 981              except maybe for the last iteration.  */
 982           const unsigned HOST_WIDE_INT new_bitsize
 983             = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 984           /* Bit offset from the starting bit number in the target.  */
 985           const unsigned int bit_offset
 986             = backwards ^ reverse
 987               ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
 988               : i * BITS_PER_WORD;
 989           /* Starting word number in the value.  */
 990           const unsigned int wordnum
 991             = backwards
 992               ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
 993               : i;
 994           /* The chunk of the value in word_mode.  We use bit-field extraction
 995               in BLKmode to handle unaligned memory references and to shift the
 996               last chunk right on big-endian machines if need be.  */
 997           rtx value_word
 998             = fieldmode == BLKmode
 999               ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
1000                                    1, NULL_RTX, word_mode, word_mode, false,
1001                                    NULL)
1002               : operand_subword_force (value, wordnum, value_mode);
1003
               /* Store this word-sized (or smaller) piece on its own.  */
1004           if (!store_bit_field_1 (op0, new_bitsize,
1005                                   bitnum + bit_offset,
1006                                   bitregion_start, bitregion_end,
1007                                   word_mode,
1008                                   value_word, reverse, fallback_p, false))
1009             {
1010               delete_insns_since (last);
1011               return false;
1012             }
1013         }
1014       return true;
1015     }
1016
1017   /* If VALUE has a floating-point or complex mode, access it as an
1018      integer of the corresponding size.  This can occur on a machine
1019      with 64 bit registers that uses SFmode for float.  It can also
1020      occur for unaligned float or complex fields.  */
1021   rtx orig_value = value;
1022   scalar_int_mode value_mode;
1023   if (GET_MODE (value) == VOIDmode)
1024     /* By this point we've dealt with values that are bigger than a word,
1025        so word_mode is a conservatively correct choice.  */
1026     value_mode = word_mode;
1027   else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1028     {
           /* Copy the original value into a fresh integer-mode pseudo by
              writing through its lowpart in the original mode.  */
1029       value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1030       value = gen_reg_rtx (value_mode);
1031       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1032     }
1033
1034   /* If OP0 is a multi-word register, narrow it to the affected word.
1035      If the region spans two words, defer to store_split_bit_field.
1036      Don't do this if op0 is a single hard register wider than word
1037      such as a float or vector register.  */
1038   if (!MEM_P (op0)
1039       && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1040       && (!REG_P (op0)
1041           || !HARD_REGISTER_P (op0)
1042           || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1043     {
1044       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1045         {
1046           if (!fallback_p)
1047             return false;
1048
1049           store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1050                                  bitregion_start, bitregion_end,
1051                                  value, value_mode, reverse);
1052           return true;
1053         }
           /* The field lies within one word: select that word and make
              BITNUM relative to it.  */
1054       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1055                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1056       gcc_assert (op0);
1057       op0_mode = word_mode;
1058       bitnum %= BITS_PER_WORD;
1059     }
1060
1061   /* From here on we can assume that the field to be stored in fits
1062      within a word.  If the destination is a register, it too fits
1063      in a word.  */
1064
1065   extraction_insn insv;
1066   if (!MEM_P (op0)
1067       && !reverse
1068       && get_best_reg_extraction_insn (&insv, EP_insv,
1069                                        GET_MODE_BITSIZE (op0_mode.require ()),
1070                                        fieldmode)
1071       && store_bit_field_using_insv (&insv, op0, op0_mode,
1072                                      bitsize, bitnum, value, value_mode))
1073     return true;
1074
1075   /* If OP0 is a memory, try copying it to a register and seeing if a
1076      cheap register alternative is available.  */
1077   if (MEM_P (op0) && !reverse)
1078     {
1079       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1080                                         fieldmode)
1081           && store_bit_field_using_insv (&insv, op0, op0_mode,
1082                                          bitsize, bitnum, value, value_mode))
1083         return true;
1084
1085       rtx_insn *last = get_last_insn ();
1086
1087       /* Try loading part of OP0 into a register, inserting the bitfield
1088          into that, and then copying the result back to OP0.  */
1089       unsigned HOST_WIDE_INT bitpos;
1090       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1091                                                bitregion_start, bitregion_end,
1092                                                fieldmode, &bitpos);
1093       if (xop0)
1094         {
1095           rtx tempreg = copy_to_reg (xop0);
               /* FALLBACK_P is false for this attempt, so it fails rather
                  than resorting to shifts and masks.  ORIG_VALUE is passed
                  because VALUE may have been replaced by an integer-mode
                  pseudo above.  */
1096           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1097                                  bitregion_start, bitregion_end,
1098                                  fieldmode, orig_value, reverse, false, false))
1099             {
1100               emit_move_insn (xop0, tempreg);
1101               return true;
1102             }
1103           delete_insns_since (last);
1104         }
1105     }
1106
1107   if (!fallback_p)
1108     return false;
1109
       /* Last resort: shifts and boolean operations.  */
1110   store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1111                          bitregion_end, value, value_mode, reverse);
1112   return true;
1113 }
1114
1115 /* Generate code to store value from rtx VALUE
1116    into a bit-field within structure STR_RTX
1117    containing BITSIZE bits starting at bit BITNUM.
1118
1119    BITREGION_START is bitpos of the first bitfield in this region.
1120    BITREGION_END is the bitpos of the ending bitfield in this region.
1121    These two fields are 0, if the C++ memory model does not apply,
1122    or we are not interested in keeping track of bitfield regions.
1123
1124    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1125
1126    If REVERSE is true, the store is to be done in reverse order.
1127
1128    If UNDEFINED_P is true then STR_RTX is currently undefined.

        This function does not report failure: store_bit_field_1 is called
        with FALLBACK_P true and a false result is treated as unreachable.  */
1129
1130 void
1131 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1132                  poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1133                  machine_mode fieldmode,
1134                  rtx value, bool reverse, bool undefined_p)
1135 {
1136   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1137   unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1138   scalar_int_mode int_mode;
1139   if (bitsize.is_constant (&ibitsize)
1140       && bitnum.is_constant (&ibitnum)
1141       && is_a <scalar_int_mode> (fieldmode, &int_mode)
1142       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1143                                      bitregion_start, bitregion_end))
1144     {
1145       /* Storing of a full word can be done with a simple store.
1146          We know here that the field can be accessed with one single
1147          instruction.  For targets that support unaligned memory,
1148          an unaligned access may be necessary.  */
1149       if (ibitsize == GET_MODE_BITSIZE (int_mode))
1150         {
1151           str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1152                                              ibitnum / BITS_PER_UNIT);
1153           if (reverse)
1154             value = flip_storage_order (int_mode, value);
1155           gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1156           emit_move_insn (str_rtx, value);
1157         }
1158       else
1159         {
1160           rtx temp;
1161
               /* Read-modify-write: narrow the MEM to INT_MODE (which also
                  rewrites IBITNUM), load it into a register, insert the
                  field there with no bit region restriction, and store the
                  register back.  */
1162           str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1163                                           ibitnum, &ibitnum);
1164           gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1165           temp = copy_to_reg (str_rtx);
1166           if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1167                                   int_mode, value, reverse, true, undefined_p))
1168             gcc_unreachable ();
1169
1170           emit_move_insn (str_rtx, temp);
1171         }
1172
1173       return;
1174     }
1175
1176   /* Under the C++0x memory model, we must not touch bits outside the
1177      bit region.  Adjust the address to start at the beginning of the
1178      bit region.  */
1179   if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1180     {
1181       scalar_int_mode best_mode;
1182       machine_mode addr_mode = VOIDmode;
1183
           /* Rebase BITNUM and BITREGION_END so that the region starts at
              bit 0, and compute the byte offset of the region and the number
              of bytes from the region start up to the end of the field.  */
1184       poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1185       bitnum -= bitregion_start;
1186       poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1187       bitregion_end -= bitregion_start;
1188       bitregion_start = 0;
           /* ADDR_MODE stays VOIDmode unless the field has constant size and
              position and get_best_mode finds a mode for it.  */
1189       if (bitsize.is_constant (&ibitsize)
1190           && bitnum.is_constant (&ibitnum)
1191           && get_best_mode (ibitsize, ibitnum,
1192                             bitregion_start, bitregion_end,
1193                             MEM_ALIGN (str_rtx), INT_MAX,
1194                             MEM_VOLATILE_P (str_rtx), &best_mode))
1195         addr_mode = best_mode;
1196       str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1197                                               offset, size);
1198     }
1199
1200   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1201                           bitregion_start, bitregion_end,
1202                           fieldmode, value, reverse, true, undefined_p))
1203     gcc_unreachable ();
1204 }
1205 \f
1206 /* Use shifts and boolean operations to store VALUE into a bit field of
1207    width BITSIZE in OP0, starting at bit BITNUM.  If OP0_MODE is defined,
1208    it is the mode of OP0, otherwise OP0 is a BLKmode MEM.  VALUE_MODE is
1209    the mode of VALUE.
1210
1211    If REVERSE is true, the store is to be done in reverse order.

        For a MEM, a mode to access it in is chosen with get_best_mode; if
        there is none the field is passed on to store_split_bit_field.  The
        actual shift-and-mask sequence is emitted by
        store_fixed_bit_field_1.  */
1212
1213 static void
1214 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1215                        unsigned HOST_WIDE_INT bitsize,
1216                        unsigned HOST_WIDE_INT bitnum,
1217                        poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1218                        rtx value, scalar_int_mode value_mode, bool reverse)
1219 {
1220   /* There is a case not handled here:
1221      a structure with a known alignment of just a halfword
1222      and a field split across two aligned halfwords within the structure.
1223      Or likewise a structure with a known alignment of just a byte
1224      and a field split across two bytes.
1225      Such cases are not supposed to be able to occur.  */
1226
1227   scalar_int_mode best_mode;
1228   if (MEM_P (op0))
1229     {
           /* Never access the MEM in a mode wider than a word, nor wider
              than OP0's own mode when it has one.  */
1230       unsigned int max_bitsize = BITS_PER_WORD;
1231       scalar_int_mode imode;
1232       if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1233         max_bitsize = GET_MODE_BITSIZE (imode);
1234
1235       if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1236                           MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1237                           &best_mode))
1238         {
1239           /* The only way this should occur is if the field spans word
1240              boundaries.  */
1241           store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1242                                  bitregion_start, bitregion_end,
1243                                  value, value_mode, reverse);
1244           return;
1245         }
1246
           /* BITNUM is both an input and an output here: it is rewritten to
              go with the narrowed MEM that is returned.  */
1247       op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1248     }
1249   else
1250     best_mode = op0_mode.require ();
1251
1252   store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1253                            value, value_mode, reverse);
1254 }
1255
1256 /* Helper function for store_fixed_bit_field, stores
1257    the bit field always using MODE, which is the mode of OP0.  The other
1258    arguments are as for store_fixed_bit_field.

        The emitted sequence is: position VALUE at the field's offset within
        MODE, AND the field's bits out of OP0, IOR the positioned value in,
        and move the result back to OP0.  The AND is omitted when VALUE is a
        constant with all field bits set, and the IOR when it is a constant
        with all field bits clear.  */
1259
1260 static void
1261 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1262                          unsigned HOST_WIDE_INT bitsize,
1263                          unsigned HOST_WIDE_INT bitnum,
1264                          rtx value, scalar_int_mode value_mode, bool reverse)
1265 {
1266   rtx temp;
       /* Set when VALUE is a CONST_INT whose low BITSIZE bits are all zero,
          respectively all one.  */
1267   int all_zero = 0;
1268   int all_one = 0;
1269
1270   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1271      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1272
1273   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1274     /* BITNUM is the distance between our msb
1275        and that of the containing datum.
1276        Convert it to the distance from the lsb.  */
1277     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1278
1279   /* Now BITNUM is always the distance between our lsb
1280      and that of OP0.  */
1281
1282   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1283      we must first convert its mode to MODE.  */
1284
1285   if (CONST_INT_P (value))
1286     {
1287       unsigned HOST_WIDE_INT v = UINTVAL (value);
1288
           /* Keep only the low BITSIZE bits of the constant.  The shift is
              guarded so that it is never by the full width of V.  */
1289       if (bitsize < HOST_BITS_PER_WIDE_INT)
1290         v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1291
1292       if (v == 0)
1293         all_zero = 1;
1294       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1295                 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1296                || (bitsize == HOST_BITS_PER_WIDE_INT
1297                    && v == HOST_WIDE_INT_M1U))
1298         all_one = 1;
1299
1300       value = lshift_value (mode, v, bitnum);
1301     }
1302   else
1303     {
           /* Masking VALUE down to BITSIZE bits is skipped when VALUE_MODE
              is exactly BITSIZE bits wide, or when the field ends at the
              top bit of MODE; presumably the unsigned conversion and the
              left shift below then already leave no stray bits.  */
1304       int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1305                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1306
1307       if (value_mode != mode)
1308         value = convert_to_mode (mode, value, 1);
1309
1310       if (must_and)
1311         value = expand_binop (mode, and_optab, value,
1312                               mask_rtx (mode, 0, bitsize, 0),
1313                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1314       if (bitnum > 0)
1315         value = expand_shift (LSHIFT_EXPR, mode, value,
1316                               bitnum, NULL_RTX, 1);
1317     }
1318
1319   if (reverse)
1320     value = flip_storage_order (mode, value);
1321
1322   /* Now clear the chosen bits in OP0,
1323      except that if VALUE is -1 we need not bother.  */
1324   /* We keep the intermediates in registers to allow CSE to combine
1325      consecutive bitfield assignments.  */
1326
1327   temp = force_reg (mode, op0);
1328
1329   if (! all_one)
1330     {
           /* The last argument of mask_rtx is 1 here and 0 for the value
              mask above; presumably it requests the complemented mask, so
              that the AND clears exactly the field's bits.  */
1331       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1332       if (reverse)
1333         mask = flip_storage_order (mode, mask);
1334       temp = expand_binop (mode, and_optab, temp, mask,
1335                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1336       temp = force_reg (mode, temp);
1337     }
1338
1339   /* Now logical-or VALUE into OP0, unless it is zero.  */
1340
1341   if (! all_zero)
1342     {
1343       temp = expand_binop (mode, ior_optab, temp, value,
1344                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1345       temp = force_reg (mode, temp);
1346     }
1347
       /* Write the result back unless it already is OP0 itself.  The
          destination is a copy of the OP0 rtx rather than OP0.  */
1348   if (op0 != temp)
1349     {
1350       op0 = copy_rtx (op0);
1351       emit_move_insn (op0, temp);
1352     }
1353 }
1354 \f
1355 /* Store a bit field that is split across multiple accessible memory objects.
1356
1357    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1358    BITSIZE is the field width; BITPOS the position of its first bit
1359    (within the word).
1360    VALUE is the value to store, which has mode VALUE_MODE.
1361    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1362    a BLKmode MEM.
1363
1364    If REVERSE is true, the store is to be done in reverse order.
1365
1366    This does not yet handle fields wider than BITS_PER_WORD.

        The field is stored in pieces.  Each piece lies within one UNIT-bit
        chunk of OP0, is extracted from VALUE into word_mode and is stored
        with store_fixed_bit_field.  */
1367
1368 static void
1369 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1370                        unsigned HOST_WIDE_INT bitsize,
1371                        unsigned HOST_WIDE_INT bitpos,
1372                        poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1373                        rtx value, scalar_int_mode value_mode, bool reverse)
1374 {
       /* UNIT is the chunk size in bits, TOTAL_BITS the width of VALUE_MODE
          and BITSDONE the number of field bits stored so far.  */
1375   unsigned int unit, total_bits, bitsdone = 0;
1376
1377   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1378      much at a time.  */
1379   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1380     unit = BITS_PER_WORD;
1381   else
1382     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1383
1384   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1385      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1386      again, and we will mutually recurse forever.  */
1387   if (MEM_P (op0) && op0_mode.exists ())
1388     unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1389
1390   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1391      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1392      that VALUE might be a floating-point constant.  */
1393   if (CONSTANT_P (value) && !CONST_INT_P (value))
1394     {
1395       rtx word = gen_lowpart_common (word_mode, value);
1396
1397       if (word && (value != word))
1398         value = word;
1399       else
1400         value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1401       value_mode = word_mode;
1402     }
1403
1404   total_bits = GET_MODE_BITSIZE (value_mode);
1405
1406   while (bitsdone < bitsize)
1407     {
1408       unsigned HOST_WIDE_INT thissize;
1409       unsigned HOST_WIDE_INT thispos;
1410       unsigned HOST_WIDE_INT offset;
1411       rtx part;
1412
           /* OFFSET is the index of the UNIT-bit chunk holding the next bit
              to store; THISPOS is that bit's position within the chunk.  */
1413       offset = (bitpos + bitsdone) / unit;
1414       thispos = (bitpos + bitsdone) % unit;
1415
1416       /* When region of bytes we can touch is restricted, decrease
1417          UNIT close to the end of the region as needed.  If op0 is a REG
1418          or SUBREG of REG, don't do this, as there can't be data races
1419          on a register and we can expand shorter code in some cases.  */
1420       if (maybe_ne (bitregion_end, 0U)
1421           && unit > BITS_PER_UNIT
1422           && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1423           && !REG_P (op0)
1424           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1425         {
               /* Retry this iteration with a smaller UNIT; OFFSET and
                  THISPOS are recomputed at the top of the loop.  */
1426           unit = unit / 2;
1427           continue;
1428         }
1429
1430       /* THISSIZE must not overrun a word boundary.  Otherwise,
1431          store_fixed_bit_field will call us again, and we will mutually
1432          recurse forever.  */
1433       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1434       thissize = MIN (thissize, unit - thispos);
1435
           /* NOTE(review): in the CONST_INT cases below the mask is built by
              shifting the signed HOST_WIDE_INT_1 by THISSIZE, whereas
              store_fixed_bit_field_1 uses HOST_WIDE_INT_1U for the same
              purpose.  Confirm that THISSIZE can never reach
              HOST_BITS_PER_WIDE_INT - 1 or more here.  */
1436       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1437         {
1438           /* Fetch successively less significant portions.  */
1439           if (CONST_INT_P (value))
1440             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1441                              >> (bitsize - bitsdone - thissize))
1442                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1443           /* Likewise, but the source is little-endian.  */
1444           else if (reverse)
1445             part = extract_fixed_bit_field (word_mode, value, value_mode,
1446                                             thissize,
1447                                             bitsize - bitsdone - thissize,
1448                                             NULL_RTX, 1, false);
1449           else
1450             /* The args are chosen so that the last part includes the
1451                lsb.  Give extract_bit_field the value it needs (with
1452                endianness compensation) to fetch the piece we want.  */
1453             part = extract_fixed_bit_field (word_mode, value, value_mode,
1454                                             thissize,
1455                                             total_bits - bitsize + bitsdone,
1456                                             NULL_RTX, 1, false);
1457         }
1458       else
1459         {
1460           /* Fetch successively more significant portions.  */
1461           if (CONST_INT_P (value))
1462             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1463                              >> bitsdone)
1464                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1465           /* Likewise, but the source is big-endian.  */
1466           else if (reverse)
1467             part = extract_fixed_bit_field (word_mode, value, value_mode,
1468                                             thissize,
1469                                             total_bits - bitsdone - thissize,
1470                                             NULL_RTX, 1, false);
1471           else
1472             part = extract_fixed_bit_field (word_mode, value, value_mode,
1473                                             thissize, bitsdone, NULL_RTX,
1474                                             1, false);
1475         }
1476
1477       /* If OP0 is a register, then handle OFFSET here.  */
1478       rtx op0_piece = op0;
1479       opt_scalar_int_mode op0_piece_mode = op0_mode;
1480       if (SUBREG_P (op0) || REG_P (op0))
1481         {
1482           scalar_int_mode imode;
               /* A register narrower than a word has only chunk 0; any
                  other OFFSET is flagged with const0_rtx and skipped
                  below.  */
1483           if (op0_mode.exists (&imode)
1484               && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1485             {
1486               if (offset)
1487                 op0_piece = const0_rtx;
1488             }
1489           else
1490             {
                   /* Otherwise pick the word of OP0 that contains chunk
                      OFFSET.  */
1491               op0_piece = operand_subword_force (op0,
1492                                                  offset * unit / BITS_PER_WORD,
1493                                                  GET_MODE (op0));
1494               op0_piece_mode = word_mode;
1495             }
               /* Reduce OFFSET to a chunk index within the selected word.
                  This presumably relies on BITS_PER_WORD / UNIT being a
                  power of two.  */
1496           offset &= BITS_PER_WORD / unit - 1;
1497         }
1498
1499       /* OFFSET is in UNITs, and UNIT is in bits.  If OP0_PIECE is
1500          const0_rtx, it is just an out-of-bounds access.  Ignore it.  */
1501       if (op0_piece != const0_rtx)
1502         store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1503                                offset * unit + thispos, bitregion_start,
1504                                bitregion_end, part, word_mode, reverse);
1505       bitsdone += thissize;
1506     }
1507 }
1508 \f
1509 /* A subroutine of extract_bit_field_1 that converts return value X
1510    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1511    to extract_bit_field.  */
1512
1513 static rtx
1514 convert_extracted_bit_field (rtx x, machine_mode mode,
1515                              machine_mode tmode, bool unsignedp)
1516 {
1517   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1518     return x;
1519
1520   /* If the x mode is not a scalar integral, first convert to the
1521      integer mode of that size and then access it as a floating-point
1522      value via a SUBREG.  */
1523   if (!SCALAR_INT_MODE_P (tmode))
1524     {
1525       scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1526       x = convert_to_mode (int_mode, x, unsignedp);
1527       x = force_reg (int_mode, x);
1528       return gen_lowpart (tmode, x);
1529     }
1530
1531   return convert_to_mode (tmode, x, unsignedp);
1532 }
1533
1534 /* Try to use an ext(z)v pattern to extract a field from OP0.
1535    Return the extracted value on success, otherwise return null.
1536    EXTV describes the extraction instruction to use.  If OP0_MODE
1537    is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1538    The other arguments are as for extract_bit_field.  */
1539
1540 static rtx
1541 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1542                               opt_scalar_int_mode op0_mode,
1543                               unsigned HOST_WIDE_INT bitsize,
1544                               unsigned HOST_WIDE_INT bitnum,
1545                               int unsignedp, rtx target,
1546                               machine_mode mode, machine_mode tmode)
1547 {
1548   class expand_operand ops[4];
1549   rtx spec_target = target;
1550   rtx spec_target_subreg = 0;
1551   scalar_int_mode ext_mode = extv->field_mode;
1552   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1553
1554   if (bitsize == 0 || unit < bitsize)
1555     return NULL_RTX;
1556
1557   if (MEM_P (op0))
1558     /* Get a reference to the first byte of the field.  */
1559     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1560                                 &bitnum);
1561   else
1562     {
1563       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1564       if (BYTES_BIG_ENDIAN)
1565         bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1566
1567       /* If op0 is a register, we need it in EXT_MODE to make it
1568          acceptable to the format of ext(z)v.  */
1569       if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1570         return NULL_RTX;
1571       if (REG_P (op0) && op0_mode.require () != ext_mode)
1572         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1573     }
1574
1575   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1576      "backwards" from the size of the unit we are extracting from.
1577      Otherwise, we count bits from the most significant on a
1578      BYTES/BITS_BIG_ENDIAN machine.  */
1579
1580   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1581     bitnum = unit - bitsize - bitnum;
1582
1583   if (target == 0)
1584     target = spec_target = gen_reg_rtx (tmode);
1585
1586   if (GET_MODE (target) != ext_mode)
1587     {
1588       rtx temp;
1589       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1590          between the mode of the extraction (word_mode) and the target
1591          mode.  Instead, create a temporary and use convert_move to set
1592          the target.  */
1593       if (REG_P (target)
1594           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)
1595           && (temp = gen_lowpart_if_possible (ext_mode, target)))
1596         {
1597           target = temp;
1598           if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1599             spec_target_subreg = target;
1600         }
1601       else
1602         target = gen_reg_rtx (ext_mode);
1603     }
1604
1605   create_output_operand (&ops[0], target, ext_mode);
1606   create_fixed_operand (&ops[1], op0);
1607   create_integer_operand (&ops[2], bitsize);
1608   create_integer_operand (&ops[3], bitnum);
1609   if (maybe_expand_insn (extv->icode, 4, ops))
1610     {
1611       target = ops[0].value;
1612       if (target == spec_target)
1613         return target;
1614       if (target == spec_target_subreg)
1615         return spec_target;
1616       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1617     }
1618   return NULL_RTX;
1619 }
1620
1621 /* See whether it would be valid to extract the part of OP0 with
1622    mode OP0_MODE described by BITNUM and BITSIZE into a value of
1623    mode MODE using a subreg operation.
1624    Return the subreg if so, otherwise return null.  */
1625
1626 static rtx
1627 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1628                              machine_mode op0_mode,
1629                              poly_uint64 bitsize, poly_uint64 bitnum)
1630 {
1631   poly_uint64 bytenum;
1632   if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1633       && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1634       && lowpart_bit_field_p (bitnum, bitsize, op0_mode)
1635       && TRULY_NOOP_TRUNCATION_MODES_P (mode, op0_mode))
1636     return simplify_gen_subreg (mode, op0, op0_mode, bytenum);
1637   return NULL_RTX;
1638 }
1639
1640 /* A subroutine of extract_bit_field, with the same arguments.
1641    If UNSIGNEDP is -1, the result need not be sign or zero extended.
1642    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1643    if we can find no other means of implementing the operation.
1644    if FALLBACK_P is false, return NULL instead.  */
1645
1646 static rtx
1647 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1648                      int unsignedp, rtx target, machine_mode mode,
1649                      machine_mode tmode, bool reverse, bool fallback_p,
1650                      rtx *alt_rtl)
1651 {
1652   rtx op0 = str_rtx;
1653   machine_mode mode1;
1654
1655   if (tmode == VOIDmode)
1656     tmode = mode;
1657
1658   while (GET_CODE (op0) == SUBREG)
1659     {
1660       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1661       op0 = SUBREG_REG (op0);
1662     }
1663
1664   /* If we have an out-of-bounds access to a register, just return an
1665      uninitialized register of the required mode.  This can occur if the
1666      source code contains an out-of-bounds access to a small array.  */
1667   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1668     return gen_reg_rtx (tmode);
1669
1670   if (REG_P (op0)
1671       && mode == GET_MODE (op0)
1672       && known_eq (bitnum, 0U)
1673       && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1674     {
1675       if (reverse)
1676         op0 = flip_storage_order (mode, op0);
1677       /* We're trying to extract a full register from itself.  */
1678       return op0;
1679     }
1680
1681   /* First try to check for vector from vector extractions.  */
1682   if (VECTOR_MODE_P (GET_MODE (op0))
1683       && !MEM_P (op0)
1684       && VECTOR_MODE_P (tmode)
1685       && known_eq (bitsize, GET_MODE_PRECISION (tmode))
1686       && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1687     {
1688       machine_mode new_mode = GET_MODE (op0);
1689       if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1690         {
1691           scalar_mode inner_mode = GET_MODE_INNER (tmode);
1692           poly_uint64 nunits;
1693           if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1694                            GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1695               || !related_vector_mode (tmode, inner_mode,
1696                                        nunits).exists (&new_mode)
1697               || maybe_ne (GET_MODE_SIZE (new_mode),
1698                            GET_MODE_SIZE (GET_MODE (op0))))
1699             new_mode = VOIDmode;
1700         }
1701       poly_uint64 pos;
1702       if (new_mode != VOIDmode
1703           && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1704               != CODE_FOR_nothing)
1705           && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1706         {
1707           class expand_operand ops[3];
1708           machine_mode outermode = new_mode;
1709           machine_mode innermode = tmode;
1710           enum insn_code icode
1711             = convert_optab_handler (vec_extract_optab, outermode, innermode);
1712
1713           if (new_mode != GET_MODE (op0))
1714             op0 = gen_lowpart (new_mode, op0);
1715           create_output_operand (&ops[0], target, innermode);
1716           ops[0].target = 1;
1717           create_input_operand (&ops[1], op0, outermode);
1718           create_integer_operand (&ops[2], pos);
1719           if (maybe_expand_insn (icode, 3, ops))
1720             {
1721               if (alt_rtl && ops[0].target)
1722                 *alt_rtl = target;
1723               target = ops[0].value;
1724               if (GET_MODE (target) != mode)
1725                 return gen_lowpart (tmode, target);
1726               return target;
1727             }
1728         }
1729     }
1730
1731   /* See if we can get a better vector mode before extracting.  */
1732   if (VECTOR_MODE_P (GET_MODE (op0))
1733       && !MEM_P (op0)
1734       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1735     {
1736       machine_mode new_mode;
1737
1738       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1739         new_mode = MIN_MODE_VECTOR_FLOAT;
1740       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1741         new_mode = MIN_MODE_VECTOR_FRACT;
1742       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1743         new_mode = MIN_MODE_VECTOR_UFRACT;
1744       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1745         new_mode = MIN_MODE_VECTOR_ACCUM;
1746       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1747         new_mode = MIN_MODE_VECTOR_UACCUM;
1748       else
1749         new_mode = MIN_MODE_VECTOR_INT;
1750
1751       FOR_EACH_MODE_FROM (new_mode, new_mode)
1752         if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1753             && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1754             && known_eq (bitsize, GET_MODE_UNIT_PRECISION (new_mode))
1755             && multiple_p (bitnum, GET_MODE_UNIT_PRECISION (new_mode))
1756             && targetm.vector_mode_supported_p (new_mode)
1757             && targetm.modes_tieable_p (GET_MODE (op0), new_mode))
1758           break;
1759       if (new_mode != VOIDmode)
1760         op0 = gen_lowpart (new_mode, op0);
1761     }
1762
1763   /* Use vec_extract patterns for extracting parts of vectors whenever
1764      available.  If that fails, see whether the current modes and bitregion
1765      give a natural subreg.  */
1766   machine_mode outermode = GET_MODE (op0);
1767   if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1768     {
1769       scalar_mode innermode = GET_MODE_INNER (outermode);
1770
1771       enum insn_code icode
1772         = convert_optab_handler (vec_extract_optab, outermode, innermode);
1773
1774       poly_uint64 pos;
1775       if (icode != CODE_FOR_nothing
1776           && known_eq (bitsize, GET_MODE_PRECISION (innermode))
1777           && multiple_p (bitnum, GET_MODE_PRECISION (innermode), &pos))
1778         {
1779           class expand_operand ops[3];
1780
1781           create_output_operand (&ops[0], target,
1782                                  insn_data[icode].operand[0].mode);
1783           ops[0].target = 1;
1784           create_input_operand (&ops[1], op0, outermode);
1785           create_integer_operand (&ops[2], pos);
1786           if (maybe_expand_insn (icode, 3, ops))
1787             {
1788               if (alt_rtl && ops[0].target)
1789                 *alt_rtl = target;
1790               target = ops[0].value;
1791               if (GET_MODE (target) != mode)
1792                 return gen_lowpart (tmode, target);
1793               return target;
1794             }
1795         }
1796       /* Using subregs is useful if we're extracting one register vector
1797          from a multi-register vector.  extract_bit_field_as_subreg checks
1798          for valid bitsize and bitnum, so we don't need to do that here.  */
1799       if (VECTOR_MODE_P (mode))
1800         {
1801           rtx sub = extract_bit_field_as_subreg (mode, op0, outermode,
1802                                                  bitsize, bitnum);
1803           if (sub)
1804             return sub;
1805         }
1806     }
1807
1808   /* Make sure we are playing with integral modes.  Pun with subregs
1809      if we aren't.  */
1810   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1811   scalar_int_mode imode;
1812   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1813     {
1814       if (MEM_P (op0))
1815         op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1816                                             0, MEM_SIZE (op0));
1817       else if (op0_mode.exists (&imode))
1818         {
1819           op0 = gen_lowpart (imode, op0);
1820
1821           /* If we got a SUBREG, force it into a register since we
1822              aren't going to be able to do another SUBREG on it.  */
1823           if (GET_CODE (op0) == SUBREG)
1824             op0 = force_reg (imode, op0);
1825         }
1826       else
1827         {
1828           poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1829           rtx mem = assign_stack_temp (GET_MODE (op0), size);
1830           emit_move_insn (mem, op0);
1831           op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1832         }
1833     }
1834
1835   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1836      If that's wrong, the solution is to test for it and set TARGET to 0
1837      if needed.  */
1838
1839   /* Get the mode of the field to use for atomic access or subreg
1840      conversion.  */
1841   if (!SCALAR_INT_MODE_P (tmode)
1842       || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1843     mode1 = mode;
1844   gcc_assert (mode1 != BLKmode);
1845
1846   /* Extraction of a full MODE1 value can be done with a subreg as long
1847      as the least significant bit of the value is the least significant
1848      bit of either OP0 or a word of OP0.  */
1849   if (!MEM_P (op0) && !reverse && op0_mode.exists (&imode))
1850     {
1851       rtx sub = extract_bit_field_as_subreg (mode1, op0, imode,
1852                                              bitsize, bitnum);
1853       if (sub)
1854         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1855     }
1856
1857   /* Extraction of a full MODE1 value can be done with a load as long as
1858      the field is on a byte boundary and is sufficiently aligned.  */
1859   poly_uint64 bytenum;
1860   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1861     {
1862       op0 = adjust_bitfield_address (op0, mode1, bytenum);
1863       if (reverse)
1864         op0 = flip_storage_order (mode1, op0);
1865       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1866     }
1867
1868   /* If we have a memory source and a non-constant bit offset, restrict
1869      the memory to the referenced bytes.  This is a worst-case fallback
1870      but is useful for things like vector booleans.  */
1871   if (MEM_P (op0) && !bitnum.is_constant ())
1872     {
1873       bytenum = bits_to_bytes_round_down (bitnum);
1874       bitnum = num_trailing_bits (bitnum);
1875       poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1876       op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1877       op0_mode = opt_scalar_int_mode ();
1878     }
1879
1880   /* It's possible we'll need to handle other cases here for
1881      polynomial bitnum and bitsize.  */
1882
1883   /* From here on we need to be looking at a fixed-size insertion.  */
1884   return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1885                                      bitnum.to_constant (), unsignedp,
1886                                      target, mode, tmode, reverse, fallback_p);
1887 }
1888
1889 /* Subroutine of extract_bit_field_1, with the same arguments, except
1890    that BITSIZE and BITNUM are constant.  Handle cases specific to
1891    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
1892    otherwise OP0 is a BLKmode MEM.  */
1893
1894 static rtx
1895 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1896                             unsigned HOST_WIDE_INT bitsize,
1897                             unsigned HOST_WIDE_INT bitnum, int unsignedp,
1898                             rtx target, machine_mode mode, machine_mode tmode,
1899                             bool reverse, bool fallback_p)
1900 {
1901   /* Handle fields bigger than a word.  */
1902
1903   if (bitsize > BITS_PER_WORD)
1904     {
1905       /* Here we transfer the words of the field
1906          in the order least significant first.
1907          This is because the most significant word is the one which may
1908          be less than full.  */
1909
1910       const bool backwards = WORDS_BIG_ENDIAN;
1911       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1912       unsigned int i;
1913       rtx_insn *last;
1914
1915       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1916         target = gen_reg_rtx (mode);
1917
1918       /* In case we're about to clobber a base register or something
1919          (see gcc.c-torture/execute/20040625-1.c).   */
1920       if (reg_mentioned_p (target, op0))
1921         target = gen_reg_rtx (mode);
1922
1923       /* Indicate for flow that the entire target reg is being set.  */
1924       emit_clobber (target);
1925
1926       /* The mode must be fixed-size, since extract_bit_field_1 handles
1927          extractions from variable-sized objects before calling this
1928          function.  */
1929       unsigned int target_size
1930         = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1931       last = get_last_insn ();
1932       for (i = 0; i < nwords; i++)
1933         {
1934           /* If I is 0, use the low-order word in both field and target;
1935              if I is 1, use the next to lowest word; and so on.  */
1936           /* Word number in TARGET to use.  */
1937           unsigned int wordnum
1938             = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1939           /* Offset from start of field in OP0.  */
1940           unsigned int bit_offset = (backwards ^ reverse
1941                                      ? MAX ((int) bitsize - ((int) i + 1)
1942                                             * BITS_PER_WORD,
1943                                             0)
1944                                      : (int) i * BITS_PER_WORD);
1945           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1946           rtx result_part
1947             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1948                                              bitsize - i * BITS_PER_WORD),
1949                                    bitnum + bit_offset,
1950                                    (unsignedp ? 1 : -1), target_part,
1951                                    mode, word_mode, reverse, fallback_p, NULL);
1952
1953           gcc_assert (target_part);
1954           if (!result_part)
1955             {
1956               delete_insns_since (last);
1957               return NULL;
1958             }
1959
1960           if (result_part != target_part)
1961             emit_move_insn (target_part, result_part);
1962         }
1963
1964       if (unsignedp)
1965         {
1966           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1967              need to be zero'd out.  */
1968           if (target_size > nwords * UNITS_PER_WORD)
1969             {
1970               unsigned int i, total_words;
1971
1972               total_words = target_size / UNITS_PER_WORD;
1973               for (i = nwords; i < total_words; i++)
1974                 emit_move_insn
1975                   (operand_subword (target,
1976                                     backwards ? total_words - i - 1 : i,
1977                                     1, VOIDmode),
1978                    const0_rtx);
1979             }
1980           return target;
1981         }
1982
1983       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1984       target = expand_shift (LSHIFT_EXPR, mode, target,
1985                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1986       return expand_shift (RSHIFT_EXPR, mode, target,
1987                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1988     }
1989
1990   /* If OP0 is a multi-word register, narrow it to the affected word.
1991      If the region spans two words, defer to extract_split_bit_field.  */
1992   if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1993     {
1994       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1995         {
1996           if (!fallback_p)
1997             return NULL_RTX;
1998           target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1999                                             unsignedp, reverse);
2000           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
2001         }
2002       /* If OP0 is a hard register, copy it to a pseudo before calling
2003          simplify_gen_subreg.  */
2004       if (REG_P (op0) && HARD_REGISTER_P (op0))
2005         op0 = copy_to_reg (op0);
2006       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
2007                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
2008       op0_mode = word_mode;
2009       bitnum %= BITS_PER_WORD;
2010     }
2011
2012   /* From here on we know the desired field is smaller than a word.
2013      If OP0 is a register, it too fits within a word.  */
2014   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
2015   extraction_insn extv;
2016   if (!MEM_P (op0)
2017       && !reverse
2018       /* ??? We could limit the structure size to the part of OP0 that
2019          contains the field, with appropriate checks for endianness
2020          and TARGET_TRULY_NOOP_TRUNCATION.  */
2021       && get_best_reg_extraction_insn (&extv, pattern,
2022                                        GET_MODE_BITSIZE (op0_mode.require ()),
2023                                        tmode))
2024     {
2025       rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2026                                                  bitsize, bitnum,
2027                                                  unsignedp, target, mode,
2028                                                  tmode);
2029       if (result)
2030         return result;
2031     }
2032
2033   /* If OP0 is a memory, try copying it to a register and seeing if a
2034      cheap register alternative is available.  */
2035   if (MEM_P (op0) & !reverse)
2036     {
2037       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2038                                         tmode))
2039         {
2040           rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2041                                                      bitsize, bitnum,
2042                                                      unsignedp, target, mode,
2043                                                      tmode);
2044           if (result)
2045             return result;
2046         }
2047
2048       rtx_insn *last = get_last_insn ();
2049
2050       /* Try loading part of OP0 into a register and extracting the
2051          bitfield from that.  */
2052       unsigned HOST_WIDE_INT bitpos;
2053       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2054                                                0, 0, tmode, &bitpos);
2055       if (xop0)
2056         {
2057           xop0 = copy_to_reg (xop0);
2058           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2059                                             unsignedp, target,
2060                                             mode, tmode, reverse, false, NULL);
2061           if (result)
2062             return result;
2063           delete_insns_since (last);
2064         }
2065     }
2066
2067   if (!fallback_p)
2068     return NULL;
2069
2070   /* Find a correspondingly-sized integer field, so we can apply
2071      shifts and masks to it.  */
2072   scalar_int_mode int_mode;
2073   if (!int_mode_for_mode (tmode).exists (&int_mode))
2074     /* If this fails, we should probably push op0 out to memory and then
2075        do a load.  */
2076     int_mode = int_mode_for_mode (mode).require ();
2077
2078   target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2079                                     bitnum, target, unsignedp, reverse);
2080
2081   /* Complex values must be reversed piecewise, so we need to undo the global
2082      reversal, convert to the complex mode and reverse again.  */
2083   if (reverse && COMPLEX_MODE_P (tmode))
2084     {
2085       target = flip_storage_order (int_mode, target);
2086       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2087       target = flip_storage_order (tmode, target);
2088     }
2089   else
2090     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2091
2092   return target;
2093 }
2094
2095 /* Generate code to extract a byte-field from STR_RTX
2096    containing BITSIZE bits, starting at BITNUM,
2097    and put it in TARGET if possible (if TARGET is nonzero).
2098    Regardless of TARGET, we return the rtx for where the value is placed.
2099
2100    STR_RTX is the structure containing the byte (a REG or MEM).
2101    UNSIGNEDP is nonzero if this is an unsigned bit field.
2102    MODE is the natural mode of the field value once extracted.
2103    TMODE is the mode the caller would like the value to have;
2104    but the value may be returned with type MODE instead.
2105
2106    If REVERSE is true, the extraction is to be done in reverse order.
2107
2108    If a TARGET is specified and we can store in it at no extra cost,
2109    we do so, and return TARGET.
2110    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2111    if they are equally easy.
2112
2113    If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2114    then *ALT_RTL is set to TARGET (before legitimziation).  */
2115
2116 rtx
2117 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2118                    int unsignedp, rtx target, machine_mode mode,
2119                    machine_mode tmode, bool reverse, rtx *alt_rtl)
2120 {
2121   machine_mode mode1;
2122
2123   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
2124   if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2125     mode1 = GET_MODE (str_rtx);
2126   else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2127     mode1 = GET_MODE (target);
2128   else
2129     mode1 = tmode;
2130
2131   unsigned HOST_WIDE_INT ibitsize, ibitnum;
2132   scalar_int_mode int_mode;
2133   if (bitsize.is_constant (&ibitsize)
2134       && bitnum.is_constant (&ibitnum)
2135       && is_a <scalar_int_mode> (mode1, &int_mode)
2136       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2137                                      int_mode, 0, 0))
2138     {
2139       /* Extraction of a full INT_MODE value can be done with a simple load.
2140          We know here that the field can be accessed with one single
2141          instruction.  For targets that support unaligned memory,
2142          an unaligned access may be necessary.  */
2143       if (ibitsize == GET_MODE_BITSIZE (int_mode))
2144         {
2145           rtx result = adjust_bitfield_address (str_rtx, int_mode,
2146                                                 ibitnum / BITS_PER_UNIT);
2147           if (reverse)
2148             result = flip_storage_order (int_mode, result);
2149           gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2150           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2151         }
2152
2153       str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2154                                       &ibitnum);
2155       gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2156       str_rtx = copy_to_reg (str_rtx);
2157       return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2158                                   target, mode, tmode, reverse, true, alt_rtl);
2159     }
2160
2161   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2162                               target, mode, tmode, reverse, true, alt_rtl);
2163 }
2164 \f
2165 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2166    from bit BITNUM of OP0.  If OP0_MODE is defined, it is the mode of OP0,
2167    otherwise OP0 is a BLKmode MEM.
2168
2169    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2170    If REVERSE is true, the extraction is to be done in reverse order.
2171
2172    If TARGET is nonzero, attempts to store the value there
2173    and return TARGET, but this is not guaranteed.
2174    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
2175
2176 static rtx
2177 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2178                          opt_scalar_int_mode op0_mode,
2179                          unsigned HOST_WIDE_INT bitsize,
2180                          unsigned HOST_WIDE_INT bitnum, rtx target,
2181                          int unsignedp, bool reverse)
2182 {
2183   scalar_int_mode mode;
2184   if (MEM_P (op0))
2185     {
2186       if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2187                           BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2188         /* The only way this should occur is if the field spans word
2189            boundaries.  */
2190         return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2191                                         unsignedp, reverse);
2192
2193       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2194     }
2195   else
2196     mode = op0_mode.require ();
2197
2198   return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2199                                     target, unsignedp, reverse);
2200 }
2201
2202 /* Helper function for extract_fixed_bit_field: extracts the bit field
2203    always using MODE, which is the mode of OP0.  If UNSIGNEDP is zero the
2204    field is sign-extended; if it is -1, the result need not be sign or zero
2205    extended.  The other arguments are as for extract_fixed_bit_field.  */
2206
2207 static rtx
2208 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2209                            unsigned HOST_WIDE_INT bitsize,
2210                            unsigned HOST_WIDE_INT bitnum, rtx target,
2211                            int unsignedp, bool reverse)
2212 {
2213   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2214      for invalid input, such as extract equivalent of f5 from
2215      gcc.dg/pr48335-2.c.  */
2216
2217   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2218     /* BITNUM is the distance between our msb and that of OP0.
2219        Convert it to the distance from the lsb.  */
2220     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2221
2222   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2223      We have reduced the big-endian case to the little-endian case.  */
2224   if (reverse)
2225     op0 = flip_storage_order (mode, op0);
2226   /* Nonzero UNSIGNEDP (including -1): shift down, convert, maybe mask.  */
2227   if (unsignedp)
2228     {
2229       if (bitnum)
2230         {
2231           /* If the field does not already start at the lsb,
2232              shift it so it does.  */
2233           /* Maybe propagate the target for the shift.  */
2234           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2235           if (tmode != mode)
2236             subtarget = 0;  /* The shift is done in MODE, not TMODE.  */
2237           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2238         }
2239       /* Convert the value to the desired mode.  TMODE must also be a
2240          scalar integer for this conversion to make sense, since we
2241          shouldn't reinterpret the bits.  */
2242       scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2243       if (mode != new_mode)
2244         op0 = convert_to_mode (new_mode, op0, 1);
2245
2246       /* Unless the msb of the field used to be the msb when we shifted,
2247          mask out the upper bits.  */
2248       /* With UNSIGNEDP == -1 the upper bits need not be cleared either.  */
2249       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize
2250           && unsignedp != -1)
2251         return expand_binop (new_mode, and_optab, op0,
2252                              mask_rtx (new_mode, 0, bitsize, 0),
2253                              target, 1, OPTAB_LIB_WIDEN);
2254       return op0;
2255     }
2256
2257   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2258      then arithmetic-shift its lsb to the lsb of the word.  */
2259   op0 = force_reg (mode, op0);
2260
2261   /* Find the narrowest integer mode that contains the field.  */
2262
2263   opt_scalar_int_mode mode_iter;
2264   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2265     if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2266       break;
2267   /* NOTE(review): require () presumably insists a mode was found; confirm.  */
2268   mode = mode_iter.require ();
2269   op0 = convert_to_mode (mode, op0, 0);
2270   /* TARGET is only usable when the shifts below are done in TMODE.  */
2271   if (mode != tmode)
2272     target = 0;
2273   /* Left-justify the field unless its msb is already the msb of MODE.  */
2274   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2275     {
2276       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2277       /* Maybe propagate the target for the shift.  */
2278       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2279       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2280     }
2281   /* Arithmetic right shift (UNSIGNEDP == 0) moves the lsb into place.  */
2282   return expand_shift (RSHIFT_EXPR, mode, op0,
2283                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2284 }
2285
2286 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx of mode MODE
2287    with the value VALUE << BITPOS, the shift being done by wi::lshift.  */
2288
2289 static rtx
2290 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2291               int bitpos)
2292 {
2293   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2294 }
2295 \f
2296 /* Extract a bit field that is split across two words
2297    and return an RTX for the result, which is computed in word_mode.
2298
2299    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2300    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2301    UNSIGNEDP is nonzero to zero-extend the contents; zero to sign-extend.
2302    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2303    a BLKmode MEM.
2304
2305    If REVERSE is true, the extraction is to be done in reverse order.  */
2306
2307 static rtx
2308 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2309                          unsigned HOST_WIDE_INT bitsize,
2310                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2311                          bool reverse)
2312 {
2313   unsigned int unit;  /* Size in bits of the chunks OP0 is divided into.  */
2314   unsigned int bitsdone = 0;  /* Bits of the field extracted so far.  */
2315   rtx result = NULL_RTX;
2316   int first = 1;  /* Nonzero until the first part is stored in RESULT.  */
2317
2318   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2319      much at a time.  */
2320   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2321     unit = BITS_PER_WORD;
2322   else
2323     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2324   /* Each iteration extracts one part that lies within a single UNIT.  */
2325   while (bitsdone < bitsize)
2326     {
2327       unsigned HOST_WIDE_INT thissize;  /* Width in bits of this part.  */
2328       rtx part;
2329       unsigned HOST_WIDE_INT thispos;  /* Bit position within the unit.  */
2330       unsigned HOST_WIDE_INT offset;  /* Index of the unit holding the part.  */
2331
2332       offset = (bitpos + bitsdone) / unit;
2333       thispos = (bitpos + bitsdone) % unit;
2334
2335       /* THISSIZE must not overrun a word boundary.  Otherwise,
2336          extract_fixed_bit_field will call us again, and we will mutually
2337          recurse forever.  */
2338       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2339       thissize = MIN (thissize, unit - thispos);
2340
2341       /* If OP0 is a register, then handle OFFSET here.  */
2342       rtx op0_piece = op0;
2343       opt_scalar_int_mode op0_piece_mode = op0_mode;
2344       if (SUBREG_P (op0) || REG_P (op0))
2345         {
2346           op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2347           op0_piece_mode = word_mode;
2348           offset = 0;
2349         }
2350
2351       /* Extract the parts in bit-counting order,
2352          whose meaning is determined by BYTES_PER_UNIT.
2353          OFFSET is in UNITs, and UNIT is in bits.  */
2354       part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2355                                       thissize, offset * unit + thispos,
2356                                       0, 1, reverse);  /* No target; unsigned.  */
2357       bitsdone += thissize;
2358
2359       /* Shift this part into place for the result.  */
2360       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2361         {
2362           if (bitsize != bitsdone)
2363             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2364                                  bitsize - bitsdone, 0, 1);
2365         }
2366       else
2367         {
2368           if (bitsdone != thissize)
2369             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2370                                  bitsdone - thissize, 0, 1);
2371         }
2372
2373       if (first)
2374         result = part;
2375       else
2376         /* Combine the parts with bitwise or.  This works
2377            because we extracted each part as an unsigned bit field.  */
2378         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2379                                OPTAB_LIB_WIDEN);
2380
2381       first = 0;
2382     }
2383
2384   /* Unsigned bit field: we are done.  */
2385   if (unsignedp)
2386     return result;
2387   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2388   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2389                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2390   return expand_shift (RSHIFT_EXPR, word_mode, result,
2391                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2392 }
2393 \f
2394 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2395    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2396    MODE, fill the upper bits with zeros.  Fail if the layout of either
2397    mode is unknown (as for CC modes) or if the extraction would involve
2398    unprofitable mode punning.  Return the value on success, otherwise
2399    return null.
2400
2401    This is different from gen_lowpart* in these respects:
2402
2403      - the returned value must always be considered an rvalue
2404
2405      - when MODE is wider than SRC_MODE, the extraction involves
2406        a zero extension
2407
2408      - when MODE is smaller than SRC_MODE, the extraction involves
2409        a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2410
2411    In other words, this routine performs a computation, whereas the
2412    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2413    operations.  */
2414
2415 rtx
2416 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2417 {
2418   scalar_int_mode int_mode, src_int_mode;
2419   /* Nothing to do when the two modes are already the same.  */
2420   if (mode == src_mode)
2421     return src;
2422
2423   if (CONSTANT_P (src))
2424     {
2425       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2426          fails, it will happily create (subreg (symbol_ref)) or similar
2427          invalid SUBREGs.  */
2428       poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2429       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2430       if (ret)
2431         return ret;
2432       /* Give up on VOIDmode constants and on subregs that do not validate.  */
2433       if (GET_MODE (src) == VOIDmode
2434           || !validate_subreg (mode, src_mode, src, byte))
2435         return NULL_RTX;
2436       /* Otherwise load the constant into a register and take its lowpart.  */
2437       src = force_reg (GET_MODE (src), src);
2438       return gen_rtx_SUBREG (mode, src, byte);
2439     }
2440   /* CC modes are rejected outright.  */
2441   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2442     return NULL_RTX;
2443   /* For tieable modes of equal bitsize, first try a plain lowpart.  */
2444   if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2445       && targetm.modes_tieable_p (mode, src_mode))
2446     {
2447       rtx x = gen_lowpart_common (mode, src);
2448       if (x)
2449         return x;
2450     }
2451   /* Otherwise go via the integer modes given by int_mode_for_mode.  */
2452   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2453       || !int_mode_for_mode (mode).exists (&int_mode))
2454     return NULL_RTX;
2455   /* Both puns (SRC_MODE <-> integer, integer <-> MODE) must be tieable.  */
2456   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2457     return NULL_RTX;
2458   if (!targetm.modes_tieable_p (int_mode, mode))
2459     return NULL_RTX;
2460   /* View SRC as an integer and check the integer-to-integer lowpart.  */
2461   src = gen_lowpart (src_int_mode, src);
2462   if (!validate_subreg (int_mode, src_int_mode, src,
2463                         subreg_lowpart_offset (int_mode, src_int_mode)))
2464     return NULL_RTX;
2465   /* Convert between the integer modes as unsigned, then view as MODE.  */
2466   src = convert_modes (int_mode, src_int_mode, src, true);
2467   src = gen_lowpart (mode, src);
2468   return src;
2469 }
2470 \f
2471 /* Emit code to add INC to TARGET, storing the result in TARGET.  */
2472
2473 void
2474 expand_inc (rtx target, rtx inc)
2475 {
2476   machine_mode mode = GET_MODE (target);
2477   rtx sum = expand_binop (mode, add_optab, target, inc, target, 0,
2478                           OPTAB_LIB_WIDEN);
2479   if (sum != target)  /* The sum was placed elsewhere; copy it back.  */
2480     emit_move_insn (target, sum);
2481 }
2482
2483 /* Emit code to subtract DEC from TARGET, storing the result in TARGET.  */
2484
2485 void
2486 expand_dec (rtx target, rtx dec)
2487 {
2488   machine_mode mode = GET_MODE (target);
2489   rtx diff = expand_binop (mode, sub_optab, target, dec, target, 0,
2490                            OPTAB_LIB_WIDEN);
2491   if (diff != target)  /* The result was placed elsewhere; copy it back.  */
2492     emit_move_insn (target, diff);
2493 }
2494 \f
2495 /* Output a shift instruction for expression code CODE,
2496    with SHIFTED being the rtx for the value to shift,
2497    and AMOUNT the rtx for the amount to shift by.
2498    Store the result in the rtx TARGET, if that is convenient.
2499    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2500    Return the rtx for where the value is.
2501    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2502    in which case 0 is returned.  */
2503
2504 static rtx
2505 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2506                 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2507 {
2508   rtx op1, temp = 0;
2509   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);  /* Leftward.  */
2510   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);  /* Rotate.  */
2511   optab lshift_optab = ashl_optab;
2512   optab rshift_arith_optab = ashr_optab;
2513   optab rshift_uns_optab = lshr_optab;
2514   optab lrotate_optab = rotl_optab;
2515   optab rrotate_optab = rotr_optab;
2516   machine_mode op1_mode;
2517   scalar_mode scalar_mode = GET_MODE_INNER (mode);
2518   int attempt;
2519   bool speed = optimize_insn_for_speed_p ();
2520
2521   op1 = amount;
2522   op1_mode = GET_MODE (op1);
2523
2524   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2525      shift amount is a vector, use the vector/vector shift patterns.  */
2526   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2527     {
2528       lshift_optab = vashl_optab;
2529       rshift_arith_optab = vashr_optab;
2530       rshift_uns_optab = vlshr_optab;
2531       lrotate_optab = vrotl_optab;
2532       rrotate_optab = vrotr_optab;
2533     }
2534
2535   /* Previously detected shift-counts computed by NEGATE_EXPR
2536      and shifted in the other direction; but that does not work
2537      on all machines.  */
2538   /* Reduce an out-of-range constant count, or strip a lowpart SUBREG.  */
2539   if (SHIFT_COUNT_TRUNCATED)
2540     {
2541       if (CONST_INT_P (op1)
2542           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2543               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2544         op1 = gen_int_shift_amount (mode,
2545                                     (unsigned HOST_WIDE_INT) INTVAL (op1)
2546                                     % GET_MODE_BITSIZE (scalar_mode));
2547       else if (GET_CODE (op1) == SUBREG
2548                && subreg_lowpart_p (op1)
2549                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2550                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2551         op1 = SUBREG_REG (op1);
2552     }
2553
2554   /* Canonicalize rotates by constant amount.  We may canonicalize
2555      to reduce the immediate or if the ISA can rotate by constants
2556      in only one direction.  */
2557   if (rotate && reverse_rotate_by_imm_p (scalar_mode, left, op1))
2558     {
2559       op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2560                                          - INTVAL (op1)));
2561       left = !left;
2562       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2563     }
2564
2565   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2566      Note that this is not the case for bigger values.  For instance a rotation
2567      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2568      0x04030201 (bswapsi).  */
2569   if (rotate
2570       && CONST_INT_P (op1)
2571       && INTVAL (op1) == BITS_PER_UNIT
2572       && GET_MODE_SIZE (scalar_mode) == 2
2573       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2574     return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2575   /* A shift or rotate by zero leaves SHIFTED unchanged.  */
2576   if (op1 == const0_rtx)
2577     return shifted;
2578
2579   /* Check whether it's cheaper to implement a left shift by a constant
2580      bit count by a sequence of additions.  */
2581   if (code == LSHIFT_EXPR
2582       && CONST_INT_P (op1)
2583       && INTVAL (op1) > 0
2584       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2585       && INTVAL (op1) < MAX_BITS_PER_WORD
2586       && (shift_cost (speed, mode, INTVAL (op1))
2587           > INTVAL (op1) * add_cost (speed, mode))
2588       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2589     {
2590       int i;
2591       for (i = 0; i < INTVAL (op1); i++)
2592         {
2593           temp = force_reg (mode, shifted);
2594           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2595                                   unsignedp, OPTAB_LIB_WIDEN);
2596         }
2597       return shifted;
2598     }
2599   /* Try progressively more permissive optab methods until one succeeds.  */
2600   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2601     {
2602       enum optab_methods methods;
2603
2604       if (attempt == 0)
2605         methods = OPTAB_DIRECT;
2606       else if (attempt == 1)
2607         methods = OPTAB_WIDEN;
2608       else
2609         methods = OPTAB_LIB_WIDEN;
2610
2611       if (rotate)
2612         {
2613           /* Widening does not work for rotation.  */
2614           if (methods == OPTAB_WIDEN)
2615             continue;
2616           else if (methods == OPTAB_LIB_WIDEN)
2617             {
2618               /* If we have been unable to open-code this by a rotation,
2619                  do it as the IOR of two shifts.  I.e., to rotate A
2620                  by N bits, compute
2621                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2622                  where C is the bitsize of A.
2623
2624                  It is theoretically possible that the target machine might
2625                  not be able to perform either shift and hence we would
2626                  be making two libcalls rather than just the one for the
2627                  shift (similarly if IOR could not be done).  We will allow
2628                  this extremely unlikely lossage to avoid complicating the
2629                  code below.  */
2630
2631               rtx subtarget = target == shifted ? 0 : target;
2632               rtx new_amount, other_amount;
2633               rtx temp1;
2634
2635               new_amount = op1;
2636               if (op1 == const0_rtx)
2637                 return shifted;
2638               else if (CONST_INT_P (op1))
2639                 other_amount = gen_int_shift_amount
2640                   (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2641               else
2642                 {
2643                   other_amount
2644                     = simplify_gen_unary (NEG, GET_MODE (op1),
2645                                           op1, GET_MODE (op1));
2646                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2647                   other_amount
2648                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2649                                            gen_int_mode (mask, GET_MODE (op1)));
2650                 }
2651               /* Both shifts below use SHIFTED as their operand.  */
2652               shifted = force_reg (mode, shifted);
2653
2654               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2655                                      mode, shifted, new_amount, 0, 1);
2656               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2657                                       mode, shifted, other_amount,
2658                                       subtarget, 1);
2659               return expand_binop (mode, ior_optab, temp, temp1, target,
2660                                    unsignedp, methods);
2661             }
2662           /* Only OPTAB_DIRECT gets here: try the rotate pattern itself.  */
2663           temp = expand_binop (mode,
2664                                left ? lrotate_optab : rrotate_optab,
2665                                shifted, op1, target, unsignedp, methods);
2666         }
2667       else if (unsignedp)
2668         temp = expand_binop (mode,
2669                              left ? lshift_optab : rshift_uns_optab,
2670                              shifted, op1, target, unsignedp, methods);
2671
2672       /* Do arithmetic shifts.
2673          Also, if we are going to widen the operand, we can just as well
2674          use an arithmetic right-shift instead of a logical one.  */
2675       if (temp == 0 && ! rotate
2676           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2677         {
2678           enum optab_methods methods1 = methods;
2679
2680           /* If trying to widen a log shift to an arithmetic shift,
2681              don't accept an arithmetic shift of the same size.  */
2682           if (unsignedp)
2683             methods1 = OPTAB_MUST_WIDEN;
2684
2685           /* Arithmetic shift */
2686
2687           temp = expand_binop (mode,
2688                                left ? lshift_optab : rshift_arith_optab,
2689                                shifted, op1, target, unsignedp, methods1);
2690         }
2691
2692       /* We used to try extzv here for logical right shifts, but that was
2693          only useful for one machine, the VAX, and caused poor code
2694          generation there for lshrdi3, so the code was deleted and a
2695          define_expand for lshrsi3 was added to vax.md.  */
2696     }
2697   /* If every attempt failed, only a MAY_FAIL caller gets the null result.  */
2698   gcc_assert (temp != NULL_RTX || may_fail);
2699   return temp;
2700 }
2701
2702 /* Emit a shift for expression code CODE.
2703    SHIFTED is the rtx for the value being shifted,
2704    and AMOUNT is the (constant) number of bits to shift by.
2705    The result is stored in the rtx TARGET when that is convenient.
2706    A nonzero UNSIGNEDP selects a logical shift; zero, an arithmetic one.
2707    The return value is the rtx for where the result is.  */
2708
2709 rtx
2710 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2711               poly_int64 amount, rtx target, int unsignedp)
2712 {
2713   /* Materialize AMOUNT as an rtx via gen_int_shift_amount.  */
2714   rtx amount_rtx = gen_int_shift_amount (mode, amount);
2715   return expand_shift_1 (code, mode, shifted, amount_rtx, target, unsignedp);
2716 }
2717
2718 /* Like expand_shift, but AMOUNT is an int and 0 is returned on failure.  */
2719
2720 rtx
2721 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2722                     int amount, rtx target, int unsignedp)
2723 {
2724   rtx count = GEN_INT (amount);
2725   return expand_shift_1 (code, mode, shifted, count, target, unsignedp, true);
2726 }
2727
2728 /* Emit a shift for expression code CODE.
2729    SHIFTED is the rtx for the value being shifted,
2730    and AMOUNT is the tree for the number of bits to shift by.
2731    The result is stored in the rtx TARGET when that is convenient.
2732    A nonzero UNSIGNEDP selects a logical shift; zero, an arithmetic one.
2733    The return value is the rtx for where the result is.  */
2734
2735 rtx
2736 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2737                        tree amount, rtx target, int unsignedp)
2738 {
2739   rtx amount_rtx = expand_normal (amount);  /* Expand the AMOUNT tree.  */
2740   return expand_shift_1 (code, mode, shifted, amount_rtx, target, unsignedp);
2741 }
2742
2743 \f
2744 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2745                         const struct mult_cost *, machine_mode mode);
2746 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2747                               const struct algorithm *, enum mult_variant);
2748 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2749 static rtx extract_high_half (scalar_int_mode, rtx);
2750 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2751 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2752                                        int, int);  /* All file-local (static).  */
2753 /* Compute and return the best algorithm for multiplying by T.
2754    The algorithm must cost less than COST_LIMIT.
2755    If retval.cost >= COST_LIMIT, no algorithm was found and all
2756    other fields of the returned struct are undefined.
2757    MODE is the machine mode of the multiplication.  */
2758
2759 static void
2760 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2761             const struct mult_cost *cost_limit, machine_mode mode)
2762 {
2763   int m;
2764   struct algorithm *alg_in, *best_alg;
2765   struct mult_cost best_cost;
2766   struct mult_cost new_limit;
2767   int op_cost, op_latency;
2768   unsigned HOST_WIDE_INT orig_t = t;
2769   unsigned HOST_WIDE_INT q;
2770   int maxm, hash_index;
2771   bool cache_hit = false;
2772   enum alg_code cache_alg = alg_zero;
2773   bool speed = optimize_insn_for_speed_p ();
2774   scalar_int_mode imode;
2775   struct alg_hash_entry *entry_ptr;
2776
2777   /* Indicate that no algorithm is yet found.  If no algorithm
2778      is found, this value will be returned and indicate failure.  */
2779   alg_out->cost.cost = cost_limit->cost + 1;
2780   alg_out->cost.latency = cost_limit->latency + 1;
2781
2782   if (cost_limit->cost < 0
2783       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2784     return;
2785
2786   /* Be prepared for vector modes.  */
2787   imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2788
2789   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2790
2791   /* Restrict the bits of "t" to the multiplication's mode.  */
2792   t &= GET_MODE_MASK (imode);
2793
2794   /* t == 1 can be done in zero cost.  */
2795   if (t == 1)
2796     {
2797       alg_out->ops = 1;
2798       alg_out->cost.cost = 0;
2799       alg_out->cost.latency = 0;
2800       alg_out->op[0] = alg_m;
2801       return;
2802     }
2803
2804   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2805      fail now.  */
2806   if (t == 0)
2807     {
2808       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2809         return;
2810       else
2811         {
2812           alg_out->ops = 1;
2813           alg_out->cost.cost = zero_cost (speed);
2814           alg_out->cost.latency = zero_cost (speed);
2815           alg_out->op[0] = alg_zero;
2816           return;
2817         }
2818     }
2819
2820   /* We'll be needing a couple extra algorithm structures now.  */
2821
2822   alg_in = XALLOCA (struct algorithm);
2823   best_alg = XALLOCA (struct algorithm);
2824   best_cost = *cost_limit;
2825
2826   /* Compute the hash index.  */
2827   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2828
2829   /* See if we already know what to do for T.  */
2830   entry_ptr = alg_hash_entry_ptr (hash_index);
2831   if (entry_ptr->t == t
2832       && entry_ptr->mode == mode
2833       && entry_ptr->speed == speed
2834       && entry_ptr->alg != alg_unknown)
2835     {
2836       cache_alg = entry_ptr->alg;
2837
2838       if (cache_alg == alg_impossible)
2839         {
2840           /* The cache tells us that it's impossible to synthesize
2841              multiplication by T within entry_ptr->cost.  */
2842           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2843             /* COST_LIMIT is at least as restrictive as the one
2844                recorded in the hash table, in which case we have no
2845                hope of synthesizing a multiplication.  Just
2846                return.  */
2847             return;
2848
2849           /* If we get here, COST_LIMIT is less restrictive than the
2850              one recorded in the hash table, so we may be able to
2851              synthesize a multiplication.  Proceed as if we didn't
2852              have the cache entry.  */
2853         }
2854       else
2855         {
2856           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2857             /* The cached algorithm shows that this multiplication
2858                requires more cost than COST_LIMIT.  Just return.  This
2859                way, we don't clobber this cache entry with
2860                alg_impossible but retain useful information.  */
2861             return;
2862
2863           cache_hit = true;
2864
2865           switch (cache_alg)
2866             {
2867             case alg_shift:
2868               goto do_alg_shift;
2869
2870             case alg_add_t_m2:
2871             case alg_sub_t_m2:
2872               goto do_alg_addsub_t_m2;
2873
2874             case alg_add_factor:
2875             case alg_sub_factor:
2876               goto do_alg_addsub_factor;
2877
2878             case alg_add_t2_m:
2879               goto do_alg_add_t2_m;
2880
2881             case alg_sub_t2_m:
2882               goto do_alg_sub_t2_m;
2883
2884             default:
2885               gcc_unreachable ();
2886             }
2887         }
2888     }
2889
2890   /* If we have a group of zero bits at the low-order part of T, try
2891      multiplying by the remaining bits and then doing a shift.  */
2892
2893   if ((t & 1) == 0)
2894     {
2895     do_alg_shift:
2896       m = ctz_or_zero (t); /* m = number of low zero bits */
2897       if (m < maxm)
2898         {
2899           q = t >> m;
2900           /* The function expand_shift will choose between a shift and
2901              a sequence of additions, so the observed cost is given as
2902              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2903           op_cost = m * add_cost (speed, mode);
2904           if (shift_cost (speed, mode, m) < op_cost)
2905             op_cost = shift_cost (speed, mode, m);
2906           new_limit.cost = best_cost.cost - op_cost;
2907           new_limit.latency = best_cost.latency - op_cost;
2908           synth_mult (alg_in, q, &new_limit, mode);
2909
2910           alg_in->cost.cost += op_cost;
2911           alg_in->cost.latency += op_cost;
2912           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2913             {
2914               best_cost = alg_in->cost;
2915               std::swap (alg_in, best_alg);
2916               best_alg->log[best_alg->ops] = m;
2917               best_alg->op[best_alg->ops] = alg_shift;
2918             }
2919
2920           /* See if treating ORIG_T as a signed number yields a better
2921              sequence.  Try this sequence only for a negative ORIG_T
2922              as it would be useless for a non-negative ORIG_T.  */
2923           if ((HOST_WIDE_INT) orig_t < 0)
2924             {
2925               /* Shift ORIG_T as follows because a right shift of a
2926                  negative-valued signed type is implementation
2927                  defined.  */
2928               q = ~(~orig_t >> m);
2929               /* The function expand_shift will choose between a shift
2930                  and a sequence of additions, so the observed cost is
2931                  given as MIN (m * add_cost(speed, mode),
2932                  shift_cost(speed, mode, m)).  */
2933               op_cost = m * add_cost (speed, mode);
2934               if (shift_cost (speed, mode, m) < op_cost)
2935                 op_cost = shift_cost (speed, mode, m);
2936               new_limit.cost = best_cost.cost - op_cost;
2937               new_limit.latency = best_cost.latency - op_cost;
2938               synth_mult (alg_in, q, &new_limit, mode);
2939
2940               alg_in->cost.cost += op_cost;
2941               alg_in->cost.latency += op_cost;
2942               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2943                 {
2944                   best_cost = alg_in->cost;
2945                   std::swap (alg_in, best_alg);
2946                   best_alg->log[best_alg->ops] = m;
2947                   best_alg->op[best_alg->ops] = alg_shift;
2948                 }
2949             }
2950         }
2951       if (cache_hit)
2952         goto done;
2953     }
2954
2955   /* If we have an odd number, add or subtract one.  */
2956   if ((t & 1) != 0)
2957     {
2958       unsigned HOST_WIDE_INT w;
2959
2960     do_alg_addsub_t_m2:
2961       for (w = 1; (w & t) != 0; w <<= 1)
2962         ;
2963       /* If T was -1, then W will be zero after the loop.  This is another
2964          case where T ends with ...111.  Handling this with (T + 1) and
2965          subtract 1 produces slightly better code and results in algorithm
2966          selection much faster than treating it like the ...0111 case
2967          below.  */
2968       if (w == 0
2969           || (w > 2
2970               /* Reject the case where t is 3.
2971                  Thus we prefer addition in that case.  */
2972               && t != 3))
2973         {
2974           /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2975
2976           op_cost = add_cost (speed, mode);
2977           new_limit.cost = best_cost.cost - op_cost;
2978           new_limit.latency = best_cost.latency - op_cost;
2979           synth_mult (alg_in, t + 1, &new_limit, mode);
2980
2981           alg_in->cost.cost += op_cost;
2982           alg_in->cost.latency += op_cost;
2983           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2984             {
2985               best_cost = alg_in->cost;
2986               std::swap (alg_in, best_alg);
2987               best_alg->log[best_alg->ops] = 0;
2988               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2989             }
2990         }
2991       else
2992         {
2993           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2994
2995           op_cost = add_cost (speed, mode);
2996           new_limit.cost = best_cost.cost - op_cost;
2997           new_limit.latency = best_cost.latency - op_cost;
2998           synth_mult (alg_in, t - 1, &new_limit, mode);
2999
3000           alg_in->cost.cost += op_cost;
3001           alg_in->cost.latency += op_cost;
3002           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3003             {
3004               best_cost = alg_in->cost;
3005               std::swap (alg_in, best_alg);
3006               best_alg->log[best_alg->ops] = 0;
3007               best_alg->op[best_alg->ops] = alg_add_t_m2;
3008             }
3009         }
3010
3011       /* We may be able to calculate a * -7, a * -15, a * -31, etc
3012          quickly with a - a * n for some appropriate constant n.  */
3013       m = exact_log2 (-orig_t + 1);
3014       if (m >= 0 && m < maxm)
3015         {
3016           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3017           /* If the target has a cheap shift-and-subtract insn use
3018              that in preference to a shift insn followed by a sub insn.
3019              Assume that the shift-and-sub is "atomic" with a latency
3020              equal to its cost, otherwise assume that on superscalar
3021              hardware the shift may be executed concurrently with the
3022              earlier steps in the algorithm.  */
3023           if (shiftsub1_cost (speed, mode, m) <= op_cost)
3024             {
3025               op_cost = shiftsub1_cost (speed, mode, m);
3026               op_latency = op_cost;
3027             }
3028           else
3029             op_latency = add_cost (speed, mode);
3030
3031           new_limit.cost = best_cost.cost - op_cost;
3032           new_limit.latency = best_cost.latency - op_latency;
3033           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
3034                       &new_limit, mode);
3035
3036           alg_in->cost.cost += op_cost;
3037           alg_in->cost.latency += op_latency;
3038           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3039             {
3040               best_cost = alg_in->cost;
3041               std::swap (alg_in, best_alg);
3042               best_alg->log[best_alg->ops] = m;
3043               best_alg->op[best_alg->ops] = alg_sub_t_m2;
3044             }
3045         }
3046
3047       if (cache_hit)
3048         goto done;
3049     }
3050
3051   /* Look for factors of t of the form
3052      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3053      If we find such a factor, we can multiply by t using an algorithm that
3054      multiplies by q, shift the result by m and add/subtract it to itself.
3055
3056      We search for large factors first and loop down, even if large factors
3057      are less probable than small; if we find a large factor we will find a
3058      good sequence quickly, and therefore be able to prune (by decreasing
3059      COST_LIMIT) the search.  */
3060
3061  do_alg_addsub_factor:
3062   for (m = floor_log2 (t - 1); m >= 2; m--)
3063     {
3064       unsigned HOST_WIDE_INT d;
3065
3066       d = (HOST_WIDE_INT_1U << m) + 1;
3067       if (t % d == 0 && t > d && m < maxm
3068           && (!cache_hit || cache_alg == alg_add_factor))
3069         {
3070           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3071           if (shiftadd_cost (speed, mode, m) <= op_cost)
3072             op_cost = shiftadd_cost (speed, mode, m);
3073
3074           op_latency = op_cost;
3075
3076
3077           new_limit.cost = best_cost.cost - op_cost;
3078           new_limit.latency = best_cost.latency - op_latency;
3079           synth_mult (alg_in, t / d, &new_limit, mode);
3080
3081           alg_in->cost.cost += op_cost;
3082           alg_in->cost.latency += op_latency;
3083           if (alg_in->cost.latency < op_cost)
3084             alg_in->cost.latency = op_cost;
3085           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3086             {
3087               best_cost = alg_in->cost;
3088               std::swap (alg_in, best_alg);
3089               best_alg->log[best_alg->ops] = m;
3090               best_alg->op[best_alg->ops] = alg_add_factor;
3091             }
3092           /* Other factors will have been taken care of in the recursion.  */
3093           break;
3094         }
3095
3096       d = (HOST_WIDE_INT_1U << m) - 1;
3097       if (t % d == 0 && t > d && m < maxm
3098           && (!cache_hit || cache_alg == alg_sub_factor))
3099         {
3100           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3101           if (shiftsub0_cost (speed, mode, m) <= op_cost)
3102             op_cost = shiftsub0_cost (speed, mode, m);
3103
3104           op_latency = op_cost;
3105
3106           new_limit.cost = best_cost.cost - op_cost;
3107           new_limit.latency = best_cost.latency - op_latency;
3108           synth_mult (alg_in, t / d, &new_limit, mode);
3109
3110           alg_in->cost.cost += op_cost;
3111           alg_in->cost.latency += op_latency;
3112           if (alg_in->cost.latency < op_cost)
3113             alg_in->cost.latency = op_cost;
3114           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3115             {
3116               best_cost = alg_in->cost;
3117               std::swap (alg_in, best_alg);
3118               best_alg->log[best_alg->ops] = m;
3119               best_alg->op[best_alg->ops] = alg_sub_factor;
3120             }
3121           break;
3122         }
3123     }
3124   if (cache_hit)
3125     goto done;
3126
3127   /* Try shift-and-add (load effective address) instructions,
3128      i.e. do a*3, a*5, a*9.  */
3129   if ((t & 1) != 0)
3130     {
3131     do_alg_add_t2_m:
3132       q = t - 1;
3133       m = ctz_hwi (q);
3134       if (q && m < maxm)
3135         {
3136           op_cost = shiftadd_cost (speed, mode, m);
3137           new_limit.cost = best_cost.cost - op_cost;
3138           new_limit.latency = best_cost.latency - op_cost;
3139           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3140
3141           alg_in->cost.cost += op_cost;
3142           alg_in->cost.latency += op_cost;
3143           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3144             {
3145               best_cost = alg_in->cost;
3146               std::swap (alg_in, best_alg);
3147               best_alg->log[best_alg->ops] = m;
3148               best_alg->op[best_alg->ops] = alg_add_t2_m;
3149             }
3150         }
3151       if (cache_hit)
3152         goto done;
3153
3154     do_alg_sub_t2_m:
3155       q = t + 1;
3156       m = ctz_hwi (q);
3157       if (q && m < maxm)
3158         {
3159           op_cost = shiftsub0_cost (speed, mode, m);
3160           new_limit.cost = best_cost.cost - op_cost;
3161           new_limit.latency = best_cost.latency - op_cost;
3162           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3163
3164           alg_in->cost.cost += op_cost;
3165           alg_in->cost.latency += op_cost;
3166           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3167             {
3168               best_cost = alg_in->cost;
3169               std::swap (alg_in, best_alg);
3170               best_alg->log[best_alg->ops] = m;
3171               best_alg->op[best_alg->ops] = alg_sub_t2_m;
3172             }
3173         }
3174       if (cache_hit)
3175         goto done;
3176     }
3177
3178  done:
3179   /* If best_cost has not decreased, we have not found any algorithm.  */
3180   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3181     {
3182       /* We failed to find an algorithm.  Record alg_impossible for
3183          this case (that is, <T, MODE, COST_LIMIT>) so that next time
3184          we are asked to find an algorithm for T within the same or
3185          lower COST_LIMIT, we can immediately return to the
3186          caller.  */
3187       entry_ptr->t = t;
3188       entry_ptr->mode = mode;
3189       entry_ptr->speed = speed;
3190       entry_ptr->alg = alg_impossible;
3191       entry_ptr->cost = *cost_limit;
3192       return;
3193     }
3194
3195   /* Cache the result.  */
3196   if (!cache_hit)
3197     {
3198       entry_ptr->t = t;
3199       entry_ptr->mode = mode;
3200       entry_ptr->speed = speed;
3201       entry_ptr->alg = best_alg->op[best_alg->ops];
3202       entry_ptr->cost.cost = best_cost.cost;
3203       entry_ptr->cost.latency = best_cost.latency;
3204     }
3205
3206   /* If we are getting a too long sequence for `struct algorithm'
3207      to record, make this search fail.  */
3208   if (best_alg->ops == MAX_BITS_PER_WORD)
3209     return;
3210
3211   /* Copy the algorithm from temporary space to the space at alg_out.
3212      We avoid using structure assignment because the majority of
3213      best_alg is normally undefined, and this is a critical function.  */
3214   alg_out->ops = best_alg->ops + 1;
3215   alg_out->cost = best_cost;
3216   memcpy (alg_out->op, best_alg->op,
3217           alg_out->ops * sizeof *alg_out->op);
3218   memcpy (alg_out->log, best_alg->log,
3219           alg_out->ops * sizeof *alg_out->log);
3220 }
3221 \f
3222 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3223    Try three variations:
3224
3225        - a shift/add sequence based on VAL itself
3226        - a shift/add sequence based on -VAL, followed by a negation
3227        - a shift/add sequence based on VAL - 1, followed by an addition.
3228
3229    Return true if the cheapest of these cost less than MULT_COST,
3230    describing the algorithm in *ALG and final fixup in *VARIANT.

        A negative MULT_COST fails immediately.  Otherwise MULT_COST is
        first clamped to the cost of 2 * bits additions, and each later
        search is only given the budget left by the best result so far
        (or by MULT_COST, if nothing beat it yet) after paying for its
        fixup instruction.  */
3231
3232 bool
3233 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3234                      struct algorithm *alg, enum mult_variant *variant,
3235                      int mult_cost)
3236 {
3237   struct algorithm alg2;
3238   struct mult_cost limit;
3239   int op_cost;
3240   bool speed = optimize_insn_for_speed_p ();
3241
3242   /* Fail quickly for impossible bounds.  */
3243   if (mult_cost < 0)
3244     return false;
3245
3246   /* Ensure that mult_cost provides a reasonable upper bound.
3247      Any constant multiplication can be performed with less
3248      than 2 * bits additions.  */
3249   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3250   if (mult_cost > op_cost)
3251     mult_cost = op_cost;
3252
       /* First variation: VAL itself, with no fixup.  The result goes
          directly into *ALG.  */
3253   *variant = basic_variant;
3254   limit.cost = mult_cost;
3255   limit.latency = mult_cost;
3256   synth_mult (alg, val, &limit, mode);
3257
3258   /* This works only if the inverted value actually fits in an
3259      `unsigned int' */
3260   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3261     {
           /* Second variation: -VAL, followed by a negation that costs
              OP_COST.  */
3262       op_cost = neg_cost (speed, mode);
3263       if (MULT_COST_LESS (&alg->cost, mult_cost))
3264         {
3265           limit.cost = alg->cost.cost - op_cost;
3266           limit.latency = alg->cost.latency - op_cost;
3267         }
3268       else
3269         {
3270           limit.cost = mult_cost - op_cost;
3271           limit.latency = mult_cost - op_cost;
3272         }
3273
           /* Negate in the unsigned type.  Negating the signed VAL would
              overflow, which is undefined, when VAL is HOST_WIDE_INT_MIN;
              the unsigned negation is always defined and produces the same
              bit pattern for every other value.  */
3274       synth_mult (&alg2, -(unsigned HOST_WIDE_INT) val, &limit, mode);
3275       alg2.cost.cost += op_cost;
3276       alg2.cost.latency += op_cost;
3277       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3278         *alg = alg2, *variant = negate_variant;
3279     }
3280
       /* Third variation: VAL - 1, followed by an addition that costs
          OP_COST.  */
3281   /* This proves very useful for division-by-constant.  */
3282   op_cost = add_cost (speed, mode);
3283   if (MULT_COST_LESS (&alg->cost, mult_cost))
3284     {
3285       limit.cost = alg->cost.cost - op_cost;
3286       limit.latency = alg->cost.latency - op_cost;
3287     }
3288   else
3289     {
3290       limit.cost = mult_cost - op_cost;
3291       limit.latency = mult_cost - op_cost;
3292     }
3293
       /* VAL - 1 is computed in the unsigned type, so it wraps for
          HOST_WIDE_INT_MIN.  That value is only tried when the element
          precision of MODE is exactly HOST_BITS_PER_WIDE_INT.  */
3294   if (val != HOST_WIDE_INT_MIN
3295       || GET_MODE_UNIT_PRECISION (mode) == HOST_BITS_PER_WIDE_INT)
3296     {
3297       synth_mult (&alg2, val - HOST_WIDE_INT_1U, &limit, mode);
3298       alg2.cost.cost += op_cost;
3299       alg2.cost.latency += op_cost;
3300       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3301         *alg = alg2, *variant = add_variant;
3302     }
3303
3304   return MULT_COST_LESS (&alg->cost, mult_cost);
3305 }
3306
3307 /* A subroutine of expand_mult, used for constant multiplications.
3308    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3309    convenient.  Use the shift/add sequence described by ALG and apply
3310    the final fixup specified by VARIANT.

        ALG->op[0] must be alg_zero or alg_m; it selects the initial value
        of the accumulator.  The remaining ALG->ops - 1 steps are emitted in
        order, each using ALG->log[i] as its shift count.  The multiplier
        reconstructed from the emitted steps is asserted to equal VAL within
        the bits of MODE's inner mode.  Returns the rtx holding the result.  */
3311
3312 static rtx
3313 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3314                    rtx target, const struct algorithm *alg,
3315                    enum mult_variant variant)
3316 {
       /* The multiple of OP0 that ACCUM represents after the steps
          emitted so far.  */
3317   unsigned HOST_WIDE_INT val_so_far;
3318   rtx_insn *insn;
3319   rtx accum, tem;
3320   int opno;
3321   machine_mode nmode;
3322
3323   /* Avoid referencing memory over and over and invalid sharing
3324      on SUBREGs.  */
3325   op0 = force_reg (mode, op0);
3326
3327   /* ACCUM starts out either as OP0 or as a zero, depending on
3328      the first operation.  */
3329
3330   if (alg->op[0] == alg_zero)
3331     {
3332       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3333       val_so_far = 0;
3334     }
3335   else if (alg->op[0] == alg_m)
3336     {
3337       accum = copy_to_mode_reg (mode, op0);
3338       val_so_far = 1;
3339     }
3340   else
3341     gcc_unreachable ();
3342
       /* Step 0 only selected the starting value; emit steps
          1 .. ALG->ops - 1.  */
3343   for (opno = 1; opno < alg->ops; opno++)
3344     {
3345       int log = alg->log[opno];
           /* When not optimizing, intermediate results are directed back
              into ACCUM, and the add/sub of the last step may go straight
              to TARGET unless an add_variant fixup still follows.  When
              optimizing, no target is suggested.  */
3346       rtx shift_subtarget = optimize ? 0 : accum;
3347       rtx add_target
3348         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3349            && !optimize)
3350           ? target : 0;
3351       rtx accum_target = optimize ? 0 : accum;
3352       rtx accum_inner;
3353
3354       switch (alg->op[opno])
3355         {
3356         case alg_shift:
               /* accum = accum << log  */
3357           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3358           /* REG_EQUAL note will be attached to the following insn.  */
3359           emit_move_insn (accum, tem);
3360           val_so_far <<= log;
3361           break;
3362
3363         case alg_add_t_m2:
               /* accum = accum + (op0 << log)  */
3364           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3365           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3366                                  add_target ? add_target : accum_target);
3367           val_so_far += HOST_WIDE_INT_1U << log;
3368           break;
3369
3370         case alg_sub_t_m2:
               /* accum = accum - (op0 << log)  */
3371           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3372           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3373                                  add_target ? add_target : accum_target);
3374           val_so_far -= HOST_WIDE_INT_1U << log;
3375           break;
3376
3377         case alg_add_t2_m:
               /* accum = (accum << log) + op0  */
3378           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3379                                 log, shift_subtarget, 0);
3380           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3381                                  add_target ? add_target : accum_target);
3382           val_so_far = (val_so_far << log) + 1;
3383           break;
3384
3385         case alg_sub_t2_m:
               /* accum = (accum << log) - op0  */
3386           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3387                                 log, shift_subtarget, 0);
3388           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3389                                  add_target ? add_target : accum_target);
3390           val_so_far = (val_so_far << log) - 1;
3391           break;
3392
3393         case alg_add_factor:
               /* accum = accum + (accum << log)  */
3394           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3395           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3396                                  add_target ? add_target : accum_target);
3397           val_so_far += val_so_far << log;
3398           break;
3399
3400         case alg_sub_factor:
               /* accum = (accum << log) - accum.  Unlike the other cases,
                  the fallback target when not optimizing is TEM rather
                  than ACCUM.  */
3401           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3402           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3403                                  (add_target
3404                                   ? add_target : (optimize ? 0 : tem)));
3405           val_so_far = (val_so_far << log) - val_so_far;
3406           break;
3407
3408         default:
3409           gcc_unreachable ();
3410         }
3411
3412       if (SCALAR_INT_MODE_P (mode))
3413         {
3414           /* Write a REG_EQUAL note on the last insn so that we can cse
3415              multiplication sequences.  Note that if ACCUM is a SUBREG,
3416              we've set the inner register and must properly indicate that.  */
3417           tem = op0, nmode = mode;
3418           accum_inner = accum;
3419           if (GET_CODE (accum) == SUBREG)
3420             {
3421               accum_inner = SUBREG_REG (accum);
3422               nmode = GET_MODE (accum_inner);
3423               tem = gen_lowpart (nmode, op0);
3424             }
3425
3426           /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3427              In that case, only the low bits of accum would be guaranteed to
3428              be equal to the content of the REG_EQUAL note, the upper bits
3429              can be anything.  */
3430           if (!paradoxical_subreg_p (tem))
3431             {
                   /* The note records ACCUM_INNER == TEM * VAL_SO_FAR,
                      with the constant expressed in NMODE's precision.  */
3432               insn = get_last_insn ();
3433               wide_int wval_so_far
3434                 = wi::uhwi (val_so_far,
3435                             GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3436               rtx c = immed_wide_int_const (wval_so_far, nmode);
3437               set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3438                                 accum_inner);
3439             }
3440         }
3441     }
3442
       /* Apply the final fixup, keeping VAL_SO_FAR in step with it:
          negate_variant negates the accumulator, add_variant adds OP0
          once more.  basic_variant needs nothing.  */
3443   if (variant == negate_variant)
3444     {
3445       val_so_far = -val_so_far;
3446       accum = expand_unop (mode, neg_optab, accum, target, 0);
3447     }
3448   else if (variant == add_variant)
3449     {
3450       val_so_far = val_so_far + 1;
3451       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3452     }
3453
3454   /* Compare only the bits of val and val_so_far that are significant
3455      in the result mode, to avoid sign-/zero-extension confusion.  */
3456   nmode = GET_MODE_INNER (mode);
3457   val &= GET_MODE_MASK (nmode);
3458   val_so_far &= GET_MODE_MASK (nmode);
3459   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3460
3461   return accum;
3462 }
3463
3464 /* Perform a multiplication and return an rtx for the result.
3465    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3466    TARGET is a suggestion for where to store the result (an rtx).
3467
3468    We check specially for a constant integer as OP1.
3469    If you want this check for OP0 as well, then before calling
3470    you should swap the two operands if OP0 would be constant.

        UNSIGNEDP is passed through to the shift and binop expanders.  When
        flag_trapv is set, MODE is a scalar integer mode and UNSIGNEDP is
        zero, negv_optab/smulv_optab are used instead of neg_optab/smul_optab
        and no shift/add sequence is synthesized.

        If NO_LIBCALL is true, expand_binop is called with OPTAB_WIDEN rather
        than OPTAB_LIB_WIDEN; the final assertion allows a null result only
        in that case.  */
3471
3472 rtx
3473 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3474              int unsignedp, bool no_libcall)
3475 {
3476   enum mult_variant variant;
3477   struct algorithm algorithm;
3478   rtx scalar_op1;
3479   int max_cost;
3480   bool speed = optimize_insn_for_speed_p ();
3481   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3482
       /* Put a constant operand, if there is one in OP0, into OP1.  */
3483   if (CONSTANT_P (op0))
3484     std::swap (op0, op1);
3485
3486   /* For vectors, there are several simplifications that can be made if
3487      all elements of the vector constant are identical.  */
3488   scalar_op1 = unwrap_const_vec_duplicate (op1);
3489
3490   if (INTEGRAL_MODE_P (mode))
3491     {
3492       rtx fake_reg;
3493       HOST_WIDE_INT coeff;
3494       bool is_neg;
3495       int mode_bitsize;
3496
           /* Trivial multipliers: 0, 1 and -1.  */
3497       if (op1 == CONST0_RTX (mode))
3498         return op1;
3499       if (op1 == CONST1_RTX (mode))
3500         return op0;
3501       if (op1 == CONSTM1_RTX (mode))
3502         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3503                             op0, target, 0);
3504
           /* With do_trapv the multiply must go through smulv_optab
              below, so no shift/add sequence is attempted.  */
3505       if (do_trapv)
3506         goto skip_synth;
3507
3508       /* If mode is integer vector mode, check if the backend supports
3509          vector lshift (by scalar or vector) at all.  If not, we can't use
3510          synthesized multiply.  */
3511       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3512           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3513           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3514         goto skip_synth;
3515
3516       /* These are the operations that are potentially turned into
3517          a sequence of shifts and additions.  */
3518       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3519
3520       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3521          less than or equal in size to `unsigned int' this doesn't matter.
3522          If the mode is larger than `unsigned int', then synth_mult works
3523          only if the constant value exactly fits in an `unsigned int' without
3524          any truncation.  This means that multiplying by negative values does
3525          not work; results are off by 2^32 on a 32 bit machine.  */
3526       if (CONST_INT_P (scalar_op1))
3527         {
               /* Remember the sign; it is consulted below when the mode is
                  wider than HOST_BITS_PER_WIDE_INT.  */
3528           coeff = INTVAL (scalar_op1);
3529           is_neg = coeff < 0;
3530         }
3531 #if TARGET_SUPPORTS_WIDE_INT
3532       else if (CONST_WIDE_INT_P (scalar_op1))
3533 #else
3534       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3535 #endif
3536         {
               /* For a constant that is not a CONST_INT, only an exact
                  power of two is handled here; anything else takes the
                  generic path.  */
3537           int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3538           /* Perfect power of 2 (other than 1, which is handled above).  */
3539           if (shift > 0)
3540             return expand_shift (LSHIFT_EXPR, mode, op0,
3541                                  shift, target, unsignedp);
3542           else
3543             goto skip_synth;
3544         }
3545       else
3546         goto skip_synth;
3547
3548       /* We used to test optimize here, on the grounds that it's better to
3549          produce a smaller program when -O is not used.  But this causes
3550          such a terrible slowdown sometimes that it seems better to always
3551          use synth_mult.  */
3552
3553       /* Special case powers of two.  */
3554       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3555           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3556         return expand_shift (LSHIFT_EXPR, mode, op0,
3557                              floor_log2 (coeff), target, unsignedp);
3558
           /* Placeholder register, used only to build MULT rtxes for
              costing.  */
3559       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3560
3561       /* Attempt to handle multiplication of DImode values by negative
3562          coefficients, by performing the multiplication by a positive
3563          multiplier and then inverting the result.  */
3564       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3565         {
3566           /* It's safe to use -coeff even for INT_MIN, as the
3567              result is interpreted as an unsigned coefficient.
3568              Exclude cost of op0 from max_cost to match the cost
3569              calculation of the synth_mult.  */
3570           coeff = -(unsigned HOST_WIDE_INT) coeff;
3571           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3572                                     mode, speed)
3573                       - neg_cost (speed, mode));
               /* No budget is left once the trailing negation is paid
                  for.  */
3574           if (max_cost <= 0)
3575             goto skip_synth;
3576
3577           /* Special case powers of two.  */
3578           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3579             {
3580               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3581                                        floor_log2 (coeff), target, unsignedp);
3582               return expand_unop (mode, neg_optab, temp, target, 0);
3583             }
3584
3585           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3586                                    max_cost))
3587             {
3588               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3589                                             &algorithm, variant);
3590               return expand_unop (mode, neg_optab, temp, target, 0);
3591             }
3592           goto skip_synth;
3593         }
3594
3595       /* Exclude cost of op0 from max_cost to match the cost
3596          calculation of the synth_mult.  */
3597       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3598       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3599         return expand_mult_const (mode, op0, coeff, target,
3600                                   &algorithm, variant);
           /* Otherwise fall through to the generic expansion below.  */
3601     }
3602  skip_synth:
3603
3604   /* Expand x*2.0 as x+x.  */
3605   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3606       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3607     {
3608       op0 = force_reg (GET_MODE (op0), op0);
3609       return expand_binop (mode, add_optab, op0, op0,
3610                            target, unsignedp,
3611                            no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3612     }
3613
3614   /* This used to use umul_optab if unsigned, but for non-widening multiply
3615      there is no difference between signed and unsigned.  */
3616   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3617                       op0, op1, target, unsignedp,
3618                       no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3619   gcc_assert (op0 || no_libcall);
3620   return op0;
3621 }
3622
3623 /* Return a cost estimate for multiplying a register by COEFF in MODE:
3624    the cost of the sequence chosen by choose_mult_variant if it accepts
3625    one, else the budget it was given, i.e. what set_src_cost reports
3626    under SPEED for a MULT of two registers in MODE.  */
3627
3628 int
3629 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3630 {
3631   /* A placeholder register stands in for both MULT operands.  */
3632   rtx reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3633   rtx reg_mult = gen_rtx_MULT (mode, reg, reg);
3634   const int budget = set_src_cost (reg_mult, mode, speed);
3635
3636   struct algorithm alg;
3637   enum mult_variant fixup;
3638   bool found = choose_mult_variant (mode, coeff, &alg, &fixup, budget);
3639   return found ? alg.cost.cost : budget;
3640 }
3641
3642 /* Expand a widening multiplication of OP0 by OP1 and return an rtx for
3643    the product.  MODE is the mode of the product, TARGET is a suggestion
3644    for where to store it, and THIS_OPTAB, which must be either
3645    umul_widen_optab or smul_widen_optab, is the optab to expand through.
3646
3647    A constant integer OP1 gets special treatment: zero and powers of two
3648    are expanded directly; for other constants the cost of the widening
3649    multiply is compared against a sequence of shifts and adds, and the
3650    sequence is emitted when choose_mult_variant accepts it.  */
3651
3652 rtx
3653 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3654                       int unsignedp, optab this_optab)
3655 {
3656   const bool speed = optimize_insn_for_speed_p ();
3657   const bool unsigned_optab = this_optab == umul_widen_optab;
3658
3659   if (CONST_INT_P (op1) && GET_MODE (op0) != VOIDmode)
3660     {
3661       /* Re-express the constant in MODE, coming from OP0's mode.  */
3662       rtx cop1 = convert_modes (mode, GET_MODE (op0), op1, unsigned_optab);
3663
3664       if (cop1 && CONST_INT_P (cop1)
3665           && (INTVAL (cop1) >= 0 || HWI_COMPUTABLE_MODE_P (mode)))
3666         {
3667           const HOST_WIDE_INT coeff = INTVAL (cop1);
3668
3669           if (coeff == 0)
3670             return CONST0_RTX (mode);
3671
3672           /* Special case powers of two: shift the converted OP0.  */
3673           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3674             {
3675               rtx wide_op0 = convert_to_mode (mode, op0, unsigned_optab);
3676               return expand_shift (LSHIFT_EXPR, mode, wide_op0,
3677                                    floor_log2 (coeff), target, unsignedp);
3678             }
3679
3680           /* Exclude cost of op0 from the budget to match the cost
3681              calculation of the synth_mult.  */
3682           struct algorithm alg;
3683           enum mult_variant fixup;
3684           const int budget = mul_widen_cost (speed, mode);
3685           if (choose_mult_variant (mode, coeff, &alg, &fixup, budget))
3686             {
3687               rtx wide_op0 = convert_to_mode (mode, op0, unsigned_optab);
3688               return expand_mult_const (mode, wide_op0, coeff, target,
3689                                         &alg, fixup);
3690             }
3691         }
3692     }
3693
3694   return expand_binop (mode, this_optab, op0, op1, target,
3695                        unsignedp, OPTAB_LIB_WIDEN);
3696 }
3697 \f
3698 /* Choose a minimal N + 1 bit approximation to 2**K / D that can be used to
3699    replace division by D, put the least significant N bits of the result in
3700    *MULTIPLIER_PTR, the value K - N in *POST_SHIFT_PTR, and return the most
3701    significant bit.
3702
3703    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3704    needed precision is PRECISION (should be <= N).
3705
3706    PRECISION should be as small as possible so this function can choose the
3707    multiplier more freely.  If PRECISION is <= N - 1, the most significant
3708    bit returned by the function will be zero.
3709
3710    Using this function, x / D is equal to (x*m) / 2**N >> (*POST_SHIFT_PTR),
3711    where m is the full N + 1 bit multiplier.

        The function asserts that ceil(log2(D)) is at most N and at most
        PRECISION.  */
3712
3713 unsigned HOST_WIDE_INT
3714 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3715                    unsigned HOST_WIDE_INT *multiplier_ptr,
3716                    int *post_shift_ptr)
3717 {
3718   int lgup, post_shift;
3719   int pow1, pow2;
3720
3721   /* lgup = ceil(log2(d)) */
3722   /* Assuming d > 1, we have d >= 2^(lgup-1) + 1 */
3723   lgup = ceil_log2 (d);
3724
3725   gcc_assert (lgup <= n);
3726   gcc_assert (lgup <= precision);
3727
       /* Exponents of the two powers of two that form the numerators of
          MLOW and MHIGH below.  */
3728   pow1 = n + lgup;
3729   pow2 = n + lgup - precision;
3730
3731   /* mlow = 2^(n + lgup)/d */
3732   /* Trivially from above we have mlow < 2^(n+1) */
3733   wide_int val = wi::set_bit_in_zero (pow1, HOST_BITS_PER_DOUBLE_INT);
3734   wide_int mlow = wi::udiv_trunc (val, d);
3735
3736   /* mhigh = (2^(n + lgup) + 2^(n + lgup - precision))/d */
3737   /* From above we have mhigh < 2^(n+1) assuming lgup <= precision */
3738   /* From precision <= n, the difference between the numerators of mhigh and
3739      mlow is >= 2^lgup >= d.  Therefore the difference of the quotients in
3740      the Euclidean division by d is at least 1, so we have mlow < mhigh and
3741      the exact value of 2^(n + lgup)/d lies in the interval [mlow; mhigh).  */
3742   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3743   wide_int mhigh = wi::udiv_trunc (val, d);
3744
3745   /* Reduce to lowest terms.  */
3746   /* If precision <= n - 1, then the difference between the numerators of
3747      mhigh and mlow is >= 2^(lgup + 1) >= 2 * 2^lgup >= 2 * d.  Therefore
3748      the difference of the quotients in the Euclidean division by d is at
3749      least 2, which means that mhigh and mlow differ by at least one bit
3750      not in the last place.  The conclusion is that the first iteration of
3751      the loop below completes and shifts mhigh and mlow by 1 bit, which in
3752      particular means that mhigh < 2^n, that is to say, the most significant
3753      bit in the n + 1 bit value is zero.  */
3754   for (post_shift = lgup; post_shift > 0; post_shift--)
3755     {
           /* Candidate values of MLOW and MHIGH shifted right by one
              bit.  */
3756       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3757                                                        HOST_BITS_PER_WIDE_INT);
3758       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3759                                                        HOST_BITS_PER_WIDE_INT);
           /* Stop as soon as the shifted bounds would no longer satisfy
              mlow < mhigh; the unshifted values are kept.  */
3760       if (ml_lo >= mh_lo)
3761         break;
3762
3763       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3764       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3765     }
3766
       /* POST_SHIFT is LGUP minus the number of one-bit shifts performed
          by the loop.  */
3767   *post_shift_ptr = post_shift;
3768
3769   if (n < HOST_BITS_PER_WIDE_INT)
3770     {
           /* An N + 1 bit value fits in one HOST_WIDE_INT here: store its
              low N bits and report whether anything is set above them.  */
3771       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3772       *multiplier_ptr = mhigh.to_uhwi () & mask;
3773       return mhigh.to_uhwi () > mask;
3774     }
3775   else
3776     {
           /* Otherwise the low HOST_WIDE_INT of MHIGH is the multiplier
              and the returned bit is the one at position
              HOST_BITS_PER_WIDE_INT.  */
3777       *multiplier_ptr = mhigh.to_uhwi ();
3778       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3779     }
3780 }
3781
3782 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is congruent
3783    to 1 modulo 2**N, assuming that X is odd.  Bézout's lemma guarantees that Y
3784    exists for any given positive N.  */
3785
3786 static unsigned HOST_WIDE_INT
3787 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3788 {
3789   gcc_assert ((x & 1) == 1);
3790
3791   /* The algorithm notes that the choice Y = Z satisfies X*Y == 1 mod 2^3,
3792      since X is odd.  Then each iteration doubles the number of bits of
3793      significance in Y.  */
3794
3795   const unsigned HOST_WIDE_INT mask
3796     = (n == HOST_BITS_PER_WIDE_INT
3797        ? HOST_WIDE_INT_M1U
3798        : (HOST_WIDE_INT_1U << n) - 1);
3799   unsigned HOST_WIDE_INT y = x;
3800   int nbit = 3;
3801
3802   while (nbit < n)
3803     {
3804       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3805       nbit *= 2;
3806     }
3807
3808   return y;
3809 }
3810
3811 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3812    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3813    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3814    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3815    become signed.
3816
3817    The result is put in TARGET if that is convenient.
3818
3819    MODE is the mode of operation.  */
3820
3821 rtx
3822 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3823                              rtx op1, rtx target, int unsignedp)
3824 {
3825   rtx tem;
3826   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3827
3828   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3829                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3830   tem = expand_and (mode, tem, op1, NULL_RTX);
3831   adj_operand
3832     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3833                      adj_operand);
3834
3835   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3836                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3837   tem = expand_and (mode, tem, op0, NULL_RTX);
3838   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3839                           target);
3840
3841   return target;
3842 }
3843
3844 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3845
3846 static rtx
3847 extract_high_half (scalar_int_mode mode, rtx op)
3848 {
3849   if (mode == word_mode)
3850     return gen_highpart (mode, op);
3851
3852   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3853
3854   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3855                      GET_MODE_BITSIZE (mode), 0, 1);
3856   return convert_modes (mode, wider_mode, op, 0);
3857 }
3858
3859 /* Like expmed_mult_highpart, but only consider using a multiplication
3860    optab.  OP1 is an rtx for the constant operand.  */
3861
3862 static rtx
3863 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3864                             rtx target, int unsignedp, int max_cost)
3865 {
3866   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3867   optab moptab;
3868   rtx tem;
3869   int size;
3870   bool speed = optimize_insn_for_speed_p ();
3871
3872   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3873
3874   size = GET_MODE_BITSIZE (mode);
3875
3876   /* Firstly, try using a multiplication insn that only generates the needed
3877      high part of the product, and in the sign flavor of unsignedp.  */
3878   if (mul_highpart_cost (speed, mode) < max_cost)
3879     {
3880       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3881       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3882                           unsignedp, OPTAB_DIRECT);
3883       if (tem)
3884         return tem;
3885     }
3886
3887   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3888      Need to adjust the result after the multiplication.  */
3889   if (size - 1 < BITS_PER_WORD
3890       && (mul_highpart_cost (speed, mode)
3891           + 2 * shift_cost (speed, mode, size-1)
3892           + 4 * add_cost (speed, mode) < max_cost))
3893     {
3894       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3895       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3896                           unsignedp, OPTAB_DIRECT);
3897       if (tem)
3898         /* We used the wrong signedness.  Adjust the result.  */
3899         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3900                                             tem, unsignedp);
3901     }
3902
3903   /* Try widening multiplication.  */
3904   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3905   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3906       && mul_widen_cost (speed, wider_mode) < max_cost)
3907     {
3908       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3909                           unsignedp, OPTAB_WIDEN);
3910       if (tem)
3911         return extract_high_half (mode, tem);
3912     }
3913
3914   /* Try widening the mode and perform a non-widening multiplication.  */
3915   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3916       && size - 1 < BITS_PER_WORD
3917       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3918           < max_cost))
3919     {
3920       rtx_insn *insns;
3921       rtx wop0, wop1;
3922
3923       /* We need to widen the operands, for example to ensure the
3924          constant multiplier is correctly sign or zero extended.
3925          Use a sequence to clean-up any instructions emitted by
3926          the conversions if things don't work out.  */
3927       start_sequence ();
3928       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3929       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3930       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3931                           unsignedp, OPTAB_WIDEN);
3932       insns = get_insns ();
3933       end_sequence ();
3934
3935       if (tem)
3936         {
3937           emit_insn (insns);
3938           return extract_high_half (mode, tem);
3939         }
3940     }
3941
3942   /* Try widening multiplication of opposite signedness, and adjust.  */
3943   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3944   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3945       && size - 1 < BITS_PER_WORD
3946       && (mul_widen_cost (speed, wider_mode)
3947           + 2 * shift_cost (speed, mode, size-1)
3948           + 4 * add_cost (speed, mode) < max_cost))
3949     {
3950       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3951                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3952       if (tem != 0)
3953         {
3954           tem = extract_high_half (mode, tem);
3955           /* We used the wrong signedness.  Adjust the result.  */
3956           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3957                                               target, unsignedp);
3958         }
3959     }
3960
3961   return 0;
3962 }
3963
3964 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3965    putting the high half of the result in TARGET if that is convenient,
3966    and return where the result is.  If the operation cannot be performed,
3967    0 is returned.
3968
3969    MODE is the mode of operation and result.
3970
3971    UNSIGNEDP nonzero means unsigned multiply.
3972
3973    MAX_COST is the total allowed cost for the expanded RTL.  */
3974
3975 static rtx
3976 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3977                       rtx target, int unsignedp, int max_cost)
3978 {
3979   unsigned HOST_WIDE_INT cnst1;
3980   int extra_cost;
3981   bool sign_adjust = false;
3982   enum mult_variant variant;
3983   struct algorithm alg;
3984   rtx tem;
3985   bool speed = optimize_insn_for_speed_p ();
3986
3987   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3988   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3989
3990   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3991
3992   /* We can't optimize modes wider than BITS_PER_WORD.
3993      ??? We might be able to perform double-word arithmetic if
3994      mode == word_mode, however all the cost calculations in
3995      synth_mult etc. assume single-word operations.  */
3996   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3997   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3998     return expmed_mult_highpart_optab (mode, op0, op1, target,
3999                                        unsignedp, max_cost);
4000
4001   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
4002
4003   /* Check whether we try to multiply by a negative constant.  */
4004   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
4005     {
4006       sign_adjust = true;
4007       extra_cost += add_cost (speed, mode);
4008     }
4009
4010   /* See whether shift/add multiplication is cheap enough.  */
4011   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
4012                            max_cost - extra_cost))
4013     {
4014       /* See whether the specialized multiplication optabs are
4015          cheaper than the shift/add version.  */
4016       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
4017                                         alg.cost.cost + extra_cost);
4018       if (tem)
4019         return tem;
4020
4021       tem = convert_to_mode (wider_mode, op0, unsignedp);
4022       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
4023       tem = extract_high_half (mode, tem);
4024
4025       /* Adjust result for signedness.  */
4026       if (sign_adjust)
4027         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
4028
4029       return tem;
4030     }
4031   return expmed_mult_highpart_optab (mode, op0, op1, target,
4032                                      unsignedp, max_cost);
4033 }
4034
4035
4036 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
4037
4038 static rtx
4039 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4040 {
4041   rtx result, temp, shift;
4042   rtx_code_label *label;
4043   int logd;
4044   int prec = GET_MODE_PRECISION (mode);
4045
4046   logd = floor_log2 (d);
4047   result = gen_reg_rtx (mode);
4048
4049   /* Avoid conditional branches when they're expensive.  */
4050   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
4051       && optimize_insn_for_speed_p ())
4052     {
4053       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
4054                                       mode, 0, -1);
4055       if (signmask)
4056         {
4057           HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4058           signmask = force_reg (mode, signmask);
4059           shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4060
4061           /* Use the rtx_cost of a LSHIFTRT instruction to determine
4062              which instruction sequence to use.  If logical right shifts
4063              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
4064              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
4065
4066           temp = gen_rtx_LSHIFTRT (mode, result, shift);
4067           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4068               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4069                   > COSTS_N_INSNS (2)))
4070             {
4071               temp = expand_binop (mode, xor_optab, op0, signmask,
4072                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4073               temp = expand_binop (mode, sub_optab, temp, signmask,
4074                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4075               temp = expand_binop (mode, and_optab, temp,
4076                                    gen_int_mode (masklow, mode),
4077                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4078               temp = expand_binop (mode, xor_optab, temp, signmask,
4079                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4080               temp = expand_binop (mode, sub_optab, temp, signmask,
4081                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4082             }
4083           else
4084             {
4085               signmask = expand_binop (mode, lshr_optab, signmask, shift,
4086                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
4087               signmask = force_reg (mode, signmask);
4088
4089               temp = expand_binop (mode, add_optab, op0, signmask,
4090                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4091               temp = expand_binop (mode, and_optab, temp,
4092                                    gen_int_mode (masklow, mode),
4093                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4094               temp = expand_binop (mode, sub_optab, temp, signmask,
4095                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4096             }
4097           return temp;
4098         }
4099     }
4100
4101   /* Mask contains the mode's signbit and the significant bits of the
4102      modulus.  By including the signbit in the operation, many targets
4103      can avoid an explicit compare operation in the following comparison
4104      against zero.  */
4105   wide_int mask = wi::mask (logd, false, prec);
4106   mask = wi::set_bit (mask, prec - 1);
4107
4108   temp = expand_binop (mode, and_optab, op0,
4109                        immed_wide_int_const (mask, mode),
4110                        result, 1, OPTAB_LIB_WIDEN);
4111   if (temp != result)
4112     emit_move_insn (result, temp);
4113
4114   label = gen_label_rtx ();
4115   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4116
4117   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4118                        0, OPTAB_LIB_WIDEN);
4119
4120   mask = wi::mask (logd, true, prec);
4121   temp = expand_binop (mode, ior_optab, temp,
4122                        immed_wide_int_const (mask, mode),
4123                        result, 1, OPTAB_LIB_WIDEN);
4124   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4125                        0, OPTAB_LIB_WIDEN);
4126   if (temp != result)
4127     emit_move_insn (result, temp);
4128   emit_label (label);
4129   return result;
4130 }
4131
4132 /* Expand signed division of OP0 by a power of two D in mode MODE.
4133    This routine is only called for positive values of D.  */
4134
4135 static rtx
4136 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4137 {
4138   rtx temp;
4139   rtx_code_label *label;
4140   int logd;
4141
4142   logd = floor_log2 (d);
4143
4144   if (d == 2
4145       && BRANCH_COST (optimize_insn_for_speed_p (),
4146                       false) >= 1)
4147     {
4148       temp = gen_reg_rtx (mode);
4149       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4150       if (temp != NULL_RTX)
4151         {
4152           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4153                                0, OPTAB_LIB_WIDEN);
4154           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4155         }
4156     }
4157
4158   if (HAVE_conditional_move
4159       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4160     {
4161       rtx temp2;
4162
4163       start_sequence ();
4164       temp2 = copy_to_mode_reg (mode, op0);
4165       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4166                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
4167       temp = force_reg (mode, temp);
4168
4169       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
4170       temp2 = emit_conditional_move (temp2, { LT, temp2, const0_rtx, mode },
4171                                      temp, temp2, mode, 0);
4172       if (temp2)
4173         {
4174           rtx_insn *seq = get_insns ();
4175           end_sequence ();
4176           emit_insn (seq);
4177           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4178         }
4179       end_sequence ();
4180     }
4181
4182   if (BRANCH_COST (optimize_insn_for_speed_p (),
4183                    false) >= 2)
4184     {
4185       int ushift = GET_MODE_BITSIZE (mode) - logd;
4186
4187       temp = gen_reg_rtx (mode);
4188       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4189       if (temp != NULL_RTX)
4190         {
4191           if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4192               || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4193               > COSTS_N_INSNS (1))
4194             temp = expand_binop (mode, and_optab, temp,
4195                                  gen_int_mode (d - 1, mode),
4196                                  NULL_RTX, 0, OPTAB_LIB_WIDEN);
4197           else
4198             temp = expand_shift (RSHIFT_EXPR, mode, temp,
4199                                  ushift, NULL_RTX, 1);
4200           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4201                                0, OPTAB_LIB_WIDEN);
4202           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4203         }
4204     }
4205
4206   label = gen_label_rtx ();
4207   temp = copy_to_mode_reg (mode, op0);
4208   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4209   expand_inc (temp, gen_int_mode (d - 1, mode));
4210   emit_label (label);
4211   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4212 }
4213 \f
4214 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4215    if that is convenient, and returning where the result is.
4216    You may request either the quotient or the remainder as the result;
4217    specify REM_FLAG nonzero to get the remainder.
4218
4219    CODE is the expression code for which kind of division this is;
4220    it controls how rounding is done.  MODE is the machine mode to use.
4221    UNSIGNEDP nonzero means do unsigned division.  */
4222
4223 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4224    and then correct it by or'ing in missing high bits
4225    if result of ANDI is nonzero.
4226    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4227    This could optimize to a bfexts instruction.
4228    But C doesn't use these operations, so their optimizations are
4229    left for later.  */
4230 /* ??? For modulo, we don't actually need the highpart of the first product,
4231    the low part will do nicely.  And for small divisors, the second multiply
4232    can also be a low-part only multiply or even be completely left out.
4233    E.g. to calculate the remainder of a division by 3 with a 32 bit
4234    multiply, multiply with 0x55555556 and extract the upper two bits;
4235    the result is exact for inputs up to 0x1fffffff.
4236    The input range can be reduced by using cross-sum rules.
4237    For odd divisors >= 3, the following table gives right shift counts
4238    so that if a number is shifted by an integer multiple of the given
4239    amount, the remainder stays the same:
4240    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4241    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4242    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4243    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4244    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4245
4246    Cross-sum rules for even numbers can be derived by leaving as many bits
4247    to the right alone as the divisor has zeros to the right.
4248    E.g. if x is an unsigned 32 bit number:
4249    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4250    */
4251
4252 rtx
4253 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4254                rtx op0, rtx op1, rtx target, int unsignedp,
4255                enum optab_methods methods)
4256 {
4257   machine_mode compute_mode;
4258   rtx tquotient;
4259   rtx quotient = 0, remainder = 0;
4260   rtx_insn *last;
4261   rtx_insn *insn;
4262   optab optab1, optab2;
4263   int op1_is_constant, op1_is_pow2 = 0;
4264   int max_cost, extra_cost;
4265   static HOST_WIDE_INT last_div_const = 0;
4266   bool speed = optimize_insn_for_speed_p ();
4267
4268   op1_is_constant = CONST_INT_P (op1);
4269   if (op1_is_constant)
4270     {
4271       wide_int ext_op1 = rtx_mode_t (op1, mode);
4272       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4273                      || (! unsignedp
4274                          && wi::popcount (wi::neg (ext_op1)) == 1));
4275     }
4276
4277   /*
4278      This is the structure of expand_divmod:
4279
4280      First comes code to fix up the operands so we can perform the operations
4281      correctly and efficiently.
4282
4283      Second comes a switch statement with code specific for each rounding mode.
4284      For some special operands this code emits all RTL for the desired
4285      operation, for other cases, it generates only a quotient and stores it in
4286      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4287      to indicate that it has not done anything.
4288
4289      Last comes code that finishes the operation.  If QUOTIENT is set and
4290      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4291      QUOTIENT is not set, it is computed using trunc rounding.
4292
4293      We try to generate special code for division and remainder when OP1 is a
4294      constant.  If |OP1| = 2**n we can use shifts and some other fast
4295      operations.  For other values of OP1, we compute a carefully selected
4296      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4297      by m.
4298
4299      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4300      half of the product.  Different strategies for generating the product are
4301      implemented in expmed_mult_highpart.
4302
4303      If what we actually want is the remainder, we generate that by another
4304      by-constant multiplication and a subtraction.  */
4305
4306   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4307      code below will malfunction if we are, so check here and handle
4308      the special case if so.  */
4309   if (op1 == const1_rtx)
4310     return rem_flag ? const0_rtx : op0;
4311
4312     /* When dividing by -1, we could get an overflow.
4313      negv_optab can handle overflows.  */
4314   if (! unsignedp && op1 == constm1_rtx)
4315     {
4316       if (rem_flag)
4317         return const0_rtx;
4318       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4319                           ? negv_optab : neg_optab, op0, target, 0);
4320     }
4321
4322   if (target
4323       /* Don't use the function value register as a target
4324          since we have to read it as well as write it,
4325          and function-inlining gets confused by this.  */
4326       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4327           /* Don't clobber an operand while doing a multi-step calculation.  */
4328           || ((rem_flag || op1_is_constant)
4329               && (reg_mentioned_p (target, op0)
4330                   || (MEM_P (op0) && MEM_P (target))))
4331           || reg_mentioned_p (target, op1)
4332           || (MEM_P (op1) && MEM_P (target))))
4333     target = 0;
4334
4335   /* Get the mode in which to perform this computation.  Normally it will
4336      be MODE, but sometimes we can't do the desired operation in MODE.
4337      If so, pick a wider mode in which we can do the operation.  Convert
4338      to that mode at the start to avoid repeated conversions.
4339
4340      First see what operations we need.  These depend on the expression
4341      we are evaluating.  (We assume that divxx3 insns exist under the
4342      same conditions that modxx3 insns and that these insns don't normally
4343      fail.  If these assumptions are not correct, we may generate less
4344      efficient code in some cases.)
4345
4346      Then see if we find a mode in which we can open-code that operation
4347      (either a division, modulus, or shift).  Finally, check for the smallest
4348      mode for which we can do the operation with a library call.  */
4349
4350   /* We might want to refine this now that we have division-by-constant
4351      optimization.  Since expmed_mult_highpart tries so many variants, it is
4352      not straightforward to generalize this.  Maybe we should make an array
4353      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4354
4355   optab1 = (op1_is_pow2
4356             ? (unsignedp ? lshr_optab : ashr_optab)
4357             : (unsignedp ? udiv_optab : sdiv_optab));
4358   optab2 = (op1_is_pow2 ? optab1
4359             : (unsignedp ? udivmod_optab : sdivmod_optab));
4360
4361   if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN)
4362     {
4363       FOR_EACH_MODE_FROM (compute_mode, mode)
4364       if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4365           || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4366         break;
4367
4368       if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN)
4369         FOR_EACH_MODE_FROM (compute_mode, mode)
4370           if (optab_libfunc (optab1, compute_mode)
4371               || optab_libfunc (optab2, compute_mode))
4372             break;
4373     }
4374   else
4375     compute_mode = mode;
4376
4377   /* If we still couldn't find a mode, use MODE, but expand_binop will
4378      probably die.  */
4379   if (compute_mode == VOIDmode)
4380     compute_mode = mode;
4381
4382   if (target && GET_MODE (target) == compute_mode)
4383     tquotient = target;
4384   else
4385     tquotient = gen_reg_rtx (compute_mode);
4386
4387 #if 0
4388   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4389      (mode), and thereby get better code when OP1 is a constant.  Do that
4390      later.  It will require going over all usages of SIZE below.  */
4391   size = GET_MODE_BITSIZE (mode);
4392 #endif
4393
4394   /* Only deduct something for a REM if the last divide done was
4395      for a different constant.   Then set the constant of the last
4396      divide.  */
4397   max_cost = (unsignedp
4398               ? udiv_cost (speed, compute_mode)
4399               : sdiv_cost (speed, compute_mode));
4400   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4401                      && INTVAL (op1) == last_div_const))
4402     max_cost -= (mul_cost (speed, compute_mode)
4403                  + add_cost (speed, compute_mode));
4404
4405   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4406
4407   /* Now convert to the best mode to use.  */
4408   if (compute_mode != mode)
4409     {
4410       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4411       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4412
4413       /* convert_modes may have placed op1 into a register, so we
4414          must recompute the following.  */
4415       op1_is_constant = CONST_INT_P (op1);
4416       if (op1_is_constant)
4417         {
4418           wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4419           op1_is_pow2 = (wi::popcount (ext_op1) == 1
4420                          || (! unsignedp
4421                              && wi::popcount (wi::neg (ext_op1)) == 1));
4422         }
4423       else
4424         op1_is_pow2 = 0;
4425     }
4426
4427   /* If one of the operands is a volatile MEM, copy it into a register.  */
4428
4429   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4430     op0 = force_reg (compute_mode, op0);
4431   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4432     op1 = force_reg (compute_mode, op1);
4433
4434   /* If we need the remainder or if OP1 is constant, we need to
4435      put OP0 in a register in case it has any queued subexpressions.  */
4436   if (rem_flag || op1_is_constant)
4437     op0 = force_reg (compute_mode, op0);
4438
4439   last = get_last_insn ();
4440
4441   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4442   if (unsignedp)
4443     {
4444       if (code == FLOOR_DIV_EXPR)
4445         code = TRUNC_DIV_EXPR;
4446       if (code == FLOOR_MOD_EXPR)
4447         code = TRUNC_MOD_EXPR;
4448       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4449         code = TRUNC_DIV_EXPR;
4450     }
4451
4452   if (op1 != const0_rtx)
4453     switch (code)
4454       {
4455       case TRUNC_MOD_EXPR:
4456       case TRUNC_DIV_EXPR:
4457         if (op1_is_constant)
4458           {
4459             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4460             int size = GET_MODE_BITSIZE (int_mode);
4461             if (unsignedp)
4462               {
4463                 unsigned HOST_WIDE_INT mh, ml;
4464                 int pre_shift, post_shift;
4465                 wide_int wd = rtx_mode_t (op1, int_mode);
4466                 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4467
4468                 if (wi::popcount (wd) == 1)
4469                   {
4470                     pre_shift = floor_log2 (d);
4471                     if (rem_flag)
4472                       {
4473                         unsigned HOST_WIDE_INT mask
4474                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4475                         remainder
4476                           = expand_binop (int_mode, and_optab, op0,
4477                                           gen_int_mode (mask, int_mode),
4478                                           remainder, 1, methods);
4479                         if (remainder)
4480                           return gen_lowpart (mode, remainder);
4481                       }
4482                     quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4483                                              pre_shift, tquotient, 1);
4484                   }
4485                 else if (size <= HOST_BITS_PER_WIDE_INT)
4486                   {
4487                     if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4488                       {
4489                         /* Most significant bit of divisor is set; emit an scc
4490                            insn.  */
4491                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4492                                                           int_mode, 1, 1);
4493                       }
4494                     else
4495                       {
4496                         /* Find a suitable multiplier and right shift count
4497                            instead of directly dividing by D.  */
4498                         mh = choose_multiplier (d, size, size,
4499                                                 &ml, &post_shift);
4500
4501                         /* If the suggested multiplier is more than SIZE bits,
4502                            we can do better for even divisors, using an
4503                            initial right shift.  */
4504                         if (mh != 0 && (d & 1) == 0)
4505                           {
4506                             pre_shift = ctz_or_zero (d);
4507                             mh = choose_multiplier (d >> pre_shift, size,
4508                                                     size - pre_shift,
4509                                                     &ml, &post_shift);
4510                             gcc_assert (!mh);
4511                           }
4512                         else
4513                           pre_shift = 0;
4514
4515                         if (mh != 0)
4516                           {
4517                             rtx t1, t2, t3, t4;
4518
4519                             if (post_shift - 1 >= BITS_PER_WORD)
4520                               goto fail1;
4521
4522                             extra_cost
4523                               = (shift_cost (speed, int_mode, post_shift - 1)
4524                                  + shift_cost (speed, int_mode, 1)
4525                                  + 2 * add_cost (speed, int_mode));
4526                             t1 = expmed_mult_highpart
4527                               (int_mode, op0, gen_int_mode (ml, int_mode),
4528                                NULL_RTX, 1, max_cost - extra_cost);
4529                             if (t1 == 0)
4530                               goto fail1;
4531                             t2 = force_operand (gen_rtx_MINUS (int_mode,
4532                                                                op0, t1),
4533                                                 NULL_RTX);
4534                             t3 = expand_shift (RSHIFT_EXPR, int_mode,
4535                                                t2, 1, NULL_RTX, 1);
4536                             t4 = force_operand (gen_rtx_PLUS (int_mode,
4537                                                               t1, t3),
4538                                                 NULL_RTX);
4539                             quotient = expand_shift
4540                               (RSHIFT_EXPR, int_mode, t4,
4541                                post_shift - 1, tquotient, 1);
4542                           }
4543                         else
4544                           {
4545                             rtx t1, t2;
4546
4547                             if (pre_shift >= BITS_PER_WORD
4548                                 || post_shift >= BITS_PER_WORD)
4549                               goto fail1;
4550
4551                             t1 = expand_shift
4552                               (RSHIFT_EXPR, int_mode, op0,
4553                                pre_shift, NULL_RTX, 1);
4554                             extra_cost
4555                               = (shift_cost (speed, int_mode, pre_shift)
4556                                  + shift_cost (speed, int_mode, post_shift));
4557                             t2 = expmed_mult_highpart
4558                               (int_mode, t1,
4559                                gen_int_mode (ml, int_mode),
4560                                NULL_RTX, 1, max_cost - extra_cost);
4561                             if (t2 == 0)
4562                               goto fail1;
4563                             quotient = expand_shift
4564                               (RSHIFT_EXPR, int_mode, t2,
4565                                post_shift, tquotient, 1);
4566                           }
4567                       }
4568                   }
4569                 else            /* Too wide mode to use tricky code */
4570                   break;
4571
4572                 insn = get_last_insn ();
4573                 if (insn != last)
4574                   set_dst_reg_note (insn, REG_EQUAL,
4575                                     gen_rtx_UDIV (int_mode, op0, op1),
4576                                     quotient);
4577               }
4578             else                /* TRUNC_DIV, signed */
4579               {
4580                 unsigned HOST_WIDE_INT ml;
4581                 int post_shift;
4582                 rtx mlr;
4583                 HOST_WIDE_INT d = INTVAL (op1);
4584                 unsigned HOST_WIDE_INT abs_d;
4585
4586                 /* Not prepared to handle division/remainder by
4587                    0xffffffffffffffff8000000000000000 etc.  */
4588                 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4589                   break;
4590
4591                 /* Since d might be INT_MIN, we have to cast to
4592                    unsigned HOST_WIDE_INT before negating to avoid
4593                    undefined signed overflow.  */
4594                 abs_d = (d >= 0
4595                          ? (unsigned HOST_WIDE_INT) d
4596                          : - (unsigned HOST_WIDE_INT) d);
4597
4598                 /* n rem d = n rem -d */
4599                 if (rem_flag && d < 0)
4600                   {
4601                     d = abs_d;
4602                     op1 = gen_int_mode (abs_d, int_mode);
4603                   }
4604
4605                 if (d == 1)
4606                   quotient = op0;
4607                 else if (d == -1)
4608                   quotient = expand_unop (int_mode, neg_optab, op0,
4609                                           tquotient, 0);
4610                 else if (size <= HOST_BITS_PER_WIDE_INT
4611                          && abs_d == HOST_WIDE_INT_1U << (size - 1))
4612                   {
4613                     /* This case is not handled correctly below.  */
4614                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4615                                                 int_mode, 1, 1);
4616                     if (quotient == 0)
4617                       goto fail1;
4618                   }
4619                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4620                          && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4621                          && (rem_flag
4622                              ? smod_pow2_cheap (speed, int_mode)
4623                              : sdiv_pow2_cheap (speed, int_mode))
4624                          /* We assume that cheap metric is true if the
4625                             optab has an expander for this mode.  */
4626                          && ((optab_handler ((rem_flag ? smod_optab
4627                                               : sdiv_optab),
4628                                              int_mode)
4629                               != CODE_FOR_nothing)
4630                              || (optab_handler (sdivmod_optab, int_mode)
4631                                  != CODE_FOR_nothing)))
4632                   ;
4633                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4634                   {
4635                     if (rem_flag)
4636                       {
4637                         remainder = expand_smod_pow2 (int_mode, op0, d);
4638                         if (remainder)
4639                           return gen_lowpart (mode, remainder);
4640                       }
4641
4642                     if (sdiv_pow2_cheap (speed, int_mode)
4643                         && ((optab_handler (sdiv_optab, int_mode)
4644                              != CODE_FOR_nothing)
4645                             || (optab_handler (sdivmod_optab, int_mode)
4646                                 != CODE_FOR_nothing)))
4647                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4648                                                 int_mode, op0,
4649                                                 gen_int_mode (abs_d,
4650                                                               int_mode),
4651                                                 NULL_RTX, 0);
4652                     else
4653                       quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4654
4655                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4656                        negate the quotient.  */
4657                     if (d < 0)
4658                       {
4659                         insn = get_last_insn ();
4660                         if (insn != last
4661                             && abs_d < (HOST_WIDE_INT_1U
4662                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4663                           set_dst_reg_note (insn, REG_EQUAL,
4664                                             gen_rtx_DIV (int_mode, op0,
4665                                                          gen_int_mode
4666                                                            (abs_d,
4667                                                             int_mode)),
4668                                             quotient);
4669
4670                         quotient = expand_unop (int_mode, neg_optab,
4671                                                 quotient, quotient, 0);
4672                       }
4673                   }
4674                 else if (size <= HOST_BITS_PER_WIDE_INT)
4675                   {
4676                     choose_multiplier (abs_d, size, size - 1,
4677                                        &ml, &post_shift);
4678                     if (ml < HOST_WIDE_INT_1U << (size - 1))
4679                       {
4680                         rtx t1, t2, t3;
4681
4682                         if (post_shift >= BITS_PER_WORD
4683                             || size - 1 >= BITS_PER_WORD)
4684                           goto fail1;
4685
4686                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4687                                       + shift_cost (speed, int_mode, size - 1)
4688                                       + add_cost (speed, int_mode));
4689                         t1 = expmed_mult_highpart
4690                           (int_mode, op0, gen_int_mode (ml, int_mode),
4691                            NULL_RTX, 0, max_cost - extra_cost);
4692                         if (t1 == 0)
4693                           goto fail1;
4694                         t2 = expand_shift
4695                           (RSHIFT_EXPR, int_mode, t1,
4696                            post_shift, NULL_RTX, 0);
4697                         t3 = expand_shift
4698                           (RSHIFT_EXPR, int_mode, op0,
4699                            size - 1, NULL_RTX, 0);
4700                         if (d < 0)
4701                           quotient
4702                             = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4703                                              tquotient);
4704                         else
4705                           quotient
4706                             = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4707                                              tquotient);
4708                       }
4709                     else
4710                       {
4711                         rtx t1, t2, t3, t4;
4712
4713                         if (post_shift >= BITS_PER_WORD
4714                             || size - 1 >= BITS_PER_WORD)
4715                           goto fail1;
4716
4717                         ml |= HOST_WIDE_INT_M1U << (size - 1);
4718                         mlr = gen_int_mode (ml, int_mode);
4719                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4720                                       + shift_cost (speed, int_mode, size - 1)
4721                                       + 2 * add_cost (speed, int_mode));
4722                         t1 = expmed_mult_highpart (int_mode, op0, mlr,
4723                                                    NULL_RTX, 0,
4724                                                    max_cost - extra_cost);
4725                         if (t1 == 0)
4726                           goto fail1;
4727                         t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4728                                             NULL_RTX);
4729                         t3 = expand_shift
4730                           (RSHIFT_EXPR, int_mode, t2,
4731                            post_shift, NULL_RTX, 0);
4732                         t4 = expand_shift
4733                           (RSHIFT_EXPR, int_mode, op0,
4734                            size - 1, NULL_RTX, 0);
4735                         if (d < 0)
4736                           quotient
4737                             = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4738                                              tquotient);
4739                         else
4740                           quotient
4741                             = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4742                                              tquotient);
4743                       }
4744                   }
4745                 else            /* Too wide mode to use tricky code */
4746                   break;
4747
4748                 insn = get_last_insn ();
4749                 if (insn != last)
4750                   set_dst_reg_note (insn, REG_EQUAL,
4751                                     gen_rtx_DIV (int_mode, op0, op1),
4752                                     quotient);
4753               }
4754             break;
4755           }
4756       fail1:
4757         delete_insns_since (last);
4758         break;
4759
4760       case FLOOR_DIV_EXPR:
4761       case FLOOR_MOD_EXPR:
4762       /* We will come here only for signed operations.  */
4763         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4764           {
4765             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4766             int size = GET_MODE_BITSIZE (int_mode);
4767             unsigned HOST_WIDE_INT mh, ml;
4768             int pre_shift, post_shift;
4769             HOST_WIDE_INT d = INTVAL (op1);
4770
4771             if (d > 0)
4772               {
4773                 /* We could just as easily deal with negative constants here,
4774                    but it does not seem worth the trouble for GCC 2.6.  */
4775                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4776                   {
4777                     pre_shift = floor_log2 (d);
4778                     if (rem_flag)
4779                       {
4780                         unsigned HOST_WIDE_INT mask
4781                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4782                         remainder = expand_binop
4783                           (int_mode, and_optab, op0,
4784                            gen_int_mode (mask, int_mode),
4785                            remainder, 0, methods);
4786                         if (remainder)
4787                           return gen_lowpart (mode, remainder);
4788                       }
4789                     quotient = expand_shift
4790                       (RSHIFT_EXPR, int_mode, op0,
4791                        pre_shift, tquotient, 0);
4792                   }
4793                 else
4794                   {
4795                     rtx t1, t2, t3, t4;
4796
4797                     mh = choose_multiplier (d, size, size - 1,
4798                                             &ml, &post_shift);
4799                     gcc_assert (!mh);
4800
4801                     if (post_shift < BITS_PER_WORD
4802                         && size - 1 < BITS_PER_WORD)
4803                       {
4804                         t1 = expand_shift
4805                           (RSHIFT_EXPR, int_mode, op0,
4806                            size - 1, NULL_RTX, 0);
4807                         t2 = expand_binop (int_mode, xor_optab, op0, t1,
4808                                            NULL_RTX, 0, OPTAB_WIDEN);
4809                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4810                                       + shift_cost (speed, int_mode, size - 1)
4811                                       + 2 * add_cost (speed, int_mode));
4812                         t3 = expmed_mult_highpart
4813                           (int_mode, t2, gen_int_mode (ml, int_mode),
4814                            NULL_RTX, 1, max_cost - extra_cost);
4815                         if (t3 != 0)
4816                           {
4817                             t4 = expand_shift
4818                               (RSHIFT_EXPR, int_mode, t3,
4819                                post_shift, NULL_RTX, 1);
4820                             quotient = expand_binop (int_mode, xor_optab,
4821                                                      t4, t1, tquotient, 0,
4822                                                      OPTAB_WIDEN);
4823                           }
4824                       }
4825                   }
4826               }
4827             else
4828               {
4829                 rtx nsign, t1, t2, t3, t4;
4830                 t1 = force_operand (gen_rtx_PLUS (int_mode,
4831                                                   op0, constm1_rtx), NULL_RTX);
4832                 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4833                                    0, OPTAB_WIDEN);
4834                 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4835                                       size - 1, NULL_RTX, 0);
4836                 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4837                                     NULL_RTX);
4838                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4839                                     NULL_RTX, 0);
4840                 if (t4)
4841                   {
4842                     rtx t5;
4843                     t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4844                                       NULL_RTX, 0);
4845                     quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4846                                               tquotient);
4847                   }
4848               }
4849           }
4850
4851         if (quotient != 0)
4852           break;
4853         delete_insns_since (last);
4854
4855         /* Try using an instruction that produces both the quotient and
4856            remainder, using truncation.  We can easily compensate the quotient
4857            or remainder to get floor rounding, once we have the remainder.
4858            Notice that we compute also the final remainder value here,
4859            and return the result right away.  */
4860         if (target == 0 || GET_MODE (target) != compute_mode)
4861           target = gen_reg_rtx (compute_mode);
4862
4863         if (rem_flag)
4864           {
4865             remainder
4866               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4867             quotient = gen_reg_rtx (compute_mode);
4868           }
4869         else
4870           {
4871             quotient
4872               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4873             remainder = gen_reg_rtx (compute_mode);
4874           }
4875
4876         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4877                                  quotient, remainder, 0))
4878           {
4879             /* This could be computed with a branch-less sequence.
4880                Save that for later.  */
4881             rtx tem;
4882             rtx_code_label *label = gen_label_rtx ();
4883             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4884             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4885                                 NULL_RTX, 0, OPTAB_WIDEN);
4886             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4887             expand_dec (quotient, const1_rtx);
4888             expand_inc (remainder, op1);
4889             emit_label (label);
4890             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4891           }
4892
4893         /* No luck with division elimination or divmod.  Have to do it
4894            by conditionally adjusting op0 *and* the result.  */
4895         {
4896           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4897           rtx adjusted_op0;
4898           rtx tem;
4899
4900           quotient = gen_reg_rtx (compute_mode);
4901           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4902           label1 = gen_label_rtx ();
4903           label2 = gen_label_rtx ();
4904           label3 = gen_label_rtx ();
4905           label4 = gen_label_rtx ();
4906           label5 = gen_label_rtx ();
4907           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4908           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4909           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4910                               quotient, 0, methods);
4911           if (tem != quotient)
4912             emit_move_insn (quotient, tem);
4913           emit_jump_insn (targetm.gen_jump (label5));
4914           emit_barrier ();
4915           emit_label (label1);
4916           expand_inc (adjusted_op0, const1_rtx);
4917           emit_jump_insn (targetm.gen_jump (label4));
4918           emit_barrier ();
4919           emit_label (label2);
4920           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4921           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4922                               quotient, 0, methods);
4923           if (tem != quotient)
4924             emit_move_insn (quotient, tem);
4925           emit_jump_insn (targetm.gen_jump (label5));
4926           emit_barrier ();
4927           emit_label (label3);
4928           expand_dec (adjusted_op0, const1_rtx);
4929           emit_label (label4);
4930           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4931                               quotient, 0, methods);
4932           if (tem != quotient)
4933             emit_move_insn (quotient, tem);
4934           expand_dec (quotient, const1_rtx);
4935           emit_label (label5);
4936         }
4937         break;
4938
4939       case CEIL_DIV_EXPR:
4940       case CEIL_MOD_EXPR:
4941         if (unsignedp)
4942           {
4943             if (op1_is_constant
4944                 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4945                 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4946                     || INTVAL (op1) >= 0))
4947               {
4948                 scalar_int_mode int_mode
4949                   = as_a <scalar_int_mode> (compute_mode);
4950                 rtx t1, t2, t3;
4951                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4952                 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4953                                    floor_log2 (d), tquotient, 1);
4954                 t2 = expand_binop (int_mode, and_optab, op0,
4955                                    gen_int_mode (d - 1, int_mode),
4956                                    NULL_RTX, 1, methods);
4957                 t3 = gen_reg_rtx (int_mode);
4958                 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4959                 if (t3 == 0)
4960                   {
4961                     rtx_code_label *lab;
4962                     lab = gen_label_rtx ();
4963                     do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4964                     expand_inc (t1, const1_rtx);
4965                     emit_label (lab);
4966                     quotient = t1;
4967                   }
4968                 else
4969                   quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4970                                             tquotient);
4971                 break;
4972               }
4973
4974             /* Try using an instruction that produces both the quotient and
4975                remainder, using truncation.  We can easily compensate the
4976                quotient or remainder to get ceiling rounding, once we have the
4977                remainder.  Notice that we compute also the final remainder
4978                value here, and return the result right away.  */
4979             if (target == 0 || GET_MODE (target) != compute_mode)
4980               target = gen_reg_rtx (compute_mode);
4981
4982             if (rem_flag)
4983               {
4984                 remainder = (REG_P (target)
4985                              ? target : gen_reg_rtx (compute_mode));
4986                 quotient = gen_reg_rtx (compute_mode);
4987               }
4988             else
4989               {
4990                 quotient = (REG_P (target)
4991                             ? target : gen_reg_rtx (compute_mode));
4992                 remainder = gen_reg_rtx (compute_mode);
4993               }
4994
4995             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4996                                      remainder, 1))
4997               {
4998                 /* This could be computed with a branch-less sequence.
4999                    Save that for later.  */
5000                 rtx_code_label *label = gen_label_rtx ();
5001                 do_cmp_and_jump (remainder, const0_rtx, EQ,
5002                                  compute_mode, label);
5003                 expand_inc (quotient, const1_rtx);
5004                 expand_dec (remainder, op1);
5005                 emit_label (label);
5006                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5007               }
5008
5009             /* No luck with division elimination or divmod.  Have to do it
5010                by conditionally adjusting op0 *and* the result.  */
5011             {
5012               rtx_code_label *label1, *label2;
5013               rtx adjusted_op0, tem;
5014
5015               quotient = gen_reg_rtx (compute_mode);
5016               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5017               label1 = gen_label_rtx ();
5018               label2 = gen_label_rtx ();
5019               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
5020                                compute_mode, label1);
5021               emit_move_insn  (quotient, const0_rtx);
5022               emit_jump_insn (targetm.gen_jump (label2));
5023               emit_barrier ();
5024               emit_label (label1);
5025               expand_dec (adjusted_op0, const1_rtx);
5026               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
5027                                   quotient, 1, methods);
5028               if (tem != quotient)
5029                 emit_move_insn (quotient, tem);
5030               expand_inc (quotient, const1_rtx);
5031               emit_label (label2);
5032             }
5033           }
5034         else /* signed */
5035           {
5036             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
5037                 && INTVAL (op1) >= 0)
5038               {
5039                 /* This is extremely similar to the code for the unsigned case
5040                    above.  For 2.7 we should merge these variants, but for
5041                    2.6.1 I don't want to touch the code for unsigned since that
5042                    gets used in C.  The signed case will only be used by other
5043                    languages (Ada).  */
5044
5045                 rtx t1, t2, t3;
5046                 unsigned HOST_WIDE_INT d = INTVAL (op1);
5047                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
5048                                    floor_log2 (d), tquotient, 0);
5049                 t2 = expand_binop (compute_mode, and_optab, op0,
5050                                    gen_int_mode (d - 1, compute_mode),
5051                                    NULL_RTX, 1, methods);
5052                 t3 = gen_reg_rtx (compute_mode);
5053                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
5054                                       compute_mode, 1, 1);
5055                 if (t3 == 0)
5056                   {
5057                     rtx_code_label *lab;
5058                     lab = gen_label_rtx ();
5059                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
5060                     expand_inc (t1, const1_rtx);
5061                     emit_label (lab);
5062                     quotient = t1;
5063                   }
5064                 else
5065                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
5066                                                           t1, t3),
5067                                             tquotient);
5068                 break;
5069               }
5070
5071             /* Try using an instruction that produces both the quotient and
5072                remainder, using truncation.  We can easily compensate the
5073                quotient or remainder to get ceiling rounding, once we have the
5074                remainder.  Notice that we compute also the final remainder
5075                value here, and return the result right away.  */
5076             if (target == 0 || GET_MODE (target) != compute_mode)
5077               target = gen_reg_rtx (compute_mode);
5078             if (rem_flag)
5079               {
5080                 remainder= (REG_P (target)
5081                             ? target : gen_reg_rtx (compute_mode));
5082                 quotient = gen_reg_rtx (compute_mode);
5083               }
5084             else
5085               {
5086                 quotient = (REG_P (target)
5087                             ? target : gen_reg_rtx (compute_mode));
5088                 remainder = gen_reg_rtx (compute_mode);
5089               }
5090
5091             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5092                                      remainder, 0))
5093               {
5094                 /* This could be computed with a branch-less sequence.
5095                    Save that for later.  */
5096                 rtx tem;
5097                 rtx_code_label *label = gen_label_rtx ();
5098                 do_cmp_and_jump (remainder, const0_rtx, EQ,
5099                                  compute_mode, label);
5100                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5101                                     NULL_RTX, 0, OPTAB_WIDEN);
5102                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5103                 expand_inc (quotient, const1_rtx);
5104                 expand_dec (remainder, op1);
5105                 emit_label (label);
5106                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5107               }
5108
5109             /* No luck with division elimination or divmod.  Have to do it
5110                by conditionally adjusting op0 *and* the result.  */
5111             {
5112               rtx_code_label *label1, *label2, *label3, *label4, *label5;
5113               rtx adjusted_op0;
5114               rtx tem;
5115
5116               quotient = gen_reg_rtx (compute_mode);
5117               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5118               label1 = gen_label_rtx ();
5119               label2 = gen_label_rtx ();
5120               label3 = gen_label_rtx ();
5121               label4 = gen_label_rtx ();
5122               label5 = gen_label_rtx ();
5123               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5124               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5125                                compute_mode, label1);
5126               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5127                                   quotient, 0, methods);
5128               if (tem != quotient)
5129                 emit_move_insn (quotient, tem);
5130               emit_jump_insn (targetm.gen_jump (label5));
5131               emit_barrier ();
5132               emit_label (label1);
5133               expand_dec (adjusted_op0, const1_rtx);
5134               emit_jump_insn (targetm.gen_jump (label4));
5135               emit_barrier ();
5136               emit_label (label2);
5137               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5138                                compute_mode, label3);
5139               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5140                                   quotient, 0, methods);
5141               if (tem != quotient)
5142                 emit_move_insn (quotient, tem);
5143               emit_jump_insn (targetm.gen_jump (label5));
5144               emit_barrier ();
5145               emit_label (label3);
5146               expand_inc (adjusted_op0, const1_rtx);
5147               emit_label (label4);
5148               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5149                                   quotient, 0, methods);
5150               if (tem != quotient)
5151                 emit_move_insn (quotient, tem);
5152               expand_inc (quotient, const1_rtx);
5153               emit_label (label5);
5154             }
5155           }
5156         break;
5157
5158       case EXACT_DIV_EXPR:
5159         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5160           {
5161             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5162             int size = GET_MODE_BITSIZE (int_mode);
5163             HOST_WIDE_INT d = INTVAL (op1);
5164             unsigned HOST_WIDE_INT ml;
5165             int pre_shift;
5166             rtx t1;
5167
5168             pre_shift = ctz_or_zero (d);
5169             ml = invert_mod2n (d >> pre_shift, size);
5170             t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5171                                pre_shift, NULL_RTX, unsignedp);
5172             quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5173                                     NULL_RTX, 1);
5174
5175             insn = get_last_insn ();
5176             set_dst_reg_note (insn, REG_EQUAL,
5177                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5178                                               int_mode, op0, op1),
5179                               quotient);
5180           }
5181         break;
5182
5183       case ROUND_DIV_EXPR:
5184       case ROUND_MOD_EXPR:
5185         if (unsignedp)
5186           {
5187             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5188             rtx tem;
5189             rtx_code_label *label;
5190             label = gen_label_rtx ();
5191             quotient = gen_reg_rtx (int_mode);
5192             remainder = gen_reg_rtx (int_mode);
5193             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5194               {
5195                 rtx tem;
5196                 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5197                                          quotient, 1, methods);
5198                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5199                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5200                                           remainder, 1, methods);
5201               }
5202             tem = plus_constant (int_mode, op1, -1);
5203             tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5204             do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5205             expand_inc (quotient, const1_rtx);
5206             expand_dec (remainder, op1);
5207             emit_label (label);
5208           }
5209         else
5210           {
5211             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5212             int size = GET_MODE_BITSIZE (int_mode);
5213             rtx abs_rem, abs_op1, tem, mask;
5214             rtx_code_label *label;
5215             label = gen_label_rtx ();
5216             quotient = gen_reg_rtx (int_mode);
5217             remainder = gen_reg_rtx (int_mode);
5218             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5219               {
5220                 rtx tem;
5221                 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5222                                          quotient, 0, methods);
5223                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5224                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5225                                           remainder, 0, methods);
5226               }
5227             abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5228             abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5229             tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5230                                 1, NULL_RTX, 1);
5231             do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5232             tem = expand_binop (int_mode, xor_optab, op0, op1,
5233                                 NULL_RTX, 0, OPTAB_WIDEN);
5234             mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5235                                  size - 1, NULL_RTX, 0);
5236             tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5237                                 NULL_RTX, 0, OPTAB_WIDEN);
5238             tem = expand_binop (int_mode, sub_optab, tem, mask,
5239                                 NULL_RTX, 0, OPTAB_WIDEN);
5240             expand_inc (quotient, tem);
5241             tem = expand_binop (int_mode, xor_optab, mask, op1,
5242                                 NULL_RTX, 0, OPTAB_WIDEN);
5243             tem = expand_binop (int_mode, sub_optab, tem, mask,
5244                                 NULL_RTX, 0, OPTAB_WIDEN);
5245             expand_dec (remainder, tem);
5246             emit_label (label);
5247           }
5248         return gen_lowpart (mode, rem_flag ? remainder : quotient);
5249
5250       default:
5251         gcc_unreachable ();
5252       }
5253
5254   if (quotient == 0)
5255     {
5256       if (target && GET_MODE (target) != compute_mode)
5257         target = 0;
5258
5259       if (rem_flag)
5260         {
5261           /* Try to produce the remainder without producing the quotient.
5262              If we seem to have a divmod pattern that does not require widening,
5263              don't try widening here.  We should really have a WIDEN argument
5264              to expand_twoval_binop, since what we'd really like to do here is
5265              1) try a mod insn in compute_mode
5266              2) try a divmod insn in compute_mode
5267              3) try a div insn in compute_mode and multiply-subtract to get
5268                 remainder
5269              4) try the same things with widening allowed.  */
5270           remainder
5271             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5272                                  op0, op1, target,
5273                                  unsignedp,
5274                                  ((optab_handler (optab2, compute_mode)
5275                                    != CODE_FOR_nothing)
5276                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
5277           if (remainder == 0)
5278             {
5279               /* No luck there.  Can we do remainder and divide at once
5280                  without a library call?  */
5281               remainder = gen_reg_rtx (compute_mode);
5282               if (! expand_twoval_binop ((unsignedp
5283                                           ? udivmod_optab
5284                                           : sdivmod_optab),
5285                                          op0, op1,
5286                                          NULL_RTX, remainder, unsignedp))
5287                 remainder = 0;
5288             }
5289
5290           if (remainder)
5291             return gen_lowpart (mode, remainder);
5292         }
5293
5294       /* Produce the quotient.  Try a quotient insn, but not a library call.
5295          If we have a divmod in this mode, use it in preference to widening
5296          the div (for this test we assume it will not fail). Note that optab2
5297          is set to the one of the two optabs that the call below will use.  */
5298       quotient
5299         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5300                              op0, op1, rem_flag ? NULL_RTX : target,
5301                              unsignedp,
5302                              ((optab_handler (optab2, compute_mode)
5303                                != CODE_FOR_nothing)
5304                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5305
5306       if (quotient == 0)
5307         {
5308           /* No luck there.  Try a quotient-and-remainder insn,
5309              keeping the quotient alone.  */
5310           quotient = gen_reg_rtx (compute_mode);
5311           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5312                                      op0, op1,
5313                                      quotient, NULL_RTX, unsignedp))
5314             {
5315               quotient = 0;
5316               if (! rem_flag)
5317                 /* Still no luck.  If we are not computing the remainder,
5318                    use a library call for the quotient.  */
5319                 quotient = sign_expand_binop (compute_mode,
5320                                               udiv_optab, sdiv_optab,
5321                                               op0, op1, target,
5322                                               unsignedp, methods);
5323             }
5324         }
5325     }
5326
5327   if (rem_flag)
5328     {
5329       if (target && GET_MODE (target) != compute_mode)
5330         target = 0;
5331
5332       if (quotient == 0)
5333         {
5334           /* No divide instruction either.  Use library for remainder.  */
5335           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5336                                          op0, op1, target,
5337                                          unsignedp, methods);
5338           /* No remainder function.  Try a quotient-and-remainder
5339              function, keeping the remainder.  */
5340           if (!remainder
5341               && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN))
5342             {
5343               remainder = gen_reg_rtx (compute_mode);
5344               if (!expand_twoval_binop_libfunc
5345                   (unsignedp ? udivmod_optab : sdivmod_optab,
5346                    op0, op1,
5347                    NULL_RTX, remainder,
5348                    unsignedp ? UMOD : MOD))
5349                 remainder = NULL_RTX;
5350             }
5351         }
5352       else
5353         {
5354           /* We divided.  Now finish doing X - Y * (X / Y).  */
5355           remainder = expand_mult (compute_mode, quotient, op1,
5356                                    NULL_RTX, unsignedp);
5357           remainder = expand_binop (compute_mode, sub_optab, op0,
5358                                     remainder, target, unsignedp,
5359                                     methods);
5360         }
5361     }
5362
5363   if (methods != OPTAB_LIB_WIDEN
5364       && (rem_flag ? remainder : quotient) == NULL_RTX)
5365     return NULL_RTX;
5366
5367   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5368 }
5369 \f
5370 /* Return a tree node with data type TYPE, describing the value of X.
5371    Usually this is an VAR_DECL, if there is no obvious better choice.
5372    X may be an expression, however we only support those expressions
5373    generated by loop.c.  */
5374
5375 tree
5376 make_tree (tree type, rtx x)
5377 {
5378   tree t;
5379
5380   switch (GET_CODE (x))
5381     {
5382     case CONST_INT:
5383     case CONST_WIDE_INT:
5384       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5385       return t;
5386
5387     case CONST_DOUBLE:
5388       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5389       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5390         t = wide_int_to_tree (type,
5391                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5392                                                     HOST_BITS_PER_WIDE_INT * 2));
5393       else
5394         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5395
5396       return t;
5397
5398     case CONST_VECTOR:
5399       {
5400         unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5401         unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5402         tree itype = TREE_TYPE (type);
5403
5404         /* Build a tree with vector elements.  */
5405         tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5406         unsigned int count = elts.encoded_nelts ();
5407         for (unsigned int i = 0; i < count; ++i)
5408           {
5409             rtx elt = CONST_VECTOR_ELT (x, i);
5410             elts.quick_push (make_tree (itype, elt));
5411           }
5412
5413         return elts.build ();
5414       }
5415
5416     case PLUS:
5417       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5418                           make_tree (type, XEXP (x, 1)));
5419
5420     case MINUS:
5421       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5422                           make_tree (type, XEXP (x, 1)));
5423
5424     case NEG:
5425       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5426
5427     case MULT:
5428       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5429                           make_tree (type, XEXP (x, 1)));
5430
5431     case ASHIFT:
5432       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5433                           make_tree (type, XEXP (x, 1)));
5434
5435     case LSHIFTRT:
5436       t = unsigned_type_for (type);
5437       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5438                                          make_tree (t, XEXP (x, 0)),
5439                                          make_tree (type, XEXP (x, 1))));
5440
5441     case ASHIFTRT:
5442       t = signed_type_for (type);
5443       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5444                                          make_tree (t, XEXP (x, 0)),
5445                                          make_tree (type, XEXP (x, 1))));
5446
5447     case DIV:
5448       if (TREE_CODE (type) != REAL_TYPE)
5449         t = signed_type_for (type);
5450       else
5451         t = type;
5452
5453       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5454                                          make_tree (t, XEXP (x, 0)),
5455                                          make_tree (t, XEXP (x, 1))));
5456     case UDIV:
5457       t = unsigned_type_for (type);
5458       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5459                                          make_tree (t, XEXP (x, 0)),
5460                                          make_tree (t, XEXP (x, 1))));
5461
5462     case SIGN_EXTEND:
5463     case ZERO_EXTEND:
5464       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5465                                           GET_CODE (x) == ZERO_EXTEND);
5466       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5467
5468     case CONST:
5469       return make_tree (type, XEXP (x, 0));
5470
5471     case SYMBOL_REF:
5472       t = SYMBOL_REF_DECL (x);
5473       if (t)
5474         return fold_convert (type, build_fold_addr_expr (t));
5475       /* fall through.  */
5476
5477     default:
5478       if (CONST_POLY_INT_P (x))
5479         return wide_int_to_tree (t, const_poly_int_value (x));
5480
5481       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5482
5483       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5484          address mode to pointer mode.  */
5485       if (POINTER_TYPE_P (type))
5486         x = convert_memory_address_addr_space
5487           (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5488
5489       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5490          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5491       t->decl_with_rtl.rtl = x;
5492
5493       return t;
5494     }
5495 }
5496 \f
5497 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5498    and returning TARGET.
5499
5500    If TARGET is 0, a pseudo-register or constant is returned.  */
5501
5502 rtx
5503 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5504 {
5505   rtx tem = 0;
5506
5507   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5508     tem = simplify_binary_operation (AND, mode, op0, op1);
5509   if (tem == 0)
5510     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5511
5512   if (target == 0)
5513     target = tem;
5514   else if (tem != target)
5515     emit_move_insn (target, tem);
5516   return target;
5517 }
5518
5519 /* Helper function for emit_store_flag.  */
5520 rtx
5521 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5522              machine_mode mode, machine_mode compare_mode,
5523              int unsignedp, rtx x, rtx y, int normalizep,
5524              machine_mode target_mode)
5525 {
5526   class expand_operand ops[4];
5527   rtx op0, comparison, subtarget;
5528   rtx_insn *last;
5529   scalar_int_mode result_mode = targetm.cstore_mode (icode);
5530   scalar_int_mode int_target_mode;
5531
5532   last = get_last_insn ();
5533   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5534   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5535   if (!x || !y)
5536     {
5537       delete_insns_since (last);
5538       return NULL_RTX;
5539     }
5540
5541   if (target_mode == VOIDmode)
5542     int_target_mode = result_mode;
5543   else
5544     int_target_mode = as_a <scalar_int_mode> (target_mode);
5545   if (!target)
5546     target = gen_reg_rtx (int_target_mode);
5547
5548   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5549
5550   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5551   create_fixed_operand (&ops[1], comparison);
5552   create_fixed_operand (&ops[2], x);
5553   create_fixed_operand (&ops[3], y);
5554   if (!maybe_expand_insn (icode, 4, ops))
5555     {
5556       delete_insns_since (last);
5557       return NULL_RTX;
5558     }
5559   subtarget = ops[0].value;
5560
5561   /* If we are converting to a wider mode, first convert to
5562      INT_TARGET_MODE, then normalize.  This produces better combining
5563      opportunities on machines that have a SIGN_EXTRACT when we are
5564      testing a single bit.  This mostly benefits the 68k.
5565
5566      If STORE_FLAG_VALUE does not have the sign bit set when
5567      interpreted in MODE, we can do this conversion as unsigned, which
5568      is usually more efficient.  */
5569   if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5570     {
5571       gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5572                   || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5573
5574       bool unsignedp = (STORE_FLAG_VALUE >= 0);
5575       convert_move (target, subtarget, unsignedp);
5576
5577       op0 = target;
5578       result_mode = int_target_mode;
5579     }
5580   else
5581     op0 = subtarget;
5582
5583   /* If we want to keep subexpressions around, don't reuse our last
5584      target.  */
5585   if (optimize)
5586     subtarget = 0;
5587
5588   /* Now normalize to the proper value in MODE.  Sometimes we don't
5589      have to do anything.  */
5590   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5591     ;
5592   /* STORE_FLAG_VALUE might be the most negative number, so write
5593      the comparison this way to avoid a compiler-time warning.  */
5594   else if (- normalizep == STORE_FLAG_VALUE)
5595     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5596
5597   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5598      it hard to use a value of just the sign bit due to ANSI integer
5599      constant typing rules.  */
5600   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5601     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5602                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5603                         normalizep == 1);
5604   else
5605     {
5606       gcc_assert (STORE_FLAG_VALUE & 1);
5607
5608       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5609       if (normalizep == -1)
5610         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5611     }
5612
5613   /* If we were converting to a smaller mode, do the conversion now.  */
5614   if (int_target_mode != result_mode)
5615     {
5616       convert_move (target, op0, 0);
5617       return target;
5618     }
5619   else
5620     return op0;
5621 }
5622
5623
5624 /* A subroutine of emit_store_flag only including "tricks" that do not
5625    need a recursive call.  These are kept separate to avoid infinite
5626    loops.  */
5627
5628 static rtx
5629 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5630                    machine_mode mode, int unsignedp, int normalizep,
5631                    machine_mode target_mode)
5632 {
5633   rtx subtarget;
5634   enum insn_code icode;
5635   machine_mode compare_mode;
5636   enum mode_class mclass;
5637   enum rtx_code scode;
5638
5639   if (unsignedp)
5640     code = unsigned_condition (code);
5641   scode = swap_condition (code);
5642
5643   /* If one operand is constant, make it the second one.  Only do this
5644      if the other operand is not constant as well.  */
5645
5646   if (swap_commutative_operands_p (op0, op1))
5647     {
5648       std::swap (op0, op1);
5649       code = swap_condition (code);
5650     }
5651
5652   if (mode == VOIDmode)
5653     mode = GET_MODE (op0);
5654
5655   if (CONST_SCALAR_INT_P (op1))
5656     canonicalize_comparison (mode, &code, &op1);
5657
5658   /* For some comparisons with 1 and -1, we can convert this to
5659      comparisons with zero.  This will often produce more opportunities for
5660      store-flag insns.  */
5661
5662   switch (code)
5663     {
5664     case LT:
5665       if (op1 == const1_rtx)
5666         op1 = const0_rtx, code = LE;
5667       break;
5668     case LE:
5669       if (op1 == constm1_rtx)
5670         op1 = const0_rtx, code = LT;
5671       break;
5672     case GE:
5673       if (op1 == const1_rtx)
5674         op1 = const0_rtx, code = GT;
5675       break;
5676     case GT:
5677       if (op1 == constm1_rtx)
5678         op1 = const0_rtx, code = GE;
5679       break;
5680     case GEU:
5681       if (op1 == const1_rtx)
5682         op1 = const0_rtx, code = NE;
5683       break;
5684     case LTU:
5685       if (op1 == const1_rtx)
5686         op1 = const0_rtx, code = EQ;
5687       break;
5688     default:
5689       break;
5690     }
5691
5692   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5693      complement of A (for GE) and shifting the sign bit to the low bit.  */
5694   scalar_int_mode int_mode;
5695   if (op1 == const0_rtx && (code == LT || code == GE)
5696       && is_int_mode (mode, &int_mode)
5697       && (normalizep || STORE_FLAG_VALUE == 1
5698           || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5699     {
5700       scalar_int_mode int_target_mode;
5701       subtarget = target;
5702
5703       if (!target)
5704         int_target_mode = int_mode;
5705       else
5706         {
5707           /* If the result is to be wider than OP0, it is best to convert it
5708              first.  If it is to be narrower, it is *incorrect* to convert it
5709              first.  */
5710           int_target_mode = as_a <scalar_int_mode> (target_mode);
5711           if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5712             {
5713               op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5714               int_mode = int_target_mode;
5715             }
5716         }
5717
5718       if (int_target_mode != int_mode)
5719         subtarget = 0;
5720
5721       if (code == GE)
5722         op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5723                            ((STORE_FLAG_VALUE == 1 || normalizep)
5724                             ? 0 : subtarget), 0);
5725
5726       if (STORE_FLAG_VALUE == 1 || normalizep)
5727         /* If we are supposed to produce a 0/1 value, we want to do
5728            a logical shift from the sign bit to the low-order bit; for
5729            a -1/0 value, we do an arithmetic shift.  */
5730         op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5731                             GET_MODE_BITSIZE (int_mode) - 1,
5732                             subtarget, normalizep != -1);
5733
5734       if (int_mode != int_target_mode)
5735         op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5736
5737       return op0;
5738     }
5739
5740   /* Next try expanding this via the backend's cstore<mode>4.  */
5741   mclass = GET_MODE_CLASS (mode);
5742   FOR_EACH_WIDER_MODE_FROM (compare_mode, mode)
5743     {
5744      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5745      icode = optab_handler (cstore_optab, optab_mode);
5746      if (icode != CODE_FOR_nothing)
5747         {
5748           do_pending_stack_adjust ();
5749           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5750                                  unsignedp, op0, op1, normalizep, target_mode);
5751           if (tem)
5752             return tem;
5753
5754           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5755             {
5756               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5757                                  unsignedp, op1, op0, normalizep, target_mode);
5758               if (tem)
5759                 return tem;
5760             }
5761           break;
5762         }
5763     }
5764
5765   /* If we are comparing a double-word integer with zero or -1, we can
5766      convert the comparison into one involving a single word.  */
5767   if (is_int_mode (mode, &int_mode)
5768       && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5769       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5770     {
5771       rtx tem;
5772       if ((code == EQ || code == NE)
5773           && (op1 == const0_rtx || op1 == constm1_rtx))
5774         {
5775           rtx op00, op01;
5776
5777           /* Do a logical OR or AND of the two words and compare the
5778              result.  */
5779           op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5780           op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5781           tem = expand_binop (word_mode,
5782                               op1 == const0_rtx ? ior_optab : and_optab,
5783                               op00, op01, NULL_RTX, unsignedp,
5784                               OPTAB_DIRECT);
5785
5786           if (tem != 0)
5787             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5788                                    unsignedp, normalizep);
5789         }
5790       else if ((code == LT || code == GE) && op1 == const0_rtx)
5791         {
5792           rtx op0h;
5793
5794           /* If testing the sign bit, can just test on high word.  */
5795           op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5796                                       subreg_highpart_offset (word_mode,
5797                                                               int_mode));
5798           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5799                                  unsignedp, normalizep);
5800         }
5801       else
5802         tem = NULL_RTX;
5803
5804       if (tem)
5805         {
5806           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5807             return tem;
5808           if (!target)
5809             target = gen_reg_rtx (target_mode);
5810
5811           convert_move (target, tem,
5812                         !val_signbit_known_set_p (word_mode,
5813                                                   (normalizep ? normalizep
5814                                                    : STORE_FLAG_VALUE)));
5815           return target;
5816         }
5817     }
5818
5819   return 0;
5820 }
5821
5822 /* Subroutine of emit_store_flag that handles cases in which the operands
5823    are scalar integers.  SUBTARGET is the target to use for temporary
5824    operations and TRUEVAL is the value to store when the condition is
5825    true.  All other arguments are as for emit_store_flag.  */
5826
5827 rtx
5828 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5829                      rtx op1, scalar_int_mode mode, int unsignedp,
5830                      int normalizep, rtx trueval)
5831 {
5832   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5833   rtx_insn *last = get_last_insn ();
5834
5835   /* If this is an equality comparison of integers, we can try to exclusive-or
5836      (or subtract) the two operands and use a recursive call to try the
5837      comparison with zero.  Don't do any of these cases if branches are
5838      very cheap.  */
5839
5840   if ((code == EQ || code == NE) && op1 != const0_rtx)
5841     {
5842       rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5843                               OPTAB_WIDEN);
5844
5845       if (tem == 0)
5846         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5847                             OPTAB_WIDEN);
5848       if (tem != 0)
5849         tem = emit_store_flag (target, code, tem, const0_rtx,
5850                                mode, unsignedp, normalizep);
5851       if (tem != 0)
5852         return tem;
5853
5854       delete_insns_since (last);
5855     }
5856
5857   /* For integer comparisons, try the reverse comparison.  However, for
5858      small X and if we'd have anyway to extend, implementing "X != 0"
5859      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5860   rtx_code rcode = reverse_condition (code);
5861   if (can_compare_p (rcode, mode, ccp_store_flag)
5862       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5863             && code == NE
5864             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5865             && op1 == const0_rtx))
5866     {
5867       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5868                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5869
5870       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5871       if (want_add
5872           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5873                        optimize_insn_for_speed_p ()) == 0)
5874         {
5875           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5876                                        STORE_FLAG_VALUE, target_mode);
5877           if (tem != 0)
5878             tem = expand_binop (target_mode, add_optab, tem,
5879                                 gen_int_mode (normalizep, target_mode),
5880                                 target, 0, OPTAB_WIDEN);
5881           if (tem != 0)
5882             return tem;
5883         }
5884       else if (!want_add
5885                && rtx_cost (trueval, mode, XOR, 1,
5886                             optimize_insn_for_speed_p ()) == 0)
5887         {
5888           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5889                                        normalizep, target_mode);
5890           if (tem != 0)
5891             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5892                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5893           if (tem != 0)
5894             return tem;
5895         }
5896
5897       delete_insns_since (last);
5898     }
5899
5900   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5901      the constant zero.  Reject all other comparisons at this point.  Only
5902      do LE and GT if branches are expensive since they are expensive on
5903      2-operand machines.  */
5904
5905   if (op1 != const0_rtx
5906       || (code != EQ && code != NE
5907           && (BRANCH_COST (optimize_insn_for_speed_p (),
5908                            false) <= 1 || (code != LE && code != GT))))
5909     return 0;
5910
5911   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5912      do the necessary operation below.  */
5913
5914   rtx tem = 0;
5915
5916   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5917      the sign bit set.  */
5918
5919   if (code == LE)
5920     {
5921       /* This is destructive, so SUBTARGET can't be OP0.  */
5922       if (rtx_equal_p (subtarget, op0))
5923         subtarget = 0;
5924
5925       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5926                           OPTAB_WIDEN);
5927       if (tem)
5928         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5929                             OPTAB_WIDEN);
5930     }
5931
5932   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5933      number of bits in the mode of OP0, minus one.  */
5934
5935   if (code == GT)
5936     {
5937       if (rtx_equal_p (subtarget, op0))
5938         subtarget = 0;
5939
5940       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5941                                 GET_MODE_BITSIZE (mode) - 1,
5942                                 subtarget, 0);
5943       if (tem)
5944         tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5945                             OPTAB_WIDEN);
5946     }
5947
5948   if (code == EQ || code == NE)
5949     {
5950       /* For EQ or NE, one way to do the comparison is to apply an operation
5951          that converts the operand into a positive number if it is nonzero
5952          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5953          for NE we negate.  This puts the result in the sign bit.  Then we
5954          normalize with a shift, if needed.
5955
5956          Two operations that can do the above actions are ABS and FFS, so try
5957          them.  If that doesn't work, and MODE is smaller than a full word,
5958          we can use zero-extension to the wider mode (an unsigned conversion)
5959          as the operation.  */
5960
5961       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5962          that is compensated by the subsequent overflow when subtracting
5963          one / negating.  */
5964
5965       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5966         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5967       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5968         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5969       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5970         {
5971           tem = convert_modes (word_mode, mode, op0, 1);
5972           mode = word_mode;
5973         }
5974
5975       if (tem != 0)
5976         {
5977           if (code == EQ)
5978             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5979                                 0, OPTAB_WIDEN);
5980           else
5981             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5982         }
5983
5984       /* If we couldn't do it that way, for NE we can "or" the two's complement
5985          of the value with itself.  For EQ, we take the one's complement of
5986          that "or", which is an extra insn, so we only handle EQ if branches
5987          are expensive.  */
5988
5989       if (tem == 0
5990           && (code == NE
5991               || BRANCH_COST (optimize_insn_for_speed_p (),
5992                               false) > 1))
5993         {
5994           if (rtx_equal_p (subtarget, op0))
5995             subtarget = 0;
5996
5997           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5998           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5999                               OPTAB_WIDEN);
6000
6001           if (tem && code == EQ)
6002             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
6003         }
6004     }
6005
6006   if (tem && normalizep)
6007     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
6008                               GET_MODE_BITSIZE (mode) - 1,
6009                               subtarget, normalizep == 1);
6010
6011   if (tem)
6012     {
6013       if (!target)
6014         ;
6015       else if (GET_MODE (tem) != target_mode)
6016         {
6017           convert_move (target, tem, 0);
6018           tem = target;
6019         }
6020       else if (!subtarget)
6021         {
6022           emit_move_insn (target, tem);
6023           tem = target;
6024         }
6025     }
6026   else
6027     delete_insns_since (last);
6028
6029   return tem;
6030 }
6031
6032 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
6033    and storing in TARGET.  Normally return TARGET.
6034    Return 0 if that cannot be done.
6035
6036    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
6037    it is VOIDmode, they cannot both be CONST_INT.
6038
6039    UNSIGNEDP is for the case where we have to widen the operands
6040    to perform the operation.  It says to use zero-extension.
6041
6042    NORMALIZEP is 1 if we should convert the result to be either zero
6043    or one.  Normalize is -1 if we should convert the result to be
6044    either zero or -1.  If NORMALIZEP is zero, the result will be left
6045    "raw" out of the scc insn.  */
6046
6047 rtx
6048 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
6049                  machine_mode mode, int unsignedp, int normalizep)
6050 {
6051   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
6052   enum rtx_code rcode;
6053   rtx subtarget;
6054   rtx tem, trueval;
6055   rtx_insn *last;
6056
6057   /* If we compare constants, we shouldn't use a store-flag operation,
6058      but a constant load.  We can get there via the vanilla route that
6059      usually generates a compare-branch sequence, but will in this case
6060      fold the comparison to a constant, and thus elide the branch.  */
6061   if (CONSTANT_P (op0) && CONSTANT_P (op1))
6062     return NULL_RTX;
6063
6064   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
6065                            target_mode);
6066   if (tem)
6067     return tem;
6068
6069   /* If we reached here, we can't do this with a scc insn, however there
6070      are some comparisons that can be done in other ways.  Don't do any
6071      of these cases if branches are very cheap.  */
6072   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6073     return 0;
6074
6075   /* See what we need to return.  We can only return a 1, -1, or the
6076      sign bit.  */
6077
6078   if (normalizep == 0)
6079     {
6080       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6081         normalizep = STORE_FLAG_VALUE;
6082
6083       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6084         ;
6085       else
6086         return 0;
6087     }
6088
6089   last = get_last_insn ();
6090
6091   /* If optimizing, use different pseudo registers for each insn, instead
6092      of reusing the same pseudo.  This leads to better CSE, but slows
6093      down the compiler, since there are more pseudos.  */
6094   subtarget = (!optimize
6095                && (target_mode == mode)) ? target : NULL_RTX;
6096   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6097
6098   /* For floating-point comparisons, try the reverse comparison or try
6099      changing the "orderedness" of the comparison.  */
6100   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6101     {
6102       enum rtx_code first_code;
6103       bool and_them;
6104
6105       rcode = reverse_condition_maybe_unordered (code);
6106       if (can_compare_p (rcode, mode, ccp_store_flag)
6107           && (code == ORDERED || code == UNORDERED
6108               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6109               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6110         {
6111           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6112                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6113
6114           /* For the reverse comparison, use either an addition or a XOR.  */
6115           if (want_add
6116               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6117                            optimize_insn_for_speed_p ()) == 0)
6118             {
6119               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6120                                        STORE_FLAG_VALUE, target_mode);
6121               if (tem)
6122                 return expand_binop (target_mode, add_optab, tem,
6123                                      gen_int_mode (normalizep, target_mode),
6124                                      target, 0, OPTAB_WIDEN);
6125             }
6126           else if (!want_add
6127                    && rtx_cost (trueval, mode, XOR, 1,
6128                                 optimize_insn_for_speed_p ()) == 0)
6129             {
6130               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6131                                        normalizep, target_mode);
6132               if (tem)
6133                 return expand_binop (target_mode, xor_optab, tem, trueval,
6134                                      target, INTVAL (trueval) >= 0,
6135                                      OPTAB_WIDEN);
6136             }
6137         }
6138
6139       delete_insns_since (last);
6140
6141       /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
6142       if (code == ORDERED || code == UNORDERED)
6143         return 0;
6144
6145       and_them = split_comparison (code, mode, &first_code, &code);
6146
6147       /* If there are no NaNs, the first comparison should always fall through.
6148          Effectively change the comparison to the other one.  */
6149       if (!HONOR_NANS (mode))
6150         {
6151           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6152           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6153                                     target_mode);
6154         }
6155
6156       if (!HAVE_conditional_move)
6157         return 0;
6158
6159       /* Do not turn a trapping comparison into a non-trapping one.  */
6160       if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6161           && flag_trapping_math)
6162         return 0;
6163
6164       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6165          conditional move.  */
6166       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6167                                normalizep, target_mode);
6168       if (tem == 0)
6169         return 0;
6170
6171       if (and_them)
6172         tem = emit_conditional_move (target, { code, op0, op1, mode },
6173                                      tem, const0_rtx, GET_MODE (tem), 0);
6174       else
6175         tem = emit_conditional_move (target, { code, op0, op1, mode },
6176                                      trueval, tem, GET_MODE (tem), 0);
6177
6178       if (tem == 0)
6179         delete_insns_since (last);
6180       return tem;
6181     }
6182
6183   /* The remaining tricks only apply to integer comparisons.  */
6184
6185   scalar_int_mode int_mode;
6186   if (is_int_mode (mode, &int_mode))
6187     return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6188                                 unsignedp, normalizep, trueval);
6189
6190   return 0;
6191 }
6192
6193 /* Like emit_store_flag, but always succeeds.  */
6194
6195 rtx
6196 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6197                        machine_mode mode, int unsignedp, int normalizep)
6198 {
6199   rtx tem;
6200   rtx_code_label *label;
6201   rtx trueval, falseval;
6202
6203   /* First see if emit_store_flag can do the job.  */
6204   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6205   if (tem != 0)
6206     return tem;
6207
6208   /* If one operand is constant, make it the second one.  Only do this
6209      if the other operand is not constant as well.  */
6210   if (swap_commutative_operands_p (op0, op1))
6211     {
6212       std::swap (op0, op1);
6213       code = swap_condition (code);
6214     }
6215
6216   if (mode == VOIDmode)
6217     mode = GET_MODE (op0);
6218
6219   if (!target)
6220     target = gen_reg_rtx (word_mode);
6221
6222   /* If this failed, we have to do this with set/compare/jump/set code.
6223      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
6224   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6225   if (code == NE
6226       && GET_MODE_CLASS (mode) == MODE_INT
6227       && REG_P (target)
6228       && op0 == target
6229       && op1 == const0_rtx)
6230     {
6231       label = gen_label_rtx ();
6232       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6233                                NULL_RTX, NULL, label,
6234                                profile_probability::uninitialized ());
6235       emit_move_insn (target, trueval);
6236       emit_label (label);
6237       return target;
6238     }
6239
6240   if (!REG_P (target)
6241       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6242     target = gen_reg_rtx (GET_MODE (target));
6243
6244   /* Jump in the right direction if the target cannot implement CODE
6245      but can jump on its reverse condition.  */
6246   falseval = const0_rtx;
6247   if (! can_compare_p (code, mode, ccp_jump)
6248       && (! FLOAT_MODE_P (mode)
6249           || code == ORDERED || code == UNORDERED
6250           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6251           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6252     {
6253       enum rtx_code rcode;
6254       if (FLOAT_MODE_P (mode))
6255         rcode = reverse_condition_maybe_unordered (code);
6256       else
6257         rcode = reverse_condition (code);
6258
6259       /* Canonicalize to UNORDERED for the libcall.  */
6260       if (can_compare_p (rcode, mode, ccp_jump)
6261           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6262         {
6263           falseval = trueval;
6264           trueval = const0_rtx;
6265           code = rcode;
6266         }
6267     }
6268
6269   emit_move_insn (target, trueval);
6270   label = gen_label_rtx ();
6271   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6272                            label, profile_probability::uninitialized ());
6273
6274   emit_move_insn (target, falseval);
6275   emit_label (label);
6276
6277   return target;
6278 }
6279
6280 /* Helper function for canonicalize_cmp_for_target.  Swap between inclusive
6281    and exclusive ranges in order to create an equivalent comparison.  See
6282    canonicalize_cmp_for_target for the possible cases.  */
6283
6284 static enum rtx_code
6285 equivalent_cmp_code (enum rtx_code code)
6286 {
6287   switch (code)
6288     {
6289     case GT:
6290       return GE;
6291     case GE:
6292       return GT;
6293     case LT:
6294       return LE;
6295     case LE:
6296       return LT;
6297     case GTU:
6298       return GEU;
6299     case GEU:
6300       return GTU;
6301     case LTU:
6302       return LEU;
6303     case LEU:
6304       return LTU;
6305
6306     default:
6307       return code;
6308     }
6309 }
6310
6311 /* Choose the more appropiate immediate in scalar integer comparisons.  The
6312    purpose of this is to end up with an immediate which can be loaded into a
6313    register in fewer moves, if possible.
6314
6315    For each integer comparison there exists an equivalent choice:
6316      i)   a >  b or a >= b + 1
6317      ii)  a <= b or a <  b + 1
6318      iii) a >= b or a >  b - 1
6319      iv)  a <  b or a <= b - 1
6320
6321    MODE is the mode of the first operand.
6322    CODE points to the comparison code.
6323    IMM points to the rtx containing the immediate.  *IMM must satisfy
6324    CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6325    on exit.  */
6326
6327 void
6328 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6329 {
6330   if (!SCALAR_INT_MODE_P (mode))
6331     return;
6332
6333   int to_add = 0;
6334   enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6335
6336   /* Extract the immediate value from the rtx.  */
6337   wide_int imm_val = rtx_mode_t (*imm, mode);
6338
6339   if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6340     to_add = 1;
6341   else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6342     to_add = -1;
6343   else
6344     return;
6345
6346   /* Check for overflow/underflow in the case of signed values and
6347      wrapping around in the case of unsigned values.  If any occur
6348      cancel the optimization.  */
6349   wi::overflow_type overflow = wi::OVF_NONE;
6350   wide_int imm_modif;
6351
6352   if (to_add == 1)
6353     imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6354   else
6355     imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6356
6357   if (overflow)
6358     return;
6359
6360   /* The following creates a pseudo; if we cannot do that, bail out.  */
6361   if (!can_create_pseudo_p ())
6362     return;
6363
6364   rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6365   rtx new_imm = immed_wide_int_const (imm_modif, mode);
6366
6367   rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6368   rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6369
6370   /* Update the immediate and the code.  */
6371   if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6372     {
6373       *code = equivalent_cmp_code (*code);
6374       *imm = new_imm;
6375     }
6376 }
6377
6378
6379 \f
6380 /* Perform possibly multi-word comparison and conditional jump to LABEL
6381    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
6382    now a thin wrapper around do_compare_rtx_and_jump.  */
6383
6384 static void
6385 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6386                  rtx_code_label *label)
6387 {
6388   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6389   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6390                            NULL, label, profile_probability::uninitialized ());
6391 }