1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2017 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "regs.h"
35 #include "emit-rtl.h"
36 #include "diagnostic-core.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "dojump.h"
40 #include "explow.h"
41 #include "expr.h"
42 #include "langhooks.h"
43 #include "tree-vector-builder.h"
44
45 struct target_expmed default_target_expmed;
46 #if SWITCHABLE_TARGET
47 struct target_expmed *this_target_expmed = &default_target_expmed;
48 #endif
49
50 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 rtx, scalar_int_mode, bool);
56 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
57 unsigned HOST_WIDE_INT,
58 unsigned HOST_WIDE_INT,
59 rtx, scalar_int_mode, bool);
60 static void store_split_bit_field (rtx, opt_scalar_int_mode,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT,
63 unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT,
65 rtx, scalar_int_mode, bool);
66 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
67 unsigned HOST_WIDE_INT,
68 unsigned HOST_WIDE_INT, rtx, int, bool);
69 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
70 unsigned HOST_WIDE_INT,
71 unsigned HOST_WIDE_INT, rtx, int, bool);
72 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
73 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
74 unsigned HOST_WIDE_INT,
75 unsigned HOST_WIDE_INT, int, bool);
76 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
77 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
78 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
79
80 /* Return a constant integer mask value of mode MODE with BITSIZE ones
81 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
82 The mask is truncated if necessary to the width of mode MODE. The
83 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
84
85 static inline rtx
86 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
87 {
88 return immed_wide_int_const
89 (wi::shifted_mask (bitpos, bitsize, complement,
90 GET_MODE_PRECISION (mode)), mode);
91 }
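/* For example, under the rule above mask_rtx (QImode, 2, 3, false)
   yields the QImode constant 0x1c (three ones starting at bit 2), and
   mask_rtx (QImode, 2, 3, true) yields its complement 0xe3.  */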
92
93 /* Test whether a value is zero or a power of two.  */
94 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
95 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
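/* For example, 8 & 7 == 0 and 0 & -1 == 0, so 8 and 0 pass the test,
   while 6 & 5 == 4, so 6 does not.  */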
96
97 struct init_expmed_rtl
98 {
99 rtx reg;
100 rtx plus;
101 rtx neg;
102 rtx mult;
103 rtx sdiv;
104 rtx udiv;
105 rtx sdiv_32;
106 rtx smod_32;
107 rtx wide_mult;
108 rtx wide_lshr;
109 rtx wide_trunc;
110 rtx shift;
111 rtx shift_mult;
112 rtx shift_add;
113 rtx shift_sub0;
114 rtx shift_sub1;
115 rtx zext;
116 rtx trunc;
117
118 rtx pow2[MAX_BITS_PER_WORD];
119 rtx cint[MAX_BITS_PER_WORD];
120 };
121
122 static void
123 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
124 scalar_int_mode from_mode, bool speed)
125 {
126 int to_size, from_size;
127 rtx which;
128
129 to_size = GET_MODE_PRECISION (to_mode);
130 from_size = GET_MODE_PRECISION (from_mode);
131
132 /* Most partial integers have a precision less than the "full"
133 integer they require for storage. In case one doesn't, for
134 comparison purposes here, reduce the bit size by one in that
135 case. */
136 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
137 && pow2p_hwi (to_size))
138 to_size --;
139 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
140 && pow2p_hwi (from_size))
141 from_size --;
142
143 /* Assume cost of zero-extend and sign-extend is the same. */
144 which = (to_size < from_size ? all->trunc : all->zext);
145
146 PUT_MODE (all->reg, from_mode);
147 set_convert_cost (to_mode, from_mode, speed,
148 set_src_cost (which, to_mode, speed));
149 }
150
151 static void
152 init_expmed_one_mode (struct init_expmed_rtl *all,
153 machine_mode mode, int speed)
154 {
155 int m, n, mode_bitsize;
156 machine_mode mode_from;
157
158 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
159
160 PUT_MODE (all->reg, mode);
161 PUT_MODE (all->plus, mode);
162 PUT_MODE (all->neg, mode);
163 PUT_MODE (all->mult, mode);
164 PUT_MODE (all->sdiv, mode);
165 PUT_MODE (all->udiv, mode);
166 PUT_MODE (all->sdiv_32, mode);
167 PUT_MODE (all->smod_32, mode);
168 PUT_MODE (all->wide_trunc, mode);
169 PUT_MODE (all->shift, mode);
170 PUT_MODE (all->shift_mult, mode);
171 PUT_MODE (all->shift_add, mode);
172 PUT_MODE (all->shift_sub0, mode);
173 PUT_MODE (all->shift_sub1, mode);
174 PUT_MODE (all->zext, mode);
175 PUT_MODE (all->trunc, mode);
176
177 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
178 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
179 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
180 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
181 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
182
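/* The thresholds below are heuristics: a power-of-two signed division
   or modulus is treated as cheap when the division itself costs no
   more than two or four additions respectively.  expand_divmod later
   consults these flags when choosing between a division instruction
   and a shift-based sequence.  */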
183 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
184 <= 2 * add_cost (speed, mode)));
185 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
186 <= 4 * add_cost (speed, mode)));
187
188 set_shift_cost (speed, mode, 0, 0);
189 {
190 int cost = add_cost (speed, mode);
191 set_shiftadd_cost (speed, mode, 0, cost);
192 set_shiftsub0_cost (speed, mode, 0, cost);
193 set_shiftsub1_cost (speed, mode, 0, cost);
194 }
195
196 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
197 for (m = 1; m < n; m++)
198 {
199 XEXP (all->shift, 1) = all->cint[m];
200 XEXP (all->shift_mult, 1) = all->pow2[m];
201
202 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
203 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
204 speed));
205 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
206 speed));
207 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
208 speed));
209 }
210
211 scalar_int_mode int_mode_to;
212 if (is_a <scalar_int_mode> (mode, &int_mode_to))
213 {
214 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
215 mode_from = (machine_mode)(mode_from + 1))
216 init_expmed_one_conv (all, int_mode_to,
217 as_a <scalar_int_mode> (mode_from), speed);
218
219 scalar_int_mode wider_mode;
220 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
221 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
222 {
223 PUT_MODE (all->zext, wider_mode);
224 PUT_MODE (all->wide_mult, wider_mode);
225 PUT_MODE (all->wide_lshr, wider_mode);
226 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
227
228 set_mul_widen_cost (speed, wider_mode,
229 set_src_cost (all->wide_mult, wider_mode, speed));
230 set_mul_highpart_cost (speed, int_mode_to,
231 set_src_cost (all->wide_trunc,
232 int_mode_to, speed));
233 }
234 }
235 }
236
237 void
238 init_expmed (void)
239 {
240 struct init_expmed_rtl all;
241 machine_mode mode = QImode;
242 int m, speed;
243
244 memset (&all, 0, sizeof all);
245 for (m = 1; m < MAX_BITS_PER_WORD; m++)
246 {
247 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
248 all.cint[m] = GEN_INT (m);
249 }
250
251 /* Avoid using hard regs in ways which may be unsupported. */
252 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
253 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
254 all.neg = gen_rtx_NEG (mode, all.reg);
255 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
256 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
257 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
258 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
259 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
260 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
261 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
262 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
263 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
264 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
265 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
266 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
267 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
268 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
269 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
270
271 for (speed = 0; speed < 2; speed++)
272 {
273 crtl->maybe_hot_insn_p = speed;
274 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
275
276 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
277 mode = (machine_mode)(mode + 1))
278 init_expmed_one_mode (&all, mode, speed);
279
280 if (MIN_MODE_PARTIAL_INT != VOIDmode)
281 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
282 mode = (machine_mode)(mode + 1))
283 init_expmed_one_mode (&all, mode, speed);
284
285 if (MIN_MODE_VECTOR_INT != VOIDmode)
286 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
287 mode = (machine_mode)(mode + 1))
288 init_expmed_one_mode (&all, mode, speed);
289 }
290
291 if (alg_hash_used_p ())
292 {
293 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
294 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
295 }
296 else
297 set_alg_hash_used_p (true);
298 default_rtl_profile ();
299
300 ggc_free (all.trunc);
301 ggc_free (all.shift_sub1);
302 ggc_free (all.shift_sub0);
303 ggc_free (all.shift_add);
304 ggc_free (all.shift_mult);
305 ggc_free (all.shift);
306 ggc_free (all.wide_trunc);
307 ggc_free (all.wide_lshr);
308 ggc_free (all.wide_mult);
309 ggc_free (all.zext);
310 ggc_free (all.smod_32);
311 ggc_free (all.sdiv_32);
312 ggc_free (all.udiv);
313 ggc_free (all.sdiv);
314 ggc_free (all.mult);
315 ggc_free (all.neg);
316 ggc_free (all.plus);
317 ggc_free (all.reg);
318 }
319
320 /* Return an rtx representing minus the value of X.
321 MODE is the intended mode of the result,
322 useful if X is a CONST_INT. */
323
324 rtx
325 negate_rtx (machine_mode mode, rtx x)
326 {
327 rtx result = simplify_unary_operation (NEG, mode, x, mode);
328
329 if (result == 0)
330 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
331
332 return result;
333 }
334
335 /* Whether reverse storage order is supported on the target. */
336 static int reverse_storage_order_supported = -1;
337
338 /* Check whether reverse storage order is supported on the target. */
339
340 static void
341 check_reverse_storage_order_support (void)
342 {
343 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
344 {
345 reverse_storage_order_supported = 0;
346 sorry ("reverse scalar storage order");
347 }
348 else
349 reverse_storage_order_supported = 1;
350 }
351
352 /* Whether reverse FP storage order is supported on the target. */
353 static int reverse_float_storage_order_supported = -1;
354
355 /* Check whether reverse FP storage order is supported on the target. */
356
357 static void
358 check_reverse_float_storage_order_support (void)
359 {
360 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
361 {
362 reverse_float_storage_order_supported = 0;
363 sorry ("reverse floating-point scalar storage order");
364 }
365 else
366 reverse_float_storage_order_supported = 1;
367 }
368
369 /* Return an rtx representing value of X with reverse storage order.
370 MODE is the intended mode of the result,
371 useful if X is a CONST_INT. */
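/* For scalar integers this amounts to a byte swap; for example,
   flipping the SImode value 0x12345678 yields 0x78563412.  */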
372
373 rtx
374 flip_storage_order (machine_mode mode, rtx x)
375 {
376 scalar_int_mode int_mode;
377 rtx result;
378
379 if (mode == QImode)
380 return x;
381
382 if (COMPLEX_MODE_P (mode))
383 {
384 rtx real = read_complex_part (x, false);
385 rtx imag = read_complex_part (x, true);
386
387 real = flip_storage_order (GET_MODE_INNER (mode), real);
388 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
389
390 return gen_rtx_CONCAT (mode, real, imag);
391 }
392
393 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
394 check_reverse_storage_order_support ();
395
396 if (!is_a <scalar_int_mode> (mode, &int_mode))
397 {
398 if (FLOAT_MODE_P (mode)
399 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
400 check_reverse_float_storage_order_support ();
401
402 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
403 {
404 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
405 return x;
406 }
407 x = gen_lowpart (int_mode, x);
408 }
409
410 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
411 if (result == 0)
412 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
413
414 if (int_mode != mode)
415 result = gen_lowpart (mode, result);
416
417 return result;
418 }
419
420 /* If MODE is set, adjust bitfield memory MEM so that it points to the
421 first unit of mode MODE that contains a bitfield of size BITSIZE at
422 bit position BITNUM. If MODE is not set, return a BLKmode reference
423 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
424 of the field within the new memory. */
425
426 static rtx
427 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
428 unsigned HOST_WIDE_INT bitsize,
429 unsigned HOST_WIDE_INT bitnum,
430 unsigned HOST_WIDE_INT *new_bitnum)
431 {
432 scalar_int_mode imode;
433 if (mode.exists (&imode))
434 {
435 unsigned int unit = GET_MODE_BITSIZE (imode);
436 *new_bitnum = bitnum % unit;
437 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
438 return adjust_bitfield_address (mem, imode, offset);
439 }
440 else
441 {
442 *new_bitnum = bitnum % BITS_PER_UNIT;
443 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
444 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
445 / BITS_PER_UNIT);
446 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
447 }
448 }
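/* For example, with a 16-bit IMODE, BITSIZE = 3 and BITNUM = 37, the
   bitfield lives in the third halfword: *NEW_BITNUM becomes 37 % 16 = 5
   and the memory reference is rebased by (37 - 5) / 8 = 4 bytes.  */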
449
450 /* The caller wants to perform insertion or extraction PATTERN on a
451 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
452 BITREGION_START and BITREGION_END are as for store_bit_field
453 and FIELDMODE is the natural mode of the field.
454
455 Search for a mode that is compatible with the memory access
456 restrictions and (where applicable) with a register insertion or
457 extraction. Return the new memory on success, storing the adjusted
458 bit position in *NEW_BITNUM. Return null otherwise. */
459
460 static rtx
461 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
462 rtx op0, HOST_WIDE_INT bitsize,
463 HOST_WIDE_INT bitnum,
464 unsigned HOST_WIDE_INT bitregion_start,
465 unsigned HOST_WIDE_INT bitregion_end,
466 machine_mode fieldmode,
467 unsigned HOST_WIDE_INT *new_bitnum)
468 {
469 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
470 bitregion_end, MEM_ALIGN (op0),
471 MEM_VOLATILE_P (op0));
472 scalar_int_mode best_mode;
473 if (iter.next_mode (&best_mode))
474 {
475 /* We can use a memory in BEST_MODE. See whether this is true for
476 any wider modes. All other things being equal, we prefer to
477 use the widest mode possible because it tends to expose more
478 CSE opportunities. */
479 if (!iter.prefer_smaller_modes ())
480 {
481 /* Limit the search to the mode required by the corresponding
482 register insertion or extraction instruction, if any. */
483 scalar_int_mode limit_mode = word_mode;
484 extraction_insn insn;
485 if (get_best_reg_extraction_insn (&insn, pattern,
486 GET_MODE_BITSIZE (best_mode),
487 fieldmode))
488 limit_mode = insn.field_mode;
489
490 scalar_int_mode wider_mode;
491 while (iter.next_mode (&wider_mode)
492 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
493 best_mode = wider_mode;
494 }
495 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
496 new_bitnum);
497 }
498 return NULL_RTX;
499 }
500
501 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
502 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
503 offset is then BITNUM / BITS_PER_UNIT. */
504
505 static bool
506 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
507 unsigned HOST_WIDE_INT bitsize,
508 machine_mode struct_mode)
509 {
510 unsigned HOST_WIDE_INT regsize = REGMODE_NATURAL_SIZE (struct_mode);
511 if (BYTES_BIG_ENDIAN)
512 return (bitnum % BITS_PER_UNIT == 0
513 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
514 || (bitnum + bitsize) % (regsize * BITS_PER_UNIT) == 0));
515 else
516 return bitnum % (regsize * BITS_PER_UNIT) == 0;
517 }
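/* For example, on a little-endian target whose registers have an
   8-byte natural size, a field starting at bit 0 or bit 64 of the
   structure is a lowpart, while one starting at bit 8 is not.  */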
518
519 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
520 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
521 Return false if the access would touch memory outside the range
522 BITREGION_START to BITREGION_END for conformance to the C++ memory
523 model. */
524
525 static bool
526 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
527 unsigned HOST_WIDE_INT bitnum,
528 scalar_int_mode fieldmode,
529 unsigned HOST_WIDE_INT bitregion_start,
530 unsigned HOST_WIDE_INT bitregion_end)
531 {
532 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
533
534 /* -fstrict-volatile-bitfields must be enabled and we must have a
535 volatile MEM. */
536 if (!MEM_P (op0)
537 || !MEM_VOLATILE_P (op0)
538 || flag_strict_volatile_bitfields <= 0)
539 return false;
540
541 /* The bit size must not be larger than the field mode, and
542 the field mode must not be larger than a word. */
543 if (bitsize > modesize || modesize > BITS_PER_WORD)
544 return false;
545
546 /* Check for cases of unaligned fields that must be split. */
547 if (bitnum % modesize + bitsize > modesize)
548 return false;
549
550 /* The memory must be sufficiently aligned for a MODESIZE access.
551 This condition guarantees that the memory access will not
552 touch anything after the end of the structure. */
553 if (MEM_ALIGN (op0) < modesize)
554 return false;
555
556 /* Check for cases where the C++ memory model applies. */
557 if (bitregion_end != 0
558 && (bitnum - bitnum % modesize < bitregion_start
559 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
560 return false;
561
562 return true;
563 }
564
565 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
566 bit number BITNUM can be treated as a simple value of mode MODE. */
567
568 static bool
569 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
570 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
571 {
572 return (MEM_P (op0)
573 && bitnum % BITS_PER_UNIT == 0
574 && bitsize == GET_MODE_BITSIZE (mode)
575 && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
576 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
577 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
578 }
579 \f
580 /* Try to use instruction INSV to store VALUE into a field of OP0.
581 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
582 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
583 are as for store_bit_field. */
584
585 static bool
586 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
587 opt_scalar_int_mode op0_mode,
588 unsigned HOST_WIDE_INT bitsize,
589 unsigned HOST_WIDE_INT bitnum,
590 rtx value, scalar_int_mode value_mode)
591 {
592 struct expand_operand ops[4];
593 rtx value1;
594 rtx xop0 = op0;
595 rtx_insn *last = get_last_insn ();
596 bool copy_back = false;
597
598 scalar_int_mode op_mode = insv->field_mode;
599 unsigned int unit = GET_MODE_BITSIZE (op_mode);
600 if (bitsize == 0 || bitsize > unit)
601 return false;
602
603 if (MEM_P (xop0))
604 /* Get a reference to the first byte of the field. */
605 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
606 &bitnum);
607 else
608 {
609 /* Convert from counting within OP0 to counting in OP_MODE. */
610 if (BYTES_BIG_ENDIAN)
611 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
612
613 /* If xop0 is a register, we need it in OP_MODE
614 to make it acceptable to the format of insv. */
615 if (GET_CODE (xop0) == SUBREG)
616 /* We can't just change the mode, because this might clobber op0,
617 and we will need the original value of op0 if insv fails. */
618 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
619 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
620 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
621 }
622
623 /* If the destination is a paradoxical subreg such that we need a
624 truncate to the inner mode, perform the insertion on a temporary and
625 truncate the result to the original destination. Note that we can't
626 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
627 X) 0)) is (reg:N X). */
628 if (GET_CODE (xop0) == SUBREG
629 && REG_P (SUBREG_REG (xop0))
630 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
631 op_mode))
632 {
633 rtx tem = gen_reg_rtx (op_mode);
634 emit_move_insn (tem, xop0);
635 xop0 = tem;
636 copy_back = true;
637 }
638
639 /* There is a similar overflow check at the start of store_bit_field_1,
640 but that only checks the case where the field lies completely
641 outside the register.  There are also cases where the field lies
642 partially in the register, and then we need to adjust BITSIZE for
643 the partial overflow.  Without this fix, pr48335-2.c would break on
644 big-endian targets that have a bit insert instruction, such as arm
645 and aarch64.  */
646 if (bitsize + bitnum > unit && bitnum < unit)
647 {
648 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
649 "destination object, data truncated into %wu-bit",
650 bitsize, unit - bitnum);
651 bitsize = unit - bitnum;
652 }
653
654 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
655 "backwards" from the size of the unit we are inserting into.
656 Otherwise, we count bits from the most significant on a
657 BYTES/BITS_BIG_ENDIAN machine. */
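/* For example, with a 32-bit unit, an 8-bit field at bit 0 in one
   numbering becomes an 8-bit field at bit 24 in the other.  */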
658
659 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
660 bitnum = unit - bitsize - bitnum;
661
662 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
663 value1 = value;
664 if (value_mode != op_mode)
665 {
666 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
667 {
668 rtx tmp;
669 /* Optimization: Don't bother really extending VALUE
670 if it has all the bits we will actually use. However,
671 if we must narrow it, be sure we do it correctly. */
672
673 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
674 {
675 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
676 if (! tmp)
677 tmp = simplify_gen_subreg (op_mode,
678 force_reg (value_mode, value1),
679 value_mode, 0);
680 }
681 else
682 {
683 tmp = gen_lowpart_if_possible (op_mode, value1);
684 if (! tmp)
685 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
686 }
687 value1 = tmp;
688 }
689 else if (CONST_INT_P (value))
690 value1 = gen_int_mode (INTVAL (value), op_mode);
691 else
692 /* Parse phase is supposed to make VALUE's data type
693 match that of the component reference, which is a type
694 at least as wide as the field; so VALUE should have
695 a mode that corresponds to that type. */
696 gcc_assert (CONSTANT_P (value));
697 }
698
699 create_fixed_operand (&ops[0], xop0);
700 create_integer_operand (&ops[1], bitsize);
701 create_integer_operand (&ops[2], bitnum);
702 create_input_operand (&ops[3], value1, op_mode);
703 if (maybe_expand_insn (insv->icode, 4, ops))
704 {
705 if (copy_back)
706 convert_move (op0, xop0, true);
707 return true;
708 }
709 delete_insns_since (last);
710 return false;
711 }
712
713 /* A subroutine of store_bit_field, with the same arguments. Return true
714 if the operation could be implemented.
715
716 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
717 no other way of implementing the operation. If FALLBACK_P is false,
718 return false instead. */
719
720 static bool
721 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
722 unsigned HOST_WIDE_INT bitnum,
723 unsigned HOST_WIDE_INT bitregion_start,
724 unsigned HOST_WIDE_INT bitregion_end,
725 machine_mode fieldmode,
726 rtx value, bool reverse, bool fallback_p)
727 {
728 rtx op0 = str_rtx;
729 rtx orig_value;
730
731 while (GET_CODE (op0) == SUBREG)
732 {
733 bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
734 op0 = SUBREG_REG (op0);
735 }
736
737 /* No action is needed if the target is a register and if the field
738 lies completely outside that register. This can occur if the source
739 code contains an out-of-bounds access to a small array. */
740 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
741 return true;
742
743 /* Use vec_set patterns for inserting parts of vectors whenever
744 available. */
745 machine_mode outermode = GET_MODE (op0);
746 scalar_mode innermode = GET_MODE_INNER (outermode);
747 if (VECTOR_MODE_P (outermode)
748 && !MEM_P (op0)
749 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
750 && fieldmode == innermode
751 && bitsize == GET_MODE_BITSIZE (innermode)
752 && !(bitnum % GET_MODE_BITSIZE (innermode)))
753 {
754 struct expand_operand ops[3];
755 enum insn_code icode = optab_handler (vec_set_optab, outermode);
756 int pos = bitnum / GET_MODE_BITSIZE (innermode);
757
758 create_fixed_operand (&ops[0], op0);
759 create_input_operand (&ops[1], value, innermode);
760 create_integer_operand (&ops[2], pos);
761 if (maybe_expand_insn (icode, 3, ops))
762 return true;
763 }
764
765 /* If the target is a register, overwriting the entire object, or storing
766 a full-word or multi-word field can be done with just a SUBREG. */
767 if (!MEM_P (op0)
768 && bitsize == GET_MODE_BITSIZE (fieldmode)
769 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
770 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
771 {
772 /* Use the subreg machinery either to narrow OP0 to the required
773 words or to cope with mode punning between equal-sized modes.
774 In the latter case, use subreg on the rhs side, not lhs. */
775 rtx sub;
776
777 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
778 {
779 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
780 if (sub)
781 {
782 if (reverse)
783 sub = flip_storage_order (GET_MODE (op0), sub);
784 emit_move_insn (op0, sub);
785 return true;
786 }
787 }
788 else
789 {
790 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
791 bitnum / BITS_PER_UNIT);
792 if (sub)
793 {
794 if (reverse)
795 value = flip_storage_order (fieldmode, value);
796 emit_move_insn (sub, value);
797 return true;
798 }
799 }
800 }
801
802 /* If the target is memory, storing any naturally aligned field can be
803 done with a simple store. For targets that support fast unaligned
804 memory, any naturally sized, unit aligned field can be done directly. */
805 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
806 {
807 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
808 if (reverse)
809 value = flip_storage_order (fieldmode, value);
810 emit_move_insn (op0, value);
811 return true;
812 }
813
814 /* Make sure we are playing with integral modes. Pun with subregs
815 if we aren't. This must come after the entire register case above,
816 since that case is valid for any mode. The following cases are only
817 valid for integral modes. */
818 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
819 scalar_int_mode imode;
820 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
821 {
822 if (MEM_P (op0))
823 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
824 0, MEM_SIZE (op0));
825 else
826 op0 = gen_lowpart (op0_mode.require (), op0);
827 }
828
829 /* Storing an lsb-aligned field in a register
830 can be done with a movstrict instruction. */
831
832 if (!MEM_P (op0)
833 && !reverse
834 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
835 && bitsize == GET_MODE_BITSIZE (fieldmode)
836 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
837 {
838 struct expand_operand ops[2];
839 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
840 rtx arg0 = op0;
841 unsigned HOST_WIDE_INT subreg_off;
842
843 if (GET_CODE (arg0) == SUBREG)
844 {
845 /* Else we've got some float mode source being extracted into
846 a different float mode destination -- this combination of
847 subregs results in Severe Tire Damage. */
848 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
849 || GET_MODE_CLASS (fieldmode) == MODE_INT
850 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
851 arg0 = SUBREG_REG (arg0);
852 }
853
854 subreg_off = bitnum / BITS_PER_UNIT;
855 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
856 {
857 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
858
859 create_fixed_operand (&ops[0], arg0);
860 /* Shrink the source operand to FIELDMODE. */
861 create_convert_operand_to (&ops[1], value, fieldmode, false);
862 if (maybe_expand_insn (icode, 2, ops))
863 return true;
864 }
865 }
866
867 /* Handle fields bigger than a word. */
868
869 if (bitsize > BITS_PER_WORD)
870 {
871 /* Here we transfer the words of the field
872 in the order least significant first.
873 This is because the most significant word is the one which may
874 be less than full.
875 However, only do that if the value is not BLKmode. */
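/* For example, on a 32-bit little-endian target a 64-bit field is
   stored by the loop below as two word-sized pieces, low word first,
   each via a recursive call to store_bit_field_1.  */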
876
877 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
878 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
879 unsigned int i;
880 rtx_insn *last;
881
882 /* This is the mode we must force value to, so that there will be enough
883 subwords to extract. Note that fieldmode will often (always?) be
884 VOIDmode, because that is what store_field uses to indicate that this
885 is a bit field, but passing VOIDmode to operand_subword_force
886 is not allowed. */
887 fieldmode = GET_MODE (value);
888 if (fieldmode == VOIDmode)
889 fieldmode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
890
891 last = get_last_insn ();
892 for (i = 0; i < nwords; i++)
893 {
894 /* If I is 0, use the low-order word in both field and target;
895 if I is 1, use the next to lowest word; and so on. */
896 unsigned int wordnum = (backwards
897 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
898 - i - 1
899 : i);
900 unsigned int bit_offset = (backwards ^ reverse
901 ? MAX ((int) bitsize - ((int) i + 1)
902 * BITS_PER_WORD,
903 0)
904 : (int) i * BITS_PER_WORD);
905 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
906 unsigned HOST_WIDE_INT new_bitsize =
907 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
908
909 /* If the remaining chunk doesn't have full wordsize we have
910 to make sure that for big-endian machines the higher order
911 bits are used. */
912 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
913 value_word = simplify_expand_binop (word_mode, lshr_optab,
914 value_word,
915 GEN_INT (BITS_PER_WORD
916 - new_bitsize),
917 NULL_RTX, true,
918 OPTAB_LIB_WIDEN);
919
920 if (!store_bit_field_1 (op0, new_bitsize,
921 bitnum + bit_offset,
922 bitregion_start, bitregion_end,
923 word_mode,
924 value_word, reverse, fallback_p))
925 {
926 delete_insns_since (last);
927 return false;
928 }
929 }
930 return true;
931 }
932
933 /* If VALUE has a floating-point or complex mode, access it as an
934 integer of the corresponding size. This can occur on a machine
935 with 64 bit registers that uses SFmode for float. It can also
936 occur for unaligned float or complex fields. */
937 orig_value = value;
938 scalar_int_mode value_mode;
939 if (GET_MODE (value) == VOIDmode)
940 /* By this point we've dealt with values that are bigger than a word,
941 so word_mode is a conservatively correct choice. */
942 value_mode = word_mode;
943 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
944 {
945 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
946 value = gen_reg_rtx (value_mode);
947 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
948 }
949
950 /* If OP0 is a multi-word register, narrow it to the affected word.
951 If the region spans two words, defer to store_split_bit_field.
952 Don't do this if op0 is a single hard register wider than word
953 such as a float or vector register. */
954 if (!MEM_P (op0)
955 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
956 && (!REG_P (op0)
957 || !HARD_REGISTER_P (op0)
958 || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
959 {
960 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
961 {
962 if (!fallback_p)
963 return false;
964
965 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
966 bitregion_start, bitregion_end,
967 value, value_mode, reverse);
968 return true;
969 }
970 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
971 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
972 gcc_assert (op0);
973 op0_mode = word_mode;
974 bitnum %= BITS_PER_WORD;
975 }
976
977 /* From here on we can assume that the field to be stored in fits
978 within a word. If the destination is a register, it too fits
979 in a word. */
980
981 extraction_insn insv;
982 if (!MEM_P (op0)
983 && !reverse
984 && get_best_reg_extraction_insn (&insv, EP_insv,
985 GET_MODE_BITSIZE (op0_mode.require ()),
986 fieldmode)
987 && store_bit_field_using_insv (&insv, op0, op0_mode,
988 bitsize, bitnum, value, value_mode))
989 return true;
990
991 /* If OP0 is a memory, try copying it to a register and seeing if a
992 cheap register alternative is available. */
993 if (MEM_P (op0) && !reverse)
994 {
995 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
996 fieldmode)
997 && store_bit_field_using_insv (&insv, op0, op0_mode,
998 bitsize, bitnum, value, value_mode))
999 return true;
1000
1001 rtx_insn *last = get_last_insn ();
1002
1003 /* Try loading part of OP0 into a register, inserting the bitfield
1004 into that, and then copying the result back to OP0. */
1005 unsigned HOST_WIDE_INT bitpos;
1006 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1007 bitregion_start, bitregion_end,
1008 fieldmode, &bitpos);
1009 if (xop0)
1010 {
1011 rtx tempreg = copy_to_reg (xop0);
1012 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1013 bitregion_start, bitregion_end,
1014 fieldmode, orig_value, reverse, false))
1015 {
1016 emit_move_insn (xop0, tempreg);
1017 return true;
1018 }
1019 delete_insns_since (last);
1020 }
1021 }
1022
1023 if (!fallback_p)
1024 return false;
1025
1026 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1027 bitregion_end, value, value_mode, reverse);
1028 return true;
1029 }
1030
1031 /* Generate code to store value from rtx VALUE
1032 into a bit-field within structure STR_RTX
1033 containing BITSIZE bits starting at bit BITNUM.
1034
1035 BITREGION_START is bitpos of the first bitfield in this region.
1036 BITREGION_END is the bitpos of the ending bitfield in this region.
1037 These two fields are 0, if the C++ memory model does not apply,
1038 or we are not interested in keeping track of bitfield regions.
1039
1040 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1041
1042 If REVERSE is true, the store is to be done in reverse order. */
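/* As a rough illustration (the argument values here are made up, not
   taken from a real caller): storing a whole byte at bit offset 0 of
   STR_RTX would be store_bit_field (str_rtx, 8, 0, 0, 0, QImode,
   value, false), with the bit region arguments zero because no C++
   memory model constraint applies.  */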
1043
1044 void
1045 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1046 unsigned HOST_WIDE_INT bitnum,
1047 unsigned HOST_WIDE_INT bitregion_start,
1048 unsigned HOST_WIDE_INT bitregion_end,
1049 machine_mode fieldmode,
1050 rtx value, bool reverse)
1051 {
1052 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1053 scalar_int_mode int_mode;
1054 if (is_a <scalar_int_mode> (fieldmode, &int_mode)
1055 && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode,
1056 bitregion_start, bitregion_end))
1057 {
1058 /* Storing of a full word can be done with a simple store.
1059 We know here that the field can be accessed with one single
1060 instruction. For targets that support unaligned memory,
1061 an unaligned access may be necessary. */
1062 if (bitsize == GET_MODE_BITSIZE (int_mode))
1063 {
1064 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1065 bitnum / BITS_PER_UNIT);
1066 if (reverse)
1067 value = flip_storage_order (int_mode, value);
1068 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1069 emit_move_insn (str_rtx, value);
1070 }
1071 else
1072 {
1073 rtx temp;
1074
1075 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
1076 &bitnum);
1077 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
1078 temp = copy_to_reg (str_rtx);
1079 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1080 int_mode, value, reverse, true))
1081 gcc_unreachable ();
1082
1083 emit_move_insn (str_rtx, temp);
1084 }
1085
1086 return;
1087 }
1088
1089 /* Under the C++0x memory model, we must not touch bits outside the
1090 bit region. Adjust the address to start at the beginning of the
1091 bit region. */
1092 if (MEM_P (str_rtx) && bitregion_start > 0)
1093 {
1094 scalar_int_mode best_mode;
1095 machine_mode addr_mode = VOIDmode;
1096 HOST_WIDE_INT offset, size;
1097
1098 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1099
1100 offset = bitregion_start / BITS_PER_UNIT;
1101 bitnum -= bitregion_start;
1102 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1103 bitregion_end -= bitregion_start;
1104 bitregion_start = 0;
1105 if (get_best_mode (bitsize, bitnum,
1106 bitregion_start, bitregion_end,
1107 MEM_ALIGN (str_rtx), INT_MAX,
1108 MEM_VOLATILE_P (str_rtx), &best_mode))
1109 addr_mode = best_mode;
1110 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1111 offset, size);
1112 }
1113
1114 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1115 bitregion_start, bitregion_end,
1116 fieldmode, value, reverse, true))
1117 gcc_unreachable ();
1118 }
1119 \f
1120 /* Use shifts and boolean operations to store VALUE into a bit field of
1121 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1122 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1123 the mode of VALUE.
1124
1125 If REVERSE is true, the store is to be done in reverse order. */
1126
1127 static void
1128 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1129 unsigned HOST_WIDE_INT bitsize,
1130 unsigned HOST_WIDE_INT bitnum,
1131 unsigned HOST_WIDE_INT bitregion_start,
1132 unsigned HOST_WIDE_INT bitregion_end,
1133 rtx value, scalar_int_mode value_mode, bool reverse)
1134 {
1135 /* There is a case not handled here:
1136 a structure with a known alignment of just a halfword
1137 and a field split across two aligned halfwords within the structure.
1138 Or likewise a structure with a known alignment of just a byte
1139 and a field split across two bytes.
1140 Such cases are not supposed to be able to occur. */
1141
1142 scalar_int_mode best_mode;
1143 if (MEM_P (op0))
1144 {
1145 unsigned int max_bitsize = BITS_PER_WORD;
1146 scalar_int_mode imode;
1147 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1148 max_bitsize = GET_MODE_BITSIZE (imode);
1149
1150 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1151 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1152 &best_mode))
1153 {
1154 /* The only way this should occur is if the field spans word
1155 boundaries. */
1156 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1157 bitregion_start, bitregion_end,
1158 value, value_mode, reverse);
1159 return;
1160 }
1161
1162 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1163 }
1164 else
1165 best_mode = op0_mode.require ();
1166
1167 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1168 value, value_mode, reverse);
1169 }
1170
1171 /* Helper function for store_fixed_bit_field, stores
1172 the bit field always using MODE, which is the mode of OP0. The other
1173 arguments are as for store_fixed_bit_field. */
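/* For example, on a little-endian target with REVERSE false, storing
   the 3-bit value 5 at BITNUM 4 of a QImode object reduces to
   op0 = (op0 & ~0x70) | 0x50: the AND below clears the field using
   mask_rtx (mode, 4, 3, 1) and the IOR inserts the value shifted left
   by BITNUM.  */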
1174
1175 static void
1176 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1177 unsigned HOST_WIDE_INT bitsize,
1178 unsigned HOST_WIDE_INT bitnum,
1179 rtx value, scalar_int_mode value_mode, bool reverse)
1180 {
1181 rtx temp;
1182 int all_zero = 0;
1183 int all_one = 0;
1184
1185 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1186 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1187
1188 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1189 /* BITNUM is the distance between our msb
1190 and that of the containing datum.
1191 Convert it to the distance from the lsb. */
1192 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1193
1194 /* Now BITNUM is always the distance between our lsb
1195 and that of OP0. */
1196
1197 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1198 we must first convert its mode to MODE. */
1199
1200 if (CONST_INT_P (value))
1201 {
1202 unsigned HOST_WIDE_INT v = UINTVAL (value);
1203
1204 if (bitsize < HOST_BITS_PER_WIDE_INT)
1205 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1206
1207 if (v == 0)
1208 all_zero = 1;
1209 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1210 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1211 || (bitsize == HOST_BITS_PER_WIDE_INT
1212 && v == HOST_WIDE_INT_M1U))
1213 all_one = 1;
1214
1215 value = lshift_value (mode, v, bitnum);
1216 }
1217 else
1218 {
1219 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1220 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1221
1222 if (value_mode != mode)
1223 value = convert_to_mode (mode, value, 1);
1224
1225 if (must_and)
1226 value = expand_binop (mode, and_optab, value,
1227 mask_rtx (mode, 0, bitsize, 0),
1228 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1229 if (bitnum > 0)
1230 value = expand_shift (LSHIFT_EXPR, mode, value,
1231 bitnum, NULL_RTX, 1);
1232 }
1233
1234 if (reverse)
1235 value = flip_storage_order (mode, value);
1236
1237 /* Now clear the chosen bits in OP0,
1238 except that if VALUE is -1 we need not bother. */
1239 /* We keep the intermediates in registers to allow CSE to combine
1240 consecutive bitfield assignments. */
1241
1242 temp = force_reg (mode, op0);
1243
1244 if (! all_one)
1245 {
1246 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1247 if (reverse)
1248 mask = flip_storage_order (mode, mask);
1249 temp = expand_binop (mode, and_optab, temp, mask,
1250 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1251 temp = force_reg (mode, temp);
1252 }
1253
1254 /* Now logical-or VALUE into OP0, unless it is zero. */
1255
1256 if (! all_zero)
1257 {
1258 temp = expand_binop (mode, ior_optab, temp, value,
1259 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1260 temp = force_reg (mode, temp);
1261 }
1262
1263 if (op0 != temp)
1264 {
1265 op0 = copy_rtx (op0);
1266 emit_move_insn (op0, temp);
1267 }
1268 }
1269 \f
1270 /* Store a bit field that is split across multiple accessible memory objects.
1271
1272 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1273 BITSIZE is the field width; BITPOS the position of its first bit
1274 (within the word).
1275 VALUE is the value to store, which has mode VALUE_MODE.
1276 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1277 a BLKmode MEM.
1278
1279 If REVERSE is true, the store is to be done in reverse order.
1280
1281 This does not yet handle fields wider than BITS_PER_WORD. */
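/* For example, with UNIT = 32 a 20-bit field at BITPOS 28 is stored
   as a 4-bit piece at bit 28 of the first unit followed by a 16-bit
   piece at bit 0 of the second unit, each by a separate call to
   store_fixed_bit_field.  */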
1282
1283 static void
1284 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1285 unsigned HOST_WIDE_INT bitsize,
1286 unsigned HOST_WIDE_INT bitpos,
1287 unsigned HOST_WIDE_INT bitregion_start,
1288 unsigned HOST_WIDE_INT bitregion_end,
1289 rtx value, scalar_int_mode value_mode, bool reverse)
1290 {
1291 unsigned int unit, total_bits, bitsdone = 0;
1292
1293 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1294 much at a time. */
1295 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1296 unit = BITS_PER_WORD;
1297 else
1298 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1299
1300 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1301 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1302 again, and we will mutually recurse forever. */
1303 if (MEM_P (op0) && op0_mode.exists ())
1304 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1305
1306 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1307 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1308 that VALUE might be a floating-point constant. */
1309 if (CONSTANT_P (value) && !CONST_INT_P (value))
1310 {
1311 rtx word = gen_lowpart_common (word_mode, value);
1312
1313 if (word && (value != word))
1314 value = word;
1315 else
1316 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1317 value_mode = word_mode;
1318 }
1319
1320 total_bits = GET_MODE_BITSIZE (value_mode);
1321
1322 while (bitsdone < bitsize)
1323 {
1324 unsigned HOST_WIDE_INT thissize;
1325 unsigned HOST_WIDE_INT thispos;
1326 unsigned HOST_WIDE_INT offset;
1327 rtx part;
1328
1329 offset = (bitpos + bitsdone) / unit;
1330 thispos = (bitpos + bitsdone) % unit;
1331
1332 /* When the region of bytes we can touch is restricted, decrease
1333 UNIT close to the end of the region as needed. If op0 is a REG
1334 or SUBREG of REG, don't do this, as there can't be data races
1335 on a register and we can expand shorter code in some cases. */
1336 if (bitregion_end
1337 && unit > BITS_PER_UNIT
1338 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1339 && !REG_P (op0)
1340 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1341 {
1342 unit = unit / 2;
1343 continue;
1344 }
1345
1346 /* THISSIZE must not overrun a word boundary. Otherwise,
1347 store_fixed_bit_field will call us again, and we will mutually
1348 recurse forever. */
1349 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1350 thissize = MIN (thissize, unit - thispos);
1351
1352 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1353 {
1354 /* Fetch successively less significant portions. */
1355 if (CONST_INT_P (value))
1356 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1357 >> (bitsize - bitsdone - thissize))
1358 & ((HOST_WIDE_INT_1 << thissize) - 1));
1359 /* Likewise, but the source is little-endian. */
1360 else if (reverse)
1361 part = extract_fixed_bit_field (word_mode, value, value_mode,
1362 thissize,
1363 bitsize - bitsdone - thissize,
1364 NULL_RTX, 1, false);
1365 else
1366 /* The args are chosen so that the last part includes the
1367 lsb. Give extract_bit_field the value it needs (with
1368 endianness compensation) to fetch the piece we want. */
1369 part = extract_fixed_bit_field (word_mode, value, value_mode,
1370 thissize,
1371 total_bits - bitsize + bitsdone,
1372 NULL_RTX, 1, false);
1373 }
1374 else
1375 {
1376 /* Fetch successively more significant portions. */
1377 if (CONST_INT_P (value))
1378 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1379 >> bitsdone)
1380 & ((HOST_WIDE_INT_1 << thissize) - 1));
1381 /* Likewise, but the source is big-endian. */
1382 else if (reverse)
1383 part = extract_fixed_bit_field (word_mode, value, value_mode,
1384 thissize,
1385 total_bits - bitsdone - thissize,
1386 NULL_RTX, 1, false);
1387 else
1388 part = extract_fixed_bit_field (word_mode, value, value_mode,
1389 thissize, bitsdone, NULL_RTX,
1390 1, false);
1391 }
1392
1393 /* If OP0 is a register, then handle OFFSET here. */
1394 rtx op0_piece = op0;
1395 opt_scalar_int_mode op0_piece_mode = op0_mode;
1396 if (SUBREG_P (op0) || REG_P (op0))
1397 {
1398 scalar_int_mode imode;
1399 if (op0_mode.exists (&imode)
1400 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1401 {
1402 if (offset)
1403 op0_piece = const0_rtx;
1404 }
1405 else
1406 {
1407 op0_piece = operand_subword_force (op0,
1408 offset * unit / BITS_PER_WORD,
1409 GET_MODE (op0));
1410 op0_piece_mode = word_mode;
1411 }
1412 offset &= BITS_PER_WORD / unit - 1;
1413 }
1414
1415 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1416 it is just an out-of-bounds access. Ignore it. */
1417 if (op0_piece != const0_rtx)
1418 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1419 offset * unit + thispos, bitregion_start,
1420 bitregion_end, part, word_mode, reverse);
1421 bitsdone += thissize;
1422 }
1423 }
1424 \f
1425 /* A subroutine of extract_bit_field_1 that converts return value X
1426 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1427 to extract_bit_field. */
1428
1429 static rtx
1430 convert_extracted_bit_field (rtx x, machine_mode mode,
1431 machine_mode tmode, bool unsignedp)
1432 {
1433 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1434 return x;
1435
1436 /* If the x mode is not a scalar integral, first convert to the
1437 integer mode of that size and then access it as a floating-point
1438 value via a SUBREG. */
1439 if (!SCALAR_INT_MODE_P (tmode))
1440 {
1441 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1442 x = convert_to_mode (int_mode, x, unsignedp);
1443 x = force_reg (int_mode, x);
1444 return gen_lowpart (tmode, x);
1445 }
1446
1447 return convert_to_mode (tmode, x, unsignedp);
1448 }
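/* For example, if TMODE is SFmode the value is first converted to the
   equal-width integer mode (SImode on typical targets) and the final
   SFmode view is then taken with gen_lowpart.  */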
1449
1450 /* Try to use an ext(z)v pattern to extract a field from OP0.
1451 Return the extracted value on success, otherwise return null.
1452 EXTV describes the extraction instruction to use. If OP0_MODE
1453 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1454 The other arguments are as for extract_bit_field. */
1455
1456 static rtx
1457 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1458 opt_scalar_int_mode op0_mode,
1459 unsigned HOST_WIDE_INT bitsize,
1460 unsigned HOST_WIDE_INT bitnum,
1461 int unsignedp, rtx target,
1462 machine_mode mode, machine_mode tmode)
1463 {
1464 struct expand_operand ops[4];
1465 rtx spec_target = target;
1466 rtx spec_target_subreg = 0;
1467 scalar_int_mode ext_mode = extv->field_mode;
1468 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1469
1470 if (bitsize == 0 || unit < bitsize)
1471 return NULL_RTX;
1472
1473 if (MEM_P (op0))
1474 /* Get a reference to the first byte of the field. */
1475 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1476 &bitnum);
1477 else
1478 {
1479 /* Convert from counting within OP0 to counting in EXT_MODE. */
1480 if (BYTES_BIG_ENDIAN)
1481 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1482
1483 /* If op0 is a register, we need it in EXT_MODE to make it
1484 acceptable to the format of ext(z)v. */
1485 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1486 return NULL_RTX;
1487 if (REG_P (op0) && op0_mode.require () != ext_mode)
1488 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1489 }
1490
1491 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1492 "backwards" from the size of the unit we are extracting from.
1493 Otherwise, we count bits from the most significant on a
1494 BYTES/BITS_BIG_ENDIAN machine. */
1495
1496 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1497 bitnum = unit - bitsize - bitnum;
1498
1499 if (target == 0)
1500 target = spec_target = gen_reg_rtx (tmode);
1501
1502 if (GET_MODE (target) != ext_mode)
1503 {
1504 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1505 between the mode of the extraction (word_mode) and the target
1506 mode. Instead, create a temporary and use convert_move to set
1507 the target. */
1508 if (REG_P (target)
1509 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1510 {
1511 target = gen_lowpart (ext_mode, target);
1512 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1513 spec_target_subreg = target;
1514 }
1515 else
1516 target = gen_reg_rtx (ext_mode);
1517 }
1518
1519 create_output_operand (&ops[0], target, ext_mode);
1520 create_fixed_operand (&ops[1], op0);
1521 create_integer_operand (&ops[2], bitsize);
1522 create_integer_operand (&ops[3], bitnum);
1523 if (maybe_expand_insn (extv->icode, 4, ops))
1524 {
1525 target = ops[0].value;
1526 if (target == spec_target)
1527 return target;
1528 if (target == spec_target_subreg)
1529 return spec_target;
1530 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1531 }
1532 return NULL_RTX;
1533 }
1534
1535 /* A subroutine of extract_bit_field, with the same arguments.
1536 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1537 if we can find no other means of implementing the operation.
1538 If FALLBACK_P is false, return NULL instead. */
1539
1540 static rtx
1541 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1542 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1543 machine_mode mode, machine_mode tmode,
1544 bool reverse, bool fallback_p, rtx *alt_rtl)
1545 {
1546 rtx op0 = str_rtx;
1547 machine_mode mode1;
1548
1549 if (tmode == VOIDmode)
1550 tmode = mode;
1551
1552 while (GET_CODE (op0) == SUBREG)
1553 {
1554 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1555 op0 = SUBREG_REG (op0);
1556 }
1557
1558 /* If we have an out-of-bounds access to a register, just return an
1559 uninitialized register of the required mode. This can occur if the
1560 source code contains an out-of-bounds access to a small array. */
1561 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1562 return gen_reg_rtx (tmode);
1563
1564 if (REG_P (op0)
1565 && mode == GET_MODE (op0)
1566 && bitnum == 0
1567 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1568 {
1569 if (reverse)
1570 op0 = flip_storage_order (mode, op0);
1571 /* We're trying to extract a full register from itself. */
1572 return op0;
1573 }
1574
1575 /* First try to check for vector from vector extractions. */
1576 if (VECTOR_MODE_P (GET_MODE (op0))
1577 && !MEM_P (op0)
1578 && VECTOR_MODE_P (tmode)
1579 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (tmode))
1580 {
1581 machine_mode new_mode = GET_MODE (op0);
1582 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1583 {
1584 scalar_mode inner_mode = GET_MODE_INNER (tmode);
1585 unsigned int nunits = (GET_MODE_BITSIZE (GET_MODE (op0))
1586 / GET_MODE_UNIT_BITSIZE (tmode));
1587 if (!mode_for_vector (inner_mode, nunits).exists (&new_mode)
1588 || !VECTOR_MODE_P (new_mode)
1589 || GET_MODE_SIZE (new_mode) != GET_MODE_SIZE (GET_MODE (op0))
1590 || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1591 || !targetm.vector_mode_supported_p (new_mode))
1592 new_mode = VOIDmode;
1593 }
1594 if (new_mode != VOIDmode
1595 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1596 != CODE_FOR_nothing)
1597 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (tmode)
1598 == bitnum / GET_MODE_BITSIZE (tmode)))
1599 {
1600 struct expand_operand ops[3];
1601 machine_mode outermode = new_mode;
1602 machine_mode innermode = tmode;
1603 enum insn_code icode
1604 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1605 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1606
1607 if (new_mode != GET_MODE (op0))
1608 op0 = gen_lowpart (new_mode, op0);
1609 create_output_operand (&ops[0], target, innermode);
1610 ops[0].target = 1;
1611 create_input_operand (&ops[1], op0, outermode);
1612 create_integer_operand (&ops[2], pos);
1613 if (maybe_expand_insn (icode, 3, ops))
1614 {
1615 if (alt_rtl && ops[0].target)
1616 *alt_rtl = target;
1617 target = ops[0].value;
1618 if (GET_MODE (target) != mode)
1619 return gen_lowpart (tmode, target);
1620 return target;
1621 }
1622 }
1623 }
1624
1625 /* See if we can get a better vector mode before extracting. */
1626 if (VECTOR_MODE_P (GET_MODE (op0))
1627 && !MEM_P (op0)
1628 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1629 {
1630 machine_mode new_mode;
1631
1632 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1633 new_mode = MIN_MODE_VECTOR_FLOAT;
1634 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1635 new_mode = MIN_MODE_VECTOR_FRACT;
1636 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1637 new_mode = MIN_MODE_VECTOR_UFRACT;
1638 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1639 new_mode = MIN_MODE_VECTOR_ACCUM;
1640 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1641 new_mode = MIN_MODE_VECTOR_UACCUM;
1642 else
1643 new_mode = MIN_MODE_VECTOR_INT;
1644
1645 FOR_EACH_MODE_FROM (new_mode, new_mode)
1646 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1647 && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1648 && targetm.vector_mode_supported_p (new_mode))
1649 break;
1650 if (new_mode != VOIDmode)
1651 op0 = gen_lowpart (new_mode, op0);
1652 }
1653
1654 /* Use vec_extract patterns for extracting parts of vectors whenever
1655 available. */
1656 machine_mode outermode = GET_MODE (op0);
1657 scalar_mode innermode = GET_MODE_INNER (outermode);
1658 if (VECTOR_MODE_P (outermode)
1659 && !MEM_P (op0)
1660 && (convert_optab_handler (vec_extract_optab, outermode, innermode)
1661 != CODE_FOR_nothing)
1662 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (innermode)
1663 == bitnum / GET_MODE_BITSIZE (innermode)))
1664 {
1665 struct expand_operand ops[3];
1666 enum insn_code icode
1667 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1668 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1669
1670 create_output_operand (&ops[0], target, innermode);
1671 ops[0].target = 1;
1672 create_input_operand (&ops[1], op0, outermode);
1673 create_integer_operand (&ops[2], pos);
1674 if (maybe_expand_insn (icode, 3, ops))
1675 {
1676 if (alt_rtl && ops[0].target)
1677 *alt_rtl = target;
1678 target = ops[0].value;
1679 if (GET_MODE (target) != mode)
1680 return gen_lowpart (tmode, target);
1681 return target;
1682 }
1683 }
1684
1685 /* Make sure we are playing with integral modes. Pun with subregs
1686 if we aren't. */
1687 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1688 scalar_int_mode imode;
1689 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1690 {
1691 if (MEM_P (op0))
1692 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1693 0, MEM_SIZE (op0));
1694 else if (op0_mode.exists (&imode))
1695 {
1696 op0 = gen_lowpart (imode, op0);
1697
1698 /* If we got a SUBREG, force it into a register since we
1699 aren't going to be able to do another SUBREG on it. */
1700 if (GET_CODE (op0) == SUBREG)
1701 op0 = force_reg (imode, op0);
1702 }
1703 else
1704 {
1705 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1706 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1707 emit_move_insn (mem, op0);
1708 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1709 }
1710 }
1711
1712 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1713 If that's wrong, the solution is to test for it and set TARGET to 0
1714 if needed. */
1715
1716 /* Get the mode of the field to use for atomic access or subreg
1717 conversion. */
1718 if (!SCALAR_INT_MODE_P (tmode)
1719 || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1720 mode1 = mode;
1721 gcc_assert (mode1 != BLKmode);
1722
1723 /* Extraction of a full MODE1 value can be done with a subreg as long
1724 as the least significant bit of the value is the least significant
1725 bit of either OP0 or a word of OP0. */
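/* For instance, on a little-endian target the low byte of an SImode
   pseudo can typically be extracted as (subreg:QI (reg:SI) 0), without
   any shifting or masking.  */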
1726 if (!MEM_P (op0)
1727 && !reverse
1728 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
1729 && bitsize == GET_MODE_BITSIZE (mode1)
1730 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, op0_mode.require ()))
1731 {
1732 rtx sub = simplify_gen_subreg (mode1, op0, op0_mode.require (),
1733 bitnum / BITS_PER_UNIT);
1734 if (sub)
1735 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1736 }
1737
1738 /* Extraction of a full MODE1 value can be done with a load as long as
1739 the field is on a byte boundary and is sufficiently aligned. */
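/* E.g. a byte-aligned 16-bit field is fetched with a single HImode load,
   provided the MEM is sufficiently aligned for HImode (or unaligned
   accesses are cheap on the target).  */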
1740 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1741 {
1742 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1743 if (reverse)
1744 op0 = flip_storage_order (mode1, op0);
1745 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1746 }
1747
1748 /* Handle fields bigger than a word. */
1749
1750 if (bitsize > BITS_PER_WORD)
1751 {
1752 /* Here we transfer the words of the field
1753 in the order least significant first.
1754 This is because the most significant word is the one which may
1755 be less than full. */
1756
1757 const bool backwards = WORDS_BIG_ENDIAN;
1758 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1759 unsigned int i;
1760 rtx_insn *last;
1761
1762 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1763 target = gen_reg_rtx (mode);
1764
1765 /* In case we're about to clobber a base register or something
1766 (see gcc.c-torture/execute/20040625-1.c). */
1767 if (reg_mentioned_p (target, str_rtx))
1768 target = gen_reg_rtx (mode);
1769
1770 /* Indicate for flow that the entire target reg is being set. */
1771 emit_clobber (target);
1772
1773 last = get_last_insn ();
1774 for (i = 0; i < nwords; i++)
1775 {
1776 /* If I is 0, use the low-order word in both field and target;
1777 if I is 1, use the next to lowest word; and so on. */
1778 /* Word number in TARGET to use. */
1779 unsigned int wordnum
1780 = (backwards
1781 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1782 : i);
1783 /* Offset from start of field in OP0. */
1784 unsigned int bit_offset = (backwards ^ reverse
1785 ? MAX ((int) bitsize - ((int) i + 1)
1786 * BITS_PER_WORD,
1787 0)
1788 : (int) i * BITS_PER_WORD);
1789 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1790 rtx result_part
1791 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1792 bitsize - i * BITS_PER_WORD),
1793 bitnum + bit_offset, 1, target_part,
1794 mode, word_mode, reverse, fallback_p, NULL);
1795
1796 gcc_assert (target_part);
1797 if (!result_part)
1798 {
1799 delete_insns_since (last);
1800 return NULL;
1801 }
1802
1803 if (result_part != target_part)
1804 emit_move_insn (target_part, result_part);
1805 }
1806
1807 if (unsignedp)
1808 {
1809 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1810 need to be zero'd out. */
1811 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1812 {
1813 unsigned int i, total_words;
1814
1815 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1816 for (i = nwords; i < total_words; i++)
1817 emit_move_insn
1818 (operand_subword (target,
1819 backwards ? total_words - i - 1 : i,
1820 1, VOIDmode),
1821 const0_rtx);
1822 }
1823 return target;
1824 }
1825
1826 /* Signed bit field: sign-extend with two arithmetic shifts. */
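/* E.g. on a target with 32-bit words, a 40-bit signed field extracted
   into DImode is shifted left by 24 and then arithmetically right by 24
   to propagate its sign bit.  */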
1827 target = expand_shift (LSHIFT_EXPR, mode, target,
1828 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1829 return expand_shift (RSHIFT_EXPR, mode, target,
1830 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1831 }
1832
1833 /* If OP0 is a multi-word register, narrow it to the affected word.
1834 If the region spans two words, defer to extract_split_bit_field. */
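/* E.g. with 32-bit words, an 8-bit field at bit 40 of a DImode register
   lies entirely within the word at byte offset 4, so we extract from that
   subword with BITNUM reduced to 8.  */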
1835 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1836 {
1837 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1838 {
1839 if (!fallback_p)
1840 return NULL_RTX;
1841 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1842 unsignedp, reverse);
1843 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1844 }
1845 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1846 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1847 op0_mode = word_mode;
1848 bitnum %= BITS_PER_WORD;
1849 }
1850
1851 /* From here on we know the desired field is smaller than a word.
1852 If OP0 is a register, it too fits within a word. */
1853 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1854 extraction_insn extv;
1855 if (!MEM_P (op0)
1856 && !reverse
1857 /* ??? We could limit the structure size to the part of OP0 that
1858 contains the field, with appropriate checks for endianness
1859 and TARGET_TRULY_NOOP_TRUNCATION. */
1860 && get_best_reg_extraction_insn (&extv, pattern,
1861 GET_MODE_BITSIZE (op0_mode.require ()),
1862 tmode))
1863 {
1864 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1865 bitsize, bitnum,
1866 unsignedp, target, mode,
1867 tmode);
1868 if (result)
1869 return result;
1870 }
1871
1872 /* If OP0 is a memory, try copying it to a register and seeing if a
1873 cheap register alternative is available. */
1874 if (MEM_P (op0) && !reverse)
1875 {
1876 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1877 tmode))
1878 {
1879 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1880 bitsize, bitnum,
1881 unsignedp, target, mode,
1882 tmode);
1883 if (result)
1884 return result;
1885 }
1886
1887 rtx_insn *last = get_last_insn ();
1888
1889 /* Try loading part of OP0 into a register and extracting the
1890 bitfield from that. */
1891 unsigned HOST_WIDE_INT bitpos;
1892 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1893 0, 0, tmode, &bitpos);
1894 if (xop0)
1895 {
1896 xop0 = copy_to_reg (xop0);
1897 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1898 unsignedp, target,
1899 mode, tmode, reverse, false, NULL);
1900 if (result)
1901 return result;
1902 delete_insns_since (last);
1903 }
1904 }
1905
1906 if (!fallback_p)
1907 return NULL;
1908
1909 /* Find a correspondingly-sized integer field, so we can apply
1910 shifts and masks to it. */
1911 scalar_int_mode int_mode;
1912 if (!int_mode_for_mode (tmode).exists (&int_mode))
1913 /* If this fails, we should probably push op0 out to memory and then
1914 do a load. */
1915 int_mode = int_mode_for_mode (mode).require ();
1916
1917 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
1918 bitnum, target, unsignedp, reverse);
1919
1920 /* Complex values must be reversed piecewise, so we need to undo the global
1921 reversal, convert to the complex mode and reverse again. */
1922 if (reverse && COMPLEX_MODE_P (tmode))
1923 {
1924 target = flip_storage_order (int_mode, target);
1925 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1926 target = flip_storage_order (tmode, target);
1927 }
1928 else
1929 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1930
1931 return target;
1932 }
1933
1934 /* Generate code to extract a bit-field from STR_RTX
1935 containing BITSIZE bits, starting at BITNUM,
1936 and put it in TARGET if possible (if TARGET is nonzero).
1937 Regardless of TARGET, we return the rtx for where the value is placed.
1938
1939 STR_RTX is the structure containing the byte (a REG or MEM).
1940 UNSIGNEDP is nonzero if this is an unsigned bit field.
1941 MODE is the natural mode of the field value once extracted.
1942 TMODE is the mode the caller would like the value to have;
1943 but the value may be returned with type MODE instead.
1944
1945 If REVERSE is true, the extraction is to be done in reverse order.
1946
1947 If a TARGET is specified and we can store in it at no extra cost,
1948 we do so, and return TARGET.
1949 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1950 if they are equally easy. */
1951
1952 rtx
1953 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1954 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1955 machine_mode mode, machine_mode tmode, bool reverse,
1956 rtx *alt_rtl)
1957 {
1958 machine_mode mode1;
1959
1960 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1961 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1962 mode1 = GET_MODE (str_rtx);
1963 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1964 mode1 = GET_MODE (target);
1965 else
1966 mode1 = tmode;
1967
1968 scalar_int_mode int_mode;
1969 if (is_a <scalar_int_mode> (mode1, &int_mode)
1970 && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode, 0, 0))
1971 {
1972 /* Extraction of a full INT_MODE value can be done with a simple load.
1973 We know here that the field can be accessed with one single
1974 instruction. For targets that support unaligned memory,
1975 an unaligned access may be necessary. */
1976 if (bitsize == GET_MODE_BITSIZE (int_mode))
1977 {
1978 rtx result = adjust_bitfield_address (str_rtx, int_mode,
1979 bitnum / BITS_PER_UNIT);
1980 if (reverse)
1981 result = flip_storage_order (int_mode, result);
1982 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1983 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1984 }
1985
1986 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
1987 &bitnum);
1988 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
1989 str_rtx = copy_to_reg (str_rtx);
1990 }
1991
1992 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1993 target, mode, tmode, reverse, true, alt_rtl);
1994 }
1995 \f
1996 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1997 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
1998 otherwise OP0 is a BLKmode MEM.
1999
2000 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2001 If REVERSE is true, the extraction is to be done in reverse order.
2002
2003 If TARGET is nonzero, attempts to store the value there
2004 and return TARGET, but this is not guaranteed.
2005 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2006
2007 static rtx
2008 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2009 opt_scalar_int_mode op0_mode,
2010 unsigned HOST_WIDE_INT bitsize,
2011 unsigned HOST_WIDE_INT bitnum, rtx target,
2012 int unsignedp, bool reverse)
2013 {
2014 scalar_int_mode mode;
2015 if (MEM_P (op0))
2016 {
2017 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2018 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2019 /* The only way this should occur is if the field spans word
2020 boundaries. */
2021 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2022 unsignedp, reverse);
2023
2024 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2025 }
2026 else
2027 mode = op0_mode.require ();
2028
2029 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2030 target, unsignedp, reverse);
2031 }
2032
2033 /* Helper function for extract_fixed_bit_field, extracts
2034 the bit field always using MODE, which is the mode of OP0.
2035 The other arguments are as for extract_fixed_bit_field. */
2036
2037 static rtx
2038 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2039 unsigned HOST_WIDE_INT bitsize,
2040 unsigned HOST_WIDE_INT bitnum, rtx target,
2041 int unsignedp, bool reverse)
2042 {
2043 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2044 for invalid input, such as extract equivalent of f5 from
2045 gcc.dg/pr48335-2.c. */
2046
2047 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2048 /* BITNUM is the distance between our msb and that of OP0.
2049 Convert it to the distance from the lsb. */
2050 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
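/* E.g. in a 32-bit MODE, an 8-bit field whose msb is 8 bits below OP0's
   msb has its lsb 16 bits above OP0's lsb: 32 - 8 - 8 = 16.  */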
2051
2052 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2053 We have reduced the big-endian case to the little-endian case. */
2054 if (reverse)
2055 op0 = flip_storage_order (mode, op0);
2056
2057 if (unsignedp)
2058 {
2059 if (bitnum)
2060 {
2061 /* If the field does not already start at the lsb,
2062 shift it so it does. */
2063 /* Maybe propagate the target for the shift. */
2064 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2065 if (tmode != mode)
2066 subtarget = 0;
2067 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2068 }
2069 /* Convert the value to the desired mode. TMODE must also be a
2070 scalar integer for this conversion to make sense, since we
2071 shouldn't reinterpret the bits. */
2072 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2073 if (mode != new_mode)
2074 op0 = convert_to_mode (new_mode, op0, 1);
2075
2076 /* Unless the msb of the field used to be the msb when we shifted,
2077 mask out the upper bits. */
2078
2079 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2080 return expand_binop (new_mode, and_optab, op0,
2081 mask_rtx (new_mode, 0, bitsize, 0),
2082 target, 1, OPTAB_LIB_WIDEN);
2083 return op0;
2084 }
2085
2086 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2087 then arithmetic-shift its lsb to the lsb of the word. */
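/* E.g. a 5-bit field at BITNUM 2 fits in QImode: shift left by 1 so the
   field's msb becomes bit 7, then shift right arithmetically by 3 to
   sign-extend it into the low 5 bits.  */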
2088 op0 = force_reg (mode, op0);
2089
2090 /* Find the narrowest integer mode that contains the field. */
2091
2092 opt_scalar_int_mode mode_iter;
2093 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2094 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2095 break;
2096
2097 mode = mode_iter.require ();
2098 op0 = convert_to_mode (mode, op0, 0);
2099
2100 if (mode != tmode)
2101 target = 0;
2102
2103 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2104 {
2105 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2106 /* Maybe propagate the target for the shift. */
2107 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2108 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2109 }
2110
2111 return expand_shift (RSHIFT_EXPR, mode, op0,
2112 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2113 }
2114
2115 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2116 VALUE << BITPOS. */
2117
2118 static rtx
2119 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2120 int bitpos)
2121 {
2122 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2123 }
2124 \f
2125 /* Extract a bit field that is split across two words
2126 and return an RTX for the result.
2127
2128 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2129 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2130 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2131 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2132 a BLKmode MEM.
2133
2134 If REVERSE is true, the extraction is to be done in reverse order. */
2135
2136 static rtx
2137 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2138 unsigned HOST_WIDE_INT bitsize,
2139 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2140 bool reverse)
2141 {
2142 unsigned int unit;
2143 unsigned int bitsdone = 0;
2144 rtx result = NULL_RTX;
2145 int first = 1;
2146
2147 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2148 much at a time. */
2149 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2150 unit = BITS_PER_WORD;
2151 else
2152 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
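/* E.g. with 32-bit words, a 20-bit field starting at bit 28 is read as a
   4-bit piece from the first word and a 16-bit piece from the second;
   the pieces are shifted into place and IORed together below.  */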
2153
2154 while (bitsdone < bitsize)
2155 {
2156 unsigned HOST_WIDE_INT thissize;
2157 rtx part;
2158 unsigned HOST_WIDE_INT thispos;
2159 unsigned HOST_WIDE_INT offset;
2160
2161 offset = (bitpos + bitsdone) / unit;
2162 thispos = (bitpos + bitsdone) % unit;
2163
2164 /* THISSIZE must not overrun a word boundary. Otherwise,
2165 extract_fixed_bit_field will call us again, and we will mutually
2166 recurse forever. */
2167 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2168 thissize = MIN (thissize, unit - thispos);
2169
2170 /* If OP0 is a register, then handle OFFSET here. */
2171 rtx op0_piece = op0;
2172 opt_scalar_int_mode op0_piece_mode = op0_mode;
2173 if (SUBREG_P (op0) || REG_P (op0))
2174 {
2175 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2176 op0_piece_mode = word_mode;
2177 offset = 0;
2178 }
2179
2180 /* Extract the parts in bit-counting order,
2181 whose meaning is determined by BYTES_PER_UNIT.
2182 OFFSET is in UNITs, and UNIT is in bits. */
2183 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2184 thissize, offset * unit + thispos,
2185 0, 1, reverse);
2186 bitsdone += thissize;
2187
2188 /* Shift this part into place for the result. */
2189 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2190 {
2191 if (bitsize != bitsdone)
2192 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2193 bitsize - bitsdone, 0, 1);
2194 }
2195 else
2196 {
2197 if (bitsdone != thissize)
2198 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2199 bitsdone - thissize, 0, 1);
2200 }
2201
2202 if (first)
2203 result = part;
2204 else
2205 /* Combine the parts with bitwise or. This works
2206 because we extracted each part as an unsigned bit field. */
2207 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2208 OPTAB_LIB_WIDEN);
2209
2210 first = 0;
2211 }
2212
2213 /* Unsigned bit field: we are done. */
2214 if (unsignedp)
2215 return result;
2216 /* Signed bit field: sign-extend with two arithmetic shifts. */
2217 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2218 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2219 return expand_shift (RSHIFT_EXPR, word_mode, result,
2220 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2221 }
2222 \f
2223 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2224 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2225 MODE, fill the upper bits with zeros. Fail if the layout of either
2226 mode is unknown (as for CC modes) or if the extraction would involve
2227 unprofitable mode punning. Return the value on success, otherwise
2228 return null.
2229
2230 This is different from gen_lowpart* in these respects:
2231
2232 - the returned value must always be considered an rvalue
2233
2234 - when MODE is wider than SRC_MODE, the extraction involves
2235 a zero extension
2236
2237 - when MODE is smaller than SRC_MODE, the extraction involves
2238 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2239
2240 In other words, this routine performs a computation, whereas the
2241 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2242 operations. */
2243
2244 rtx
2245 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2246 {
2247 scalar_int_mode int_mode, src_int_mode;
2248
2249 if (mode == src_mode)
2250 return src;
2251
2252 if (CONSTANT_P (src))
2253 {
2254 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2255 fails, it will happily create (subreg (symbol_ref)) or similar
2256 invalid SUBREGs. */
2257 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2258 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2259 if (ret)
2260 return ret;
2261
2262 if (GET_MODE (src) == VOIDmode
2263 || !validate_subreg (mode, src_mode, src, byte))
2264 return NULL_RTX;
2265
2266 src = force_reg (GET_MODE (src), src);
2267 return gen_rtx_SUBREG (mode, src, byte);
2268 }
2269
2270 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2271 return NULL_RTX;
2272
2273 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2274 && targetm.modes_tieable_p (mode, src_mode))
2275 {
2276 rtx x = gen_lowpart_common (mode, src);
2277 if (x)
2278 return x;
2279 }
2280
2281 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2282 || !int_mode_for_mode (mode).exists (&int_mode))
2283 return NULL_RTX;
2284
2285 if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2286 return NULL_RTX;
2287 if (!targetm.modes_tieable_p (int_mode, mode))
2288 return NULL_RTX;
2289
2290 src = gen_lowpart (src_int_mode, src);
2291 src = convert_modes (int_mode, src_int_mode, src, true);
2292 src = gen_lowpart (mode, src);
2293 return src;
2294 }
2295 \f
2296 /* Add INC into TARGET. */
2297
2298 void
2299 expand_inc (rtx target, rtx inc)
2300 {
2301 rtx value = expand_binop (GET_MODE (target), add_optab,
2302 target, inc,
2303 target, 0, OPTAB_LIB_WIDEN);
2304 if (value != target)
2305 emit_move_insn (target, value);
2306 }
2307
2308 /* Subtract DEC from TARGET. */
2309
2310 void
2311 expand_dec (rtx target, rtx dec)
2312 {
2313 rtx value = expand_binop (GET_MODE (target), sub_optab,
2314 target, dec,
2315 target, 0, OPTAB_LIB_WIDEN);
2316 if (value != target)
2317 emit_move_insn (target, value);
2318 }
2319 \f
2320 /* Output a shift instruction for expression code CODE,
2321 with SHIFTED being the rtx for the value to shift,
2322 and AMOUNT the rtx for the amount to shift by.
2323 Store the result in the rtx TARGET, if that is convenient.
2324 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2325 Return the rtx for where the value is.
2326 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2327 in which case 0 is returned. */
2328
2329 static rtx
2330 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2331 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2332 {
2333 rtx op1, temp = 0;
2334 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2335 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2336 optab lshift_optab = ashl_optab;
2337 optab rshift_arith_optab = ashr_optab;
2338 optab rshift_uns_optab = lshr_optab;
2339 optab lrotate_optab = rotl_optab;
2340 optab rrotate_optab = rotr_optab;
2341 machine_mode op1_mode;
2342 scalar_mode scalar_mode = GET_MODE_INNER (mode);
2343 int attempt;
2344 bool speed = optimize_insn_for_speed_p ();
2345
2346 op1 = amount;
2347 op1_mode = GET_MODE (op1);
2348
2349 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2350 shift amount is a vector, use the vector/vector shift patterns. */
2351 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2352 {
2353 lshift_optab = vashl_optab;
2354 rshift_arith_optab = vashr_optab;
2355 rshift_uns_optab = vlshr_optab;
2356 lrotate_optab = vrotl_optab;
2357 rrotate_optab = vrotr_optab;
2358 }
2359
2360 /* Previously detected shift-counts computed by NEGATE_EXPR
2361 and shifted in the other direction; but that does not work
2362 on all machines. */
2363
2364 if (SHIFT_COUNT_TRUNCATED)
2365 {
2366 if (CONST_INT_P (op1)
2367 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2368 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2369 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2370 % GET_MODE_BITSIZE (scalar_mode));
2371 else if (GET_CODE (op1) == SUBREG
2372 && subreg_lowpart_p (op1)
2373 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2374 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2375 op1 = SUBREG_REG (op1);
2376 }
2377
2378 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2379 prefer left rotation; if op1 is from bitsize / 2 + 1 to
2380 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2381 amount instead. */
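/* E.g. in SImode a rotate right by 25 becomes a rotate left by 7, and a
   rotate right by 16 becomes a rotate left by 16.  */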
2382 if (rotate
2383 && CONST_INT_P (op1)
2384 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2385 GET_MODE_BITSIZE (scalar_mode) - 1))
2386 {
2387 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2388 left = !left;
2389 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2390 }
2391
2392 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2393 Note that this is not the case for bigger values. For instance a rotation
2394 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2395 0x04030201 (bswapsi). */
2396 if (rotate
2397 && CONST_INT_P (op1)
2398 && INTVAL (op1) == BITS_PER_UNIT
2399 && GET_MODE_SIZE (scalar_mode) == 2
2400 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2401 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2402 unsignedp);
2403
2404 if (op1 == const0_rtx)
2405 return shifted;
2406
2407 /* Check whether it's cheaper to implement a left shift by a constant
2408 bit count by a sequence of additions. */
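/* E.g. x << 2 can be done as two self-additions (x + x, then the sum
   plus itself) when two adds are cheaper than the shift on this target.  */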
2409 if (code == LSHIFT_EXPR
2410 && CONST_INT_P (op1)
2411 && INTVAL (op1) > 0
2412 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2413 && INTVAL (op1) < MAX_BITS_PER_WORD
2414 && (shift_cost (speed, mode, INTVAL (op1))
2415 > INTVAL (op1) * add_cost (speed, mode))
2416 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2417 {
2418 int i;
2419 for (i = 0; i < INTVAL (op1); i++)
2420 {
2421 temp = force_reg (mode, shifted);
2422 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2423 unsignedp, OPTAB_LIB_WIDEN);
2424 }
2425 return shifted;
2426 }
2427
2428 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2429 {
2430 enum optab_methods methods;
2431
2432 if (attempt == 0)
2433 methods = OPTAB_DIRECT;
2434 else if (attempt == 1)
2435 methods = OPTAB_WIDEN;
2436 else
2437 methods = OPTAB_LIB_WIDEN;
2438
2439 if (rotate)
2440 {
2441 /* Widening does not work for rotation. */
2442 if (methods == OPTAB_WIDEN)
2443 continue;
2444 else if (methods == OPTAB_LIB_WIDEN)
2445 {
2446 /* If we have been unable to open-code this by a rotation,
2447 do it as the IOR of two shifts. I.e., to rotate A
2448 by N bits, compute
2449 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2450 where C is the bitsize of A.
2451
2452 It is theoretically possible that the target machine might
2453 not be able to perform either shift and hence we would
2454 be making two libcalls rather than just the one for the
2455 shift (similarly if IOR could not be done). We will allow
2456 this extremely unlikely lossage to avoid complicating the
2457 code below. */
2458
2459 rtx subtarget = target == shifted ? 0 : target;
2460 rtx new_amount, other_amount;
2461 rtx temp1;
2462
2463 new_amount = op1;
2464 if (op1 == const0_rtx)
2465 return shifted;
2466 else if (CONST_INT_P (op1))
2467 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2468 - INTVAL (op1));
2469 else
2470 {
2471 other_amount
2472 = simplify_gen_unary (NEG, GET_MODE (op1),
2473 op1, GET_MODE (op1));
2474 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2475 other_amount
2476 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2477 gen_int_mode (mask, GET_MODE (op1)));
2478 }
2479
2480 shifted = force_reg (mode, shifted);
2481
2482 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2483 mode, shifted, new_amount, 0, 1);
2484 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2485 mode, shifted, other_amount,
2486 subtarget, 1);
2487 return expand_binop (mode, ior_optab, temp, temp1, target,
2488 unsignedp, methods);
2489 }
2490
2491 temp = expand_binop (mode,
2492 left ? lrotate_optab : rrotate_optab,
2493 shifted, op1, target, unsignedp, methods);
2494 }
2495 else if (unsignedp)
2496 temp = expand_binop (mode,
2497 left ? lshift_optab : rshift_uns_optab,
2498 shifted, op1, target, unsignedp, methods);
2499
2500 /* Do arithmetic shifts.
2501 Also, if we are going to widen the operand, we can just as well
2502 use an arithmetic right-shift instead of a logical one. */
2503 if (temp == 0 && ! rotate
2504 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2505 {
2506 enum optab_methods methods1 = methods;
2507
2508 /* If trying to widen a log shift to an arithmetic shift,
2509 don't accept an arithmetic shift of the same size. */
2510 if (unsignedp)
2511 methods1 = OPTAB_MUST_WIDEN;
2512
2513 /* Arithmetic shift */
2514
2515 temp = expand_binop (mode,
2516 left ? lshift_optab : rshift_arith_optab,
2517 shifted, op1, target, unsignedp, methods1);
2518 }
2519
2520 /* We used to try extzv here for logical right shifts, but that was
2521 only useful for one machine, the VAX, and caused poor code
2522 generation there for lshrdi3, so the code was deleted and a
2523 define_expand for lshrsi3 was added to vax.md. */
2524 }
2525
2526 gcc_assert (temp != NULL_RTX || may_fail);
2527 return temp;
2528 }
2529
2530 /* Output a shift instruction for expression code CODE,
2531 with SHIFTED being the rtx for the value to shift,
2532 and AMOUNT the amount to shift by.
2533 Store the result in the rtx TARGET, if that is convenient.
2534 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2535 Return the rtx for where the value is. */
2536
2537 rtx
2538 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2539 int amount, rtx target, int unsignedp)
2540 {
2541 return expand_shift_1 (code, mode,
2542 shifted, GEN_INT (amount), target, unsignedp);
2543 }
2544
2545 /* Likewise, but return 0 if that cannot be done. */
2546
2547 static rtx
2548 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2549 int amount, rtx target, int unsignedp)
2550 {
2551 return expand_shift_1 (code, mode,
2552 shifted, GEN_INT (amount), target, unsignedp, true);
2553 }
2554
2555 /* Output a shift instruction for expression code CODE,
2556 with SHIFTED being the rtx for the value to shift,
2557 and AMOUNT the tree for the amount to shift by.
2558 Store the result in the rtx TARGET, if that is convenient.
2559 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2560 Return the rtx for where the value is. */
2561
2562 rtx
2563 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2564 tree amount, rtx target, int unsignedp)
2565 {
2566 return expand_shift_1 (code, mode,
2567 shifted, expand_normal (amount), target, unsignedp);
2568 }
2569
2570 \f
2571 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2572 const struct mult_cost *, machine_mode mode);
2573 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2574 const struct algorithm *, enum mult_variant);
2575 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2576 static rtx extract_high_half (scalar_int_mode, rtx);
2577 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2578 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2579 int, int);
2580 /* Compute and return the best algorithm for multiplying by T.
2581 The algorithm must cost less than cost_limit
2582 If retval.cost >= COST_LIMIT, no algorithm was found and all
2583 other field of the returned struct are undefined.
2584 MODE is the machine mode of the multiplication. */
2585
2586 static void
2587 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2588 const struct mult_cost *cost_limit, machine_mode mode)
2589 {
2590 int m;
2591 struct algorithm *alg_in, *best_alg;
2592 struct mult_cost best_cost;
2593 struct mult_cost new_limit;
2594 int op_cost, op_latency;
2595 unsigned HOST_WIDE_INT orig_t = t;
2596 unsigned HOST_WIDE_INT q;
2597 int maxm, hash_index;
2598 bool cache_hit = false;
2599 enum alg_code cache_alg = alg_zero;
2600 bool speed = optimize_insn_for_speed_p ();
2601 scalar_int_mode imode;
2602 struct alg_hash_entry *entry_ptr;
2603
2604 /* Indicate that no algorithm is yet found. If no algorithm
2605 is found, this value will be returned and indicate failure. */
2606 alg_out->cost.cost = cost_limit->cost + 1;
2607 alg_out->cost.latency = cost_limit->latency + 1;
2608
2609 if (cost_limit->cost < 0
2610 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2611 return;
2612
2613 /* Be prepared for vector modes. */
2614 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2615
2616 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2617
2618 /* Restrict the bits of "t" to the multiplication's mode. */
2619 t &= GET_MODE_MASK (imode);
2620
2621 /* t == 1 can be done in zero cost. */
2622 if (t == 1)
2623 {
2624 alg_out->ops = 1;
2625 alg_out->cost.cost = 0;
2626 alg_out->cost.latency = 0;
2627 alg_out->op[0] = alg_m;
2628 return;
2629 }
2630
2631 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2632 fail now. */
2633 if (t == 0)
2634 {
2635 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2636 return;
2637 else
2638 {
2639 alg_out->ops = 1;
2640 alg_out->cost.cost = zero_cost (speed);
2641 alg_out->cost.latency = zero_cost (speed);
2642 alg_out->op[0] = alg_zero;
2643 return;
2644 }
2645 }
2646
2647 /* We'll be needing a couple extra algorithm structures now. */
2648
2649 alg_in = XALLOCA (struct algorithm);
2650 best_alg = XALLOCA (struct algorithm);
2651 best_cost = *cost_limit;
2652
2653 /* Compute the hash index. */
2654 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2655
2656 /* See if we already know what to do for T. */
2657 entry_ptr = alg_hash_entry_ptr (hash_index);
2658 if (entry_ptr->t == t
2659 && entry_ptr->mode == mode
2660 && entry_ptr->speed == speed
2661 && entry_ptr->alg != alg_unknown)
2662 {
2663 cache_alg = entry_ptr->alg;
2664
2665 if (cache_alg == alg_impossible)
2666 {
2667 /* The cache tells us that it's impossible to synthesize
2668 multiplication by T within entry_ptr->cost. */
2669 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2670 /* COST_LIMIT is at least as restrictive as the one
2671 recorded in the hash table, in which case we have no
2672 hope of synthesizing a multiplication. Just
2673 return. */
2674 return;
2675
2676 /* If we get here, COST_LIMIT is less restrictive than the
2677 one recorded in the hash table, so we may be able to
2678 synthesize a multiplication. Proceed as if we didn't
2679 have the cache entry. */
2680 }
2681 else
2682 {
2683 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2684 /* The cached algorithm shows that this multiplication
2685 requires more cost than COST_LIMIT. Just return. This
2686 way, we don't clobber this cache entry with
2687 alg_impossible but retain useful information. */
2688 return;
2689
2690 cache_hit = true;
2691
2692 switch (cache_alg)
2693 {
2694 case alg_shift:
2695 goto do_alg_shift;
2696
2697 case alg_add_t_m2:
2698 case alg_sub_t_m2:
2699 goto do_alg_addsub_t_m2;
2700
2701 case alg_add_factor:
2702 case alg_sub_factor:
2703 goto do_alg_addsub_factor;
2704
2705 case alg_add_t2_m:
2706 goto do_alg_add_t2_m;
2707
2708 case alg_sub_t2_m:
2709 goto do_alg_sub_t2_m;
2710
2711 default:
2712 gcc_unreachable ();
2713 }
2714 }
2715 }
2716
2717 /* If we have a group of zero bits at the low-order part of T, try
2718 multiplying by the remaining bits and then doing a shift. */
2719
2720 if ((t & 1) == 0)
2721 {
2722 do_alg_shift:
2723 m = ctz_or_zero (t); /* m = number of low zero bits */
2724 if (m < maxm)
2725 {
2726 q = t >> m;
2727 /* The function expand_shift will choose between a shift and
2728 a sequence of additions, so the observed cost is given as
2729 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2730 op_cost = m * add_cost (speed, mode);
2731 if (shift_cost (speed, mode, m) < op_cost)
2732 op_cost = shift_cost (speed, mode, m);
2733 new_limit.cost = best_cost.cost - op_cost;
2734 new_limit.latency = best_cost.latency - op_cost;
2735 synth_mult (alg_in, q, &new_limit, mode);
2736
2737 alg_in->cost.cost += op_cost;
2738 alg_in->cost.latency += op_cost;
2739 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2740 {
2741 best_cost = alg_in->cost;
2742 std::swap (alg_in, best_alg);
2743 best_alg->log[best_alg->ops] = m;
2744 best_alg->op[best_alg->ops] = alg_shift;
2745 }
2746
2747 /* See if treating ORIG_T as a signed number yields a better
2748 sequence. Try this sequence only for a negative ORIG_T
2749 as it would be useless for a non-negative ORIG_T. */
2750 if ((HOST_WIDE_INT) orig_t < 0)
2751 {
2752 /* Shift ORIG_T as follows because a right shift of a
2753 negative-valued signed type is implementation
2754 defined. */
2755 q = ~(~orig_t >> m);
2756 /* The function expand_shift will choose between a shift
2757 and a sequence of additions, so the observed cost is
2758 given as MIN (m * add_cost(speed, mode),
2759 shift_cost(speed, mode, m)). */
2760 op_cost = m * add_cost (speed, mode);
2761 if (shift_cost (speed, mode, m) < op_cost)
2762 op_cost = shift_cost (speed, mode, m);
2763 new_limit.cost = best_cost.cost - op_cost;
2764 new_limit.latency = best_cost.latency - op_cost;
2765 synth_mult (alg_in, q, &new_limit, mode);
2766
2767 alg_in->cost.cost += op_cost;
2768 alg_in->cost.latency += op_cost;
2769 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2770 {
2771 best_cost = alg_in->cost;
2772 std::swap (alg_in, best_alg);
2773 best_alg->log[best_alg->ops] = m;
2774 best_alg->op[best_alg->ops] = alg_shift;
2775 }
2776 }
2777 }
2778 if (cache_hit)
2779 goto done;
2780 }
2781
2782 /* If we have an odd number, add or subtract one. */
2783 if ((t & 1) != 0)
2784 {
2785 unsigned HOST_WIDE_INT w;
2786
2787 do_alg_addsub_t_m2:
2788 for (w = 1; (w & t) != 0; w <<= 1)
2789 ;
2790 /* If T was -1, then W will be zero after the loop. This is another
2791 case where T ends with ...111. Handling this with (T + 1) and
2792 subtract 1 produces slightly better code and results in algorithm
2793 selection much faster than treating it like the ...0111 case
2794 below. */
2795 if (w == 0
2796 || (w > 2
2797 /* Reject the case where t is 3.
2798 Thus we prefer addition in that case. */
2799 && t != 3))
2800 {
2801 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
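/* E.g. for t == 7 we synthesize t + 1 == 8 (a single shift) and then
   subtract the multiplicand once: x*7 == (x << 3) - x.  */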
2802
2803 op_cost = add_cost (speed, mode);
2804 new_limit.cost = best_cost.cost - op_cost;
2805 new_limit.latency = best_cost.latency - op_cost;
2806 synth_mult (alg_in, t + 1, &new_limit, mode);
2807
2808 alg_in->cost.cost += op_cost;
2809 alg_in->cost.latency += op_cost;
2810 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2811 {
2812 best_cost = alg_in->cost;
2813 std::swap (alg_in, best_alg);
2814 best_alg->log[best_alg->ops] = 0;
2815 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2816 }
2817 }
2818 else
2819 {
2820 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2821
2822 op_cost = add_cost (speed, mode);
2823 new_limit.cost = best_cost.cost - op_cost;
2824 new_limit.latency = best_cost.latency - op_cost;
2825 synth_mult (alg_in, t - 1, &new_limit, mode);
2826
2827 alg_in->cost.cost += op_cost;
2828 alg_in->cost.latency += op_cost;
2829 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2830 {
2831 best_cost = alg_in->cost;
2832 std::swap (alg_in, best_alg);
2833 best_alg->log[best_alg->ops] = 0;
2834 best_alg->op[best_alg->ops] = alg_add_t_m2;
2835 }
2836 }
2837
2838 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2839 quickly with a - a * n for some appropriate constant n. */
2840 m = exact_log2 (-orig_t + 1);
2841 if (m >= 0 && m < maxm)
2842 {
2843 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2844 /* If the target has a cheap shift-and-subtract insn, use
2845 that in preference to a shift insn followed by a sub insn.
2846 Assume that the shift-and-sub is "atomic" with a latency
2847 equal to its cost; otherwise assume that on superscalar
2848 hardware the shift may be executed concurrently with the
2849 earlier steps in the algorithm. */
2850 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2851 {
2852 op_cost = shiftsub1_cost (speed, mode, m);
2853 op_latency = op_cost;
2854 }
2855 else
2856 op_latency = add_cost (speed, mode);
2857
2858 new_limit.cost = best_cost.cost - op_cost;
2859 new_limit.latency = best_cost.latency - op_latency;
2860 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2861 &new_limit, mode);
2862
2863 alg_in->cost.cost += op_cost;
2864 alg_in->cost.latency += op_latency;
2865 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2866 {
2867 best_cost = alg_in->cost;
2868 std::swap (alg_in, best_alg);
2869 best_alg->log[best_alg->ops] = m;
2870 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2871 }
2872 }
2873
2874 if (cache_hit)
2875 goto done;
2876 }
2877
2878 /* Look for factors of t of the form
2879 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2880 If we find such a factor, we can multiply by t using an algorithm that
2881 multiplies by q, shift the result by m and add/subtract it to itself.
2882
2883 We search for large factors first and loop down, even if large factors
2884 are less probable than small; if we find a large factor we will find a
2885 good sequence quickly, and therefore be able to prune (by decreasing
2886 COST_LIMIT) the search. */
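/* E.g. t == 25 is divisible by d == 5 == 2**2 + 1, so x*25 can be formed
   as (x*5 << 2) + x*5, with x*5 synthesized recursively.  */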
2887
2888 do_alg_addsub_factor:
2889 for (m = floor_log2 (t - 1); m >= 2; m--)
2890 {
2891 unsigned HOST_WIDE_INT d;
2892
2893 d = (HOST_WIDE_INT_1U << m) + 1;
2894 if (t % d == 0 && t > d && m < maxm
2895 && (!cache_hit || cache_alg == alg_add_factor))
2896 {
2897 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2898 if (shiftadd_cost (speed, mode, m) <= op_cost)
2899 op_cost = shiftadd_cost (speed, mode, m);
2900
2901 op_latency = op_cost;
2902
2903
2904 new_limit.cost = best_cost.cost - op_cost;
2905 new_limit.latency = best_cost.latency - op_latency;
2906 synth_mult (alg_in, t / d, &new_limit, mode);
2907
2908 alg_in->cost.cost += op_cost;
2909 alg_in->cost.latency += op_latency;
2910 if (alg_in->cost.latency < op_cost)
2911 alg_in->cost.latency = op_cost;
2912 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2913 {
2914 best_cost = alg_in->cost;
2915 std::swap (alg_in, best_alg);
2916 best_alg->log[best_alg->ops] = m;
2917 best_alg->op[best_alg->ops] = alg_add_factor;
2918 }
2919 /* Other factors will have been taken care of in the recursion. */
2920 break;
2921 }
2922
2923 d = (HOST_WIDE_INT_1U << m) - 1;
2924 if (t % d == 0 && t > d && m < maxm
2925 && (!cache_hit || cache_alg == alg_sub_factor))
2926 {
2927 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2928 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2929 op_cost = shiftsub0_cost (speed, mode, m);
2930
2931 op_latency = op_cost;
2932
2933 new_limit.cost = best_cost.cost - op_cost;
2934 new_limit.latency = best_cost.latency - op_latency;
2935 synth_mult (alg_in, t / d, &new_limit, mode);
2936
2937 alg_in->cost.cost += op_cost;
2938 alg_in->cost.latency += op_latency;
2939 if (alg_in->cost.latency < op_cost)
2940 alg_in->cost.latency = op_cost;
2941 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2942 {
2943 best_cost = alg_in->cost;
2944 std::swap (alg_in, best_alg);
2945 best_alg->log[best_alg->ops] = m;
2946 best_alg->op[best_alg->ops] = alg_sub_factor;
2947 }
2948 break;
2949 }
2950 }
2951 if (cache_hit)
2952 goto done;
2953
2954 /* Try shift-and-add (load effective address) instructions,
2955 i.e. do a*3, a*5, a*9. */
2956 if ((t & 1) != 0)
2957 {
2958 do_alg_add_t2_m:
2959 q = t - 1;
2960 m = ctz_hwi (q);
2961 if (q && m < maxm)
2962 {
2963 op_cost = shiftadd_cost (speed, mode, m);
2964 new_limit.cost = best_cost.cost - op_cost;
2965 new_limit.latency = best_cost.latency - op_cost;
2966 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2967
2968 alg_in->cost.cost += op_cost;
2969 alg_in->cost.latency += op_cost;
2970 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2971 {
2972 best_cost = alg_in->cost;
2973 std::swap (alg_in, best_alg);
2974 best_alg->log[best_alg->ops] = m;
2975 best_alg->op[best_alg->ops] = alg_add_t2_m;
2976 }
2977 }
2978 if (cache_hit)
2979 goto done;
2980
2981 do_alg_sub_t2_m:
2982 q = t + 1;
2983 m = ctz_hwi (q);
2984 if (q && m < maxm)
2985 {
2986 op_cost = shiftsub0_cost (speed, mode, m);
2987 new_limit.cost = best_cost.cost - op_cost;
2988 new_limit.latency = best_cost.latency - op_cost;
2989 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2990
2991 alg_in->cost.cost += op_cost;
2992 alg_in->cost.latency += op_cost;
2993 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2994 {
2995 best_cost = alg_in->cost;
2996 std::swap (alg_in, best_alg);
2997 best_alg->log[best_alg->ops] = m;
2998 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2999 }
3000 }
3001 if (cache_hit)
3002 goto done;
3003 }
3004
3005 done:
3006 /* If best_cost has not decreased, we have not found any algorithm. */
3007 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3008 {
3009 /* We failed to find an algorithm. Record alg_impossible for
3010 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3011 we are asked to find an algorithm for T within the same or
3012 lower COST_LIMIT, we can immediately return to the
3013 caller. */
3014 entry_ptr->t = t;
3015 entry_ptr->mode = mode;
3016 entry_ptr->speed = speed;
3017 entry_ptr->alg = alg_impossible;
3018 entry_ptr->cost = *cost_limit;
3019 return;
3020 }
3021
3022 /* Cache the result. */
3023 if (!cache_hit)
3024 {
3025 entry_ptr->t = t;
3026 entry_ptr->mode = mode;
3027 entry_ptr->speed = speed;
3028 entry_ptr->alg = best_alg->op[best_alg->ops];
3029 entry_ptr->cost.cost = best_cost.cost;
3030 entry_ptr->cost.latency = best_cost.latency;
3031 }
3032
3033 /* If the sequence is getting too long for `struct algorithm'
3034 to record, make this search fail. */
3035 if (best_alg->ops == MAX_BITS_PER_WORD)
3036 return;
3037
3038 /* Copy the algorithm from temporary space to the space at alg_out.
3039 We avoid using structure assignment because the majority of
3040 best_alg is normally undefined, and this is a critical function. */
3041 alg_out->ops = best_alg->ops + 1;
3042 alg_out->cost = best_cost;
3043 memcpy (alg_out->op, best_alg->op,
3044 alg_out->ops * sizeof *alg_out->op);
3045 memcpy (alg_out->log, best_alg->log,
3046 alg_out->ops * sizeof *alg_out->log);
3047 }
3048 \f
3049 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3050 Try three variations:
3051
3052 - a shift/add sequence based on VAL itself
3053 - a shift/add sequence based on -VAL, followed by a negation
3054 - a shift/add sequence based on VAL - 1, followed by an addition.
3055
3056 Return true if the cheapest of these costs less than MULT_COST,
3057 describing the algorithm in *ALG and final fixup in *VARIANT. */
3058
3059 bool
3060 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3061 struct algorithm *alg, enum mult_variant *variant,
3062 int mult_cost)
3063 {
3064 struct algorithm alg2;
3065 struct mult_cost limit;
3066 int op_cost;
3067 bool speed = optimize_insn_for_speed_p ();
3068
3069 /* Fail quickly for impossible bounds. */
3070 if (mult_cost < 0)
3071 return false;
3072
3073 /* Ensure that mult_cost provides a reasonable upper bound.
3074 Any constant multiplication can be performed with less
3075 than 2 * bits additions. */
3076 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3077 if (mult_cost > op_cost)
3078 mult_cost = op_cost;
3079
3080 *variant = basic_variant;
3081 limit.cost = mult_cost;
3082 limit.latency = mult_cost;
3083 synth_mult (alg, val, &limit, mode);
3084
3085 /* This works only if the inverted value actually fits in an
3086 `unsigned int' */
3087 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3088 {
3089 op_cost = neg_cost (speed, mode);
3090 if (MULT_COST_LESS (&alg->cost, mult_cost))
3091 {
3092 limit.cost = alg->cost.cost - op_cost;
3093 limit.latency = alg->cost.latency - op_cost;
3094 }
3095 else
3096 {
3097 limit.cost = mult_cost - op_cost;
3098 limit.latency = mult_cost - op_cost;
3099 }
3100
3101 synth_mult (&alg2, -val, &limit, mode);
3102 alg2.cost.cost += op_cost;
3103 alg2.cost.latency += op_cost;
3104 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3105 *alg = alg2, *variant = negate_variant;
3106 }
3107
3108 /* This proves very useful for division-by-constant. */
3109 op_cost = add_cost (speed, mode);
3110 if (MULT_COST_LESS (&alg->cost, mult_cost))
3111 {
3112 limit.cost = alg->cost.cost - op_cost;
3113 limit.latency = alg->cost.latency - op_cost;
3114 }
3115 else
3116 {
3117 limit.cost = mult_cost - op_cost;
3118 limit.latency = mult_cost - op_cost;
3119 }
3120
3121 synth_mult (&alg2, val - 1, &limit, mode);
3122 alg2.cost.cost += op_cost;
3123 alg2.cost.latency += op_cost;
3124 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3125 *alg = alg2, *variant = add_variant;
3126
3127 return MULT_COST_LESS (&alg->cost, mult_cost);
3128 }
3129
3130 /* A subroutine of expand_mult, used for constant multiplications.
3131 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3132 convenient. Use the shift/add sequence described by ALG and apply
3133 the final fixup specified by VARIANT. */
3134
3135 static rtx
3136 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3137 rtx target, const struct algorithm *alg,
3138 enum mult_variant variant)
3139 {
3140 unsigned HOST_WIDE_INT val_so_far;
3141 rtx_insn *insn;
3142 rtx accum, tem;
3143 int opno;
3144 machine_mode nmode;
3145
3146 /* Avoid referencing memory over and over and invalid sharing
3147 on SUBREGs. */
3148 op0 = force_reg (mode, op0);
3149
3150 /* ACCUM starts out either as OP0 or as a zero, depending on
3151 the first operation. */
3152
3153 if (alg->op[0] == alg_zero)
3154 {
3155 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3156 val_so_far = 0;
3157 }
3158 else if (alg->op[0] == alg_m)
3159 {
3160 accum = copy_to_mode_reg (mode, op0);
3161 val_so_far = 1;
3162 }
3163 else
3164 gcc_unreachable ();
3165
3166 for (opno = 1; opno < alg->ops; opno++)
3167 {
3168 int log = alg->log[opno];
3169 rtx shift_subtarget = optimize ? 0 : accum;
3170 rtx add_target
3171 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3172 && !optimize)
3173 ? target : 0;
3174 rtx accum_target = optimize ? 0 : accum;
3175 rtx accum_inner;
3176
3177 switch (alg->op[opno])
3178 {
3179 case alg_shift:
3180 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3181 /* REG_EQUAL note will be attached to the following insn. */
3182 emit_move_insn (accum, tem);
3183 val_so_far <<= log;
3184 break;
3185
3186 case alg_add_t_m2:
3187 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3188 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3189 add_target ? add_target : accum_target);
3190 val_so_far += HOST_WIDE_INT_1U << log;
3191 break;
3192
3193 case alg_sub_t_m2:
3194 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3195 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3196 add_target ? add_target : accum_target);
3197 val_so_far -= HOST_WIDE_INT_1U << log;
3198 break;
3199
3200 case alg_add_t2_m:
3201 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3202 log, shift_subtarget, 0);
3203 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3204 add_target ? add_target : accum_target);
3205 val_so_far = (val_so_far << log) + 1;
3206 break;
3207
3208 case alg_sub_t2_m:
3209 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3210 log, shift_subtarget, 0);
3211 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3212 add_target ? add_target : accum_target);
3213 val_so_far = (val_so_far << log) - 1;
3214 break;
3215
3216 case alg_add_factor:
3217 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3218 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3219 add_target ? add_target : accum_target);
3220 val_so_far += val_so_far << log;
3221 break;
3222
3223 case alg_sub_factor:
3224 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3225 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3226 (add_target
3227 ? add_target : (optimize ? 0 : tem)));
3228 val_so_far = (val_so_far << log) - val_so_far;
3229 break;
3230
3231 default:
3232 gcc_unreachable ();
3233 }
3234
3235 if (SCALAR_INT_MODE_P (mode))
3236 {
3237 /* Write a REG_EQUAL note on the last insn so that we can cse
3238 multiplication sequences. Note that if ACCUM is a SUBREG,
3239 we've set the inner register and must properly indicate that. */
3240 tem = op0, nmode = mode;
3241 accum_inner = accum;
3242 if (GET_CODE (accum) == SUBREG)
3243 {
3244 accum_inner = SUBREG_REG (accum);
3245 nmode = GET_MODE (accum_inner);
3246 tem = gen_lowpart (nmode, op0);
3247 }
3248
3249 insn = get_last_insn ();
3250 set_dst_reg_note (insn, REG_EQUAL,
3251 gen_rtx_MULT (nmode, tem,
3252 gen_int_mode (val_so_far, nmode)),
3253 accum_inner);
3254 }
3255 }
3256
3257 if (variant == negate_variant)
3258 {
3259 val_so_far = -val_so_far;
3260 accum = expand_unop (mode, neg_optab, accum, target, 0);
3261 }
3262 else if (variant == add_variant)
3263 {
3264 val_so_far = val_so_far + 1;
3265 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3266 }
3267
3268 /* Compare only the bits of val and val_so_far that are significant
3269 in the result mode, to avoid sign-/zero-extension confusion. */
3270 nmode = GET_MODE_INNER (mode);
3271 val &= GET_MODE_MASK (nmode);
3272 val_so_far &= GET_MODE_MASK (nmode);
3273 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3274
3275 return accum;
3276 }
3277
3278 /* Perform a multiplication and return an rtx for the result.
3279 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3280 TARGET is a suggestion for where to store the result (an rtx).
3281
3282 We check specially for a constant integer as OP1.
3283 If you want this check for OP0 as well, then before calling
3284 you should swap the two operands if OP0 would be constant. */
3285
3286 rtx
3287 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3288 int unsignedp, bool no_libcall)
3289 {
3290 enum mult_variant variant;
3291 struct algorithm algorithm;
3292 rtx scalar_op1;
3293 int max_cost;
3294 bool speed = optimize_insn_for_speed_p ();
3295 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3296
3297 if (CONSTANT_P (op0))
3298 std::swap (op0, op1);
3299
3300 /* For vectors, there are several simplifications that can be made if
3301 all elements of the vector constant are identical. */
3302 scalar_op1 = unwrap_const_vec_duplicate (op1);
3303
3304 if (INTEGRAL_MODE_P (mode))
3305 {
3306 rtx fake_reg;
3307 HOST_WIDE_INT coeff;
3308 bool is_neg;
3309 int mode_bitsize;
3310
3311 if (op1 == CONST0_RTX (mode))
3312 return op1;
3313 if (op1 == CONST1_RTX (mode))
3314 return op0;
3315 if (op1 == CONSTM1_RTX (mode))
3316 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3317 op0, target, 0);
3318
3319 if (do_trapv)
3320 goto skip_synth;
3321
3322 /* If mode is an integer vector mode, check if the backend supports
3323 vector lshift (by scalar or vector) at all. If not, we can't use
3324 a synthesized multiply. */
3325 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3326 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3327 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3328 goto skip_synth;
3329
3330 /* These are the operations that are potentially turned into
3331 a sequence of shifts and additions. */
3332 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3333
3334 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3335 less than or equal in size to `unsigned int' this doesn't matter.
3336 If the mode is larger than `unsigned int', then synth_mult works
3337 only if the constant value exactly fits in an `unsigned int' without
3338 any truncation. This means that multiplying by negative values does
3339 not work; results are off by 2^32 on a 32 bit machine. */
3340 if (CONST_INT_P (scalar_op1))
3341 {
3342 coeff = INTVAL (scalar_op1);
3343 is_neg = coeff < 0;
3344 }
3345 #if TARGET_SUPPORTS_WIDE_INT
3346 else if (CONST_WIDE_INT_P (scalar_op1))
3347 #else
3348 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3349 #endif
3350 {
3351 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3352 /* Perfect power of 2 (other than 1, which is handled above). */
3353 if (shift > 0)
3354 return expand_shift (LSHIFT_EXPR, mode, op0,
3355 shift, target, unsignedp);
3356 else
3357 goto skip_synth;
3358 }
3359 else
3360 goto skip_synth;
3361
3362 /* We used to test optimize here, on the grounds that it's better to
3363 produce a smaller program when -O is not used. But this causes
3364 such a terrible slowdown sometimes that it seems better to always
3365 use synth_mult. */
3366
3367 /* Special case powers of two. */
3368 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3369 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3370 return expand_shift (LSHIFT_EXPR, mode, op0,
3371 floor_log2 (coeff), target, unsignedp);
3372
3373 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3374
3375 /* Attempt to handle multiplication of DImode values by negative
3376 coefficients, by performing the multiplication by a positive
3377 multiplier and then inverting the result. */
3378 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3379 {
3380 /* It's safe to use -coeff even for INT_MIN, as the
3381 result is interpreted as an unsigned coefficient.
3382 Exclude cost of op0 from max_cost to match the cost
3383 calculation of the synth_mult. */
3384 coeff = -(unsigned HOST_WIDE_INT) coeff;
3385 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3386 mode, speed)
3387 - neg_cost (speed, mode));
3388 if (max_cost <= 0)
3389 goto skip_synth;
3390
3391 /* Special case powers of two. */
3392 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3393 {
3394 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3395 floor_log2 (coeff), target, unsignedp);
3396 return expand_unop (mode, neg_optab, temp, target, 0);
3397 }
3398
3399 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3400 max_cost))
3401 {
3402 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3403 &algorithm, variant);
3404 return expand_unop (mode, neg_optab, temp, target, 0);
3405 }
3406 goto skip_synth;
3407 }
3408
3409 /* Exclude cost of op0 from max_cost to match the cost
3410 calculation of the synth_mult. */
3411 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3412 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3413 return expand_mult_const (mode, op0, coeff, target,
3414 &algorithm, variant);
3415 }
3416 skip_synth:
3417
3418 /* Expand x*2.0 as x+x. */
3419 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3420 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3421 {
3422 op0 = force_reg (GET_MODE (op0), op0);
3423 return expand_binop (mode, add_optab, op0, op0,
3424 target, unsignedp,
3425 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3426 }
3427
3428 /* This used to use umul_optab if unsigned, but for non-widening multiply
3429 there is no difference between signed and unsigned. */
3430 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3431 op0, op1, target, unsignedp,
3432 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3433 gcc_assert (op0 || no_libcall);
3434 return op0;
3435 }
3436
3437 /* Return a cost estimate for multiplying a register by the given
3438 COEFFicient in the given MODE and SPEED. */
3439
3440 int
3441 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3442 {
3443 int max_cost;
3444 struct algorithm algorithm;
3445 enum mult_variant variant;
3446
3447 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3448 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3449 mode, speed);
3450 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3451 return algorithm.cost.cost;
3452 else
3453 return max_cost;
3454 }
3455
3456 /* Perform a widening multiplication and return an rtx for the result.
3457 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3458 TARGET is a suggestion for where to store the result (an rtx).
3459 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3460 or smul_widen_optab.
3461
3462 We check specially for a constant integer as OP1, comparing the
3463 cost of a widening multiply against the cost of a sequence of shifts
3464 and adds. */
3465
3466 rtx
3467 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3468 int unsignedp, optab this_optab)
3469 {
3470 bool speed = optimize_insn_for_speed_p ();
3471 rtx cop1;
3472
3473 if (CONST_INT_P (op1)
3474 && GET_MODE (op0) != VOIDmode
3475 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3476 this_optab == umul_widen_optab))
3477 && CONST_INT_P (cop1)
3478 && (INTVAL (cop1) >= 0
3479 || HWI_COMPUTABLE_MODE_P (mode)))
3480 {
3481 HOST_WIDE_INT coeff = INTVAL (cop1);
3482 int max_cost;
3483 enum mult_variant variant;
3484 struct algorithm algorithm;
3485
3486 if (coeff == 0)
3487 return CONST0_RTX (mode);
3488
3489 /* Special case powers of two. */
3490 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3491 {
3492 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3493 return expand_shift (LSHIFT_EXPR, mode, op0,
3494 floor_log2 (coeff), target, unsignedp);
3495 }
3496
3497 /* Exclude cost of op0 from max_cost to match the cost
3498 calculation of the synth_mult. */
3499 max_cost = mul_widen_cost (speed, mode);
3500 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3501 max_cost))
3502 {
3503 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3504 return expand_mult_const (mode, op0, coeff, target,
3505 &algorithm, variant);
3506 }
3507 }
3508 return expand_binop (mode, this_optab, op0, op1, target,
3509 unsignedp, OPTAB_LIB_WIDEN);
3510 }
3511 \f
3512 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3513 replace division by D, and put the least significant N bits of the result
3514 in *MULTIPLIER_PTR and return the most significant bit.
3515
3516 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3517 needed precision is in PRECISION (should be <= N).
3518
3519 PRECISION should be as small as possible so this function can choose
3520 the multiplier more freely.
3521
3522 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3523 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3524
3525 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3526 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3527
3528 unsigned HOST_WIDE_INT
3529 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3530 unsigned HOST_WIDE_INT *multiplier_ptr,
3531 int *post_shift_ptr, int *lgup_ptr)
3532 {
3533 int lgup, post_shift;
3534 int pow, pow2;
3535
3536 /* lgup = ceil(log2(divisor)); */
3537 lgup = ceil_log2 (d);
3538
3539 gcc_assert (lgup <= n);
3540
3541 pow = n + lgup;
3542 pow2 = n + lgup - precision;
3543
3544 /* mlow = 2^(N + lgup)/d */
3545 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3546 wide_int mlow = wi::udiv_trunc (val, d);
3547
3548 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3549 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3550 wide_int mhigh = wi::udiv_trunc (val, d);
3551
3552 /* If precision == N, then mlow, mhigh exceed 2^N
3553 (but they do not exceed 2^(N+1)). */
3554
3555 /* Reduce to lowest terms. */
3556 for (post_shift = lgup; post_shift > 0; post_shift--)
3557 {
3558 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3559 HOST_BITS_PER_WIDE_INT);
3560 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3561 HOST_BITS_PER_WIDE_INT);
3562 if (ml_lo >= mh_lo)
3563 break;
3564
3565 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3566 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3567 }
3568
3569 *post_shift_ptr = post_shift;
3570 *lgup_ptr = lgup;
3571 if (n < HOST_BITS_PER_WIDE_INT)
3572 {
3573 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3574 *multiplier_ptr = mhigh.to_uhwi () & mask;
3575 return mhigh.to_uhwi () >= mask;
3576 }
3577 else
3578 {
3579 *multiplier_ptr = mhigh.to_uhwi ();
3580 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3581 }
3582 }
3583
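/* An illustrative sketch (not part of GCC): the round-up reciprocal
   identity that choose_multiplier is built on, checked by brute force for
   N = 16 so the arithmetic fits comfortably in 64 bits.  With
   L = ceil(log2(d)) and m = floor(2^(N+L)/d) + 1 we have
   2^(N+L) < m*d <= 2^(N+L) + 2^L, hence x/d == (x*m) >> (N+L) for all
   0 <= x < 2^N.  Plain ISO C; the function name is hypothetical.  */
#include <stdint.h>
#include <assert.h>

static void
check_reciprocal_multiplier (uint32_t d)     /* 1 <= d < 2^16 */
{
  const int N = 16;
  int L = 0;
  while (((uint32_t) 1 << L) < d)            /* L = ceil (log2 (d)) */
    L++;
  uint64_t m = (((uint64_t) 1 << (N + L)) / d) + 1;
  for (uint32_t x = 0; x < ((uint32_t) 1 << N); x++)
    assert ((uint32_t) ((x * m) >> (N + L)) == x / d);
}
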
3584 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3585 congruent to 1 (mod 2**N). */
3586
3587 static unsigned HOST_WIDE_INT
3588 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3589 {
3590 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3591
3592 /* The algorithm notes that the choice y = x satisfies
3593 x*y == 1 mod 2^3, since x is assumed odd.
3594 Each iteration doubles the number of bits of significance in y. */
3595
3596 unsigned HOST_WIDE_INT mask;
3597 unsigned HOST_WIDE_INT y = x;
3598 int nbit = 3;
3599
3600 mask = (n == HOST_BITS_PER_WIDE_INT
3601 ? HOST_WIDE_INT_M1U
3602 : (HOST_WIDE_INT_1U << n) - 1);
3603
3604 while (nbit < n)
3605 {
3606 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3607 nbit *= 2;
3608 }
3609 return y;
3610 }
3611
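/* An illustrative sketch (not part of GCC): the same doubling Newton
   iteration as invert_mod2n above, specialized to N = 64 where the masking
   is implicit in uint64_t arithmetic.  Plain ISO C; the function name is
   hypothetical.  */
#include <stdint.h>

static uint64_t
inverse_mod_2_64 (uint64_t x)            /* X must be odd.  */
{
  uint64_t y = x;                        /* x*y == 1 (mod 2^3) for odd x.  */
  for (int bits = 3; bits < 64; bits *= 2)
    y = y * (2 - x * y);                 /* Each step doubles the valid low bits.  */
  return y;
}

/* For any odd x, inverse_mod_2_64 (x) * x == 1 as a uint64_t.  */
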
3612 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3613 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3614 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3615 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3616 become signed.
3617
3618 The result is put in TARGET if that is convenient.
3619
3620 MODE is the mode of operation. */
3621
3622 rtx
3623 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3624 rtx op1, rtx target, int unsignedp)
3625 {
3626 rtx tem;
3627 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3628
3629 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3630 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3631 tem = expand_and (mode, tem, op1, NULL_RTX);
3632 adj_operand
3633 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3634 adj_operand);
3635
3636 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3637 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3638 tem = expand_and (mode, tem, op0, NULL_RTX);
3639 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3640 target);
3641
3642 return target;
3643 }
3644
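/* An illustrative sketch (not part of GCC): the identity that
   expand_mult_highpart_adjust implements, shown for 16-bit operands so it
   can be checked with ordinary 32-bit arithmetic.  The signed high part is
   the unsigned high part minus B when A is negative and minus A when B is
   negative (all mod 2^16); the PLUS direction used when UNSIGNEDP is set is
   simply the inverse adjustment.  Relies on arithmetic right shift of
   negative values, as GCC provides.  Plain ISO C; the function name is
   hypothetical.  */
#include <stdint.h>
#include <assert.h>

static void
check_highpart_adjust (int16_t a, int16_t b)
{
  uint16_t hi_u = (uint16_t) (((uint32_t) (uint16_t) a * (uint16_t) b) >> 16);
  uint16_t hi_s = (uint16_t) (((int32_t) a * (int32_t) b) >> 16);
  uint16_t adj = hi_u;
  if (a < 0) adj -= (uint16_t) b;
  if (b < 0) adj -= (uint16_t) a;
  assert (adj == hi_s);
}
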
3645 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3646
3647 static rtx
3648 extract_high_half (scalar_int_mode mode, rtx op)
3649 {
3650 if (mode == word_mode)
3651 return gen_highpart (mode, op);
3652
3653 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3654
3655 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3656 GET_MODE_BITSIZE (mode), 0, 1);
3657 return convert_modes (mode, wider_mode, op, 0);
3658 }
3659
3660 /* Like expmed_mult_highpart, but only consider using a multiplication
3661 optab. OP1 is an rtx for the constant operand. */
3662
3663 static rtx
3664 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3665 rtx target, int unsignedp, int max_cost)
3666 {
3667 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3668 optab moptab;
3669 rtx tem;
3670 int size;
3671 bool speed = optimize_insn_for_speed_p ();
3672
3673 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3674
3675 size = GET_MODE_BITSIZE (mode);
3676
3677 /* Firstly, try using a multiplication insn that only generates the needed
3678 high part of the product, and in the sign flavor of unsignedp. */
3679 if (mul_highpart_cost (speed, mode) < max_cost)
3680 {
3681 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3682 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3683 unsignedp, OPTAB_DIRECT);
3684 if (tem)
3685 return tem;
3686 }
3687
3688 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3689 Need to adjust the result after the multiplication. */
3690 if (size - 1 < BITS_PER_WORD
3691 && (mul_highpart_cost (speed, mode)
3692 + 2 * shift_cost (speed, mode, size-1)
3693 + 4 * add_cost (speed, mode) < max_cost))
3694 {
3695 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3696 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3697 unsignedp, OPTAB_DIRECT);
3698 if (tem)
3699 /* We used the wrong signedness. Adjust the result. */
3700 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3701 tem, unsignedp);
3702 }
3703
3704 /* Try widening multiplication. */
3705 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3706 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3707 && mul_widen_cost (speed, wider_mode) < max_cost)
3708 {
3709 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3710 unsignedp, OPTAB_WIDEN);
3711 if (tem)
3712 return extract_high_half (mode, tem);
3713 }
3714
3715 /* Try widening the mode and perform a non-widening multiplication. */
3716 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3717 && size - 1 < BITS_PER_WORD
3718 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3719 < max_cost))
3720 {
3721 rtx_insn *insns;
3722 rtx wop0, wop1;
3723
3724 /* We need to widen the operands, for example to ensure the
3725 constant multiplier is correctly sign or zero extended.
3726 Use a sequence to clean up any instructions emitted by
3727 the conversions if things don't work out. */
3728 start_sequence ();
3729 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3730 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3731 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3732 unsignedp, OPTAB_WIDEN);
3733 insns = get_insns ();
3734 end_sequence ();
3735
3736 if (tem)
3737 {
3738 emit_insn (insns);
3739 return extract_high_half (mode, tem);
3740 }
3741 }
3742
3743 /* Try widening multiplication of opposite signedness, and adjust. */
3744 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3745 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3746 && size - 1 < BITS_PER_WORD
3747 && (mul_widen_cost (speed, wider_mode)
3748 + 2 * shift_cost (speed, mode, size-1)
3749 + 4 * add_cost (speed, mode) < max_cost))
3750 {
3751 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3752 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3753 if (tem != 0)
3754 {
3755 tem = extract_high_half (mode, tem);
3756 /* We used the wrong signedness. Adjust the result. */
3757 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3758 target, unsignedp);
3759 }
3760 }
3761
3762 return 0;
3763 }
3764
3765 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3766 putting the high half of the result in TARGET if that is convenient,
3767 and return where the result is. If the operation cannot be performed,
3768 0 is returned.
3769
3770 MODE is the mode of operation and result.
3771
3772 UNSIGNEDP nonzero means unsigned multiply.
3773
3774 MAX_COST is the total allowed cost for the expanded RTL. */
3775
3776 static rtx
3777 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3778 rtx target, int unsignedp, int max_cost)
3779 {
3780 unsigned HOST_WIDE_INT cnst1;
3781 int extra_cost;
3782 bool sign_adjust = false;
3783 enum mult_variant variant;
3784 struct algorithm alg;
3785 rtx tem;
3786 bool speed = optimize_insn_for_speed_p ();
3787
3788 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3789 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3790
3791 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3792
3793 /* We can't optimize modes wider than BITS_PER_WORD.
3794 ??? We might be able to perform double-word arithmetic if
3795 mode == word_mode, however all the cost calculations in
3796 synth_mult etc. assume single-word operations. */
3797 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3798 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3799 return expmed_mult_highpart_optab (mode, op0, op1, target,
3800 unsignedp, max_cost);
3801
3802 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3803
3804 /* Check whether we try to multiply by a negative constant. */
3805 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3806 {
3807 sign_adjust = true;
3808 extra_cost += add_cost (speed, mode);
3809 }
3810
3811 /* See whether shift/add multiplication is cheap enough. */
3812 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3813 max_cost - extra_cost))
3814 {
3815 /* See whether the specialized multiplication optabs are
3816 cheaper than the shift/add version. */
3817 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3818 alg.cost.cost + extra_cost);
3819 if (tem)
3820 return tem;
3821
3822 tem = convert_to_mode (wider_mode, op0, unsignedp);
3823 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3824 tem = extract_high_half (mode, tem);
3825
3826 /* Adjust result for signedness. */
3827 if (sign_adjust)
3828 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3829
3830 return tem;
3831 }
3832 return expmed_mult_highpart_optab (mode, op0, op1, target,
3833 unsignedp, max_cost);
3834 }
3835
3836
3837 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3838
3839 static rtx
3840 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3841 {
3842 rtx result, temp, shift;
3843 rtx_code_label *label;
3844 int logd;
3845 int prec = GET_MODE_PRECISION (mode);
3846
3847 logd = floor_log2 (d);
3848 result = gen_reg_rtx (mode);
3849
3850 /* Avoid conditional branches when they're expensive. */
3851 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3852 && optimize_insn_for_speed_p ())
3853 {
3854 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3855 mode, 0, -1);
3856 if (signmask)
3857 {
3858 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3859 signmask = force_reg (mode, signmask);
3860 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3861
3862 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3863 which instruction sequence to use. If logical right shifts
3864 are expensive, use 2 XORs, 2 SUBs and an AND; otherwise
3865 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3866
3867 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3868 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3869 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3870 > COSTS_N_INSNS (2)))
3871 {
3872 temp = expand_binop (mode, xor_optab, op0, signmask,
3873 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3874 temp = expand_binop (mode, sub_optab, temp, signmask,
3875 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3876 temp = expand_binop (mode, and_optab, temp,
3877 gen_int_mode (masklow, mode),
3878 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3879 temp = expand_binop (mode, xor_optab, temp, signmask,
3880 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3881 temp = expand_binop (mode, sub_optab, temp, signmask,
3882 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3883 }
3884 else
3885 {
3886 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3887 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3888 signmask = force_reg (mode, signmask);
3889
3890 temp = expand_binop (mode, add_optab, op0, signmask,
3891 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3892 temp = expand_binop (mode, and_optab, temp,
3893 gen_int_mode (masklow, mode),
3894 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3895 temp = expand_binop (mode, sub_optab, temp, signmask,
3896 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3897 }
3898 return temp;
3899 }
3900 }
3901
3902 /* Mask contains the mode's signbit and the significant bits of the
3903 modulus. By including the signbit in the operation, many targets
3904 can avoid an explicit compare operation in the following comparison
3905 against zero. */
3906 wide_int mask = wi::mask (logd, false, prec);
3907 mask = wi::set_bit (mask, prec - 1);
3908
3909 temp = expand_binop (mode, and_optab, op0,
3910 immed_wide_int_const (mask, mode),
3911 result, 1, OPTAB_LIB_WIDEN);
3912 if (temp != result)
3913 emit_move_insn (result, temp);
3914
3915 label = gen_label_rtx ();
3916 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3917
3918 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3919 0, OPTAB_LIB_WIDEN);
3920
3921 mask = wi::mask (logd, true, prec);
3922 temp = expand_binop (mode, ior_optab, temp,
3923 immed_wide_int_const (mask, mode),
3924 result, 1, OPTAB_LIB_WIDEN);
3925 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3926 0, OPTAB_LIB_WIDEN);
3927 if (temp != result)
3928 emit_move_insn (result, temp);
3929 emit_label (label);
3930 return result;
3931 }
3932
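/* An illustrative sketch (not part of GCC): the branch-free signed
   remainder by 2^logd that the LSHIFTRT variant above emits, written for
   32-bit int.  Relies on arithmetic right shift of negative values, as GCC
   provides.  Plain ISO C; the function name is hypothetical.  */
static int
srem_pow2_sketch (int x, int logd)          /* x % (1 << logd), logd >= 1 */
{
  unsigned d_minus_1 = (1u << logd) - 1;
  /* d-1 when x is negative, 0 otherwise (signmask in the code above).  */
  unsigned bias = (unsigned) (x >> 31) >> (32 - logd);
  return (int) (((unsigned) x + bias) & d_minus_1) - (int) bias;
}

/* srem_pow2_sketch (-5, 2) == -1, matching C's -5 % 4.  */
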
3933 /* Expand signed division of OP0 by a power of two D in mode MODE.
3934 This routine is only called for positive values of D. */
3935
3936 static rtx
3937 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3938 {
3939 rtx temp;
3940 rtx_code_label *label;
3941 int logd;
3942
3943 logd = floor_log2 (d);
3944
3945 if (d == 2
3946 && BRANCH_COST (optimize_insn_for_speed_p (),
3947 false) >= 1)
3948 {
3949 temp = gen_reg_rtx (mode);
3950 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3951 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3952 0, OPTAB_LIB_WIDEN);
3953 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3954 }
3955
3956 if (HAVE_conditional_move
3957 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3958 {
3959 rtx temp2;
3960
3961 start_sequence ();
3962 temp2 = copy_to_mode_reg (mode, op0);
3963 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3964 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3965 temp = force_reg (mode, temp);
3966
3967 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3968 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3969 mode, temp, temp2, mode, 0);
3970 if (temp2)
3971 {
3972 rtx_insn *seq = get_insns ();
3973 end_sequence ();
3974 emit_insn (seq);
3975 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3976 }
3977 end_sequence ();
3978 }
3979
3980 if (BRANCH_COST (optimize_insn_for_speed_p (),
3981 false) >= 2)
3982 {
3983 int ushift = GET_MODE_BITSIZE (mode) - logd;
3984
3985 temp = gen_reg_rtx (mode);
3986 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3987 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3988 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3989 > COSTS_N_INSNS (1))
3990 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3991 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3992 else
3993 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3994 ushift, NULL_RTX, 1);
3995 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3996 0, OPTAB_LIB_WIDEN);
3997 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3998 }
3999
4000 label = gen_label_rtx ();
4001 temp = copy_to_mode_reg (mode, op0);
4002 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4003 expand_inc (temp, gen_int_mode (d - 1, mode));
4004 emit_label (label);
4005 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4006 }
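
/* An illustrative sketch (not part of GCC): the branch-free form of the
   signed division above when branches are expensive: add d-1 to the
   dividend only when it is negative, then shift arithmetically.  Relies on
   arithmetic right shift of negative values, as GCC provides.  Plain ISO C;
   the function name is hypothetical.  */
static int
sdiv_pow2_sketch (int x, int logd)          /* x / (1 << logd), 1 <= logd <= 30 */
{
  int bias = (x >> 31) & ((1 << logd) - 1); /* d-1 if x < 0, else 0.  */
  return (x + bias) >> logd;                /* Rounds toward zero.    */
}

/* sdiv_pow2_sketch (-5, 2) == -1, matching C's -5 / 4.  */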
4007 \f
4008 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4009 if that is convenient, and returning where the result is.
4010 You may request either the quotient or the remainder as the result;
4011 specify REM_FLAG nonzero to get the remainder.
4012
4013 CODE is the expression code for which kind of division this is;
4014 it controls how rounding is done. MODE is the machine mode to use.
4015 UNSIGNEDP nonzero means do unsigned division. */
4016
4017 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4018 and then correct it by or'ing in missing high bits
4019 if result of ANDI is nonzero.
4020 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4021 This could optimize to a bfexts instruction.
4022 But C doesn't use these operations, so their optimizations are
4023 left for later. */
4024 /* ??? For modulo, we don't actually need the highpart of the first product,
4025 the low part will do nicely. And for small divisors, the second multiply
4026 can also be a low-part only multiply or even be completely left out.
4027 E.g. to calculate the remainder of a division by 3 with a 32 bit
4028 multiply, multiply with 0x55555556 and extract the upper two bits;
4029 the result is exact for inputs up to 0x1fffffff.
4030 The input range can be reduced by using cross-sum rules.
4031 For odd divisors >= 3, the following table gives right shift counts
4032 so that if a number is shifted by an integer multiple of the given
4033 amount, the remainder stays the same:
4034 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4035 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4036 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4037 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4038 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4039
4040 Cross-sum rules for even numbers can be derived by leaving as many bits
4041 to the right alone as the divisor has zeros to the right.
4042 E.g. if x is an unsigned 32 bit number:
4043 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4044 */
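/* An illustrative sketch (not part of GCC): the remainder-by-3 trick from
   the comment above, verified by brute force over the stated range.  The
   top two bits of the low 32 bits of x * 0x55555556 are exactly x % 3 for
   every x <= 0x1fffffff (0x55555556 == (2^32 + 2) / 3, so the low word of
   the product holds the scaled fractional part of x / 3).  Plain ISO C;
   the function name is hypothetical and the exhaustive loop is slow.  */
#include <stdint.h>
#include <assert.h>

static void
check_mod3_by_multiply (void)
{
  for (uint32_t x = 0; x <= UINT32_C (0x1fffffff); x++)
    assert ((x * UINT32_C (0x55555556)) >> 30 == x % 3);
}
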
4045
4046 rtx
4047 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4048 rtx op0, rtx op1, rtx target, int unsignedp)
4049 {
4050 machine_mode compute_mode;
4051 rtx tquotient;
4052 rtx quotient = 0, remainder = 0;
4053 rtx_insn *last;
4054 rtx_insn *insn;
4055 optab optab1, optab2;
4056 int op1_is_constant, op1_is_pow2 = 0;
4057 int max_cost, extra_cost;
4058 static HOST_WIDE_INT last_div_const = 0;
4059 bool speed = optimize_insn_for_speed_p ();
4060
4061 op1_is_constant = CONST_INT_P (op1);
4062 if (op1_is_constant)
4063 {
4064 wide_int ext_op1 = rtx_mode_t (op1, mode);
4065 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4066 || (! unsignedp
4067 && wi::popcount (wi::neg (ext_op1)) == 1));
4068 }
4069
4070 /*
4071 This is the structure of expand_divmod:
4072
4073 First comes code to fix up the operands so we can perform the operations
4074 correctly and efficiently.
4075
4076 Second comes a switch statement with code specific for each rounding mode.
4077 For some special operands this code emits all RTL for the desired
4078 operation, for other cases, it generates only a quotient and stores it in
4079 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4080 to indicate that it has not done anything.
4081
4082 Last comes code that finishes the operation. If QUOTIENT is set and
4083 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4084 QUOTIENT is not set, it is computed using trunc rounding.
4085
4086 We try to generate special code for division and remainder when OP1 is a
4087 constant. If |OP1| = 2**n we can use shifts and some other fast
4088 operations. For other values of OP1, we compute a carefully selected
4089 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4090 by m.
4091
4092 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4093 half of the product. Different strategies for generating the product are
4094 implemented in expmed_mult_highpart.
4095
4096 If what we actually want is the remainder, we generate that by another
4097 by-constant multiplication and a subtraction. */
4098
4099 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4100 code below will malfunction if we are, so check here and handle
4101 the special case if so. */
4102 if (op1 == const1_rtx)
4103 return rem_flag ? const0_rtx : op0;
4104
4105 /* When dividing by -1, we could get an overflow.
4106 negv_optab can handle overflows. */
4107 if (! unsignedp && op1 == constm1_rtx)
4108 {
4109 if (rem_flag)
4110 return const0_rtx;
4111 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4112 ? negv_optab : neg_optab, op0, target, 0);
4113 }
4114
4115 if (target
4116 /* Don't use the function value register as a target
4117 since we have to read it as well as write it,
4118 and function-inlining gets confused by this. */
4119 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4120 /* Don't clobber an operand while doing a multi-step calculation. */
4121 || ((rem_flag || op1_is_constant)
4122 && (reg_mentioned_p (target, op0)
4123 || (MEM_P (op0) && MEM_P (target))))
4124 || reg_mentioned_p (target, op1)
4125 || (MEM_P (op1) && MEM_P (target))))
4126 target = 0;
4127
4128 /* Get the mode in which to perform this computation. Normally it will
4129 be MODE, but sometimes we can't do the desired operation in MODE.
4130 If so, pick a wider mode in which we can do the operation. Convert
4131 to that mode at the start to avoid repeated conversions.
4132
4133 First see what operations we need. These depend on the expression
4134 we are evaluating. (We assume that divxx3 insns exist under the
4135 same conditions as modxx3 insns, and that these insns don't normally
4136 fail. If these assumptions are not correct, we may generate less
4137 efficient code in some cases.)
4138
4139 Then see if we find a mode in which we can open-code that operation
4140 (either a division, modulus, or shift). Finally, check for the smallest
4141 mode for which we can do the operation with a library call. */
4142
4143 /* We might want to refine this now that we have division-by-constant
4144 optimization. Since expmed_mult_highpart tries so many variants, it is
4145 not straightforward to generalize this. Maybe we should make an array
4146 of possible modes in init_expmed? Save this for GCC 2.7. */
4147
4148 optab1 = (op1_is_pow2
4149 ? (unsignedp ? lshr_optab : ashr_optab)
4150 : (unsignedp ? udiv_optab : sdiv_optab));
4151 optab2 = (op1_is_pow2 ? optab1
4152 : (unsignedp ? udivmod_optab : sdivmod_optab));
4153
4154 FOR_EACH_MODE_FROM (compute_mode, mode)
4155 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4156 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4157 break;
4158
4159 if (compute_mode == VOIDmode)
4160 FOR_EACH_MODE_FROM (compute_mode, mode)
4161 if (optab_libfunc (optab1, compute_mode)
4162 || optab_libfunc (optab2, compute_mode))
4163 break;
4164
4165 /* If we still couldn't find a mode, use MODE, but expand_binop will
4166 probably die. */
4167 if (compute_mode == VOIDmode)
4168 compute_mode = mode;
4169
4170 if (target && GET_MODE (target) == compute_mode)
4171 tquotient = target;
4172 else
4173 tquotient = gen_reg_rtx (compute_mode);
4174
4175 #if 0
4176 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4177 (mode), and thereby get better code when OP1 is a constant. Do that
4178 later. It will require going over all usages of SIZE below. */
4179 size = GET_MODE_BITSIZE (mode);
4180 #endif
4181
4182 /* Only deduct something for a REM if the last divide done was
4183 for a different constant. Then set the constant of the last
4184 divide. */
4185 max_cost = (unsignedp
4186 ? udiv_cost (speed, compute_mode)
4187 : sdiv_cost (speed, compute_mode));
4188 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4189 && INTVAL (op1) == last_div_const))
4190 max_cost -= (mul_cost (speed, compute_mode)
4191 + add_cost (speed, compute_mode));
4192
4193 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4194
4195 /* Now convert to the best mode to use. */
4196 if (compute_mode != mode)
4197 {
4198 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4199 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4200
4201 /* convert_modes may have placed op1 into a register, so we
4202 must recompute the following. */
4203 op1_is_constant = CONST_INT_P (op1);
4204 if (op1_is_constant)
4205 {
4206 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4207 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4208 || (! unsignedp
4209 && wi::popcount (wi::neg (ext_op1)) == 1));
4210 }
4211 else
4212 op1_is_pow2 = 0;
4213 }
4214
4215 /* If one of the operands is a volatile MEM, copy it into a register. */
4216
4217 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4218 op0 = force_reg (compute_mode, op0);
4219 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4220 op1 = force_reg (compute_mode, op1);
4221
4222 /* If we need the remainder or if OP1 is constant, we need to
4223 put OP0 in a register in case it has any queued subexpressions. */
4224 if (rem_flag || op1_is_constant)
4225 op0 = force_reg (compute_mode, op0);
4226
4227 last = get_last_insn ();
4228
4229 /* Promote floor rounding to trunc rounding for unsigned operations. */
4230 if (unsignedp)
4231 {
4232 if (code == FLOOR_DIV_EXPR)
4233 code = TRUNC_DIV_EXPR;
4234 if (code == FLOOR_MOD_EXPR)
4235 code = TRUNC_MOD_EXPR;
4236 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4237 code = TRUNC_DIV_EXPR;
4238 }
4239
4240 if (op1 != const0_rtx)
4241 switch (code)
4242 {
4243 case TRUNC_MOD_EXPR:
4244 case TRUNC_DIV_EXPR:
4245 if (op1_is_constant)
4246 {
4247 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4248 int size = GET_MODE_BITSIZE (int_mode);
4249 if (unsignedp)
4250 {
4251 unsigned HOST_WIDE_INT mh, ml;
4252 int pre_shift, post_shift;
4253 int dummy;
4254 wide_int wd = rtx_mode_t (op1, int_mode);
4255 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4256
4257 if (wi::popcount (wd) == 1)
4258 {
4259 pre_shift = floor_log2 (d);
4260 if (rem_flag)
4261 {
4262 unsigned HOST_WIDE_INT mask
4263 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4264 remainder
4265 = expand_binop (int_mode, and_optab, op0,
4266 gen_int_mode (mask, int_mode),
4267 remainder, 1,
4268 OPTAB_LIB_WIDEN);
4269 if (remainder)
4270 return gen_lowpart (mode, remainder);
4271 }
4272 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4273 pre_shift, tquotient, 1);
4274 }
4275 else if (size <= HOST_BITS_PER_WIDE_INT)
4276 {
4277 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4278 {
4279 /* Most significant bit of divisor is set; emit an scc
4280 insn. */
4281 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4282 int_mode, 1, 1);
4283 }
4284 else
4285 {
4286 /* Find a suitable multiplier and right shift count
4287 instead of multiplying with D. */
4288
4289 mh = choose_multiplier (d, size, size,
4290 &ml, &post_shift, &dummy);
4291
4292 /* If the suggested multiplier is more than SIZE bits,
4293 we can do better for even divisors, using an
4294 initial right shift. */
4295 if (mh != 0 && (d & 1) == 0)
4296 {
4297 pre_shift = ctz_or_zero (d);
4298 mh = choose_multiplier (d >> pre_shift, size,
4299 size - pre_shift,
4300 &ml, &post_shift, &dummy);
4301 gcc_assert (!mh);
4302 }
4303 else
4304 pre_shift = 0;
4305
4306 if (mh != 0)
4307 {
4308 rtx t1, t2, t3, t4;
4309
4310 if (post_shift - 1 >= BITS_PER_WORD)
4311 goto fail1;
4312
4313 extra_cost
4314 = (shift_cost (speed, int_mode, post_shift - 1)
4315 + shift_cost (speed, int_mode, 1)
4316 + 2 * add_cost (speed, int_mode));
4317 t1 = expmed_mult_highpart
4318 (int_mode, op0, gen_int_mode (ml, int_mode),
4319 NULL_RTX, 1, max_cost - extra_cost);
4320 if (t1 == 0)
4321 goto fail1;
4322 t2 = force_operand (gen_rtx_MINUS (int_mode,
4323 op0, t1),
4324 NULL_RTX);
4325 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4326 t2, 1, NULL_RTX, 1);
4327 t4 = force_operand (gen_rtx_PLUS (int_mode,
4328 t1, t3),
4329 NULL_RTX);
4330 quotient = expand_shift
4331 (RSHIFT_EXPR, int_mode, t4,
4332 post_shift - 1, tquotient, 1);
4333 }
4334 else
4335 {
4336 rtx t1, t2;
4337
4338 if (pre_shift >= BITS_PER_WORD
4339 || post_shift >= BITS_PER_WORD)
4340 goto fail1;
4341
4342 t1 = expand_shift
4343 (RSHIFT_EXPR, int_mode, op0,
4344 pre_shift, NULL_RTX, 1);
4345 extra_cost
4346 = (shift_cost (speed, int_mode, pre_shift)
4347 + shift_cost (speed, int_mode, post_shift));
4348 t2 = expmed_mult_highpart
4349 (int_mode, t1,
4350 gen_int_mode (ml, int_mode),
4351 NULL_RTX, 1, max_cost - extra_cost);
4352 if (t2 == 0)
4353 goto fail1;
4354 quotient = expand_shift
4355 (RSHIFT_EXPR, int_mode, t2,
4356 post_shift, tquotient, 1);
4357 }
4358 }
4359 }
4360 else /* Mode too wide to use the tricky code */
4361 break;
4362
4363 insn = get_last_insn ();
4364 if (insn != last)
4365 set_dst_reg_note (insn, REG_EQUAL,
4366 gen_rtx_UDIV (int_mode, op0, op1),
4367 quotient);
4368 }
4369 else /* TRUNC_DIV, signed */
4370 {
4371 unsigned HOST_WIDE_INT ml;
4372 int lgup, post_shift;
4373 rtx mlr;
4374 HOST_WIDE_INT d = INTVAL (op1);
4375 unsigned HOST_WIDE_INT abs_d;
4376
4377 /* Since d might be INT_MIN, we have to cast to
4378 unsigned HOST_WIDE_INT before negating to avoid
4379 undefined signed overflow. */
4380 abs_d = (d >= 0
4381 ? (unsigned HOST_WIDE_INT) d
4382 : - (unsigned HOST_WIDE_INT) d);
4383
4384 /* n rem d = n rem -d */
4385 if (rem_flag && d < 0)
4386 {
4387 d = abs_d;
4388 op1 = gen_int_mode (abs_d, int_mode);
4389 }
4390
4391 if (d == 1)
4392 quotient = op0;
4393 else if (d == -1)
4394 quotient = expand_unop (int_mode, neg_optab, op0,
4395 tquotient, 0);
4396 else if (size <= HOST_BITS_PER_WIDE_INT
4397 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4398 {
4399 /* This case is not handled correctly below. */
4400 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4401 int_mode, 1, 1);
4402 if (quotient == 0)
4403 goto fail1;
4404 }
4405 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4406 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4407 && (rem_flag
4408 ? smod_pow2_cheap (speed, int_mode)
4409 : sdiv_pow2_cheap (speed, int_mode))
4410 /* We assume that cheap metric is true if the
4411 optab has an expander for this mode. */
4412 && ((optab_handler ((rem_flag ? smod_optab
4413 : sdiv_optab),
4414 int_mode)
4415 != CODE_FOR_nothing)
4416 || (optab_handler (sdivmod_optab, int_mode)
4417 != CODE_FOR_nothing)))
4418 ;
4419 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4420 && (size <= HOST_BITS_PER_WIDE_INT
4421 || abs_d != (unsigned HOST_WIDE_INT) d))
4422 {
4423 if (rem_flag)
4424 {
4425 remainder = expand_smod_pow2 (int_mode, op0, d);
4426 if (remainder)
4427 return gen_lowpart (mode, remainder);
4428 }
4429
4430 if (sdiv_pow2_cheap (speed, int_mode)
4431 && ((optab_handler (sdiv_optab, int_mode)
4432 != CODE_FOR_nothing)
4433 || (optab_handler (sdivmod_optab, int_mode)
4434 != CODE_FOR_nothing)))
4435 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4436 int_mode, op0,
4437 gen_int_mode (abs_d,
4438 int_mode),
4439 NULL_RTX, 0);
4440 else
4441 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4442
4443 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4444 negate the quotient. */
4445 if (d < 0)
4446 {
4447 insn = get_last_insn ();
4448 if (insn != last
4449 && abs_d < (HOST_WIDE_INT_1U
4450 << (HOST_BITS_PER_WIDE_INT - 1)))
4451 set_dst_reg_note (insn, REG_EQUAL,
4452 gen_rtx_DIV (int_mode, op0,
4453 gen_int_mode
4454 (abs_d,
4455 int_mode)),
4456 quotient);
4457
4458 quotient = expand_unop (int_mode, neg_optab,
4459 quotient, quotient, 0);
4460 }
4461 }
4462 else if (size <= HOST_BITS_PER_WIDE_INT)
4463 {
4464 choose_multiplier (abs_d, size, size - 1,
4465 &ml, &post_shift, &lgup);
4466 if (ml < HOST_WIDE_INT_1U << (size - 1))
4467 {
4468 rtx t1, t2, t3;
4469
4470 if (post_shift >= BITS_PER_WORD
4471 || size - 1 >= BITS_PER_WORD)
4472 goto fail1;
4473
4474 extra_cost = (shift_cost (speed, int_mode, post_shift)
4475 + shift_cost (speed, int_mode, size - 1)
4476 + add_cost (speed, int_mode));
4477 t1 = expmed_mult_highpart
4478 (int_mode, op0, gen_int_mode (ml, int_mode),
4479 NULL_RTX, 0, max_cost - extra_cost);
4480 if (t1 == 0)
4481 goto fail1;
4482 t2 = expand_shift
4483 (RSHIFT_EXPR, int_mode, t1,
4484 post_shift, NULL_RTX, 0);
4485 t3 = expand_shift
4486 (RSHIFT_EXPR, int_mode, op0,
4487 size - 1, NULL_RTX, 0);
4488 if (d < 0)
4489 quotient
4490 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4491 tquotient);
4492 else
4493 quotient
4494 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4495 tquotient);
4496 }
4497 else
4498 {
4499 rtx t1, t2, t3, t4;
4500
4501 if (post_shift >= BITS_PER_WORD
4502 || size - 1 >= BITS_PER_WORD)
4503 goto fail1;
4504
4505 ml |= HOST_WIDE_INT_M1U << (size - 1);
4506 mlr = gen_int_mode (ml, int_mode);
4507 extra_cost = (shift_cost (speed, int_mode, post_shift)
4508 + shift_cost (speed, int_mode, size - 1)
4509 + 2 * add_cost (speed, int_mode));
4510 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4511 NULL_RTX, 0,
4512 max_cost - extra_cost);
4513 if (t1 == 0)
4514 goto fail1;
4515 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4516 NULL_RTX);
4517 t3 = expand_shift
4518 (RSHIFT_EXPR, int_mode, t2,
4519 post_shift, NULL_RTX, 0);
4520 t4 = expand_shift
4521 (RSHIFT_EXPR, int_mode, op0,
4522 size - 1, NULL_RTX, 0);
4523 if (d < 0)
4524 quotient
4525 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4526 tquotient);
4527 else
4528 quotient
4529 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4530 tquotient);
4531 }
4532 }
4533 else /* Mode too wide to use the tricky code */
4534 break;
4535
4536 insn = get_last_insn ();
4537 if (insn != last)
4538 set_dst_reg_note (insn, REG_EQUAL,
4539 gen_rtx_DIV (int_mode, op0, op1),
4540 quotient);
4541 }
4542 break;
4543 }
4544 fail1:
4545 delete_insns_since (last);
4546 break;
4547
4548 case FLOOR_DIV_EXPR:
4549 case FLOOR_MOD_EXPR:
4550 /* We will come here only for signed operations. */
4551 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4552 {
4553 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4554 int size = GET_MODE_BITSIZE (int_mode);
4555 unsigned HOST_WIDE_INT mh, ml;
4556 int pre_shift, lgup, post_shift;
4557 HOST_WIDE_INT d = INTVAL (op1);
4558
4559 if (d > 0)
4560 {
4561 /* We could just as easily deal with negative constants here,
4562 but it does not seem worth the trouble for GCC 2.6. */
4563 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4564 {
4565 pre_shift = floor_log2 (d);
4566 if (rem_flag)
4567 {
4568 unsigned HOST_WIDE_INT mask
4569 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4570 remainder = expand_binop
4571 (int_mode, and_optab, op0,
4572 gen_int_mode (mask, int_mode),
4573 remainder, 0, OPTAB_LIB_WIDEN);
4574 if (remainder)
4575 return gen_lowpart (mode, remainder);
4576 }
4577 quotient = expand_shift
4578 (RSHIFT_EXPR, int_mode, op0,
4579 pre_shift, tquotient, 0);
4580 }
4581 else
4582 {
4583 rtx t1, t2, t3, t4;
4584
4585 mh = choose_multiplier (d, size, size - 1,
4586 &ml, &post_shift, &lgup);
4587 gcc_assert (!mh);
4588
4589 if (post_shift < BITS_PER_WORD
4590 && size - 1 < BITS_PER_WORD)
4591 {
4592 t1 = expand_shift
4593 (RSHIFT_EXPR, int_mode, op0,
4594 size - 1, NULL_RTX, 0);
4595 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4596 NULL_RTX, 0, OPTAB_WIDEN);
4597 extra_cost = (shift_cost (speed, int_mode, post_shift)
4598 + shift_cost (speed, int_mode, size - 1)
4599 + 2 * add_cost (speed, int_mode));
4600 t3 = expmed_mult_highpart
4601 (int_mode, t2, gen_int_mode (ml, int_mode),
4602 NULL_RTX, 1, max_cost - extra_cost);
4603 if (t3 != 0)
4604 {
4605 t4 = expand_shift
4606 (RSHIFT_EXPR, int_mode, t3,
4607 post_shift, NULL_RTX, 1);
4608 quotient = expand_binop (int_mode, xor_optab,
4609 t4, t1, tquotient, 0,
4610 OPTAB_WIDEN);
4611 }
4612 }
4613 }
4614 }
4615 else
4616 {
4617 rtx nsign, t1, t2, t3, t4;
4618 t1 = force_operand (gen_rtx_PLUS (int_mode,
4619 op0, constm1_rtx), NULL_RTX);
4620 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4621 0, OPTAB_WIDEN);
4622 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4623 size - 1, NULL_RTX, 0);
4624 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4625 NULL_RTX);
4626 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4627 NULL_RTX, 0);
4628 if (t4)
4629 {
4630 rtx t5;
4631 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4632 NULL_RTX, 0);
4633 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4634 tquotient);
4635 }
4636 }
4637 }
4638
4639 if (quotient != 0)
4640 break;
4641 delete_insns_since (last);
4642
4643 /* Try using an instruction that produces both the quotient and
4644 remainder, using truncation. We can easily compensate the quotient
4645 or remainder to get floor rounding, once we have the remainder.
4646 Notice that we compute also the final remainder value here,
4647 and return the result right away. */
4648 if (target == 0 || GET_MODE (target) != compute_mode)
4649 target = gen_reg_rtx (compute_mode);
4650
4651 if (rem_flag)
4652 {
4653 remainder
4654 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4655 quotient = gen_reg_rtx (compute_mode);
4656 }
4657 else
4658 {
4659 quotient
4660 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4661 remainder = gen_reg_rtx (compute_mode);
4662 }
4663
4664 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4665 quotient, remainder, 0))
4666 {
4667 /* This could be computed with a branch-less sequence.
4668 Save that for later. */
4669 rtx tem;
4670 rtx_code_label *label = gen_label_rtx ();
4671 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4672 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4673 NULL_RTX, 0, OPTAB_WIDEN);
4674 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4675 expand_dec (quotient, const1_rtx);
4676 expand_inc (remainder, op1);
4677 emit_label (label);
4678 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4679 }
4680
4681 /* No luck with division elimination or divmod. Have to do it
4682 by conditionally adjusting op0 *and* the result. */
4683 {
4684 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4685 rtx adjusted_op0;
4686 rtx tem;
4687
4688 quotient = gen_reg_rtx (compute_mode);
4689 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4690 label1 = gen_label_rtx ();
4691 label2 = gen_label_rtx ();
4692 label3 = gen_label_rtx ();
4693 label4 = gen_label_rtx ();
4694 label5 = gen_label_rtx ();
4695 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4696 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4697 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4698 quotient, 0, OPTAB_LIB_WIDEN);
4699 if (tem != quotient)
4700 emit_move_insn (quotient, tem);
4701 emit_jump_insn (targetm.gen_jump (label5));
4702 emit_barrier ();
4703 emit_label (label1);
4704 expand_inc (adjusted_op0, const1_rtx);
4705 emit_jump_insn (targetm.gen_jump (label4));
4706 emit_barrier ();
4707 emit_label (label2);
4708 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4709 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4710 quotient, 0, OPTAB_LIB_WIDEN);
4711 if (tem != quotient)
4712 emit_move_insn (quotient, tem);
4713 emit_jump_insn (targetm.gen_jump (label5));
4714 emit_barrier ();
4715 emit_label (label3);
4716 expand_dec (adjusted_op0, const1_rtx);
4717 emit_label (label4);
4718 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4719 quotient, 0, OPTAB_LIB_WIDEN);
4720 if (tem != quotient)
4721 emit_move_insn (quotient, tem);
4722 expand_dec (quotient, const1_rtx);
4723 emit_label (label5);
4724 }
4725 break;
4726
4727 case CEIL_DIV_EXPR:
4728 case CEIL_MOD_EXPR:
4729 if (unsignedp)
4730 {
4731 if (op1_is_constant
4732 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4733 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4734 || INTVAL (op1) >= 0))
4735 {
4736 scalar_int_mode int_mode
4737 = as_a <scalar_int_mode> (compute_mode);
4738 rtx t1, t2, t3;
4739 unsigned HOST_WIDE_INT d = INTVAL (op1);
4740 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4741 floor_log2 (d), tquotient, 1);
4742 t2 = expand_binop (int_mode, and_optab, op0,
4743 gen_int_mode (d - 1, int_mode),
4744 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4745 t3 = gen_reg_rtx (int_mode);
4746 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4747 if (t3 == 0)
4748 {
4749 rtx_code_label *lab;
4750 lab = gen_label_rtx ();
4751 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4752 expand_inc (t1, const1_rtx);
4753 emit_label (lab);
4754 quotient = t1;
4755 }
4756 else
4757 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4758 tquotient);
4759 break;
4760 }
4761
4762 /* Try using an instruction that produces both the quotient and
4763 remainder, using truncation. We can easily compensate the
4764 quotient or remainder to get ceiling rounding, once we have the
4765 remainder. Notice that we compute also the final remainder
4766 value here, and return the result right away. */
4767 if (target == 0 || GET_MODE (target) != compute_mode)
4768 target = gen_reg_rtx (compute_mode);
4769
4770 if (rem_flag)
4771 {
4772 remainder = (REG_P (target)
4773 ? target : gen_reg_rtx (compute_mode));
4774 quotient = gen_reg_rtx (compute_mode);
4775 }
4776 else
4777 {
4778 quotient = (REG_P (target)
4779 ? target : gen_reg_rtx (compute_mode));
4780 remainder = gen_reg_rtx (compute_mode);
4781 }
4782
4783 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4784 remainder, 1))
4785 {
4786 /* This could be computed with a branch-less sequence.
4787 Save that for later. */
4788 rtx_code_label *label = gen_label_rtx ();
4789 do_cmp_and_jump (remainder, const0_rtx, EQ,
4790 compute_mode, label);
4791 expand_inc (quotient, const1_rtx);
4792 expand_dec (remainder, op1);
4793 emit_label (label);
4794 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4795 }
4796
4797 /* No luck with division elimination or divmod. Have to do it
4798 by conditionally adjusting op0 *and* the result. */
4799 {
4800 rtx_code_label *label1, *label2;
4801 rtx adjusted_op0, tem;
4802
4803 quotient = gen_reg_rtx (compute_mode);
4804 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4805 label1 = gen_label_rtx ();
4806 label2 = gen_label_rtx ();
4807 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4808 compute_mode, label1);
4809 emit_move_insn (quotient, const0_rtx);
4810 emit_jump_insn (targetm.gen_jump (label2));
4811 emit_barrier ();
4812 emit_label (label1);
4813 expand_dec (adjusted_op0, const1_rtx);
4814 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4815 quotient, 1, OPTAB_LIB_WIDEN);
4816 if (tem != quotient)
4817 emit_move_insn (quotient, tem);
4818 expand_inc (quotient, const1_rtx);
4819 emit_label (label2);
4820 }
4821 }
4822 else /* signed */
4823 {
4824 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4825 && INTVAL (op1) >= 0)
4826 {
4827 /* This is extremely similar to the code for the unsigned case
4828 above. For 2.7 we should merge these variants, but for
4829 2.6.1 I don't want to touch the code for unsigned since that
4830 get used in C. The signed case will only be used by other
4831 languages (Ada). */
4832
4833 rtx t1, t2, t3;
4834 unsigned HOST_WIDE_INT d = INTVAL (op1);
4835 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4836 floor_log2 (d), tquotient, 0);
4837 t2 = expand_binop (compute_mode, and_optab, op0,
4838 gen_int_mode (d - 1, compute_mode),
4839 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4840 t3 = gen_reg_rtx (compute_mode);
4841 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4842 compute_mode, 1, 1);
4843 if (t3 == 0)
4844 {
4845 rtx_code_label *lab;
4846 lab = gen_label_rtx ();
4847 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4848 expand_inc (t1, const1_rtx);
4849 emit_label (lab);
4850 quotient = t1;
4851 }
4852 else
4853 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4854 t1, t3),
4855 tquotient);
4856 break;
4857 }
4858
4859 /* Try using an instruction that produces both the quotient and
4860 remainder, using truncation. We can easily compensate the
4861 quotient or remainder to get ceiling rounding, once we have the
4862 remainder. Notice that we compute also the final remainder
4863 value here, and return the result right away. */
4864 if (target == 0 || GET_MODE (target) != compute_mode)
4865 target = gen_reg_rtx (compute_mode);
4866 if (rem_flag)
4867 {
4868 remainder = (REG_P (target)
4869 ? target : gen_reg_rtx (compute_mode));
4870 quotient = gen_reg_rtx (compute_mode);
4871 }
4872 else
4873 {
4874 quotient = (REG_P (target)
4875 ? target : gen_reg_rtx (compute_mode));
4876 remainder = gen_reg_rtx (compute_mode);
4877 }
4878
4879 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4880 remainder, 0))
4881 {
4882 /* This could be computed with a branch-less sequence.
4883 Save that for later. */
4884 rtx tem;
4885 rtx_code_label *label = gen_label_rtx ();
4886 do_cmp_and_jump (remainder, const0_rtx, EQ,
4887 compute_mode, label);
4888 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4889 NULL_RTX, 0, OPTAB_WIDEN);
4890 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4891 expand_inc (quotient, const1_rtx);
4892 expand_dec (remainder, op1);
4893 emit_label (label);
4894 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4895 }
4896
4897 /* No luck with division elimination or divmod. Have to do it
4898 by conditionally adjusting op0 *and* the result. */
4899 {
4900 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4901 rtx adjusted_op0;
4902 rtx tem;
4903
4904 quotient = gen_reg_rtx (compute_mode);
4905 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4906 label1 = gen_label_rtx ();
4907 label2 = gen_label_rtx ();
4908 label3 = gen_label_rtx ();
4909 label4 = gen_label_rtx ();
4910 label5 = gen_label_rtx ();
4911 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4912 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4913 compute_mode, label1);
4914 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4915 quotient, 0, OPTAB_LIB_WIDEN);
4916 if (tem != quotient)
4917 emit_move_insn (quotient, tem);
4918 emit_jump_insn (targetm.gen_jump (label5));
4919 emit_barrier ();
4920 emit_label (label1);
4921 expand_dec (adjusted_op0, const1_rtx);
4922 emit_jump_insn (targetm.gen_jump (label4));
4923 emit_barrier ();
4924 emit_label (label2);
4925 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4926 compute_mode, label3);
4927 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4928 quotient, 0, OPTAB_LIB_WIDEN);
4929 if (tem != quotient)
4930 emit_move_insn (quotient, tem);
4931 emit_jump_insn (targetm.gen_jump (label5));
4932 emit_barrier ();
4933 emit_label (label3);
4934 expand_inc (adjusted_op0, const1_rtx);
4935 emit_label (label4);
4936 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4937 quotient, 0, OPTAB_LIB_WIDEN);
4938 if (tem != quotient)
4939 emit_move_insn (quotient, tem);
4940 expand_inc (quotient, const1_rtx);
4941 emit_label (label5);
4942 }
4943 }
4944 break;
4945
4946 case EXACT_DIV_EXPR:
4947 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4948 {
4949 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4950 int size = GET_MODE_BITSIZE (int_mode);
4951 HOST_WIDE_INT d = INTVAL (op1);
4952 unsigned HOST_WIDE_INT ml;
4953 int pre_shift;
4954 rtx t1;
4955
4956 pre_shift = ctz_or_zero (d);
4957 ml = invert_mod2n (d >> pre_shift, size);
4958 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4959 pre_shift, NULL_RTX, unsignedp);
4960 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
4961 NULL_RTX, 1);
4962
4963 insn = get_last_insn ();
4964 set_dst_reg_note (insn, REG_EQUAL,
4965 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4966 int_mode, op0, op1),
4967 quotient);
4968 }
4969 break;
4970
4971 case ROUND_DIV_EXPR:
4972 case ROUND_MOD_EXPR:
4973 if (unsignedp)
4974 {
4975 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4976 rtx tem;
4977 rtx_code_label *label;
4978 label = gen_label_rtx ();
4979 quotient = gen_reg_rtx (int_mode);
4980 remainder = gen_reg_rtx (int_mode);
4981 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4982 {
4983 rtx tem;
4984 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
4985 quotient, 1, OPTAB_LIB_WIDEN);
4986 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
4987 remainder = expand_binop (int_mode, sub_optab, op0, tem,
4988 remainder, 1, OPTAB_LIB_WIDEN);
4989 }
4990 tem = plus_constant (int_mode, op1, -1);
4991 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
4992 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
4993 expand_inc (quotient, const1_rtx);
4994 expand_dec (remainder, op1);
4995 emit_label (label);
4996 }
4997 else
4998 {
4999 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5000 int size = GET_MODE_BITSIZE (int_mode);
5001 rtx abs_rem, abs_op1, tem, mask;
5002 rtx_code_label *label;
5003 label = gen_label_rtx ();
5004 quotient = gen_reg_rtx (int_mode);
5005 remainder = gen_reg_rtx (int_mode);
5006 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5007 {
5008 rtx tem;
5009 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5010 quotient, 0, OPTAB_LIB_WIDEN);
5011 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5012 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5013 remainder, 0, OPTAB_LIB_WIDEN);
5014 }
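/* Explanatory note (not in the original source): adjust only when
   2 * |remainder| >= |op1|, i.e. round to nearest with ties away from zero.
   MASK is the sign of OP0 ^ OP1 (all ones when the quotient is negative,
   zero otherwise), so (MASK ^ 1) - MASK is +1 or -1 and (MASK ^ OP1) - MASK
   is OP1 or -OP1, giving the adjustment the same sign as the quotient
   without branching.  */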
5015 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5016 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5017 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5018 1, NULL_RTX, 1);
5019 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5020 tem = expand_binop (int_mode, xor_optab, op0, op1,
5021 NULL_RTX, 0, OPTAB_WIDEN);
5022 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5023 size - 1, NULL_RTX, 0);
5024 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5025 NULL_RTX, 0, OPTAB_WIDEN);
5026 tem = expand_binop (int_mode, sub_optab, tem, mask,
5027 NULL_RTX, 0, OPTAB_WIDEN);
5028 expand_inc (quotient, tem);
5029 tem = expand_binop (int_mode, xor_optab, mask, op1,
5030 NULL_RTX, 0, OPTAB_WIDEN);
5031 tem = expand_binop (int_mode, sub_optab, tem, mask,
5032 NULL_RTX, 0, OPTAB_WIDEN);
5033 expand_dec (remainder, tem);
5034 emit_label (label);
5035 }
5036 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5037
5038 default:
5039 gcc_unreachable ();
5040 }
5041
5042 if (quotient == 0)
5043 {
5044 if (target && GET_MODE (target) != compute_mode)
5045 target = 0;
5046
5047 if (rem_flag)
5048 {
5049 /* Try to produce the remainder without producing the quotient.
5050 If we seem to have a divmod pattern that does not require widening,
5051 don't try widening here. We should really have a WIDEN argument
5052 to expand_twoval_binop, since what we'd really like to do here is
5053 1) try a mod insn in compute_mode
5054 2) try a divmod insn in compute_mode
5055 3) try a div insn in compute_mode and multiply-subtract to get
5056 remainder
5057 4) try the same things with widening allowed. */
5058 remainder
5059 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5060 op0, op1, target,
5061 unsignedp,
5062 ((optab_handler (optab2, compute_mode)
5063 != CODE_FOR_nothing)
5064 ? OPTAB_DIRECT : OPTAB_WIDEN));
5065 if (remainder == 0)
5066 {
5067 /* No luck there. Can we do remainder and divide at once
5068 without a library call? */
5069 remainder = gen_reg_rtx (compute_mode);
5070 if (! expand_twoval_binop ((unsignedp
5071 ? udivmod_optab
5072 : sdivmod_optab),
5073 op0, op1,
5074 NULL_RTX, remainder, unsignedp))
5075 remainder = 0;
5076 }
5077
5078 if (remainder)
5079 return gen_lowpart (mode, remainder);
5080 }
5081
5082 /* Produce the quotient. Try a quotient insn, but not a library call.
5083 If we have a divmod in this mode, use it in preference to widening
5084 the div (for this test we assume it will not fail). Note that optab2
5085 is set to the one of the two optabs that the call below will use. */
5086 quotient
5087 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5088 op0, op1, rem_flag ? NULL_RTX : target,
5089 unsignedp,
5090 ((optab_handler (optab2, compute_mode)
5091 != CODE_FOR_nothing)
5092 ? OPTAB_DIRECT : OPTAB_WIDEN));
5093
5094 if (quotient == 0)
5095 {
5096 /* No luck there. Try a quotient-and-remainder insn,
5097 keeping the quotient alone. */
5098 quotient = gen_reg_rtx (compute_mode);
5099 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5100 op0, op1,
5101 quotient, NULL_RTX, unsignedp))
5102 {
5103 quotient = 0;
5104 if (! rem_flag)
5105 /* Still no luck. If we are not computing the remainder,
5106 use a library call for the quotient. */
5107 quotient = sign_expand_binop (compute_mode,
5108 udiv_optab, sdiv_optab,
5109 op0, op1, target,
5110 unsignedp, OPTAB_LIB_WIDEN);
5111 }
5112 }
5113 }
5114
5115 if (rem_flag)
5116 {
5117 if (target && GET_MODE (target) != compute_mode)
5118 target = 0;
5119
5120 if (quotient == 0)
5121 {
5122 /* No divide instruction either. Use library for remainder. */
5123 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5124 op0, op1, target,
5125 unsignedp, OPTAB_LIB_WIDEN);
5126 /* No remainder function. Try a quotient-and-remainder
5127 function, keeping the remainder. */
5128 if (!remainder)
5129 {
5130 remainder = gen_reg_rtx (compute_mode);
5131 if (!expand_twoval_binop_libfunc
5132 (unsignedp ? udivmod_optab : sdivmod_optab,
5133 op0, op1,
5134 NULL_RTX, remainder,
5135 unsignedp ? UMOD : MOD))
5136 remainder = NULL_RTX;
5137 }
5138 }
5139 else
5140 {
5141 /* We divided. Now finish doing X - Y * (X / Y). */
5142 remainder = expand_mult (compute_mode, quotient, op1,
5143 NULL_RTX, unsignedp);
5144 remainder = expand_binop (compute_mode, sub_optab, op0,
5145 remainder, target, unsignedp,
5146 OPTAB_LIB_WIDEN);
5147 }
5148 }
5149
5150 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5151 }
5152 \f
5153 /* Return a tree node with data type TYPE, describing the value of X.
5154 Usually this is a VAR_DECL, if there is no obvious better choice.
5155 X may be an expression, but we only support those expressions
5156 generated by loop.c. */
5157
5158 tree
5159 make_tree (tree type, rtx x)
5160 {
5161 tree t;
5162
5163 switch (GET_CODE (x))
5164 {
5165 case CONST_INT:
5166 case CONST_WIDE_INT:
5167 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5168 return t;
5169
5170 case CONST_DOUBLE:
5171 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5172 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5173 t = wide_int_to_tree (type,
5174 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5175 HOST_BITS_PER_WIDE_INT * 2));
5176 else
5177 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5178
5179 return t;
5180
5181 case CONST_VECTOR:
5182 {
5183 int units = CONST_VECTOR_NUNITS (x);
5184 tree itype = TREE_TYPE (type);
5185 int i;
5186
5187 /* Build a tree with vector elements. */
5188 tree_vector_builder elts (type, units, 1);
5189 for (i = 0; i < units; ++i)
5190 {
5191 rtx elt = CONST_VECTOR_ELT (x, i);
5192 elts.quick_push (make_tree (itype, elt));
5193 }
5194
5195 return elts.build ();
5196 }
5197
5198 case PLUS:
5199 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5200 make_tree (type, XEXP (x, 1)));
5201
5202 case MINUS:
5203 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5204 make_tree (type, XEXP (x, 1)));
5205
5206 case NEG:
5207 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5208
5209 case MULT:
5210 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5211 make_tree (type, XEXP (x, 1)));
5212
5213 case ASHIFT:
5214 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5215 make_tree (type, XEXP (x, 1)));
5216
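/* Explanatory note (not in the original source): a tree-level RSHIFT_EXPR
   takes its semantics from the operand type, so the logical shift below is
   built in the unsigned variant of TYPE and the arithmetic shift in the
   signed variant, then converted back to TYPE.  */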
5217 case LSHIFTRT:
5218 t = unsigned_type_for (type);
5219 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5220 make_tree (t, XEXP (x, 0)),
5221 make_tree (type, XEXP (x, 1))));
5222
5223 case ASHIFTRT:
5224 t = signed_type_for (type);
5225 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5226 make_tree (t, XEXP (x, 0)),
5227 make_tree (type, XEXP (x, 1))));
5228
5229 case DIV:
5230 if (TREE_CODE (type) != REAL_TYPE)
5231 t = signed_type_for (type);
5232 else
5233 t = type;
5234
5235 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5236 make_tree (t, XEXP (x, 0)),
5237 make_tree (t, XEXP (x, 1))));
5238 case UDIV:
5239 t = unsigned_type_for (type);
5240 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5241 make_tree (t, XEXP (x, 0)),
5242 make_tree (t, XEXP (x, 1))));
5243
5244 case SIGN_EXTEND:
5245 case ZERO_EXTEND:
5246 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5247 GET_CODE (x) == ZERO_EXTEND);
5248 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5249
5250 case CONST:
5251 {
5252 rtx op = XEXP (x, 0);
5253 if (GET_CODE (op) == VEC_DUPLICATE)
5254 {
5255 tree elt_tree = make_tree (TREE_TYPE (type), XEXP (op, 0));
5256 return build_vector_from_val (type, elt_tree);
5257 }
5258 return make_tree (type, op);
5259 }
5260
5261 case SYMBOL_REF:
5262 t = SYMBOL_REF_DECL (x);
5263 if (t)
5264 return fold_convert (type, build_fold_addr_expr (t));
5265 /* fall through. */
5266
5267 default:
5268 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5269
5270 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5271 address mode to pointer mode. */
5272 if (POINTER_TYPE_P (type))
5273 x = convert_memory_address_addr_space
5274 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5275
5276 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5277 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5278 t->decl_with_rtl.rtl = x;
5279
5280 return t;
5281 }
5282 }
5283 \f
5284 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5285 and returning TARGET.
5286
5287 If TARGET is 0, a pseudo-register or constant is returned. */
5288
5289 rtx
5290 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5291 {
5292 rtx tem = 0;
5293
5294 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5295 tem = simplify_binary_operation (AND, mode, op0, op1);
5296 if (tem == 0)
5297 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5298
5299 if (target == 0)
5300 target = tem;
5301 else if (tem != target)
5302 emit_move_insn (target, tem);
5303 return target;
5304 }
5305
5306 /* Helper function for emit_store_flag. */
5307 rtx
5308 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5309 machine_mode mode, machine_mode compare_mode,
5310 int unsignedp, rtx x, rtx y, int normalizep,
5311 machine_mode target_mode)
5312 {
5313 struct expand_operand ops[4];
5314 rtx op0, comparison, subtarget;
5315 rtx_insn *last;
5316 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5317 scalar_int_mode int_target_mode;
5318
5319 last = get_last_insn ();
5320 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5321 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5322 if (!x || !y)
5323 {
5324 delete_insns_since (last);
5325 return NULL_RTX;
5326 }
5327
5328 if (target_mode == VOIDmode)
5329 int_target_mode = result_mode;
5330 else
5331 int_target_mode = as_a <scalar_int_mode> (target_mode);
5332 if (!target)
5333 target = gen_reg_rtx (int_target_mode);
5334
5335 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5336
5337 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5338 create_fixed_operand (&ops[1], comparison);
5339 create_fixed_operand (&ops[2], x);
5340 create_fixed_operand (&ops[3], y);
5341 if (!maybe_expand_insn (icode, 4, ops))
5342 {
5343 delete_insns_since (last);
5344 return NULL_RTX;
5345 }
5346 subtarget = ops[0].value;
5347
5348 /* If we are converting to a wider mode, first convert to
5349 INT_TARGET_MODE, then normalize. This produces better combining
5350 opportunities on machines that have a SIGN_EXTRACT when we are
5351 testing a single bit. This mostly benefits the 68k.
5352
5353 If STORE_FLAG_VALUE does not have the sign bit set when
5354 interpreted in MODE, we can do this conversion as unsigned, which
5355 is usually more efficient. */
5356 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode))
5357 {
5358 convert_move (target, subtarget,
5359 val_signbit_known_clear_p (result_mode,
5360 STORE_FLAG_VALUE));
5361 op0 = target;
5362 result_mode = int_target_mode;
5363 }
5364 else
5365 op0 = subtarget;
5366
5367 /* If we want to keep subexpressions around, don't reuse our last
5368 target. */
5369 if (optimize)
5370 subtarget = 0;
5371
5372 /* Now normalize to the proper value in MODE. Sometimes we don't
5373 have to do anything. */
5374 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5375 ;
5376 /* STORE_FLAG_VALUE might be the most negative number, so write
5377 the comparison this way to avoid a compile-time warning. */
5378 else if (- normalizep == STORE_FLAG_VALUE)
5379 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5380
5381 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5382 it hard to use a value of just the sign bit due to ANSI integer
5383 constant typing rules. */
5384 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5385 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5386 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5387 normalizep == 1);
5388 else
5389 {
5390 gcc_assert (STORE_FLAG_VALUE & 1);
5391
5392 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5393 if (normalizep == -1)
5394 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5395 }
5396
5397 /* If we were converting to a smaller mode, do the conversion now. */
5398 if (int_target_mode != result_mode)
5399 {
5400 convert_move (target, op0, 0);
5401 return target;
5402 }
5403 else
5404 return op0;
5405 }
5406
5407
5408 /* A subroutine of emit_store_flag only including "tricks" that do not
5409 need a recursive call. These are kept separate to avoid infinite
5410 loops. */
5411
5412 static rtx
5413 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5414 machine_mode mode, int unsignedp, int normalizep,
5415 machine_mode target_mode)
5416 {
5417 rtx subtarget;
5418 enum insn_code icode;
5419 machine_mode compare_mode;
5420 enum mode_class mclass;
5421 enum rtx_code scode;
5422
5423 if (unsignedp)
5424 code = unsigned_condition (code);
5425 scode = swap_condition (code);
5426
5427 /* If one operand is constant, make it the second one. Only do this
5428 if the other operand is not constant as well. */
5429
5430 if (swap_commutative_operands_p (op0, op1))
5431 {
5432 std::swap (op0, op1);
5433 code = swap_condition (code);
5434 }
5435
5436 if (mode == VOIDmode)
5437 mode = GET_MODE (op0);
5438
5439 /* For some comparisons with 1 and -1, we can convert this to
5440 comparisons with zero. This will often produce more opportunities for
5441 store-flag insns. */
5442
5443 switch (code)
5444 {
5445 case LT:
5446 if (op1 == const1_rtx)
5447 op1 = const0_rtx, code = LE;
5448 break;
5449 case LE:
5450 if (op1 == constm1_rtx)
5451 op1 = const0_rtx, code = LT;
5452 break;
5453 case GE:
5454 if (op1 == const1_rtx)
5455 op1 = const0_rtx, code = GT;
5456 break;
5457 case GT:
5458 if (op1 == constm1_rtx)
5459 op1 = const0_rtx, code = GE;
5460 break;
5461 case GEU:
5462 if (op1 == const1_rtx)
5463 op1 = const0_rtx, code = NE;
5464 break;
5465 case LTU:
5466 if (op1 == const1_rtx)
5467 op1 = const0_rtx, code = EQ;
5468 break;
5469 default:
5470 break;
5471 }
5472
5473 /* If we are comparing a double-word integer with zero or -1, we can
5474 convert the comparison into one involving a single word. */
5475 scalar_int_mode int_mode;
5476 if (is_int_mode (mode, &int_mode)
5477 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5478 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5479 {
5480 rtx tem;
5481 if ((code == EQ || code == NE)
5482 && (op1 == const0_rtx || op1 == constm1_rtx))
5483 {
5484 rtx op00, op01;
5485
5486 /* Do a logical OR or AND of the two words and compare the
5487 result. */
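/* Explanatory note (not in the original source): a double-word value is
   zero exactly when the OR of its two words is zero, and is -1 exactly
   when the AND of its two words is -1, so a single word_mode comparison
   suffices.  */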
5488 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5489 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5490 tem = expand_binop (word_mode,
5491 op1 == const0_rtx ? ior_optab : and_optab,
5492 op00, op01, NULL_RTX, unsignedp,
5493 OPTAB_DIRECT);
5494
5495 if (tem != 0)
5496 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5497 unsignedp, normalizep);
5498 }
5499 else if ((code == LT || code == GE) && op1 == const0_rtx)
5500 {
5501 rtx op0h;
5502
5503 /* If testing the sign bit, can just test on high word. */
5504 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5505 subreg_highpart_offset (word_mode,
5506 int_mode));
5507 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5508 unsignedp, normalizep);
5509 }
5510 else
5511 tem = NULL_RTX;
5512
5513 if (tem)
5514 {
5515 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5516 return tem;
5517 if (!target)
5518 target = gen_reg_rtx (target_mode);
5519
5520 convert_move (target, tem,
5521 !val_signbit_known_set_p (word_mode,
5522 (normalizep ? normalizep
5523 : STORE_FLAG_VALUE)));
5524 return target;
5525 }
5526 }
5527
5528 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5529 complement of A (for GE) and shifting the sign bit to the low bit. */
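/* Illustrative sketch (not in the original source): with a 32-bit mode and
   STORE_FLAG_VALUE == 1, "A < 0" is just the logical shift A >> 31, and
   "A >= 0" is (~A) >> 31; using an arithmetic shift instead yields the
   -1/0 form.  */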
5530 if (op1 == const0_rtx && (code == LT || code == GE)
5531 && is_int_mode (mode, &int_mode)
5532 && (normalizep || STORE_FLAG_VALUE == 1
5533 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5534 {
5535 scalar_int_mode int_target_mode;
5536 subtarget = target;
5537
5538 if (!target)
5539 int_target_mode = int_mode;
5540 else
5541 {
5542 /* If the result is to be wider than OP0, it is best to convert it
5543 first. If it is to be narrower, it is *incorrect* to convert it
5544 first. */
5545 int_target_mode = as_a <scalar_int_mode> (target_mode);
5546 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5547 {
5548 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5549 int_mode = int_target_mode;
5550 }
5551 }
5552
5553 if (int_target_mode != int_mode)
5554 subtarget = 0;
5555
5556 if (code == GE)
5557 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5558 ((STORE_FLAG_VALUE == 1 || normalizep)
5559 ? 0 : subtarget), 0);
5560
5561 if (STORE_FLAG_VALUE == 1 || normalizep)
5562 /* If we are supposed to produce a 0/1 value, we want to do
5563 a logical shift from the sign bit to the low-order bit; for
5564 a -1/0 value, we do an arithmetic shift. */
5565 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5566 GET_MODE_BITSIZE (int_mode) - 1,
5567 subtarget, normalizep != -1);
5568
5569 if (int_mode != int_target_mode)
5570 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5571
5572 return op0;
5573 }
5574
5575 mclass = GET_MODE_CLASS (mode);
5576 FOR_EACH_MODE_FROM (compare_mode, mode)
5577 {
5578 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5579 icode = optab_handler (cstore_optab, optab_mode);
5580 if (icode != CODE_FOR_nothing)
5581 {
5582 do_pending_stack_adjust ();
5583 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5584 unsignedp, op0, op1, normalizep, target_mode);
5585 if (tem)
5586 return tem;
5587
5588 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5589 {
5590 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5591 unsignedp, op1, op0, normalizep, target_mode);
5592 if (tem)
5593 return tem;
5594 }
5595 break;
5596 }
5597 }
5598
5599 return 0;
5600 }
5601
5602 /* Subroutine of emit_store_flag that handles cases in which the operands
5603 are scalar integers. SUBTARGET is the target to use for temporary
5604 operations and TRUEVAL is the value to store when the condition is
5605 true. All other arguments are as for emit_store_flag. */
5606
5607 rtx
5608 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5609 rtx op1, scalar_int_mode mode, int unsignedp,
5610 int normalizep, rtx trueval)
5611 {
5612 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5613 rtx_insn *last = get_last_insn ();
5614
5615 /* If this is an equality comparison of integers, we can try to exclusive-or
5616 (or subtract) the two operands and use a recursive call to try the
5617 comparison with zero. Don't do any of these cases if branches are
5618 very cheap. */
5619
5620 if ((code == EQ || code == NE) && op1 != const0_rtx)
5621 {
5622 rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5623 OPTAB_WIDEN);
5624
5625 if (tem == 0)
5626 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5627 OPTAB_WIDEN);
5628 if (tem != 0)
5629 tem = emit_store_flag (target, code, tem, const0_rtx,
5630 mode, unsignedp, normalizep);
5631 if (tem != 0)
5632 return tem;
5633
5634 delete_insns_since (last);
5635 }
5636
5637 /* For integer comparisons, try the reverse comparison. However, for
5638 small X, and if we would have to extend it anyway, implementing "X != 0"
5639 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5640 rtx_code rcode = reverse_condition (code);
5641 if (can_compare_p (rcode, mode, ccp_store_flag)
5642 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5643 && code == NE
5644 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5645 && op1 == const0_rtx))
5646 {
5647 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5648 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5649
5650 /* Again, for the reverse comparison, use either an addition or a XOR. */
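/* Explanatory note (not in the original source): the reverse comparison
   yields STORE_FLAG_VALUE when the original condition is false and 0 when
   it is true, so adding NORMALIZEP (when it is the negation of
   STORE_FLAG_VALUE) or XORing with TRUEVAL maps those two values onto 0
   and the wanted result.  */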
5651 if (want_add
5652 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5653 optimize_insn_for_speed_p ()) == 0)
5654 {
5655 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5656 STORE_FLAG_VALUE, target_mode);
5657 if (tem != 0)
5658 tem = expand_binop (target_mode, add_optab, tem,
5659 gen_int_mode (normalizep, target_mode),
5660 target, 0, OPTAB_WIDEN);
5661 if (tem != 0)
5662 return tem;
5663 }
5664 else if (!want_add
5665 && rtx_cost (trueval, mode, XOR, 1,
5666 optimize_insn_for_speed_p ()) == 0)
5667 {
5668 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5669 normalizep, target_mode);
5670 if (tem != 0)
5671 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5672 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5673 if (tem != 0)
5674 return tem;
5675 }
5676
5677 delete_insns_since (last);
5678 }
5679
5680 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5681 the constant zero. Reject all other comparisons at this point. Only
5682 do LE and GT if branches are expensive since they are expensive on
5683 2-operand machines. */
5684
5685 if (op1 != const0_rtx
5686 || (code != EQ && code != NE
5687 && (BRANCH_COST (optimize_insn_for_speed_p (),
5688 false) <= 1 || (code != LE && code != GT))))
5689 return 0;
5690
5691 /* Try to put the result of the comparison in the sign bit. Assume we can't
5692 do the necessary operation below. */
5693
5694 rtx tem = 0;
5695
5696 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5697 the sign bit set. */
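/* Illustrative check (not in the original source): 0 | -1 == -1 and
   -5 | -6 == -5 both have the sign bit set, while 3 | 2 == 3 does not;
   INT_MIN works too because INT_MIN itself already has the sign bit set.  */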
5698
5699 if (code == LE)
5700 {
5701 /* This is destructive, so SUBTARGET can't be OP0. */
5702 if (rtx_equal_p (subtarget, op0))
5703 subtarget = 0;
5704
5705 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5706 OPTAB_WIDEN);
5707 if (tem)
5708 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5709 OPTAB_WIDEN);
5710 }
5711
5712 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5713 number of bits in the mode of OP0, minus one. */
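/* Illustrative check (not in the original source), for a 32-bit mode:
   (5 >> 31) - 5 == -5 has the sign bit set, while (0 >> 31) - 0 == 0 and
   (-7 >> 31) - (-7) == -1 + 7 == 6 do not, so the sign bit of the result
   is exactly "A > 0".  */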
5714
5715 if (code == GT)
5716 {
5717 if (rtx_equal_p (subtarget, op0))
5718 subtarget = 0;
5719
5720 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5721 GET_MODE_BITSIZE (mode) - 1,
5722 subtarget, 0);
5723 if (tem)
5724 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5725 OPTAB_WIDEN);
5726 }
5727
5728 if (code == EQ || code == NE)
5729 {
5730 /* For EQ or NE, one way to do the comparison is to apply an operation
5731 that converts the operand into a positive number if it is nonzero
5732 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5733 for NE we negate. This puts the result in the sign bit. Then we
5734 normalize with a shift, if needed.
5735
5736 Two operations that can do the above actions are ABS and FFS, so try
5737 them. If that doesn't work, and MODE is smaller than a full word,
5738 we can use zero-extension to the wider mode (an unsigned conversion)
5739 as the operation. */
5740
5741 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5742 that is compensated by the subsequent overflow when subtracting
5743 one / negating. */
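/* Illustrative check (not in the original source): for EQ, ABS (3) - 1 == 2
   has a clear sign bit while ABS (0) - 1 == -1 has it set; for NE,
   -ABS (3) == -3 has it set while -ABS (0) == 0 does not.  ABS (INT_MIN)
   wraps to INT_MIN, and the subsequent subtract or negate wraps again to
   give the right sign bit.  */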
5744
5745 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5746 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5747 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5748 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5749 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5750 {
5751 tem = convert_modes (word_mode, mode, op0, 1);
5752 mode = word_mode;
5753 }
5754
5755 if (tem != 0)
5756 {
5757 if (code == EQ)
5758 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5759 0, OPTAB_WIDEN);
5760 else
5761 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5762 }
5763
5764 /* If we couldn't do it that way, for NE we can "or" the two's complement
5765 of the value with itself. For EQ, we take the one's complement of
5766 that "or", which is an extra insn, so we only handle EQ if branches
5767 are expensive. */
5768
5769 if (tem == 0
5770 && (code == NE
5771 || BRANCH_COST (optimize_insn_for_speed_p (),
5772 false) > 1))
5773 {
5774 if (rtx_equal_p (subtarget, op0))
5775 subtarget = 0;
5776
5777 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5778 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5779 OPTAB_WIDEN);
5780
5781 if (tem && code == EQ)
5782 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5783 }
5784 }
5785
5786 if (tem && normalizep)
5787 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5788 GET_MODE_BITSIZE (mode) - 1,
5789 subtarget, normalizep == 1);
5790
5791 if (tem)
5792 {
5793 if (!target)
5794 ;
5795 else if (GET_MODE (tem) != target_mode)
5796 {
5797 convert_move (target, tem, 0);
5798 tem = target;
5799 }
5800 else if (!subtarget)
5801 {
5802 emit_move_insn (target, tem);
5803 tem = target;
5804 }
5805 }
5806 else
5807 delete_insns_since (last);
5808
5809 return tem;
5810 }
5811
5812 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5813 and store the result in TARGET. Normally return TARGET.
5814 Return 0 if that cannot be done.
5815
5816 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5817 it is VOIDmode, they cannot both be CONST_INT.
5818
5819 UNSIGNEDP is for the case where we have to widen the operands
5820 to perform the operation. It says to use zero-extension.
5821
5822 NORMALIZEP is 1 if we should convert the result to be either zero
5823 or one. NORMALIZEP is -1 if we should convert the result to be
5824 either zero or -1. If NORMALIZEP is zero, the result will be left
5825 "raw" out of the scc insn. */
5826
5827 rtx
5828 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5829 machine_mode mode, int unsignedp, int normalizep)
5830 {
5831 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5832 enum rtx_code rcode;
5833 rtx subtarget;
5834 rtx tem, trueval;
5835 rtx_insn *last;
5836
5837 /* If we compare constants, we shouldn't use a store-flag operation,
5838 but a constant load. We can get there via the vanilla route that
5839 usually generates a compare-branch sequence, but will in this case
5840 fold the comparison to a constant, and thus elide the branch. */
5841 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5842 return NULL_RTX;
5843
5844 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5845 target_mode);
5846 if (tem)
5847 return tem;
5848
5849 /* If we reached here, we can't do this with a scc insn, however there
5850 are some comparisons that can be done in other ways. Don't do any
5851 of these cases if branches are very cheap. */
5852 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5853 return 0;
5854
5855 /* See what we need to return. We can only return a 1, -1, or the
5856 sign bit. */
5857
5858 if (normalizep == 0)
5859 {
5860 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5861 normalizep = STORE_FLAG_VALUE;
5862
5863 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5864 ;
5865 else
5866 return 0;
5867 }
5868
5869 last = get_last_insn ();
5870
5871 /* If optimizing, use different pseudo registers for each insn, instead
5872 of reusing the same pseudo. This leads to better CSE, but slows
5873 down the compiler, since there are more pseudos. */
5874 subtarget = (!optimize
5875 && (target_mode == mode)) ? target : NULL_RTX;
5876 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5877
5878 /* For floating-point comparisons, try the reverse comparison or try
5879 changing the "orderedness" of the comparison. */
5880 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5881 {
5882 enum rtx_code first_code;
5883 bool and_them;
5884
5885 rcode = reverse_condition_maybe_unordered (code);
5886 if (can_compare_p (rcode, mode, ccp_store_flag)
5887 && (code == ORDERED || code == UNORDERED
5888 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5889 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5890 {
5891 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5892 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5893
5894 /* For the reverse comparison, use either an addition or a XOR. */
5895 if (want_add
5896 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5897 optimize_insn_for_speed_p ()) == 0)
5898 {
5899 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5900 STORE_FLAG_VALUE, target_mode);
5901 if (tem)
5902 return expand_binop (target_mode, add_optab, tem,
5903 gen_int_mode (normalizep, target_mode),
5904 target, 0, OPTAB_WIDEN);
5905 }
5906 else if (!want_add
5907 && rtx_cost (trueval, mode, XOR, 1,
5908 optimize_insn_for_speed_p ()) == 0)
5909 {
5910 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5911 normalizep, target_mode);
5912 if (tem)
5913 return expand_binop (target_mode, xor_optab, tem, trueval,
5914 target, INTVAL (trueval) >= 0,
5915 OPTAB_WIDEN);
5916 }
5917 }
5918
5919 delete_insns_since (last);
5920
5921 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5922 if (code == ORDERED || code == UNORDERED)
5923 return 0;
5924
5925 and_them = split_comparison (code, mode, &first_code, &code);
5926
5927 /* If there are no NaNs, the first comparison should always fall through.
5928 Effectively change the comparison to the other one. */
5929 if (!HONOR_NANS (mode))
5930 {
5931 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5932 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5933 target_mode);
5934 }
5935
5936 if (!HAVE_conditional_move)
5937 return 0;
5938
5939 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5940 conditional move. */
5941 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5942 normalizep, target_mode);
5943 if (tem == 0)
5944 return 0;
5945
5946 if (and_them)
5947 tem = emit_conditional_move (target, code, op0, op1, mode,
5948 tem, const0_rtx, GET_MODE (tem), 0);
5949 else
5950 tem = emit_conditional_move (target, code, op0, op1, mode,
5951 trueval, tem, GET_MODE (tem), 0);
5952
5953 if (tem == 0)
5954 delete_insns_since (last);
5955 return tem;
5956 }
5957
5958 /* The remaining tricks only apply to integer comparisons. */
5959
5960 scalar_int_mode int_mode;
5961 if (is_int_mode (mode, &int_mode))
5962 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
5963 unsignedp, normalizep, trueval);
5964
5965 return 0;
5966 }
5967
5968 /* Like emit_store_flag, but always succeeds. */
5969
5970 rtx
5971 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5972 machine_mode mode, int unsignedp, int normalizep)
5973 {
5974 rtx tem;
5975 rtx_code_label *label;
5976 rtx trueval, falseval;
5977
5978 /* First see if emit_store_flag can do the job. */
5979 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5980 if (tem != 0)
5981 return tem;
5982
5983 if (!target)
5984 target = gen_reg_rtx (word_mode);
5985
5986 /* If this failed, we have to do this with set/compare/jump/set code.
5987 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5988 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5989 if (code == NE
5990 && GET_MODE_CLASS (mode) == MODE_INT
5991 && REG_P (target)
5992 && op0 == target
5993 && op1 == const0_rtx)
5994 {
5995 label = gen_label_rtx ();
5996 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5997 NULL_RTX, NULL, label,
5998 profile_probability::uninitialized ());
5999 emit_move_insn (target, trueval);
6000 emit_label (label);
6001 return target;
6002 }
6003
6004 if (!REG_P (target)
6005 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6006 target = gen_reg_rtx (GET_MODE (target));
6007
6008 /* Jump in the right direction if the target cannot implement CODE
6009 but can jump on its reverse condition. */
6010 falseval = const0_rtx;
6011 if (! can_compare_p (code, mode, ccp_jump)
6012 && (! FLOAT_MODE_P (mode)
6013 || code == ORDERED || code == UNORDERED
6014 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6015 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6016 {
6017 enum rtx_code rcode;
6018 if (FLOAT_MODE_P (mode))
6019 rcode = reverse_condition_maybe_unordered (code);
6020 else
6021 rcode = reverse_condition (code);
6022
6023 /* Canonicalize to UNORDERED for the libcall. */
6024 if (can_compare_p (rcode, mode, ccp_jump)
6025 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6026 {
6027 falseval = trueval;
6028 trueval = const0_rtx;
6029 code = rcode;
6030 }
6031 }
6032
6033 emit_move_insn (target, trueval);
6034 label = gen_label_rtx ();
6035 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6036 label, profile_probability::uninitialized ());
6037
6038 emit_move_insn (target, falseval);
6039 emit_label (label);
6040
6041 return target;
6042 }
6043 \f
6044 /* Perform possibly multi-word comparison and conditional jump to LABEL
6045 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
6046 now a thin wrapper around do_compare_rtx_and_jump. */
6047
6048 static void
6049 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6050 rtx_code_label *label)
6051 {
6052 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6053 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6054 NULL, label, profile_probability::uninitialized ());
6055 }