gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2017 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "predict.h"
  30 #include "memmodel.h"
  31 #include "tm_p.h"
  32 #include "expmed.h"
  33 #include "optabs.h"
  34 #include "regs.h"
  35 #include "emit-rtl.h"
  36 #include "diagnostic-core.h"
  37 #include "fold-const.h"
  38 #include "stor-layout.h"
  39 #include "dojump.h"
  40 #include "explow.h"
  41 #include "expr.h"
  42 #include "langhooks.h"
  43 #include "tree-vector-builder.h"
  44
  45 struct target_expmed default_target_expmed;
  46 #if SWITCHABLE_TARGET
  47 struct target_expmed *this_target_expmed = &default_target_expmed;
  48 #endif
  49
  50 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
  51                                    unsigned HOST_WIDE_INT,
  52                                    unsigned HOST_WIDE_INT,
  53                                    unsigned HOST_WIDE_INT,
  54                                    unsigned HOST_WIDE_INT,
  55                                    rtx, scalar_int_mode, bool);
  56 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
  57                                      unsigned HOST_WIDE_INT,
  58                                      unsigned HOST_WIDE_INT,
  59                                      rtx, scalar_int_mode, bool);
  60 static void store_split_bit_field (rtx, opt_scalar_int_mode,
  61                                    unsigned HOST_WIDE_INT,
  62                                    unsigned HOST_WIDE_INT,
  63                                    unsigned HOST_WIDE_INT,
  64                                    unsigned HOST_WIDE_INT,
  65                                    rtx, scalar_int_mode, bool);
  66 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
  67                                     unsigned HOST_WIDE_INT,
  68                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  69 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
  70                                       unsigned HOST_WIDE_INT,
  71                                       unsigned HOST_WIDE_INT, rtx, int, bool);
  72 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  73 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
  74                                     unsigned HOST_WIDE_INT,
  75                                     unsigned HOST_WIDE_INT, int, bool);
  76 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  77 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
  78 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
  79
  80 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  81    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  82    The mask is truncated if necessary to the width of mode MODE.  The
  83    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  84
  85 static inline rtx
  86 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
  87 {
  88   return immed_wide_int_const
  89     (wi::shifted_mask (bitpos, bitsize, complement,
  90                        GET_MODE_PRECISION (mode)), mode);
  91 }
  92
  93 /* Test whether a value is zero of a power of two.  */
  94 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  95   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
  96
  97 struct init_expmed_rtl
  98 {
  99   rtx reg;
 100   rtx plus;
 101   rtx neg;
 102   rtx mult;
 103   rtx sdiv;
 104   rtx udiv;
 105   rtx sdiv_32;
 106   rtx smod_32;
 107   rtx wide_mult;
 108   rtx wide_lshr;
 109   rtx wide_trunc;
 110   rtx shift;
 111   rtx shift_mult;
 112   rtx shift_add;
 113   rtx shift_sub0;
 114   rtx shift_sub1;
 115   rtx zext;
 116   rtx trunc;
 117
 118   rtx pow2[MAX_BITS_PER_WORD];
 119   rtx cint[MAX_BITS_PER_WORD];
 120 };
 121
 122 static void
 123 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
 124                       scalar_int_mode from_mode, bool speed)
 125 {
 126   int to_size, from_size;
 127   rtx which;
 128
 129   to_size = GET_MODE_PRECISION (to_mode);
 130   from_size = GET_MODE_PRECISION (from_mode);
 131
 132   /* Most partial integers have a precision less than the "full"
 133      integer it requires for storage.  In case one doesn't, for
 134      comparison purposes here, reduce the bit size by one in that
 135      case.  */
 136   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 137       && pow2p_hwi (to_size))
 138     to_size --;
 139   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 140       && pow2p_hwi (from_size))
 141     from_size --;
 142
 143   /* Assume cost of zero-extend and sign-extend is the same.  */
 144   which = (to_size < from_size ? all->trunc : all->zext);
 145
 146   PUT_MODE (all->reg, from_mode);
 147   set_convert_cost (to_mode, from_mode, speed,
 148                     set_src_cost (which, to_mode, speed));
 149 }
 150
 151 static void
 152 init_expmed_one_mode (struct init_expmed_rtl *all,
 153                       machine_mode mode, int speed)
 154 {
 155   int m, n, mode_bitsize;
 156   machine_mode mode_from;
 157
 158   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 159
 160   PUT_MODE (all->reg, mode);
 161   PUT_MODE (all->plus, mode);
 162   PUT_MODE (all->neg, mode);
 163   PUT_MODE (all->mult, mode);
 164   PUT_MODE (all->sdiv, mode);
 165   PUT_MODE (all->udiv, mode);
 166   PUT_MODE (all->sdiv_32, mode);
 167   PUT_MODE (all->smod_32, mode);
 168   PUT_MODE (all->wide_trunc, mode);
 169   PUT_MODE (all->shift, mode);
 170   PUT_MODE (all->shift_mult, mode);
 171   PUT_MODE (all->shift_add, mode);
 172   PUT_MODE (all->shift_sub0, mode);
 173   PUT_MODE (all->shift_sub1, mode);
 174   PUT_MODE (all->zext, mode);
 175   PUT_MODE (all->trunc, mode);
 176
 177   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 178   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 179   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 180   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 181   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 182
 183   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 184                                      <= 2 * add_cost (speed, mode)));
 185   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 186                                      <= 4 * add_cost (speed, mode)));
 187
 188   set_shift_cost (speed, mode, 0, 0);
 189   {
 190     int cost = add_cost (speed, mode);
 191     set_shiftadd_cost (speed, mode, 0, cost);
 192     set_shiftsub0_cost (speed, mode, 0, cost);
 193     set_shiftsub1_cost (speed, mode, 0, cost);
 194   }
 195
 196   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 197   for (m = 1; m < n; m++)
 198     {
 199       XEXP (all->shift, 1) = all->cint[m];
 200       XEXP (all->shift_mult, 1) = all->pow2[m];
 201
 202       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 203       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 204                                                        speed));
 205       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 206                                                         speed));
 207       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 208                                                         speed));
 209     }
 210
 211   scalar_int_mode int_mode_to;
 212   if (is_a <scalar_int_mode> (mode, &int_mode_to))
 213     {
 214       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 215            mode_from = (machine_mode)(mode_from + 1))
 216         init_expmed_one_conv (all, int_mode_to,
 217                               as_a <scalar_int_mode> (mode_from), speed);
 218
 219       scalar_int_mode wider_mode;
 220       if (GET_MODE_CLASS (int_mode_to) == MODE_INT
 221           && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
 222         {
 223           PUT_MODE (all->zext, wider_mode);
 224           PUT_MODE (all->wide_mult, wider_mode);
 225           PUT_MODE (all->wide_lshr, wider_mode);
 226           XEXP (all->wide_lshr, 1)
 227             = gen_int_shift_amount (wider_mode, mode_bitsize);
 228
 229           set_mul_widen_cost (speed, wider_mode,
 230                               set_src_cost (all->wide_mult, wider_mode, speed));
 231           set_mul_highpart_cost (speed, int_mode_to,
 232                                  set_src_cost (all->wide_trunc,
 233                                                int_mode_to, speed));
 234         }
 235     }
 236 }
 237
 238 void
 239 init_expmed (void)
 240 {
 241   struct init_expmed_rtl all;
 242   machine_mode mode = QImode;
 243   int m, speed;
 244
 245   memset (&all, 0, sizeof all);
 246   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 247     {
 248       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
 249       all.cint[m] = GEN_INT (m);
 250     }
 251
 252   /* Avoid using hard regs in ways which may be unsupported.  */
 253   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 254   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 255   all.neg = gen_rtx_NEG (mode, all.reg);
 256   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 257   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 258   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 259   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 260   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 261   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 262   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 263   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 264   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 265   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 266   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 267   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 268   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 269   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 270   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 271
 272   for (speed = 0; speed < 2; speed++)
 273     {
 274       crtl->maybe_hot_insn_p = speed;
 275       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 276
 277       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 278            mode = (machine_mode)(mode + 1))
 279         init_expmed_one_mode (&all, mode, speed);
 280
 281       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 282         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 283              mode = (machine_mode)(mode + 1))
 284           init_expmed_one_mode (&all, mode, speed);
 285
 286       if (MIN_MODE_VECTOR_INT != VOIDmode)
 287         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 288              mode = (machine_mode)(mode + 1))
 289           init_expmed_one_mode (&all, mode, speed);
 290     }
 291
 292   if (alg_hash_used_p ())
 293     {
 294       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 295       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 296     }
 297   else
 298     set_alg_hash_used_p (true);
 299   default_rtl_profile ();
 300
 301   ggc_free (all.trunc);
 302   ggc_free (all.shift_sub1);
 303   ggc_free (all.shift_sub0);
 304   ggc_free (all.shift_add);
 305   ggc_free (all.shift_mult);
 306   ggc_free (all.shift);
 307   ggc_free (all.wide_trunc);
 308   ggc_free (all.wide_lshr);
 309   ggc_free (all.wide_mult);
 310   ggc_free (all.zext);
 311   ggc_free (all.smod_32);
 312   ggc_free (all.sdiv_32);
 313   ggc_free (all.udiv);
 314   ggc_free (all.sdiv);
 315   ggc_free (all.mult);
 316   ggc_free (all.neg);
 317   ggc_free (all.plus);
 318   ggc_free (all.reg);
 319 }
 320
 321 /* Return an rtx representing minus the value of X.
 322    MODE is the intended mode of the result,
 323    useful if X is a CONST_INT.  */
 324
 325 rtx
 326 negate_rtx (machine_mode mode, rtx x)
 327 {
 328   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 329
 330   if (result == 0)
 331     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 332
 333   return result;
 334 }
 335
 336 /* Whether reverse storage order is supported on the target.  */
 337 static int reverse_storage_order_supported = -1;
 338
 339 /* Check whether reverse storage order is supported on the target.  */
 340
 341 static void
 342 check_reverse_storage_order_support (void)
 343 {
 344   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 345     {
 346       reverse_storage_order_supported = 0;
 347       sorry ("reverse scalar storage order");
 348     }
 349   else
 350     reverse_storage_order_supported = 1;
 351 }
 352
 353 /* Whether reverse FP storage order is supported on the target.  */
 354 static int reverse_float_storage_order_supported = -1;
 355
 356 /* Check whether reverse FP storage order is supported on the target.  */
 357
 358 static void
 359 check_reverse_float_storage_order_support (void)
 360 {
 361   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 362     {
 363       reverse_float_storage_order_supported = 0;
 364       sorry ("reverse floating-point scalar storage order");
 365     }
 366   else
 367     reverse_float_storage_order_supported = 1;
 368 }
 369
 370 /* Return an rtx representing value of X with reverse storage order.
 371    MODE is the intended mode of the result,
 372    useful if X is a CONST_INT.  */
 373
 374 rtx
 375 flip_storage_order (machine_mode mode, rtx x)
 376 {
 377   scalar_int_mode int_mode;
 378   rtx result;
 379
 380   if (mode == QImode)
 381     return x;
 382
 383   if (COMPLEX_MODE_P (mode))
 384     {
 385       rtx real = read_complex_part (x, false);
 386       rtx imag = read_complex_part (x, true);
 387
 388       real = flip_storage_order (GET_MODE_INNER (mode), real);
 389       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 390
 391       return gen_rtx_CONCAT (mode, real, imag);
 392     }
 393
 394   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
 395     check_reverse_storage_order_support ();
 396
 397   if (!is_a <scalar_int_mode> (mode, &int_mode))
 398     {
 399       if (FLOAT_MODE_P (mode)
 400           && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
 401         check_reverse_float_storage_order_support ();
 402
 403       if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
 404         {
 405           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 406           return x;
 407         }
 408       x = gen_lowpart (int_mode, x);
 409     }
 410
 411   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 412   if (result == 0)
 413     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 414
 415   if (int_mode != mode)
 416     result = gen_lowpart (mode, result);
 417
 418   return result;
 419 }
 420
 421 /* If MODE is set, adjust bitfield memory MEM so that it points to the
 422    first unit of mode MODE that contains a bitfield of size BITSIZE at
 423    bit position BITNUM.  If MODE is not set, return a BLKmode reference
 424    to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
 425    of the field within the new memory.  */
 426
 427 static rtx
 428 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
 429                       unsigned HOST_WIDE_INT bitsize,
 430                       unsigned HOST_WIDE_INT bitnum,
 431                       unsigned HOST_WIDE_INT *new_bitnum)
 432 {
 433   scalar_int_mode imode;
 434   if (mode.exists (&imode))
 435     {
 436       unsigned int unit = GET_MODE_BITSIZE (imode);
 437       *new_bitnum = bitnum % unit;
 438       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 439       return adjust_bitfield_address (mem, imode, offset);
 440     }
 441   else
 442     {
 443       *new_bitnum = bitnum % BITS_PER_UNIT;
 444       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 445       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 446                             / BITS_PER_UNIT);
 447       return adjust_bitfield_address_size (mem, BLKmode, offset, size);
 448     }
 449 }
 450
 451 /* The caller wants to perform insertion or extraction PATTERN on a
 452    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 453    BITREGION_START and BITREGION_END are as for store_bit_field
 454    and FIELDMODE is the natural mode of the field.
 455
 456    Search for a mode that is compatible with the memory access
 457    restrictions and (where applicable) with a register insertion or
 458    extraction.  Return the new memory on success, storing the adjusted
 459    bit position in *NEW_BITNUM.  Return null otherwise.  */
 460
 461 static rtx
 462 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 463                               rtx op0, HOST_WIDE_INT bitsize,
 464                               HOST_WIDE_INT bitnum,
 465                               unsigned HOST_WIDE_INT bitregion_start,
 466                               unsigned HOST_WIDE_INT bitregion_end,
 467                               machine_mode fieldmode,
 468                               unsigned HOST_WIDE_INT *new_bitnum)
 469 {
 470   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 471                                 bitregion_end, MEM_ALIGN (op0),
 472                                 MEM_VOLATILE_P (op0));
 473   scalar_int_mode best_mode;
 474   if (iter.next_mode (&best_mode))
 475     {
 476       /* We can use a memory in BEST_MODE.  See whether this is true for
 477          any wider modes.  All other things being equal, we prefer to
 478          use the widest mode possible because it tends to expose more
 479          CSE opportunities.  */
 480       if (!iter.prefer_smaller_modes ())
 481         {
 482           /* Limit the search to the mode required by the corresponding
 483              register insertion or extraction instruction, if any.  */
 484           scalar_int_mode limit_mode = word_mode;
 485           extraction_insn insn;
 486           if (get_best_reg_extraction_insn (&insn, pattern,
 487                                             GET_MODE_BITSIZE (best_mode),
 488                                             fieldmode))
 489             limit_mode = insn.field_mode;
 490
 491           scalar_int_mode wider_mode;
 492           while (iter.next_mode (&wider_mode)
 493                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 494             best_mode = wider_mode;
 495         }
 496       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 497                                    new_bitnum);
 498     }
 499   return NULL_RTX;
 500 }
 501
 502 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 503    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 504    offset is then BITNUM / BITS_PER_UNIT.  */
 505
 506 static bool
 507 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 508                      unsigned HOST_WIDE_INT bitsize,
 509                      machine_mode struct_mode)
 510 {
 511   unsigned HOST_WIDE_INT regsize = REGMODE_NATURAL_SIZE (struct_mode);
 512   if (BYTES_BIG_ENDIAN)
 513     return (bitnum % BITS_PER_UNIT == 0
 514             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 515                 || (bitnum + bitsize) % (regsize * BITS_PER_UNIT) == 0));
 516   else
 517     return bitnum % (regsize * BITS_PER_UNIT) == 0;
 518 }
 519
 520 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 521    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 522    Return false if the access would touch memory outside the range
 523    BITREGION_START to BITREGION_END for conformance to the C++ memory
 524    model.  */
 525
 526 static bool
 527 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 528                             unsigned HOST_WIDE_INT bitnum,
 529                             scalar_int_mode fieldmode,
 530                             unsigned HOST_WIDE_INT bitregion_start,
 531                             unsigned HOST_WIDE_INT bitregion_end)
 532 {
 533   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 534
 535   /* -fstrict-volatile-bitfields must be enabled and we must have a
 536      volatile MEM.  */
 537   if (!MEM_P (op0)
 538       || !MEM_VOLATILE_P (op0)
 539       || flag_strict_volatile_bitfields <= 0)
 540     return false;
 541
 542   /* The bit size must not be larger than the field mode, and
 543      the field mode must not be larger than a word.  */
 544   if (bitsize > modesize || modesize > BITS_PER_WORD)
 545     return false;
 546
 547   /* Check for cases of unaligned fields that must be split.  */
 548   if (bitnum % modesize + bitsize > modesize)
 549     return false;
 550
 551   /* The memory must be sufficiently aligned for a MODESIZE access.
 552      This condition guarantees, that the memory access will not
 553      touch anything after the end of the structure.  */
 554   if (MEM_ALIGN (op0) < modesize)
 555     return false;
 556
 557   /* Check for cases where the C++ memory model applies.  */
 558   if (bitregion_end != 0
 559       && (bitnum - bitnum % modesize < bitregion_start
 560           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 561     return false;
 562
 563   return true;
 564 }
 565
 566 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 567    bit number BITNUM can be treated as a simple value of mode MODE.  */
 568
 569 static bool
 570 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 571                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 572 {
 573   return (MEM_P (op0)
 574           && bitnum % BITS_PER_UNIT == 0
 575           && bitsize == GET_MODE_BITSIZE (mode)
 576           && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
 577               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 578                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 579 }
 580 \f
 581 /* Try to use instruction INSV to store VALUE into a field of OP0.
 582    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
 583    BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
 584    are as for store_bit_field.  */
 585
 586 static bool
 587 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 588                             opt_scalar_int_mode op0_mode,
 589                             unsigned HOST_WIDE_INT bitsize,
 590                             unsigned HOST_WIDE_INT bitnum,
 591                             rtx value, scalar_int_mode value_mode)
 592 {
 593   struct expand_operand ops[4];
 594   rtx value1;
 595   rtx xop0 = op0;
 596   rtx_insn *last = get_last_insn ();
 597   bool copy_back = false;
 598
 599   scalar_int_mode op_mode = insv->field_mode;
 600   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 601   if (bitsize == 0 || bitsize > unit)
 602     return false;
 603
 604   if (MEM_P (xop0))
 605     /* Get a reference to the first byte of the field.  */
 606     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 607                                  &bitnum);
 608   else
 609     {
 610       /* Convert from counting within OP0 to counting in OP_MODE.  */
 611       if (BYTES_BIG_ENDIAN)
 612         bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
 613
 614       /* If xop0 is a register, we need it in OP_MODE
 615          to make it acceptable to the format of insv.  */
 616       if (GET_CODE (xop0) == SUBREG)
 617         /* We can't just change the mode, because this might clobber op0,
 618            and we will need the original value of op0 if insv fails.  */
 619         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 620       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 621         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 622     }
 623
 624   /* If the destination is a paradoxical subreg such that we need a
 625      truncate to the inner mode, perform the insertion on a temporary and
 626      truncate the result to the original destination.  Note that we can't
 627      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 628      X) 0)) is (reg:N X).  */
 629   if (GET_CODE (xop0) == SUBREG
 630       && REG_P (SUBREG_REG (xop0))
 631       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 632                                          op_mode))
 633     {
 634       rtx tem = gen_reg_rtx (op_mode);
 635       emit_move_insn (tem, xop0);
 636       xop0 = tem;
 637       copy_back = true;
 638     }
 639
 640   /* There are similar overflow check at the start of store_bit_field_1,
 641      but that only check the situation where the field lies completely
 642      outside the register, while there do have situation where the field
 643      lies partialy in the register, we need to adjust bitsize for this
 644      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
 645      will broken on those arch support bit insert instruction, like arm, aarch64
 646      etc.  */
 647   if (bitsize + bitnum > unit && bitnum < unit)
 648     {
 649       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 650                "destination object, data truncated into %wu-bit",
 651                bitsize, unit - bitnum);
 652       bitsize = unit - bitnum;
 653     }
 654
 655   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 656      "backwards" from the size of the unit we are inserting into.
 657      Otherwise, we count bits from the most significant on a
 658      BYTES/BITS_BIG_ENDIAN machine.  */
 659
 660   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 661     bitnum = unit - bitsize - bitnum;
 662
 663   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 664   value1 = value;
 665   if (value_mode != op_mode)
 666     {
 667       if (GET_MODE_BITSIZE (value_mode) >= bitsize)
 668         {
 669           rtx tmp;
 670           /* Optimization: Don't bother really extending VALUE
 671              if it has all the bits we will actually use.  However,
 672              if we must narrow it, be sure we do it correctly.  */
 673
 674           if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
 675             {
 676               tmp = simplify_subreg (op_mode, value1, value_mode, 0);
 677               if (! tmp)
 678                 tmp = simplify_gen_subreg (op_mode,
 679                                            force_reg (value_mode, value1),
 680                                            value_mode, 0);
 681             }
 682           else
 683             {
 684               tmp = gen_lowpart_if_possible (op_mode, value1);
 685               if (! tmp)
 686                 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
 687             }
 688           value1 = tmp;
 689         }
 690       else if (CONST_INT_P (value))
 691         value1 = gen_int_mode (INTVAL (value), op_mode);
 692       else
 693         /* Parse phase is supposed to make VALUE's data type
 694            match that of the component reference, which is a type
 695            at least as wide as the field; so VALUE should have
 696            a mode that corresponds to that type.  */
 697         gcc_assert (CONSTANT_P (value));
 698     }
 699
 700   create_fixed_operand (&ops[0], xop0);
 701   create_integer_operand (&ops[1], bitsize);
 702   create_integer_operand (&ops[2], bitnum);
 703   create_input_operand (&ops[3], value1, op_mode);
 704   if (maybe_expand_insn (insv->icode, 4, ops))
 705     {
 706       if (copy_back)
 707         convert_move (op0, xop0, true);
 708       return true;
 709     }
 710   delete_insns_since (last);
 711   return false;
 712 }
 713
 714 /* A subroutine of store_bit_field, with the same arguments.  Return true
 715    if the operation could be implemented.
 716
 717    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 718    no other way of implementing the operation.  If FALLBACK_P is false,
 719    return false instead.  */
 720
 721 static bool
 722 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 723                    unsigned HOST_WIDE_INT bitnum,
 724                    unsigned HOST_WIDE_INT bitregion_start,
 725                    unsigned HOST_WIDE_INT bitregion_end,
 726                    machine_mode fieldmode,
 727                    rtx value, bool reverse, bool fallback_p)
 728 {
 729   rtx op0 = str_rtx;
 730   rtx orig_value;
 731
 732   while (GET_CODE (op0) == SUBREG)
 733     {
 734       bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
 735       op0 = SUBREG_REG (op0);
 736     }
 737
 738   /* No action is needed if the target is a register and if the field
 739      lies completely outside that register.  This can occur if the source
 740      code contains an out-of-bounds access to a small array.  */
 741   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 742     return true;
 743
 744   /* Use vec_set patterns for inserting parts of vectors whenever
 745      available.  */
 746   machine_mode outermode = GET_MODE (op0);
 747   scalar_mode innermode = GET_MODE_INNER (outermode);
 748   if (VECTOR_MODE_P (outermode)
 749       && !MEM_P (op0)
 750       && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
 751       && fieldmode == innermode
 752       && bitsize == GET_MODE_BITSIZE (innermode)
 753       && !(bitnum % GET_MODE_BITSIZE (innermode)))
 754     {
 755       struct expand_operand ops[3];
 756       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 757       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 758
 759       create_fixed_operand (&ops[0], op0);
 760       create_input_operand (&ops[1], value, innermode);
 761       create_integer_operand (&ops[2], pos);
 762       if (maybe_expand_insn (icode, 3, ops))
 763         return true;
 764     }
 765
 766   /* If the target is a register, overwriting the entire object, or storing
 767      a full-word or multi-word field can be done with just a SUBREG.  */
 768   if (!MEM_P (op0)
 769       && bitsize == GET_MODE_BITSIZE (fieldmode)
 770       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 771           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 772     {
 773       /* Use the subreg machinery either to narrow OP0 to the required
 774          words or to cope with mode punning between equal-sized modes.
 775          In the latter case, use subreg on the rhs side, not lhs.  */
 776       rtx sub;
 777
 778       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 779         {
 780           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 781           if (sub)
 782             {
 783               if (reverse)
 784                 sub = flip_storage_order (GET_MODE (op0), sub);
 785               emit_move_insn (op0, sub);
 786               return true;
 787             }
 788         }
 789       else
 790         {
 791           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 792                                      bitnum / BITS_PER_UNIT);
 793           if (sub)
 794             {
 795               if (reverse)
 796                 value = flip_storage_order (fieldmode, value);
 797               emit_move_insn (sub, value);
 798               return true;
 799             }
 800         }
 801     }
 802
 803   /* If the target is memory, storing any naturally aligned field can be
 804      done with a simple store.  For targets that support fast unaligned
 805      memory, any naturally sized, unit aligned field can be done directly.  */
 806   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 807     {
 808       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 809       if (reverse)
 810         value = flip_storage_order (fieldmode, value);
 811       emit_move_insn (op0, value);
 812       return true;
 813     }
 814
 815   /* Make sure we are playing with integral modes.  Pun with subregs
 816      if we aren't.  This must come after the entire register case above,
 817      since that case is valid for any mode.  The following cases are only
 818      valid for integral modes.  */
 819   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
 820   scalar_int_mode imode;
 821   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
 822     {
 823       if (MEM_P (op0))
 824         op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
 825                                             0, MEM_SIZE (op0));
 826       else
 827         op0 = gen_lowpart (op0_mode.require (), op0);
 828     }
 829
 830   /* Storing an lsb-aligned field in a register
 831      can be done with a movstrict instruction.  */
 832
 833   if (!MEM_P (op0)
 834       && !reverse
 835       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 836       && bitsize == GET_MODE_BITSIZE (fieldmode)
 837       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 838     {
 839       struct expand_operand ops[2];
 840       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 841       rtx arg0 = op0;
 842       unsigned HOST_WIDE_INT subreg_off;
 843
 844       if (GET_CODE (arg0) == SUBREG)
 845         {
 846           /* Else we've got some float mode source being extracted into
 847              a different float mode destination -- this combination of
 848              subregs results in Severe Tire Damage.  */
 849           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 850                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 851                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 852           arg0 = SUBREG_REG (arg0);
 853         }
 854
 855       subreg_off = bitnum / BITS_PER_UNIT;
 856       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 857         {
 858           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 859
 860           create_fixed_operand (&ops[0], arg0);
 861           /* Shrink the source operand to FIELDMODE.  */
 862           create_convert_operand_to (&ops[1], value, fieldmode, false);
 863           if (maybe_expand_insn (icode, 2, ops))
 864             return true;
 865         }
 866     }
 867
 868   /* Handle fields bigger than a word.  */
 869
 870   if (bitsize > BITS_PER_WORD)
 871     {
 872       /* Here we transfer the words of the field
 873          in the order least significant first.
 874          This is because the most significant word is the one which may
 875          be less than full.
 876          However, only do that if the value is not BLKmode.  */
 877
 878       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 879       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 880       unsigned int i;
 881       rtx_insn *last;
 882
 883       /* This is the mode we must force value to, so that there will be enough
 884          subwords to extract.  Note that fieldmode will often (always?) be
 885          VOIDmode, because that is what store_field uses to indicate that this
 886          is a bit field, but passing VOIDmode to operand_subword_force
 887          is not allowed.  */
 888       fieldmode = GET_MODE (value);
 889       if (fieldmode == VOIDmode)
 890         fieldmode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
 891
 892       last = get_last_insn ();
 893       for (i = 0; i < nwords; i++)
 894         {
 895           /* If I is 0, use the low-order word in both field and target;
 896              if I is 1, use the next to lowest word; and so on.  */
 897           unsigned int wordnum = (backwards
 898                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 899                                   - i - 1
 900                                   : i);
 901           unsigned int bit_offset = (backwards ^ reverse
 902                                      ? MAX ((int) bitsize - ((int) i + 1)
 903                                             * BITS_PER_WORD,
 904                                             0)
 905                                      : (int) i * BITS_PER_WORD);
 906           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 907           unsigned HOST_WIDE_INT new_bitsize =
 908             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 909
 910           /* If the remaining chunk doesn't have full wordsize we have
 911              to make sure that for big-endian machines the higher order
 912              bits are used.  */
 913           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 914             {
 915               int shift = BITS_PER_WORD - new_bitsize;
 916               rtx shift_rtx = gen_int_shift_amount (word_mode, shift);
 917               value_word = simplify_expand_binop (word_mode, lshr_optab,
 918                                                   value_word, shift_rtx,
 919                                                   NULL_RTX, true,
 920                                                   OPTAB_LIB_WIDEN);
 921             }
 922
 923           if (!store_bit_field_1 (op0, new_bitsize,
 924                                   bitnum + bit_offset,
 925                                   bitregion_start, bitregion_end,
 926                                   word_mode,
 927                                   value_word, reverse, fallback_p))
 928             {
 929               delete_insns_since (last);
 930               return false;
 931             }
 932         }
 933       return true;
 934     }
 935
 936   /* If VALUE has a floating-point or complex mode, access it as an
 937      integer of the corresponding size.  This can occur on a machine
 938      with 64 bit registers that uses SFmode for float.  It can also
 939      occur for unaligned float or complex fields.  */
 940   orig_value = value;
 941   scalar_int_mode value_mode;
 942   if (GET_MODE (value) == VOIDmode)
 943     /* By this point we've dealt with values that are bigger than a word,
 944        so word_mode is a conservatively correct choice.  */
 945     value_mode = word_mode;
 946   else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
 947     {
 948       value_mode = int_mode_for_mode (GET_MODE (value)).require ();
 949       value = gen_reg_rtx (value_mode);
 950       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 951     }
 952
 953   /* If OP0 is a multi-word register, narrow it to the affected word.
 954      If the region spans two words, defer to store_split_bit_field.
 955      Don't do this if op0 is a single hard register wider than word
 956      such as a float or vector register.  */
 957   if (!MEM_P (op0)
 958       && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
 959       && (!REG_P (op0)
 960           || !HARD_REGISTER_P (op0)
 961           || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
 962     {
 963       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
 964         {
 965           if (!fallback_p)
 966             return false;
 967
 968           store_split_bit_field (op0, op0_mode, bitsize, bitnum,
 969                                  bitregion_start, bitregion_end,
 970                                  value, value_mode, reverse);
 971           return true;
 972         }
 973       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
 974                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 975       gcc_assert (op0);
 976       op0_mode = word_mode;
 977       bitnum %= BITS_PER_WORD;
 978     }
 979
 980   /* From here on we can assume that the field to be stored in fits
 981      within a word.  If the destination is a register, it too fits
 982      in a word.  */
 983
 984   extraction_insn insv;
 985   if (!MEM_P (op0)
 986       && !reverse
 987       && get_best_reg_extraction_insn (&insv, EP_insv,
 988                                        GET_MODE_BITSIZE (op0_mode.require ()),
 989                                        fieldmode)
 990       && store_bit_field_using_insv (&insv, op0, op0_mode,
 991                                      bitsize, bitnum, value, value_mode))
 992     return true;
 993
 994   /* If OP0 is a memory, try copying it to a register and seeing if a
 995      cheap register alternative is available.  */
 996   if (MEM_P (op0) && !reverse)
 997     {
 998       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
 999                                         fieldmode)
1000           && store_bit_field_using_insv (&insv, op0, op0_mode,
1001                                          bitsize, bitnum, value, value_mode))
1002         return true;
1003
1004       rtx_insn *last = get_last_insn ();
1005
1006       /* Try loading part of OP0 into a register, inserting the bitfield
1007          into that, and then copying the result back to OP0.  */
1008       unsigned HOST_WIDE_INT bitpos;
1009       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1010                                                bitregion_start, bitregion_end,
1011                                                fieldmode, &bitpos);
1012       if (xop0)
1013         {
1014           rtx tempreg = copy_to_reg (xop0);
1015           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1016                                  bitregion_start, bitregion_end,
1017                                  fieldmode, orig_value, reverse, false))
1018             {
1019               emit_move_insn (xop0, tempreg);
1020               return true;
1021             }
1022           delete_insns_since (last);
1023         }
1024     }
1025
1026   if (!fallback_p)
1027     return false;
1028
1029   store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1030                          bitregion_end, value, value_mode, reverse);
1031   return true;
1032 }
1033
1034 /* Generate code that stores VALUE into the bit-field of structure STR_RTX
1035    that is BITSIZE bits wide and starts at bit BITNUM.
1036
1037    BITREGION_START is the bitpos of the first bit-field of the region the
1038    field belongs to and BITREGION_END the bitpos of its ending bit-field.
1039    Both are 0 if the C++ memory model does not apply, or if we are not
1040    interested in keeping track of bit-field regions.
1041
1042    FIELDMODE is the machine mode of the FIELD_DECL node for this field.
1043
1044    If REVERSE is true, the store is done in reverse order.  The store must
1045    succeed: a false return from store_bit_field_1 is gcc_unreachable.  */
1046
1047 void
1048 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1049                  unsigned HOST_WIDE_INT bitnum,
1050                  unsigned HOST_WIDE_INT bitregion_start,
1051                  unsigned HOST_WIDE_INT bitregion_end,
1052                  machine_mode fieldmode,
1053                  rtx value, bool reverse)
1054 {
1055   /* Deal with -fstrict-volatile-bitfields first, where it applies.  */
1056   scalar_int_mode int_mode;
1057   const bool strict_volatile
1058     = (is_a <scalar_int_mode> (fieldmode, &int_mode)
1059        && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode,
1060                                       bitregion_start, bitregion_end));
1061   if (strict_volatile)
1062     {
1063       if (bitsize != GET_MODE_BITSIZE (int_mode))
1064         {
1065           /* Partial field: copy the containing INT_MODE object into a
1066              register, insert the field there and store the register back.  */
1067           str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
1068                                           &bitnum);
1069           gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
1070           rtx scratch = copy_to_reg (str_rtx);
1071           if (!store_bit_field_1 (scratch, bitsize, bitnum, 0, 0,
1072                                   int_mode, value, reverse, true))
1073             gcc_unreachable ();
1074
1075           emit_move_insn (str_rtx, scratch);
1076         }
1077       else
1078         {
1079           /* The field fills INT_MODE, so a simple store does the job.  We
1080              know here that the field can be accessed with one instruction;
1081              on targets with unaligned memory the access may be unaligned.  */
1082           str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1083                                              bitnum / BITS_PER_UNIT);
1084           if (reverse)
1085             value = flip_storage_order (int_mode, value);
1086           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1087           emit_move_insn (str_rtx, value);
1088         }
1089
1090       return;
1091     }
1092
1093   /* Under the C++11 memory model, we must not touch bits outside the bit
1094      region.  Rebase the address and the positions on the region start.  */
1095   if (MEM_P (str_rtx) && bitregion_start > 0)
1096     {
1097       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1098
1099       const HOST_WIDE_INT byte_offset = bitregion_start / BITS_PER_UNIT;
1100       bitnum -= bitregion_start;
1101       bitregion_end -= bitregion_start;
1102       bitregion_start = 0;
1103       const HOST_WIDE_INT byte_size
1104         = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1105
1106       machine_mode addr_mode = VOIDmode;
1107       scalar_int_mode best_mode;
1108       if (get_best_mode (bitsize, bitnum,
1109                          bitregion_start, bitregion_end,
1110                          MEM_ALIGN (str_rtx), INT_MAX,
1111                          MEM_VOLATILE_P (str_rtx), &best_mode))
1112         addr_mode = best_mode;
1113       str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1114                                               byte_offset, byte_size);
1115     }
1116
1117   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1118                           bitregion_start, bitregion_end,
1119                           fieldmode, value, reverse, true))
1120     gcc_unreachable ();
1121 }
1122 \f
1123 /* Store VALUE, which has mode VALUE_MODE, into the bit field of OP0 that is
1124    BITSIZE bits wide and starts at bit BITNUM, using shifts and boolean
1125    operations.  If OP0_MODE is defined, it is the mode of OP0, otherwise
1126    OP0 is a BLKmode MEM.  BITREGION_START and BITREGION_END are handed on
1127    to get_best_mode and to store_split_bit_field.
1128    If REVERSE is true, the store is to be done in reverse order.  */
1129
1130 static void
1131 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1132                        unsigned HOST_WIDE_INT bitsize,
1133                        unsigned HOST_WIDE_INT bitnum,
1134                        unsigned HOST_WIDE_INT bitregion_start,
1135                        unsigned HOST_WIDE_INT bitregion_end,
1136                        rtx value, scalar_int_mode value_mode, bool reverse)
1137 {
1138   /* One case is not handled here: a structure whose known alignment is
1139      only a halfword, with a field split across two aligned halfwords
1140      inside it; or, likewise, a structure whose known alignment is only
1141      a byte, with a field split across two bytes.
1142      Such cases are not supposed to be able to occur.  */
1143
1144   if (!MEM_P (op0))
1145     {
1146       /* A non-MEM OP0 is accessed in OP0_MODE, which must be defined.  */
1147       store_fixed_bit_field_1 (op0, op0_mode.require (), bitsize, bitnum,
1148                                value, value_mode, reverse);
1149       return;
1150     }
1151
1152   /* For a MEM, pick the access mode with get_best_mode, allowing at most
1153      a word and at most the width of OP0's own mode, if it has one.  */
1154   unsigned int width_cap = BITS_PER_WORD;
1155   scalar_int_mode own_mode;
1156   if (op0_mode.exists (&own_mode) && GET_MODE_BITSIZE (own_mode) < width_cap)
1157     width_cap = GET_MODE_BITSIZE (own_mode);
1158   scalar_int_mode best_mode;
1159   if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1160                       MEM_ALIGN (op0), width_cap, MEM_VOLATILE_P (op0),
1161                       &best_mode))
1162     {
1163       /* This should only occur if the field spans word boundaries.  */
1164       store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1165                              bitregion_start, bitregion_end,
1166                              value, value_mode, reverse);
1167       return;
1168     }
1169   op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1170   store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1171                            value, value_mode, reverse);
1172 }
1173
1174 /* Helper function for store_fixed_bit_field: stores VALUE into the bit
1175    field by masking and or-ing, always using MODE, which is the mode of
1176    OP0.  The other arguments are as for store_fixed_bit_field.  */
1177
1178 static void
1179 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1180                          unsigned HOST_WIDE_INT bitsize,
1181                          unsigned HOST_WIDE_INT bitnum,
1182                          rtx value, scalar_int_mode value_mode, bool reverse)
1183 {
1184   rtx temp;
1185   int all_zero = 0;  /* Constant VALUE with every field bit clear.  */
1186   int all_one = 0;   /* Constant VALUE with every field bit set.  */
1187
1188   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1189      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1190
1191   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1192     /* BITNUM is the distance between our msb
1193        and that of the containing datum.
1194        Convert it to the distance from the lsb.  */
1195     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1196
1197   /* Now BITNUM is always the distance between our lsb
1198      and that of OP0.  */
1199
1200   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1201      we must first convert its mode to MODE.  */
1202
1203   if (CONST_INT_P (value))
1204     {
1205       unsigned HOST_WIDE_INT v = UINTVAL (value);
1206       /* Keep only the low BITSIZE bits of the constant.  */
1207       if (bitsize < HOST_BITS_PER_WIDE_INT)
1208         v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1209       /* An all-zero field needs no IOR; an all-one field needs no clearing.  */
1210       if (v == 0)
1211         all_zero = 1;
1212       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1213                 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1214                || (bitsize == HOST_BITS_PER_WIDE_INT
1215                    && v == HOST_WIDE_INT_M1U))
1216         all_one = 1;
1217
1218       value = lshift_value (mode, v, bitnum);
1219     }
1220   else
1221     {
1222       int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1223                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1224       /* The final argument 1 asks for an unsigned conversion.  */
1225       if (value_mode != mode)
1226         value = convert_to_mode (mode, value, 1);
1227       /* Apply the BITSIZE-bit mask if needed, then shift VALUE into place.  */
1228       if (must_and)
1229         value = expand_binop (mode, and_optab, value,
1230                               mask_rtx (mode, 0, bitsize, 0),
1231                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1232       if (bitnum > 0)
1233         value = expand_shift (LSHIFT_EXPR, mode, value,
1234                               bitnum, NULL_RTX, 1);
1235     }
1236   /* A reverse-order store also flips the positioned VALUE.  */
1237   if (reverse)
1238     value = flip_storage_order (mode, value);
1239
1240   /* Now clear the chosen bits in OP0, except that if the field of
1241      VALUE is all ones (ALL_ONE) we need not bother.  */
1242   /* We keep the intermediates in registers to allow CSE to combine
1243      consecutive bitfield assignments.  */
1244
1245   temp = force_reg (mode, op0);
1246
1247   if (! all_one)
1248     {
1249       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1250       if (reverse)
1251         mask = flip_storage_order (mode, mask);
1252       temp = expand_binop (mode, and_optab, temp, mask,
1253                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1254       temp = force_reg (mode, temp);
1255     }
1256
1257   /* Now logical-or VALUE into OP0, unless it is zero (ALL_ZERO).  */
1258
1259   if (! all_zero)
1260     {
1261       temp = expand_binop (mode, ior_optab, temp, value,
1262                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1263       temp = force_reg (mode, temp);
1264     }
1265   /* Copy the result back to OP0 unless TEMP is OP0 itself.  */
1266   if (op0 != temp)
1267     {
1268       op0 = copy_rtx (op0);
1269       emit_move_insn (op0, temp);
1270     }
1271 }
1272 \f
1273 /* Store a bit field that is split across multiple accessible memory objects.
1274
1275    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1276    BITSIZE is the field width; BITPOS the position of its first bit
1277    (within the word).  BITREGION_START and BITREGION_END are passed on to
1278    store_fixed_bit_field; a nonzero BITREGION_END can also shrink UNIT.
1279    VALUE is the value to store, which has mode VALUE_MODE.  If OP0_MODE is
1280    defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1281
1282    If REVERSE is true, the store is to be done in reverse order.  The field
1283    is stored piecewise, each piece lying within one UNIT-bit chunk.
1284    This does not yet handle fields wider than BITS_PER_WORD.  */
1285
1286 static void
1287 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1288                        unsigned HOST_WIDE_INT bitsize,
1289                        unsigned HOST_WIDE_INT bitpos,
1290                        unsigned HOST_WIDE_INT bitregion_start,
1291                        unsigned HOST_WIDE_INT bitregion_end,
1292                        rtx value, scalar_int_mode value_mode, bool reverse)
1293 {
1294   unsigned int unit, total_bits, bitsdone = 0;
1295
1296   /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle
1297      that much at a time.  */
1298   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1299     unit = BITS_PER_WORD;
1300   else
1301     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1302
1303   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1304      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1305      again, and we will mutually recurse forever.  */
1306   if (MEM_P (op0) && op0_mode.exists ())
1307     unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1308
1309   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1310      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1311      that VALUE might be a floating-point constant.  */
1312   if (CONSTANT_P (value) && !CONST_INT_P (value))
1313     {
1314       rtx word = gen_lowpart_common (word_mode, value);
1315
1316       if (word && (value != word))
1317         value = word;
1318       else
1319         value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1320       value_mode = word_mode;
1321     }
1322   /* TOTAL_BITS is the width of VALUE's (possibly updated) mode.  */
1323   total_bits = GET_MODE_BITSIZE (value_mode);
1324   /* Peel off one piece per iteration; BITSDONE counts the bits stored.  */
1325   while (bitsdone < bitsize)
1326     {
1327       unsigned HOST_WIDE_INT thissize;  /* Width of this piece, in bits.  */
1328       unsigned HOST_WIDE_INT thispos;   /* Its first bit within the UNIT.  */
1329       unsigned HOST_WIDE_INT offset;    /* Index of that UNIT.  */
1330       rtx part;                         /* The bits of VALUE to store.  */
1331
1332       offset = (bitpos + bitsdone) / unit;
1333       thispos = (bitpos + bitsdone) % unit;
1334
1335       /* When region of bytes we can touch is restricted, decrease
1336          UNIT close to the end of the region as needed.  If op0 is a REG
1337          or SUBREG of REG, don't do this, as there can't be data races
1338          on a register and we can expand shorter code in some cases.  */
1339       if (bitregion_end
1340           && unit > BITS_PER_UNIT
1341           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1342           && !REG_P (op0)
1343           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1344         {
1345           unit = unit / 2;
1346           continue;
1347         }
1348
1349       /* THISSIZE must not overrun a word boundary.  Otherwise,
1350          store_fixed_bit_field will call us again, and we will mutually
1351          recurse forever.  */
1352       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1353       thissize = MIN (thissize, unit - thispos);
1354
1355       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1356         {
1357           /* Fetch successively less significant portions.  */
1358           if (CONST_INT_P (value))
1359             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1360                              >> (bitsize - bitsdone - thissize))
1361                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1362           /* Likewise, but the source is little-endian.  */
1363           else if (reverse)
1364             part = extract_fixed_bit_field (word_mode, value, value_mode,
1365                                             thissize,
1366                                             bitsize - bitsdone - thissize,
1367                                             NULL_RTX, 1, false);
1368           else
1369             /* The args are chosen so that the last part includes the
1370                lsb.  Give extract_bit_field the value it needs (with
1371                endianness compensation) to fetch the piece we want.  */
1372             part = extract_fixed_bit_field (word_mode, value, value_mode,
1373                                             thissize,
1374                                             total_bits - bitsize + bitsdone,
1375                                             NULL_RTX, 1, false);
1376         }
1377       else
1378         {
1379           /* Fetch successively more significant portions.  */
1380           if (CONST_INT_P (value))
1381             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1382                              >> bitsdone)
1383                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1384           /* Likewise, but the source is big-endian.  */
1385           else if (reverse)
1386             part = extract_fixed_bit_field (word_mode, value, value_mode,
1387                                             thissize,
1388                                             total_bits - bitsdone - thissize,
1389                                             NULL_RTX, 1, false);
1390           else
1391             part = extract_fixed_bit_field (word_mode, value, value_mode,
1392                                             thissize, bitsdone, NULL_RTX,
1393                                             1, false);
1394         }
1395
1396       /* If OP0 is a register, then handle OFFSET here.  */
1397       rtx op0_piece = op0;
1398       opt_scalar_int_mode op0_piece_mode = op0_mode;
1399       if (SUBREG_P (op0) || REG_P (op0))
1400         {
1401           scalar_int_mode imode;
1402           if (op0_mode.exists (&imode)
1403               && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1404             {
1405               if (offset)
1406                 op0_piece = const0_rtx;  /* Marks an out-of-bounds piece.  */
1407             }
1408           else
1409             {
1410               op0_piece = operand_subword_force (op0,
1411                                                  offset * unit / BITS_PER_WORD,
1412                                                  GET_MODE (op0));
1413               op0_piece_mode = word_mode;
1414             }
1415           offset &= BITS_PER_WORD / unit - 1;
1416         }
1417
1418       /* OFFSET is in UNITs, and UNIT is in bits.  If OP0_PIECE is const0_rtx,
1419          it is just an out-of-bounds access.  Ignore it.  */
1420       if (op0_piece != const0_rtx)
1421         store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1422                                offset * unit + thispos, bitregion_start,
1423                                bitregion_end, part, word_mode, reverse);
1424       bitsdone += thissize;
1425     }
1426 }
1427 \f
1428 /* A subroutine of extract_bit_field_1 that hands back the extracted value
1429    X unchanged if it already has mode TMODE or MODE, and converted to TMODE
1430    otherwise.  MODE, TMODE and UNSIGNEDP are as for extract_bit_field.  */
1431
1432 static rtx
1433 convert_extracted_bit_field (rtx x, machine_mode mode,
1434                              machine_mode tmode, bool unsignedp)
1435 {
1436   machine_mode xmode = GET_MODE (x);
1437   if (xmode == tmode || xmode == mode)
1438     return x;
1439
1440   /* A scalar integer TMODE is reached with a single conversion.  */
1441   if (SCALAR_INT_MODE_P (tmode))
1442     return convert_to_mode (tmode, x, unsignedp);
1443
1444   /* Any other TMODE: convert X to the integer mode of TMODE's size, put
1445      that in a register and access the register in TMODE through
1446      gen_lowpart.  */
1447   scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1448   rtx as_int = convert_to_mode (int_mode, x, unsignedp);
1449   as_int = force_reg (int_mode, as_int);
1450   return gen_lowpart (tmode, as_int);
1451 }
1452
1453 /* Try to use an ext(z)v pattern to extract a field from OP0.  EXTV describes
1454    the extraction instruction to use.  If OP0_MODE is defined, it is the mode
1455    of OP0, otherwise OP0 is a BLKmode MEM.  The other arguments are as for
1456    extract_bit_field.  Return the extracted value on success (TARGET when the
1457    insn could write to it directly), otherwise return null.  */
1458
1459 static rtx
1460 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1461                               opt_scalar_int_mode op0_mode,
1462                               unsigned HOST_WIDE_INT bitsize,
1463                               unsigned HOST_WIDE_INT bitnum,
1464                               int unsignedp, rtx target,
1465                               machine_mode mode, machine_mode tmode)
1466 {
1467   struct expand_operand ops[4];
1468   rtx spec_target = target;
1469   rtx spec_target_subreg = 0;  /* EXT_MODE lowpart of SPEC_TARGET, if used.  */
1470   scalar_int_mode ext_mode = extv->field_mode;
1471   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1472   /* UNIT is the width of EXT_MODE; no wider or empty field can be handled.  */
1473   if (bitsize == 0 || unit < bitsize)
1474     return NULL_RTX;
1475
1476   if (MEM_P (op0))
1477     /* Get a reference to the first byte of the field.  */
1478     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1479                                 &bitnum);
1480   else
1481     {
1482       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1483       if (BYTES_BIG_ENDIAN)
1484         bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1485
1486       /* If OP0 is a register, we need it in EXT_MODE to make it
1487          acceptable to the format of ext(z)v.  A SUBREG is not re-wrapped.  */
1488       if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1489         return NULL_RTX;
1490       if (REG_P (op0) && op0_mode.require () != ext_mode)
1491         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1492     }
1493
1494   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1495      "backwards" from the size of the unit we are extracting from.
1496      Otherwise, we count bits from the most significant on a
1497      BYTES/BITS_BIG_ENDIAN machine.  */
1498
1499   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1500     bitnum = unit - bitsize - bitnum;
1501   /* Without a caller-supplied TARGET, extract into a fresh TMODE register.  */
1502   if (target == 0)
1503     target = spec_target = gen_reg_rtx (tmode);
1504
1505   if (GET_MODE (target) != ext_mode)
1506     {
1507       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1508          between the mode of the extraction (word_mode) and the target
1509          mode.  Instead, create a temporary and use convert_move to set
1510          the target.  */
1511       if (REG_P (target)
1512           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1513         {
1514           target = gen_lowpart (ext_mode, target);
1515           if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1516             spec_target_subreg = target;
1517         }
1518       else
1519         target = gen_reg_rtx (ext_mode);
1520     }
1521   /* Operands: destination, source, field width, field position.  */
1522   create_output_operand (&ops[0], target, ext_mode);
1523   create_fixed_operand (&ops[1], op0);
1524   create_integer_operand (&ops[2], bitsize);
1525   create_integer_operand (&ops[3], bitnum);
1526   if (maybe_expand_insn (extv->icode, 4, ops))
1527     {
1528       target = ops[0].value;
1529       if (target == spec_target)
1530         return target;
1531       if (target == spec_target_subreg)
1532         return spec_target;
1533       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1534     }
1535   return NULL_RTX;
1536 }
1537
1538 /* A subroutine of extract_bit_field, with the same arguments.
1539    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1540    if we can find no other means of implementing the operation.
1541    if FALLBACK_P is false, return NULL instead.  */
1542
     /* The strategies below are tried in order and the first one that
        succeeds supplies the return value:

          - an out-of-bounds access to a register yields a fresh pseudo, and
            extracting a whole register yields the register itself
          - vec_extract patterns, first for a sub-vector, then for an element
          - a lowpart SUBREG of a register, or a plain load from memory
          - word-by-word extraction of fields wider than BITS_PER_WORD
          - narrowing a multi-word register to one word, or splitting the
            field if it straddles two words of the register
          - extv/extzv patterns on a register or on memory, then a retry
            with part of the memory copied to a register
          - extract_fixed_bit_field, but only if FALLBACK_P.

        If ALT_RTL is nonnull, the vec_extract paths store the caller's
        TARGET in it when the expansion succeeds and ops[0].target is still
        set; nothing else in this function writes to it.  */

1543 static rtx
1544 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1545                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1546                      machine_mode mode, machine_mode tmode,
1547                      bool reverse, bool fallback_p, rtx *alt_rtl)
1548 {
1549   rtx op0 = str_rtx;
1550   machine_mode mode1;
1551
       /* Without a preferred mode, produce the value in MODE.  */
1552   if (tmode == VOIDmode)
1553     tmode = mode;
1554
       /* Look through SUBREGs, folding each one's byte offset into BITNUM.  */
1555   while (GET_CODE (op0) == SUBREG)
1556     {
1557       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1558       op0 = SUBREG_REG (op0);
1559     }
1560
1561   /* If we have an out-of-bounds access to a register, just return an
1562      uninitialized register of the required mode.  This can occur if the
1563      source code contains an out-of-bounds access to a small array.  */
1564   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1565     return gen_reg_rtx (tmode);
1566
1567   if (REG_P (op0)
1568       && mode == GET_MODE (op0)
1569       && bitnum == 0
1570       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1571     {
1572       if (reverse)
1573         op0 = flip_storage_order (mode, op0);
1574       /* We're trying to extract a full register from itself.  */
1575       return op0;
1576     }
1577
1578   /* First try to check for vector from vector extractions.  */
1579   if (VECTOR_MODE_P (GET_MODE (op0))
1580       && !MEM_P (op0)
1581       && VECTOR_MODE_P (tmode)
1582       && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (tmode))
1583     {
1584       machine_mode new_mode = GET_MODE (op0);
1585       if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1586         {
                 /* The element types differ: look for a supported vector mode
                    of OP0's size whose elements are those of TMODE.  */
1587           scalar_mode inner_mode = GET_MODE_INNER (tmode);
1588           unsigned int nunits = (GET_MODE_BITSIZE (GET_MODE (op0))
1589                                  / GET_MODE_UNIT_BITSIZE (tmode));
1590           if (!mode_for_vector (inner_mode, nunits).exists (&new_mode)
1591               || !VECTOR_MODE_P (new_mode)
1592               || GET_MODE_SIZE (new_mode) != GET_MODE_SIZE (GET_MODE (op0))
1593               || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1594               || !targetm.vector_mode_supported_p (new_mode))
1595             new_mode = VOIDmode;
1596         }
           /* The pattern is usable only if the field lies within a single
              TMODE-sized piece of the vector.  */
1597       if (new_mode != VOIDmode
1598           && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1599               != CODE_FOR_nothing)
1600           && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (tmode)
1601               == bitnum / GET_MODE_BITSIZE (tmode)))
1602         {
1603           struct expand_operand ops[3];
1604           machine_mode outermode = new_mode;
1605           machine_mode innermode = tmode;
1606           enum insn_code icode
1607             = convert_optab_handler (vec_extract_optab, outermode, innermode);
1608           unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1609
1610           if (new_mode != GET_MODE (op0))
1611             op0 = gen_lowpart (new_mode, op0);
1612           create_output_operand (&ops[0], target, innermode);
1613           ops[0].target = 1;
1614           create_input_operand (&ops[1], op0, outermode);
1615           create_integer_operand (&ops[2], pos);
1616           if (maybe_expand_insn (icode, 3, ops))
1617             {
                     /* Record the caller's TARGET before TARGET is replaced by
                        the operand the expander actually used.  */
1618               if (alt_rtl && ops[0].target)
1619                 *alt_rtl = target;
1620               target = ops[0].value;
1621               if (GET_MODE (target) != mode)
1622                 return gen_lowpart (tmode, target);
1623               return target;
1624             }
1625         }
1626     }
1627
1628   /* See if we can get a better vector mode before extracting.  */
1629   if (VECTOR_MODE_P (GET_MODE (op0))
1630       && !MEM_P (op0)
1631       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1632     {
1633       machine_mode new_mode;
1634
1635       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1636         new_mode = MIN_MODE_VECTOR_FLOAT;
1637       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1638         new_mode = MIN_MODE_VECTOR_FRACT;
1639       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1640         new_mode = MIN_MODE_VECTOR_UFRACT;
1641       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1642         new_mode = MIN_MODE_VECTOR_ACCUM;
1643       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1644         new_mode = MIN_MODE_VECTOR_UACCUM;
1645       else
1646         new_mode = MIN_MODE_VECTOR_INT;
1647
           /* Starting from that mode, stop at the first supported mode that
              has OP0's total size and TMODE-sized units.  The test after the
              loop treats VOIDmode as "none found".  */
1648       FOR_EACH_MODE_FROM (new_mode, new_mode)
1649         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1650             && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1651             && targetm.vector_mode_supported_p (new_mode))
1652           break;
1653       if (new_mode != VOIDmode)
1654         op0 = gen_lowpart (new_mode, op0);
1655     }
1656
1657   /* Use vec_extract patterns for extracting parts of vectors whenever
1658      available.  */
1659   machine_mode outermode = GET_MODE (op0);
1660   scalar_mode innermode = GET_MODE_INNER (outermode);
1661   if (VECTOR_MODE_P (outermode)
1662       && !MEM_P (op0)
1663       && (convert_optab_handler (vec_extract_optab, outermode, innermode)
1664           != CODE_FOR_nothing)
1665       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (innermode)
1666           == bitnum / GET_MODE_BITSIZE (innermode)))
1667     {
1668       struct expand_operand ops[3];
1669       enum insn_code icode
1670         = convert_optab_handler (vec_extract_optab, outermode, innermode);
1671       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1672
1673       create_output_operand (&ops[0], target, innermode);
1674       ops[0].target = 1;
1675       create_input_operand (&ops[1], op0, outermode);
1676       create_integer_operand (&ops[2], pos);
1677       if (maybe_expand_insn (icode, 3, ops))
1678         {
1679           if (alt_rtl && ops[0].target)
1680             *alt_rtl = target;
1681           target = ops[0].value;
1682           if (GET_MODE (target) != mode)
1683             return gen_lowpart (tmode, target);
1684           return target;
1685         }
1686     }
1687
1688   /* Make sure we are playing with integral modes.  Pun with subregs
1689      if we aren't.  */
1690   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1691   scalar_int_mode imode;
1692   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1693     {
1694       if (MEM_P (op0))
1695         op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1696                                             0, MEM_SIZE (op0));
1697       else if (op0_mode.exists (&imode))
1698         {
1699           op0 = gen_lowpart (imode, op0);
1700
1701           /* If we got a SUBREG, force it into a register since we
1702              aren't going to be able to do another SUBREG on it.  */
1703           if (GET_CODE (op0) == SUBREG)
1704             op0 = force_reg (imode, op0);
1705         }
1706       else
1707         {
               /* OP0 is not in memory and no integer mode matches it: spill
                  it to a stack temporary and treat that as a BLKmode MEM.  */
1708           HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1709           rtx mem = assign_stack_temp (GET_MODE (op0), size);
1710           emit_move_insn (mem, op0);
1711           op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1712         }
1713     }
1714
1715   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1716      If that's wrong, the solution is to test for it and set TARGET to 0
1717      if needed.  */
1718
1719   /* Get the mode of the field to use for atomic access or subreg
1720      conversion.  */
1721   if (!SCALAR_INT_MODE_P (tmode)
1722       || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1723     mode1 = mode;
1724   gcc_assert (mode1 != BLKmode);
1725
1726   /* Extraction of a full MODE1 value can be done with a subreg as long
1727      as the least significant bit of the value is the least significant
1728      bit of either OP0 or a word of OP0.  */
1729   if (!MEM_P (op0)
1730       && !reverse
1731       && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
1732       && bitsize == GET_MODE_BITSIZE (mode1)
1733       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, op0_mode.require ()))
1734     {
1735       rtx sub = simplify_gen_subreg (mode1, op0, op0_mode.require (),
1736                                      bitnum / BITS_PER_UNIT);
1737       if (sub)
1738         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1739     }
1740
1741   /* Extraction of a full MODE1 value can be done with a load as long as
1742      the field is on a byte boundary and is sufficiently aligned.  */
1743   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1744     {
1745       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1746       if (reverse)
1747         op0 = flip_storage_order (mode1, op0);
1748       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1749     }
1750
1751   /* Handle fields bigger than a word.  */
1752
1753   if (bitsize > BITS_PER_WORD)
1754     {
1755       /* Here we transfer the words of the field
1756          in the order least significant first.
1757          This is because the most significant word is the one which may
1758          be less than full.  */
1759
1760       const bool backwards = WORDS_BIG_ENDIAN;
1761       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1762       unsigned int i;
1763       rtx_insn *last;
1764
1765       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1766         target = gen_reg_rtx (mode);
1767
1768       /* In case we're about to clobber a base register or something
1769          (see gcc.c-torture/execute/20040625-1.c).   */
1770       if (reg_mentioned_p (target, str_rtx))
1771         target = gen_reg_rtx (mode);
1772
1773       /* Indicate for flow that the entire target reg is being set.  */
1774       emit_clobber (target);
1775
1776       last = get_last_insn ();
1777       for (i = 0; i < nwords; i++)
1778         {
1779           /* If I is 0, use the low-order word in both field and target;
1780              if I is 1, use the next to lowest word; and so on.  */
1781           /* Word number in TARGET to use.  */
1782           unsigned int wordnum
1783             = (backwards
1784                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1785                : i);
1786           /* Offset from start of field in OP0.  */
1787           unsigned int bit_offset = (backwards ^ reverse
1788                                      ? MAX ((int) bitsize - ((int) i + 1)
1789                                             * BITS_PER_WORD,
1790                                             0)
1791                                      : (int) i * BITS_PER_WORD);
1792           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
               /* Each word is extracted as unsigned; a signed field is
                  sign-extended as a whole after the loop.  */
1793           rtx result_part
1794             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1795                                              bitsize - i * BITS_PER_WORD),
1796                                    bitnum + bit_offset, 1, target_part,
1797                                    mode, word_mode, reverse, fallback_p, NULL);
1798
1799           gcc_assert (target_part);
1800           if (!result_part)
1801             {
                   /* Give up, discarding the insns emitted since LAST.  */
1802               delete_insns_since (last);
1803               return NULL;
1804             }
1805
1806           if (result_part != target_part)
1807             emit_move_insn (target_part, result_part);
1808         }
1809
1810       if (unsignedp)
1811         {
1812           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1813              need to be zero'd out.  */
1814           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1815             {
1816               unsigned int i, total_words;
1817
1818               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1819               for (i = nwords; i < total_words; i++)
1820                 emit_move_insn
1821                   (operand_subword (target,
1822                                     backwards ? total_words - i - 1 : i,
1823                                     1, VOIDmode),
1824                    const0_rtx);
1825             }
1826           return target;
1827         }
1828
1829       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1830       target = expand_shift (LSHIFT_EXPR, mode, target,
1831                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1832       return expand_shift (RSHIFT_EXPR, mode, target,
1833                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1834     }
1835
1836   /* If OP0 is a multi-word register, narrow it to the affected word.
1837      If the region spans two words, defer to extract_split_bit_field.  */
1838   if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1839     {
1840       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1841         {
1842           if (!fallback_p)
1843             return NULL_RTX;
1844           target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1845                                             unsignedp, reverse);
1846           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1847         }
1848       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1849                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1850       op0_mode = word_mode;
1851       bitnum %= BITS_PER_WORD;
1852     }
1853
1854   /* From here on we know the desired field is smaller than a word.
1855      If OP0 is a register, it too fits within a word.  */
1856   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1857   extraction_insn extv;
1858   if (!MEM_P (op0)
1859       && !reverse
1860       /* ??? We could limit the structure size to the part of OP0 that
1861          contains the field, with appropriate checks for endianness
1862          and TARGET_TRULY_NOOP_TRUNCATION.  */
1863       && get_best_reg_extraction_insn (&extv, pattern,
1864                                        GET_MODE_BITSIZE (op0_mode.require ()),
1865                                        tmode))
1866     {
1867       rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1868                                                  bitsize, bitnum,
1869                                                  unsignedp, target, mode,
1870                                                  tmode);
1871       if (result)
1872         return result;
1873     }
1874
1875   /* If OP0 is a memory, try copying it to a register and seeing if a
1876      cheap register alternative is available.  */
1877   if (MEM_P (op0) && !reverse)
1878     {
1879       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1880                                         tmode))
1881         {
1882           rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1883                                                      bitsize, bitnum,
1884                                                      unsignedp, target, mode,
1885                                                      tmode);
1886           if (result)
1887             return result;
1888         }
1889
1890       rtx_insn *last = get_last_insn ();
1891
1892       /* Try loading part of OP0 into a register and extracting the
1893          bitfield from that.  */
1894       unsigned HOST_WIDE_INT bitpos;
1895       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1896                                                0, 0, tmode, &bitpos);
1897       if (xop0)
1898         {
               /* FALLBACK_P is false for the retry, so a failure is reported
                  back here and the copy to a register can be deleted.  */
1899           xop0 = copy_to_reg (xop0);
1900           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1901                                             unsignedp, target,
1902                                             mode, tmode, reverse, false, NULL);
1903           if (result)
1904             return result;
1905           delete_insns_since (last);
1906         }
1907     }
1908
1909   if (!fallback_p)
1910     return NULL;
1911
1912   /* Find a correspondingly-sized integer field, so we can apply
1913      shifts and masks to it.  */
1914   scalar_int_mode int_mode;
1915   if (!int_mode_for_mode (tmode).exists (&int_mode))
1916     /* If this fails, we should probably push op0 out to memory and then
1917        do a load.  */
1918     int_mode = int_mode_for_mode (mode).require ();
1919
1920   target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
1921                                     bitnum, target, unsignedp, reverse);
1922
1923   /* Complex values must be reversed piecewise, so we need to undo the global
1924      reversal, convert to the complex mode and reverse again.  */
1925   if (reverse && COMPLEX_MODE_P (tmode))
1926     {
1927       target = flip_storage_order (int_mode, target);
1928       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1929       target = flip_storage_order (tmode, target);
1930     }
1931   else
1932     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1933
1934   return target;
1935 }
1936
1937 /* Generate code to extract a bit-field of BITSIZE bits that starts at
1938    bit BITNUM of STR_RTX, the REG or MEM holding the structure, and
1939    return the rtx in which the value ends up.
1940    UNSIGNEDP is nonzero if this is an unsigned bit field.
1941    MODE is the natural mode of the field value once extracted.
1942    TMODE is the mode the caller would prefer for the value, although
1943    the value may come back in MODE instead.
1944    REVERSE is true if the extraction is to be done in reverse order.
1945    ALT_RTL is handed on unchanged to extract_bit_field_1.
1946
1947    TARGET, if nonzero, is used for the result when storing there costs
1948    nothing extra, and in that case TARGET itself is returned.  Otherwise
1949    the result is a REG of mode TMODE or MODE, TMODE being preferred when
1950    both are equally easy.  */
1951
1952 rtx
1953 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1954                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1955                    machine_mode mode, machine_mode tmode, bool reverse,
1956                    rtx *alt_rtl)
1957 {
1958   /* -fstrict-volatile-bitfields is judged against the mode of the
1959      structure if it has a size, else against that of TARGET, and as
1960      a last resort against TMODE.  */
1961   machine_mode access_mode = tmode;
1962   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1963     access_mode = GET_MODE (str_rtx);
1964   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1965     access_mode = GET_MODE (target);
1966
1967   scalar_int_mode int_mode;
1968   const bool strict_volatile_p
1969     = (is_a <scalar_int_mode> (access_mode, &int_mode)
1970        && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode,
1971                                       0, 0));
1972
1973   if (strict_volatile_p && bitsize != GET_MODE_BITSIZE (int_mode))
1974     {
1975       /* Partial field: narrow the MEM to an INT_MODE reference, copy
1976          that to a register and extract from the register.  */
1977       str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
1978                                       &bitnum);
1979       gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
1980       str_rtx = copy_to_reg (str_rtx);
1981     }
1982   else if (strict_volatile_p)
1983     {
1984       /* A full INT_MODE value needs only a simple load.  The field is
1985          known to be accessible with a single instruction, possibly an
1986          unaligned one on targets that support unaligned memory.  */
1987       rtx loaded = adjust_bitfield_address (str_rtx, int_mode,
1988                                             bitnum / BITS_PER_UNIT);
1989       if (reverse)
1990         loaded = flip_storage_order (int_mode, loaded);
1991       gcc_assert (bitnum % BITS_PER_UNIT == 0);
1992       return convert_extracted_bit_field (loaded, mode, tmode, unsignedp);
1993     }
1994
1995   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1996                               target, mode, tmode, reverse, true, alt_rtl);
1997 }
1998 \f
1999 /* Extract BITSIZE bits starting at bit BITNUM of OP0 by means of shifts
2000    and boolean operations.  OP0_MODE is the mode of OP0 when it is
2001    defined; when it is not, OP0 is a BLKmode MEM.
2002
2003    The value is not sign-extended if UNSIGNEDP is nonzero.  REVERSE
2004    requests extraction in reverse order.
2005
2006    TARGET, if nonzero, is only a suggestion for where to put the value;
2007    if it is not used, a pseudo-reg of mode TMODE holds the result.  */
2008
2009 static rtx
2010 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2011                          opt_scalar_int_mode op0_mode,
2012                          unsigned HOST_WIDE_INT bitsize,
2013                          unsigned HOST_WIDE_INT bitnum, rtx target,
2014                          int unsignedp, bool reverse)
2015 {
2016   /* A non-memory operand is accessed in its own mode.  */
2017   if (!MEM_P (op0))
2018     return extract_fixed_bit_field_1 (tmode, op0, op0_mode.require (),
2019                                       bitsize, bitnum, target, unsignedp,
2020                                       reverse);
2021
2022   /* For memory, get_best_mode picks the access mode, with BITS_PER_WORD
2023      as the size limit.  It should fail only when the field spans word
2024      boundaries, and extract_split_bit_field deals with that case.  */
2025   scalar_int_mode access_mode;
2026   if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2027                       BITS_PER_WORD, MEM_VOLATILE_P (op0), &access_mode))
2028     return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2029                                     unsignedp, reverse);
2030
2031   op0 = narrow_bit_field_mem (op0, access_mode, bitsize, bitnum, &bitnum);
2032   return extract_fixed_bit_field_1 (tmode, op0, access_mode, bitsize, bitnum,
2033                                     target, unsignedp, reverse);
2034 }
2035
2036 /* Helper function for extract_fixed_bit_field, extracts
2037    the bit field always using MODE, which is the mode of OP0.
2038    The other arguments are as for extract_fixed_bit_field.  */
2039
     /* An unsigned field is shifted right by BITNUM in MODE, converted to
        TMODE and, unless the field reached the msb of MODE, masked down to
        BITSIZE bits; the result is then in TMODE.

        A signed field is moved to the narrowest MODE_INT mode with at least
        BITSIZE + BITNUM bits, shifted left until the field's msb is that
        mode's msb, and arithmetically shifted right until its lsb is bit 0.
        The value returned on this path is in that narrow mode, which need
        not be TMODE.  */

2040 static rtx
2041 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2042                            unsigned HOST_WIDE_INT bitsize,
2043                            unsigned HOST_WIDE_INT bitnum, rtx target,
2044                            int unsignedp, bool reverse)
2045 {
2046   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2047      for invalid input, such as extract equivalent of f5 from
2048      gcc.dg/pr48335-2.c.  */
2049
2050   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2051     /* BITNUM is the distance between our msb and that of OP0.
2052        Convert it to the distance from the lsb.  */
2053     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2054
2055   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2056      We have reduced the big-endian case to the little-endian case.  */
2057   if (reverse)
2058     op0 = flip_storage_order (mode, op0);
2059
2060   if (unsignedp)
2061     {
2062       if (bitnum)
2063         {
2064           /* If the field does not already start at the lsb,
2065              shift it so it does.  */
2066           /* Maybe propagate the target for the shift.  */
2067           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
               /* The shift is done in MODE, so TARGET is only suggested when
                  TMODE is that same mode.  */
2068           if (tmode != mode)
2069             subtarget = 0;
2070           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2071         }
2072       /* Convert the value to the desired mode.  TMODE must also be a
2073          scalar integer for this conversion to make sense, since we
2074          shouldn't reinterpret the bits.  */
2075       scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2076       if (mode != new_mode)
2077         op0 = convert_to_mode (new_mode, op0, 1);
2078
2079       /* Unless the msb of the field used to be the msb when we shifted,
2080          mask out the upper bits.  */
2081
2082       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2083         return expand_binop (new_mode, and_optab, op0,
2084                              mask_rtx (new_mode, 0, bitsize, 0),
2085                              target, 1, OPTAB_LIB_WIDEN);
2086       return op0;
2087     }
2088
2089   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2090      then arithmetic-shift its lsb to the lsb of the word.  */
2091   op0 = force_reg (mode, op0);
2092
2093   /* Find the narrowest integer mode that contains the field.  */
2094
2095   opt_scalar_int_mode mode_iter;
2096   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2097     if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2098       break;
2099
       /* From here on MODE is the narrow mode chosen above, no longer the
          mode OP0 arrived in.  */
2100   mode = mode_iter.require ();
2101   op0 = convert_to_mode (mode, op0, 0);
2102
       /* TARGET can only receive the shift results if it has this mode.  */
2103   if (mode != tmode)
2104     target = 0;
2105
2106   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2107     {
2108       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2109       /* Maybe propagate the target for the shift.  */
2110       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2111       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2112     }
2113
2114   return expand_shift (RSHIFT_EXPR, mode, op0,
2115                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2116 }
2117
2118 /* Build the constant VALUE << BITPOS and return it as an rtx of mode MODE.  */
2119
2120 static rtx
2121 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value, int bitpos)
2122 {
2123   rtx shifted_const
2124     = immed_wide_int_const (wi::lshift (value, bitpos), mode);
2125   return shifted_const;
2126 }
2127 \f
2128 /* Extract a bit field that is split across two words
2129    and return an RTX for the result.
2130
2131    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2132    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2133    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2134    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2135    a BLKmode MEM.
2136
2137    If REVERSE is true, the extraction is to be done in reverse order.  */
2138
     /* The field is gathered in pieces, none of which crosses a UNIT
        boundary.  Each piece is extracted as an unsigned word_mode value,
        shifted to its place in the result and combined with the pieces
        gathered so far using bitwise or.  The loop runs until BITSIZE bits
        have been collected, and the result is a word_mode value that is
        sign-extended at the end if UNSIGNEDP is zero.  */

2139 static rtx
2140 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2141                          unsigned HOST_WIDE_INT bitsize,
2142                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2143                          bool reverse)
2144 {
2145   unsigned int unit;
2146   unsigned int bitsdone = 0;
2147   rtx result = NULL_RTX;
2148   int first = 1;
2149
2150   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2151      much at a time.  */
2152   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2153     unit = BITS_PER_WORD;
2154   else
2155     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2156
2157   while (bitsdone < bitsize)
2158     {
2159       unsigned HOST_WIDE_INT thissize;
2160       rtx part;
2161       unsigned HOST_WIDE_INT thispos;
2162       unsigned HOST_WIDE_INT offset;
2163
           /* Split the position of the next bit to fetch into a UNIT index
              and a bit position within that UNIT.  */
2164       offset = (bitpos + bitsdone) / unit;
2165       thispos = (bitpos + bitsdone) % unit;
2166
2167       /* THISSIZE must not overrun a word boundary.  Otherwise,
2168          extract_fixed_bit_field will call us again, and we will mutually
2169          recurse forever.  */
2170       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2171       thissize = MIN (thissize, unit - thispos);
2172
2173       /* If OP0 is a register, then handle OFFSET here.  */
2174       rtx op0_piece = op0;
2175       opt_scalar_int_mode op0_piece_mode = op0_mode;
2176       if (SUBREG_P (op0) || REG_P (op0))
2177         {
               /* OFFSET selects the word to extract from, so it is zero
                  relative to the word_mode piece.  */
2178           op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2179           op0_piece_mode = word_mode;
2180           offset = 0;
2181         }
2182
2183       /* Extract the parts in bit-counting order,
2184          whose meaning is determined by BYTES_PER_UNIT.
2185          OFFSET is in UNITs, and UNIT is in bits.  */
           /* NOTE(review): "BYTES_PER_UNIT" above presumably means
              BYTES_BIG_ENDIAN, which is what the shifting code below
              tests -- confirm.  */
2186       part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2187                                       thissize, offset * unit + thispos,
2188                                       0, 1, reverse);
2189       bitsdone += thissize;
2190
2191       /* Shift this part into place for the result.  */
2192       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2193         {
               /* Earlier pieces are more significant here: shift left by the
                  number of bits still to come.  */
2194           if (bitsize != bitsdone)
2195             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2196                                  bitsize - bitsdone, 0, 1);
2197         }
2198       else
2199         {
               /* Earlier pieces are less significant here: shift left by the
                  number of bits gathered before this piece.  */
2200           if (bitsdone != thissize)
2201             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2202                                  bitsdone - thissize, 0, 1);
2203         }
2204
2205       if (first)
2206         result = part;
2207       else
2208         /* Combine the parts with bitwise or.  This works
2209            because we extracted each part as an unsigned bit field.  */
2210         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2211                                OPTAB_LIB_WIDEN);
2212
2213       first = 0;
2214     }
2215
2216   /* Unsigned bit field: we are done.  */
2217   if (unsignedp)
2218     return result;
2219   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2220   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2221                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2222   return expand_shift (RSHIFT_EXPR, word_mode, result,
2223                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2224 }
2225 \f
2226 /* Compute the low bits of SRC, which has mode SRC_MODE, as an rvalue
2227    of mode MODE without changing the bit pattern.  When SRC_MODE is
2228    narrower than MODE the upper bits of the result are zero.  Return
2229    null when the layout of either mode is unknown (as for CC modes) or
2230    when the extraction would need unprofitable mode punning; otherwise
2231    return the value.
2232
2233    Unlike the gen_lowpart* routines, which are conceptually lvalue or
2234    rvalue subreg operations, this routine performs a computation:
2235
2236      - the result must always be treated as an rvalue
2237
2238      - a MODE wider than SRC_MODE implies a zero extension
2239
2240      - a MODE narrower than SRC_MODE implies a truncation, which is
2241        therefore subject to TARGET_TRULY_NOOP_TRUNCATION.  */
2242
2243 rtx
2244 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2245 {
2246   /* Nothing to do when the modes already agree.  */
2247   if (mode == src_mode)
2248     return src;
2249
2250   if (CONSTANT_P (src))
2251     {
2252       /* Use simplify_subreg rather than simplify_gen_subreg: when
2253          simplification fails, the latter will happily create
2254          (subreg (symbol_ref)) or similar invalid SUBREGs.  */
2255       unsigned int lowpart_byte = subreg_lowpart_offset (mode, src_mode);
2256       rtx folded = simplify_subreg (mode, src, src_mode, lowpart_byte);
2257       if (folded)
2258         return folded;
2259
2260       /* Otherwise wrap a register copy of the constant in a SUBREG,
2261          provided the constant has a mode and the SUBREG is valid.  */
2262       bool can_wrap_p
2263         = (GET_MODE (src) != VOIDmode
2264            && validate_subreg (mode, src_mode, src, lowpart_byte));
2265       if (!can_wrap_p)
2266         return NULL_RTX;
2267
2268       rtx src_reg = force_reg (GET_MODE (src), src);
2269       return gen_rtx_SUBREG (mode, src_reg, lowpart_byte);
2270     }
2271
2272   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2273     return NULL_RTX;
2274
2275   /* Same-sized, tieable modes: a plain lowpart may be enough.  */
2276   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2277       && targetm.modes_tieable_p (mode, src_mode))
2278     {
2279       rtx lowpart = gen_lowpart_common (mode, src);
2280       if (lowpart)
2281         return lowpart;
2282     }
2283
2284   /* Otherwise go through integer modes, which must exist for both
2285      MODE and SRC_MODE and be tieable with them.  */
2286   scalar_int_mode int_mode, src_int_mode;
2287   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2288       || !int_mode_for_mode (mode).exists (&int_mode)
2289       || !targetm.modes_tieable_p (src_int_mode, src_mode)
2290       || !targetm.modes_tieable_p (int_mode, mode))
2291     return NULL_RTX;
2292
2293   /* Pun to the integer mode, zero-extend or truncate, and pun back.  */
2294   rtx as_src_int = gen_lowpart (src_int_mode, src);
2295   rtx as_int = convert_modes (int_mode, src_int_mode, as_src_int, true);
2296   return gen_lowpart (mode, as_int);
2297 }
2298 \f
2299 /* Add INC to TARGET, storing the sum back into TARGET.  */
2300
2301 void
2302 expand_inc (rtx target, rtx inc)
2303 {
2304   machine_mode target_mode = GET_MODE (target);
2305   rtx sum = expand_binop (target_mode, add_optab, target, inc,
2306                           target, 0, OPTAB_LIB_WIDEN);
2307   if (sum != target)  /* The sum landed elsewhere; copy it back.  */
2308     emit_move_insn (target, sum);
2309 }
2310
2311 /* Subtract DEC from TARGET, storing the difference back into TARGET.  */
2312
2313 void
2314 expand_dec (rtx target, rtx dec)
2315 {
2316   machine_mode target_mode = GET_MODE (target);
2317   rtx diff = expand_binop (target_mode, sub_optab, target, dec,
2318                            target, 0, OPTAB_LIB_WIDEN);
2319   if (diff != target)  /* The difference landed elsewhere; copy it back.  */
2320     emit_move_insn (target, diff);
2321 }
2322 \f
2323 /* Output a shift instruction for expression code CODE,
2324    with SHIFTED being the rtx for the value to shift,
2325    and AMOUNT the rtx for the amount to shift by.
2326    Store the result in the rtx TARGET, if that is convenient.
2327    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2328    Return the rtx for where the value is.
2329    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2330    in which case 0 is returned.  */
2331
2332 static rtx
2333 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2334                 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2335 {
2336   rtx op1, temp = 0;
2337   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2338   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2339   optab lshift_optab = ashl_optab;
2340   optab rshift_arith_optab = ashr_optab;
2341   optab rshift_uns_optab = lshr_optab;
2342   optab lrotate_optab = rotl_optab;
2343   optab rrotate_optab = rotr_optab;
2344   machine_mode op1_mode;
2345   scalar_mode scalar_mode = GET_MODE_INNER (mode);
2346   int attempt;
2347   bool speed = optimize_insn_for_speed_p ();
2348
2349   op1 = amount;
2350   op1_mode = GET_MODE (op1);
2351
2352   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2353      shift amount is a vector, use the vector/vector shift patterns.  */
2354   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2355     {
2356       lshift_optab = vashl_optab;
2357       rshift_arith_optab = vashr_optab;
2358       rshift_uns_optab = vlshr_optab;
2359       lrotate_optab = vrotl_optab;
2360       rrotate_optab = vrotr_optab;
2361     }
2362
2363   /* Previously detected shift-counts computed by NEGATE_EXPR
2364      and shifted in the other direction; but that does not work
2365      on all machines.  */
2366
2367   if (SHIFT_COUNT_TRUNCATED)
2368     {
2369       if (CONST_INT_P (op1)
2370           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2371               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2372         op1 = gen_int_shift_amount (mode,
2373                                     (unsigned HOST_WIDE_INT) INTVAL (op1)
2374                                     % GET_MODE_BITSIZE (scalar_mode));
2375       else if (GET_CODE (op1) == SUBREG
2376                && subreg_lowpart_p (op1)
2377                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2378                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2379         op1 = SUBREG_REG (op1);
2380     }
2381
2382   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2383      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2384      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2385      amount instead.  */
2386   if (rotate
2387       && CONST_INT_P (op1)
2388       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2389                    GET_MODE_BITSIZE (scalar_mode) - 1))
2390     {
2391       op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2392                                          - INTVAL (op1)));
2393       left = !left;
2394       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2395     }
2396
2397   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswap.
2398      Use MODE rather than HImode, since the test below only checks the element
2399      size and MODE may be a vector.  This does not hold for wider elements:
2400      rotating 0x01020304 by 16 bits gives 0x03040102, not 0x04030201 (bswapsi).  */
2401   if (rotate
2402       && CONST_INT_P (op1)
2403       && INTVAL (op1) == BITS_PER_UNIT
2404       && GET_MODE_SIZE (scalar_mode) == 2
2405       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2406     return expand_unop (mode, bswap_optab, shifted, NULL_RTX,
2407                         unsignedp);
2408
2409   if (op1 == const0_rtx)
2410     return shifted;
2411
2412   /* Check whether it is cheaper to implement a left shift by a constant
2413      bit count by a sequence of additions.  */
2414   if (code == LSHIFT_EXPR
2415       && CONST_INT_P (op1)
2416       && INTVAL (op1) > 0
2417       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2418       && INTVAL (op1) < MAX_BITS_PER_WORD
2419       && (shift_cost (speed, mode, INTVAL (op1))
2420           > INTVAL (op1) * add_cost (speed, mode))
2421       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2422     {
2423       int i;
2424       for (i = 0; i < INTVAL (op1); i++)
2425         {
2426           temp = force_reg (mode, shifted);
2427           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2428                                   unsignedp, OPTAB_LIB_WIDEN);
2429         }
2430       return shifted;
2431     }
2432
2433   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2434     {
2435       enum optab_methods methods;
2436
2437       if (attempt == 0)
2438         methods = OPTAB_DIRECT;
2439       else if (attempt == 1)
2440         methods = OPTAB_WIDEN;
2441       else
2442         methods = OPTAB_LIB_WIDEN;
2443
2444       if (rotate)
2445         {
2446           /* Widening does not work for rotation.  */
2447           if (methods == OPTAB_WIDEN)
2448             continue;
2449           else if (methods == OPTAB_LIB_WIDEN)
2450             {
2451               /* If we have been unable to open-code this by a rotation,
2452                  do it as the IOR of two shifts.  I.e., to rotate A
2453                  by N bits, compute
2454                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2455                  where C is the bitsize of A.
2456
2457                  It is theoretically possible that the target machine might
2458                  not be able to perform either shift and hence we would
2459                  be making two libcalls rather than just the one for the
2460                  shift (similarly if IOR could not be done).  We will allow
2461                  this extremely unlikely lossage to avoid complicating the
2462                  code below.  */
2463
2464               rtx subtarget = target == shifted ? 0 : target;
2465               rtx new_amount, other_amount;
2466               rtx temp1;
2467
2468               new_amount = op1;
2469               if (op1 == const0_rtx)
2470                 return shifted;
2471               else if (CONST_INT_P (op1))
2472                 other_amount = gen_int_shift_amount
2473                   (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2474               else
2475                 {
2476                   other_amount
2477                     = simplify_gen_unary (NEG, GET_MODE (op1),
2478                                           op1, GET_MODE (op1));
2479                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2480                   other_amount
2481                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2482                                            gen_int_mode (mask, GET_MODE (op1)));
2483                 }
2484
2485               shifted = force_reg (mode, shifted);
2486
2487               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2488                                      mode, shifted, new_amount, 0, 1);
2489               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2490                                       mode, shifted, other_amount,
2491                                       subtarget, 1);
2492               return expand_binop (mode, ior_optab, temp, temp1, target,
2493                                    unsignedp, methods);
2494             }
2495
2496           temp = expand_binop (mode,
2497                                left ? lrotate_optab : rrotate_optab,
2498                                shifted, op1, target, unsignedp, methods);
2499         }
2500       else if (unsignedp)
2501         temp = expand_binop (mode,
2502                              left ? lshift_optab : rshift_uns_optab,
2503                              shifted, op1, target, unsignedp, methods);
2504
2505       /* Do arithmetic shifts.
2506          Also, if we are going to widen the operand, we can just as well
2507          use an arithmetic right-shift instead of a logical one.  */
2508       if (temp == 0 && ! rotate
2509           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2510         {
2511           enum optab_methods methods1 = methods;
2512
2513           /* If trying to widen a log shift to an arithmetic shift,
2514              don't accept an arithmetic shift of the same size.  */
2515           if (unsignedp)
2516             methods1 = OPTAB_MUST_WIDEN;
2517
2518           /* Arithmetic shift */
2519
2520           temp = expand_binop (mode,
2521                                left ? lshift_optab : rshift_arith_optab,
2522                                shifted, op1, target, unsignedp, methods1);
2523         }
2524
2525       /* We used to try extzv here for logical right shifts, but that was
2526          only useful for one machine, the VAX, and caused poor code
2527          generation there for lshrdi3, so the code was deleted and a
2528          define_expand for lshrsi3 was added to vax.md.  */
2529     }
2530
2531   gcc_assert (temp != NULL_RTX || may_fail);  /* Failing is OK only if MAY_FAIL.  */
2532   return temp;
2533 }
2534
2535 /* Emit the shift or rotate of SHIFTED selected by tree code CODE,
2536    performed in mode MODE by the constant bit count AMOUNT.
2537    TARGET is a suggested place for the result and may be ignored.
2538    A nonzero UNSIGNEDP requests a logical shift, zero an arithmetic
2539    one.  The return value is the rtx that holds the result; failure
2540    to expand aborts the compilation.  */
2541
2542 rtx
2543 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2544               int amount, rtx target, int unsignedp)
2545 {
2546   /* Turn the constant count into an rtx for the worker routine.  */
2547   rtx count = gen_int_shift_amount (mode, amount);
2548   return expand_shift_1 (code, mode, shifted, count, target, unsignedp);
2549 }
2550
2551 /* Variant of expand_shift that returns 0 instead of aborting on failure.  */
2552
2553 static rtx
2554 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2555                     int amount, rtx target, int unsignedp)
2556 {
2557   rtx count = GEN_INT (amount);
2558   return expand_shift_1 (code, mode, shifted, count, target, unsignedp, true);
2559 }
2560
2561 /* Emit the shift or rotate of SHIFTED selected by tree code CODE,
2562    performed in mode MODE, where the bit count is the tree AMOUNT
2563    and is expanded to rtl here.
2564    TARGET is a suggested place for the result and may be ignored.
2565    A nonzero UNSIGNEDP requests a logical shift, zero an arithmetic
2566    one.  The return value is the rtx that holds the result.  */
2567
2568 rtx
2569 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2570                        tree amount, rtx target, int unsignedp)
2571 {
2572   rtx count = expand_normal (amount);
2573   return expand_shift_1 (code, mode, shifted, count, target, unsignedp);
2574 }
2575
2576 \f
2577 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2578                         const struct mult_cost *, machine_mode mode);
2579 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2580                               const struct algorithm *, enum mult_variant);
2581 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2582 static rtx extract_high_half (scalar_int_mode, rtx);
2583 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2584 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2585                                        int, int);
2586 /* Compute the best algorithm for multiplying by T and store it in ALG_OUT.
2587    The algorithm must cost less than COST_LIMIT.
2588    If ALG_OUT->cost >= COST_LIMIT, no algorithm was found and all
2589    other fields of ALG_OUT are undefined.
2590    MODE is the machine mode of the multiplication.  */
2591
2592 static void
2593 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2594             const struct mult_cost *cost_limit, machine_mode mode)
2595 {
2596   int m;
2597   struct algorithm *alg_in, *best_alg;
2598   struct mult_cost best_cost;
2599   struct mult_cost new_limit;
2600   int op_cost, op_latency;
2601   unsigned HOST_WIDE_INT orig_t = t;
2602   unsigned HOST_WIDE_INT q;
2603   int maxm, hash_index;
2604   bool cache_hit = false;
2605   enum alg_code cache_alg = alg_zero;
2606   bool speed = optimize_insn_for_speed_p ();
2607   scalar_int_mode imode;
2608   struct alg_hash_entry *entry_ptr;
2609
2610   /* Indicate that no algorithm is yet found.  If no algorithm
2611      is found, this value will be returned and indicate failure.  */
2612   alg_out->cost.cost = cost_limit->cost + 1;
2613   alg_out->cost.latency = cost_limit->latency + 1;
2614
2615   if (cost_limit->cost < 0
2616       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2617     return;
2618
2619   /* Be prepared for vector modes.  */
2620   imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2621
2622   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2623
2624   /* Restrict the bits of "t" to the multiplication's mode.  */
2625   t &= GET_MODE_MASK (imode);
2626
2627   /* t == 1 can be done in zero cost.  */
2628   if (t == 1)
2629     {
2630       alg_out->ops = 1;
2631       alg_out->cost.cost = 0;
2632       alg_out->cost.latency = 0;
2633       alg_out->op[0] = alg_m;
2634       return;
2635     }
2636
2637   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2638      fail now.  */
2639   if (t == 0)
2640     {
2641       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2642         return;
2643       else
2644         {
2645           alg_out->ops = 1;
2646           alg_out->cost.cost = zero_cost (speed);
2647           alg_out->cost.latency = zero_cost (speed);
2648           alg_out->op[0] = alg_zero;
2649           return;
2650         }
2651     }
2652
2653   /* We'll be needing a couple extra algorithm structures now.  */
2654
2655   alg_in = XALLOCA (struct algorithm);
2656   best_alg = XALLOCA (struct algorithm);
2657   best_cost = *cost_limit;
2658
2659   /* Compute the hash index.  */
2660   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2661
2662   /* See if we already know what to do for T.  */
2663   entry_ptr = alg_hash_entry_ptr (hash_index);
2664   if (entry_ptr->t == t
2665       && entry_ptr->mode == mode
2666       && entry_ptr->speed == speed
2667       && entry_ptr->alg != alg_unknown)
2668     {
2669       cache_alg = entry_ptr->alg;
2670
2671       if (cache_alg == alg_impossible)
2672         {
2673           /* The cache tells us that it's impossible to synthesize
2674              multiplication by T within entry_ptr->cost.  */
2675           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2676             /* COST_LIMIT is at least as restrictive as the one
2677                recorded in the hash table, in which case we have no
2678                hope of synthesizing a multiplication.  Just
2679                return.  */
2680             return;
2681
2682           /* If we get here, COST_LIMIT is less restrictive than the
2683              one recorded in the hash table, so we may be able to
2684              synthesize a multiplication.  Proceed as if we didn't
2685              have the cache entry.  */
2686         }
2687       else
2688         {
2689           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2690             /* The cached algorithm shows that this multiplication
2691                requires more cost than COST_LIMIT.  Just return.  This
2692                way, we don't clobber this cache entry with
2693                alg_impossible but retain useful information.  */
2694             return;
2695
2696           cache_hit = true;
2697
2698           switch (cache_alg)
2699             {
2700             case alg_shift:
2701               goto do_alg_shift;
2702
2703             case alg_add_t_m2:
2704             case alg_sub_t_m2:
2705               goto do_alg_addsub_t_m2;
2706
2707             case alg_add_factor:
2708             case alg_sub_factor:
2709               goto do_alg_addsub_factor;
2710
2711             case alg_add_t2_m:
2712               goto do_alg_add_t2_m;
2713
2714             case alg_sub_t2_m:
2715               goto do_alg_sub_t2_m;
2716
2717             default:
2718               gcc_unreachable ();
2719             }
2720         }
2721     }
2722
2723   /* If we have a group of zero bits at the low-order part of T, try
2724      multiplying by the remaining bits and then doing a shift.  */
2725
2726   if ((t & 1) == 0)
2727     {
2728     do_alg_shift:
2729       m = ctz_or_zero (t); /* m = number of low zero bits */
2730       if (m < maxm)
2731         {
2732           q = t >> m;
2733           /* The function expand_shift will choose between a shift and
2734              a sequence of additions, so the observed cost is given as
2735              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2736           op_cost = m * add_cost (speed, mode);
2737           if (shift_cost (speed, mode, m) < op_cost)
2738             op_cost = shift_cost (speed, mode, m);
2739           new_limit.cost = best_cost.cost - op_cost;
2740           new_limit.latency = best_cost.latency - op_cost;
2741           synth_mult (alg_in, q, &new_limit, mode);
2742
2743           alg_in->cost.cost += op_cost;
2744           alg_in->cost.latency += op_cost;
2745           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2746             {
2747               best_cost = alg_in->cost;
2748               std::swap (alg_in, best_alg);
2749               best_alg->log[best_alg->ops] = m;
2750               best_alg->op[best_alg->ops] = alg_shift;
2751             }
2752
2753           /* See if treating ORIG_T as a signed number yields a better
2754              sequence.  Try this sequence only for a negative ORIG_T
2755              as it would be useless for a non-negative ORIG_T.  */
2756           if ((HOST_WIDE_INT) orig_t < 0)
2757             {
2758               /* Shift ORIG_T as follows because a right shift of a
2759                  negative-valued signed type is implementation
2760                  defined.  */
2761               q = ~(~orig_t >> m);
2762               /* The function expand_shift will choose between a shift
2763                  and a sequence of additions, so the observed cost is
2764                  given as MIN (m * add_cost(speed, mode),
2765                  shift_cost(speed, mode, m)).  */
2766               op_cost = m * add_cost (speed, mode);
2767               if (shift_cost (speed, mode, m) < op_cost)
2768                 op_cost = shift_cost (speed, mode, m);
2769               new_limit.cost = best_cost.cost - op_cost;
2770               new_limit.latency = best_cost.latency - op_cost;
2771               synth_mult (alg_in, q, &new_limit, mode);
2772
2773               alg_in->cost.cost += op_cost;
2774               alg_in->cost.latency += op_cost;
2775               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2776                 {
2777                   best_cost = alg_in->cost;
2778                   std::swap (alg_in, best_alg);
2779                   best_alg->log[best_alg->ops] = m;
2780                   best_alg->op[best_alg->ops] = alg_shift;
2781                 }
2782             }
2783         }
2784       if (cache_hit)
2785         goto done;  /* Cached algorithm re-evaluated; skip the other strategies.  */
2786     }
2787
2788   /* If we have an odd number, add or subtract one.  */
2789   if ((t & 1) != 0)
2790     {
2791       unsigned HOST_WIDE_INT w;
2792
2793     do_alg_addsub_t_m2:
2794       for (w = 1; (w & t) != 0; w <<= 1)
2795         ;
2796       /* If T was -1, then W will be zero after the loop.  This is another
2797          case where T ends with ...111.  Handling this with (T + 1) and
2798          subtract 1 produces slightly better code and results in algorithm
2799          selection much faster than treating it like the ...0111 case
2800          below.  */
2801       if (w == 0
2802           || (w > 2
2803               /* Reject the case where t is 3.
2804                  Thus we prefer addition in that case.  */
2805               && t != 3))
2806         {
2807           /* T ends with ...111.  Compute x * (T + 1) and subtract x.  */
2808
2809           op_cost = add_cost (speed, mode);
2810           new_limit.cost = best_cost.cost - op_cost;
2811           new_limit.latency = best_cost.latency - op_cost;
2812           synth_mult (alg_in, t + 1, &new_limit, mode);
2813
2814           alg_in->cost.cost += op_cost;
2815           alg_in->cost.latency += op_cost;
2816           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2817             {
2818               best_cost = alg_in->cost;
2819               std::swap (alg_in, best_alg);
2820               best_alg->log[best_alg->ops] = 0;
2821               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2822             }
2823         }
2824       else
2825         {
2826           /* T ends with ...01 or ...011.  Compute x * (T - 1) and add x.  */
2827
2828           op_cost = add_cost (speed, mode);
2829           new_limit.cost = best_cost.cost - op_cost;
2830           new_limit.latency = best_cost.latency - op_cost;
2831           synth_mult (alg_in, t - 1, &new_limit, mode);
2832
2833           alg_in->cost.cost += op_cost;
2834           alg_in->cost.latency += op_cost;
2835           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2836             {
2837               best_cost = alg_in->cost;
2838               std::swap (alg_in, best_alg);
2839               best_alg->log[best_alg->ops] = 0;
2840               best_alg->op[best_alg->ops] = alg_add_t_m2;
2841             }
2842         }
2843
2844       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2845          quickly with a - a * n for some appropriate constant n.  */
2846       m = exact_log2 (-orig_t + 1);
2847       if (m >= 0 && m < maxm)
2848         {
2849           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2850           /* If the target has a cheap shift-and-subtract insn use
2851              that in preference to a shift insn followed by a sub insn.
2852              Assume that the shift-and-sub is "atomic" with a latency
2853              equal to its cost, otherwise assume that on superscalar
2854              hardware the shift may be executed concurrently with the
2855              earlier steps in the algorithm.  */
2856           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2857             {
2858               op_cost = shiftsub1_cost (speed, mode, m);
2859               op_latency = op_cost;
2860             }
2861           else
2862             op_latency = add_cost (speed, mode);
2863
2864           new_limit.cost = best_cost.cost - op_cost;
2865           new_limit.latency = best_cost.latency - op_latency;
2866           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2867                       &new_limit, mode);
2868
2869           alg_in->cost.cost += op_cost;
2870           alg_in->cost.latency += op_latency;
2871           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2872             {
2873               best_cost = alg_in->cost;
2874               std::swap (alg_in, best_alg);
2875               best_alg->log[best_alg->ops] = m;
2876               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2877             }
2878         }
2879
2880       if (cache_hit)
2881         goto done;
2882     }
2883
2884   /* Look for factors of t of the form
2885      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2886      If we find such a factor, we can multiply by t using an algorithm that
2887      multiplies by q, shift the result by m and add/subtract it to itself.
2888
2889      We search for large factors first and loop down, even if large factors
2890      are less probable than small; if we find a large factor we will find a
2891      good sequence quickly, and therefore be able to prune (by decreasing
2892      COST_LIMIT) the search.  */
2893
2894  do_alg_addsub_factor:
2895   for (m = floor_log2 (t - 1); m >= 2; m--)
2896     {
2897       unsigned HOST_WIDE_INT d;
2898
2899       d = (HOST_WIDE_INT_1U << m) + 1;
2900       if (t % d == 0 && t > d && m < maxm
2901           && (!cache_hit || cache_alg == alg_add_factor))
2902         {
2903           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2904           if (shiftadd_cost (speed, mode, m) <= op_cost)
2905             op_cost = shiftadd_cost (speed, mode, m);
2906
2907           op_latency = op_cost;
2908
2909
2910           new_limit.cost = best_cost.cost - op_cost;
2911           new_limit.latency = best_cost.latency - op_latency;
2912           synth_mult (alg_in, t / d, &new_limit, mode);
2913
2914           alg_in->cost.cost += op_cost;
2915           alg_in->cost.latency += op_latency;
2916           if (alg_in->cost.latency < op_cost)
2917             alg_in->cost.latency = op_cost;
2918           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2919             {
2920               best_cost = alg_in->cost;
2921               std::swap (alg_in, best_alg);
2922               best_alg->log[best_alg->ops] = m;
2923               best_alg->op[best_alg->ops] = alg_add_factor;
2924             }
2925           /* Other factors will have been taken care of in the recursion.  */
2926           break;
2927         }
2928
2929       d = (HOST_WIDE_INT_1U << m) - 1;
2930       if (t % d == 0 && t > d && m < maxm
2931           && (!cache_hit || cache_alg == alg_sub_factor))
2932         {
2933           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2934           if (shiftsub0_cost (speed, mode, m) <= op_cost)
2935             op_cost = shiftsub0_cost (speed, mode, m);
2936
2937           op_latency = op_cost;
2938
2939           new_limit.cost = best_cost.cost - op_cost;
2940           new_limit.latency = best_cost.latency - op_latency;
2941           synth_mult (alg_in, t / d, &new_limit, mode);
2942
2943           alg_in->cost.cost += op_cost;
2944           alg_in->cost.latency += op_latency;
2945           if (alg_in->cost.latency < op_cost)
2946             alg_in->cost.latency = op_cost;
2947           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2948             {
2949               best_cost = alg_in->cost;
2950               std::swap (alg_in, best_alg);
2951               best_alg->log[best_alg->ops] = m;
2952               best_alg->op[best_alg->ops] = alg_sub_factor;
2953             }
2954           break;
2955         }
2956     }
2957   if (cache_hit)
2958     goto done;
2959
2960   /* Try shift-and-add (load effective address) instructions,
2961      i.e. do a*3, a*5, a*9.  */
2962   if ((t & 1) != 0)
2963     {
2964     do_alg_add_t2_m:
2965       q = t - 1;
2966       m = ctz_hwi (q);
2967       if (q && m < maxm)
2968         {
2969           op_cost = shiftadd_cost (speed, mode, m);
2970           new_limit.cost = best_cost.cost - op_cost;
2971           new_limit.latency = best_cost.latency - op_cost;
2972           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2973
2974           alg_in->cost.cost += op_cost;
2975           alg_in->cost.latency += op_cost;
2976           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2977             {
2978               best_cost = alg_in->cost;
2979               std::swap (alg_in, best_alg);
2980               best_alg->log[best_alg->ops] = m;
2981               best_alg->op[best_alg->ops] = alg_add_t2_m;
2982             }
2983         }
2984       if (cache_hit)
2985         goto done;
2986
2987     do_alg_sub_t2_m:
2988       q = t + 1;
2989       m = ctz_hwi (q);
2990       if (q && m < maxm)
2991         {
2992           op_cost = shiftsub0_cost (speed, mode, m);
2993           new_limit.cost = best_cost.cost - op_cost;
2994           new_limit.latency = best_cost.latency - op_cost;
2995           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2996
2997           alg_in->cost.cost += op_cost;
2998           alg_in->cost.latency += op_cost;
2999           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3000             {
3001               best_cost = alg_in->cost;
3002               std::swap (alg_in, best_alg);
3003               best_alg->log[best_alg->ops] = m;
3004               best_alg->op[best_alg->ops] = alg_sub_t2_m;
3005             }
3006         }
3007       if (cache_hit)
3008         goto done;
3009     }
3010
3011  done:
3012   /* If best_cost has not decreased, we have not found any algorithm.  */
3013   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3014     {
3015       /* We failed to find an algorithm.  Record alg_impossible for
3016          this case (that is, <T, MODE, COST_LIMIT>) so that next time
3017          we are asked to find an algorithm for T within the same or
3018          lower COST_LIMIT, we can immediately return to the
3019          caller.  */
3020       entry_ptr->t = t;
3021       entry_ptr->mode = mode;
3022       entry_ptr->speed = speed;
3023       entry_ptr->alg = alg_impossible;
3024       entry_ptr->cost = *cost_limit;
3025       return;
3026     }
3027
3028   /* Cache the result.  */
3029   if (!cache_hit)
3030     {
3031       entry_ptr->t = t;
3032       entry_ptr->mode = mode;
3033       entry_ptr->speed = speed;
3034       entry_ptr->alg = best_alg->op[best_alg->ops];
3035       entry_ptr->cost.cost = best_cost.cost;
3036       entry_ptr->cost.latency = best_cost.latency;
3037     }
3038
3039   /* If we are getting a too long sequence for `struct algorithm'
3040      to record, make this search fail.  */
3041   if (best_alg->ops == MAX_BITS_PER_WORD)
3042     return;
3043
3044   /* Copy the algorithm from temporary space to the space at alg_out.
3045      We avoid using structure assignment because the majority of
3046      best_alg is normally undefined, and this is a critical function.  */
3047   alg_out->ops = best_alg->ops + 1;
3048   alg_out->cost = best_cost;
3049   memcpy (alg_out->op, best_alg->op,
3050           alg_out->ops * sizeof *alg_out->op);
3051   memcpy (alg_out->log, best_alg->log,
3052           alg_out->ops * sizeof *alg_out->log);
3053 }
3054 \f
3055 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3056    Try three variations:
3057
3058        - a shift/add sequence based on VAL itself
3059        - a shift/add sequence based on -VAL, followed by a negation
3060        - a shift/add sequence based on VAL - 1, followed by an addition.
3061
3062    Return true if the cheapest of these cost less than MULT_COST,
3063    describing the algorithm in *ALG and final fixup in *VARIANT.

        A negative MULT_COST fails immediately.  Otherwise MULT_COST is
        first capped at the cost of 2 * (unit bitsize of MODE) additions,
        and the final comparison is made against that capped value.  The
        -VAL variation is only attempted when HOST_BITS_PER_INT is at
        least the unit bitsize of MODE.  Whether optimizing for speed or
        size is taken from optimize_insn_for_speed_p.  */
3064
3065 bool
3066 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3067                      struct algorithm *alg, enum mult_variant *variant,
3068                      int mult_cost)
3069 {
3070   struct algorithm alg2;
3071   struct mult_cost limit;
3072   int op_cost;
3073   bool speed = optimize_insn_for_speed_p ();
3074
3075   /* Fail quickly for impossible bounds.  */
3076   if (mult_cost < 0)
3077     return false;
3078
3079   /* Ensure that mult_cost provides a reasonable upper bound.
3080      Any constant multiplication can be performed with less
3081      than 2 * bits additions.  */
3082   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3083   if (mult_cost > op_cost)
3084     mult_cost = op_cost;
3085
       /* First variation: synthesize VAL itself, with the whole budget
          available both as cost and as latency.  */
3086   *variant = basic_variant;
3087   limit.cost = mult_cost;
3088   limit.latency = mult_cost;
3089   synth_mult (alg, val, &limit, mode);
3090
3091   /* This works only if the inverted value actually fits in an
3092      `unsigned int' */
3093   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3094     {
           /* Second variation: synthesize -VAL and negate.  The search
              budget is whichever is tighter of the cost found so far and
              MULT_COST, less the cost of the trailing negation.  */
3095       op_cost = neg_cost (speed, mode);
3096       if (MULT_COST_LESS (&alg->cost, mult_cost))
3097         {
3098           limit.cost = alg->cost.cost - op_cost;
3099           limit.latency = alg->cost.latency - op_cost;
3100         }
3101       else
3102         {
3103           limit.cost = mult_cost - op_cost;
3104           limit.latency = mult_cost - op_cost;
3105         }
3106
3107       synth_mult (&alg2, -val, &limit, mode);
           /* Charge the negation before comparing with the current best.  */
3108       alg2.cost.cost += op_cost;
3109       alg2.cost.latency += op_cost;
3110       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3111         *alg = alg2, *variant = negate_variant;
3112     }
3113
       /* Third variation: synthesize VAL - 1 and add the operand once
          more.  The budget is derived the same way as above, this time
          reserving the cost of one addition.  */
3114   /* This proves very useful for division-by-constant.  */
3115   op_cost = add_cost (speed, mode);
3116   if (MULT_COST_LESS (&alg->cost, mult_cost))
3117     {
3118       limit.cost = alg->cost.cost - op_cost;
3119       limit.latency = alg->cost.latency - op_cost;
3120     }
3121   else
3122     {
3123       limit.cost = mult_cost - op_cost;
3124       limit.latency = mult_cost - op_cost;
3125     }
3126
3127   synth_mult (&alg2, val - 1, &limit, mode);
3128   alg2.cost.cost += op_cost;
3129   alg2.cost.latency += op_cost;
3130   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3131     *alg = alg2, *variant = add_variant;
3132
       /* Success only if the winner is cheaper than the (capped) limit.  */
3133   return MULT_COST_LESS (&alg->cost, mult_cost);
3134 }
3135
3136 /* A subroutine of expand_mult, used for constant multiplications.
3137    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3138    convenient.  Use the shift/add sequence described by ALG and apply
3139    the final fixup specified by VARIANT.

        ALG->op[0] must be alg_zero or alg_m; the remaining ALG->ops - 1
        steps are replayed in order using the shift counts in ALG->log.
        While doing so the constant actually being computed is tracked,
        and an assertion at the end checks that it equals VAL in the bits
        covered by the inner mode of MODE.  Returns the rtx holding the
        product.  */
3140
3141 static rtx
3142 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3143                    rtx target, const struct algorithm *alg,
3144                    enum mult_variant variant)
3145 {
3146   unsigned HOST_WIDE_INT val_so_far;
3147   rtx_insn *insn;
3148   rtx accum, tem;
3149   int opno;
3150   machine_mode nmode;
3151
3152   /* Avoid referencing memory over and over and invalid sharing
3153      on SUBREGs.  */
3154   op0 = force_reg (mode, op0);
3155
3156   /* ACCUM starts out either as OP0 or as a zero, depending on
3157      the first operation.  */
3158
3159   if (alg->op[0] == alg_zero)
3160     {
3161       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3162       val_so_far = 0;
3163     }
3164   else if (alg->op[0] == alg_m)
3165     {
3166       accum = copy_to_mode_reg (mode, op0);
3167       val_so_far = 1;
3168     }
3169   else
3170     gcc_unreachable ();
3171
       /* Replay steps 1 .. ALG->ops - 1.  VAL_SO_FAR is the constant K
          such that ACCUM currently holds OP0 * K.  */
3172   for (opno = 1; opno < alg->ops; opno++)
3173     {
3174       int log = alg->log[opno];
           /* When not optimizing, reuse ACCUM as the destination of
              intermediate results; when optimizing, pass no target.  */
3175       rtx shift_subtarget = optimize ? 0 : accum;
           /* TARGET is used directly only for the last step, and only when
              not optimizing and no add_variant fixup follows.  */
3176       rtx add_target
3177         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3178            && !optimize)
3179           ? target : 0;
3180       rtx accum_target = optimize ? 0 : accum;
3181       rtx accum_inner;
3182
3183       switch (alg->op[opno])
3184         {
3185         case alg_shift:
               /* accum <<= log.  */
3186           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3187           /* REG_EQUAL note will be attached to the following insn.  */
3188           emit_move_insn (accum, tem);
3189           val_so_far <<= log;
3190           break;
3191
3192         case alg_add_t_m2:
               /* accum += op0 << log.  */
3193           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3194           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3195                                  add_target ? add_target : accum_target);
3196           val_so_far += HOST_WIDE_INT_1U << log;
3197           break;
3198
3199         case alg_sub_t_m2:
               /* accum -= op0 << log.  */
3200           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3201           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3202                                  add_target ? add_target : accum_target);
3203           val_so_far -= HOST_WIDE_INT_1U << log;
3204           break;
3205
3206         case alg_add_t2_m:
               /* accum = (accum << log) + op0.  */
3207           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3208                                 log, shift_subtarget, 0);
3209           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3210                                  add_target ? add_target : accum_target);
3211           val_so_far = (val_so_far << log) + 1;
3212           break;
3213
3214         case alg_sub_t2_m:
               /* accum = (accum << log) - op0.  */
3215           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3216                                 log, shift_subtarget, 0);
3217           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3218                                  add_target ? add_target : accum_target);
3219           val_so_far = (val_so_far << log) - 1;
3220           break;
3221
3222         case alg_add_factor:
               /* accum += accum << log, i.e. multiply by 2^log + 1.  */
3223           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3224           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3225                                  add_target ? add_target : accum_target);
3226           val_so_far += val_so_far << log;
3227           break;
3228
3229         case alg_sub_factor:
               /* accum = (accum << log) - accum, i.e. multiply by
                  2^log - 1.  Note the operand order of the MINUS.  */
3230           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3231           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3232                                  (add_target
3233                                   ? add_target : (optimize ? 0 : tem)));
3234           val_so_far = (val_so_far << log) - val_so_far;
3235           break;
3236
3237         default:
3238           gcc_unreachable ();
3239         }
3240
3241       if (SCALAR_INT_MODE_P (mode))
3242         {
3243           /* Write a REG_EQUAL note on the last insn so that we can cse
3244              multiplication sequences.  Note that if ACCUM is a SUBREG,
3245              we've set the inner register and must properly indicate that.  */
3246           tem = op0, nmode = mode;
3247           accum_inner = accum;
3248           if (GET_CODE (accum) == SUBREG)
3249             {
3250               accum_inner = SUBREG_REG (accum);
3251               nmode = GET_MODE (accum_inner);
3252               tem = gen_lowpart (nmode, op0);
3253             }
3254
3255           insn = get_last_insn ();
3256           set_dst_reg_note (insn, REG_EQUAL,
3257                             gen_rtx_MULT (nmode, tem,
3258                                           gen_int_mode (val_so_far, nmode)),
3259                             accum_inner);
3260         }
3261     }
3262
       /* Apply the final fixup requested by VARIANT, keeping VAL_SO_FAR
          in step with what ACCUM now holds.  */
3263   if (variant == negate_variant)
3264     {
3265       val_so_far = -val_so_far;
3266       accum = expand_unop (mode, neg_optab, accum, target, 0);
3267     }
3268   else if (variant == add_variant)
3269     {
3270       val_so_far = val_so_far + 1;
3271       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3272     }
3273
3274   /* Compare only the bits of val and val_so_far that are significant
3275      in the result mode, to avoid sign-/zero-extension confusion.  */
3276   nmode = GET_MODE_INNER (mode);
3277   val &= GET_MODE_MASK (nmode);
3278   val_so_far &= GET_MODE_MASK (nmode);
3279   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3280
3281   return accum;
3282 }
3283
3284 /* Perform a multiplication and return an rtx for the result.
3285    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3286    TARGET is a suggestion for where to store the result (an rtx).
3287
3288    We check specially for a constant integer as OP1.
3289    If you want this check for OP0 as well, then before calling
3290    you should swap the two operands if OP0 would be constant.
        (The function itself already swaps OP0 and OP1 when OP0 satisfies
        CONSTANT_P.)

        UNSIGNEDP is passed on to the shift and binop expanders.  When it
        is zero, MODE is a scalar integer mode and flag_trapv is set, the
        trapping optabs (negv_optab, smulv_optab) are used and no
        shift/add sequence is synthesized.

        If NO_LIBCALL is true the fallback expansion uses OPTAB_WIDEN
        instead of OPTAB_LIB_WIDEN; only in that case may the returned rtx
        be null, as the assertion at the end of the function shows.  */
3291
3292 rtx
3293 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3294              int unsignedp, bool no_libcall)
3295 {
3296   enum mult_variant variant;
3297   struct algorithm algorithm;
3298   rtx scalar_op1;
3299   int max_cost;
3300   bool speed = optimize_insn_for_speed_p ();
3301   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3302
3303   if (CONSTANT_P (op0))
3304     std::swap (op0, op1);
3305
3306   /* For vectors, there are several simplifications that can be made if
3307      all elements of the vector constant are identical.  */
3308   scalar_op1 = unwrap_const_vec_duplicate (op1);
3309
3310   if (INTEGRAL_MODE_P (mode))
3311     {
3312       rtx fake_reg;
3313       HOST_WIDE_INT coeff;
3314       bool is_neg;
3315       int mode_bitsize;
3316
           /* Multiplication by 0, 1 and -1 needs no multiply at all.  */
3317       if (op1 == CONST0_RTX (mode))
3318         return op1;
3319       if (op1 == CONST1_RTX (mode))
3320         return op0;
3321       if (op1 == CONSTM1_RTX (mode))
3322         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3323                             op0, target, 0);
3324
3325       if (do_trapv)
3326         goto skip_synth;
3327
3328       /* If mode is integer vector mode, check if the backend supports
3329          vector lshift (by scalar or vector) at all.  If not, we can't use
3330          synthetized multiply.  */
3331       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3332           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3333           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3334         goto skip_synth;
3335
3336       /* These are the operations that are potentially turned into
3337          a sequence of shifts and additions.  */
3338       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3339
3340       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3341          less than or equal in size to `unsigned int' this doesn't matter.
3342          If the mode is larger than `unsigned int', then synth_mult works
3343          only if the constant value exactly fits in an `unsigned int' without
3344          any truncation.  This means that multiplying by negative values does
3345          not work; results are off by 2^32 on a 32 bit machine.  */
3346       if (CONST_INT_P (scalar_op1))
3347         {
3348           coeff = INTVAL (scalar_op1);
3349           is_neg = coeff < 0;
3350         }
3351 #if TARGET_SUPPORTS_WIDE_INT
3352       else if (CONST_WIDE_INT_P (scalar_op1))
3353 #else
3354       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3355 #endif
3356         {
               /* A constant too wide for a CONST_INT is only handled here
                  when it is an exact power of two.  */
3357           int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3358           /* Perfect power of 2 (other than 1, which is handled above).  */
3359           if (shift > 0)
3360             return expand_shift (LSHIFT_EXPR, mode, op0,
3361                                  shift, target, unsignedp);
3362           else
3363             goto skip_synth;
3364         }
3365       else
3366         goto skip_synth;
3367
3368       /* We used to test optimize here, on the grounds that it's better to
3369          produce a smaller program when -O is not used.  But this causes
3370          such a terrible slowdown sometimes that it seems better to always
3371          use synth_mult.  */
3372
3373       /* Special case powers of two.  */
3374       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3375           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3376         return expand_shift (LSHIFT_EXPR, mode, op0,
3377                              floor_log2 (coeff), target, unsignedp);
3378
           /* A placeholder register, used only to build MULT rtxes whose
              cost is then queried with set_src_cost.  */
3379       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3380
3381       /* Attempt to handle multiplication of DImode values by negative
3382          coefficients, by performing the multiplication by a positive
3383          multiplier and then inverting the result.  */
3384       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3385         {
3386           /* It's safe to use -coeff even for INT_MIN, as the
3387              result is interpreted as an unsigned coefficient.
3388              Exclude cost of op0 from max_cost to match the cost
3389              calculation of the synth_mult.  */
3390           coeff = -(unsigned HOST_WIDE_INT) coeff;
               /* The trailing negation has to be paid for out of the
                  multiply's budget.  */
3391           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3392                                     mode, speed)
3393                       - neg_cost (speed, mode));
3394           if (max_cost <= 0)
3395             goto skip_synth;
3396
3397           /* Special case powers of two.  */
3398           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3399             {
3400               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3401                                        floor_log2 (coeff), target, unsignedp);
3402               return expand_unop (mode, neg_optab, temp, target, 0);
3403             }
3404
3405           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3406                                    max_cost))
3407             {
3408               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3409                                             &algorithm, variant);
3410               return expand_unop (mode, neg_optab, temp, target, 0);
3411             }
3412           goto skip_synth;
3413         }
3414
3415       /* Exclude cost of op0 from max_cost to match the cost
3416          calculation of the synth_mult.  */
3417       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3418       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3419         return expand_mult_const (mode, op0, coeff, target,
3420                                   &algorithm, variant);
3421     }
3422  skip_synth:
3423
3424   /* Expand x*2.0 as x+x.  */
3425   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3426       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3427     {
3428       op0 = force_reg (GET_MODE (op0), op0);
3429       return expand_binop (mode, add_optab, op0, op0,
3430                            target, unsignedp,
3431                            no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3432     }
3433
3434   /* This used to use umul_optab if unsigned, but for non-widening multiply
3435      there is no difference between signed and unsigned.  */
3436   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3437                       op0, op1, target, unsignedp,
3438                       no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
       /* A null result is only acceptable when library calls were
          disallowed by the caller.  */
3439   gcc_assert (op0 || no_libcall);
3440   return op0;
3441 }
3442
3443 /* Return a cost estimate for multiplying a register by the given
3444    COEFFicient in the given MODE and SPEED.

        The cost of a register-by-register MULT in MODE (as reported by
        set_src_cost) serves as the upper bound.  If choose_mult_variant
        finds a shift/add sequence below that bound, the cost of that
        sequence is returned; otherwise the bound itself is returned.  */
3445
3446 int
3447 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3448 {
3449   int max_cost;
3450   struct algorithm algorithm;
3451   enum mult_variant variant;
3452
       /* A placeholder register, used only to build the MULT rtx that is
          being costed.  */
3453   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3454   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3455                            mode, speed);
3456   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3457     return algorithm.cost.cost;
3458   else
3459     return max_cost;
3460 }
3461
3462 /* Perform a widening multiplication and return an rtx for the result.
3463    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3464    TARGET is a suggestion for where to store the result (an rtx).
3465    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3466    or smul_widen_optab.
3467
3468    We check specially for a constant integer as OP1, comparing the
3469    cost of a widening multiply against the cost of a sequence of shifts
3470    and adds.

        The constant path is taken only when OP1 is a CONST_INT, OP0 has a
        non-VOID mode, and OP1 converted to MODE is still a CONST_INT that
        is either non-negative or lives in a HWI_COMPUTABLE_MODE_P mode.
        In every other case, and when no cheap enough shift/add sequence
        is found, THIS_OPTAB is expanded with OPTAB_LIB_WIDEN.  */
3471
3472 rtx
3473 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3474                       int unsignedp, optab this_optab)
3475 {
3476   bool speed = optimize_insn_for_speed_p ();
3477   rtx cop1;
3478
       /* COP1 is OP1 converted from the mode of OP0 to MODE, treated as
          unsigned exactly when THIS_OPTAB is umul_widen_optab.  */
3479   if (CONST_INT_P (op1)
3480       && GET_MODE (op0) != VOIDmode
3481       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3482                                 this_optab == umul_widen_optab))
3483       && CONST_INT_P (cop1)
3484       && (INTVAL (cop1) >= 0
3485           || HWI_COMPUTABLE_MODE_P (mode)))
3486     {
3487       HOST_WIDE_INT coeff = INTVAL (cop1);
3488       int max_cost;
3489       enum mult_variant variant;
3490       struct algorithm algorithm;
3491
3492       if (coeff == 0)
3493         return CONST0_RTX (mode);
3494
3495       /* Special case powers of two.  */
3496       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3497         {
               /* Widen OP0 first, then shift in the wide mode.  */
3498           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3499           return expand_shift (LSHIFT_EXPR, mode, op0,
3500                                floor_log2 (coeff), target, unsignedp);
3501         }
3502
3503       /* Exclude cost of op0 from max_cost to match the cost
3504          calculation of the synth_mult.  */
3505       max_cost = mul_widen_cost (speed, mode);
3506       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3507                                max_cost))
3508         {
3509           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3510           return expand_mult_const (mode, op0, coeff, target,
3511                                     &algorithm, variant);
3512         }
3513     }
       /* Fall back to the widening multiply optab itself.  */
3514   return expand_binop (mode, this_optab, op0, op1, target,
3515                        unsignedp, OPTAB_LIB_WIDEN);
3516 }
3517 \f
3518 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3519    replace division by D, and put the least significant N bits of the result
3520    in *MULTIPLIER_PTR and return the most significant bit.
3521
3522    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3523    needed precision is in PRECISION (should be <= N).
3524
3525    PRECISION should be as small as possible so this function can choose
3526    multiplier more freely.
3527
3528    The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3529    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3530
3531    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3532    where m is the full N + 1 bit multiplier, i.e. the returned bit
        followed by the N bits stored in *MULTIPLIER_PTR.

        The return value is exactly bit N of the chosen multiplier: it is
        nonzero only when the multiplier is at least 2^N.  */
3533
3534 unsigned HOST_WIDE_INT
3535 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3536                    unsigned HOST_WIDE_INT *multiplier_ptr,
3537                    int *post_shift_ptr, int *lgup_ptr)
3538 {
3539   int lgup, post_shift;
3540   int pow, pow2;
3541
3542   /* lgup = ceil(log2(divisor)); */
3543   lgup = ceil_log2 (d);
3544
3545   gcc_assert (lgup <= n);
3546
3547   pow = n + lgup;
3548   pow2 = n + lgup - precision;
3549
3550   /* mlow = 2^(N + lgup)/d */
3551   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3552   wide_int mlow = wi::udiv_trunc (val, d);
3553
3554   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3555   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3556   wide_int mhigh = wi::udiv_trunc (val, d);
3557
3558   /* If precision == N, then mlow, mhigh exceed 2^N
3559      (but they do not exceed 2^(N+1)).  */
3560
3561   /* Reduce to lowest terms.  */
       /* Each iteration halves both bounds and lowers the post shift by
          one; stop as soon as the halved bounds would no longer be
          strictly ordered.  */
3562   for (post_shift = lgup; post_shift > 0; post_shift--)
3563     {
3564       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3565                                                        HOST_BITS_PER_WIDE_INT);
3566       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3567                                                        HOST_BITS_PER_WIDE_INT);
3568       if (ml_lo >= mh_lo)
3569         break;
3570
3571       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3572       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3573     }
3574
3575   *post_shift_ptr = post_shift;
3576   *lgup_ptr = lgup;
3577   if (n < HOST_BITS_PER_WIDE_INT)
3578     {
3579       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3580       *multiplier_ptr = mhigh.to_uhwi () & mask;
           /* Bit N is set exactly when MHIGH exceeds the N-bit mask.  A
              multiplier equal to 2^N - 1 fits in N bits and must report a
              clear top bit, hence the strict comparison.  */
3581       return mhigh.to_uhwi () > mask;
3582     }
3583   else
3584     {
           /* N is the full HOST_WIDE_INT width: the low word is the
              multiplier and the next bit up is the returned top bit.  */
3585       *multiplier_ptr = mhigh.to_uhwi ();
3586       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3587     }
3588 }
3589
3590 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3591    congruent to 1 (mod 2**N).

        X is assumed to be odd.  When N is at most 3 the refinement loop
        does not run and X itself is returned, without masking.  */
3592
3593 static unsigned HOST_WIDE_INT
3594 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3595 {
3596   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3597
3598   /* The algorithm notes that the choice y = x satisfies
3599      x*y == 1 mod 2^3, since x is assumed odd.
3600      Each iteration doubles the number of bits of significance in y.  */
3601
3602   unsigned HOST_WIDE_INT mask;
3603   unsigned HOST_WIDE_INT y = x;
       /* Number of low-order bits of Y known to be correct so far.  */
3604   int nbit = 3;
3605
       /* Build the N-bit mask without ever shifting by the full width of
          HOST_WIDE_INT.  */
3606   mask = (n == HOST_BITS_PER_WIDE_INT
3607           ? HOST_WIDE_INT_M1U
3608           : (HOST_WIDE_INT_1U << n) - 1);
3609
3610   while (nbit < n)
3611     {
3612       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3613       nbit *= 2;
3614     }
3615   return y;
3616 }
3617
3618 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3619    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3620    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3621    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3622    become signed.
3623
3624    The result is put in TARGET if that is convenient.
3625
3626    MODE is the mode of operation.

        Two correction terms are applied, each with PLUS when UNSIGNEDP is
        nonzero and MINUS otherwise: (OP0 >> (bitsize - 1)) & OP1 and
        (OP1 >> (bitsize - 1)) & OP0, where the shifts are requested as
        signed right shifts.  Returns the rtx holding the adjusted high
        part.  */
3627
3628 rtx
3629 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3630                              rtx op1, rtx target, int unsignedp)
3631 {
3632   rtx tem;
3633   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3634
       /* First term: the sign of OP0 spread over all bits, ANDed with
          OP1; the intermediate result is kept in ADJ_OPERAND.  */
3635   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3636                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3637   tem = expand_and (mode, tem, op1, NULL_RTX);
3638   adj_operand
3639     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3640                      adj_operand);
3641
       /* Second term: the same with the roles of OP0 and OP1 exchanged;
          this time the result goes to TARGET.  */
3642   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3643                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3644   tem = expand_and (mode, tem, op0, NULL_RTX);
3645   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3646                           target);
3647
3648   return target;
3649 }
3650
3651 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.

        When MODE is word_mode the high part is obtained with gen_highpart.
        Otherwise OP is shifted right, as an unsigned value, by the bitsize
        of MODE in the next wider integer mode and the result is converted
        back to MODE.  Callers in this file pass an OP computed in that
        wider mode.  */
3652
3653 static rtx
3654 extract_high_half (scalar_int_mode mode, rtx op)
3655 {
3656   if (mode == word_mode)
3657     return gen_highpart (mode, op);
3658
       /* require () insists that a wider integer mode exists.  */
3659   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3660
3661   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3662                      GET_MODE_BITSIZE (mode), 0, 1);
3663   return convert_modes (mode, wider_mode, op, 0);
3664 }
3665
3666 /* Like expmed_mult_highpart, but only consider using a multiplication
3667    optab.  OP1 is an rtx for the constant operand.

        OP1 is read with INTVAL, so it must be a CONST_INT.  The following
        strategies are tried in order, each only if its estimated cost is
        below MAX_COST (some also require size - 1 < BITS_PER_WORD or an
        available insn):

            1. a highpart multiply of the requested signedness
            2. a highpart multiply of the opposite signedness, adjusted
            3. a widening multiply, taking the high half
            4. a plain multiply in the wider mode, taking the high half
            5. a widening multiply of the opposite signedness, adjusted.

        Returns the rtx holding the high part, or 0 if none of the
        strategies could be used.  */
3668
3669 static rtx
3670 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3671                             rtx target, int unsignedp, int max_cost)
3672 {
       /* The value of OP1 re-expressed as a constant of MODE.  */
3673   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3674   optab moptab;
3675   rtx tem;
3676   int size;
3677   bool speed = optimize_insn_for_speed_p ();
3678
3679   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3680
3681   size = GET_MODE_BITSIZE (mode);
3682
3683   /* Firstly, try using a multiplication insn that only generates the needed
3684      high part of the product, and in the sign flavor of unsignedp.  */
3685   if (mul_highpart_cost (speed, mode) < max_cost)
3686     {
3687       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3688       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3689                           unsignedp, OPTAB_DIRECT);
3690       if (tem)
3691         return tem;
3692     }
3693
3694   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3695      Need to adjust the result after the multiplication.  */
       /* The cost estimate charges two shifts and four additions for the
          adjustment on top of the highpart multiply.  */
3696   if (size - 1 < BITS_PER_WORD
3697       && (mul_highpart_cost (speed, mode)
3698           + 2 * shift_cost (speed, mode, size-1)
3699           + 4 * add_cost (speed, mode) < max_cost))
3700     {
3701       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3702       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3703                           unsignedp, OPTAB_DIRECT);
3704       if (tem)
3705         /* We used the wrong signedness.  Adjust the result.  */
3706         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3707                                             tem, unsignedp);
3708     }
3709
3710   /* Try widening multiplication.  */
3711   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3712   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3713       && mul_widen_cost (speed, wider_mode) < max_cost)
3714     {
3715       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3716                           unsignedp, OPTAB_WIDEN);
3717       if (tem)
3718         return extract_high_half (mode, tem);
3719     }
3720
3721   /* Try widening the mode and perform a non-widening multiplication.  */
3722   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3723       && size - 1 < BITS_PER_WORD
3724       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3725           < max_cost))
3726     {
3727       rtx_insn *insns;
3728       rtx wop0, wop1;
3729
3730       /* We need to widen the operands, for example to ensure the
3731          constant multiplier is correctly sign or zero extended.
3732          Use a sequence to clean-up any instructions emitted by
3733          the conversions if things don't work out.  */
3734       start_sequence ();
3735       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3736       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3737       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3738                           unsignedp, OPTAB_WIDEN);
3739       insns = get_insns ();
3740       end_sequence ();
3741
           /* Only commit the collected insns if the multiply expanded.  */
3742       if (tem)
3743         {
3744           emit_insn (insns);
3745           return extract_high_half (mode, tem);
3746         }
3747     }
3748
3749   /* Try widening multiplication of opposite signedness, and adjust.  */
3750   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3751   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3752       && size - 1 < BITS_PER_WORD
3753       && (mul_widen_cost (speed, wider_mode)
3754           + 2 * shift_cost (speed, mode, size-1)
3755           + 4 * add_cost (speed, mode) < max_cost))
3756     {
3757       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3758                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3759       if (tem != 0)
3760         {
3761           tem = extract_high_half (mode, tem);
3762           /* We used the wrong signedness.  Adjust the result.  */
3763           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3764                                               target, unsignedp);
3765         }
3766     }
3767
       /* No strategy was both available and cheap enough.  */
3768   return 0;
3769 }
3770
3771 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3772    putting the high half of the result in TARGET if that is convenient,
3773    and return where the result is.  If the operation can not be performed,
3774    0 is returned.
3775
3776    MODE is the mode of operation and result.
3777
3778    UNSIGNEDP nonzero means unsigned multiply.
3779
3780    MAX_COST is the total allowed cost for the expanded RTL.

        When the next wider mode fits in a word, a shift/add expansion of
        the full product in that wider mode is weighed against the
        multiplication optabs; otherwise only the optabs are considered.  */
3781
3782 static rtx
3783 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3784                       rtx target, int unsignedp, int max_cost)
3785 {
3786   unsigned HOST_WIDE_INT cnst1;
3787   int extra_cost;
3788   bool sign_adjust = false;
3789   enum mult_variant variant;
3790   struct algorithm alg;
3791   rtx tem;
3792   bool speed = optimize_insn_for_speed_p ();
3793
3794   /* We can't support modes wider than HOST_BITS_PER_INT.
          NOTE(review): the assertion below uses HWI_COMPUTABLE_MODE_P, so
          the bound meant here is presumably HOST_BITS_PER_WIDE_INT --
          confirm.  */
3795   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3796
       /* The constant restricted to the bits of MODE, read as unsigned.  */
3797   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3798
3799   /* We can't optimize modes wider than BITS_PER_WORD.
3800      ??? We might be able to perform double-word arithmetic if
3801      mode == word_mode, however all the cost calculations in
3802      synth_mult etc. assume single-word operations.  */
3803   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3804   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3805     return expmed_mult_highpart_optab (mode, op0, op1, target,
3806                                        unsignedp, max_cost);
3807
       /* Overhead of the shift/add route beyond the multiplication
          itself: one shift, plus one addition if a sign fixup is needed.  */
3808   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3809
3810   /* Check whether we try to multiply by a negative constant.  */
3811   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3812     {
3813       sign_adjust = true;
3814       extra_cost += add_cost (speed, mode);
3815     }
3816
3817   /* See whether shift/add multiplication is cheap enough.  */
3818   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3819                            max_cost - extra_cost))
3820     {
3821       /* See whether the specialized multiplication optabs are
3822          cheaper than the shift/add version.  */
3823       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3824                                         alg.cost.cost + extra_cost);
3825       if (tem)
3826         return tem;
3827
           /* Multiply in the wider mode by the unsigned reading of the
              constant and keep the high half.  */
3828       tem = convert_to_mode (wider_mode, op0, unsignedp);
3829       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3830       tem = extract_high_half (mode, tem);
3831
3832       /* Adjust result for signedness.  */
           /* For a negative constant CNST1 is the intended value plus
              2^bitsize, so the high half came out too large by OP0.  */
3833       if (sign_adjust)
3834         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3835
3836       return tem;
3837     }
3838   return expmed_mult_highpart_optab (mode, op0, op1, target,
3839                                      unsignedp, max_cost);
3840 }
3841
3842
3843 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3844
3845 static rtx
3846 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3847 {
3848   rtx result, temp, shift;
3849   rtx_code_label *label;
3850   int logd;
3851   int prec = GET_MODE_PRECISION (mode);
3852
3853   logd = floor_log2 (d);
3854   result = gen_reg_rtx (mode);
3855
3856   /* Avoid conditional branches when they're expensive.  */
3857   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3858       && optimize_insn_for_speed_p ())
3859     {
3860       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3861                                       mode, 0, -1);
3862       if (signmask)
3863         {
3864           HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3865           signmask = force_reg (mode, signmask);
3866           shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
3867
3868           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3869              which instruction sequence to use.  If logical right shifts
3870              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3871              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3872
3873           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3874           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3875               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3876                   > COSTS_N_INSNS (2)))
3877             {
3878               temp = expand_binop (mode, xor_optab, op0, signmask,
3879                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3880               temp = expand_binop (mode, sub_optab, temp, signmask,
3881                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3882               temp = expand_binop (mode, and_optab, temp,
3883                                    gen_int_mode (masklow, mode),
3884                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3885               temp = expand_binop (mode, xor_optab, temp, signmask,
3886                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3887               temp = expand_binop (mode, sub_optab, temp, signmask,
3888                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3889             }
3890           else
3891             {
3892               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3893                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3894               signmask = force_reg (mode, signmask);
3895
3896               temp = expand_binop (mode, add_optab, op0, signmask,
3897                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3898               temp = expand_binop (mode, and_optab, temp,
3899                                    gen_int_mode (masklow, mode),
3900                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3901               temp = expand_binop (mode, sub_optab, temp, signmask,
3902                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3903             }
3904           return temp;
3905         }
3906     }
3907
3908   /* Mask contains the mode's signbit and the significant bits of the
3909      modulus.  By including the signbit in the operation, many targets
3910      can avoid an explicit compare operation in the following comparison
3911      against zero.  */
3912   wide_int mask = wi::mask (logd, false, prec);
3913   mask = wi::set_bit (mask, prec - 1);
3914
3915   temp = expand_binop (mode, and_optab, op0,
3916                        immed_wide_int_const (mask, mode),
3917                        result, 1, OPTAB_LIB_WIDEN);
3918   if (temp != result)
3919     emit_move_insn (result, temp);
3920
3921   label = gen_label_rtx ();
3922   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3923
3924   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3925                        0, OPTAB_LIB_WIDEN);
3926
3927   mask = wi::mask (logd, true, prec);
3928   temp = expand_binop (mode, ior_optab, temp,
3929                        immed_wide_int_const (mask, mode),
3930                        result, 1, OPTAB_LIB_WIDEN);
3931   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3932                        0, OPTAB_LIB_WIDEN);
3933   if (temp != result)
3934     emit_move_insn (result, temp);
3935   emit_label (label);
3936   return result;
3937 }
3938
3939 /* Expand signed division of OP0 by a power of two D in mode MODE.
3940    This routine is only called for positive values of D.  */
3941
3942 static rtx
3943 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3944 {
3945   rtx temp;
3946   rtx_code_label *label;
3947   int logd;
3948
3949   logd = floor_log2 (d);
3950
3951   if (d == 2
3952       && BRANCH_COST (optimize_insn_for_speed_p (),
3953                       false) >= 1)
3954     {
3955       temp = gen_reg_rtx (mode);
3956       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3957       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3958                            0, OPTAB_LIB_WIDEN);
3959       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3960     }
3961
3962   if (HAVE_conditional_move
3963       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3964     {
3965       rtx temp2;
3966
3967       start_sequence ();
3968       temp2 = copy_to_mode_reg (mode, op0);
3969       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3970                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3971       temp = force_reg (mode, temp);
3972
3973       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3974       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3975                                      mode, temp, temp2, mode, 0);
3976       if (temp2)
3977         {
3978           rtx_insn *seq = get_insns ();
3979           end_sequence ();
3980           emit_insn (seq);
3981           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3982         }
3983       end_sequence ();
3984     }
3985
3986   if (BRANCH_COST (optimize_insn_for_speed_p (),
3987                    false) >= 2)
3988     {
3989       int ushift = GET_MODE_BITSIZE (mode) - logd;
3990
3991       temp = gen_reg_rtx (mode);
3992       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3993       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3994           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3995              > COSTS_N_INSNS (1))
3996         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3997                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3998       else
3999         temp = expand_shift (RSHIFT_EXPR, mode, temp,
4000                              ushift, NULL_RTX, 1);
4001       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4002                            0, OPTAB_LIB_WIDEN);
4003       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4004     }
4005
4006   label = gen_label_rtx ();
4007   temp = copy_to_mode_reg (mode, op0);
4008   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4009   expand_inc (temp, gen_int_mode (d - 1, mode));
4010   emit_label (label);
4011   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4012 }
4013 \f
4014 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4015    if that is convenient, and returning where the result is.
4016    You may request either the quotient or the remainder as the result;
4017    specify REM_FLAG nonzero to get the remainder.
4018
4019    CODE is the expression code for which kind of division this is;
4020    it controls how rounding is done.  MODE is the machine mode to use.
4021    UNSIGNEDP nonzero means do unsigned division.  */
4022
4023 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4024    and then correct it by or'ing in missing high bits
4025    if result of ANDI is nonzero.
4026    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4027    This could optimize to a bfexts instruction.
4028    But C doesn't use these operations, so their optimizations are
4029    left for later.  */
4030 /* ??? For modulo, we don't actually need the highpart of the first product,
4031    the low part will do nicely.  And for small divisors, the second multiply
4032    can also be a low-part only multiply or even be completely left out.
4033    E.g. to calculate the remainder of a division by 3 with a 32 bit
4034    multiply, multiply with 0x55555556 and extract the upper two bits;
4035    the result is exact for inputs up to 0x1fffffff.
4036    The input range can be reduced by using cross-sum rules.
4037    For odd divisors >= 3, the following table gives right shift counts
4038    so that if a number is shifted by an integer multiple of the given
4039    amount, the remainder stays the same:
4040    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4041    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4042    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4043    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4044    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4045
4046    Cross-sum rules for even numbers can be derived by leaving as many bits
4047    to the right alone as the divisor has zeros to the right.
4048    E.g. if x is an unsigned 32 bit number:
4049    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4050    */
4051
4052 rtx
4053 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4054                rtx op0, rtx op1, rtx target, int unsignedp)
4055 {
4056   machine_mode compute_mode;
4057   rtx tquotient;
4058   rtx quotient = 0, remainder = 0;
4059   rtx_insn *last;
4060   rtx_insn *insn;
4061   optab optab1, optab2;
4062   int op1_is_constant, op1_is_pow2 = 0;
4063   int max_cost, extra_cost;
4064   static HOST_WIDE_INT last_div_const = 0;
4065   bool speed = optimize_insn_for_speed_p ();
4066
4067   op1_is_constant = CONST_INT_P (op1);
4068   if (op1_is_constant)
4069     {
4070       wide_int ext_op1 = rtx_mode_t (op1, mode);
4071       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4072                      || (! unsignedp
4073                          && wi::popcount (wi::neg (ext_op1)) == 1));
4074     }
4075
4076   /*
4077      This is the structure of expand_divmod:
4078
4079      First comes code to fix up the operands so we can perform the operations
4080      correctly and efficiently.
4081
4082      Second comes a switch statement with code specific for each rounding mode.
4083      For some special operands this code emits all RTL for the desired
4084      operation, for other cases, it generates only a quotient and stores it in
4085      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4086      to indicate that it has not done anything.
4087
4088      Last comes code that finishes the operation.  If QUOTIENT is set and
4089      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4090      QUOTIENT is not set, it is computed using trunc rounding.
4091
4092      We try to generate special code for division and remainder when OP1 is a
4093      constant.  If |OP1| = 2**n we can use shifts and some other fast
4094      operations.  For other values of OP1, we compute a carefully selected
4095      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4096      by m.
4097
4098      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4099      half of the product.  Different strategies for generating the product are
4100      implemented in expmed_mult_highpart.
4101
4102      If what we actually want is the remainder, we generate that by another
4103      by-constant multiplication and a subtraction.  */
4104
4105   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4106      code below will malfunction if we are, so check here and handle
4107      the special case if so.  */
4108   if (op1 == const1_rtx)
4109     return rem_flag ? const0_rtx : op0;
4110
4111     /* When dividing by -1, we could get an overflow.
4112      negv_optab can handle overflows.  */
4113   if (! unsignedp && op1 == constm1_rtx)
4114     {
4115       if (rem_flag)
4116         return const0_rtx;
4117       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4118                           ? negv_optab : neg_optab, op0, target, 0);
4119     }
4120
4121   if (target
4122       /* Don't use the function value register as a target
4123          since we have to read it as well as write it,
4124          and function-inlining gets confused by this.  */
4125       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4126           /* Don't clobber an operand while doing a multi-step calculation.  */
4127           || ((rem_flag || op1_is_constant)
4128               && (reg_mentioned_p (target, op0)
4129                   || (MEM_P (op0) && MEM_P (target))))
4130           || reg_mentioned_p (target, op1)
4131           || (MEM_P (op1) && MEM_P (target))))
4132     target = 0;
4133
4134   /* Get the mode in which to perform this computation.  Normally it will
4135      be MODE, but sometimes we can't do the desired operation in MODE.
4136      If so, pick a wider mode in which we can do the operation.  Convert
4137      to that mode at the start to avoid repeated conversions.
4138
4139      First see what operations we need.  These depend on the expression
4140      we are evaluating.  (We assume that divxx3 insns exist under the
4141      same conditions that modxx3 insns and that these insns don't normally
4142      fail.  If these assumptions are not correct, we may generate less
4143      efficient code in some cases.)
4144
4145      Then see if we find a mode in which we can open-code that operation
4146      (either a division, modulus, or shift).  Finally, check for the smallest
4147      mode for which we can do the operation with a library call.  */
4148
4149   /* We might want to refine this now that we have division-by-constant
4150      optimization.  Since expmed_mult_highpart tries so many variants, it is
4151      not straightforward to generalize this.  Maybe we should make an array
4152      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4153
4154   optab1 = (op1_is_pow2
4155             ? (unsignedp ? lshr_optab : ashr_optab)
4156             : (unsignedp ? udiv_optab : sdiv_optab));
4157   optab2 = (op1_is_pow2 ? optab1
4158             : (unsignedp ? udivmod_optab : sdivmod_optab));
4159
4160   FOR_EACH_MODE_FROM (compute_mode, mode)
4161     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4162         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4163       break;
4164
4165   if (compute_mode == VOIDmode)
4166     FOR_EACH_MODE_FROM (compute_mode, mode)
4167       if (optab_libfunc (optab1, compute_mode)
4168           || optab_libfunc (optab2, compute_mode))
4169         break;
4170
4171   /* If we still couldn't find a mode, use MODE, but expand_binop will
4172      probably die.  */
4173   if (compute_mode == VOIDmode)
4174     compute_mode = mode;
4175
4176   if (target && GET_MODE (target) == compute_mode)
4177     tquotient = target;
4178   else
4179     tquotient = gen_reg_rtx (compute_mode);
4180
4181 #if 0
4182   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4183      (mode), and thereby get better code when OP1 is a constant.  Do that
4184      later.  It will require going over all usages of SIZE below.  */
4185   size = GET_MODE_BITSIZE (mode);
4186 #endif
4187
4188   /* Only deduct something for a REM if the last divide done was
4189      for a different constant.   Then set the constant of the last
4190      divide.  */
4191   max_cost = (unsignedp
4192               ? udiv_cost (speed, compute_mode)
4193               : sdiv_cost (speed, compute_mode));
4194   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4195                      && INTVAL (op1) == last_div_const))
4196     max_cost -= (mul_cost (speed, compute_mode)
4197                  + add_cost (speed, compute_mode));
4198
4199   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4200
4201   /* Now convert to the best mode to use.  */
4202   if (compute_mode != mode)
4203     {
4204       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4205       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4206
4207       /* convert_modes may have placed op1 into a register, so we
4208          must recompute the following.  */
4209       op1_is_constant = CONST_INT_P (op1);
4210       if (op1_is_constant)
4211         {
4212           wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4213           op1_is_pow2 = (wi::popcount (ext_op1) == 1
4214                          || (! unsignedp
4215                              && wi::popcount (wi::neg (ext_op1)) == 1));
4216         }
4217       else
4218         op1_is_pow2 = 0;
4219     }
4220
4221   /* If one of the operands is a volatile MEM, copy it into a register.  */
4222
4223   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4224     op0 = force_reg (compute_mode, op0);
4225   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4226     op1 = force_reg (compute_mode, op1);
4227
4228   /* If we need the remainder or if OP1 is constant, we need to
4229      put OP0 in a register in case it has any queued subexpressions.  */
4230   if (rem_flag || op1_is_constant)
4231     op0 = force_reg (compute_mode, op0);
4232
4233   last = get_last_insn ();
4234
4235   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4236   if (unsignedp)
4237     {
4238       if (code == FLOOR_DIV_EXPR)
4239         code = TRUNC_DIV_EXPR;
4240       if (code == FLOOR_MOD_EXPR)
4241         code = TRUNC_MOD_EXPR;
4242       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4243         code = TRUNC_DIV_EXPR;
4244     }
4245
4246   if (op1 != const0_rtx)
4247     switch (code)
4248       {
4249       case TRUNC_MOD_EXPR:
4250       case TRUNC_DIV_EXPR:
4251         if (op1_is_constant)
4252           {
4253             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4254             int size = GET_MODE_BITSIZE (int_mode);
4255             if (unsignedp)
4256               {
4257                 unsigned HOST_WIDE_INT mh, ml;
4258                 int pre_shift, post_shift;
4259                 int dummy;
4260                 wide_int wd = rtx_mode_t (op1, int_mode);
4261                 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4262
4263                 if (wi::popcount (wd) == 1)
4264                   {
4265                     pre_shift = floor_log2 (d);
4266                     if (rem_flag)
4267                       {
4268                         unsigned HOST_WIDE_INT mask
4269                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4270                         remainder
4271                           = expand_binop (int_mode, and_optab, op0,
4272                                           gen_int_mode (mask, int_mode),
4273                                           remainder, 1,
4274                                           OPTAB_LIB_WIDEN);
4275                         if (remainder)
4276                           return gen_lowpart (mode, remainder);
4277                       }
4278                     quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4279                                              pre_shift, tquotient, 1);
4280                   }
4281                 else if (size <= HOST_BITS_PER_WIDE_INT)
4282                   {
4283                     if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4284                       {
4285                         /* Most significant bit of divisor is set; emit an scc
4286                            insn.  */
4287                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4288                                                           int_mode, 1, 1);
4289                       }
4290                     else
4291                       {
4292                         /* Find a suitable multiplier and right shift count
4293                            instead of multiplying with D.  */
4294
4295                         mh = choose_multiplier (d, size, size,
4296                                                 &ml, &post_shift, &dummy);
4297
4298                         /* If the suggested multiplier is more than SIZE bits,
4299                            we can do better for even divisors, using an
4300                            initial right shift.  */
4301                         if (mh != 0 && (d & 1) == 0)
4302                           {
4303                             pre_shift = ctz_or_zero (d);
4304                             mh = choose_multiplier (d >> pre_shift, size,
4305                                                     size - pre_shift,
4306                                                     &ml, &post_shift, &dummy);
4307                             gcc_assert (!mh);
4308                           }
4309                         else
4310                           pre_shift = 0;
4311
4312                         if (mh != 0)
4313                           {
4314                             rtx t1, t2, t3, t4;
4315
4316                             if (post_shift - 1 >= BITS_PER_WORD)
4317                               goto fail1;
4318
4319                             extra_cost
4320                               = (shift_cost (speed, int_mode, post_shift - 1)
4321                                  + shift_cost (speed, int_mode, 1)
4322                                  + 2 * add_cost (speed, int_mode));
4323                             t1 = expmed_mult_highpart
4324                               (int_mode, op0, gen_int_mode (ml, int_mode),
4325                                NULL_RTX, 1, max_cost - extra_cost);
4326                             if (t1 == 0)
4327                               goto fail1;
4328                             t2 = force_operand (gen_rtx_MINUS (int_mode,
4329                                                                op0, t1),
4330                                                 NULL_RTX);
4331                             t3 = expand_shift (RSHIFT_EXPR, int_mode,
4332                                                t2, 1, NULL_RTX, 1);
4333                             t4 = force_operand (gen_rtx_PLUS (int_mode,
4334                                                               t1, t3),
4335                                                 NULL_RTX);
4336                             quotient = expand_shift
4337                               (RSHIFT_EXPR, int_mode, t4,
4338                                post_shift - 1, tquotient, 1);
4339                           }
4340                         else
4341                           {
4342                             rtx t1, t2;
4343
4344                             if (pre_shift >= BITS_PER_WORD
4345                                 || post_shift >= BITS_PER_WORD)
4346                               goto fail1;
4347
4348                             t1 = expand_shift
4349                               (RSHIFT_EXPR, int_mode, op0,
4350                                pre_shift, NULL_RTX, 1);
4351                             extra_cost
4352                               = (shift_cost (speed, int_mode, pre_shift)
4353                                  + shift_cost (speed, int_mode, post_shift));
4354                             t2 = expmed_mult_highpart
4355                               (int_mode, t1,
4356                                gen_int_mode (ml, int_mode),
4357                                NULL_RTX, 1, max_cost - extra_cost);
4358                             if (t2 == 0)
4359                               goto fail1;
4360                             quotient = expand_shift
4361                               (RSHIFT_EXPR, int_mode, t2,
4362                                post_shift, tquotient, 1);
4363                           }
4364                       }
4365                   }
4366                 else            /* Too wide mode to use tricky code */
4367                   break;
4368
4369                 insn = get_last_insn ();
4370                 if (insn != last)
4371                   set_dst_reg_note (insn, REG_EQUAL,
4372                                     gen_rtx_UDIV (int_mode, op0, op1),
4373                                     quotient);
4374               }
4375             else                /* TRUNC_DIV, signed */
4376               {
4377                 unsigned HOST_WIDE_INT ml;
4378                 int lgup, post_shift;
4379                 rtx mlr;
4380                 HOST_WIDE_INT d = INTVAL (op1);
4381                 unsigned HOST_WIDE_INT abs_d;
4382
4383                 /* Since d might be INT_MIN, we have to cast to
4384                    unsigned HOST_WIDE_INT before negating to avoid
4385                    undefined signed overflow.  */
4386                 abs_d = (d >= 0
4387                          ? (unsigned HOST_WIDE_INT) d
4388                          : - (unsigned HOST_WIDE_INT) d);
4389
4390                 /* n rem d = n rem -d */
4391                 if (rem_flag && d < 0)
4392                   {
4393                     d = abs_d;
4394                     op1 = gen_int_mode (abs_d, int_mode);
4395                   }
4396
4397                 if (d == 1)
4398                   quotient = op0;
4399                 else if (d == -1)
4400                   quotient = expand_unop (int_mode, neg_optab, op0,
4401                                           tquotient, 0);
4402                 else if (size <= HOST_BITS_PER_WIDE_INT
4403                          && abs_d == HOST_WIDE_INT_1U << (size - 1))
4404                   {
4405                     /* This case is not handled correctly below.  */
4406                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4407                                                 int_mode, 1, 1);
4408                     if (quotient == 0)
4409                       goto fail1;
4410                   }
4411                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4412                          && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4413                          && (rem_flag
4414                              ? smod_pow2_cheap (speed, int_mode)
4415                              : sdiv_pow2_cheap (speed, int_mode))
4416                          /* We assume that cheap metric is true if the
4417                             optab has an expander for this mode.  */
4418                          && ((optab_handler ((rem_flag ? smod_optab
4419                                               : sdiv_optab),
4420                                              int_mode)
4421                               != CODE_FOR_nothing)
4422                              || (optab_handler (sdivmod_optab, int_mode)
4423                                  != CODE_FOR_nothing)))
4424                   ;
4425                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4426                          && (size <= HOST_BITS_PER_WIDE_INT
4427                              || abs_d != (unsigned HOST_WIDE_INT) d))
4428                   {
4429                     if (rem_flag)
4430                       {
4431                         remainder = expand_smod_pow2 (int_mode, op0, d);
4432                         if (remainder)
4433                           return gen_lowpart (mode, remainder);
4434                       }
4435
4436                     if (sdiv_pow2_cheap (speed, int_mode)
4437                         && ((optab_handler (sdiv_optab, int_mode)
4438                              != CODE_FOR_nothing)
4439                             || (optab_handler (sdivmod_optab, int_mode)
4440                                 != CODE_FOR_nothing)))
4441                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4442                                                 int_mode, op0,
4443                                                 gen_int_mode (abs_d,
4444                                                               int_mode),
4445                                                 NULL_RTX, 0);
4446                     else
4447                       quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4448
4449                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4450                        negate the quotient.  */
4451                     if (d < 0)
4452                       {
4453                         insn = get_last_insn ();
4454                         if (insn != last
4455                             && abs_d < (HOST_WIDE_INT_1U
4456                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4457                           set_dst_reg_note (insn, REG_EQUAL,
4458                                             gen_rtx_DIV (int_mode, op0,
4459                                                          gen_int_mode
4460                                                            (abs_d,
4461                                                             int_mode)),
4462                                             quotient);
4463
4464                         quotient = expand_unop (int_mode, neg_optab,
4465                                                 quotient, quotient, 0);
4466                       }
4467                   }
4468                 else if (size <= HOST_BITS_PER_WIDE_INT)
4469                   {
4470                     choose_multiplier (abs_d, size, size - 1,
4471                                        &ml, &post_shift, &lgup);
4472                     if (ml < HOST_WIDE_INT_1U << (size - 1))
4473                       {
4474                         rtx t1, t2, t3;
4475
4476                         if (post_shift >= BITS_PER_WORD
4477                             || size - 1 >= BITS_PER_WORD)
4478                           goto fail1;
4479
4480                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4481                                       + shift_cost (speed, int_mode, size - 1)
4482                                       + add_cost (speed, int_mode));
4483                         t1 = expmed_mult_highpart
4484                           (int_mode, op0, gen_int_mode (ml, int_mode),
4485                            NULL_RTX, 0, max_cost - extra_cost);
4486                         if (t1 == 0)
4487                           goto fail1;
4488                         t2 = expand_shift
4489                           (RSHIFT_EXPR, int_mode, t1,
4490                            post_shift, NULL_RTX, 0);
4491                         t3 = expand_shift
4492                           (RSHIFT_EXPR, int_mode, op0,
4493                            size - 1, NULL_RTX, 0);
4494                         if (d < 0)
4495                           quotient
4496                             = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4497                                              tquotient);
4498                         else
4499                           quotient
4500                             = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4501                                              tquotient);
4502                       }
4503                     else
4504                       {
4505                         rtx t1, t2, t3, t4;
4506
4507                         if (post_shift >= BITS_PER_WORD
4508                             || size - 1 >= BITS_PER_WORD)
4509                           goto fail1;
4510
4511                         ml |= HOST_WIDE_INT_M1U << (size - 1);
4512                         mlr = gen_int_mode (ml, int_mode);
4513                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4514                                       + shift_cost (speed, int_mode, size - 1)
4515                                       + 2 * add_cost (speed, int_mode));
4516                         t1 = expmed_mult_highpart (int_mode, op0, mlr,
4517                                                    NULL_RTX, 0,
4518                                                    max_cost - extra_cost);
4519                         if (t1 == 0)
4520                           goto fail1;
4521                         t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4522                                             NULL_RTX);
4523                         t3 = expand_shift
4524                           (RSHIFT_EXPR, int_mode, t2,
4525                            post_shift, NULL_RTX, 0);
4526                         t4 = expand_shift
4527                           (RSHIFT_EXPR, int_mode, op0,
4528                            size - 1, NULL_RTX, 0);
4529                         if (d < 0)
4530                           quotient
4531                             = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4532                                              tquotient);
4533                         else
4534                           quotient
4535                             = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4536                                              tquotient);
4537                       }
4538                   }
4539                 else            /* Too wide mode to use tricky code */
4540                   break;
4541
4542                 insn = get_last_insn ();
4543                 if (insn != last)
4544                   set_dst_reg_note (insn, REG_EQUAL,
4545                                     gen_rtx_DIV (int_mode, op0, op1),
4546                                     quotient);
4547               }
4548             break;
4549           }
4550       fail1:
4551         delete_insns_since (last);
4552         break;
4553
4554       case FLOOR_DIV_EXPR:
4555       case FLOOR_MOD_EXPR:
4556       /* We will come here only for signed operations.  */
4557         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4558           {
4559             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4560             int size = GET_MODE_BITSIZE (int_mode);
4561             unsigned HOST_WIDE_INT mh, ml;
4562             int pre_shift, lgup, post_shift;
4563             HOST_WIDE_INT d = INTVAL (op1);
4564
4565             if (d > 0)
4566               {
4567                 /* We could just as easily deal with negative constants here,
4568                    but it does not seem worth the trouble for GCC 2.6.  */
4569                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4570                   {
4571                     pre_shift = floor_log2 (d);
4572                     if (rem_flag)
4573                       {
4574                         unsigned HOST_WIDE_INT mask
4575                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4576                         remainder = expand_binop
4577                           (int_mode, and_optab, op0,
4578                            gen_int_mode (mask, int_mode),
4579                            remainder, 0, OPTAB_LIB_WIDEN);
4580                         if (remainder)
4581                           return gen_lowpart (mode, remainder);
4582                       }
4583                     quotient = expand_shift
4584                       (RSHIFT_EXPR, int_mode, op0,
4585                        pre_shift, tquotient, 0);
4586                   }
4587                 else
4588                   {
4589                     rtx t1, t2, t3, t4;
4590
4591                     mh = choose_multiplier (d, size, size - 1,
4592                                             &ml, &post_shift, &lgup);
4593                     gcc_assert (!mh);
4594
4595                     if (post_shift < BITS_PER_WORD
4596                         && size - 1 < BITS_PER_WORD)
4597                       {
4598                         t1 = expand_shift
4599                           (RSHIFT_EXPR, int_mode, op0,
4600                            size - 1, NULL_RTX, 0);
4601                         t2 = expand_binop (int_mode, xor_optab, op0, t1,
4602                                            NULL_RTX, 0, OPTAB_WIDEN);
4603                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4604                                       + shift_cost (speed, int_mode, size - 1)
4605                                       + 2 * add_cost (speed, int_mode));
4606                         t3 = expmed_mult_highpart
4607                           (int_mode, t2, gen_int_mode (ml, int_mode),
4608                            NULL_RTX, 1, max_cost - extra_cost);
4609                         if (t3 != 0)
4610                           {
4611                             t4 = expand_shift
4612                               (RSHIFT_EXPR, int_mode, t3,
4613                                post_shift, NULL_RTX, 1);
4614                             quotient = expand_binop (int_mode, xor_optab,
4615                                                      t4, t1, tquotient, 0,
4616                                                      OPTAB_WIDEN);
4617                           }
4618                       }
4619                   }
4620               }
4621             else
4622               {
4623                 rtx nsign, t1, t2, t3, t4;
4624                 t1 = force_operand (gen_rtx_PLUS (int_mode,
4625                                                   op0, constm1_rtx), NULL_RTX);
4626                 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4627                                    0, OPTAB_WIDEN);
4628                 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4629                                       size - 1, NULL_RTX, 0);
4630                 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4631                                     NULL_RTX);
4632                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4633                                     NULL_RTX, 0);
4634                 if (t4)
4635                   {
4636                     rtx t5;
4637                     t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4638                                       NULL_RTX, 0);
4639                     quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4640                                               tquotient);
4641                   }
4642               }
4643           }
4644
4645         if (quotient != 0)
4646           break;
4647         delete_insns_since (last);
4648
4649         /* Try using an instruction that produces both the quotient and
4650            remainder, using truncation.  We can easily compensate the quotient
4651            or remainder to get floor rounding, once we have the remainder.
4652            Notice that we compute also the final remainder value here,
4653            and return the result right away.  */
4654         if (target == 0 || GET_MODE (target) != compute_mode)
4655           target = gen_reg_rtx (compute_mode);
4656
4657         if (rem_flag)
4658           {
4659             remainder
4660               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4661             quotient = gen_reg_rtx (compute_mode);
4662           }
4663         else
4664           {
4665             quotient
4666               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4667             remainder = gen_reg_rtx (compute_mode);
4668           }
4669
4670         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4671                                  quotient, remainder, 0))
4672           {
4673             /* This could be computed with a branch-less sequence.
4674                Save that for later.  */
4675             rtx tem;
4676             rtx_code_label *label = gen_label_rtx ();
4677             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4678             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4679                                 NULL_RTX, 0, OPTAB_WIDEN);
4680             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4681             expand_dec (quotient, const1_rtx);
4682             expand_inc (remainder, op1);
4683             emit_label (label);
4684             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4685           }
4686
4687         /* No luck with division elimination or divmod.  Have to do it
4688            by conditionally adjusting op0 *and* the result.  */
4689         {
4690           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4691           rtx adjusted_op0;
4692           rtx tem;
4693
4694           quotient = gen_reg_rtx (compute_mode);
4695           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4696           label1 = gen_label_rtx ();
4697           label2 = gen_label_rtx ();
4698           label3 = gen_label_rtx ();
4699           label4 = gen_label_rtx ();
4700           label5 = gen_label_rtx ();
4701           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4702           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4703           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4704                               quotient, 0, OPTAB_LIB_WIDEN);
4705           if (tem != quotient)
4706             emit_move_insn (quotient, tem);
4707           emit_jump_insn (targetm.gen_jump (label5));
4708           emit_barrier ();
4709           emit_label (label1);
4710           expand_inc (adjusted_op0, const1_rtx);
4711           emit_jump_insn (targetm.gen_jump (label4));
4712           emit_barrier ();
4713           emit_label (label2);
4714           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4715           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4716                               quotient, 0, OPTAB_LIB_WIDEN);
4717           if (tem != quotient)
4718             emit_move_insn (quotient, tem);
4719           emit_jump_insn (targetm.gen_jump (label5));
4720           emit_barrier ();
4721           emit_label (label3);
4722           expand_dec (adjusted_op0, const1_rtx);
4723           emit_label (label4);
4724           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4725                               quotient, 0, OPTAB_LIB_WIDEN);
4726           if (tem != quotient)
4727             emit_move_insn (quotient, tem);
4728           expand_dec (quotient, const1_rtx);
4729           emit_label (label5);
4730         }
4731         break;
4732
4733       case CEIL_DIV_EXPR:
4734       case CEIL_MOD_EXPR:
4735         if (unsignedp)
4736           {
4737             if (op1_is_constant
4738                 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4739                 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4740                     || INTVAL (op1) >= 0))
4741               {
4742                 scalar_int_mode int_mode
4743                   = as_a <scalar_int_mode> (compute_mode);
4744                 rtx t1, t2, t3;
4745                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4746                 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4747                                    floor_log2 (d), tquotient, 1);
4748                 t2 = expand_binop (int_mode, and_optab, op0,
4749                                    gen_int_mode (d - 1, int_mode),
4750                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4751                 t3 = gen_reg_rtx (int_mode);
4752                 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4753                 if (t3 == 0)
4754                   {
4755                     rtx_code_label *lab;
4756                     lab = gen_label_rtx ();
4757                     do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4758                     expand_inc (t1, const1_rtx);
4759                     emit_label (lab);
4760                     quotient = t1;
4761                   }
4762                 else
4763                   quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4764                                             tquotient);
4765                 break;
4766               }
4767
4768             /* Try using an instruction that produces both the quotient and
4769                remainder, using truncation.  We can easily compensate the
4770                quotient or remainder to get ceiling rounding, once we have the
4771                remainder.  Notice that we compute also the final remainder
4772                value here, and return the result right away.  */
4773             if (target == 0 || GET_MODE (target) != compute_mode)
4774               target = gen_reg_rtx (compute_mode);
4775
4776             if (rem_flag)
4777               {
4778                 remainder = (REG_P (target)
4779                              ? target : gen_reg_rtx (compute_mode));
4780                 quotient = gen_reg_rtx (compute_mode);
4781               }
4782             else
4783               {
4784                 quotient = (REG_P (target)
4785                             ? target : gen_reg_rtx (compute_mode));
4786                 remainder = gen_reg_rtx (compute_mode);
4787               }
4788
4789             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4790                                      remainder, 1))
4791               {
4792                 /* This could be computed with a branch-less sequence.
4793                    Save that for later.  */
4794                 rtx_code_label *label = gen_label_rtx ();
4795                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4796                                  compute_mode, label);
4797                 expand_inc (quotient, const1_rtx);
4798                 expand_dec (remainder, op1);
4799                 emit_label (label);
4800                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4801               }
4802
4803             /* No luck with division elimination or divmod.  Have to do it
4804                by conditionally adjusting op0 *and* the result.  */
4805             {
4806               rtx_code_label *label1, *label2;
4807               rtx adjusted_op0, tem;
4808
4809               quotient = gen_reg_rtx (compute_mode);
4810               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4811               label1 = gen_label_rtx ();
4812               label2 = gen_label_rtx ();
4813               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4814                                compute_mode, label1);
4815               emit_move_insn  (quotient, const0_rtx);
4816               emit_jump_insn (targetm.gen_jump (label2));
4817               emit_barrier ();
4818               emit_label (label1);
4819               expand_dec (adjusted_op0, const1_rtx);
4820               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4821                                   quotient, 1, OPTAB_LIB_WIDEN);
4822               if (tem != quotient)
4823                 emit_move_insn (quotient, tem);
4824               expand_inc (quotient, const1_rtx);
4825               emit_label (label2);
4826             }
4827           }
4828         else /* signed */
4829           {
4830             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4831                 && INTVAL (op1) >= 0)
4832               {
4833                 /* This is extremely similar to the code for the unsigned case
4834                    above.  For 2.7 we should merge these variants, but for
4835                    2.6.1 I don't want to touch the code for unsigned since that
4836                    get used in C.  The signed case will only be used by other
4837                    languages (Ada).  */
4838
4839                 rtx t1, t2, t3;
4840                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4841                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4842                                    floor_log2 (d), tquotient, 0);
4843                 t2 = expand_binop (compute_mode, and_optab, op0,
4844                                    gen_int_mode (d - 1, compute_mode),
4845                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4846                 t3 = gen_reg_rtx (compute_mode);
4847                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4848                                       compute_mode, 1, 1);
4849                 if (t3 == 0)
4850                   {
4851                     rtx_code_label *lab;
4852                     lab = gen_label_rtx ();
4853                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4854                     expand_inc (t1, const1_rtx);
4855                     emit_label (lab);
4856                     quotient = t1;
4857                   }
4858                 else
4859                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4860                                                           t1, t3),
4861                                             tquotient);
4862                 break;
4863               }
4864
4865             /* Try using an instruction that produces both the quotient and
4866                remainder, using truncation.  We can easily compensate the
4867                quotient or remainder to get ceiling rounding, once we have the
4868                remainder.  Notice that we compute also the final remainder
4869                value here, and return the result right away.  */
4870             if (target == 0 || GET_MODE (target) != compute_mode)
4871               target = gen_reg_rtx (compute_mode);
4872             if (rem_flag)
4873               {
4874                 remainder= (REG_P (target)
4875                             ? target : gen_reg_rtx (compute_mode));
4876                 quotient = gen_reg_rtx (compute_mode);
4877               }
4878             else
4879               {
4880                 quotient = (REG_P (target)
4881                             ? target : gen_reg_rtx (compute_mode));
4882                 remainder = gen_reg_rtx (compute_mode);
4883               }
4884
4885             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4886                                      remainder, 0))
4887               {
4888                 /* This could be computed with a branch-less sequence.
4889                    Save that for later.  */
4890                 rtx tem;
4891                 rtx_code_label *label = gen_label_rtx ();
4892                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4893                                  compute_mode, label);
4894                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4895                                     NULL_RTX, 0, OPTAB_WIDEN);
4896                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4897                 expand_inc (quotient, const1_rtx);
4898                 expand_dec (remainder, op1);
4899                 emit_label (label);
4900                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4901               }
4902
4903             /* No luck with division elimination or divmod.  Have to do it
4904                by conditionally adjusting op0 *and* the result.  */
4905             {
4906               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4907               rtx adjusted_op0;
4908               rtx tem;
4909
4910               quotient = gen_reg_rtx (compute_mode);
4911               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4912               label1 = gen_label_rtx ();
4913               label2 = gen_label_rtx ();
4914               label3 = gen_label_rtx ();
4915               label4 = gen_label_rtx ();
4916               label5 = gen_label_rtx ();
4917               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4918               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4919                                compute_mode, label1);
4920               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4921                                   quotient, 0, OPTAB_LIB_WIDEN);
4922               if (tem != quotient)
4923                 emit_move_insn (quotient, tem);
4924               emit_jump_insn (targetm.gen_jump (label5));
4925               emit_barrier ();
4926               emit_label (label1);
4927               expand_dec (adjusted_op0, const1_rtx);
4928               emit_jump_insn (targetm.gen_jump (label4));
4929               emit_barrier ();
4930               emit_label (label2);
4931               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4932                                compute_mode, label3);
4933               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4934                                   quotient, 0, OPTAB_LIB_WIDEN);
4935               if (tem != quotient)
4936                 emit_move_insn (quotient, tem);
4937               emit_jump_insn (targetm.gen_jump (label5));
4938               emit_barrier ();
4939               emit_label (label3);
4940               expand_inc (adjusted_op0, const1_rtx);
4941               emit_label (label4);
4942               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4943                                   quotient, 0, OPTAB_LIB_WIDEN);
4944               if (tem != quotient)
4945                 emit_move_insn (quotient, tem);
4946               expand_inc (quotient, const1_rtx);
4947               emit_label (label5);
4948             }
4949           }
4950         break;
4951
4952       case EXACT_DIV_EXPR:
4953         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4954           {
4955             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4956             int size = GET_MODE_BITSIZE (int_mode);
4957             HOST_WIDE_INT d = INTVAL (op1);
4958             unsigned HOST_WIDE_INT ml;
4959             int pre_shift;
4960             rtx t1;
4961
4962             pre_shift = ctz_or_zero (d);
4963             ml = invert_mod2n (d >> pre_shift, size);
4964             t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4965                                pre_shift, NULL_RTX, unsignedp);
4966             quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
4967                                     NULL_RTX, 1);
4968
4969             insn = get_last_insn ();
4970             set_dst_reg_note (insn, REG_EQUAL,
4971                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4972                                               int_mode, op0, op1),
4973                               quotient);
4974           }
4975         break;
4976
4977       case ROUND_DIV_EXPR:
4978       case ROUND_MOD_EXPR:
4979         if (unsignedp)
4980           {
4981             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4982             rtx tem;
4983             rtx_code_label *label;
4984             label = gen_label_rtx ();
4985             quotient = gen_reg_rtx (int_mode);
4986             remainder = gen_reg_rtx (int_mode);
4987             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4988               {
4989                 rtx tem;
4990                 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
4991                                          quotient, 1, OPTAB_LIB_WIDEN);
4992                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
4993                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
4994                                           remainder, 1, OPTAB_LIB_WIDEN);
4995               }
4996             tem = plus_constant (int_mode, op1, -1);
4997             tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
4998             do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
4999             expand_inc (quotient, const1_rtx);
5000             expand_dec (remainder, op1);
5001             emit_label (label);
5002           }
5003         else
5004           {
5005             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5006             int size = GET_MODE_BITSIZE (int_mode);
5007             rtx abs_rem, abs_op1, tem, mask;
5008             rtx_code_label *label;
5009             label = gen_label_rtx ();
5010             quotient = gen_reg_rtx (int_mode);
5011             remainder = gen_reg_rtx (int_mode);
5012             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5013               {
5014                 rtx tem;
5015                 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5016                                          quotient, 0, OPTAB_LIB_WIDEN);
5017                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5018                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5019                                           remainder, 0, OPTAB_LIB_WIDEN);
5020               }
5021             abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5022             abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5023             tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5024                                 1, NULL_RTX, 1);
5025             do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5026             tem = expand_binop (int_mode, xor_optab, op0, op1,
5027                                 NULL_RTX, 0, OPTAB_WIDEN);
5028             mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5029                                  size - 1, NULL_RTX, 0);
5030             tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5031                                 NULL_RTX, 0, OPTAB_WIDEN);
5032             tem = expand_binop (int_mode, sub_optab, tem, mask,
5033                                 NULL_RTX, 0, OPTAB_WIDEN);
5034             expand_inc (quotient, tem);
5035             tem = expand_binop (int_mode, xor_optab, mask, op1,
5036                                 NULL_RTX, 0, OPTAB_WIDEN);
5037             tem = expand_binop (int_mode, sub_optab, tem, mask,
5038                                 NULL_RTX, 0, OPTAB_WIDEN);
5039             expand_dec (remainder, tem);
5040             emit_label (label);
5041           }
5042         return gen_lowpart (mode, rem_flag ? remainder : quotient);
5043
5044       default:
5045         gcc_unreachable ();
5046       }
5047
5048   if (quotient == 0)
5049     {
5050       if (target && GET_MODE (target) != compute_mode)
5051         target = 0;
5052
5053       if (rem_flag)
5054         {
5055           /* Try to produce the remainder without producing the quotient.
5056              If we seem to have a divmod pattern that does not require widening,
5057              don't try widening here.  We should really have a WIDEN argument
5058              to expand_twoval_binop, since what we'd really like to do here is
5059              1) try a mod insn in compute_mode
5060              2) try a divmod insn in compute_mode
5061              3) try a div insn in compute_mode and multiply-subtract to get
5062                 remainder
5063              4) try the same things with widening allowed.  */
5064           remainder
5065             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5066                                  op0, op1, target,
5067                                  unsignedp,
5068                                  ((optab_handler (optab2, compute_mode)
5069                                    != CODE_FOR_nothing)
5070                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
5071           if (remainder == 0)
5072             {
5073               /* No luck there.  Can we do remainder and divide at once
5074                  without a library call?  */
5075               remainder = gen_reg_rtx (compute_mode);
5076               if (! expand_twoval_binop ((unsignedp
5077                                           ? udivmod_optab
5078                                           : sdivmod_optab),
5079                                          op0, op1,
5080                                          NULL_RTX, remainder, unsignedp))
5081                 remainder = 0;
5082             }
5083
5084           if (remainder)
5085             return gen_lowpart (mode, remainder);
5086         }
5087
5088       /* Produce the quotient.  Try a quotient insn, but not a library call.
5089          If we have a divmod in this mode, use it in preference to widening
5090          the div (for this test we assume it will not fail). Note that optab2
5091          is set to the one of the two optabs that the call below will use.  */
5092       quotient
5093         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5094                              op0, op1, rem_flag ? NULL_RTX : target,
5095                              unsignedp,
5096                              ((optab_handler (optab2, compute_mode)
5097                                != CODE_FOR_nothing)
5098                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5099
5100       if (quotient == 0)
5101         {
5102           /* No luck there.  Try a quotient-and-remainder insn,
5103              keeping the quotient alone.  */
5104           quotient = gen_reg_rtx (compute_mode);
5105           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5106                                      op0, op1,
5107                                      quotient, NULL_RTX, unsignedp))
5108             {
5109               quotient = 0;
5110               if (! rem_flag)
5111                 /* Still no luck.  If we are not computing the remainder,
5112                    use a library call for the quotient.  */
5113                 quotient = sign_expand_binop (compute_mode,
5114                                               udiv_optab, sdiv_optab,
5115                                               op0, op1, target,
5116                                               unsignedp, OPTAB_LIB_WIDEN);
5117             }
5118         }
5119     }
5120
5121   if (rem_flag)
5122     {
5123       if (target && GET_MODE (target) != compute_mode)
5124         target = 0;
5125
5126       if (quotient == 0)
5127         {
5128           /* No divide instruction either.  Use library for remainder.  */
5129           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5130                                          op0, op1, target,
5131                                          unsignedp, OPTAB_LIB_WIDEN);
5132           /* No remainder function.  Try a quotient-and-remainder
5133              function, keeping the remainder.  */
5134           if (!remainder)
5135             {
5136               remainder = gen_reg_rtx (compute_mode);
5137               if (!expand_twoval_binop_libfunc
5138                   (unsignedp ? udivmod_optab : sdivmod_optab,
5139                    op0, op1,
5140                    NULL_RTX, remainder,
5141                    unsignedp ? UMOD : MOD))
5142                 remainder = NULL_RTX;
5143             }
5144         }
5145       else
5146         {
5147           /* We divided.  Now finish doing X - Y * (X / Y).  */
5148           remainder = expand_mult (compute_mode, quotient, op1,
5149                                    NULL_RTX, unsignedp);
5150           remainder = expand_binop (compute_mode, sub_optab, op0,
5151                                     remainder, target, unsignedp,
5152                                     OPTAB_LIB_WIDEN);
5153         }
5154     }
5155
5156   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5157 }
5158 \f
5159 /* Return a tree node with data type TYPE, describing the value of X.
5160    Usually this is an VAR_DECL, if there is no obvious better choice.
5161    X may be an expression, however we only support those expressions
5162    generated by loop.c.  */
5163
5164 tree
5165 make_tree (tree type, rtx x)
5166 {
5167   tree t;
5168
5169   switch (GET_CODE (x))
5170     {
5171     case CONST_INT:
5172     case CONST_WIDE_INT:
5173       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5174       return t;
5175
5176     case CONST_DOUBLE:
5177       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5178       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5179         t = wide_int_to_tree (type,
5180                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5181                                                     HOST_BITS_PER_WIDE_INT * 2));
5182       else
5183         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5184
5185       return t;
5186
5187     case CONST_VECTOR:
5188       {
5189         int units = CONST_VECTOR_NUNITS (x);
5190         tree itype = TREE_TYPE (type);
5191         int i;
5192
5193         /* Build a tree with vector elements.  */
5194         tree_vector_builder elts (type, units, 1);
5195         for (i = 0; i < units; ++i)
5196           {
5197             rtx elt = CONST_VECTOR_ELT (x, i);
5198             elts.quick_push (make_tree (itype, elt));
5199           }
5200
5201         return elts.build ();
5202       }
5203
5204     case PLUS:
5205       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5206                           make_tree (type, XEXP (x, 1)));
5207
5208     case MINUS:
5209       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5210                           make_tree (type, XEXP (x, 1)));
5211
5212     case NEG:
5213       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5214
5215     case MULT:
5216       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5217                           make_tree (type, XEXP (x, 1)));
5218
5219     case ASHIFT:
5220       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5221                           make_tree (type, XEXP (x, 1)));
5222
5223     case LSHIFTRT:
5224       t = unsigned_type_for (type);
5225       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5226                                          make_tree (t, XEXP (x, 0)),
5227                                          make_tree (type, XEXP (x, 1))));
5228
5229     case ASHIFTRT:
5230       t = signed_type_for (type);
5231       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5232                                          make_tree (t, XEXP (x, 0)),
5233                                          make_tree (type, XEXP (x, 1))));
5234
5235     case DIV:
5236       if (TREE_CODE (type) != REAL_TYPE)
5237         t = signed_type_for (type);
5238       else
5239         t = type;
5240
5241       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5242                                          make_tree (t, XEXP (x, 0)),
5243                                          make_tree (t, XEXP (x, 1))));
5244     case UDIV:
5245       t = unsigned_type_for (type);
5246       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5247                                          make_tree (t, XEXP (x, 0)),
5248                                          make_tree (t, XEXP (x, 1))));
5249
5250     case SIGN_EXTEND:
5251     case ZERO_EXTEND:
5252       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5253                                           GET_CODE (x) == ZERO_EXTEND);
5254       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5255
5256     case CONST:
5257       {
5258         rtx op = XEXP (x, 0);
5259         if (GET_CODE (op) == VEC_DUPLICATE)
5260           {
5261             tree elt_tree = make_tree (TREE_TYPE (type), XEXP (op, 0));
5262             return build_vector_from_val (type, elt_tree);
5263           }
5264         if (GET_CODE (op) == VEC_SERIES)
5265           {
5266             tree itype = TREE_TYPE (type);
5267             tree base_tree = make_tree (itype, XEXP (op, 0));
5268             tree step_tree = make_tree (itype, XEXP (op, 1));
5269             return build_vec_series (type, base_tree, step_tree);
5270           }
5271         return make_tree (type, op);
5272       }
5273
5274     case SYMBOL_REF:
5275       t = SYMBOL_REF_DECL (x);
5276       if (t)
5277         return fold_convert (type, build_fold_addr_expr (t));
5278       /* fall through.  */
5279
5280     default:
5281       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5282
5283       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5284          address mode to pointer mode.  */
5285       if (POINTER_TYPE_P (type))
5286         x = convert_memory_address_addr_space
5287           (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5288
5289       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5290          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5291       t->decl_with_rtl.rtl = x;
5292
5293       return t;
5294     }
5295 }
5296 \f
5297 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5298    and returning TARGET.
5299
5300    If TARGET is 0, a pseudo-register or constant is returned.  */
5301
5302 rtx
5303 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5304 {
5305   rtx tem = 0;
5306
5307   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5308     tem = simplify_binary_operation (AND, mode, op0, op1);
5309   if (tem == 0)
5310     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5311
5312   if (target == 0)
5313     target = tem;
5314   else if (tem != target)
5315     emit_move_insn (target, tem);
5316   return target;
5317 }
5318
5319 /* Helper function for emit_store_flag.  */
5320 rtx
5321 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5322              machine_mode mode, machine_mode compare_mode,
5323              int unsignedp, rtx x, rtx y, int normalizep,
5324              machine_mode target_mode)
5325 {
5326   struct expand_operand ops[4];
5327   rtx op0, comparison, subtarget;
5328   rtx_insn *last;
5329   scalar_int_mode result_mode = targetm.cstore_mode (icode);
5330   scalar_int_mode int_target_mode;
5331
5332   last = get_last_insn ();
5333   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5334   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5335   if (!x || !y)
5336     {
5337       delete_insns_since (last);
5338       return NULL_RTX;
5339     }
5340
5341   if (target_mode == VOIDmode)
5342     int_target_mode = result_mode;
5343   else
5344     int_target_mode = as_a <scalar_int_mode> (target_mode);
5345   if (!target)
5346     target = gen_reg_rtx (int_target_mode);
5347
5348   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5349
5350   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5351   create_fixed_operand (&ops[1], comparison);
5352   create_fixed_operand (&ops[2], x);
5353   create_fixed_operand (&ops[3], y);
5354   if (!maybe_expand_insn (icode, 4, ops))
5355     {
5356       delete_insns_since (last);
5357       return NULL_RTX;
5358     }
5359   subtarget = ops[0].value;
5360
5361   /* If we are converting to a wider mode, first convert to
5362      INT_TARGET_MODE, then normalize.  This produces better combining
5363      opportunities on machines that have a SIGN_EXTRACT when we are
5364      testing a single bit.  This mostly benefits the 68k.
5365
5366      If STORE_FLAG_VALUE does not have the sign bit set when
5367      interpreted in MODE, we can do this conversion as unsigned, which
5368      is usually more efficient.  */
5369   if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode))
5370     {
5371       convert_move (target, subtarget,
5372                     val_signbit_known_clear_p (result_mode,
5373                                                STORE_FLAG_VALUE));
5374       op0 = target;
5375       result_mode = int_target_mode;
5376     }
5377   else
5378     op0 = subtarget;
5379
5380   /* If we want to keep subexpressions around, don't reuse our last
5381      target.  */
5382   if (optimize)
5383     subtarget = 0;
5384
5385   /* Now normalize to the proper value in MODE.  Sometimes we don't
5386      have to do anything.  */
5387   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5388     ;
5389   /* STORE_FLAG_VALUE might be the most negative number, so write
5390      the comparison this way to avoid a compiler-time warning.  */
5391   else if (- normalizep == STORE_FLAG_VALUE)
5392     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5393
5394   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5395      it hard to use a value of just the sign bit due to ANSI integer
5396      constant typing rules.  */
5397   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5398     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5399                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5400                         normalizep == 1);
5401   else
5402     {
5403       gcc_assert (STORE_FLAG_VALUE & 1);
5404
5405       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5406       if (normalizep == -1)
5407         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5408     }
5409
5410   /* If we were converting to a smaller mode, do the conversion now.  */
5411   if (int_target_mode != result_mode)
5412     {
5413       convert_move (target, op0, 0);
5414       return target;
5415     }
5416   else
5417     return op0;
5418 }
5419
5420
5421 /* A subroutine of emit_store_flag only including "tricks" that do not
5422    need a recursive call.  These are kept separate to avoid infinite
5423    loops.  */
5424
5425 static rtx
5426 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5427                    machine_mode mode, int unsignedp, int normalizep,
5428                    machine_mode target_mode)
5429 {
5430   rtx subtarget;
5431   enum insn_code icode;
5432   machine_mode compare_mode;
5433   enum mode_class mclass;
5434   enum rtx_code scode;
5435
5436   if (unsignedp)
5437     code = unsigned_condition (code);
5438   scode = swap_condition (code);
5439
5440   /* If one operand is constant, make it the second one.  Only do this
5441      if the other operand is not constant as well.  */
5442
5443   if (swap_commutative_operands_p (op0, op1))
5444     {
5445       std::swap (op0, op1);
5446       code = swap_condition (code);
5447     }
5448
5449   if (mode == VOIDmode)
5450     mode = GET_MODE (op0);
5451
5452   /* For some comparisons with 1 and -1, we can convert this to
5453      comparisons with zero.  This will often produce more opportunities for
5454      store-flag insns.  */
5455
5456   switch (code)
5457     {
5458     case LT:
5459       if (op1 == const1_rtx)
5460         op1 = const0_rtx, code = LE;
5461       break;
5462     case LE:
5463       if (op1 == constm1_rtx)
5464         op1 = const0_rtx, code = LT;
5465       break;
5466     case GE:
5467       if (op1 == const1_rtx)
5468         op1 = const0_rtx, code = GT;
5469       break;
5470     case GT:
5471       if (op1 == constm1_rtx)
5472         op1 = const0_rtx, code = GE;
5473       break;
5474     case GEU:
5475       if (op1 == const1_rtx)
5476         op1 = const0_rtx, code = NE;
5477       break;
5478     case LTU:
5479       if (op1 == const1_rtx)
5480         op1 = const0_rtx, code = EQ;
5481       break;
5482     default:
5483       break;
5484     }
5485
5486   /* If we are comparing a double-word integer with zero or -1, we can
5487      convert the comparison into one involving a single word.  */
5488   scalar_int_mode int_mode;
5489   if (is_int_mode (mode, &int_mode)
5490       && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5491       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5492     {
5493       rtx tem;
5494       if ((code == EQ || code == NE)
5495           && (op1 == const0_rtx || op1 == constm1_rtx))
5496         {
5497           rtx op00, op01;
5498
5499           /* Do a logical OR or AND of the two words and compare the
5500              result.  */
5501           op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5502           op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5503           tem = expand_binop (word_mode,
5504                               op1 == const0_rtx ? ior_optab : and_optab,
5505                               op00, op01, NULL_RTX, unsignedp,
5506                               OPTAB_DIRECT);
5507
5508           if (tem != 0)
5509             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5510                                    unsignedp, normalizep);
5511         }
5512       else if ((code == LT || code == GE) && op1 == const0_rtx)
5513         {
5514           rtx op0h;
5515
5516           /* If testing the sign bit, can just test on high word.  */
5517           op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5518                                       subreg_highpart_offset (word_mode,
5519                                                               int_mode));
5520           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5521                                  unsignedp, normalizep);
5522         }
5523       else
5524         tem = NULL_RTX;
5525
5526       if (tem)
5527         {
5528           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5529             return tem;
5530           if (!target)
5531             target = gen_reg_rtx (target_mode);
5532
5533           convert_move (target, tem,
5534                         !val_signbit_known_set_p (word_mode,
5535                                                   (normalizep ? normalizep
5536                                                    : STORE_FLAG_VALUE)));
5537           return target;
5538         }
5539     }
5540
5541   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5542      complement of A (for GE) and shifting the sign bit to the low bit.  */
5543   if (op1 == const0_rtx && (code == LT || code == GE)
5544       && is_int_mode (mode, &int_mode)
5545       && (normalizep || STORE_FLAG_VALUE == 1
5546           || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5547     {
5548       scalar_int_mode int_target_mode;
5549       subtarget = target;
5550
5551       if (!target)
5552         int_target_mode = int_mode;
5553       else
5554         {
5555           /* If the result is to be wider than OP0, it is best to convert it
5556              first.  If it is to be narrower, it is *incorrect* to convert it
5557              first.  */
5558           int_target_mode = as_a <scalar_int_mode> (target_mode);
5559           if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5560             {
5561               op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5562               int_mode = int_target_mode;
5563             }
5564         }
5565
5566       if (int_target_mode != int_mode)
5567         subtarget = 0;
5568
5569       if (code == GE)
5570         op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5571                            ((STORE_FLAG_VALUE == 1 || normalizep)
5572                             ? 0 : subtarget), 0);
5573
5574       if (STORE_FLAG_VALUE == 1 || normalizep)
5575         /* If we are supposed to produce a 0/1 value, we want to do
5576            a logical shift from the sign bit to the low-order bit; for
5577            a -1/0 value, we do an arithmetic shift.  */
5578         op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5579                             GET_MODE_BITSIZE (int_mode) - 1,
5580                             subtarget, normalizep != -1);
5581
5582       if (int_mode != int_target_mode)
5583         op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5584
5585       return op0;
5586     }
5587
5588   mclass = GET_MODE_CLASS (mode);
5589   FOR_EACH_MODE_FROM (compare_mode, mode)
5590     {
5591      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5592      icode = optab_handler (cstore_optab, optab_mode);
5593      if (icode != CODE_FOR_nothing)
5594         {
5595           do_pending_stack_adjust ();
5596           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5597                                  unsignedp, op0, op1, normalizep, target_mode);
5598           if (tem)
5599             return tem;
5600
5601           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5602             {
5603               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5604                                  unsignedp, op1, op0, normalizep, target_mode);
5605               if (tem)
5606                 return tem;
5607             }
5608           break;
5609         }
5610     }
5611
5612   return 0;
5613 }
5614
5615 /* Subroutine of emit_store_flag that handles cases in which the operands
5616    are scalar integers.  SUBTARGET is the target to use for temporary
5617    operations and TRUEVAL is the value to store when the condition is
5618    true.  All other arguments are as for emit_store_flag.  */
5619
5620 rtx
5621 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5622                      rtx op1, scalar_int_mode mode, int unsignedp,
5623                      int normalizep, rtx trueval)
5624 {
5625   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5626   rtx_insn *last = get_last_insn ();
5627
5628   /* If this is an equality comparison of integers, we can try to exclusive-or
5629      (or subtract) the two operands and use a recursive call to try the
5630      comparison with zero.  Don't do any of these cases if branches are
5631      very cheap.  */
5632
5633   if ((code == EQ || code == NE) && op1 != const0_rtx)
5634     {
5635       rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5636                               OPTAB_WIDEN);
5637
5638       if (tem == 0)
5639         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5640                             OPTAB_WIDEN);
5641       if (tem != 0)
5642         tem = emit_store_flag (target, code, tem, const0_rtx,
5643                                mode, unsignedp, normalizep);
5644       if (tem != 0)
5645         return tem;
5646
5647       delete_insns_since (last);
5648     }
5649
5650   /* For integer comparisons, try the reverse comparison.  However, for
5651      small X and if we'd have anyway to extend, implementing "X != 0"
5652      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5653   rtx_code rcode = reverse_condition (code);
5654   if (can_compare_p (rcode, mode, ccp_store_flag)
5655       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5656             && code == NE
5657             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5658             && op1 == const0_rtx))
5659     {
5660       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5661                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5662
5663       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5664       if (want_add
5665           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5666                        optimize_insn_for_speed_p ()) == 0)
5667         {
5668           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5669                                        STORE_FLAG_VALUE, target_mode);
5670           if (tem != 0)
5671             tem = expand_binop (target_mode, add_optab, tem,
5672                                 gen_int_mode (normalizep, target_mode),
5673                                 target, 0, OPTAB_WIDEN);
5674           if (tem != 0)
5675             return tem;
5676         }
5677       else if (!want_add
5678                && rtx_cost (trueval, mode, XOR, 1,
5679                             optimize_insn_for_speed_p ()) == 0)
5680         {
5681           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5682                                        normalizep, target_mode);
5683           if (tem != 0)
5684             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5685                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5686           if (tem != 0)
5687             return tem;
5688         }
5689
5690       delete_insns_since (last);
5691     }
5692
5693   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5694      the constant zero.  Reject all other comparisons at this point.  Only
5695      do LE and GT if branches are expensive since they are expensive on
5696      2-operand machines.  */
5697
5698   if (op1 != const0_rtx
5699       || (code != EQ && code != NE
5700           && (BRANCH_COST (optimize_insn_for_speed_p (),
5701                            false) <= 1 || (code != LE && code != GT))))
5702     return 0;
5703
5704   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5705      do the necessary operation below.  */
5706
5707   rtx tem = 0;
5708
5709   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5710      the sign bit set.  */
5711
5712   if (code == LE)
5713     {
5714       /* This is destructive, so SUBTARGET can't be OP0.  */
5715       if (rtx_equal_p (subtarget, op0))
5716         subtarget = 0;
5717
5718       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5719                           OPTAB_WIDEN);
5720       if (tem)
5721         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5722                             OPTAB_WIDEN);
5723     }
5724
5725   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5726      number of bits in the mode of OP0, minus one.  */
5727
5728   if (code == GT)
5729     {
5730       if (rtx_equal_p (subtarget, op0))
5731         subtarget = 0;
5732
5733       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5734                                 GET_MODE_BITSIZE (mode) - 1,
5735                                 subtarget, 0);
5736       if (tem)
5737         tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5738                             OPTAB_WIDEN);
5739     }
5740
5741   if (code == EQ || code == NE)
5742     {
5743       /* For EQ or NE, one way to do the comparison is to apply an operation
5744          that converts the operand into a positive number if it is nonzero
5745          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5746          for NE we negate.  This puts the result in the sign bit.  Then we
5747          normalize with a shift, if needed.
5748
5749          Two operations that can do the above actions are ABS and FFS, so try
5750          them.  If that doesn't work, and MODE is smaller than a full word,
5751          we can use zero-extension to the wider mode (an unsigned conversion)
5752          as the operation.  */
5753
5754       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5755          that is compensated by the subsequent overflow when subtracting
5756          one / negating.  */
5757
5758       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5759         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5760       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5761         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5762       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5763         {
5764           tem = convert_modes (word_mode, mode, op0, 1);
5765           mode = word_mode;
5766         }
5767
5768       if (tem != 0)
5769         {
5770           if (code == EQ)
5771             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5772                                 0, OPTAB_WIDEN);
5773           else
5774             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5775         }
5776
5777       /* If we couldn't do it that way, for NE we can "or" the two's complement
5778          of the value with itself.  For EQ, we take the one's complement of
5779          that "or", which is an extra insn, so we only handle EQ if branches
5780          are expensive.  */
5781
5782       if (tem == 0
5783           && (code == NE
5784               || BRANCH_COST (optimize_insn_for_speed_p (),
5785                               false) > 1))
5786         {
5787           if (rtx_equal_p (subtarget, op0))
5788             subtarget = 0;
5789
5790           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5791           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5792                               OPTAB_WIDEN);
5793
5794           if (tem && code == EQ)
5795             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5796         }
5797     }
5798
5799   if (tem && normalizep)
5800     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5801                               GET_MODE_BITSIZE (mode) - 1,
5802                               subtarget, normalizep == 1);
5803
5804   if (tem)
5805     {
5806       if (!target)
5807         ;
5808       else if (GET_MODE (tem) != target_mode)
5809         {
5810           convert_move (target, tem, 0);
5811           tem = target;
5812         }
5813       else if (!subtarget)
5814         {
5815           emit_move_insn (target, tem);
5816           tem = target;
5817         }
5818     }
5819   else
5820     delete_insns_since (last);
5821
5822   return tem;
5823 }
5824
/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
   and storing in TARGET.  Normally return TARGET.
   Return 0 if that cannot be done.

   TARGET may be 0, in which case no particular destination is
   requested.

   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
   it is VOIDmode, they cannot both be CONST_INT.

   UNSIGNEDP is for the case where we have to widen the operands
   to perform the operation.  It says to use zero-extension.

   NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  NORMALIZEP is -1 if we should convert the result to be
   either zero or -1.  If NORMALIZEP is zero, the result will be left
   "raw" out of the scc insn.  */

rtx
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
                 machine_mode mode, int unsignedp, int normalizep)
{
  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  enum rtx_code rcode;
  rtx subtarget;
  rtx tem, trueval;
  rtx_insn *last;

  /* If we compare constants, we shouldn't use a store-flag operation,
     but a constant load.  We can get there via the vanilla route that
     usually generates a compare-branch sequence, but will in this case
     fold the comparison to a constant, and thus elide the branch.  */
  if (CONSTANT_P (op0) && CONSTANT_P (op1))
    return NULL_RTX;

  /* First try the approaches that need no recursion.  */
  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
                           target_mode);
  if (tem)
    return tem;

  /* If we reached here, we can't do this with a scc insn, however there
     are some comparisons that can be done in other ways.  Don't do any
     of these cases if branches are very cheap.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
    return 0;

  /* See what we need to return.  We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
      /* A raw result was requested: the tricks below can only honor
         that if STORE_FLAG_VALUE is 1, -1 or just the sign bit of
         MODE.  */
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
        normalizep = STORE_FLAG_VALUE;

      else if (val_signbit_p (mode, STORE_FLAG_VALUE))
        ;
      else
        return 0;
    }

  /* Remember where we are so that failed attempts can be undone.  */
  last = get_last_insn ();

  /* If optimizing, use different pseudo registers for each insn, instead
     of reusing the same pseudo.  This leads to better CSE, but slows
     down the compiler, since there are more pseudos.  */
  subtarget = (!optimize
               && (target_mode == mode)) ? target : NULL_RTX;
  /* The value that represents "true" in the final result.  */
  trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);

  /* For floating-point comparisons, try the reverse comparison or try
     changing the "orderedness" of the comparison.  */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      enum rtx_code first_code;
      bool and_them;

      rcode = reverse_condition_maybe_unordered (code);
      if (can_compare_p (rcode, mode, ccp_store_flag)
          && (code == ORDERED || code == UNORDERED
              || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
              || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
        {
          /* Inverting the reversed flag needs an addition when the raw
             store-flag value and the requested value are 1 and -1 (in
             either order); otherwise an XOR with TRUEVAL is used.  */
          int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
                          || (STORE_FLAG_VALUE == -1 && normalizep == 1));

          /* For the reverse comparison, use either an addition or a XOR.  */
          if (want_add
              && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
                           optimize_insn_for_speed_p ()) == 0)
            {
              tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                       STORE_FLAG_VALUE, target_mode);
              if (tem)
                return expand_binop (target_mode, add_optab, tem,
                                     gen_int_mode (normalizep, target_mode),
                                     target, 0, OPTAB_WIDEN);
            }
          else if (!want_add
                   && rtx_cost (trueval, mode, XOR, 1,
                                optimize_insn_for_speed_p ()) == 0)
            {
              tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                       normalizep, target_mode);
              if (tem)
                return expand_binop (target_mode, xor_optab, tem, trueval,
                                     target, INTVAL (trueval) >= 0,
                                     OPTAB_WIDEN);
            }
        }

      /* The reverse comparison did not work out; discard whatever it
         emitted.  */
      delete_insns_since (last);

      /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
      if (code == ORDERED || code == UNORDERED)
        return 0;

      /* Split CODE into FIRST_CODE and a replacement for CODE; AND_THEM
         says whether the two results have to be combined with "and"
         (otherwise with "or"), see the conditional moves below.  */
      and_them = split_comparison (code, mode, &first_code, &code);

      /* If there are no NaNs, the first comparison should always fall through.
         Effectively change the comparison to the other one.  */
      if (!HONOR_NANS (mode))
        {
          gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
          return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
                                    target_mode);
        }

      if (!HAVE_conditional_move)
        return 0;

      /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
         conditional move.  */
      tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
                               normalizep, target_mode);
      if (tem == 0)
        return 0;

      /* "And": keep the first result if the second comparison holds,
         else zero.  "Or": TRUEVAL if the second comparison holds, else
         the first result.  */
      if (and_them)
        tem = emit_conditional_move (target, code, op0, op1, mode,
                                     tem, const0_rtx, GET_MODE (tem), 0);
      else
        tem = emit_conditional_move (target, code, op0, op1, mode,
                                     trueval, tem, GET_MODE (tem), 0);

      if (tem == 0)
        delete_insns_since (last);
      return tem;
    }

  /* The remaining tricks only apply to integer comparisons.  */

  scalar_int_mode int_mode;
  if (is_int_mode (mode, &int_mode))
    return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
                                unsignedp, normalizep, trueval);

  return 0;
}
5980
5981 /* Like emit_store_flag, but always succeeds.  */
5982
5983 rtx
5984 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5985                        machine_mode mode, int unsignedp, int normalizep)
5986 {
5987   rtx tem;
5988   rtx_code_label *label;
5989   rtx trueval, falseval;
5990
5991   /* First see if emit_store_flag can do the job.  */
5992   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5993   if (tem != 0)
5994     return tem;
5995
5996   if (!target)
5997     target = gen_reg_rtx (word_mode);
5998
5999   /* If this failed, we have to do this with set/compare/jump/set code.
6000      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
6001   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6002   if (code == NE
6003       && GET_MODE_CLASS (mode) == MODE_INT
6004       && REG_P (target)
6005       && op0 == target
6006       && op1 == const0_rtx)
6007     {
6008       label = gen_label_rtx ();
6009       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6010                                NULL_RTX, NULL, label,
6011                                profile_probability::uninitialized ());
6012       emit_move_insn (target, trueval);
6013       emit_label (label);
6014       return target;
6015     }
6016
6017   if (!REG_P (target)
6018       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6019     target = gen_reg_rtx (GET_MODE (target));
6020
6021   /* Jump in the right direction if the target cannot implement CODE
6022      but can jump on its reverse condition.  */
6023   falseval = const0_rtx;
6024   if (! can_compare_p (code, mode, ccp_jump)
6025       && (! FLOAT_MODE_P (mode)
6026           || code == ORDERED || code == UNORDERED
6027           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6028           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6029     {
6030       enum rtx_code rcode;
6031       if (FLOAT_MODE_P (mode))
6032         rcode = reverse_condition_maybe_unordered (code);
6033       else
6034         rcode = reverse_condition (code);
6035
6036       /* Canonicalize to UNORDERED for the libcall.  */
6037       if (can_compare_p (rcode, mode, ccp_jump)
6038           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6039         {
6040           falseval = trueval;
6041           trueval = const0_rtx;
6042           code = rcode;
6043         }
6044     }
6045
6046   emit_move_insn (target, trueval);
6047   label = gen_label_rtx ();
6048   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6049                            label, profile_probability::uninitialized ());
6050
6051   emit_move_insn (target, falseval);
6052   emit_label (label);
6053
6054   return target;
6055 }
6056 \f
6057 /* Perform possibly multi-word comparison and conditional jump to LABEL
6058    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
6059    now a thin wrapper around do_compare_rtx_and_jump.  */
6060
6061 static void
6062 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6063                  rtx_code_label *label)
6064 {
6065   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6066   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6067                            NULL, label, profile_probability::uninitialized ());
6068 }