/* gcc/lower-subreg.c — from the thirdparty/gcc.git mirror (git.ipfire.org),
   snapshot circa 2015-07-10.  */
1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2015 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "cfghooks.h"
27 #include "tree.h"
28 #include "rtl.h"
29 #include "df.h"
30 #include "alias.h"
31 #include "tm_p.h"
32 #include "flags.h"
33 #include "insn-config.h"
34 #include "obstack.h"
35 #include "cfgrtl.h"
36 #include "cfgbuild.h"
37 #include "recog.h"
38 #include "dce.h"
39 #include "expmed.h"
40 #include "dojump.h"
41 #include "explow.h"
42 #include "calls.h"
43 #include "emit-rtl.h"
44 #include "varasm.h"
45 #include "stmt.h"
46 #include "expr.h"
47 #include "except.h"
48 #include "regs.h"
49 #include "tree-pass.h"
50 #include "lower-subreg.h"
51 #include "rtl-iter.h"
52
53
54 /* Decompose multi-word pseudo-registers into individual
55 pseudo-registers when possible and profitable. This is possible
56 when all the uses of a multi-word register are via SUBREG, or are
57 copies of the register to another location. Breaking apart the
58 register permits more CSE and permits better register allocation.
59 This is profitable if the machine does not have move instructions
60 to do this.
61
62 This pass only splits moves with modes that are wider than
63 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
64 integer modes that are twice the width of word_mode. The latter
65 could be generalized if there was a need to do this, but the trend in
66 architectures is to not need this.
67
68 There are two useful preprocessor defines for use by maintainers:
69
70 #define LOG_COSTS 1
71
72 if you wish to see the actual cost estimates that are being used
73 for each mode wider than word mode and the cost estimates for zero
74 extension and the shifts. This can be useful when port maintainers
75 are tuning insn rtx costs.
76
77 #define FORCE_LOWERING 1
78
79 if you wish to test the pass with all the transformation forced on.
80 This can be useful for finding bugs in the transformations. */
81
#define LOG_COSTS 0
#define FORCE_LOWERING 0

/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it cannot be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register is accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  */
static vec<bitmap> reg_copy_graph;

/* Per-target cost/choice data; with SWITCHABLE_TARGET the pointer is
   swapped when the target changes.  */
struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

/* Shorthand accessors for the per-target state.  */
#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices
113
/* RTXes used while computing costs.  These are scratch expressions:
   the cost routines below mutate their codes and modes in place
   (PUT_CODE/PUT_MODE) before each cost query.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};
129
130 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
131 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
132
133 static int
134 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
135 machine_mode mode, int op1)
136 {
137 PUT_CODE (rtxes->shift, code);
138 PUT_MODE (rtxes->shift, mode);
139 PUT_MODE (rtxes->source, mode);
140 XEXP (rtxes->shift, 1) = GEN_INT (op1);
141 return set_src_cost (rtxes->shift, mode, speed_p);
142 }
143
144 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
145 to true if it is profitable to split a double-word CODE shift
146 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
147 for speed or size profitability.
148
149 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
150 the cost of moving zero into a word-mode register. WORD_MOVE_COST
151 is the cost of moving between word registers. */
152
153 static void
154 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
155 bool *splitting, enum rtx_code code,
156 int word_move_zero_cost, int word_move_cost)
157 {
158 int wide_cost, narrow_cost, upper_cost, i;
159
160 for (i = 0; i < BITS_PER_WORD; i++)
161 {
162 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
163 i + BITS_PER_WORD);
164 if (i == 0)
165 narrow_cost = word_move_cost;
166 else
167 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
168
169 if (code != ASHIFTRT)
170 upper_cost = word_move_zero_cost;
171 else if (i == BITS_PER_WORD - 1)
172 upper_cost = word_move_cost;
173 else
174 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
175 BITS_PER_WORD - 1);
176
177 if (LOG_COSTS)
178 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
179 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
180 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
181
182 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
183 splitting[i] = true;
184 }
185 }
186
/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.  The results are
   recorded in choices[SPEED_P].  */

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Cost of loading constant zero into a word-mode register.  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  /* Cost of a word-mode register-to-register move.  */
  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      machine_mode mode = (machine_mode) i;
      int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      /* Only multi-word modes are candidates for splitting.  */
      if (factor > 1)
	{
	  int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  /* Split when a full-mode move costs at least as much as the
	     equivalent sequence of word moves.  */
	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here to check to see if moving the upper part with a
	 zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      /* Fill in the per-shift-count splitting tables for each shift
	 code we know how to lower.  */
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}
266
267 /* Do one-per-target initialisation. This involves determining
268 which operations on the machine are profitable. If none are found,
269 then the pass just returns when called. */
270
271 void
272 init_lower_subreg (void)
273 {
274 struct cost_rtxes rtxes;
275
276 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
277
278 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
279
280 rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
281 rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
282 rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
283 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
284 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
285
286 if (LOG_COSTS)
287 fprintf (stderr, "\nSize costs\n==========\n\n");
288 compute_costs (false, &rtxes);
289
290 if (LOG_COSTS)
291 fprintf (stderr, "\nSpeed costs\n===========\n\n");
292 compute_costs (true, &rtxes);
293 }
294
295 static bool
296 simple_move_operand (rtx x)
297 {
298 if (GET_CODE (x) == SUBREG)
299 x = SUBREG_REG (x);
300
301 if (!OBJECT_P (x))
302 return false;
303
304 if (GET_CODE (x) == LABEL_REF
305 || GET_CODE (x) == SYMBOL_REF
306 || GET_CODE (x) == HIGH
307 || GET_CODE (x) == CONST)
308 return false;
309
310 if (MEM_P (x)
311 && (MEM_VOLATILE_P (x)
312 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
313 return false;
314
315 return true;
316 }
317
318 /* If INSN is a single set between two objects that we want to split,
319 return the single set. SPEED_P says whether we are optimizing
320 INSN for speed or size.
321
322 INSN should have been passed to recog and extract_insn before this
323 is called. */
324
325 static rtx
326 simple_move (rtx_insn *insn, bool speed_p)
327 {
328 rtx x;
329 rtx set;
330 machine_mode mode;
331
332 if (recog_data.n_operands != 2)
333 return NULL_RTX;
334
335 set = single_set (insn);
336 if (!set)
337 return NULL_RTX;
338
339 x = SET_DEST (set);
340 if (x != recog_data.operand[0] && x != recog_data.operand[1])
341 return NULL_RTX;
342 if (!simple_move_operand (x))
343 return NULL_RTX;
344
345 x = SET_SRC (set);
346 if (x != recog_data.operand[0] && x != recog_data.operand[1])
347 return NULL_RTX;
348 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
349 things like x86 rdtsc which returns a DImode value. */
350 if (GET_CODE (x) != ASM_OPERANDS
351 && !simple_move_operand (x))
352 return NULL_RTX;
353
354 /* We try to decompose in integer modes, to avoid generating
355 inefficient code copying between integer and floating point
356 registers. That means that we can't decompose if this is a
357 non-integer mode for which there is no integer mode of the same
358 size. */
359 mode = GET_MODE (SET_DEST (set));
360 if (!SCALAR_INT_MODE_P (mode)
361 && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
362 == BLKmode))
363 return NULL_RTX;
364
365 /* Reject PARTIAL_INT modes. They are used for processor specific
366 purposes and it's probably best not to tamper with them. */
367 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
368 return NULL_RTX;
369
370 if (!choices[speed_p].move_modes_to_split[(int) mode])
371 return NULL_RTX;
372
373 return set;
374 }
375
376 /* If SET is a copy from one multi-word pseudo-register to another,
377 record that in reg_copy_graph. Return whether it is such a
378 copy. */
379
380 static bool
381 find_pseudo_copy (rtx set)
382 {
383 rtx dest = SET_DEST (set);
384 rtx src = SET_SRC (set);
385 unsigned int rd, rs;
386 bitmap b;
387
388 if (!REG_P (dest) || !REG_P (src))
389 return false;
390
391 rd = REGNO (dest);
392 rs = REGNO (src);
393 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
394 return false;
395
396 b = reg_copy_graph[rs];
397 if (b == NULL)
398 {
399 b = BITMAP_ALLOC (NULL);
400 reg_copy_graph[rs] = b;
401 }
402
403 bitmap_set_bit (b, rd);
404
405 return true;
406 }
407
408 /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
409 where they are copied to another register, add the register to
410 which they are copied to DECOMPOSABLE_CONTEXT. Use
411 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
412 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
413
414 static void
415 propagate_pseudo_copies (void)
416 {
417 bitmap queue, propagate;
418
419 queue = BITMAP_ALLOC (NULL);
420 propagate = BITMAP_ALLOC (NULL);
421
422 bitmap_copy (queue, decomposable_context);
423 do
424 {
425 bitmap_iterator iter;
426 unsigned int i;
427
428 bitmap_clear (propagate);
429
430 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
431 {
432 bitmap b = reg_copy_graph[i];
433 if (b)
434 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
435 }
436
437 bitmap_and_compl (queue, propagate, decomposable_context);
438 bitmap_ior_into (decomposable_context, propagate);
439 }
440 while (!bitmap_empty_p (queue));
441
442 BITMAP_FREE (queue);
443 BITMAP_FREE (propagate);
444 }
445
/* Classification of the insn currently being scanned.  A pointer to
   one of these values is passed to find_decomposable_subregs.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
458
/* If we find a SUBREG in *LOC which we could use to decompose a
   pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
   unadorned register which is not a simple pseudo-register copy,
   *PCMI gives the type of move, and we set a bit in
   DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */

static void
find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == SUBREG)
	{
	  rtx inner = SUBREG_REG (x);
	  unsigned int regno, outer_size, inner_size, outer_words, inner_words;

	  if (!REG_P (inner))
	    continue;

	  regno = REGNO (inner);
	  if (HARD_REGISTER_NUM_P (regno))
	    {
	      /* Hard registers are never decomposed, so there is no
		 need to look inside this subreg.  */
	      iter.skip_subrtxes ();
	      continue;
	    }

	  outer_size = GET_MODE_SIZE (GET_MODE (x));
	  inner_size = GET_MODE_SIZE (GET_MODE (inner));
	  outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
	  inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

	  /* We only try to decompose single word subregs of multi-word
	     registers.  When we find one, we skip the subrtxes to avoid
	     iterating over the inner register.

	     ??? This doesn't allow, e.g., DImode subregs of TImode values
	     on 32-bit targets.  We would need to record the way the
	     pseudo-register was used, and only decompose if all the uses
	     were the same number and size of pieces.  Hopefully this
	     doesn't happen much.  */

	  if (outer_words == 1 && inner_words > 1)
	    {
	      bitmap_set_bit (decomposable_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* If this is a cast from one mode to another, where the modes
	     have the same size, and they are not tieable, then mark this
	     register as non-decomposable.  If we decompose it we are
	     likely to mess up whatever the backend is trying to do.  */
	  if (outer_words > 1
	      && outer_size == inner_size
	      && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
	    {
	      bitmap_set_bit (non_decomposable_context, regno);
	      bitmap_set_bit (subreg_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }
	}
      else if (REG_P (x))
	{
	  unsigned int regno;

	  /* We will see an outer SUBREG before we see the inner REG, so
	     when we see a plain REG here it means a direct reference to
	     the register.

	     If this is not a simple copy from one location to another,
	     then we can not decompose this register.  If this is a simple
	     copy we want to decompose, and the mode is right,
	     then we mark the register as decomposable.
	     Otherwise we don't say anything about this register --
	     it could be decomposed, but whether that would be
	     profitable depends upon how it is used elsewhere.

	     We only set bits in the bitmap for multi-word
	     pseudo-registers, since those are the only ones we care about
	     and it keeps the size of the bitmaps down.  */

	  regno = REGNO (x);
	  if (!HARD_REGISTER_NUM_P (regno)
	      && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	    {
	      switch (*pcmi)
		{
		case NOT_SIMPLE_MOVE:
		  bitmap_set_bit (non_decomposable_context, regno);
		  break;
		case DECOMPOSABLE_SIMPLE_MOVE:
		  if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
		    bitmap_set_bit (decomposable_context, regno);
		  break;
		case SIMPLE_MOVE:
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else if (MEM_P (x))
	{
	  enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

	  /* Any registers used in a MEM do not participate in a
	     SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	     here, and skip the subrtxes to block the parent's recursion.  */
	  find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
	  iter.skip_subrtxes ();
	}
    }
}
575
576 /* Decompose REGNO into word-sized components. We smash the REG node
577 in place. This ensures that (1) something goes wrong quickly if we
578 fail to make some replacement, and (2) the debug information inside
579 the symbol table is automatically kept up to date. */
580
581 static void
582 decompose_register (unsigned int regno)
583 {
584 rtx reg;
585 unsigned int words, i;
586 rtvec v;
587
588 reg = regno_reg_rtx[regno];
589
590 regno_reg_rtx[regno] = NULL_RTX;
591
592 words = GET_MODE_SIZE (GET_MODE (reg));
593 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
594
595 v = rtvec_alloc (words);
596 for (i = 0; i < words; ++i)
597 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
598
599 PUT_CODE (reg, CONCATN);
600 XVEC (reg, 0) = v;
601
602 if (dump_file)
603 {
604 fprintf (dump_file, "; Splitting reg %u ->", regno);
605 for (i = 0; i < words; ++i)
606 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
607 fputc ('\n', dump_file);
608 }
609 }
610
/* Get a SUBREG of a CONCATN.  OP is the CONCATN, OUTERMODE the mode
   wanted and BYTE the byte offset into OP.  Return NULL_RTX if the
   requested piece would straddle two elements of the CONCATN.  */

static rtx
simplify_subreg_concatn (machine_mode outermode, rtx op,
			 unsigned int byte)
{
  unsigned int inner_size;
  machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;

  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);

  innermode = GET_MODE (op);
  gcc_assert (byte < GET_MODE_SIZE (innermode));
  gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));

  /* Size of each CONCATN element, and the element holding BYTE.  */
  inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    {
      enum mode_class mclass = GET_MODE_CLASS (innermode);
      partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
    }

  /* Offset of the requested piece within the chosen element; fail if
     it does not fit entirely inside that element.  */
  final_offset = byte % inner_size;
  if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
    return NULL_RTX;

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}
650
/* Wrapper around simplify_gen_subreg which handles CONCATN.  */

static rtx
simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
			     machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* Simple same-size mode change: strip the SUBREG and recurse on
	 the CONCATN directly.  */
      if ((GET_MODE_SIZE (GET_MODE (op))
	   == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && SUBREG_BYTE (op) == 0)
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      /* Otherwise try to resolve the outer SUBREG against the CONCATN
	 first.  */
      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* We don't handle paradoxical subregs here.  */
	  gcc_assert (GET_MODE_SIZE (outermode)
		      <= GET_MODE_SIZE (GET_MODE (op)));
	  gcc_assert (GET_MODE_SIZE (GET_MODE (op))
		      <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
	  /* The outer SUBREG straddled two CONCATN elements; fold both
	     byte offsets together and retry against the CONCATN.  */
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && GET_CODE (op) == SUBREG
      && SUBREG_BYTE (op) == 0
      && (GET_MODE_SIZE (innermode)
	  > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}
711
712 /* Return whether we should resolve X into the registers into which it
713 was decomposed. */
714
715 static bool
716 resolve_reg_p (rtx x)
717 {
718 return GET_CODE (x) == CONCATN;
719 }
720
721 /* Return whether X is a SUBREG of a register which we need to
722 resolve. */
723
724 static bool
725 resolve_subreg_p (rtx x)
726 {
727 if (GET_CODE (x) != SUBREG)
728 return false;
729 return resolve_reg_p (SUBREG_REG (x));
730 }
731
/* Look for SUBREGs in *LOC which need to be decomposed.  INSN is the
   insn being changed, or NULL_RTX when *LOC comes from a note.  Return
   true if we found a reference that cannot be replaced in place, in
   which case the caller must remove the containing note.  */

static bool
resolve_subreg_use (rtx *loc, rtx insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* It is possible for a note to contain a reference which we can
	     decompose.  In this case, return true to the caller to indicate
	     that the note must be removed.  */
	  if (!x)
	    {
	      gcc_assert (!insn);
	      return true;
	    }

	  /* Queue the replacement; the caller is responsible for
	     applying the change group.  */
	  validate_change (insn, loc, x, 1);
	  iter.skip_subrtxes ();
	}
      else if (resolve_reg_p (x))
	/* Return true to the caller to indicate that we found a direct
	   reference to a register which is being decomposed.  This can
	   happen inside notes, multiword shift or zero-extend
	   instructions.  */
	return true;
    }

  return false;
}
769
770 /* Resolve any decomposed registers which appear in register notes on
771 INSN. */
772
773 static void
774 resolve_reg_notes (rtx_insn *insn)
775 {
776 rtx *pnote, note;
777
778 note = find_reg_equal_equiv_note (insn);
779 if (note)
780 {
781 int old_count = num_validated_changes ();
782 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
783 remove_note (insn, note);
784 else
785 if (old_count != num_validated_changes ())
786 df_notes_rescan (insn);
787 }
788
789 pnote = &REG_NOTES (insn);
790 while (*pnote != NULL_RTX)
791 {
792 bool del = false;
793
794 note = *pnote;
795 switch (REG_NOTE_KIND (note))
796 {
797 case REG_DEAD:
798 case REG_UNUSED:
799 if (resolve_reg_p (XEXP (note, 0)))
800 del = true;
801 break;
802
803 default:
804 break;
805 }
806
807 if (del)
808 *pnote = XEXP (note, 1);
809 else
810 pnote = &XEXP (note, 1);
811 }
812 }
813
814 /* Return whether X can be decomposed into subwords. */
815
816 static bool
817 can_decompose_p (rtx x)
818 {
819 if (REG_P (x))
820 {
821 unsigned int regno = REGNO (x);
822
823 if (HARD_REGISTER_NUM_P (regno))
824 {
825 unsigned int byte, num_bytes;
826
827 num_bytes = GET_MODE_SIZE (GET_MODE (x));
828 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
829 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
830 return false;
831 return true;
832 }
833 else
834 return !bitmap_bit_p (subreg_context, regno);
835 }
836
837 return true;
838 }
839
/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  */

static rtx_insn *
resolve_simple_move (rtx set, rtx_insn *insn)
{
  rtx src, dest, real_dest;
  rtx_insn *insns;
  machine_mode orig_mode, dest_mode;
  unsigned int words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  /* Number of word-sized pieces in the move, rounding up.  */
  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (SUBREG_BYTE (src) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (SUBREG_BYTE (dest) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, smove;
      rtx_insn *minsn;

      /* Move SRC into a fresh register and recursively resolve that
	 simple move; the fresh register then becomes the source.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
      if (MEM_P (dest))
	resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);

      if (AUTO_INC_DEC)
	{
	  rtx move = emit_move_insn (reg, src);
	  if (MEM_P (src))
	    {
	      /* Carry any REG_INC note over to the new move.  */
	      rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	      if (note)
		add_reg_note (move, REG_INC, XEXP (note, 0));
	    }
	}
      else
	emit_move_insn (reg, src);

      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (dest)
	  && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  /* Use the integer mode of the same size for the temporary.  */
	  dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
				     MODE_INT, 0);
	  gcc_assert (dest_mode != BLKmode);
	}
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Choose the order in which the words are pushed, depending on
	 the direction of stack growth versus word endianness.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (word_mode, src,
						       orig_mode,
						       j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* Mark the whole destination as clobbered before setting it
	 piece by piece.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
	emit_clobber (dest);

      for (i = 0; i < words; ++i)
	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
						     dest_mode,
						     i * UNITS_PER_WORD),
			simplify_gen_subreg_concatn (word_mode, src,
						     orig_mode,
						     i * UNITS_PER_WORD));
    }

  if (real_dest != NULL_RTX)
    {
      rtx mdest, smove;
      rtx_insn *minsn;

      /* Copy the temporary into the real destination, and resolve
	 that move recursively as well.  */
      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

      if (AUTO_INC_DEC && MEM_P (real_dest)
	  && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
	{
	  rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	  if (note)
	    add_reg_note (minsn, REG_INC, XEXP (note, 0));
	}

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  /* If we get here via self-recursion, then INSN is not yet in the insns
     chain and delete_insn will fail.  We only want to remove INSN from the
     current sequence.  See PR56738.  */
  if (in_sequence_p ())
    remove_insn (insn);
  else
    delete_insn (insn);

  return insns;
}
1078
1079 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1080 component registers. Return whether we changed something. */
1081
static bool
resolve_clobber (rtx pat, rtx_insn *insn)
{
  rtx reg;
  machine_mode orig_mode;
  unsigned int words, i;
  int ret;

  reg = XEXP (pat, 0);
  /* Only rewrite clobbers of registers (or subregs of registers) that
     were decomposed into word-sized pseudos.  */
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  /* Number of word-sized pieces, rounding up for any partial word.  */
  words = GET_MODE_SIZE (orig_mode);
  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Replace the clobbered operand in place with word 0 of the
     decomposition.  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  /* Emit separate CLOBBERs after INSN for each of the remaining
     word-sized pieces.  */
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  resolve_reg_notes (insn);

  return true;
}
1119
1120 /* A USE of a decomposed register is no longer meaningful. Return
1121 whether we changed something. */
1122
1123 static bool
1124 resolve_use (rtx pat, rtx_insn *insn)
1125 {
1126 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1127 {
1128 delete_insn (insn);
1129 return true;
1130 }
1131
1132 resolve_reg_notes (insn);
1133
1134 return false;
1135 }
1136
1137 /* A VAR_LOCATION can be simplified. */
1138
static void
resolve_debug (rtx_insn *insn)
{
  subrtx_ptr_iterator::array_type array;
  /* Walk every sub-rtx of the debug insn's pattern, rewriting
     references to decomposed registers.  */
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  /* Try to express a SUBREG of a decomposed register as a
	     (concatenation of) its word-sized pieces.  */
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  if (x)
	    *loc = x;
	  else
	    /* Simplification failed; keep a copy so the check below
	       operates on an unshared rtx.  */
	    x = copy_rtx (*loc);
	}
      /* Unshare a direct reference to a decomposed register.  */
      if (resolve_reg_p (x))
	*loc = copy_rtx (x);
    }

  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}
1165
1166 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1167 set the decomposable_context bitmap accordingly. SPEED_P is true
1168 if we are optimizing INSN for speed rather than size. Return true
1169 if INSN is decomposable. */
1170
static bool
find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
{
  rtx set;
  rtx op;
  rtx op_operand;

  set = single_set (insn);
  if (!set)
    return false;

  op = SET_SRC (set);
  /* Only shifts and zero-extensions are candidates.  */
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return false;

  op_operand = XEXP (op, 0);
  /* Source and destination must both be pseudo registers, and the
     operation must be performed in the double-word mode.  */
  if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
      || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
      || HARD_REGISTER_NUM_P (REGNO (op_operand))
      || GET_MODE (op) != twice_word_mode)
    return false;

  if (GET_CODE (op) == ZERO_EXTEND)
    {
      /* Only word_mode -> twice_word_mode extensions are splittable,
	 and only if the cost analysis said so.  */
      if (GET_MODE (op_operand) != word_mode
	  || !choices[speed_p].splitting_zext)
	return false;
    }
  else /* left or right shift */
    {
      /* Pick the per-shift-code table of profitable shift amounts.  */
      bool *splitting = (GET_CODE (op) == ASHIFT
			 ? choices[speed_p].splitting_ashift
			 : GET_CODE (op) == ASHIFTRT
			 ? choices[speed_p].splitting_ashiftrt
			 : choices[speed_p].splitting_lshiftrt);
      /* The shift count must be a constant in [BITS_PER_WORD,
	 2 * BITS_PER_WORD - 1] that was chosen for splitting.  */
      if (!CONST_INT_P (XEXP (op, 1))
	  || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
			2 * BITS_PER_WORD - 1)
	  || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
	return false;

      bitmap_set_bit (decomposable_context, REGNO (op_operand));
    }

  bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));

  return true;
}
1222
1223 /* Decompose a more than word wide shift (in INSN) of a multiword
1224 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1225 and 'set to zero' insn. Return a pointer to the new insn when a
1226 replacement was done. */
1227
static rtx_insn *
resolve_shift_zext (rtx_insn *insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx_insn *insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;

  set = single_set (insn);
  if (!set)
    return NULL;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL;

  op_operand = XEXP (op, 0);

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  /* Word order is reversed on big endian targets when the source is
     wider than a word.  */
  if (WORDS_BIG_ENDIAN
      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* Byte offsets of the destination word that receives the shifted
     value (offset1), the other destination word (offset2), and the
     source word we read from.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					  GET_MODE (SET_DEST (set)),
					  offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					    GET_MODE (SET_DEST (set)),
					    offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
					 GET_MODE (op_operand),
					 src_offset);
  /* For an arithmetic right shift the upper word is filled with copies
     of the source's sign bit, obtained by shifting right by
     BITS_PER_WORD - 1.  The full 2*BITS_PER_WORD-1 case is handled
     separately below by copying the already-shifted low word.  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  if (GET_CODE (op) != ZERO_EXTEND)
    {
      /* A shift count of exactly BITS_PER_WORD is a pure word move;
	 only the excess over BITS_PER_WORD needs a real shift.  */
      int shift_count = INTVAL (XEXP (op, 1));
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  /* Fill the other destination word: zero for shifts/zero-extend,
     sign copies for arithmetic right shifts.  */
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx_insn *in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1328
1329 /* Print to dump_file a description of what we're doing with shift code CODE.
1330 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1331
1332 static void
1333 dump_shift_choices (enum rtx_code code, bool *splitting)
1334 {
1335 int i;
1336 const char *sep;
1337
1338 fprintf (dump_file,
1339 " Splitting mode %s for %s lowering with shift amounts = ",
1340 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1341 sep = "";
1342 for (i = 0; i < BITS_PER_WORD; i++)
1343 if (splitting[i])
1344 {
1345 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1346 sep = ",";
1347 }
1348 fprintf (dump_file, "\n");
1349 }
1350
1351 /* Print to dump_file a description of what we're doing when optimizing
1352 for speed or size; SPEED_P says which. DESCRIPTION is a description
1353 of the SPEED_P choice. */
1354
1355 static void
1356 dump_choices (bool speed_p, const char *description)
1357 {
1358 unsigned int i;
1359
1360 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1361
1362 for (i = 0; i < MAX_MACHINE_MODE; i++)
1363 if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD)
1364 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1365 choices[speed_p].move_modes_to_split[i]
1366 ? "Splitting"
1367 : "Skipping",
1368 GET_MODE_NAME ((machine_mode) i));
1369
1370 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1371 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1372 GET_MODE_NAME (twice_word_mode));
1373
1374 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1375 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1376 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1377 fprintf (dump_file, "\n");
1378 }
1379
1380 /* Look for registers which are always accessed via word-sized SUBREGs
1381 or -if DECOMPOSE_COPIES is true- via copies. Decompose these
1382 registers into several word-sized pseudo-registers. */
1383
static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  /* Run word-level dead code elimination first so we do not decompose
     registers whose only uses are dead.  */
  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  /* Phase 1: scan all insns, classifying each as a simple move (or
     not) and recording which pseudos are (non-)decomposable.  */
  speed_p = optimize_function_for_speed_p (cfun);
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      find_decomposable_subregs (&recog_data.operand[i], &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* A pseudo is only decomposable if no context forbade it.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      sbitmap sub_blocks;
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      /* Blocks that may need splitting because resolving a move
	 introduced additional possibly-trapping insns.  */
      sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Phase 2: rewrite every insn that mentions a decomposed
	 register.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx_insn *orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx_insn *decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    resolve_subreg_use (recog_data.operand_loc[i], insn);

		  resolve_reg_notes (insn);

		  if (num_validated_changes () > 0)
		    {
		      /* Keep matched duplicate operands in sync with the
			 operands they duplicate before committing.  */
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx_insn *insn, *end;
	  edge fallthru;

	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
		insn = NEXT_INSN (insn);
	    }
	}

      sbitmap_free (sub_blocks);
    }

  /* Release the per-pseudo copy bitmaps and the graph itself.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1674 \f
1675 /* Implement first lower subreg pass. */
1676
1677 namespace {
1678
/* Pass metadata for the first ("subreg1") lowering pass.  */
const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1691
class pass_lower_subreg : public rtl_opt_pass
{
public:
  pass_lower_subreg (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  /* First pass: do not decompose pseudo-to-pseudo copies.  */
  virtual unsigned int execute (function *)
    {
      decompose_multiword_subregs (false);
      return 0;
    }

}; // class pass_lower_subreg
1708
1709 } // anon namespace
1710
/* Factory for the first lower-subreg pass; caller owns the result.  */
rtl_opt_pass *
make_pass_lower_subreg (gcc::context *ctxt)
{
  return new pass_lower_subreg (ctxt);
}
1716
1717 /* Implement second lower subreg pass. */
1718
1719 namespace {
1720
/* Pass metadata for the second ("subreg2") lowering pass; unlike the
   first pass it requests DF finalization on completion.  */
const pass_data pass_data_lower_subreg2 =
{
  RTL_PASS, /* type */
  "subreg2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
1733
class pass_lower_subreg2 : public rtl_opt_pass
{
public:
  pass_lower_subreg2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  /* Second pass: also decompose pseudo-to-pseudo copies.  */
  virtual unsigned int execute (function *)
    {
      decompose_multiword_subregs (true);
      return 0;
    }

}; // class pass_lower_subreg2
1750
1751 } // anon namespace
1752
/* Factory for the second lower-subreg pass; caller owns the result.  */
rtl_opt_pass *
make_pass_lower_subreg2 (gcc::context *ctxt)
{
  return new pass_lower_subreg2 (ctxt);
}