]>
Commit | Line | Data |
---|---|---|
e53a16e7 | 1 | /* Decompose multiword subregs. |
5624e564 | 2 | Copyright (C) 2007-2015 Free Software Foundation, Inc. |
e53a16e7 ILT |
3 | Contributed by Richard Henderson <rth@redhat.com> |
4 | Ian Lance Taylor <iant@google.com> | |
5 | ||
6 | This file is part of GCC. | |
7 | ||
8 | GCC is free software; you can redistribute it and/or modify it under | |
9 | the terms of the GNU General Public License as published by the Free | |
9dcd6f09 | 10 | Software Foundation; either version 3, or (at your option) any later |
e53a16e7 ILT |
11 | version. |
12 | ||
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 | for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
9dcd6f09 NC |
19 | along with GCC; see the file COPYING3. If not see |
20 | <http://www.gnu.org/licenses/>. */ | |
e53a16e7 ILT |
21 | |
22 | #include "config.h" | |
23 | #include "system.h" | |
24 | #include "coretypes.h" | |
25 | #include "machmode.h" | |
26 | #include "tm.h" | |
40e23961 MC |
27 | #include "hash-set.h" |
28 | #include "machmode.h" | |
29 | #include "vec.h" | |
30 | #include "double-int.h" | |
31 | #include "input.h" | |
32 | #include "alias.h" | |
33 | #include "symtab.h" | |
34 | #include "wide-int.h" | |
35 | #include "inchash.h" | |
4d648807 | 36 | #include "tree.h" |
e53a16e7 ILT |
37 | #include "rtl.h" |
38 | #include "tm_p.h" | |
e53a16e7 ILT |
39 | #include "flags.h" |
40 | #include "insn-config.h" | |
41 | #include "obstack.h" | |
60393bbc | 42 | #include "predict.h" |
60393bbc AM |
43 | #include "hard-reg-set.h" |
44 | #include "input.h" | |
45 | #include "function.h" | |
46 | #include "dominance.h" | |
47 | #include "cfg.h" | |
48 | #include "cfgrtl.h" | |
49 | #include "cfgbuild.h" | |
e53a16e7 ILT |
50 | #include "basic-block.h" |
51 | #include "recog.h" | |
52 | #include "bitmap.h" | |
8d074192 | 53 | #include "dce.h" |
e53a16e7 | 54 | #include "expr.h" |
7984c787 | 55 | #include "except.h" |
e53a16e7 ILT |
56 | #include "regs.h" |
57 | #include "tree-pass.h" | |
6fb5fa3c | 58 | #include "df.h" |
af4ba423 | 59 | #include "lower-subreg.h" |
cf55cb6a | 60 | #include "rtl-iter.h" |
e53a16e7 ILT |
61 | |
62 | #ifdef STACK_GROWS_DOWNWARD | |
63 | # undef STACK_GROWS_DOWNWARD | |
64 | # define STACK_GROWS_DOWNWARD 1 | |
65 | #else | |
66 | # define STACK_GROWS_DOWNWARD 0 | |
67 | #endif | |
68 | ||
e53a16e7 ILT |
69 | |
70 | /* Decompose multi-word pseudo-registers into individual | |
af4ba423 KZ |
71 | pseudo-registers when possible and profitable. This is possible |
72 | when all the uses of a multi-word register are via SUBREG, or are | |
73 | copies of the register to another location. Breaking apart the | |
74 | register permits more CSE and permits better register allocation. | |
75 | This is profitable if the machine does not have move instructions | |
76 | to do this. | |
77 | ||
78 | This pass only splits moves with modes that are wider than | |
d7fde18c JJ |
79 | word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with |
80 | integer modes that are twice the width of word_mode. The latter | |
81 | could be generalized if there was a need to do this, but the trend in | |
af4ba423 KZ |
82 | architectures is to not need this. |
83 | ||
84 | There are two useful preprocessor defines for use by maintainers: | |
85 | ||
86 | #define LOG_COSTS 1 | |
87 | ||
88 | if you wish to see the actual cost estimates that are being used | |
89 | for each mode wider than word mode and the cost estimates for zero | |
90 | extension and the shifts. This can be useful when port maintainers | |
91 | are tuning insn rtx costs. | |
92 | ||
93 | #define FORCE_LOWERING 1 | |
94 | ||
95 | if you wish to test the pass with all the transformation forced on. | |
96 | This can be useful for finding bugs in the transformations. */ | |
97 | ||
98 | #define LOG_COSTS 0 | |
99 | #define FORCE_LOWERING 0 | |
e53a16e7 ILT |
100 | |
101 | /* Bit N in this bitmap is set if regno N is used in a context in | |
102 | which we can decompose it. */ | |
103 | static bitmap decomposable_context; | |
104 | ||
105 | /* Bit N in this bitmap is set if regno N is used in a context in | |
106 | which it can not be decomposed. */ | |
107 | static bitmap non_decomposable_context; | |
108 | ||
402464a0 BS |
109 | /* Bit N in this bitmap is set if regno N is used in a subreg |
110 | which changes the mode but not the size. This typically happens | |
111 | when the register is accessed as a floating-point value; we want to | |
112 | avoid generating accesses to its subwords in integer modes. */ | |
113 | static bitmap subreg_context; | |
114 | ||
e53a16e7 ILT |
115 | /* Bit N in the bitmap in element M of this array is set if there is a |
116 | copy from reg M to reg N. */ | |
9771b263 | 117 | static vec<bitmap> reg_copy_graph; |
e53a16e7 | 118 | |
af4ba423 KZ |
119 | struct target_lower_subreg default_target_lower_subreg; |
120 | #if SWITCHABLE_TARGET | |
121 | struct target_lower_subreg *this_target_lower_subreg | |
122 | = &default_target_lower_subreg; | |
123 | #endif | |
124 | ||
125 | #define twice_word_mode \ | |
126 | this_target_lower_subreg->x_twice_word_mode | |
127 | #define choices \ | |
128 | this_target_lower_subreg->x_choices | |
129 | ||
130 | /* RTXes used while computing costs. */ | |
131 | struct cost_rtxes { | |
132 | /* Source and target registers. */ | |
133 | rtx source; | |
134 | rtx target; | |
135 | ||
136 | /* A twice_word_mode ZERO_EXTEND of SOURCE. */ | |
137 | rtx zext; | |
138 | ||
139 | /* A shift of SOURCE. */ | |
140 | rtx shift; | |
141 | ||
142 | /* A SET of TARGET. */ | |
143 | rtx set; | |
144 | }; | |
145 | ||
146 | /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the | |
147 | rtxes in RTXES. SPEED_P selects between the speed and size cost. */ | |
148 | ||
149 | static int | |
150 | shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code, | |
ef4bddc2 | 151 | machine_mode mode, int op1) |
af4ba423 | 152 | { |
af4ba423 KZ |
153 | PUT_CODE (rtxes->shift, code); |
154 | PUT_MODE (rtxes->shift, mode); | |
155 | PUT_MODE (rtxes->source, mode); | |
156 | XEXP (rtxes->shift, 1) = GEN_INT (op1); | |
69523a7c | 157 | return set_src_cost (rtxes->shift, speed_p); |
af4ba423 KZ |
158 | } |
159 | ||
160 | /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X] | |
161 | to true if it is profitable to split a double-word CODE shift | |
162 | of X + BITS_PER_WORD bits. SPEED_P says whether we are testing | |
163 | for speed or size profitability. | |
164 | ||
165 | Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is | |
166 | the cost of moving zero into a word-mode register. WORD_MOVE_COST | |
167 | is the cost of moving between word registers. */ | |
168 | ||
169 | static void | |
170 | compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes, | |
171 | bool *splitting, enum rtx_code code, | |
172 | int word_move_zero_cost, int word_move_cost) | |
173 | { | |
d7fde18c | 174 | int wide_cost, narrow_cost, upper_cost, i; |
af4ba423 KZ |
175 | |
176 | for (i = 0; i < BITS_PER_WORD; i++) | |
177 | { | |
178 | wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode, | |
179 | i + BITS_PER_WORD); | |
180 | if (i == 0) | |
181 | narrow_cost = word_move_cost; | |
182 | else | |
183 | narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i); | |
184 | ||
d7fde18c JJ |
185 | if (code != ASHIFTRT) |
186 | upper_cost = word_move_zero_cost; | |
187 | else if (i == BITS_PER_WORD - 1) | |
188 | upper_cost = word_move_cost; | |
189 | else | |
190 | upper_cost = shift_cost (speed_p, rtxes, code, word_mode, | |
191 | BITS_PER_WORD - 1); | |
192 | ||
af4ba423 KZ |
193 | if (LOG_COSTS) |
194 | fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n", | |
195 | GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code), | |
d7fde18c | 196 | i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost); |
af4ba423 | 197 | |
d7fde18c | 198 | if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost) |
af4ba423 KZ |
199 | splitting[i] = true; |
200 | } | |
201 | } | |
202 | ||
203 | /* Compute what we should do when optimizing for speed or size; SPEED_P | |
204 | selects which. Use RTXES for computing costs. */ | |
205 | ||
206 | static void | |
207 | compute_costs (bool speed_p, struct cost_rtxes *rtxes) | |
208 | { | |
209 | unsigned int i; | |
210 | int word_move_zero_cost, word_move_cost; | |
211 | ||
69523a7c | 212 | PUT_MODE (rtxes->target, word_mode); |
af4ba423 | 213 | SET_SRC (rtxes->set) = CONST0_RTX (word_mode); |
69523a7c | 214 | word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p); |
af4ba423 KZ |
215 | |
216 | SET_SRC (rtxes->set) = rtxes->source; | |
69523a7c | 217 | word_move_cost = set_rtx_cost (rtxes->set, speed_p); |
af4ba423 KZ |
218 | |
219 | if (LOG_COSTS) | |
220 | fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n", | |
221 | GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost); | |
222 | ||
223 | for (i = 0; i < MAX_MACHINE_MODE; i++) | |
224 | { | |
ef4bddc2 | 225 | machine_mode mode = (machine_mode) i; |
af4ba423 KZ |
226 | int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD; |
227 | if (factor > 1) | |
228 | { | |
229 | int mode_move_cost; | |
230 | ||
231 | PUT_MODE (rtxes->target, mode); | |
232 | PUT_MODE (rtxes->source, mode); | |
69523a7c | 233 | mode_move_cost = set_rtx_cost (rtxes->set, speed_p); |
af4ba423 KZ |
234 | |
235 | if (LOG_COSTS) | |
236 | fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n", | |
237 | GET_MODE_NAME (mode), mode_move_cost, | |
238 | word_move_cost, factor); | |
239 | ||
240 | if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor) | |
241 | { | |
242 | choices[speed_p].move_modes_to_split[i] = true; | |
243 | choices[speed_p].something_to_do = true; | |
244 | } | |
245 | } | |
246 | } | |
247 | ||
248 | /* For the moves and shifts, the only case that is checked is one | |
249 | where the mode of the target is an integer mode twice the width | |
250 | of the word_mode. | |
251 | ||
252 | If it is not profitable to split a double word move then do not | |
253 | even consider the shifts or the zero extension. */ | |
254 | if (choices[speed_p].move_modes_to_split[(int) twice_word_mode]) | |
255 | { | |
256 | int zext_cost; | |
257 | ||
258 | /* The only case here to check to see if moving the upper part with a | |
259 | zero is cheaper than doing the zext itself. */ | |
af4ba423 | 260 | PUT_MODE (rtxes->source, word_mode); |
69523a7c | 261 | zext_cost = set_src_cost (rtxes->zext, speed_p); |
af4ba423 KZ |
262 | |
263 | if (LOG_COSTS) | |
264 | fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n", | |
265 | GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND), | |
266 | zext_cost, word_move_cost, word_move_zero_cost); | |
267 | ||
268 | if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost) | |
269 | choices[speed_p].splitting_zext = true; | |
270 | ||
271 | compute_splitting_shift (speed_p, rtxes, | |
272 | choices[speed_p].splitting_ashift, ASHIFT, | |
273 | word_move_zero_cost, word_move_cost); | |
274 | compute_splitting_shift (speed_p, rtxes, | |
275 | choices[speed_p].splitting_lshiftrt, LSHIFTRT, | |
276 | word_move_zero_cost, word_move_cost); | |
d7fde18c JJ |
277 | compute_splitting_shift (speed_p, rtxes, |
278 | choices[speed_p].splitting_ashiftrt, ASHIFTRT, | |
279 | word_move_zero_cost, word_move_cost); | |
af4ba423 KZ |
280 | } |
281 | } | |
282 | ||
283 | /* Do one-per-target initialisation. This involves determining | |
284 | which operations on the machine are profitable. If none are found, | |
285 | then the pass just returns when called. */ | |
286 | ||
287 | void | |
288 | init_lower_subreg (void) | |
289 | { | |
290 | struct cost_rtxes rtxes; | |
291 | ||
292 | memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg)); | |
293 | ||
294 | twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode); | |
295 | ||
296 | rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER); | |
297 | rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1); | |
298 | rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source); | |
299 | rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source); | |
300 | rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx); | |
301 | ||
302 | if (LOG_COSTS) | |
303 | fprintf (stderr, "\nSize costs\n==========\n\n"); | |
304 | compute_costs (false, &rtxes); | |
305 | ||
306 | if (LOG_COSTS) | |
307 | fprintf (stderr, "\nSpeed costs\n===========\n\n"); | |
308 | compute_costs (true, &rtxes); | |
309 | } | |
2b54c30f ILT |
310 | |
311 | static bool | |
312 | simple_move_operand (rtx x) | |
313 | { | |
314 | if (GET_CODE (x) == SUBREG) | |
315 | x = SUBREG_REG (x); | |
316 | ||
317 | if (!OBJECT_P (x)) | |
318 | return false; | |
319 | ||
320 | if (GET_CODE (x) == LABEL_REF | |
321 | || GET_CODE (x) == SYMBOL_REF | |
7e0c3f57 ILT |
322 | || GET_CODE (x) == HIGH |
323 | || GET_CODE (x) == CONST) | |
2b54c30f ILT |
324 | return false; |
325 | ||
326 | if (MEM_P (x) | |
327 | && (MEM_VOLATILE_P (x) | |
5bfed9a9 | 328 | || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x)))) |
2b54c30f ILT |
329 | return false; |
330 | ||
331 | return true; | |
332 | } | |
333 | ||
af4ba423 KZ |
334 | /* If INSN is a single set between two objects that we want to split, |
335 | return the single set. SPEED_P says whether we are optimizing | |
336 | INSN for speed or size. | |
337 | ||
338 | INSN should have been passed to recog and extract_insn before this | |
339 | is called. */ | |
e53a16e7 ILT |
340 | |
341 | static rtx | |
e967cc2f | 342 | simple_move (rtx_insn *insn, bool speed_p) |
e53a16e7 ILT |
343 | { |
344 | rtx x; | |
345 | rtx set; | |
ef4bddc2 | 346 | machine_mode mode; |
e53a16e7 ILT |
347 | |
348 | if (recog_data.n_operands != 2) | |
349 | return NULL_RTX; | |
350 | ||
351 | set = single_set (insn); | |
352 | if (!set) | |
353 | return NULL_RTX; | |
354 | ||
355 | x = SET_DEST (set); | |
356 | if (x != recog_data.operand[0] && x != recog_data.operand[1]) | |
357 | return NULL_RTX; | |
2b54c30f | 358 | if (!simple_move_operand (x)) |
e53a16e7 ILT |
359 | return NULL_RTX; |
360 | ||
361 | x = SET_SRC (set); | |
362 | if (x != recog_data.operand[0] && x != recog_data.operand[1]) | |
363 | return NULL_RTX; | |
2b54c30f ILT |
364 | /* For the src we can handle ASM_OPERANDS, and it is beneficial for |
365 | things like x86 rdtsc which returns a DImode value. */ | |
366 | if (GET_CODE (x) != ASM_OPERANDS | |
367 | && !simple_move_operand (x)) | |
e53a16e7 ILT |
368 | return NULL_RTX; |
369 | ||
370 | /* We try to decompose in integer modes, to avoid generating | |
371 | inefficient code copying between integer and floating point | |
372 | registers. That means that we can't decompose if this is a | |
373 | non-integer mode for which there is no integer mode of the same | |
374 | size. */ | |
576fe41a | 375 | mode = GET_MODE (SET_DEST (set)); |
e53a16e7 ILT |
376 | if (!SCALAR_INT_MODE_P (mode) |
377 | && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0) | |
378 | == BLKmode)) | |
379 | return NULL_RTX; | |
380 | ||
1f64a081 ILT |
381 | /* Reject PARTIAL_INT modes. They are used for processor specific |
382 | purposes and it's probably best not to tamper with them. */ | |
383 | if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) | |
384 | return NULL_RTX; | |
385 | ||
af4ba423 KZ |
386 | if (!choices[speed_p].move_modes_to_split[(int) mode]) |
387 | return NULL_RTX; | |
388 | ||
e53a16e7 ILT |
389 | return set; |
390 | } | |
391 | ||
392 | /* If SET is a copy from one multi-word pseudo-register to another, | |
393 | record that in reg_copy_graph. Return whether it is such a | |
394 | copy. */ | |
395 | ||
396 | static bool | |
397 | find_pseudo_copy (rtx set) | |
398 | { | |
399 | rtx dest = SET_DEST (set); | |
400 | rtx src = SET_SRC (set); | |
401 | unsigned int rd, rs; | |
402 | bitmap b; | |
403 | ||
404 | if (!REG_P (dest) || !REG_P (src)) | |
405 | return false; | |
406 | ||
407 | rd = REGNO (dest); | |
408 | rs = REGNO (src); | |
409 | if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs)) | |
410 | return false; | |
411 | ||
9771b263 | 412 | b = reg_copy_graph[rs]; |
e53a16e7 ILT |
413 | if (b == NULL) |
414 | { | |
415 | b = BITMAP_ALLOC (NULL); | |
9771b263 | 416 | reg_copy_graph[rs] = b; |
e53a16e7 ILT |
417 | } |
418 | ||
419 | bitmap_set_bit (b, rd); | |
420 | ||
421 | return true; | |
422 | } | |
423 | ||
424 | /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case | |
425 | where they are copied to another register, add the register to | |
426 | which they are copied to DECOMPOSABLE_CONTEXT. Use | |
427 | NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track | |
428 | copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */ | |
429 | ||
430 | static void | |
431 | propagate_pseudo_copies (void) | |
432 | { | |
433 | bitmap queue, propagate; | |
434 | ||
435 | queue = BITMAP_ALLOC (NULL); | |
436 | propagate = BITMAP_ALLOC (NULL); | |
437 | ||
438 | bitmap_copy (queue, decomposable_context); | |
439 | do | |
440 | { | |
441 | bitmap_iterator iter; | |
442 | unsigned int i; | |
443 | ||
444 | bitmap_clear (propagate); | |
445 | ||
446 | EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter) | |
447 | { | |
9771b263 | 448 | bitmap b = reg_copy_graph[i]; |
e53a16e7 ILT |
449 | if (b) |
450 | bitmap_ior_and_compl_into (propagate, b, non_decomposable_context); | |
451 | } | |
452 | ||
453 | bitmap_and_compl (queue, propagate, decomposable_context); | |
454 | bitmap_ior_into (decomposable_context, propagate); | |
455 | } | |
456 | while (!bitmap_empty_p (queue)); | |
457 | ||
458 | BITMAP_FREE (queue); | |
459 | BITMAP_FREE (propagate); | |
460 | } | |
461 | ||
/* Classification of the insn being scanned.  A pointer to one of
   these values is passed to find_decomposable_subregs.  */

enum classify_move_insn
{
  /* The insn is not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* The insn is a simple move that we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* The insn is some other simple move.  */
  SIMPLE_MOVE
};
474 | ||
a36a1928 RS |
475 | /* If we find a SUBREG in *LOC which we could use to decompose a |
476 | pseudo-register, set a bit in DECOMPOSABLE_CONTEXT. If we find an | |
477 | unadorned register which is not a simple pseudo-register copy, | |
478 | DATA will point at the type of move, and we set a bit in | |
479 | DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate. */ | |
e53a16e7 | 480 | |
a36a1928 RS |
481 | static void |
482 | find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi) | |
e53a16e7 | 483 | { |
a36a1928 RS |
484 | subrtx_var_iterator::array_type array; |
485 | FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST) | |
e53a16e7 | 486 | { |
a36a1928 RS |
487 | rtx x = *iter; |
488 | if (GET_CODE (x) == SUBREG) | |
489 | { | |
490 | rtx inner = SUBREG_REG (x); | |
491 | unsigned int regno, outer_size, inner_size, outer_words, inner_words; | |
e53a16e7 | 492 | |
a36a1928 RS |
493 | if (!REG_P (inner)) |
494 | continue; | |
e53a16e7 | 495 | |
a36a1928 RS |
496 | regno = REGNO (inner); |
497 | if (HARD_REGISTER_NUM_P (regno)) | |
498 | { | |
499 | iter.skip_subrtxes (); | |
500 | continue; | |
501 | } | |
e53a16e7 | 502 | |
a36a1928 RS |
503 | outer_size = GET_MODE_SIZE (GET_MODE (x)); |
504 | inner_size = GET_MODE_SIZE (GET_MODE (inner)); | |
505 | outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; | |
506 | inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; | |
e53a16e7 | 507 | |
a36a1928 RS |
508 | /* We only try to decompose single word subregs of multi-word |
509 | registers. When we find one, we return -1 to avoid iterating | |
510 | over the inner register. | |
e53a16e7 | 511 | |
a36a1928 RS |
512 | ??? This doesn't allow, e.g., DImode subregs of TImode values |
513 | on 32-bit targets. We would need to record the way the | |
514 | pseudo-register was used, and only decompose if all the uses | |
515 | were the same number and size of pieces. Hopefully this | |
516 | doesn't happen much. */ | |
e53a16e7 | 517 | |
a36a1928 RS |
518 | if (outer_words == 1 && inner_words > 1) |
519 | { | |
520 | bitmap_set_bit (decomposable_context, regno); | |
521 | iter.skip_subrtxes (); | |
522 | continue; | |
523 | } | |
03743286 | 524 | |
a36a1928 RS |
525 | /* If this is a cast from one mode to another, where the modes |
526 | have the same size, and they are not tieable, then mark this | |
527 | register as non-decomposable. If we decompose it we are | |
528 | likely to mess up whatever the backend is trying to do. */ | |
529 | if (outer_words > 1 | |
530 | && outer_size == inner_size | |
531 | && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner))) | |
532 | { | |
533 | bitmap_set_bit (non_decomposable_context, regno); | |
534 | bitmap_set_bit (subreg_context, regno); | |
535 | iter.skip_subrtxes (); | |
536 | continue; | |
537 | } | |
03743286 | 538 | } |
a36a1928 | 539 | else if (REG_P (x)) |
e53a16e7 | 540 | { |
a36a1928 RS |
541 | unsigned int regno; |
542 | ||
543 | /* We will see an outer SUBREG before we see the inner REG, so | |
544 | when we see a plain REG here it means a direct reference to | |
545 | the register. | |
546 | ||
547 | If this is not a simple copy from one location to another, | |
548 | then we can not decompose this register. If this is a simple | |
549 | copy we want to decompose, and the mode is right, | |
550 | then we mark the register as decomposable. | |
551 | Otherwise we don't say anything about this register -- | |
552 | it could be decomposed, but whether that would be | |
553 | profitable depends upon how it is used elsewhere. | |
554 | ||
555 | We only set bits in the bitmap for multi-word | |
556 | pseudo-registers, since those are the only ones we care about | |
557 | and it keeps the size of the bitmaps down. */ | |
558 | ||
559 | regno = REGNO (x); | |
560 | if (!HARD_REGISTER_NUM_P (regno) | |
561 | && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) | |
e53a16e7 | 562 | { |
a36a1928 RS |
563 | switch (*pcmi) |
564 | { | |
565 | case NOT_SIMPLE_MOVE: | |
566 | bitmap_set_bit (non_decomposable_context, regno); | |
567 | break; | |
568 | case DECOMPOSABLE_SIMPLE_MOVE: | |
569 | if (MODES_TIEABLE_P (GET_MODE (x), word_mode)) | |
570 | bitmap_set_bit (decomposable_context, regno); | |
571 | break; | |
572 | case SIMPLE_MOVE: | |
573 | break; | |
574 | default: | |
575 | gcc_unreachable (); | |
576 | } | |
e53a16e7 ILT |
577 | } |
578 | } | |
a36a1928 RS |
579 | else if (MEM_P (x)) |
580 | { | |
581 | enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE; | |
2b54c30f | 582 | |
a36a1928 RS |
583 | /* Any registers used in a MEM do not participate in a |
584 | SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion | |
585 | here, and return -1 to block the parent's recursion. */ | |
586 | find_decomposable_subregs (&XEXP (x, 0), &cmi_mem); | |
587 | iter.skip_subrtxes (); | |
588 | } | |
2b54c30f | 589 | } |
e53a16e7 ILT |
590 | } |
591 | ||
592 | /* Decompose REGNO into word-sized components. We smash the REG node | |
593 | in place. This ensures that (1) something goes wrong quickly if we | |
594 | fail to make some replacement, and (2) the debug information inside | |
595 | the symbol table is automatically kept up to date. */ | |
596 | ||
597 | static void | |
598 | decompose_register (unsigned int regno) | |
599 | { | |
600 | rtx reg; | |
601 | unsigned int words, i; | |
602 | rtvec v; | |
603 | ||
604 | reg = regno_reg_rtx[regno]; | |
605 | ||
606 | regno_reg_rtx[regno] = NULL_RTX; | |
e53a16e7 ILT |
607 | |
608 | words = GET_MODE_SIZE (GET_MODE (reg)); | |
609 | words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; | |
610 | ||
611 | v = rtvec_alloc (words); | |
612 | for (i = 0; i < words; ++i) | |
613 | RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD); | |
614 | ||
615 | PUT_CODE (reg, CONCATN); | |
616 | XVEC (reg, 0) = v; | |
617 | ||
618 | if (dump_file) | |
619 | { | |
620 | fprintf (dump_file, "; Splitting reg %u ->", regno); | |
621 | for (i = 0; i < words; ++i) | |
622 | fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i))); | |
623 | fputc ('\n', dump_file); | |
624 | } | |
625 | } | |
626 | ||
627 | /* Get a SUBREG of a CONCATN. */ | |
628 | ||
629 | static rtx | |
ef4bddc2 | 630 | simplify_subreg_concatn (machine_mode outermode, rtx op, |
e53a16e7 ILT |
631 | unsigned int byte) |
632 | { | |
633 | unsigned int inner_size; | |
ef4bddc2 | 634 | machine_mode innermode, partmode; |
e53a16e7 ILT |
635 | rtx part; |
636 | unsigned int final_offset; | |
637 | ||
638 | gcc_assert (GET_CODE (op) == CONCATN); | |
639 | gcc_assert (byte % GET_MODE_SIZE (outermode) == 0); | |
640 | ||
641 | innermode = GET_MODE (op); | |
642 | gcc_assert (byte < GET_MODE_SIZE (innermode)); | |
643 | gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode)); | |
644 | ||
645 | inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0); | |
646 | part = XVECEXP (op, 0, byte / inner_size); | |
695ae295 UB |
647 | partmode = GET_MODE (part); |
648 | ||
822a55a0 UB |
649 | /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of |
650 | regular CONST_VECTORs. They have vector or integer modes, depending | |
651 | on the capabilities of the target. Cope with them. */ | |
652 | if (partmode == VOIDmode && VECTOR_MODE_P (innermode)) | |
653 | partmode = GET_MODE_INNER (innermode); | |
654 | else if (partmode == VOIDmode) | |
695ae295 | 655 | { |
822a55a0 UB |
656 | enum mode_class mclass = GET_MODE_CLASS (innermode); |
657 | partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0); | |
695ae295 UB |
658 | } |
659 | ||
e53a16e7 ILT |
660 | final_offset = byte % inner_size; |
661 | if (final_offset + GET_MODE_SIZE (outermode) > inner_size) | |
662 | return NULL_RTX; | |
663 | ||
695ae295 | 664 | return simplify_gen_subreg (outermode, part, partmode, final_offset); |
e53a16e7 ILT |
665 | } |
666 | ||
667 | /* Wrapper around simplify_gen_subreg which handles CONCATN. */ | |
668 | ||
669 | static rtx | |
ef4bddc2 RS |
670 | simplify_gen_subreg_concatn (machine_mode outermode, rtx op, |
671 | machine_mode innermode, unsigned int byte) | |
e53a16e7 | 672 | { |
0e6c5b58 ILT |
673 | rtx ret; |
674 | ||
e53a16e7 ILT |
675 | /* We have to handle generating a SUBREG of a SUBREG of a CONCATN. |
676 | If OP is a SUBREG of a CONCATN, then it must be a simple mode | |
677 | change with the same size and offset 0, or it must extract a | |
678 | part. We shouldn't see anything else here. */ | |
679 | if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN) | |
680 | { | |
681 | rtx op2; | |
682 | ||
683 | if ((GET_MODE_SIZE (GET_MODE (op)) | |
684 | == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))) | |
685 | && SUBREG_BYTE (op) == 0) | |
686 | return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op), | |
687 | GET_MODE (SUBREG_REG (op)), byte); | |
688 | ||
689 | op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op), | |
690 | SUBREG_BYTE (op)); | |
691 | if (op2 == NULL_RTX) | |
692 | { | |
693 | /* We don't handle paradoxical subregs here. */ | |
694 | gcc_assert (GET_MODE_SIZE (outermode) | |
695 | <= GET_MODE_SIZE (GET_MODE (op))); | |
696 | gcc_assert (GET_MODE_SIZE (GET_MODE (op)) | |
697 | <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))); | |
698 | op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op), | |
699 | byte + SUBREG_BYTE (op)); | |
700 | gcc_assert (op2 != NULL_RTX); | |
701 | return op2; | |
702 | } | |
703 | ||
704 | op = op2; | |
705 | gcc_assert (op != NULL_RTX); | |
706 | gcc_assert (innermode == GET_MODE (op)); | |
707 | } | |
0e6c5b58 | 708 | |
e53a16e7 ILT |
709 | if (GET_CODE (op) == CONCATN) |
710 | return simplify_subreg_concatn (outermode, op, byte); | |
0e6c5b58 ILT |
711 | |
712 | ret = simplify_gen_subreg (outermode, op, innermode, byte); | |
713 | ||
714 | /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then | |
715 | resolve_simple_move will ask for the high part of the paradoxical | |
716 | subreg, which does not have a value. Just return a zero. */ | |
717 | if (ret == NULL_RTX | |
718 | && GET_CODE (op) == SUBREG | |
719 | && SUBREG_BYTE (op) == 0 | |
720 | && (GET_MODE_SIZE (innermode) | |
721 | > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))) | |
722 | return CONST0_RTX (outermode); | |
723 | ||
724 | gcc_assert (ret != NULL_RTX); | |
725 | return ret; | |
e53a16e7 ILT |
726 | } |
727 | ||
728 | /* Return whether we should resolve X into the registers into which it | |
729 | was decomposed. */ | |
730 | ||
731 | static bool | |
732 | resolve_reg_p (rtx x) | |
733 | { | |
734 | return GET_CODE (x) == CONCATN; | |
735 | } | |
736 | ||
737 | /* Return whether X is a SUBREG of a register which we need to | |
738 | resolve. */ | |
739 | ||
740 | static bool | |
741 | resolve_subreg_p (rtx x) | |
742 | { | |
743 | if (GET_CODE (x) != SUBREG) | |
744 | return false; | |
745 | return resolve_reg_p (SUBREG_REG (x)); | |
746 | } | |
747 | ||
cf55cb6a | 748 | /* Look for SUBREGs in *LOC which need to be decomposed. */ |
e53a16e7 | 749 | |
cf55cb6a RS |
750 | static bool |
751 | resolve_subreg_use (rtx *loc, rtx insn) | |
e53a16e7 | 752 | { |
cf55cb6a RS |
753 | subrtx_ptr_iterator::array_type array; |
754 | FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST) | |
e53a16e7 | 755 | { |
cf55cb6a RS |
756 | rtx *loc = *iter; |
757 | rtx x = *loc; | |
758 | if (resolve_subreg_p (x)) | |
e53a16e7 | 759 | { |
cf55cb6a RS |
760 | x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x), |
761 | SUBREG_BYTE (x)); | |
e53a16e7 | 762 | |
cf55cb6a RS |
763 | /* It is possible for a note to contain a reference which we can |
764 | decompose. In this case, return 1 to the caller to indicate | |
765 | that the note must be removed. */ | |
766 | if (!x) | |
767 | { | |
768 | gcc_assert (!insn); | |
769 | return true; | |
770 | } | |
e53a16e7 | 771 | |
cf55cb6a RS |
772 | validate_change (insn, loc, x, 1); |
773 | iter.skip_subrtxes (); | |
774 | } | |
775 | else if (resolve_reg_p (x)) | |
776 | /* Return 1 to the caller to indicate that we found a direct | |
777 | reference to a register which is being decomposed. This can | |
778 | happen inside notes, multiword shift or zero-extend | |
779 | instructions. */ | |
780 | return true; | |
e53a16e7 ILT |
781 | } |
782 | ||
cf55cb6a | 783 | return false; |
e53a16e7 ILT |
784 | } |
785 | ||
e53a16e7 ILT |
786 | /* Resolve any decomposed registers which appear in register notes on |
787 | INSN. */ | |
788 | ||
789 | static void | |
e967cc2f | 790 | resolve_reg_notes (rtx_insn *insn) |
e53a16e7 ILT |
791 | { |
792 | rtx *pnote, note; | |
793 | ||
794 | note = find_reg_equal_equiv_note (insn); | |
795 | if (note) | |
796 | { | |
6fb5fa3c | 797 | int old_count = num_validated_changes (); |
cf55cb6a | 798 | if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX)) |
4a8cae83 | 799 | remove_note (insn, note); |
6fb5fa3c DB |
800 | else |
801 | if (old_count != num_validated_changes ()) | |
802 | df_notes_rescan (insn); | |
e53a16e7 ILT |
803 | } |
804 | ||
805 | pnote = ®_NOTES (insn); | |
806 | while (*pnote != NULL_RTX) | |
807 | { | |
60564289 | 808 | bool del = false; |
e53a16e7 ILT |
809 | |
810 | note = *pnote; | |
811 | switch (REG_NOTE_KIND (note)) | |
812 | { | |
6fb5fa3c DB |
813 | case REG_DEAD: |
814 | case REG_UNUSED: | |
e53a16e7 | 815 | if (resolve_reg_p (XEXP (note, 0))) |
60564289 | 816 | del = true; |
e53a16e7 ILT |
817 | break; |
818 | ||
819 | default: | |
820 | break; | |
821 | } | |
822 | ||
60564289 | 823 | if (del) |
e53a16e7 ILT |
824 | *pnote = XEXP (note, 1); |
825 | else | |
826 | pnote = &XEXP (note, 1); | |
827 | } | |
828 | } | |
829 | ||
2b54c30f | 830 | /* Return whether X can be decomposed into subwords. */ |
e53a16e7 ILT |
831 | |
832 | static bool | |
2b54c30f | 833 | can_decompose_p (rtx x) |
e53a16e7 ILT |
834 | { |
835 | if (REG_P (x)) | |
836 | { | |
837 | unsigned int regno = REGNO (x); | |
838 | ||
839 | if (HARD_REGISTER_NUM_P (regno)) | |
488c8379 RS |
840 | { |
841 | unsigned int byte, num_bytes; | |
842 | ||
843 | num_bytes = GET_MODE_SIZE (GET_MODE (x)); | |
844 | for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD) | |
845 | if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0) | |
846 | return false; | |
847 | return true; | |
848 | } | |
e53a16e7 | 849 | else |
402464a0 | 850 | return !bitmap_bit_p (subreg_context, regno); |
e53a16e7 ILT |
851 | } |
852 | ||
2b54c30f | 853 | return true; |
e53a16e7 ILT |
854 | } |
855 | ||
856 | /* Decompose the registers used in a simple move SET within INSN. If | |
857 | we don't change anything, return INSN, otherwise return the start | |
858 | of the sequence of moves. */ | |
859 | ||
e967cc2f DM |
860 | static rtx_insn * |
861 | resolve_simple_move (rtx set, rtx_insn *insn) | |
e53a16e7 | 862 | { |
e967cc2f DM |
863 | rtx src, dest, real_dest; |
864 | rtx_insn *insns; | |
ef4bddc2 | 865 | machine_mode orig_mode, dest_mode; |
e53a16e7 ILT |
866 | unsigned int words; |
867 | bool pushing; | |
868 | ||
869 | src = SET_SRC (set); | |
870 | dest = SET_DEST (set); | |
871 | orig_mode = GET_MODE (dest); | |
872 | ||
873 | words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; | |
af4ba423 | 874 | gcc_assert (words > 1); |
e53a16e7 ILT |
875 | |
876 | start_sequence (); | |
877 | ||
878 | /* We have to handle copying from a SUBREG of a decomposed reg where | |
879 | the SUBREG is larger than word size. Rather than assume that we | |
880 | can take a word_mode SUBREG of the destination, we copy to a new | |
881 | register and then copy that to the destination. */ | |
882 | ||
883 | real_dest = NULL_RTX; | |
884 | ||
885 | if (GET_CODE (src) == SUBREG | |
886 | && resolve_reg_p (SUBREG_REG (src)) | |
887 | && (SUBREG_BYTE (src) != 0 | |
888 | || (GET_MODE_SIZE (orig_mode) | |
889 | != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src)))))) | |
890 | { | |
891 | real_dest = dest; | |
892 | dest = gen_reg_rtx (orig_mode); | |
893 | if (REG_P (real_dest)) | |
894 | REG_ATTRS (dest) = REG_ATTRS (real_dest); | |
895 | } | |
896 | ||
897 | /* Similarly if we are copying to a SUBREG of a decomposed reg where | |
898 | the SUBREG is larger than word size. */ | |
899 | ||
900 | if (GET_CODE (dest) == SUBREG | |
901 | && resolve_reg_p (SUBREG_REG (dest)) | |
902 | && (SUBREG_BYTE (dest) != 0 | |
903 | || (GET_MODE_SIZE (orig_mode) | |
904 | != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest)))))) | |
905 | { | |
e967cc2f DM |
906 | rtx reg, smove; |
907 | rtx_insn *minsn; | |
e53a16e7 ILT |
908 | |
909 | reg = gen_reg_rtx (orig_mode); | |
910 | minsn = emit_move_insn (reg, src); | |
911 | smove = single_set (minsn); | |
912 | gcc_assert (smove != NULL_RTX); | |
913 | resolve_simple_move (smove, minsn); | |
914 | src = reg; | |
915 | } | |
916 | ||
917 | /* If we didn't have any big SUBREGS of decomposed registers, and | |
918 | neither side of the move is a register we are decomposing, then | |
919 | we don't have to do anything here. */ | |
920 | ||
921 | if (src == SET_SRC (set) | |
922 | && dest == SET_DEST (set) | |
923 | && !resolve_reg_p (src) | |
924 | && !resolve_subreg_p (src) | |
925 | && !resolve_reg_p (dest) | |
926 | && !resolve_subreg_p (dest)) | |
927 | { | |
928 | end_sequence (); | |
929 | return insn; | |
930 | } | |
931 | ||
30d18db4 ILT |
932 | /* It's possible for the code to use a subreg of a decomposed |
933 | register while forming an address. We need to handle that before | |
934 | passing the address to emit_move_insn. We pass NULL_RTX as the | |
935 | insn parameter to resolve_subreg_use because we can not validate | |
936 | the insn yet. */ | |
937 | if (MEM_P (src) || MEM_P (dest)) | |
938 | { | |
939 | int acg; | |
940 | ||
941 | if (MEM_P (src)) | |
cf55cb6a | 942 | resolve_subreg_use (&XEXP (src, 0), NULL_RTX); |
30d18db4 | 943 | if (MEM_P (dest)) |
cf55cb6a | 944 | resolve_subreg_use (&XEXP (dest, 0), NULL_RTX); |
30d18db4 ILT |
945 | acg = apply_change_group (); |
946 | gcc_assert (acg); | |
947 | } | |
948 | ||
e53a16e7 ILT |
949 | /* If SRC is a register which we can't decompose, or has side |
950 | effects, we need to move via a temporary register. */ | |
951 | ||
2b54c30f | 952 | if (!can_decompose_p (src) |
e53a16e7 ILT |
953 | || side_effects_p (src) |
954 | || GET_CODE (src) == ASM_OPERANDS) | |
955 | { | |
956 | rtx reg; | |
957 | ||
958 | reg = gen_reg_rtx (orig_mode); | |
ce5d49a8 ZC |
959 | |
960 | #ifdef AUTO_INC_DEC | |
961 | { | |
962 | rtx move = emit_move_insn (reg, src); | |
963 | if (MEM_P (src)) | |
964 | { | |
965 | rtx note = find_reg_note (insn, REG_INC, NULL_RTX); | |
966 | if (note) | |
967 | add_reg_note (move, REG_INC, XEXP (note, 0)); | |
968 | } | |
969 | } | |
970 | #else | |
e53a16e7 | 971 | emit_move_insn (reg, src); |
ce5d49a8 | 972 | #endif |
e53a16e7 ILT |
973 | src = reg; |
974 | } | |
975 | ||
976 | /* If DEST is a register which we can't decompose, or has side | |
977 | effects, we need to first move to a temporary register. We | |
978 | handle the common case of pushing an operand directly. We also | |
979 | go through a temporary register if it holds a floating point | |
980 | value. This gives us better code on systems which can't move | |
981 | data easily between integer and floating point registers. */ | |
982 | ||
983 | dest_mode = orig_mode; | |
984 | pushing = push_operand (dest, dest_mode); | |
2b54c30f | 985 | if (!can_decompose_p (dest) |
e53a16e7 ILT |
986 | || (side_effects_p (dest) && !pushing) |
987 | || (!SCALAR_INT_MODE_P (dest_mode) | |
988 | && !resolve_reg_p (dest) | |
989 | && !resolve_subreg_p (dest))) | |
990 | { | |
991 | if (real_dest == NULL_RTX) | |
992 | real_dest = dest; | |
993 | if (!SCALAR_INT_MODE_P (dest_mode)) | |
994 | { | |
995 | dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT, | |
996 | MODE_INT, 0); | |
997 | gcc_assert (dest_mode != BLKmode); | |
998 | } | |
999 | dest = gen_reg_rtx (dest_mode); | |
1000 | if (REG_P (real_dest)) | |
1001 | REG_ATTRS (dest) = REG_ATTRS (real_dest); | |
1002 | } | |
1003 | ||
1004 | if (pushing) | |
1005 | { | |
1006 | unsigned int i, j, jinc; | |
1007 | ||
1008 | gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0); | |
1009 | gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY); | |
1010 | gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY); | |
1011 | ||
1012 | if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD) | |
1013 | { | |
1014 | j = 0; | |
1015 | jinc = 1; | |
1016 | } | |
1017 | else | |
1018 | { | |
1019 | j = words - 1; | |
1020 | jinc = -1; | |
1021 | } | |
1022 | ||
1023 | for (i = 0; i < words; ++i, j += jinc) | |
1024 | { | |
1025 | rtx temp; | |
1026 | ||
1027 | temp = copy_rtx (XEXP (dest, 0)); | |
1028 | temp = adjust_automodify_address_nv (dest, word_mode, temp, | |
1029 | j * UNITS_PER_WORD); | |
1030 | emit_move_insn (temp, | |
1031 | simplify_gen_subreg_concatn (word_mode, src, | |
1032 | orig_mode, | |
1033 | j * UNITS_PER_WORD)); | |
1034 | } | |
1035 | } | |
1036 | else | |
1037 | { | |
1038 | unsigned int i; | |
1039 | ||
1040 | if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest))) | |
c41c1387 | 1041 | emit_clobber (dest); |
e53a16e7 ILT |
1042 | |
1043 | for (i = 0; i < words; ++i) | |
1044 | emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest, | |
1045 | dest_mode, | |
1046 | i * UNITS_PER_WORD), | |
1047 | simplify_gen_subreg_concatn (word_mode, src, | |
1048 | orig_mode, | |
1049 | i * UNITS_PER_WORD)); | |
1050 | } | |
1051 | ||
1052 | if (real_dest != NULL_RTX) | |
1053 | { | |
e967cc2f DM |
1054 | rtx mdest, smove; |
1055 | rtx_insn *minsn; | |
e53a16e7 ILT |
1056 | |
1057 | if (dest_mode == orig_mode) | |
1058 | mdest = dest; | |
1059 | else | |
1060 | mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0); | |
1061 | minsn = emit_move_insn (real_dest, mdest); | |
1062 | ||
ce5d49a8 ZC |
1063 | #ifdef AUTO_INC_DEC |
1064 | if (MEM_P (real_dest) | |
1065 | && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest))) | |
1066 | { | |
1067 | rtx note = find_reg_note (insn, REG_INC, NULL_RTX); | |
1068 | if (note) | |
1069 | add_reg_note (minsn, REG_INC, XEXP (note, 0)); | |
1070 | } | |
1071 | #endif | |
1072 | ||
e53a16e7 ILT |
1073 | smove = single_set (minsn); |
1074 | gcc_assert (smove != NULL_RTX); | |
1075 | ||
1076 | resolve_simple_move (smove, minsn); | |
1077 | } | |
1078 | ||
1079 | insns = get_insns (); | |
1080 | end_sequence (); | |
1081 | ||
1d65f45c | 1082 | copy_reg_eh_region_note_forward (insn, insns, NULL_RTX); |
73663bb7 | 1083 | |
e53a16e7 ILT |
1084 | emit_insn_before (insns, insn); |
1085 | ||
82981227 | 1086 | /* If we get here via self-recursion, then INSN is not yet in the insns |
6873ecab SB |
1087 | chain and delete_insn will fail. We only want to remove INSN from the |
1088 | current sequence. See PR56738. */ | |
1089 | if (in_sequence_p ()) | |
1090 | remove_insn (insn); | |
1091 | else | |
1092 | delete_insn (insn); | |
e53a16e7 ILT |
1093 | |
1094 | return insns; | |
1095 | } | |
1096 | ||
1097 | /* Change a CLOBBER of a decomposed register into a CLOBBER of the | |
1098 | component registers. Return whether we changed something. */ | |
1099 | ||
1100 | static bool | |
e967cc2f | 1101 | resolve_clobber (rtx pat, rtx_insn *insn) |
e53a16e7 | 1102 | { |
d70dcf29 | 1103 | rtx reg; |
ef4bddc2 | 1104 | machine_mode orig_mode; |
e53a16e7 | 1105 | unsigned int words, i; |
7e0c3f57 | 1106 | int ret; |
e53a16e7 ILT |
1107 | |
1108 | reg = XEXP (pat, 0); | |
9a5a8e58 | 1109 | if (!resolve_reg_p (reg) && !resolve_subreg_p (reg)) |
e53a16e7 ILT |
1110 | return false; |
1111 | ||
1112 | orig_mode = GET_MODE (reg); | |
1113 | words = GET_MODE_SIZE (orig_mode); | |
1114 | words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; | |
1115 | ||
7e0c3f57 ILT |
1116 | ret = validate_change (NULL_RTX, &XEXP (pat, 0), |
1117 | simplify_gen_subreg_concatn (word_mode, reg, | |
1118 | orig_mode, 0), | |
1119 | 0); | |
6fb5fa3c | 1120 | df_insn_rescan (insn); |
7e0c3f57 ILT |
1121 | gcc_assert (ret != 0); |
1122 | ||
e53a16e7 ILT |
1123 | for (i = words - 1; i > 0; --i) |
1124 | { | |
1125 | rtx x; | |
1126 | ||
9a5a8e58 ILT |
1127 | x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode, |
1128 | i * UNITS_PER_WORD); | |
e53a16e7 ILT |
1129 | x = gen_rtx_CLOBBER (VOIDmode, x); |
1130 | emit_insn_after (x, insn); | |
1131 | } | |
1132 | ||
d4fd3465 ILT |
1133 | resolve_reg_notes (insn); |
1134 | ||
e53a16e7 ILT |
1135 | return true; |
1136 | } | |
1137 | ||
1138 | /* A USE of a decomposed register is no longer meaningful. Return | |
1139 | whether we changed something. */ | |
1140 | ||
1141 | static bool | |
e967cc2f | 1142 | resolve_use (rtx pat, rtx_insn *insn) |
e53a16e7 ILT |
1143 | { |
1144 | if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0))) | |
1145 | { | |
1146 | delete_insn (insn); | |
1147 | return true; | |
1148 | } | |
d4fd3465 ILT |
1149 | |
1150 | resolve_reg_notes (insn); | |
1151 | ||
e53a16e7 ILT |
1152 | return false; |
1153 | } | |
1154 | ||
b5b8b0ac AO |
1155 | /* A VAR_LOCATION can be simplified. */ |
1156 | ||
1157 | static void | |
e967cc2f | 1158 | resolve_debug (rtx_insn *insn) |
b5b8b0ac | 1159 | { |
f2d3f347 RS |
1160 | subrtx_ptr_iterator::array_type array; |
1161 | FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST) | |
1162 | { | |
1163 | rtx *loc = *iter; | |
1164 | rtx x = *loc; | |
1165 | if (resolve_subreg_p (x)) | |
1166 | { | |
1167 | x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x), | |
1168 | SUBREG_BYTE (x)); | |
1169 | ||
1170 | if (x) | |
1171 | *loc = x; | |
1172 | else | |
1173 | x = copy_rtx (*loc); | |
1174 | } | |
1175 | if (resolve_reg_p (x)) | |
1176 | *loc = copy_rtx (x); | |
1177 | } | |
b5b8b0ac AO |
1178 | |
1179 | df_insn_rescan (insn); | |
1180 | ||
1181 | resolve_reg_notes (insn); | |
1182 | } | |
1183 | ||
af4ba423 KZ |
1184 | /* Check if INSN is a decomposable multiword-shift or zero-extend and |
1185 | set the decomposable_context bitmap accordingly. SPEED_P is true | |
1186 | if we are optimizing INSN for speed rather than size. Return true | |
1187 | if INSN is decomposable. */ | |
e0892570 | 1188 | |
af4ba423 | 1189 | static bool |
e967cc2f | 1190 | find_decomposable_shift_zext (rtx_insn *insn, bool speed_p) |
e0892570 AK |
1191 | { |
1192 | rtx set; | |
1193 | rtx op; | |
1194 | rtx op_operand; | |
1195 | ||
1196 | set = single_set (insn); | |
1197 | if (!set) | |
af4ba423 | 1198 | return false; |
e0892570 AK |
1199 | |
1200 | op = SET_SRC (set); | |
1201 | if (GET_CODE (op) != ASHIFT | |
1202 | && GET_CODE (op) != LSHIFTRT | |
d7fde18c | 1203 | && GET_CODE (op) != ASHIFTRT |
e0892570 | 1204 | && GET_CODE (op) != ZERO_EXTEND) |
af4ba423 | 1205 | return false; |
e0892570 AK |
1206 | |
1207 | op_operand = XEXP (op, 0); | |
1208 | if (!REG_P (SET_DEST (set)) || !REG_P (op_operand) | |
1209 | || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set))) | |
1210 | || HARD_REGISTER_NUM_P (REGNO (op_operand)) | |
af4ba423 KZ |
1211 | || GET_MODE (op) != twice_word_mode) |
1212 | return false; | |
e0892570 AK |
1213 | |
1214 | if (GET_CODE (op) == ZERO_EXTEND) | |
1215 | { | |
1216 | if (GET_MODE (op_operand) != word_mode | |
af4ba423 KZ |
1217 | || !choices[speed_p].splitting_zext) |
1218 | return false; | |
e0892570 AK |
1219 | } |
1220 | else /* left or right shift */ | |
1221 | { | |
af4ba423 KZ |
1222 | bool *splitting = (GET_CODE (op) == ASHIFT |
1223 | ? choices[speed_p].splitting_ashift | |
d7fde18c JJ |
1224 | : GET_CODE (op) == ASHIFTRT |
1225 | ? choices[speed_p].splitting_ashiftrt | |
af4ba423 | 1226 | : choices[speed_p].splitting_lshiftrt); |
481683e1 | 1227 | if (!CONST_INT_P (XEXP (op, 1)) |
af4ba423 KZ |
1228 | || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD, |
1229 | 2 * BITS_PER_WORD - 1) | |
1230 | || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD]) | |
1231 | return false; | |
1232 | ||
1233 | bitmap_set_bit (decomposable_context, REGNO (op_operand)); | |
e0892570 AK |
1234 | } |
1235 | ||
1236 | bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set))); | |
1237 | ||
af4ba423 | 1238 | return true; |
e0892570 AK |
1239 | } |
1240 | ||
1241 | /* Decompose a more than word wide shift (in INSN) of a multiword | |
1242 | pseudo or a multiword zero-extend of a wordmode pseudo into a move | |
1243 | and 'set to zero' insn. Return a pointer to the new insn when a | |
1244 | replacement was done. */ | |
1245 | ||
e967cc2f DM |
1246 | static rtx_insn * |
1247 | resolve_shift_zext (rtx_insn *insn) | |
e0892570 AK |
1248 | { |
1249 | rtx set; | |
1250 | rtx op; | |
1251 | rtx op_operand; | |
e967cc2f | 1252 | rtx_insn *insns; |
d7fde18c | 1253 | rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX; |
e0892570 AK |
1254 | int src_reg_num, dest_reg_num, offset1, offset2, src_offset; |
1255 | ||
1256 | set = single_set (insn); | |
1257 | if (!set) | |
e967cc2f | 1258 | return NULL; |
e0892570 AK |
1259 | |
1260 | op = SET_SRC (set); | |
1261 | if (GET_CODE (op) != ASHIFT | |
1262 | && GET_CODE (op) != LSHIFTRT | |
d7fde18c | 1263 | && GET_CODE (op) != ASHIFTRT |
e0892570 | 1264 | && GET_CODE (op) != ZERO_EXTEND) |
e967cc2f | 1265 | return NULL; |
e0892570 AK |
1266 | |
1267 | op_operand = XEXP (op, 0); | |
1268 | ||
af4ba423 KZ |
1269 | /* We can tear this operation apart only if the regs were already |
1270 | torn apart. */ | |
e0892570 | 1271 | if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand)) |
e967cc2f | 1272 | return NULL; |
e0892570 AK |
1273 | |
1274 | /* src_reg_num is the number of the word mode register which we | |
1275 | are operating on. For a left shift and a zero_extend on little | |
1276 | endian machines this is register 0. */ | |
d7fde18c JJ |
1277 | src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT) |
1278 | ? 1 : 0; | |
e0892570 | 1279 | |
acbe5496 AK |
1280 | if (WORDS_BIG_ENDIAN |
1281 | && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD) | |
e0892570 AK |
1282 | src_reg_num = 1 - src_reg_num; |
1283 | ||
1284 | if (GET_CODE (op) == ZERO_EXTEND) | |
acbe5496 | 1285 | dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0; |
e0892570 AK |
1286 | else |
1287 | dest_reg_num = 1 - src_reg_num; | |
1288 | ||
1289 | offset1 = UNITS_PER_WORD * dest_reg_num; | |
1290 | offset2 = UNITS_PER_WORD * (1 - dest_reg_num); | |
1291 | src_offset = UNITS_PER_WORD * src_reg_num; | |
1292 | ||
e0892570 AK |
1293 | start_sequence (); |
1294 | ||
1295 | dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set), | |
1296 | GET_MODE (SET_DEST (set)), | |
1297 | offset1); | |
d7fde18c JJ |
1298 | dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set), |
1299 | GET_MODE (SET_DEST (set)), | |
1300 | offset2); | |
e0892570 AK |
1301 | src_reg = simplify_gen_subreg_concatn (word_mode, op_operand, |
1302 | GET_MODE (op_operand), | |
1303 | src_offset); | |
d7fde18c JJ |
1304 | if (GET_CODE (op) == ASHIFTRT |
1305 | && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1) | |
1306 | upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg), | |
1307 | BITS_PER_WORD - 1, NULL_RTX, 0); | |
1308 | ||
e0892570 AK |
1309 | if (GET_CODE (op) != ZERO_EXTEND) |
1310 | { | |
1311 | int shift_count = INTVAL (XEXP (op, 1)); | |
1312 | if (shift_count > BITS_PER_WORD) | |
1313 | src_reg = expand_shift (GET_CODE (op) == ASHIFT ? | |
1314 | LSHIFT_EXPR : RSHIFT_EXPR, | |
1315 | word_mode, src_reg, | |
eb6c3df1 | 1316 | shift_count - BITS_PER_WORD, |
d7fde18c | 1317 | dest_reg, GET_CODE (op) != ASHIFTRT); |
e0892570 AK |
1318 | } |
1319 | ||
1320 | if (dest_reg != src_reg) | |
1321 | emit_move_insn (dest_reg, src_reg); | |
d7fde18c JJ |
1322 | if (GET_CODE (op) != ASHIFTRT) |
1323 | emit_move_insn (dest_upper, CONST0_RTX (word_mode)); | |
1324 | else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1) | |
1325 | emit_move_insn (dest_upper, copy_rtx (src_reg)); | |
1326 | else | |
1327 | emit_move_insn (dest_upper, upper_src); | |
e0892570 AK |
1328 | insns = get_insns (); |
1329 | ||
1330 | end_sequence (); | |
1331 | ||
1332 | emit_insn_before (insns, insn); | |
1333 | ||
1334 | if (dump_file) | |
1335 | { | |
e967cc2f | 1336 | rtx_insn *in; |
e0892570 AK |
1337 | fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn)); |
1338 | for (in = insns; in != insn; in = NEXT_INSN (in)) | |
1339 | fprintf (dump_file, "%d ", INSN_UID (in)); | |
1340 | fprintf (dump_file, "\n"); | |
1341 | } | |
1342 | ||
1343 | delete_insn (insn); | |
1344 | return insns; | |
1345 | } | |
1346 | ||
af4ba423 KZ |
1347 | /* Print to dump_file a description of what we're doing with shift code CODE. |
1348 | SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */ | |
1349 | ||
1350 | static void | |
1351 | dump_shift_choices (enum rtx_code code, bool *splitting) | |
1352 | { | |
1353 | int i; | |
1354 | const char *sep; | |
1355 | ||
1356 | fprintf (dump_file, | |
1357 | " Splitting mode %s for %s lowering with shift amounts = ", | |
1358 | GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code)); | |
1359 | sep = ""; | |
1360 | for (i = 0; i < BITS_PER_WORD; i++) | |
1361 | if (splitting[i]) | |
1362 | { | |
1363 | fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD); | |
1364 | sep = ","; | |
1365 | } | |
1366 | fprintf (dump_file, "\n"); | |
1367 | } | |
1368 | ||
1369 | /* Print to dump_file a description of what we're doing when optimizing | |
1370 | for speed or size; SPEED_P says which. DESCRIPTION is a description | |
1371 | of the SPEED_P choice. */ | |
1372 | ||
1373 | static void | |
1374 | dump_choices (bool speed_p, const char *description) | |
1375 | { | |
1376 | unsigned int i; | |
1377 | ||
1378 | fprintf (dump_file, "Choices when optimizing for %s:\n", description); | |
1379 | ||
1380 | for (i = 0; i < MAX_MACHINE_MODE; i++) | |
ef4bddc2 | 1381 | if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD) |
af4ba423 KZ |
1382 | fprintf (dump_file, " %s mode %s for copy lowering.\n", |
1383 | choices[speed_p].move_modes_to_split[i] | |
1384 | ? "Splitting" | |
1385 | : "Skipping", | |
ef4bddc2 | 1386 | GET_MODE_NAME ((machine_mode) i)); |
af4ba423 KZ |
1387 | |
1388 | fprintf (dump_file, " %s mode %s for zero_extend lowering.\n", | |
1389 | choices[speed_p].splitting_zext ? "Splitting" : "Skipping", | |
1390 | GET_MODE_NAME (twice_word_mode)); | |
1391 | ||
1392 | dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift); | |
d7fde18c JJ |
1393 | dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt); |
1394 | dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt); | |
af4ba423 KZ |
1395 | fprintf (dump_file, "\n"); |
1396 | } | |
1397 | ||
e53a16e7 | 1398 | /* Look for registers which are always accessed via word-sized SUBREGs |
c2c47e8f UW |
1399 | or -if DECOMPOSE_COPIES is true- via copies. Decompose these |
1400 | registers into several word-sized pseudo-registers. */ | |
e53a16e7 ILT |
1401 | |
1402 | static void | |
c2c47e8f | 1403 | decompose_multiword_subregs (bool decompose_copies) |
e53a16e7 ILT |
1404 | { |
1405 | unsigned int max; | |
1406 | basic_block bb; | |
af4ba423 | 1407 | bool speed_p; |
e53a16e7 | 1408 | |
af4ba423 KZ |
1409 | if (dump_file) |
1410 | { | |
1411 | dump_choices (false, "size"); | |
1412 | dump_choices (true, "speed"); | |
1413 | } | |
1414 | ||
1415 | /* Check if this target even has any modes to consider lowering. */ | |
1416 | if (!choices[false].something_to_do && !choices[true].something_to_do) | |
1417 | { | |
1418 | if (dump_file) | |
1419 | fprintf (dump_file, "Nothing to do!\n"); | |
1420 | return; | |
1421 | } | |
6fb5fa3c | 1422 | |
e53a16e7 ILT |
1423 | max = max_reg_num (); |
1424 | ||
1425 | /* First see if there are any multi-word pseudo-registers. If there | |
1426 | aren't, there is nothing we can do. This should speed up this | |
1427 | pass in the normal case, since it should be faster than scanning | |
1428 | all the insns. */ | |
1429 | { | |
1430 | unsigned int i; | |
af4ba423 | 1431 | bool useful_modes_seen = false; |
e53a16e7 ILT |
1432 | |
1433 | for (i = FIRST_PSEUDO_REGISTER; i < max; ++i) | |
af4ba423 KZ |
1434 | if (regno_reg_rtx[i] != NULL) |
1435 | { | |
ef4bddc2 | 1436 | machine_mode mode = GET_MODE (regno_reg_rtx[i]); |
af4ba423 KZ |
1437 | if (choices[false].move_modes_to_split[(int) mode] |
1438 | || choices[true].move_modes_to_split[(int) mode]) | |
1439 | { | |
1440 | useful_modes_seen = true; | |
1441 | break; | |
1442 | } | |
1443 | } | |
1444 | ||
1445 | if (!useful_modes_seen) | |
e53a16e7 | 1446 | { |
af4ba423 KZ |
1447 | if (dump_file) |
1448 | fprintf (dump_file, "Nothing to lower in this function.\n"); | |
1449 | return; | |
e53a16e7 | 1450 | } |
e53a16e7 ILT |
1451 | } |
1452 | ||
8d074192 | 1453 | if (df) |
af4ba423 KZ |
1454 | { |
1455 | df_set_flags (DF_DEFER_INSN_RESCAN); | |
1456 | run_word_dce (); | |
1457 | } | |
8d074192 | 1458 | |
af4ba423 KZ |
1459 | /* FIXME: It may be possible to change this code to look for each |
1460 | multi-word pseudo-register and to find each insn which sets or | |
1461 | uses that register. That should be faster than scanning all the | |
1462 | insns. */ | |
e53a16e7 ILT |
1463 | |
1464 | decomposable_context = BITMAP_ALLOC (NULL); | |
1465 | non_decomposable_context = BITMAP_ALLOC (NULL); | |
402464a0 | 1466 | subreg_context = BITMAP_ALLOC (NULL); |
e53a16e7 | 1467 | |
9771b263 DN |
1468 | reg_copy_graph.create (max); |
1469 | reg_copy_graph.safe_grow_cleared (max); | |
1470 | memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max); | |
e53a16e7 | 1471 | |
af4ba423 | 1472 | speed_p = optimize_function_for_speed_p (cfun); |
11cd3bed | 1473 | FOR_EACH_BB_FN (bb, cfun) |
e53a16e7 | 1474 | { |
e967cc2f | 1475 | rtx_insn *insn; |
e53a16e7 ILT |
1476 | |
1477 | FOR_BB_INSNS (bb, insn) | |
1478 | { | |
1479 | rtx set; | |
1480 | enum classify_move_insn cmi; | |
1481 | int i, n; | |
1482 | ||
1483 | if (!INSN_P (insn) | |
1484 | || GET_CODE (PATTERN (insn)) == CLOBBER | |
1485 | || GET_CODE (PATTERN (insn)) == USE) | |
1486 | continue; | |
1487 | ||
d5785e76 JJ |
1488 | recog_memoized (insn); |
1489 | ||
af4ba423 | 1490 | if (find_decomposable_shift_zext (insn, speed_p)) |
e0892570 AK |
1491 | continue; |
1492 | ||
e53a16e7 ILT |
1493 | extract_insn (insn); |
1494 | ||
af4ba423 | 1495 | set = simple_move (insn, speed_p); |
e53a16e7 ILT |
1496 | |
1497 | if (!set) | |
1498 | cmi = NOT_SIMPLE_MOVE; | |
1499 | else | |
1500 | { | |
c2c47e8f UW |
1501 | /* We mark pseudo-to-pseudo copies as decomposable during the |
1502 | second pass only. The first pass is so early that there is | |
1503 | good chance such moves will be optimized away completely by | |
1504 | subsequent optimizations anyway. | |
1505 | ||
1506 | However, we call find_pseudo_copy even during the first pass | |
1507 | so as to properly set up the reg_copy_graph. */ | |
4a8cae83 | 1508 | if (find_pseudo_copy (set)) |
c2c47e8f | 1509 | cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE; |
e53a16e7 ILT |
1510 | else |
1511 | cmi = SIMPLE_MOVE; | |
1512 | } | |
1513 | ||
1514 | n = recog_data.n_operands; | |
1515 | for (i = 0; i < n; ++i) | |
1516 | { | |
a36a1928 | 1517 | find_decomposable_subregs (&recog_data.operand[i], &cmi); |
e53a16e7 ILT |
1518 | |
1519 | /* We handle ASM_OPERANDS as a special case to support | |
1520 | things like x86 rdtsc which returns a DImode value. | |
1521 | We can decompose the output, which will certainly be | |
1522 | operand 0, but not the inputs. */ | |
1523 | ||
1524 | if (cmi == SIMPLE_MOVE | |
1525 | && GET_CODE (SET_SRC (set)) == ASM_OPERANDS) | |
1526 | { | |
1527 | gcc_assert (i == 0); | |
1528 | cmi = NOT_SIMPLE_MOVE; | |
1529 | } | |
1530 | } | |
1531 | } | |
1532 | } | |
1533 | ||
1534 | bitmap_and_compl_into (decomposable_context, non_decomposable_context); | |
1535 | if (!bitmap_empty_p (decomposable_context)) | |
1536 | { | |
73663bb7 | 1537 | sbitmap sub_blocks; |
7984c787 SB |
1538 | unsigned int i; |
1539 | sbitmap_iterator sbi; | |
e53a16e7 ILT |
1540 | bitmap_iterator iter; |
1541 | unsigned int regno; | |
1542 | ||
1543 | propagate_pseudo_copies (); | |
1544 | ||
8b1c6fd7 | 1545 | sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun)); |
f61e445a | 1546 | bitmap_clear (sub_blocks); |
e53a16e7 ILT |
1547 | |
1548 | EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter) | |
1549 | decompose_register (regno); | |
1550 | ||
11cd3bed | 1551 | FOR_EACH_BB_FN (bb, cfun) |
e53a16e7 | 1552 | { |
e967cc2f | 1553 | rtx_insn *insn; |
e53a16e7 | 1554 | |
ba4807a0 | 1555 | FOR_BB_INSNS (bb, insn) |
e53a16e7 | 1556 | { |
11895e28 | 1557 | rtx pat; |
e53a16e7 ILT |
1558 | |
1559 | if (!INSN_P (insn)) | |
1560 | continue; | |
1561 | ||
e53a16e7 ILT |
1562 | pat = PATTERN (insn); |
1563 | if (GET_CODE (pat) == CLOBBER) | |
d4fd3465 | 1564 | resolve_clobber (pat, insn); |
e53a16e7 | 1565 | else if (GET_CODE (pat) == USE) |
d4fd3465 | 1566 | resolve_use (pat, insn); |
b5b8b0ac AO |
1567 | else if (DEBUG_INSN_P (insn)) |
1568 | resolve_debug (insn); | |
e53a16e7 ILT |
1569 | else |
1570 | { | |
1571 | rtx set; | |
1572 | int i; | |
1573 | ||
1574 | recog_memoized (insn); | |
1575 | extract_insn (insn); | |
1576 | ||
af4ba423 | 1577 | set = simple_move (insn, speed_p); |
e53a16e7 ILT |
1578 | if (set) |
1579 | { | |
e967cc2f | 1580 | rtx_insn *orig_insn = insn; |
73663bb7 | 1581 | bool cfi = control_flow_insn_p (insn); |
e53a16e7 | 1582 | |
7984c787 SB |
1583 | /* We can end up splitting loads to multi-word pseudos |
1584 | into separate loads to machine word size pseudos. | |
1585 | When this happens, we first had one load that can | |
1586 | throw, and after resolve_simple_move we'll have a | |
1587 | bunch of loads (at least two). All those loads may | |
1588 | trap if we can have non-call exceptions, so they | |
1589 | all will end the current basic block. We split the | |
1590 | block after the outer loop over all insns, but we | |
1591 | make sure here that we will be able to split the | |
1592 | basic block and still produce the correct control | |
1593 | flow graph for it. */ | |
1594 | gcc_assert (!cfi | |
8f4f502f | 1595 | || (cfun->can_throw_non_call_exceptions |
7984c787 SB |
1596 | && can_throw_internal (insn))); |
1597 | ||
e53a16e7 ILT |
1598 | insn = resolve_simple_move (set, insn); |
1599 | if (insn != orig_insn) | |
1600 | { | |
e53a16e7 ILT |
1601 | recog_memoized (insn); |
1602 | extract_insn (insn); | |
73663bb7 ILT |
1603 | |
1604 | if (cfi) | |
d7c028c0 | 1605 | bitmap_set_bit (sub_blocks, bb->index); |
e53a16e7 ILT |
1606 | } |
1607 | } | |
e0892570 AK |
1608 | else |
1609 | { | |
e967cc2f | 1610 | rtx_insn *decomposed_shift; |
e0892570 AK |
1611 | |
1612 | decomposed_shift = resolve_shift_zext (insn); | |
1613 | if (decomposed_shift != NULL_RTX) | |
1614 | { | |
e0892570 AK |
1615 | insn = decomposed_shift; |
1616 | recog_memoized (insn); | |
1617 | extract_insn (insn); | |
1618 | } | |
1619 | } | |
e53a16e7 ILT |
1620 | |
1621 | for (i = recog_data.n_operands - 1; i >= 0; --i) | |
cf55cb6a | 1622 | resolve_subreg_use (recog_data.operand_loc[i], insn); |
e53a16e7 ILT |
1623 | |
1624 | resolve_reg_notes (insn); | |
1625 | ||
1626 | if (num_validated_changes () > 0) | |
1627 | { | |
1628 | for (i = recog_data.n_dups - 1; i >= 0; --i) | |
1629 | { | |
1630 | rtx *pl = recog_data.dup_loc[i]; | |
1631 | int dup_num = recog_data.dup_num[i]; | |
1632 | rtx *px = recog_data.operand_loc[dup_num]; | |
1633 | ||
1a309dfb | 1634 | validate_unshare_change (insn, pl, *px, 1); |
e53a16e7 ILT |
1635 | } |
1636 | ||
1637 | i = apply_change_group (); | |
1638 | gcc_assert (i); | |
e53a16e7 ILT |
1639 | } |
1640 | } | |
e53a16e7 ILT |
1641 | } |
1642 | } | |
1643 | ||
7984c787 SB |
1644 | /* If we had insns to split that caused control flow insns in the middle |
1645 | of a basic block, split those blocks now. Note that we only handle | |
1646 | the case where splitting a load has caused multiple possibly trapping | |
1647 | loads to appear. */ | |
d4ac4ce2 | 1648 | EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi) |
7984c787 | 1649 | { |
e967cc2f | 1650 | rtx_insn *insn, *end; |
7984c787 SB |
1651 | edge fallthru; |
1652 | ||
06e28de2 | 1653 | bb = BASIC_BLOCK_FOR_FN (cfun, i); |
7984c787 SB |
1654 | insn = BB_HEAD (bb); |
1655 | end = BB_END (bb); | |
1656 | ||
1657 | while (insn != end) | |
1658 | { | |
1659 | if (control_flow_insn_p (insn)) | |
1660 | { | |
1661 | /* Split the block after insn. There will be a fallthru | |
1662 | edge, which is OK so we keep it. We have to create the | |
1663 | exception edges ourselves. */ | |
1664 | fallthru = split_block (bb, insn); | |
1665 | rtl_make_eh_edge (NULL, bb, BB_END (bb)); | |
1666 | bb = fallthru->dest; | |
1667 | insn = BB_HEAD (bb); | |
1668 | } | |
1669 | else | |
1670 | insn = NEXT_INSN (insn); | |
1671 | } | |
1672 | } | |
73663bb7 | 1673 | |
73663bb7 | 1674 | sbitmap_free (sub_blocks); |
e53a16e7 ILT |
1675 | } |
1676 | ||
1677 | { | |
1678 | unsigned int i; | |
1679 | bitmap b; | |
1680 | ||
9771b263 | 1681 | FOR_EACH_VEC_ELT (reg_copy_graph, i, b) |
e53a16e7 ILT |
1682 | if (b) |
1683 | BITMAP_FREE (b); | |
1684 | } | |
1685 | ||
9771b263 | 1686 | reg_copy_graph.release (); |
e53a16e7 ILT |
1687 | |
1688 | BITMAP_FREE (decomposable_context); | |
1689 | BITMAP_FREE (non_decomposable_context); | |
402464a0 | 1690 | BITMAP_FREE (subreg_context); |
e53a16e7 ILT |
1691 | } |
1692 | \f | |
e53a16e7 ILT |
1693 | /* Implement first lower subreg pass. */ |
1694 | ||
27a4cd48 DM |
1695 | namespace { |
1696 | ||
1697 | const pass_data pass_data_lower_subreg = | |
e53a16e7 | 1698 | { |
27a4cd48 DM |
1699 | RTL_PASS, /* type */ |
1700 | "subreg1", /* name */ | |
1701 | OPTGROUP_NONE, /* optinfo_flags */ | |
27a4cd48 DM |
1702 | TV_LOWER_SUBREG, /* tv_id */ |
1703 | 0, /* properties_required */ | |
1704 | 0, /* properties_provided */ | |
1705 | 0, /* properties_destroyed */ | |
1706 | 0, /* todo_flags_start */ | |
3bea341f | 1707 | 0, /* todo_flags_finish */ |
e53a16e7 ILT |
1708 | }; |
1709 | ||
27a4cd48 DM |
1710 | class pass_lower_subreg : public rtl_opt_pass |
1711 | { | |
1712 | public: | |
c3284718 RS |
1713 | pass_lower_subreg (gcc::context *ctxt) |
1714 | : rtl_opt_pass (pass_data_lower_subreg, ctxt) | |
27a4cd48 DM |
1715 | {} |
1716 | ||
1717 | /* opt_pass methods: */ | |
1a3d085c | 1718 | virtual bool gate (function *) { return flag_split_wide_types != 0; } |
be55bfe6 TS |
1719 | virtual unsigned int execute (function *) |
1720 | { | |
1721 | decompose_multiword_subregs (false); | |
1722 | return 0; | |
1723 | } | |
27a4cd48 DM |
1724 | |
1725 | }; // class pass_lower_subreg | |
1726 | ||
1727 | } // anon namespace | |
1728 | ||
1729 | rtl_opt_pass * | |
1730 | make_pass_lower_subreg (gcc::context *ctxt) | |
1731 | { | |
1732 | return new pass_lower_subreg (ctxt); | |
1733 | } | |
1734 | ||
be55bfe6 TS |
1735 | /* Implement second lower subreg pass. */ |
1736 | ||
27a4cd48 DM |
1737 | namespace { |
1738 | ||
1739 | const pass_data pass_data_lower_subreg2 = | |
e53a16e7 | 1740 | { |
27a4cd48 DM |
1741 | RTL_PASS, /* type */ |
1742 | "subreg2", /* name */ | |
1743 | OPTGROUP_NONE, /* optinfo_flags */ | |
27a4cd48 DM |
1744 | TV_LOWER_SUBREG, /* tv_id */ |
1745 | 0, /* properties_required */ | |
1746 | 0, /* properties_provided */ | |
1747 | 0, /* properties_destroyed */ | |
1748 | 0, /* todo_flags_start */ | |
3bea341f | 1749 | TODO_df_finish, /* todo_flags_finish */ |
e53a16e7 | 1750 | }; |
27a4cd48 DM |
1751 | |
1752 | class pass_lower_subreg2 : public rtl_opt_pass | |
1753 | { | |
1754 | public: | |
c3284718 RS |
1755 | pass_lower_subreg2 (gcc::context *ctxt) |
1756 | : rtl_opt_pass (pass_data_lower_subreg2, ctxt) | |
27a4cd48 DM |
1757 | {} |
1758 | ||
1759 | /* opt_pass methods: */ | |
1a3d085c | 1760 | virtual bool gate (function *) { return flag_split_wide_types != 0; } |
be55bfe6 TS |
1761 | virtual unsigned int execute (function *) |
1762 | { | |
1763 | decompose_multiword_subregs (true); | |
1764 | return 0; | |
1765 | } | |
27a4cd48 DM |
1766 | |
1767 | }; // class pass_lower_subreg2 | |
1768 | ||
1769 | } // anon namespace | |
1770 | ||
1771 | rtl_opt_pass * | |
1772 | make_pass_lower_subreg2 (gcc::context *ctxt) | |
1773 | { | |
1774 | return new pass_lower_subreg2 (ctxt); | |
1775 | } |