]>
Commit | Line | Data |
---|---|---|
1a6a0f2a | 1 | /* Decompose multiword subregs. |
8e8f6434 | 2 | Copyright (C) 2007-2018 Free Software Foundation, Inc. |
1a6a0f2a | 3 | Contributed by Richard Henderson <rth@redhat.com> |
4 | Ian Lance Taylor <iant@google.com> | |
5 | ||
6 | This file is part of GCC. | |
7 | ||
8 | GCC is free software; you can redistribute it and/or modify it under | |
9 | the terms of the GNU General Public License as published by the Free | |
8c4c00c1 | 10 | Software Foundation; either version 3, or (at your option) any later |
1a6a0f2a | 11 | version. |
12 | ||
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 | for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
8c4c00c1 | 19 | along with GCC; see the file COPYING3. If not see |
20 | <http://www.gnu.org/licenses/>. */ | |
1a6a0f2a | 21 | |
22 | #include "config.h" | |
23 | #include "system.h" | |
24 | #include "coretypes.h" | |
9ef16211 | 25 | #include "backend.h" |
1a6a0f2a | 26 | #include "rtl.h" |
7c29e30e | 27 | #include "tree.h" |
28 | #include "cfghooks.h" | |
9ef16211 | 29 | #include "df.h" |
ad7b10a2 | 30 | #include "memmodel.h" |
1a6a0f2a | 31 | #include "tm_p.h" |
7c29e30e | 32 | #include "expmed.h" |
1a6a0f2a | 33 | #include "insn-config.h" |
7c29e30e | 34 | #include "emit-rtl.h" |
35 | #include "recog.h" | |
94ea8568 | 36 | #include "cfgrtl.h" |
37 | #include "cfgbuild.h" | |
0e8e9be3 | 38 | #include "dce.h" |
1a6a0f2a | 39 | #include "expr.h" |
1a6a0f2a | 40 | #include "tree-pass.h" |
c7944dce | 41 | #include "lower-subreg.h" |
2e3cae91 | 42 | #include "rtl-iter.h" |
5f6dcf1a | 43 | #include "target.h" |
1a6a0f2a | 44 | |
1a6a0f2a | 45 | |
46 | /* Decompose multi-word pseudo-registers into individual | |
c7944dce | 47 | pseudo-registers when possible and profitable. This is possible |
48 | when all the uses of a multi-word register are via SUBREG, or are | |
49 | copies of the register to another location. Breaking apart the | |
50 | register permits more CSE and permits better register allocation. | |
51 | This is profitable if the machine does not have move instructions | |
52 | to do this. | |
53 | ||
54 | This pass only splits moves with modes that are wider than | |
4d5cf08a | 55 | word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with |
56 | integer modes that are twice the width of word_mode. The latter | |
57 | could be generalized if there was a need to do this, but the trend in | |
c7944dce | 58 | architectures is to not need this. |
59 | ||
60 | There are two useful preprocessor defines for use by maintainers: | |
61 | ||
62 | #define LOG_COSTS 1 | |
63 | ||
64 | if you wish to see the actual cost estimates that are being used | |
65 | for each mode wider than word mode and the cost estimates for zero | |
66 | extension and the shifts. This can be useful when port maintainers | |
67 | are tuning insn rtx costs. | |
68 | ||
69 | #define FORCE_LOWERING 1 | |
70 | ||
71 | if you wish to test the pass with all the transformation forced on. | |
72 | This can be useful for finding bugs in the transformations. */ | |
73 | ||
74 | #define LOG_COSTS 0 | |
75 | #define FORCE_LOWERING 0 | |
1a6a0f2a | 76 | |
77 | /* Bit N in this bitmap is set if regno N is used in a context in | |
78 | which we can decompose it. */ | |
79 | static bitmap decomposable_context; | |
80 | ||
81 | /* Bit N in this bitmap is set if regno N is used in a context in | |
82 | which it can not be decomposed. */ | |
83 | static bitmap non_decomposable_context; | |
84 | ||
5277d36e | 85 | /* Bit N in this bitmap is set if regno N is used in a subreg |
86 | which changes the mode but not the size. This typically happens | |
87 | when the register is accessed as a floating-point value; we want to | |
88 | avoid generating accesses to its subwords in integer modes. */ | |
89 | static bitmap subreg_context; | |
90 | ||
1a6a0f2a | 91 | /* Bit N in the bitmap in element M of this array is set if there is a |
92 | copy from reg M to reg N. */ | |
f1f41a6c | 93 | static vec<bitmap> reg_copy_graph; |
1a6a0f2a | 94 | |
c7944dce | 95 | struct target_lower_subreg default_target_lower_subreg; |
96 | #if SWITCHABLE_TARGET | |
97 | struct target_lower_subreg *this_target_lower_subreg | |
98 | = &default_target_lower_subreg; | |
99 | #endif | |
100 | ||
101 | #define twice_word_mode \ | |
102 | this_target_lower_subreg->x_twice_word_mode | |
103 | #define choices \ | |
104 | this_target_lower_subreg->x_choices | |
105 | ||
50e9e5b3 | 106 | /* Return true if MODE is a mode we know how to lower, which here means that GET_MODE_SIZE (MODE).is_constant succeeds. When returning true, |
107 | store its size in bytes in *BYTES and its size in words, rounded up to a whole number of UNITS_PER_WORD, in *WORDS. When returning false, *WORDS is not written. */ | |
108 | ||
109 | static inline bool | |
110 | interesting_mode_p (machine_mode mode, unsigned int *bytes, | |
111 | unsigned int *words) | |
112 | { | |
52acb7ae | 113 | if (!GET_MODE_SIZE (mode).is_constant (bytes)) |
114 | return false; | |
50e9e5b3 | 115 | *words = CEIL (*bytes, UNITS_PER_WORD); |
116 | return true; | |
117 | } | |
118 | ||
c7944dce | 119 | /* Scratch RTXes used while computing costs. shift_cost and compute_costs rewrite their codes, modes and operands in place between cost queries, so no field keeps a fixed shape. */ |
120 | struct cost_rtxes { | |
121 | /* Source and target registers. Their modes are changed with PUT_MODE as different modes are costed. */ | |
122 | rtx source; | |
123 | rtx target; | |
124 | ||
125 | /* A twice_word_mode ZERO_EXTEND of SOURCE. */ | |
126 | rtx zext; | |
127 | ||
128 | /* A shift of SOURCE. shift_cost overwrites its code, mode and shift amount. */ | |
129 | rtx shift; | |
130 | ||
131 | /* A SET of TARGET. compute_costs replaces its source operand. */ | |
132 | rtx set; | |
133 | }; | |
134 | ||
135 | /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the | |
136 | rtxes in RTXES. SPEED_P selects between the speed and size cost. As a side effect RTXES->shift is rewritten in place to the requested shift and RTXES->source is left in mode MODE; callers that need a particular mode afterwards must set it again. */ | |
137 | ||
138 | static int | |
139 | shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code, | |
3754d046 | 140 | machine_mode mode, int op1) |
c7944dce | 141 | { |
c7944dce | 142 | PUT_CODE (rtxes->shift, code); |
143 | PUT_MODE (rtxes->shift, mode); | |
144 | PUT_MODE (rtxes->source, mode); | |
bd39703a | 145 | XEXP (rtxes->shift, 1) = gen_int_shift_amount (mode, op1); |
5ae4887d | 146 | return set_src_cost (rtxes->shift, mode, speed_p); |
c7944dce | 147 | } |
148 | ||
149 | /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X] | |
150 | to true if it is profitable to split a double-word CODE shift | |
151 | of X + BITS_PER_WORD bits. SPEED_P says whether we are testing | |
152 | for speed or size profitability. | |
153 | ||
154 | Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is | |
155 | the cost of moving zero into a word-mode register. WORD_MOVE_COST | |
156 | is the cost of moving between word registers. The split cost is modelled as the sum of two parts: a word_mode shift by X (a plain word move when X is 0), plus WORD_MOVE_ZERO_COST for ASHIFT and LSHIFTRT or, for ASHIFTRT, a word_mode shift by BITS_PER_WORD - 1 (a plain word move when X is BITS_PER_WORD - 1). The shift is split when the double-word shift costs at least as much as that sum, so ties favour splitting, and always when FORCE_LOWERING is set. Entries of SPLITTING are only ever set to true here, never cleared. */ | |
157 | ||
158 | static void | |
159 | compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes, | |
160 | bool *splitting, enum rtx_code code, | |
161 | int word_move_zero_cost, int word_move_cost) | |
162 | { | |
4d5cf08a | 163 | int wide_cost, narrow_cost, upper_cost, i; |
c7944dce | 164 |
165 | for (i = 0; i < BITS_PER_WORD; i++) | |
166 | { | |
167 | wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode, | |
168 | i + BITS_PER_WORD); | |
169 | if (i == 0) | |
170 | narrow_cost = word_move_cost; | |
171 | else | |
172 | narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i); | |
173 | ||
4d5cf08a | 174 | if (code != ASHIFTRT) |
175 | upper_cost = word_move_zero_cost; | |
176 | else if (i == BITS_PER_WORD - 1) | |
177 | upper_cost = word_move_cost; | |
178 | else | |
179 | upper_cost = shift_cost (speed_p, rtxes, code, word_mode, | |
180 | BITS_PER_WORD - 1); | |
181 | ||
c7944dce | 182 | if (LOG_COSTS) |
183 | fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n", | |
184 | GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code), | |
4d5cf08a | 185 | i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost); |
c7944dce | 186 |
4d5cf08a | 187 | if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost) |
c7944dce | 188 | splitting[i] = true; |
189 | } | |
190 | } | |
191 | ||
192 | /* Compute what we should do when optimizing for speed or size; SPEED_P | |
193 | selects which. Use RTXES for computing costs. The decisions are recorded in choices[SPEED_P]: move_modes_to_split and something_to_do for multi-word moves, and splitting_zext plus the three shift arrays when splitting a twice_word_mode move is itself profitable. Flags are only ever set here, never cleared. RTXES is scratch state: the modes of its source and target registers and the source of its SET are overwritten. */ | |
194 | ||
195 | static void | |
196 | compute_costs (bool speed_p, struct cost_rtxes *rtxes) | |
197 | { | |
198 | unsigned int i; | |
199 | int word_move_zero_cost, word_move_cost; | |
200 | ||
a6d935b7 | 201 | PUT_MODE (rtxes->target, word_mode); |
| | /* A previous call may have left SOURCE in a multi-word mode (the mode loop below changes it and only the shift costing puts it back), so reset it before costing the word-mode register move. */ |
| | PUT_MODE (rtxes->source, word_mode); |
c7944dce | 202 | SET_SRC (rtxes->set) = CONST0_RTX (word_mode); |
a6d935b7 | 203 | word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p); |
c7944dce | 204 |
205 | SET_SRC (rtxes->set) = rtxes->source; | |
a6d935b7 | 206 | word_move_cost = set_rtx_cost (rtxes->set, speed_p); |
c7944dce | 207 |
208 | if (LOG_COSTS) | |
209 | fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n", | |
210 | GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost); | |
211 | ||
212 | for (i = 0; i < MAX_MACHINE_MODE; i++) | |
213 | { | |
3754d046 | 214 | machine_mode mode = (machine_mode) i; |
50e9e5b3 | 215 | unsigned int size, factor; |
216 | if (interesting_mode_p (mode, &size, &factor) && factor > 1) | |
c7944dce | 217 | { |
50e9e5b3 | 218 | unsigned int mode_move_cost; |
c7944dce | 219 |
220 | PUT_MODE (rtxes->target, mode); | |
221 | PUT_MODE (rtxes->source, mode); | |
a6d935b7 | 222 | mode_move_cost = set_rtx_cost (rtxes->set, speed_p); |
c7944dce | 223 |
224 | if (LOG_COSTS) | |
225 | fprintf (stderr, "%s move: original cost %u, split cost %d * %u\n", | |
226 | GET_MODE_NAME (mode), mode_move_cost, | |
227 | word_move_cost, factor); | |
228 | ||
229 | if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor) | |
230 | { | |
231 | choices[speed_p].move_modes_to_split[i] = true; | |
232 | choices[speed_p].something_to_do = true; | |
233 | } | |
234 | } | |
235 | } | |
236 | ||
237 | /* For the moves and shifts, the only case that is checked is one | |
238 | where the mode of the target is an integer mode twice the width | |
239 | of the word_mode. | |
240 | ||
241 | If it is not profitable to split a double word move then do not | |
242 | even consider the shifts or the zero extension. */ | |
243 | if (choices[speed_p].move_modes_to_split[(int) twice_word_mode]) | |
244 | { | |
245 | int zext_cost; | |
246 | ||
247 | /* The only case to check here is whether moving the upper part with a | |
248 | zero is cheaper than doing the zext itself. */ | |
c7944dce | 249 | PUT_MODE (rtxes->source, word_mode); |
5ae4887d | 250 | zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p); |
c7944dce | 251 |
252 | if (LOG_COSTS) | |
253 | fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n", | |
254 | GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND), | |
255 | zext_cost, word_move_cost, word_move_zero_cost); | |
256 | ||
257 | if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost) | |
258 | choices[speed_p].splitting_zext = true; | |
259 | ||
260 | compute_splitting_shift (speed_p, rtxes, | |
261 | choices[speed_p].splitting_ashift, ASHIFT, | |
262 | word_move_zero_cost, word_move_cost); | |
263 | compute_splitting_shift (speed_p, rtxes, | |
264 | choices[speed_p].splitting_lshiftrt, LSHIFTRT, | |
265 | word_move_zero_cost, word_move_cost); | |
4d5cf08a | 266 | compute_splitting_shift (speed_p, rtxes, |
267 | choices[speed_p].splitting_ashiftrt, ASHIFTRT, | |
268 | word_move_zero_cost, word_move_cost); | |
c7944dce | 269 | } |
270 | } | |
271 | ||
272 | /* Do once-per-target initialisation. This involves determining | |
273 | which operations on the machine are profitable. If none are found, | |
274 | then the pass just returns when called. The target's lower-subreg state is zeroed first, twice_word_mode is set to the mode twice as wide as word_mode, and scratch rtxes are built around two registers numbered just above LAST_VIRTUAL_REGISTER. compute_costs is then run on them for size (SPEED_P false) and then for speed (SPEED_P true). */ | |
275 | ||
276 | void | |
277 | init_lower_subreg (void) | |
278 | { | |
279 | struct cost_rtxes rtxes; | |
280 | ||
281 | memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg)); | |
282 | ||
28ebc73c | 283 | twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode).require (); |
c7944dce | 284 |
dcd6d0f4 | 285 | rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1); |
286 | rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2); | |
d1f9b275 | 287 | rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source); |
c7944dce | 288 | rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source); |
289 | rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx); | |
290 | ||
291 | if (LOG_COSTS) | |
292 | fprintf (stderr, "\nSize costs\n==========\n\n"); | |
293 | compute_costs (false, &rtxes); | |
294 | ||
295 | if (LOG_COSTS) | |
296 | fprintf (stderr, "\nSpeed costs\n===========\n\n"); | |
297 | compute_costs (true, &rtxes); | |
298 | } | |
67c3f580 | 299 | |
300 | /* Return true if X is acceptable as an operand of a simple move: after looking through one outer SUBREG it must be an object (OBJECT_P) other than a LABEL_REF, SYMBOL_REF, HIGH or CONST, and if it is a MEM it must be neither volatile nor have a mode-dependent address. */ static bool | |
301 | simple_move_operand (rtx x) | |
302 | { | |
303 | if (GET_CODE (x) == SUBREG) | |
304 | x = SUBREG_REG (x); | |
305 | ||
306 | if (!OBJECT_P (x)) | |
307 | return false; | |
308 | ||
309 | if (GET_CODE (x) == LABEL_REF | |
310 | || GET_CODE (x) == SYMBOL_REF | |
ab9eaa97 | 311 | || GET_CODE (x) == HIGH |
312 | || GET_CODE (x) == CONST) | |
67c3f580 | 313 | return false; |
314 | ||
315 | if (MEM_P (x) | |
316 | && (MEM_VOLATILE_P (x) | |
4e27ffd0 | 317 | || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x)))) |
67c3f580 | 318 | return false; |
319 | ||
320 | return true; | |
321 | } | |
322 | ||
c7944dce | 323 | /* If INSN is a single set between two objects that we want to split, |
324 | return the single set; otherwise return NULL_RTX. SPEED_P says whether we are optimizing | |
325 | INSN for speed or size. The insn must have exactly two recognized operands, the destination and source of the set must each be one of them, the destination must satisfy simple_move_operand, the source must do so too unless it is an ASM_OPERANDS, and the destination mode must be one that choices[SPEED_P] says is worth splitting. | |
326 | ||
327 | INSN should have been passed to recog and extract_insn before this | |
328 | is called, because the operand checks read recog_data. */ | |
1a6a0f2a | 329 |
330 | static rtx | |
a5942062 | 331 | simple_move (rtx_insn *insn, bool speed_p) |
1a6a0f2a | 332 | { |
333 | rtx x; | |
334 | rtx set; | |
3754d046 | 335 | machine_mode mode; |
1a6a0f2a | 336 |
337 | if (recog_data.n_operands != 2) | |
338 | return NULL_RTX; | |
339 | ||
340 | set = single_set (insn); | |
341 | if (!set) | |
342 | return NULL_RTX; | |
343 | ||
344 | x = SET_DEST (set); | |
345 | if (x != recog_data.operand[0] && x != recog_data.operand[1]) | |
346 | return NULL_RTX; | |
67c3f580 | 347 | if (!simple_move_operand (x)) |
1a6a0f2a | 348 | return NULL_RTX; |
349 | ||
350 | x = SET_SRC (set); | |
351 | if (x != recog_data.operand[0] && x != recog_data.operand[1]) | |
352 | return NULL_RTX; | |
67c3f580 | 353 | /* For the src we can handle ASM_OPERANDS, and it is beneficial for |
354 | things like x86 rdtsc which returns a DImode value. */ | |
355 | if (GET_CODE (x) != ASM_OPERANDS | |
356 | && !simple_move_operand (x)) | |
1a6a0f2a | 357 | return NULL_RTX; |
358 | ||
359 | /* We try to decompose in integer modes, to avoid generating | |
360 | inefficient code copying between integer and floating point | |
361 | registers. That means that we can't decompose if this is a | |
362 | non-integer mode for which there is no integer mode of the same | |
363 | size. */ | |
36c98bd9 | 364 | mode = GET_MODE (SET_DEST (set)); |
1a6a0f2a | 365 | if (!SCALAR_INT_MODE_P (mode) |
44504d18 | 366 | && !int_mode_for_size (GET_MODE_BITSIZE (mode), 0).exists ()) |
1a6a0f2a | 367 | return NULL_RTX; |
368 | ||
5e016dfc | 369 | /* Reject PARTIAL_INT modes. They are used for processor specific |
370 | purposes and it's probably best not to tamper with them. */ | |
371 | if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) | |
372 | return NULL_RTX; | |
373 | ||
c7944dce | 374 | if (!choices[speed_p].move_modes_to_split[(int) mode]) |
375 | return NULL_RTX; | |
376 | ||
1a6a0f2a | 377 | return set; |
378 | } | |
379 | ||
380 | /* If SET is a copy from one multi-word pseudo-register to another, | |
381 | record that in reg_copy_graph. Return whether it is such a | |
382 | copy. The checks made here are only that both sides are REGs and that neither is a hard register; the register width is not tested (NOTE(review): presumably callers only pass multi-word moves -- confirm). The copy is recorded by setting the destination's bit in the source register's bitmap, which is allocated on first use. */ | |
383 | ||
384 | static bool | |
385 | find_pseudo_copy (rtx set) | |
386 | { | |
387 | rtx dest = SET_DEST (set); | |
388 | rtx src = SET_SRC (set); | |
389 | unsigned int rd, rs; | |
390 | bitmap b; | |
391 | ||
392 | if (!REG_P (dest) || !REG_P (src)) | |
393 | return false; | |
394 | ||
395 | rd = REGNO (dest); | |
396 | rs = REGNO (src); | |
397 | if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs)) | |
398 | return false; | |
399 | ||
f1f41a6c | 400 | b = reg_copy_graph[rs]; |
1a6a0f2a | 401 | if (b == NULL) |
402 | { | |
403 | b = BITMAP_ALLOC (NULL); | |
f1f41a6c | 404 | reg_copy_graph[rs] = b; |
1a6a0f2a | 405 | } |
406 | ||
407 | bitmap_set_bit (b, rd); | |
408 | ||
409 | return true; | |
410 | } | |
411 | ||
412 | /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case | |
413 | where they are copied to another register, add the register to | |
414 | which they are copied to DECOMPOSABLE_CONTEXT. Use | |
415 | NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track | |
416 | copies of registers which are in NON_DECOMPOSABLE_CONTEXT. This is a worklist iteration: QUEUE starts as a copy of DECOMPOSABLE_CONTEXT, each round gathers in PROPAGATE the copy targets of the queued registers that are not non-decomposable, and the next QUEUE is the part of PROPAGATE not yet in DECOMPOSABLE_CONTEXT. The loop ends when a round adds nothing new. */ | |
417 | ||
418 | static void | |
419 | propagate_pseudo_copies (void) | |
420 | { | |
035def86 | 421 | auto_bitmap queue, propagate; |
1a6a0f2a | 422 |
423 | bitmap_copy (queue, decomposable_context); | |
424 | do | |
425 | { | |
426 | bitmap_iterator iter; | |
427 | unsigned int i; | |
428 | ||
429 | bitmap_clear (propagate); | |
430 | ||
431 | EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter) | |
432 | { | |
f1f41a6c | 433 | bitmap b = reg_copy_graph[i]; |
1a6a0f2a | 434 | if (b) |
435 | bitmap_ior_and_compl_into (propagate, b, non_decomposable_context); | |
436 | } | |
437 | ||
438 | bitmap_and_compl (queue, propagate, decomposable_context); | |
439 | bitmap_ior_into (decomposable_context, propagate); | |
440 | } | |
441 | while (!bitmap_empty_p (queue)); | |
1a6a0f2a | 442 | } |
443 | ||
444 | /* A pointer to one of these values is passed to | |
665db605 | 445 | find_decomposable_subregs, where it decides how a direct reference to a multi-word pseudo-register is recorded. */ |
1a6a0f2a | 446 |
447 | enum classify_move_insn | |
448 | { | |
449 | /* Not a simple move from one location to another. Directly referenced multi-word pseudos are marked non-decomposable. */ | |
450 | NOT_SIMPLE_MOVE, | |
b5ca6624 | 451 | /* A simple move we want to decompose. Directly referenced multi-word pseudos are marked decomposable when their mode is tieable with word_mode. */ |
452 | DECOMPOSABLE_SIMPLE_MOVE, | |
453 | /* Any other simple move. Nothing is recorded for directly referenced registers. */ | |
1a6a0f2a | 454 | SIMPLE_MOVE |
455 | }; | |
456 | ||
665db605 | 457 | /* If we find a SUBREG in *LOC which we could use to decompose a |
458 | pseudo-register, set a bit in DECOMPOSABLE_CONTEXT. If we find an | |
459 | unadorned register which is not a simple pseudo-register copy, | |
460 | *PCMI gives the type of move, and we set a bit in | |
461 | DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate. Addresses inside a MEM are scanned by a recursive call that always uses NOT_SIMPLE_MOVE. */ | |
1a6a0f2a | 462 |
665db605 | 463 | static void |
464 | find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi) | |
1a6a0f2a | 465 | { |
665db605 | 466 | subrtx_var_iterator::array_type array; |
467 | FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST) | |
1a6a0f2a | 468 | { |
665db605 | 469 | rtx x = *iter; |
470 | if (GET_CODE (x) == SUBREG) | |
471 | { | |
472 | rtx inner = SUBREG_REG (x); | |
473 | unsigned int regno, outer_size, inner_size, outer_words, inner_words; | |
1a6a0f2a | 474 |
665db605 | 475 | if (!REG_P (inner)) |
476 | continue; | |
1a6a0f2a | 477 |
665db605 | 478 | regno = REGNO (inner); |
479 | if (HARD_REGISTER_NUM_P (regno)) | |
480 | { | |
481 | iter.skip_subrtxes (); | |
482 | continue; | |
483 | } | |
1a6a0f2a | 484 |
50e9e5b3 | 485 | if (!interesting_mode_p (GET_MODE (x), &outer_size, &outer_words) |
486 | || !interesting_mode_p (GET_MODE (inner), &inner_size, | |
487 | &inner_words)) | |
488 | continue; | |
1a6a0f2a | 489 |
665db605 | 490 | /* We only try to decompose single word subregs of multi-word |
491 | registers. When we find one, we skip its sub-rtxes to avoid iterating | |
492 | over the inner register. | |
1a6a0f2a | 493 |
665db605 | 494 | ??? This doesn't allow, e.g., DImode subregs of TImode values |
495 | on 32-bit targets. We would need to record the way the | |
496 | pseudo-register was used, and only decompose if all the uses | |
497 | were the same number and size of pieces. Hopefully this | |
498 | doesn't happen much. */ | |
1a6a0f2a | 499 |
665db605 | 500 | if (outer_words == 1 && inner_words > 1) |
501 | { | |
502 | bitmap_set_bit (decomposable_context, regno); | |
503 | iter.skip_subrtxes (); | |
504 | continue; | |
505 | } | |
4e7a1eb8 | 506 |
665db605 | 507 | /* If this is a cast from one mode to another, where the modes |
508 | have the same size, and they are not tieable, then mark this | |
509 | register as non-decomposable. If we decompose it we are | |
510 | likely to mess up whatever the backend is trying to do. */ | |
511 | if (outer_words > 1 | |
512 | && outer_size == inner_size | |
5f6dcf1a | 513 | && !targetm.modes_tieable_p (GET_MODE (x), GET_MODE (inner))) |
665db605 | 514 | { |
515 | bitmap_set_bit (non_decomposable_context, regno); | |
516 | bitmap_set_bit (subreg_context, regno); | |
517 | iter.skip_subrtxes (); | |
518 | continue; | |
519 | } | |
4e7a1eb8 | 520 | } |
665db605 | 521 | else if (REG_P (x)) |
1a6a0f2a | 522 | { |
50e9e5b3 | 523 | unsigned int regno, size, words; |
665db605 | 524 |
525 | /* We will see an outer SUBREG before we see the inner REG, so | |
526 | when we see a plain REG here it means a direct reference to | |
527 | the register. | |
528 | ||
529 | If this is not a simple copy from one location to another, | |
530 | then we can not decompose this register. If this is a simple | |
531 | copy we want to decompose, and the mode is right, | |
532 | then we mark the register as decomposable. | |
533 | Otherwise we don't say anything about this register -- | |
534 | it could be decomposed, but whether that would be | |
535 | profitable depends upon how it is used elsewhere. | |
536 | ||
537 | We only set bits in the bitmap for multi-word | |
538 | pseudo-registers, since those are the only ones we care about | |
539 | and it keeps the size of the bitmaps down. */ | |
540 | ||
541 | regno = REGNO (x); | |
542 | if (!HARD_REGISTER_NUM_P (regno) | |
50e9e5b3 | 543 | && interesting_mode_p (GET_MODE (x), &size, &words) |
544 | && words > 1) | |
1a6a0f2a | 545 | { |
665db605 | 546 | switch (*pcmi) |
547 | { | |
548 | case NOT_SIMPLE_MOVE: | |
549 | bitmap_set_bit (non_decomposable_context, regno); | |
550 | break; | |
551 | case DECOMPOSABLE_SIMPLE_MOVE: | |
5f6dcf1a | 552 | if (targetm.modes_tieable_p (GET_MODE (x), word_mode)) |
665db605 | 553 | bitmap_set_bit (decomposable_context, regno); |
554 | break; | |
555 | case SIMPLE_MOVE: | |
556 | break; | |
557 | default: | |
558 | gcc_unreachable (); | |
559 | } | |
1a6a0f2a | 560 | } |
561 | } | |
665db605 | 562 | else if (MEM_P (x)) |
563 | { | |
564 | enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE; | |
67c3f580 | 565 |
665db605 | 566 | /* Any registers used in a MEM do not participate in a |
567 | SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion | |
568 | here, and skip the sub-rtxes so that the iterator does not visit the address again. */ | |
569 | find_decomposable_subregs (&XEXP (x, 0), &cmi_mem); | |
570 | iter.skip_subrtxes (); | |
571 | } | |
67c3f580 | 572 | } |
1a6a0f2a | 573 | } |
574 | ||
575 | /* Decompose REGNO into word-sized components. We smash the REG node | |
576 | in place. This ensures that (1) something goes wrong quickly if we | |
577 | fail to make some replacement, and (2) the debug information inside | |
578 | the symbol table is automatically kept up to date. Concretely, the regno_reg_rtx entry for REGNO is cleared, one new word_mode register is created per word with gen_reg_rtx_offset, and the original rtx is turned into a CONCATN holding them. The mapping is written to dump_file when that is non-null. */ | |
579 | ||
580 | static void | |
581 | decompose_register (unsigned int regno) | |
582 | { | |
583 | rtx reg; | |
50e9e5b3 | 584 | unsigned int size, words, i; |
1a6a0f2a | 585 | rtvec v; |
586 | ||
587 | reg = regno_reg_rtx[regno]; | |
588 | ||
589 | regno_reg_rtx[regno] = NULL_RTX; | |
1a6a0f2a | 590 |
50e9e5b3 | 591 | if (!interesting_mode_p (GET_MODE (reg), &size, &words)) |
592 | gcc_unreachable (); | |
1a6a0f2a | 593 |
594 | v = rtvec_alloc (words); | |
595 | for (i = 0; i < words; ++i) | |
596 | RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD); | |
597 | ||
598 | PUT_CODE (reg, CONCATN); | |
599 | XVEC (reg, 0) = v; | |
600 | ||
601 | if (dump_file) | |
602 | { | |
603 | fprintf (dump_file, "; Splitting reg %u ->", regno); | |
604 | for (i = 0; i < words; ++i) | |
605 | fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i))); | |
606 | fputc ('\n', dump_file); | |
607 | } | |
608 | } | |
609 | ||
610 | /* Get a SUBREG of a CONCATN: the OUTERMODE value at byte offset ORIG_BYTE of OP, which must be a CONCATN. The parts are treated as equal-sized, the part containing the offset is selected, and the result comes from simplify_gen_subreg on that part. Return NULL_RTX if OUTERMODE is larger than OP's mode or if the requested bytes would run past the end of the selected part. */ | |
611 | ||
612 | static rtx | |
9edf7ea8 | 613 | simplify_subreg_concatn (machine_mode outermode, rtx op, poly_uint64 orig_byte) |
1a6a0f2a | 614 | { |
50e9e5b3 | 615 | unsigned int outer_size, outer_words, inner_size, inner_words; |
3754d046 | 616 | machine_mode innermode, partmode; |
1a6a0f2a | 617 | rtx part; |
618 | unsigned int final_offset; | |
9edf7ea8 | 619 | unsigned int byte; |
1a6a0f2a | 620 |
50e9e5b3 | 621 | innermode = GET_MODE (op); |
622 | if (!interesting_mode_p (outermode, &outer_size, &outer_words) | |
623 | || !interesting_mode_p (innermode, &inner_size, &inner_words)) | |
624 | gcc_unreachable (); | |
625 | ||
9edf7ea8 | 626 | /* Must be constant if interesting_mode_p passes. */ |
627 | byte = orig_byte.to_constant (); | |
1a6a0f2a | 628 | gcc_assert (GET_CODE (op) == CONCATN); |
50e9e5b3 | 629 | gcc_assert (byte % outer_size == 0); |
1a6a0f2a | 630 |
50e9e5b3 | 631 | gcc_assert (byte < inner_size); |
632 | if (outer_size > inner_size) | |
ae1f04be | 633 | return NULL_RTX; |
1a6a0f2a | 634 |
50e9e5b3 | 635 | inner_size /= XVECLEN (op, 0); /* From here on INNER_SIZE is the size of a single part of the CONCATN. */ |
1a6a0f2a | 636 | part = XVECEXP (op, 0, byte / inner_size); |
50bdfec8 | 637 | partmode = GET_MODE (part); |
638 | ||
ef379746 | 639 | final_offset = byte % inner_size; |
50e9e5b3 | 640 | if (final_offset + outer_size > inner_size) |
ef379746 | 641 | return NULL_RTX; |
642 | ||
598ffe59 | 643 | /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of |
644 | regular CONST_VECTORs. They have vector or integer modes, depending | |
645 | on the capabilities of the target. Cope with them. */ | |
646 | if (partmode == VOIDmode && VECTOR_MODE_P (innermode)) | |
647 | partmode = GET_MODE_INNER (innermode); | |
648 | else if (partmode == VOIDmode) | |
ef379746 | 649 | partmode = mode_for_size (inner_size * BITS_PER_UNIT, |
650 | GET_MODE_CLASS (innermode), 0).require (); | |
1a6a0f2a | 651 |
50bdfec8 | 652 | return simplify_gen_subreg (outermode, part, partmode, final_offset); |
1a6a0f2a | 653 | } |
654 | ||
655 | /* Wrapper around simplify_gen_subreg which handles CONCATN. OP may be a CONCATN, a SUBREG of a CONCATN, or any other rtx of mode INNERMODE; the result is the OUTERMODE piece at byte offset BYTE. The result is never NULL_RTX: failures are asserted against, except that a request which fails on a paradoxical SUBREG yields CONST0_RTX (OUTERMODE). */ | |
656 | ||
657 | static rtx | |
3754d046 | 658 | simplify_gen_subreg_concatn (machine_mode outermode, rtx op, |
659 | machine_mode innermode, unsigned int byte) | |
1a6a0f2a | 660 | { |
3fa57b79 | 661 | rtx ret; |
662 | ||
1a6a0f2a | 663 | /* We have to handle generating a SUBREG of a SUBREG of a CONCATN. |
664 | If OP is a SUBREG of a CONCATN, then it must be a simple mode | |
665 | change with the same size and offset 0, or it must extract a | |
666 | part. We shouldn't see anything else here. */ | |
667 | if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN) | |
668 | { | |
669 | rtx op2; | |
670 | ||
52acb7ae | 671 | if (known_eq (GET_MODE_SIZE (GET_MODE (op)), |
672 | GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))) | |
9edf7ea8 | 673 | && known_eq (SUBREG_BYTE (op), 0)) |
1a6a0f2a | 674 | return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op), |
675 | GET_MODE (SUBREG_REG (op)), byte); | |
676 | ||
677 | op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op), | |
678 | SUBREG_BYTE (op)); | |
679 | if (op2 == NULL_RTX) | |
680 | { | |
681 | /* We don't handle paradoxical subregs here. Instead of resolving the inner SUBREG first, take the OUTERMODE piece straight from the CONCATN at the combined offset. */ | |
d0257d43 | 682 | gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op))); |
683 | gcc_assert (!paradoxical_subreg_p (op)); | |
1a6a0f2a | 684 | op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op), |
685 | byte + SUBREG_BYTE (op)); | |
686 | gcc_assert (op2 != NULL_RTX); | |
687 | return op2; | |
688 | } | |
689 | ||
690 | op = op2; | |
691 | gcc_assert (op != NULL_RTX); | |
692 | gcc_assert (innermode == GET_MODE (op)); | |
693 | } | |
3fa57b79 | 694 |
1a6a0f2a | 695 | if (GET_CODE (op) == CONCATN) |
696 | return simplify_subreg_concatn (outermode, op, byte); | |
3fa57b79 | 697 |
698 | ret = simplify_gen_subreg (outermode, op, innermode, byte); | |
699 | ||
700 | /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then | |
701 | resolve_simple_move will ask for the high part of the paradoxical | |
702 | subreg, which does not have a value. Just return a zero. */ | |
703 | if (ret == NULL_RTX | |
d0257d43 | 704 | && paradoxical_subreg_p (op)) |
3fa57b79 | 705 | return CONST0_RTX (outermode); |
706 | ||
707 | gcc_assert (ret != NULL_RTX); | |
708 | return ret; | |
1a6a0f2a | 709 | } |
710 | ||
711 | /* Return whether we should resolve X into the registers into which it | |
712 | was decomposed. decompose_register rewrites a decomposed REG in place into a CONCATN, so testing the rtx code is all that is needed. */ | |
713 | ||
714 | static bool | |
715 | resolve_reg_p (rtx x) | |
716 | { | |
717 | return GET_CODE (x) == CONCATN; | |
718 | } | |
719 | ||
720 | /* Return whether X is a SUBREG of a register which we need to | |
721 | resolve, i.e. a SUBREG whose SUBREG_REG satisfies resolve_reg_p. */ | |
722 | ||
723 | static bool | |
724 | resolve_subreg_p (rtx x) | |
725 | { | |
726 | if (GET_CODE (x) != SUBREG) | |
727 | return false; | |
728 | return resolve_reg_p (SUBREG_REG (x)); | |
729 | } | |
730 | ||
2e3cae91 | 731 | /* Look for SUBREGs in *LOC which need to be decomposed. Each SUBREG of a decomposed register is replaced, via validate_change on INSN, by the piece that simplify_subreg_concatn returns for it. INSN is NULL_RTX when scanning a note. Return true if a SUBREG could not be simplified (only allowed when INSN is null) or if a decomposed register is referenced directly; return false otherwise. NOTE(review): the final validate_change argument of 1 presumably queues the change as part of a group for the caller to apply or cancel -- confirm against validate_change. */ |
1a6a0f2a | 732 |
2e3cae91 | 733 | static bool |
734 | resolve_subreg_use (rtx *loc, rtx insn) | |
1a6a0f2a | 735 | { |
2e3cae91 | 736 | subrtx_ptr_iterator::array_type array; |
737 | FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST) | |
1a6a0f2a | 738 | { |
2e3cae91 | 739 | rtx *loc = *iter; /* Shadows the LOC parameter: inside the loop LOC is the location of the current sub-rtx. */ |
740 | rtx x = *loc; | |
741 | if (resolve_subreg_p (x)) | |
1a6a0f2a | 742 | { |
2e3cae91 | 743 | x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x), |
744 | SUBREG_BYTE (x)); | |
1a6a0f2a | 745 |
2e3cae91 | 746 | /* It is possible for a note to contain a SUBREG of a decomposed register which |
747 | simplify_subreg_concatn cannot resolve. In this case, return true to the caller to indicate | |
748 | that the note must be removed. */ | |
749 | if (!x) | |
750 | { | |
751 | gcc_assert (!insn); | |
752 | return true; | |
753 | } | |
1a6a0f2a | 754 |
2e3cae91 | 755 | validate_change (insn, loc, x, 1); |
756 | iter.skip_subrtxes (); | |
757 | } | |
758 | else if (resolve_reg_p (x)) | |
759 | /* Return true to the caller to indicate that we found a direct | |
760 | reference to a register which is being decomposed. This can | |
761 | happen inside notes, multiword shift or zero-extend | |
762 | instructions. */ | |
763 | return true; | |
1a6a0f2a | 764 | } |
765 | ||
2e3cae91 | 766 | return false; |
1a6a0f2a | 767 | } |
768 | ||
1a6a0f2a | 769 | /* Resolve any decomposed registers which appear in register notes on |
770 | INSN. The REG_EQUAL/REG_EQUIV note, if any, is rewritten through resolve_subreg_use; it is removed when that reports a reference it cannot resolve, and otherwise the insn's notes are rescanned if any change was queued. REG_DEAD and REG_UNUSED notes that name a decomposed register are then unlinked from the note list. */ | |
771 | ||
772 | static void | |
a5942062 | 773 | resolve_reg_notes (rtx_insn *insn) |
1a6a0f2a | 774 | { |
775 | rtx *pnote, note; | |
776 | ||
777 | note = find_reg_equal_equiv_note (insn); | |
778 | if (note) | |
779 | { | |
3072d30e | 780 | int old_count = num_validated_changes (); |
2e3cae91 | 781 | if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX)) |
1e5b92fa | 782 | remove_note (insn, note); |
3072d30e | 783 | else |
784 | if (old_count != num_validated_changes ()) | |
785 | df_notes_rescan (insn); | |
1a6a0f2a | 786 | } |
787 | ||
788 | pnote = &REG_NOTES (insn); | |
789 | while (*pnote != NULL_RTX) | |
790 | { | |
9ce37fa7 | 791 | bool del = false; |
1a6a0f2a | 792 |
793 | note = *pnote; | |
794 | switch (REG_NOTE_KIND (note)) | |
795 | { | |
3072d30e | 796 | case REG_DEAD: |
797 | case REG_UNUSED: | |
1a6a0f2a | 798 | if (resolve_reg_p (XEXP (note, 0))) |
9ce37fa7 | 799 | del = true; |
1a6a0f2a | 800 | break; |
801 | ||
802 | default: | |
803 | break; | |
804 | } | |
805 | ||
9ce37fa7 | 806 | /* PNOTE addresses the link that points at NOTE: either splice NOTE out through it, or advance it to NOTE's own next-link. */ |
| | if (del) |
1a6a0f2a | 807 | *pnote = XEXP (note, 1); |
808 | else | |
809 | pnote = &XEXP (note, 1); | |
810 | } | |
811 | } | |
812 | ||
67c3f580 | 813 | /* Return whether X can be decomposed into subwords. A hard register qualifies only if its mode passes interesting_mode_p and simplify_subreg_regno accepts a word_mode piece at every UNITS_PER_WORD offset. A pseudo-register qualifies unless it is recorded in SUBREG_CONTEXT. Anything that is not a REG is accepted. */ |
1a6a0f2a | 814 |
815 | static bool | |
67c3f580 | 816 | can_decompose_p (rtx x) |
1a6a0f2a | 817 | { |
818 | if (REG_P (x)) | |
819 | { | |
820 | unsigned int regno = REGNO (x); | |
821 | ||
822 | if (HARD_REGISTER_NUM_P (regno)) | |
5f961ca4 | 823 | { |
50e9e5b3 | 824 | unsigned int byte, num_bytes, num_words; |
5f961ca4 | 825 |
50e9e5b3 | 826 | if (!interesting_mode_p (GET_MODE (x), &num_bytes, &num_words)) |
827 | return false; | |
5f961ca4 | 828 | for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD) |
829 | if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0) | |
830 | return false; | |
831 | return true; | |
832 | } | |
1a6a0f2a | 833 | else |
5277d36e | 834 | return !bitmap_bit_p (subreg_context, regno); |
1a6a0f2a | 835 | } |
836 | ||
67c3f580 | 837 | return true; |
1a6a0f2a | 838 | } |
839 | ||
/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.

   The mode of the destination of SET must be accepted by
   interesting_mode_p and must span more than one word; both conditions
   are asserted.  The word-sized replacement moves are collected in a
   pending sequence, emitted before INSN, and INSN is then removed.
   The function calls itself on helper moves that it creates while the
   sequence is still open.  */

static rtx_insn *
resolve_simple_move (rtx set, rtx_insn *insn)
{
  rtx src, dest, real_dest;
  rtx_insn *insns;
  machine_mode orig_mode, dest_mode;
  unsigned int orig_size, words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  if (!interesting_mode_p (orig_mode, &orig_size, &words))
    gcc_unreachable ();
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  /* REAL_DEST, when non-null, is the destination the caller asked for;
     DEST is then a temporary that is copied to REAL_DEST at the end.  */
  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (maybe_ne (SUBREG_BYTE (src), 0)
          || maybe_ne (orig_size, GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
        REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (maybe_ne (SUBREG_BYTE (dest), 0)
          || maybe_ne (orig_size,
                       GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, smove;
      rtx_insn *minsn;

      /* Load the source into a fresh pseudo first, resolving that
         helper move recursively, and continue with the pseudo as the
         source.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  /* SRC and DEST are unchanged here only if neither branch above ran,
     so nothing has been emitted into the sequence on this path.  */
  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we cannot validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
        resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
      if (MEM_P (dest))
        resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
      /* Commit the address rewrites queued above; failure is not
         expected.  */
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);

      if (AUTO_INC_DEC)
        {
          rtx_insn *move = emit_move_insn (reg, src);
          /* Carry a REG_INC note found on INSN over to the move that
             now reads the memory source.  */
          if (MEM_P (src))
            {
              rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
              if (note)
                add_reg_note (move, REG_INC, XEXP (note, 0));
            }
        }
      else
        emit_move_insn (reg, src);

      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
          && !resolve_reg_p (dest)
          && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
        real_dest = dest;
      /* For a mode that is not a scalar integer mode, the temporary
         uses the integer mode returned by int_mode_for_mode.  */
      if (!SCALAR_INT_MODE_P (dest_mode))
        dest_mode = int_mode_for_mode (dest_mode).require ();
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
        REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (orig_size % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Choose whether the words are pushed in ascending or descending
         index order.  JINC is unsigned, so assigning -1 wraps around
         and "j += jinc" steps J down by one.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
        {
          j = 0;
          jinc = 1;
        }
      else
        {
          j = words - 1;
          jinc = -1;
        }

      for (i = 0; i < words; ++i, j += jinc)
        {
          rtx temp;

          temp = copy_rtx (XEXP (dest, 0));
          temp = adjust_automodify_address_nv (dest, word_mode, temp,
                                               j * UNITS_PER_WORD);
          emit_move_insn (temp,
                          simplify_gen_subreg_concatn (word_mode, src,
                                                       orig_mode,
                                                       j * UNITS_PER_WORD));
        }
    }
  else
    {
      unsigned int i;

      /* A pseudo destination is clobbered as a whole before its words
         are written one at a time.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
        emit_clobber (dest);

      for (i = 0; i < words; ++i)
        emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
                                                     dest_mode,
                                                     i * UNITS_PER_WORD),
                        simplify_gen_subreg_concatn (word_mode, src,
                                                     orig_mode,
                                                     i * UNITS_PER_WORD));
    }

  /* If we went through a temporary, copy it to the destination that
     was originally requested and resolve that final move as well.  */
  if (real_dest != NULL_RTX)
    {
      rtx mdest, smove;
      rtx_insn *minsn;

      if (dest_mode == orig_mode)
        mdest = dest;
      else
        mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

      if (AUTO_INC_DEC && MEM_P (real_dest)
          && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
        {
          rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
          if (note)
            add_reg_note (minsn, REG_INC, XEXP (note, 0));
        }

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  /* If we get here via self-recursion, then INSN is not yet in the insns
     chain and delete_insn will fail.  We only want to remove INSN from the
     current sequence.  See PR56738.  */
  if (in_sequence_p ())
    remove_insn (insn);
  else
    delete_insn (insn);

  return insns;
}
1074 | ||
1075 | /* Change a CLOBBER of a decomposed register into a CLOBBER of the | |
1076 | component registers. Return whether we changed something. */ | |
1077 | ||
1078 | static bool | |
a5942062 | 1079 | resolve_clobber (rtx pat, rtx_insn *insn) |
1a6a0f2a | 1080 | { |
e29831db | 1081 | rtx reg; |
3754d046 | 1082 | machine_mode orig_mode; |
50e9e5b3 | 1083 | unsigned int orig_size, words, i; |
ab9eaa97 | 1084 | int ret; |
1a6a0f2a | 1085 | |
1086 | reg = XEXP (pat, 0); | |
2289a5f2 | 1087 | if (!resolve_reg_p (reg) && !resolve_subreg_p (reg)) |
1a6a0f2a | 1088 | return false; |
1089 | ||
1090 | orig_mode = GET_MODE (reg); | |
50e9e5b3 | 1091 | if (!interesting_mode_p (orig_mode, &orig_size, &words)) |
1092 | gcc_unreachable (); | |
1a6a0f2a | 1093 | |
ab9eaa97 | 1094 | ret = validate_change (NULL_RTX, &XEXP (pat, 0), |
1095 | simplify_gen_subreg_concatn (word_mode, reg, | |
1096 | orig_mode, 0), | |
1097 | 0); | |
3072d30e | 1098 | df_insn_rescan (insn); |
ab9eaa97 | 1099 | gcc_assert (ret != 0); |
1100 | ||
1a6a0f2a | 1101 | for (i = words - 1; i > 0; --i) |
1102 | { | |
1103 | rtx x; | |
1104 | ||
2289a5f2 | 1105 | x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode, |
1106 | i * UNITS_PER_WORD); | |
1a6a0f2a | 1107 | x = gen_rtx_CLOBBER (VOIDmode, x); |
1108 | emit_insn_after (x, insn); | |
1109 | } | |
1110 | ||
db2200eb | 1111 | resolve_reg_notes (insn); |
1112 | ||
1a6a0f2a | 1113 | return true; |
1114 | } | |
1115 | ||
1116 | /* A USE of a decomposed register is no longer meaningful. Return | |
1117 | whether we changed something. */ | |
1118 | ||
1119 | static bool | |
a5942062 | 1120 | resolve_use (rtx pat, rtx_insn *insn) |
1a6a0f2a | 1121 | { |
1122 | if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0))) | |
1123 | { | |
1124 | delete_insn (insn); | |
1125 | return true; | |
1126 | } | |
db2200eb | 1127 | |
1128 | resolve_reg_notes (insn); | |
1129 | ||
1a6a0f2a | 1130 | return false; |
1131 | } | |
1132 | ||
9845d120 | 1133 | /* A VAR_LOCATION can be simplified. */ |
1134 | ||
1135 | static void | |
a5942062 | 1136 | resolve_debug (rtx_insn *insn) |
9845d120 | 1137 | { |
d1f3d29f | 1138 | subrtx_ptr_iterator::array_type array; |
1139 | FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST) | |
1140 | { | |
1141 | rtx *loc = *iter; | |
1142 | rtx x = *loc; | |
1143 | if (resolve_subreg_p (x)) | |
1144 | { | |
1145 | x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x), | |
1146 | SUBREG_BYTE (x)); | |
1147 | ||
1148 | if (x) | |
1149 | *loc = x; | |
1150 | else | |
1151 | x = copy_rtx (*loc); | |
1152 | } | |
1153 | if (resolve_reg_p (x)) | |
1154 | *loc = copy_rtx (x); | |
1155 | } | |
9845d120 | 1156 | |
1157 | df_insn_rescan (insn); | |
1158 | ||
1159 | resolve_reg_notes (insn); | |
1160 | } | |
1161 | ||
c7944dce | 1162 | /* Check if INSN is a decomposable multiword-shift or zero-extend and |
1163 | set the decomposable_context bitmap accordingly. SPEED_P is true | |
1164 | if we are optimizing INSN for speed rather than size. Return true | |
1165 | if INSN is decomposable. */ | |
9cf5d19e | 1166 | |
c7944dce | 1167 | static bool |
a5942062 | 1168 | find_decomposable_shift_zext (rtx_insn *insn, bool speed_p) |
9cf5d19e | 1169 | { |
1170 | rtx set; | |
1171 | rtx op; | |
1172 | rtx op_operand; | |
1173 | ||
1174 | set = single_set (insn); | |
1175 | if (!set) | |
c7944dce | 1176 | return false; |
9cf5d19e | 1177 | |
1178 | op = SET_SRC (set); | |
1179 | if (GET_CODE (op) != ASHIFT | |
1180 | && GET_CODE (op) != LSHIFTRT | |
4d5cf08a | 1181 | && GET_CODE (op) != ASHIFTRT |
9cf5d19e | 1182 | && GET_CODE (op) != ZERO_EXTEND) |
c7944dce | 1183 | return false; |
9cf5d19e | 1184 | |
1185 | op_operand = XEXP (op, 0); | |
1186 | if (!REG_P (SET_DEST (set)) || !REG_P (op_operand) | |
1187 | || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set))) | |
1188 | || HARD_REGISTER_NUM_P (REGNO (op_operand)) | |
c7944dce | 1189 | || GET_MODE (op) != twice_word_mode) |
1190 | return false; | |
9cf5d19e | 1191 | |
1192 | if (GET_CODE (op) == ZERO_EXTEND) | |
1193 | { | |
1194 | if (GET_MODE (op_operand) != word_mode | |
c7944dce | 1195 | || !choices[speed_p].splitting_zext) |
1196 | return false; | |
9cf5d19e | 1197 | } |
1198 | else /* left or right shift */ | |
1199 | { | |
c7944dce | 1200 | bool *splitting = (GET_CODE (op) == ASHIFT |
1201 | ? choices[speed_p].splitting_ashift | |
4d5cf08a | 1202 | : GET_CODE (op) == ASHIFTRT |
1203 | ? choices[speed_p].splitting_ashiftrt | |
c7944dce | 1204 | : choices[speed_p].splitting_lshiftrt); |
971ba038 | 1205 | if (!CONST_INT_P (XEXP (op, 1)) |
c7944dce | 1206 | || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD, |
1207 | 2 * BITS_PER_WORD - 1) | |
1208 | || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD]) | |
1209 | return false; | |
1210 | ||
1211 | bitmap_set_bit (decomposable_context, REGNO (op_operand)); | |
9cf5d19e | 1212 | } |
1213 | ||
1214 | bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set))); | |
1215 | ||
c7944dce | 1216 | return true; |
9cf5d19e | 1217 | } |
1218 | ||
/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.

   NULL is returned, and INSN is left untouched, when INSN is not a
   single set of an ASHIFT, LSHIFTRT, ASHIFTRT or ZERO_EXTEND, when the
   operand is not in a scalar integer mode, or when neither the
   destination nor the operand is a register that has been decomposed.
   Otherwise the replacement insns are emitted before INSN and INSN is
   deleted.  */

static rtx_insn *
resolve_shift_zext (rtx_insn *insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx_insn *insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
  scalar_int_mode inner_mode;

  set = single_set (insn);
  if (!set)
    return NULL;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL;

  op_operand = XEXP (op, 0);
  if (!is_a <scalar_int_mode> (GET_MODE (op_operand), &inner_mode))
    return NULL;

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
                ? 1 : 0;

  /* On a big-endian-word target the word index is flipped, but only
     when the operand is wider than one word.  */
  if (WORDS_BIG_ENDIAN && GET_MODE_SIZE (inner_mode) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* OFFSET1 is the byte offset of the destination word that receives
     the moved or shifted source word; OFFSET2 is the byte offset of
     the other destination word.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                          GET_MODE (SET_DEST (set)),
                                          offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                            GET_MODE (SET_DEST (set)),
                                            offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
                                         GET_MODE (op_operand),
                                         src_offset);
  /* For an arithmetic right shift by less than 2 * BITS_PER_WORD - 1,
     compute the value for the other destination word now, from the
     still unshifted source word, as a right shift by
     BITS_PER_WORD - 1.  NOTE(review): the trailing 0 is presumably
     expand_shift's "unsigned" flag, making this an arithmetic
     shift -- confirm against expand_shift's declaration.  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
                              BITS_PER_WORD - 1, NULL_RTX, 0);

  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      /* Only the part of the shift count beyond one full word still
         has to be applied to the selected source word.  */
      if (shift_count > BITS_PER_WORD)
        src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
                                LSHIFT_EXPR : RSHIFT_EXPR,
                                word_mode, src_reg,
                                shift_count - BITS_PER_WORD,
                                dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  /* expand_shift was given DEST_REG as its target; skip the move if
     the result already lives there.  */
  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  /* Fill the other destination word: zero for everything except
     ASHIFTRT; for ASHIFTRT either a copy of the shifted word (shift
     count 2 * BITS_PER_WORD - 1) or the value computed above.  */
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx_insn *in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      /* The new insns were placed directly before INSN, so walking
         forward from the first one ends at INSN.  */
      for (in = insns; in != insn; in = NEXT_INSN (in))
        fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1326 | ||
c7944dce | 1327 | /* Print to dump_file a description of what we're doing with shift code CODE. |
1328 | SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */ | |
1329 | ||
1330 | static void | |
1331 | dump_shift_choices (enum rtx_code code, bool *splitting) | |
1332 | { | |
1333 | int i; | |
1334 | const char *sep; | |
1335 | ||
1336 | fprintf (dump_file, | |
1337 | " Splitting mode %s for %s lowering with shift amounts = ", | |
1338 | GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code)); | |
1339 | sep = ""; | |
1340 | for (i = 0; i < BITS_PER_WORD; i++) | |
1341 | if (splitting[i]) | |
1342 | { | |
1343 | fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD); | |
1344 | sep = ","; | |
1345 | } | |
1346 | fprintf (dump_file, "\n"); | |
1347 | } | |
1348 | ||
1349 | /* Print to dump_file a description of what we're doing when optimizing | |
1350 | for speed or size; SPEED_P says which. DESCRIPTION is a description | |
1351 | of the SPEED_P choice. */ | |
1352 | ||
1353 | static void | |
1354 | dump_choices (bool speed_p, const char *description) | |
1355 | { | |
50e9e5b3 | 1356 | unsigned int size, factor, i; |
c7944dce | 1357 | |
1358 | fprintf (dump_file, "Choices when optimizing for %s:\n", description); | |
1359 | ||
1360 | for (i = 0; i < MAX_MACHINE_MODE; i++) | |
50e9e5b3 | 1361 | if (interesting_mode_p ((machine_mode) i, &size, &factor) |
1362 | && factor > 1) | |
c7944dce | 1363 | fprintf (dump_file, " %s mode %s for copy lowering.\n", |
1364 | choices[speed_p].move_modes_to_split[i] | |
1365 | ? "Splitting" | |
1366 | : "Skipping", | |
3754d046 | 1367 | GET_MODE_NAME ((machine_mode) i)); |
c7944dce | 1368 | |
1369 | fprintf (dump_file, " %s mode %s for zero_extend lowering.\n", | |
1370 | choices[speed_p].splitting_zext ? "Splitting" : "Skipping", | |
1371 | GET_MODE_NAME (twice_word_mode)); | |
1372 | ||
1373 | dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift); | |
4d5cf08a | 1374 | dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt); |
1375 | dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt); | |
c7944dce | 1376 | fprintf (dump_file, "\n"); |
1377 | } | |
1378 | ||
/* Look for registers which are always accessed via word-sized SUBREGs
   or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
   registers into several word-sized pseudo-registers.

   The function works in two phases.  The first phase scans every insn
   and records candidate registers in decomposable_context and
   disqualified ones in non_decomposable_context.  If any candidate
   survives, the second phase decomposes those registers, rewrites
   every insn that mentions them, and finally splits basic blocks in
   which a control flow insn ended up in the middle.  Both early exits
   happen before any bitmap or the copy graph has been allocated.  */

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.  */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
        fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
        {
          machine_mode mode = GET_MODE (regno_reg_rtx[i]);
          /* A mode counts if either the size or the speed choices
             split moves in it.  */
          if (choices[false].move_modes_to_split[(int) mode]
              || choices[true].move_modes_to_split[(int) mode])
            {
              useful_modes_seen = true;
              break;
            }
        }

    if (!useful_modes_seen)
      {
        if (dump_file)
          fprintf (dump_file, "Nothing to lower in this function.\n");
        return;
      }
  }

  /* Only when dataflow information exists: defer insn rescans and run
     word-level dead code elimination first.  */
  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  /* One (initially null) bitmap slot per register number.  */
  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  /* Phase 1: classify every insn and collect candidate registers.  */
  speed_p = optimize_function_for_speed_p (cfun);
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
        {
          rtx set;
          enum classify_move_insn cmi;
          int i, n;

          /* Bare CLOBBER and USE patterns are not analyzed here; they
             are handled during the rewrite phase.  */
          if (!INSN_P (insn)
              || GET_CODE (PATTERN (insn)) == CLOBBER
              || GET_CODE (PATTERN (insn)) == USE)
            continue;

          recog_memoized (insn);

          if (find_decomposable_shift_zext (insn, speed_p))
            continue;

          extract_insn (insn);

          set = simple_move (insn, speed_p);

          if (!set)
            cmi = NOT_SIMPLE_MOVE;
          else
            {
              /* We mark pseudo-to-pseudo copies as decomposable during the
                 second pass only.  The first pass is so early that there is
                 good chance such moves will be optimized away completely by
                 subsequent optimizations anyway.

                 However, we call find_pseudo_copy even during the first pass
                 so as to properly set up the reg_copy_graph.  */
              if (find_pseudo_copy (set))
                cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
              else
                cmi = SIMPLE_MOVE;
            }

          /* Examine each operand extracted by extract_insn.  CMI is
             passed by address, so the callee can read and presumably
             update the classification.  */
          n = recog_data.n_operands;
          for (i = 0; i < n; ++i)
            {
              find_decomposable_subregs (&recog_data.operand[i], &cmi);

              /* We handle ASM_OPERANDS as a special case to support
                 things like x86 rdtsc which returns a DImode value.
                 We can decompose the output, which will certainly be
                 operand 0, but not the inputs.  */

              if (cmi == SIMPLE_MOVE
                  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
                {
                  gcc_assert (i == 0);
                  cmi = NOT_SIMPLE_MOVE;
                }
            }
        }
    }

  /* A register seen in any non-decomposable context is dropped from
     the candidate set.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      /* SUB_BLOCKS records the indices of basic blocks that must be
         split after the rewrite.  */
      auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
        decompose_register (regno);

      /* Phase 2: rewrite every insn that refers to a decomposed
         register.  */
      FOR_EACH_BB_FN (bb, cfun)
        {
          rtx_insn *insn;

          FOR_BB_INSNS (bb, insn)
            {
              rtx pat;

              if (!INSN_P (insn))
                continue;

              pat = PATTERN (insn);
              if (GET_CODE (pat) == CLOBBER)
                resolve_clobber (pat, insn);
              else if (GET_CODE (pat) == USE)
                resolve_use (pat, insn);
              else if (DEBUG_INSN_P (insn))
                resolve_debug (insn);
              else
                {
                  rtx set;
                  int i;

                  recog_memoized (insn);
                  extract_insn (insn);

                  set = simple_move (insn, speed_p);
                  if (set)
                    {
                      rtx_insn *orig_insn = insn;
                      bool cfi = control_flow_insn_p (insn);

                      /* We can end up splitting loads to multi-word pseudos
                         into separate loads to machine word size pseudos.
                         When this happens, we first had one load that can
                         throw, and after resolve_simple_move we'll have a
                         bunch of loads (at least two).  All those loads may
                         trap if we can have non-call exceptions, so they
                         all will end the current basic block.  We split the
                         block after the outer loop over all insns, but we
                         make sure here that we will be able to split the
                         basic block and still produce the correct control
                         flow graph for it.  */
                      gcc_assert (!cfi
                                  || (cfun->can_throw_non_call_exceptions
                                      && can_throw_internal (insn)));

                      /* INSN now refers to whatever resolve_simple_move
                         returned; if that differs from the original
                         insn, the operand data is re-extracted for
                         it.  */
                      insn = resolve_simple_move (set, insn);
                      if (insn != orig_insn)
                        {
                          recog_memoized (insn);
                          extract_insn (insn);

                          if (cfi)
                            bitmap_set_bit (sub_blocks, bb->index);
                        }
                    }
                  else
                    {
                      rtx_insn *decomposed_shift;

                      decomposed_shift = resolve_shift_zext (insn);
                      if (decomposed_shift != NULL_RTX)
                        {
                          insn = decomposed_shift;
                          recog_memoized (insn);
                          extract_insn (insn);
                        }
                    }

                  /* Rewrite remaining references inside the operands
                     of the (possibly replaced) insn.  */
                  for (i = recog_data.n_operands - 1; i >= 0; --i)
                    resolve_subreg_use (recog_data.operand_loc[i], insn);

                  resolve_reg_notes (insn);

                  if (num_validated_changes () > 0)
                    {
                      /* Keep duplicated operands in step with the
                         operand they duplicate before committing.  */
                      for (i = recog_data.n_dups - 1; i >= 0; --i)
                        {
                          rtx *pl = recog_data.dup_loc[i];
                          int dup_num = recog_data.dup_num[i];
                          rtx *px = recog_data.operand_loc[dup_num];

                          validate_unshare_change (insn, pl, *px, 1);
                        }

                      i = apply_change_group ();
                      gcc_assert (i);
                    }
                }
            }
        }

      /* If we had insns to split that caused control flow insns in the middle
         of a basic block, split those blocks now.  Note that we only handle
         the case where splitting a load has caused multiple possibly trapping
         loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
        {
          rtx_insn *insn, *end;
          edge fallthru;

          bb = BASIC_BLOCK_FOR_FN (cfun, i);
          insn = BB_HEAD (bb);
          end = BB_END (bb);

          /* END is the last insn of the original block; it is never
             split after, since the loop stops on reaching it.  */
          while (insn != end)
            {
              if (control_flow_insn_p (insn))
                {
                  /* Split the block after insn.  There will be a fallthru
                     edge, which is OK so we keep it.  We have to create the
                     exception edges ourselves.  */
                  fallthru = split_block (bb, insn);
                  rtl_make_eh_edge (NULL, bb, BB_END (bb));
                  bb = fallthru->dest;
                  insn = BB_HEAD (bb);
                }
              else
                insn = NEXT_INSN (insn);
            }
        }
    }

  /* Release every bitmap that was attached to the copy graph, then the
     graph itself and the three context bitmaps.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
        BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1670 | \f | |
/* Implement first lower subreg pass.  */

namespace {

/* Static description of the first pass, registered under the name
   "subreg1".  It requires, provides and destroys no properties and
   requests no TODO actions.  */
const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* Pass object for the first lowering pass.  */
class pass_lower_subreg : public rtl_opt_pass
{
public:
  pass_lower_subreg (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg, ctxt)
  {}

  /* opt_pass methods: */
  /* The pass is enabled whenever flag_split_wide_types is nonzero.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  /* Run the decomposition with DECOMPOSE_COPIES false, so copies
     between pseudos are not by themselves marked decomposable.  */
  virtual unsigned int execute (function *)
    {
      decompose_multiword_subregs (false);
      return 0;
    }

}; // class pass_lower_subreg

} // anon namespace
1706 | ||
1707 | rtl_opt_pass * | |
1708 | make_pass_lower_subreg (gcc::context *ctxt) | |
1709 | { | |
1710 | return new pass_lower_subreg (ctxt); | |
1711 | } | |
1712 | ||
65b0537f | 1713 | /* Implement second lower subreg pass. */ |
1714 | ||
cbe8bda8 | 1715 | namespace { |
1716 | ||
1717 | const pass_data pass_data_lower_subreg2 = | |
1a6a0f2a | 1718 | { |
cbe8bda8 | 1719 | RTL_PASS, /* type */ |
1720 | "subreg2", /* name */ | |
1721 | OPTGROUP_NONE, /* optinfo_flags */ | |
cbe8bda8 | 1722 | TV_LOWER_SUBREG, /* tv_id */ |
1723 | 0, /* properties_required */ | |
1724 | 0, /* properties_provided */ | |
1725 | 0, /* properties_destroyed */ | |
1726 | 0, /* todo_flags_start */ | |
8b88439e | 1727 | TODO_df_finish, /* todo_flags_finish */ |
1a6a0f2a | 1728 | }; |
cbe8bda8 | 1729 | |
1730 | class pass_lower_subreg2 : public rtl_opt_pass | |
1731 | { | |
1732 | public: | |
9af5ce0c | 1733 | pass_lower_subreg2 (gcc::context *ctxt) |
1734 | : rtl_opt_pass (pass_data_lower_subreg2, ctxt) | |
cbe8bda8 | 1735 | {} |
1736 | ||
1737 | /* opt_pass methods: */ | |
31315c24 | 1738 | virtual bool gate (function *) { return flag_split_wide_types != 0; } |
65b0537f | 1739 | virtual unsigned int execute (function *) |
1740 | { | |
1741 | decompose_multiword_subregs (true); | |
1742 | return 0; | |
1743 | } | |
cbe8bda8 | 1744 | |
1745 | }; // class pass_lower_subreg2 | |
1746 | ||
1747 | } // anon namespace | |
1748 | ||
1749 | rtl_opt_pass * | |
1750 | make_pass_lower_subreg2 (gcc::context *ctxt) | |
1751 | { | |
1752 | return new pass_lower_subreg2 (ctxt); | |
1753 | } |