]>
Commit | Line | Data |
---|---|---|
0dc6645f AS |
1 | /* Subroutines used to remove unnecessary doubleword swaps |
2 | for p8 little-endian VSX code. | |
7adcbafe | 3 | Copyright (C) 1991-2022 Free Software Foundation, Inc. |
0dc6645f AS |
4 | |
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published | |
9 | by the Free Software Foundation; either version 3, or (at your | |
10 | option) any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT | |
13 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
14 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
15 | License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
8fcc61f8 RS |
21 | #define IN_TARGET_CODE 1 |
22 | ||
0dc6645f AS |
23 | #include "config.h" |
24 | #include "system.h" | |
25 | #include "coretypes.h" | |
26 | #include "backend.h" | |
27 | #include "rtl.h" | |
28 | #include "tree.h" | |
29 | #include "memmodel.h" | |
30 | #include "df.h" | |
31 | #include "tm_p.h" | |
32 | #include "ira.h" | |
33 | #include "print-tree.h" | |
34 | #include "varasm.h" | |
35 | #include "explow.h" | |
36 | #include "expr.h" | |
37 | #include "output.h" | |
38 | #include "tree-pass.h" | |
3877c560 | 39 | #include "rtx-vector-builder.h" |
0dc6645f AS |
40 | |
41 | /* Analyze vector computations and remove unnecessary doubleword | |
42 | swaps (xxswapdi instructions). This pass is performed only | |
43 | for little-endian VSX code generation. | |
44 | ||
45 | For this specific case, loads and stores of 4x32 and 2x64 vectors | |
46 | are inefficient. These are implemented using the lvxd2x and | |
47 | stvxd2x instructions, which invert the order of doublewords in | |
48 | a vector register. Thus the code generation inserts an xxswapdi | |
49 | after each such load, and prior to each such store. (For spill | |
50 | code after register assignment, an additional xxswapdi is inserted | |
51 | following each store in order to return a hard register to its | |
52 | unpermuted value.) | |
53 | ||
54 | The extra xxswapdi instructions reduce performance. This can be | |
55 | particularly bad for vectorized code. The purpose of this pass | |
56 | is to reduce the number of xxswapdi instructions required for | |
57 | correctness. | |
58 | ||
59 | The primary insight is that much code that operates on vectors | |
60 | does not care about the relative order of elements in a register, | |
61 | so long as the correct memory order is preserved. If we have | |
62 | a computation where all input values are provided by lvxd2x/xxswapdi | |
63 | sequences, all outputs are stored using xxswapdi/stvxd2x sequences, | |
64 | and all intermediate computations are pure SIMD (independent of | |
65 | element order), then all the xxswapdi's associated with the loads | |
66 | and stores may be removed. | |
67 | ||
68 | This pass uses some of the infrastructure and logical ideas from | |
e53b6e56 | 69 | the "web" pass in web.cc. We create maximal webs of computations |
0dc6645f AS |
70 | fitting the description above using union-find. Each such web is |
71 | then optimized by removing its unnecessary xxswapdi instructions. | |
72 | ||
73 | The pass is placed prior to global optimization so that we can | |
74 | perform the optimization in the safest and simplest way possible; | |
75 | that is, by replacing each xxswapdi insn with a register copy insn. | |
76 | Subsequent forward propagation will remove copies where possible. | |
77 | ||
78 | There are some operations sensitive to element order for which we | |
79 | can still allow the operation, provided we modify those operations. | |
80 | These include CONST_VECTORs, for which we must swap the first and | |
81 | second halves of the constant vector; and SUBREGs, for which we | |
82 | must adjust the byte offset to account for the swapped doublewords. | |
83 | A remaining opportunity would be non-immediate-form splats, for | |
84 | which we should adjust the selected lane of the input. We should | |
85 | also make code generation adjustments for sum-across operations, | |
86 | since this is a common vectorizer reduction. | |
87 | ||
88 | Because we run prior to the first split, we can see loads and stores | |
89 | here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla | |
90 | vector loads and stores that have not yet been split into a permuting | |
91 | load/store and a swap. (One way this can happen is with a builtin | |
92 | call to vec_vsx_{ld,st}.) We can handle these as well, but rather | |
93 | than deleting a swap, we convert the load/store into a permuting | |
94 | load/store (which effectively removes the swap). */ | |
95 | ||
96 | /* Notes on Permutes | |
97 | ||
98 | We do not currently handle computations that contain permutes. There | |
99 | is a general transformation that can be performed correctly, but it | |
100 | may introduce more expensive code than it replaces. To handle these | |
101 | would require a cost model to determine when to perform the optimization. | |
102 | This commentary records how this could be done if desired. | |
103 | ||
104 | The most general permute is something like this (example for V16QI): | |
105 | ||
106 | (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI)) | |
107 | (parallel [(const_int a0) (const_int a1) | |
108 | ... | |
109 | (const_int a14) (const_int a15)])) | |
110 | ||
111 | where a0,...,a15 are in [0,31] and select elements from op1 and op2 | |
112 | to produce in the result. | |
113 | ||
114 | Regardless of mode, we can convert the PARALLEL to a mask of 16 | |
115 | byte-element selectors. Let's call this M, with M[i] representing | |
116 | the ith byte-element selector value. Then if we swap doublewords | |
117 | throughout the computation, we can get correct behavior by replacing | |
118 | M with M' as follows: | |
119 | ||
120 | M'[i] = { (M[i]+8)%16 : M[i] in [0,15] | |
121 | { ((M[i]+8)%16)+16 : M[i] in [16,31] | |
122 | ||
123 | This seems promising at first, since we are just replacing one mask | |
124 | with another. But certain masks are preferable to others. If M | |
125 | is a mask that matches a vmrghh pattern, for example, M' certainly | |
126 | will not. Instead of a single vmrghh, we would generate a load of | |
127 | M' and a vperm. So we would need to know how many xxswapd's we can | |
128 | remove as a result of this transformation to determine if it's | |
129 | profitable; and preferably the logic would need to be aware of all | |
130 | the special preferable masks. | |
131 | ||
132 | Another form of permute is an UNSPEC_VPERM, in which the mask is | |
133 | already in a register. In some cases, this mask may be a constant | |
134 | that we can discover with ud-chains, in which case the above | |
135 | transformation is ok. However, the common usage here is for the | |
136 | mask to be produced by an UNSPEC_LVSL, in which case the mask | |
137 | cannot be known at compile time. In such a case we would have to | |
138 | generate several instructions to compute M' as above at run time, | |
139 | and a cost model is needed again. | |
140 | ||
141 | However, when the mask M for an UNSPEC_VPERM is loaded from the | |
142 | constant pool, we can replace M with M' as above at no cost | |
143 | beyond adding a constant pool entry. */ | |
144 | ||
/* This is based on the union-find logic in web.cc.  web_entry_base is
   defined in df.h.  */
class swap_web_entry : public web_entry_base
{
 public:
  /* Pointer to the insn described by this entry.  */
  rtx_insn *insn;
  /* Set if insn contains a mention of a vector register.  All other
     fields are undefined if this field is unset.  */
  unsigned int is_relevant : 1;
  /* Set if insn is a load.  */
  unsigned int is_load : 1;
  /* Set if insn is a store.  */
  unsigned int is_store : 1;
  /* Set if insn is a doubleword swap.  This can either be a register swap
     or a permuting load or store (test is_load and is_store for this).  */
  unsigned int is_swap : 1;
  /* Set if the insn has a live-in use of a parameter register.  */
  unsigned int is_live_in : 1;
  /* Set if the insn has a live-out def of a return register.  */
  unsigned int is_live_out : 1;
  /* Set if the insn contains a subreg reference of a vector register.  */
  unsigned int contains_subreg : 1;
  /* Set if the insn contains a 128-bit integer operand.  */
  unsigned int is_128_int : 1;
  /* Set if this is a call-insn.  */
  unsigned int is_call : 1;
  /* Set if this insn does not perform a vector operation for which
     element order matters, or if we know how to fix it up if it does.
     Undefined if is_swap is set.  */
  unsigned int is_swappable : 1;
  /* A nonzero value indicates what kind of special handling for this
     insn is required if doublewords are swapped.  Undefined if
     is_swappable is not set.  See enum special_handling_values.  */
  unsigned int special_handling : 4;
  /* Set if the web represented by this entry cannot be optimized.  */
  unsigned int web_not_optimizable : 1;
  /* Set if this insn should be deleted.  */
  unsigned int will_delete : 1;
};
185 | ||
/* Values for the special_handling bitfield of swap_web_entry, naming
   the fixup an otherwise order-sensitive insn needs when the
   doublewords of its web are swapped.  */
enum special_handling_values {
  /* No special handling required.  */
  SH_NONE = 0,
  /* Swap the first and second halves of a constant vector.  */
  SH_CONST_VECTOR,
  /* Adjust a subreg's byte offset to account for swapped doublewords.  */
  SH_SUBREG,
  /* Convert a vanilla vector load into a permuting load.  */
  SH_NOSWAP_LD,
  /* Convert a vanilla vector store into a permuting store.  */
  SH_NOSWAP_ST,
  /* Change the selected lane of a vector extract.  */
  SH_EXTRACT,
  /* Change the source lane of a direct-form splat.  */
  SH_SPLAT,
  /* Adjust the lane selectors of an xxpermdi.  */
  SH_XXPERMDI,
  /* Presumably adjusts a vector concatenate; handler not visible in
     this chunk -- confirm against the fixup code.  */
  SH_CONCAT,
  /* Presumably adjusts the control vector of a vperm whose mask comes
     from the constant pool; confirm against the fixup code.  */
  SH_VPERM
};
198 | ||
199 | /* Union INSN with all insns containing definitions that reach USE. | |
200 | Detect whether USE is live-in to the current function. */ | |
201 | static void | |
202 | union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use) | |
203 | { | |
204 | struct df_link *link = DF_REF_CHAIN (use); | |
205 | ||
206 | if (!link) | |
207 | insn_entry[INSN_UID (insn)].is_live_in = 1; | |
208 | ||
209 | while (link) | |
210 | { | |
211 | if (DF_REF_IS_ARTIFICIAL (link->ref)) | |
212 | insn_entry[INSN_UID (insn)].is_live_in = 1; | |
213 | ||
214 | if (DF_REF_INSN_INFO (link->ref)) | |
215 | { | |
216 | rtx def_insn = DF_REF_INSN (link->ref); | |
217 | (void)unionfind_union (insn_entry + INSN_UID (insn), | |
218 | insn_entry + INSN_UID (def_insn)); | |
219 | } | |
220 | ||
221 | link = link->next; | |
222 | } | |
223 | } | |
224 | ||
225 | /* Union INSN with all insns containing uses reached from DEF. | |
226 | Detect whether DEF is live-out from the current function. */ | |
227 | static void | |
228 | union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def) | |
229 | { | |
230 | struct df_link *link = DF_REF_CHAIN (def); | |
231 | ||
232 | if (!link) | |
233 | insn_entry[INSN_UID (insn)].is_live_out = 1; | |
234 | ||
235 | while (link) | |
236 | { | |
237 | /* This could be an eh use or some other artificial use; | |
238 | we treat these all the same (killing the optimization). */ | |
239 | if (DF_REF_IS_ARTIFICIAL (link->ref)) | |
240 | insn_entry[INSN_UID (insn)].is_live_out = 1; | |
241 | ||
242 | if (DF_REF_INSN_INFO (link->ref)) | |
243 | { | |
244 | rtx use_insn = DF_REF_INSN (link->ref); | |
245 | (void)unionfind_union (insn_entry + INSN_UID (insn), | |
246 | insn_entry + INSN_UID (use_insn)); | |
247 | } | |
248 | ||
249 | link = link->next; | |
250 | } | |
251 | } | |
252 | ||
f700e4b0 XL |
253 | /* Return 1 iff PAT (a SINGLE_SET) is a rotate 64 bit expression; else return |
254 | 0. */ | |
255 | ||
256 | static bool | |
257 | pattern_is_rotate64 (rtx pat) | |
258 | { | |
259 | rtx rot = SET_SRC (pat); | |
260 | ||
261 | if (GET_CODE (rot) == ROTATE && CONST_INT_P (XEXP (rot, 1)) | |
262 | && INTVAL (XEXP (rot, 1)) == 64) | |
263 | return true; | |
264 | ||
265 | return false; | |
266 | } | |
267 | ||
0dc6645f AS |
268 | /* Return 1 iff INSN is a load insn, including permuting loads that |
269 | represent an lvxd2x instruction; else return 0. */ | |
270 | static unsigned int | |
271 | insn_is_load_p (rtx insn) | |
272 | { | |
273 | rtx body = PATTERN (insn); | |
274 | ||
275 | if (GET_CODE (body) == SET) | |
276 | { | |
2e42a52f | 277 | if (MEM_P (SET_SRC (body))) |
0dc6645f AS |
278 | return 1; |
279 | ||
280 | if (GET_CODE (SET_SRC (body)) == VEC_SELECT | |
2e42a52f | 281 | && MEM_P (XEXP (SET_SRC (body), 0))) |
0dc6645f AS |
282 | return 1; |
283 | ||
f700e4b0 XL |
284 | if (pattern_is_rotate64 (body) && MEM_P (XEXP (SET_SRC (body), 0))) |
285 | return 1; | |
286 | ||
0dc6645f AS |
287 | return 0; |
288 | } | |
289 | ||
290 | if (GET_CODE (body) != PARALLEL) | |
291 | return 0; | |
292 | ||
293 | rtx set = XVECEXP (body, 0, 0); | |
294 | ||
2e42a52f | 295 | if (GET_CODE (set) == SET && MEM_P (SET_SRC (set))) |
0dc6645f AS |
296 | return 1; |
297 | ||
298 | return 0; | |
299 | } | |
300 | ||
301 | /* Return 1 iff INSN is a store insn, including permuting stores that | |
302 | represent an stvxd2x instruction; else return 0. */ | |
303 | static unsigned int | |
304 | insn_is_store_p (rtx insn) | |
305 | { | |
306 | rtx body = PATTERN (insn); | |
2e42a52f | 307 | if (GET_CODE (body) == SET && MEM_P (SET_DEST (body))) |
0dc6645f AS |
308 | return 1; |
309 | if (GET_CODE (body) != PARALLEL) | |
310 | return 0; | |
311 | rtx set = XVECEXP (body, 0, 0); | |
2e42a52f | 312 | if (GET_CODE (set) == SET && MEM_P (SET_DEST (set))) |
0dc6645f AS |
313 | return 1; |
314 | return 0; | |
315 | } | |
316 | ||
317 | /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap, | |
318 | a permuting load, or a permuting store. */ | |
319 | static unsigned int | |
320 | insn_is_swap_p (rtx insn) | |
321 | { | |
322 | rtx body = PATTERN (insn); | |
323 | if (GET_CODE (body) != SET) | |
324 | return 0; | |
325 | rtx rhs = SET_SRC (body); | |
f700e4b0 XL |
326 | if (pattern_is_rotate64 (body)) |
327 | return 1; | |
0dc6645f AS |
328 | if (GET_CODE (rhs) != VEC_SELECT) |
329 | return 0; | |
330 | rtx parallel = XEXP (rhs, 1); | |
331 | if (GET_CODE (parallel) != PARALLEL) | |
332 | return 0; | |
333 | unsigned int len = XVECLEN (parallel, 0); | |
334 | if (len != 2 && len != 4 && len != 8 && len != 16) | |
335 | return 0; | |
336 | for (unsigned int i = 0; i < len / 2; ++i) | |
337 | { | |
338 | rtx op = XVECEXP (parallel, 0, i); | |
2e42a52f | 339 | if (!CONST_INT_P (op) || INTVAL (op) != len / 2 + i) |
0dc6645f AS |
340 | return 0; |
341 | } | |
342 | for (unsigned int i = len / 2; i < len; ++i) | |
343 | { | |
344 | rtx op = XVECEXP (parallel, 0, i); | |
2e42a52f | 345 | if (!CONST_INT_P (op) || INTVAL (op) != i - len / 2) |
0dc6645f AS |
346 | return 0; |
347 | } | |
348 | return 1; | |
349 | } | |
350 | ||
a3a821c9 KN |
351 | /* Return true iff EXPR represents the sum of two registers. */ |
352 | bool | |
353 | rs6000_sum_of_two_registers_p (const_rtx expr) | |
354 | { | |
355 | if (GET_CODE (expr) == PLUS) | |
356 | { | |
357 | const_rtx operand1 = XEXP (expr, 0); | |
358 | const_rtx operand2 = XEXP (expr, 1); | |
359 | return (REG_P (operand1) && REG_P (operand2)); | |
360 | } | |
361 | return false; | |
362 | } | |
363 | ||
364 | /* Return true iff EXPR represents an address expression that masks off | |
365 | the low-order 4 bits in the style of an lvx or stvx rtl pattern. */ | |
366 | bool | |
367 | rs6000_quadword_masked_address_p (const_rtx expr) | |
368 | { | |
369 | if (GET_CODE (expr) == AND) | |
370 | { | |
371 | const_rtx operand1 = XEXP (expr, 0); | |
372 | const_rtx operand2 = XEXP (expr, 1); | |
373 | if ((REG_P (operand1) || rs6000_sum_of_two_registers_p (operand1)) | |
374 | && CONST_SCALAR_INT_P (operand2) && INTVAL (operand2) == -16) | |
375 | return true; | |
376 | } | |
377 | return false; | |
378 | } | |
379 | ||
/* Return TRUE if INSN represents a swap of a swapped load from memory
   and the memory address is quad-word aligned.  */
static bool
quad_aligned_load_p (swap_web_entry *insn_entry, rtx_insn *insn)
{
  /* INSN itself must be a register swap (not itself a load/store).  */
  unsigned uid = INSN_UID (insn);
  if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
    return false;

  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);

  /* Since insn is known to represent a swap instruction, we know it
     "uses" only one input variable.  */
  df_ref use = DF_INSN_INFO_USES (insn_info);

  /* Figure out where this input variable is defined.  */
  struct df_link *def_link = DF_REF_CHAIN (use);

  /* If there is no definition or the definition is artificial or there are
     multiple definitions, punt.  */
  if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
      || def_link->next)
    return false;

  rtx def_insn = DF_REF_INSN (def_link->ref);
  unsigned uid2 = INSN_UID (def_insn);
  /* We're looking for a load-with-swap insn.  If this is not that,
     return false.  */
  if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
    return false;

  /* If the source of the rtl def is not a set from memory, return
     false.  The permuting source may be either a vec_select or a
     rotate-by-64 applied to the memory operand.  */
  rtx body = PATTERN (def_insn);
  if (GET_CODE (body) != SET
      || !(GET_CODE (SET_SRC (body)) == VEC_SELECT
	   || pattern_is_rotate64 (body))
      || !MEM_P (XEXP (SET_SRC (body), 0)))
    return false;

  /* Finally, the base address must be a register or a reg+reg sum
     (the forms lvx accepts) and the access quadword-aligned.  */
  rtx mem = XEXP (SET_SRC (body), 0);
  rtx base_reg = XEXP (mem, 0);
  return ((REG_P (base_reg) || rs6000_sum_of_two_registers_p (base_reg))
	  && MEM_ALIGN (mem) >= 128) ? true : false;
}
425 | ||
/* Return TRUE if INSN represents a store-with-swap of a swapped value
   and the memory address is quad-word aligned.  */
static bool
quad_aligned_store_p (swap_web_entry *insn_entry, rtx_insn *insn)
{
  /* INSN itself must be a permuting store.  */
  unsigned uid = INSN_UID (insn);
  if (!insn_entry[uid].is_swap || !insn_entry[uid].is_store)
    return false;

  rtx body = PATTERN (insn);
  rtx dest_address = XEXP (SET_DEST (body), 0);
  rtx swap_reg = XEXP (SET_SRC (body), 0);

  /* If the base address for the memory expression is not represented
     by a single register and is not the sum of two registers, punt.  */
  if (!REG_P (dest_address) && !rs6000_sum_of_two_registers_p (dest_address))
    return false;

  /* Confirm that the value to be stored is produced by a swap
     instruction.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);

      /* If this is not the definition of the candidate swap register,
	 then skip it.  I am interested in a different definition.  */
      if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
	continue;

      /* If there is no def or the def is artificial or there are
	 multiple defs, punt.  */
      if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
	  || def_link->next)
	return false;

      rtx def_insn = DF_REF_INSN (def_link->ref);
      unsigned uid2 = INSN_UID (def_insn);

      /* If this source value is not a simple swap, return false */
      if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load
	  || insn_entry[uid2].is_store)
	return false;

      /* I've processed the use that I care about, so break out of
	 this loop.  NOTE(review): if no use matched swap_reg the loop
	 falls through without proving the source is a swap --
	 presumably a matching use always exists for a store of this
	 shape; confirm.  */
      break;
    }

  /* At this point, we know the source data comes from a swap.  The
     remaining question is whether the memory address is aligned.  */
  rtx set = single_set (insn);
  if (set)
    {
      rtx dest = SET_DEST (set);
      if (MEM_P (dest))
	return (MEM_ALIGN (dest) >= 128);
    }
  return false;
}
487 | ||
/* Return 1 iff the insn of INSN_ENTRY, known to reference a swap, is
   both fed by a load and a feeder of a store.  */
static unsigned int
swap_feeds_both_load_and_store (swap_web_entry *insn_entry)
{
  rtx insn = insn_entry->insn;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref def, use;
  struct df_link *link = 0;
  rtx_insn *load = 0, *store = 0;
  bool fed_by_load = 0;
  bool feeds_store = 0;

  /* Fed by a load if some use of INSN is defined by a load-with-swap.
     Only the first entry of each def chain is examined.
     NOTE(review): link->ref is dereferenced without a null check;
     presumably every use here has a reaching definition -- confirm.  */
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      link = DF_REF_CHAIN (use);
      load = DF_REF_INSN (link->ref);
      if (insn_is_load_p (load) && insn_is_swap_p (load))
	fed_by_load = 1;
    }

  /* Feeds a store if some def of INSN reaches a store-with-swap.
     Again only the first entry of each use chain is examined.  */
  FOR_EACH_INSN_INFO_DEF (def, insn_info)
    {
      link = DF_REF_CHAIN (def);
      store = DF_REF_INSN (link->ref);
      if (insn_is_store_p (store) && insn_is_swap_p (store))
	feeds_store = 1;
    }

  return fed_by_load && feeds_store;
}
519 | ||
/* Return TRUE if insn is a swap fed by a load from the constant pool.  */
static bool
const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
{
  /* INSN itself must be a register swap, not a permuting load.  */
  unsigned uid = INSN_UID (insn);
  if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
    return false;

  const_rtx tocrel_base;

  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;

  /* Iterate over the definitions that are used by this insn.  Since
     this is known to be a swap insn, expect only one used definition.  */
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);

      /* If there is no def or the def is artificial or there are
	 multiple defs, punt.  */
      if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
	  || def_link->next)
	return false;

      rtx def_insn = DF_REF_INSN (def_link->ref);
      unsigned uid2 = INSN_UID (def_insn);
      /* If this is not a load or is not a swap, return false.  */
      if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
	return false;

      /* If the source of the rtl def is not a set from memory, return
	 false.  The permuting source may be either a vec_select or a
	 rotate-by-64 applied to the memory operand.  */
      rtx body = PATTERN (def_insn);
      if (GET_CODE (body) != SET
	  || !(GET_CODE (SET_SRC (body)) == VEC_SELECT
	       || pattern_is_rotate64 (body))
	  || !MEM_P (XEXP (SET_SRC (body), 0)))
	return false;

      rtx mem = XEXP (SET_SRC (body), 0);
      rtx base_reg = XEXP (mem, 0);
      /* If the base address for the memory expression is not
	 represented by a register, punt.  */
      if (!REG_P (base_reg))
	return false;

      /* Now chase the definition of the base register to verify it is
	 a TOC-relative reference to the constant pool.  */
      df_ref base_use;
      insn_info = DF_INSN_INFO_GET (def_insn);
      FOR_EACH_INSN_INFO_USE (base_use, insn_info)
	{
	  /* If base_use does not represent base_reg, look for another
	     use.  */
	  if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
	    continue;

	  struct df_link *base_def_link = DF_REF_CHAIN (base_use);
	  if (!base_def_link || base_def_link->next)
	    return false;

	  /* Constants held on the stack are not "true" constants
	     because their values are not part of the static load
	     image.  If this constant's base reference is a stack
	     or frame pointer, it is seen as an artificial
	     reference.  */
	  if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
	    return false;

	  rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
	  rtx tocrel_body = PATTERN (tocrel_insn);
	  rtx base, offset;
	  if (GET_CODE (tocrel_body) != SET)
	    return false;
	  /* There is an extra level of indirection for small/large
	     code models.  */
	  rtx tocrel_expr = SET_SRC (tocrel_body);
	  if (MEM_P (tocrel_expr))
	    tocrel_expr = XEXP (tocrel_expr, 0);
	  if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
	    return false;
	  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);

	  if (!SYMBOL_REF_P (base) || !CONSTANT_POOL_ADDRESS_P (base))
	    return false;
	  else
	    {
	      /* FIXME: The conditions under which
		  (SYMBOL_REF_P (const_vector)
		   && !CONSTANT_POOL_ADDRESS_P (const_vector))
		 are not well understood.  This code prevents
		 an internal compiler error which will occur in
		 replace_swapped_load_constant () if we were to return
		 true.  Some day, we should figure out how to properly
		 handle this condition in
		 replace_swapped_load_constant () and then we can
		 remove this special test.  */
	      rtx const_vector = get_pool_constant (base);
	      if (SYMBOL_REF_P (const_vector)
		  && CONSTANT_POOL_ADDRESS_P (const_vector))
		const_vector = get_pool_constant (const_vector);
	      if (GET_CODE (const_vector) != CONST_VECTOR)
		return false;
	    }
	}
    }
  return true;
}
627 | ||
628 | /* Return TRUE iff OP matches a V2DF reduction pattern. See the | |
629 | definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */ | |
630 | static bool | |
631 | v2df_reduction_p (rtx op) | |
632 | { | |
633 | if (GET_MODE (op) != V2DFmode) | |
634 | return false; | |
635 | ||
636 | enum rtx_code code = GET_CODE (op); | |
637 | if (code != PLUS && code != SMIN && code != SMAX) | |
638 | return false; | |
639 | ||
640 | rtx concat = XEXP (op, 0); | |
641 | if (GET_CODE (concat) != VEC_CONCAT) | |
642 | return false; | |
643 | ||
644 | rtx select0 = XEXP (concat, 0); | |
645 | rtx select1 = XEXP (concat, 1); | |
646 | if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT) | |
647 | return false; | |
648 | ||
649 | rtx reg0 = XEXP (select0, 0); | |
650 | rtx reg1 = XEXP (select1, 0); | |
651 | if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0)) | |
652 | return false; | |
653 | ||
654 | rtx parallel0 = XEXP (select0, 1); | |
655 | rtx parallel1 = XEXP (select1, 1); | |
656 | if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL) | |
657 | return false; | |
658 | ||
659 | if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx) | |
660 | || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx)) | |
661 | return false; | |
662 | ||
663 | return true; | |
664 | } | |
665 | ||
666 | /* Return 1 iff OP is an operand that will not be affected by having | |
667 | vector doublewords swapped in memory. */ | |
668 | static unsigned int | |
669 | rtx_is_swappable_p (rtx op, unsigned int *special) | |
670 | { | |
671 | enum rtx_code code = GET_CODE (op); | |
672 | int i, j; | |
673 | rtx parallel; | |
674 | ||
675 | switch (code) | |
676 | { | |
677 | case LABEL_REF: | |
678 | case SYMBOL_REF: | |
679 | case CLOBBER: | |
680 | case REG: | |
681 | return 1; | |
682 | ||
683 | case VEC_CONCAT: | |
684 | case ASM_INPUT: | |
685 | case ASM_OPERANDS: | |
686 | return 0; | |
687 | ||
688 | case CONST_VECTOR: | |
689 | { | |
690 | *special = SH_CONST_VECTOR; | |
691 | return 1; | |
692 | } | |
693 | ||
694 | case VEC_DUPLICATE: | |
695 | /* Opportunity: If XEXP (op, 0) has the same mode as the result, | |
696 | and XEXP (op, 1) is a PARALLEL with a single QImode const int, | |
697 | it represents a vector splat for which we can do special | |
698 | handling. */ | |
2e42a52f | 699 | if (CONST_INT_P (XEXP (op, 0))) |
0dc6645f AS |
700 | return 1; |
701 | else if (REG_P (XEXP (op, 0)) | |
702 | && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0))) | |
703 | /* This catches V2DF and V2DI splat, at a minimum. */ | |
704 | return 1; | |
705 | else if (GET_CODE (XEXP (op, 0)) == TRUNCATE | |
706 | && REG_P (XEXP (XEXP (op, 0), 0)) | |
707 | && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0))) | |
708 | /* This catches splat of a truncated value. */ | |
709 | return 1; | |
710 | else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT) | |
711 | /* If the duplicated item is from a select, defer to the select | |
712 | processing to see if we can change the lane for the splat. */ | |
713 | return rtx_is_swappable_p (XEXP (op, 0), special); | |
714 | else | |
715 | return 0; | |
716 | ||
717 | case VEC_SELECT: | |
718 | /* A vec_extract operation is ok if we change the lane. */ | |
2e42a52f | 719 | if (REG_P (XEXP (op, 0)) |
0dc6645f AS |
720 | && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op) |
721 | && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL | |
722 | && XVECLEN (parallel, 0) == 1 | |
2e42a52f | 723 | && CONST_INT_P (XVECEXP (parallel, 0, 0))) |
0dc6645f AS |
724 | { |
725 | *special = SH_EXTRACT; | |
726 | return 1; | |
727 | } | |
728 | /* An XXPERMDI is ok if we adjust the lanes. Note that if the | |
729 | XXPERMDI is a swap operation, it will be identified by | |
730 | insn_is_swap_p and therefore we won't get here. */ | |
731 | else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT | |
732 | && (GET_MODE (XEXP (op, 0)) == V4DFmode | |
733 | || GET_MODE (XEXP (op, 0)) == V4DImode) | |
734 | && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL | |
735 | && XVECLEN (parallel, 0) == 2 | |
2e42a52f PB |
736 | && CONST_INT_P (XVECEXP (parallel, 0, 0)) |
737 | && CONST_INT_P (XVECEXP (parallel, 0, 1))) | |
0dc6645f AS |
738 | { |
739 | *special = SH_XXPERMDI; | |
740 | return 1; | |
741 | } | |
742 | else if (v2df_reduction_p (op)) | |
743 | return 1; | |
744 | else | |
745 | return 0; | |
746 | ||
747 | case UNSPEC: | |
748 | { | |
749 | /* Various operations are unsafe for this optimization, at least | |
750 | without significant additional work. Permutes are obviously | |
751 | problematic, as both the permute control vector and the ordering | |
752 | of the target values are invalidated by doubleword swapping. | |
753 | Vector pack and unpack modify the number of vector lanes. | |
754 | Merge-high/low will not operate correctly on swapped operands. | |
755 | Vector shifts across element boundaries are clearly uncool, | |
756 | as are vector select and concatenate operations. Vector | |
757 | sum-across instructions define one operand with a specific | |
758 | order-dependent element, so additional fixup code would be | |
759 | needed to make those work. Vector set and non-immediate-form | |
760 | vector splat are element-order sensitive. A few of these | |
761 | cases might be workable with special handling if required. | |
762 | Adding cost modeling would be appropriate in some cases. */ | |
763 | int val = XINT (op, 1); | |
764 | switch (val) | |
765 | { | |
766 | default: | |
767 | break; | |
73598b33 | 768 | case UNSPEC_VBPERMQ: |
0dc6645f AS |
769 | case UNSPEC_VPACK_SIGN_SIGN_SAT: |
770 | case UNSPEC_VPACK_SIGN_UNS_SAT: | |
771 | case UNSPEC_VPACK_UNS_UNS_MOD: | |
772 | case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT: | |
773 | case UNSPEC_VPACK_UNS_UNS_SAT: | |
774 | case UNSPEC_VPERM: | |
775 | case UNSPEC_VPERM_UNS: | |
776 | case UNSPEC_VPERMHI: | |
777 | case UNSPEC_VPERMSI: | |
cb90e18c | 778 | case UNSPEC_VPERMXOR: |
0dc6645f AS |
779 | case UNSPEC_VPKPX: |
780 | case UNSPEC_VSLDOI: | |
781 | case UNSPEC_VSLO: | |
782 | case UNSPEC_VSRO: | |
783 | case UNSPEC_VSUM2SWS: | |
784 | case UNSPEC_VSUM4S: | |
785 | case UNSPEC_VSUM4UBS: | |
786 | case UNSPEC_VSUMSWS: | |
787 | case UNSPEC_VSUMSWS_DIRECT: | |
788 | case UNSPEC_VSX_CONCAT: | |
73598b33 AM |
789 | case UNSPEC_VSX_CVDPSPN: |
790 | case UNSPEC_VSX_CVSPDP: | |
791 | case UNSPEC_VSX_CVSPDPN: | |
792 | case UNSPEC_VSX_EXTRACT: | |
0dc6645f AS |
793 | case UNSPEC_VSX_SET: |
794 | case UNSPEC_VSX_SLDWI: | |
73598b33 | 795 | case UNSPEC_VSX_VSLO: |
0dc6645f AS |
796 | case UNSPEC_VUNPACK_HI_SIGN: |
797 | case UNSPEC_VUNPACK_HI_SIGN_DIRECT: | |
798 | case UNSPEC_VUNPACK_LO_SIGN: | |
799 | case UNSPEC_VUNPACK_LO_SIGN_DIRECT: | |
800 | case UNSPEC_VUPKHPX: | |
801 | case UNSPEC_VUPKHS_V4SF: | |
802 | case UNSPEC_VUPKHU_V4SF: | |
803 | case UNSPEC_VUPKLPX: | |
804 | case UNSPEC_VUPKLS_V4SF: | |
805 | case UNSPEC_VUPKLU_V4SF: | |
0dc6645f AS |
806 | return 0; |
807 | case UNSPEC_VSPLT_DIRECT: | |
808 | case UNSPEC_VSX_XXSPLTD: | |
809 | *special = SH_SPLAT; | |
810 | return 1; | |
811 | case UNSPEC_REDUC_PLUS: | |
812 | case UNSPEC_REDUC: | |
813 | return 1; | |
2a2592a1 WS |
814 | case UNSPEC_VPMSUM: |
815 | /* vpmsumd is not swappable, but vpmsum[bhw] are. */ | |
816 | if (GET_MODE (op) == V2DImode) | |
817 | return 0; | |
818 | break; | |
0dc6645f AS |
819 | } |
820 | } | |
821 | ||
822 | default: | |
823 | break; | |
824 | } | |
825 | ||
826 | const char *fmt = GET_RTX_FORMAT (code); | |
827 | int ok = 1; | |
828 | ||
829 | for (i = 0; i < GET_RTX_LENGTH (code); ++i) | |
830 | if (fmt[i] == 'e' || fmt[i] == 'u') | |
831 | { | |
832 | unsigned int special_op = SH_NONE; | |
833 | ok &= rtx_is_swappable_p (XEXP (op, i), &special_op); | |
834 | if (special_op == SH_NONE) | |
835 | continue; | |
836 | /* Ensure we never have two kinds of special handling | |
837 | for the same insn. */ | |
838 | if (*special != SH_NONE && *special != special_op) | |
839 | return 0; | |
840 | *special = special_op; | |
841 | } | |
842 | else if (fmt[i] == 'E') | |
843 | for (j = 0; j < XVECLEN (op, i); ++j) | |
844 | { | |
845 | unsigned int special_op = SH_NONE; | |
846 | ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op); | |
847 | if (special_op == SH_NONE) | |
848 | continue; | |
849 | /* Ensure we never have two kinds of special handling | |
850 | for the same insn. */ | |
851 | if (*special != SH_NONE && *special != special_op) | |
852 | return 0; | |
853 | *special = special_op; | |
854 | } | |
855 | ||
856 | return ok; | |
857 | } | |
858 | ||
859 | /* Return 1 iff INSN is an operand that will not be affected by | |
860 | having vector doublewords swapped in memory (in which case | |
861 | *SPECIAL is unchanged), or that can be modified to be correct | |
862 | if vector doublewords are swapped in memory (in which case | |
863 | *SPECIAL is changed to a value indicating how). */ | |
864 | static unsigned int | |
865 | insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn, | |
866 | unsigned int *special) | |
867 | { | |
868 | /* Calls are always bad. */ | |
869 | if (GET_CODE (insn) == CALL_INSN) | |
870 | return 0; | |
871 | ||
872 | /* Loads and stores seen here are not permuting, but we can still | |
873 | fix them up by converting them to permuting ones. Exceptions: | |
874 | UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL | |
875 | body instead of a SET; and UNSPEC_STVE, which has an UNSPEC | |
876 | for the SET source. Also we must now make an exception for lvx | |
877 | and stvx when they are not in the UNSPEC_LVX/STVX form (with the | |
878 | explicit "& -16") since this leads to unrecognizable insns. */ | |
879 | rtx body = PATTERN (insn); | |
880 | int i = INSN_UID (insn); | |
881 | ||
882 | if (insn_entry[i].is_load) | |
883 | { | |
884 | if (GET_CODE (body) == SET) | |
885 | { | |
886 | rtx rhs = SET_SRC (body); | |
887 | /* Even without a swap, the RHS might be a vec_select for, say, | |
888 | a byte-reversing load. */ | |
2e42a52f | 889 | if (!MEM_P (rhs)) |
0dc6645f AS |
890 | return 0; |
891 | if (GET_CODE (XEXP (rhs, 0)) == AND) | |
892 | return 0; | |
893 | ||
894 | *special = SH_NOSWAP_LD; | |
895 | return 1; | |
896 | } | |
897 | else | |
898 | return 0; | |
899 | } | |
900 | ||
901 | if (insn_entry[i].is_store) | |
902 | { | |
903 | if (GET_CODE (body) == SET | |
d10cff95 CL |
904 | && GET_CODE (SET_SRC (body)) != UNSPEC |
905 | && GET_CODE (SET_SRC (body)) != VEC_SELECT) | |
0dc6645f AS |
906 | { |
907 | rtx lhs = SET_DEST (body); | |
d10cff95 | 908 | /* Even without a swap, the RHS might be a vec_select for, say, |
0dc6645f | 909 | a byte-reversing store. */ |
2e42a52f | 910 | if (!MEM_P (lhs)) |
0dc6645f AS |
911 | return 0; |
912 | if (GET_CODE (XEXP (lhs, 0)) == AND) | |
913 | return 0; | |
914 | ||
915 | *special = SH_NOSWAP_ST; | |
916 | return 1; | |
917 | } | |
918 | else | |
919 | return 0; | |
920 | } | |
921 | ||
922 | /* A convert to single precision can be left as is provided that | |
923 | all of its uses are in xxspltw instructions that splat BE element | |
924 | zero. */ | |
925 | if (GET_CODE (body) == SET | |
926 | && GET_CODE (SET_SRC (body)) == UNSPEC | |
927 | && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN) | |
928 | { | |
929 | df_ref def; | |
930 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
931 | ||
932 | FOR_EACH_INSN_INFO_DEF (def, insn_info) | |
933 | { | |
934 | struct df_link *link = DF_REF_CHAIN (def); | |
935 | if (!link) | |
936 | return 0; | |
937 | ||
938 | for (; link; link = link->next) { | |
939 | rtx use_insn = DF_REF_INSN (link->ref); | |
940 | rtx use_body = PATTERN (use_insn); | |
941 | if (GET_CODE (use_body) != SET | |
942 | || GET_CODE (SET_SRC (use_body)) != UNSPEC | |
943 | || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW | |
944 | || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx) | |
945 | return 0; | |
946 | } | |
947 | } | |
948 | ||
949 | return 1; | |
950 | } | |
951 | ||
952 | /* A concatenation of two doublewords is ok if we reverse the | |
953 | order of the inputs. */ | |
954 | if (GET_CODE (body) == SET | |
955 | && GET_CODE (SET_SRC (body)) == VEC_CONCAT | |
956 | && (GET_MODE (SET_SRC (body)) == V2DFmode | |
957 | || GET_MODE (SET_SRC (body)) == V2DImode)) | |
958 | { | |
959 | *special = SH_CONCAT; | |
960 | return 1; | |
961 | } | |
962 | ||
963 | /* V2DF reductions are always swappable. */ | |
964 | if (GET_CODE (body) == PARALLEL) | |
965 | { | |
966 | rtx expr = XVECEXP (body, 0, 0); | |
967 | if (GET_CODE (expr) == SET | |
968 | && v2df_reduction_p (SET_SRC (expr))) | |
969 | return 1; | |
970 | } | |
971 | ||
972 | /* An UNSPEC_VPERM is ok if the mask operand is loaded from the | |
973 | constant pool. */ | |
974 | if (GET_CODE (body) == SET | |
975 | && GET_CODE (SET_SRC (body)) == UNSPEC | |
976 | && XINT (SET_SRC (body), 1) == UNSPEC_VPERM | |
977 | && XVECLEN (SET_SRC (body), 0) == 3 | |
2e42a52f | 978 | && REG_P (XVECEXP (SET_SRC (body), 0, 2))) |
0dc6645f AS |
979 | { |
980 | rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2); | |
981 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
982 | df_ref use; | |
983 | FOR_EACH_INSN_INFO_USE (use, insn_info) | |
984 | if (rtx_equal_p (DF_REF_REG (use), mask_reg)) | |
985 | { | |
986 | struct df_link *def_link = DF_REF_CHAIN (use); | |
987 | /* Punt if multiple definitions for this reg. */ | |
988 | if (def_link && !def_link->next && | |
989 | const_load_sequence_p (insn_entry, | |
990 | DF_REF_INSN (def_link->ref))) | |
991 | { | |
992 | *special = SH_VPERM; | |
993 | return 1; | |
994 | } | |
995 | } | |
996 | } | |
997 | ||
998 | /* Otherwise check the operands for vector lane violations. */ | |
999 | return rtx_is_swappable_p (body, special); | |
1000 | } | |
1001 | ||
1002 | enum chain_purpose { FOR_LOADS, FOR_STORES }; | |
1003 | ||
1004 | /* Return true if the UD or DU chain headed by LINK is non-empty, | |
1005 | and every entry on the chain references an insn that is a | |
1006 | register swap. Furthermore, if PURPOSE is FOR_LOADS, each such | |
1007 | register swap must have only permuting loads as reaching defs. | |
1008 | If PURPOSE is FOR_STORES, each such register swap must have only | |
1009 | register swaps or permuting stores as reached uses. */ | |
1010 | static bool | |
1011 | chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link, | |
1012 | enum chain_purpose purpose) | |
1013 | { | |
1014 | if (!link) | |
1015 | return false; | |
1016 | ||
1017 | for (; link; link = link->next) | |
1018 | { | |
1019 | if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref)))) | |
1020 | continue; | |
1021 | ||
1022 | if (DF_REF_IS_ARTIFICIAL (link->ref)) | |
1023 | return false; | |
1024 | ||
1025 | rtx reached_insn = DF_REF_INSN (link->ref); | |
1026 | unsigned uid = INSN_UID (reached_insn); | |
1027 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn); | |
1028 | ||
1029 | if (!insn_entry[uid].is_swap || insn_entry[uid].is_load | |
1030 | || insn_entry[uid].is_store) | |
1031 | return false; | |
1032 | ||
1033 | if (purpose == FOR_LOADS) | |
1034 | { | |
1035 | df_ref use; | |
1036 | FOR_EACH_INSN_INFO_USE (use, insn_info) | |
1037 | { | |
1038 | struct df_link *swap_link = DF_REF_CHAIN (use); | |
1039 | ||
1040 | while (swap_link) | |
1041 | { | |
1042 | if (DF_REF_IS_ARTIFICIAL (link->ref)) | |
1043 | return false; | |
1044 | ||
1045 | rtx swap_def_insn = DF_REF_INSN (swap_link->ref); | |
1046 | unsigned uid2 = INSN_UID (swap_def_insn); | |
1047 | ||
1048 | /* Only permuting loads are allowed. */ | |
1049 | if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load) | |
1050 | return false; | |
1051 | ||
1052 | swap_link = swap_link->next; | |
1053 | } | |
1054 | } | |
1055 | } | |
1056 | else if (purpose == FOR_STORES) | |
1057 | { | |
1058 | df_ref def; | |
1059 | FOR_EACH_INSN_INFO_DEF (def, insn_info) | |
1060 | { | |
1061 | struct df_link *swap_link = DF_REF_CHAIN (def); | |
1062 | ||
1063 | while (swap_link) | |
1064 | { | |
1065 | if (DF_REF_IS_ARTIFICIAL (link->ref)) | |
1066 | return false; | |
1067 | ||
1068 | rtx swap_use_insn = DF_REF_INSN (swap_link->ref); | |
1069 | unsigned uid2 = INSN_UID (swap_use_insn); | |
1070 | ||
1071 | /* Permuting stores or register swaps are allowed. */ | |
1072 | if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load) | |
1073 | return false; | |
1074 | ||
1075 | swap_link = swap_link->next; | |
1076 | } | |
1077 | } | |
1078 | } | |
1079 | } | |
1080 | ||
1081 | return true; | |
1082 | } | |
1083 | ||
1084 | /* Mark the xxswapdi instructions associated with permuting loads and | |
1085 | stores for removal. Note that we only flag them for deletion here, | |
1086 | as there is a possibility of a swap being reached from multiple | |
1087 | loads, etc. */ | |
1088 | static void | |
1089 | mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i) | |
1090 | { | |
1091 | rtx insn = insn_entry[i].insn; | |
1092 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
1093 | ||
1094 | if (insn_entry[i].is_load) | |
1095 | { | |
1096 | df_ref def; | |
1097 | FOR_EACH_INSN_INFO_DEF (def, insn_info) | |
1098 | { | |
1099 | struct df_link *link = DF_REF_CHAIN (def); | |
1100 | ||
1101 | /* We know by now that these are swaps, so we can delete | |
1102 | them confidently. */ | |
1103 | while (link) | |
1104 | { | |
1105 | rtx use_insn = DF_REF_INSN (link->ref); | |
1106 | insn_entry[INSN_UID (use_insn)].will_delete = 1; | |
1107 | link = link->next; | |
1108 | } | |
1109 | } | |
1110 | } | |
1111 | else if (insn_entry[i].is_store) | |
1112 | { | |
1113 | df_ref use; | |
1114 | FOR_EACH_INSN_INFO_USE (use, insn_info) | |
1115 | { | |
1116 | /* Ignore uses for addressability. */ | |
1117 | machine_mode mode = GET_MODE (DF_REF_REG (use)); | |
1118 | if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode)) | |
1119 | continue; | |
1120 | ||
1121 | struct df_link *link = DF_REF_CHAIN (use); | |
1122 | ||
1123 | /* We know by now that these are swaps, so we can delete | |
1124 | them confidently. */ | |
1125 | while (link) | |
1126 | { | |
1127 | rtx def_insn = DF_REF_INSN (link->ref); | |
1128 | insn_entry[INSN_UID (def_insn)].will_delete = 1; | |
1129 | link = link->next; | |
1130 | } | |
1131 | } | |
1132 | } | |
1133 | } | |
1134 | ||
3877c560 | 1135 | /* *OP_PTR is either a CONST_VECTOR or an expression containing one. |
0dc6645f AS |
1136 | Swap the first half of the vector with the second in the first |
1137 | case. Recurse to find it in the second. */ | |
1138 | static void | |
3877c560 | 1139 | swap_const_vector_halves (rtx *op_ptr) |
0dc6645f AS |
1140 | { |
1141 | int i; | |
3877c560 | 1142 | rtx op = *op_ptr; |
0dc6645f AS |
1143 | enum rtx_code code = GET_CODE (op); |
1144 | if (GET_CODE (op) == CONST_VECTOR) | |
1145 | { | |
3877c560 RS |
1146 | int units = GET_MODE_NUNITS (GET_MODE (op)); |
1147 | rtx_vector_builder builder (GET_MODE (op), units, 1); | |
1148 | for (i = 0; i < units / 2; ++i) | |
1149 | builder.quick_push (CONST_VECTOR_ELT (op, i + units / 2)); | |
1150 | for (i = 0; i < units / 2; ++i) | |
1151 | builder.quick_push (CONST_VECTOR_ELT (op, i)); | |
1152 | *op_ptr = builder.build (); | |
0dc6645f AS |
1153 | } |
1154 | else | |
1155 | { | |
1156 | int j; | |
1157 | const char *fmt = GET_RTX_FORMAT (code); | |
1158 | for (i = 0; i < GET_RTX_LENGTH (code); ++i) | |
1159 | if (fmt[i] == 'e' || fmt[i] == 'u') | |
3877c560 | 1160 | swap_const_vector_halves (&XEXP (op, i)); |
0dc6645f AS |
1161 | else if (fmt[i] == 'E') |
1162 | for (j = 0; j < XVECLEN (op, i); ++j) | |
3877c560 | 1163 | swap_const_vector_halves (&XVECEXP (op, i, j)); |
0dc6645f AS |
1164 | } |
1165 | } | |
1166 | ||
1167 | /* Find all subregs of a vector expression that perform a narrowing, | |
1168 | and adjust the subreg index to account for doubleword swapping. */ | |
1169 | static void | |
1170 | adjust_subreg_index (rtx op) | |
1171 | { | |
1172 | enum rtx_code code = GET_CODE (op); | |
1173 | if (code == SUBREG | |
1174 | && (GET_MODE_SIZE (GET_MODE (op)) | |
1175 | < GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))) | |
1176 | { | |
1177 | unsigned int index = SUBREG_BYTE (op); | |
1178 | if (index < 8) | |
1179 | index += 8; | |
1180 | else | |
1181 | index -= 8; | |
1182 | SUBREG_BYTE (op) = index; | |
1183 | } | |
1184 | ||
1185 | const char *fmt = GET_RTX_FORMAT (code); | |
1186 | int i,j; | |
1187 | for (i = 0; i < GET_RTX_LENGTH (code); ++i) | |
1188 | if (fmt[i] == 'e' || fmt[i] == 'u') | |
1189 | adjust_subreg_index (XEXP (op, i)); | |
1190 | else if (fmt[i] == 'E') | |
1191 | for (j = 0; j < XVECLEN (op, i); ++j) | |
1192 | adjust_subreg_index (XVECEXP (op, i, j)); | |
1193 | } | |
1194 | ||
1195 | /* Convert the non-permuting load INSN to a permuting one. */ | |
1196 | static void | |
1197 | permute_load (rtx_insn *insn) | |
1198 | { | |
1199 | rtx body = PATTERN (insn); | |
1200 | rtx mem_op = SET_SRC (body); | |
1201 | rtx tgt_reg = SET_DEST (body); | |
1202 | machine_mode mode = GET_MODE (tgt_reg); | |
1203 | int n_elts = GET_MODE_NUNITS (mode); | |
1204 | int half_elts = n_elts / 2; | |
1205 | rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts)); | |
1206 | int i, j; | |
1207 | for (i = 0, j = half_elts; i < half_elts; ++i, ++j) | |
1208 | XVECEXP (par, 0, i) = GEN_INT (j); | |
1209 | for (i = half_elts, j = 0; j < half_elts; ++i, ++j) | |
1210 | XVECEXP (par, 0, i) = GEN_INT (j); | |
1211 | rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par); | |
1212 | SET_SRC (body) = sel; | |
1213 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
1214 | df_insn_rescan (insn); | |
1215 | ||
1216 | if (dump_file) | |
1217 | fprintf (dump_file, "Replacing load %d with permuted load\n", | |
1218 | INSN_UID (insn)); | |
1219 | } | |
1220 | ||
1221 | /* Convert the non-permuting store INSN to a permuting one. */ | |
1222 | static void | |
1223 | permute_store (rtx_insn *insn) | |
1224 | { | |
1225 | rtx body = PATTERN (insn); | |
1226 | rtx src_reg = SET_SRC (body); | |
1227 | machine_mode mode = GET_MODE (src_reg); | |
1228 | int n_elts = GET_MODE_NUNITS (mode); | |
1229 | int half_elts = n_elts / 2; | |
1230 | rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts)); | |
1231 | int i, j; | |
1232 | for (i = 0, j = half_elts; i < half_elts; ++i, ++j) | |
1233 | XVECEXP (par, 0, i) = GEN_INT (j); | |
1234 | for (i = half_elts, j = 0; j < half_elts; ++i, ++j) | |
1235 | XVECEXP (par, 0, i) = GEN_INT (j); | |
1236 | rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par); | |
1237 | SET_SRC (body) = sel; | |
1238 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
1239 | df_insn_rescan (insn); | |
1240 | ||
1241 | if (dump_file) | |
1242 | fprintf (dump_file, "Replacing store %d with permuted store\n", | |
1243 | INSN_UID (insn)); | |
1244 | } | |
1245 | ||
1246 | /* Given OP that contains a vector extract operation, adjust the index | |
1247 | of the extracted lane to account for the doubleword swap. */ | |
1248 | static void | |
1249 | adjust_extract (rtx_insn *insn) | |
1250 | { | |
1251 | rtx pattern = PATTERN (insn); | |
1252 | if (GET_CODE (pattern) == PARALLEL) | |
1253 | pattern = XVECEXP (pattern, 0, 0); | |
1254 | rtx src = SET_SRC (pattern); | |
1255 | /* The vec_select may be wrapped in a vec_duplicate for a splat, so | |
1256 | account for that. */ | |
1257 | rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src; | |
1258 | rtx par = XEXP (sel, 1); | |
1259 | int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1; | |
1260 | int lane = INTVAL (XVECEXP (par, 0, 0)); | |
1261 | lane = lane >= half_elts ? lane - half_elts : lane + half_elts; | |
1262 | XVECEXP (par, 0, 0) = GEN_INT (lane); | |
1263 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
1264 | df_insn_rescan (insn); | |
1265 | ||
1266 | if (dump_file) | |
1267 | fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn)); | |
1268 | } | |
1269 | ||
1270 | /* Given OP that contains a vector direct-splat operation, adjust the index | |
1271 | of the source lane to account for the doubleword swap. */ | |
1272 | static void | |
1273 | adjust_splat (rtx_insn *insn) | |
1274 | { | |
1275 | rtx body = PATTERN (insn); | |
1276 | rtx unspec = XEXP (body, 1); | |
1277 | int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1; | |
1278 | int lane = INTVAL (XVECEXP (unspec, 0, 1)); | |
1279 | lane = lane >= half_elts ? lane - half_elts : lane + half_elts; | |
1280 | XVECEXP (unspec, 0, 1) = GEN_INT (lane); | |
1281 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
1282 | df_insn_rescan (insn); | |
1283 | ||
1284 | if (dump_file) | |
1285 | fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn)); | |
1286 | } | |
1287 | ||
1288 | /* Given OP that contains an XXPERMDI operation (that is not a doubleword | |
1289 | swap), reverse the order of the source operands and adjust the indices | |
1290 | of the source lanes to account for doubleword reversal. */ | |
1291 | static void | |
1292 | adjust_xxpermdi (rtx_insn *insn) | |
1293 | { | |
1294 | rtx set = PATTERN (insn); | |
1295 | rtx select = XEXP (set, 1); | |
1296 | rtx concat = XEXP (select, 0); | |
1297 | rtx src0 = XEXP (concat, 0); | |
1298 | XEXP (concat, 0) = XEXP (concat, 1); | |
1299 | XEXP (concat, 1) = src0; | |
1300 | rtx parallel = XEXP (select, 1); | |
1301 | int lane0 = INTVAL (XVECEXP (parallel, 0, 0)); | |
1302 | int lane1 = INTVAL (XVECEXP (parallel, 0, 1)); | |
1303 | int new_lane0 = 3 - lane1; | |
1304 | int new_lane1 = 3 - lane0; | |
1305 | XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0); | |
1306 | XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1); | |
1307 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
1308 | df_insn_rescan (insn); | |
1309 | ||
1310 | if (dump_file) | |
1311 | fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn)); | |
1312 | } | |
1313 | ||
1314 | /* Given OP that contains a VEC_CONCAT operation of two doublewords, | |
1315 | reverse the order of those inputs. */ | |
1316 | static void | |
1317 | adjust_concat (rtx_insn *insn) | |
1318 | { | |
1319 | rtx set = PATTERN (insn); | |
1320 | rtx concat = XEXP (set, 1); | |
1321 | rtx src0 = XEXP (concat, 0); | |
1322 | XEXP (concat, 0) = XEXP (concat, 1); | |
1323 | XEXP (concat, 1) = src0; | |
1324 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
1325 | df_insn_rescan (insn); | |
1326 | ||
1327 | if (dump_file) | |
1328 | fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn)); | |
1329 | } | |
1330 | ||
1331 | /* Given an UNSPEC_VPERM insn, modify the mask loaded from the | |
1332 | constant pool to reflect swapped doublewords. */ | |
1333 | static void | |
1334 | adjust_vperm (rtx_insn *insn) | |
1335 | { | |
1336 | /* We previously determined that the UNSPEC_VPERM was fed by a | |
1337 | swap of a swapping load of a TOC-relative constant pool symbol. | |
1338 | Find the MEM in the swapping load and replace it with a MEM for | |
1339 | the adjusted mask constant. */ | |
1340 | rtx set = PATTERN (insn); | |
1341 | rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2); | |
1342 | ||
1343 | /* Find the swap. */ | |
1344 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
1345 | df_ref use; | |
1346 | rtx_insn *swap_insn = 0; | |
1347 | FOR_EACH_INSN_INFO_USE (use, insn_info) | |
1348 | if (rtx_equal_p (DF_REF_REG (use), mask_reg)) | |
1349 | { | |
1350 | struct df_link *def_link = DF_REF_CHAIN (use); | |
1351 | gcc_assert (def_link && !def_link->next); | |
1352 | swap_insn = DF_REF_INSN (def_link->ref); | |
1353 | break; | |
1354 | } | |
1355 | gcc_assert (swap_insn); | |
1356 | ||
1357 | /* Find the load. */ | |
1358 | insn_info = DF_INSN_INFO_GET (swap_insn); | |
1359 | rtx_insn *load_insn = 0; | |
1360 | FOR_EACH_INSN_INFO_USE (use, insn_info) | |
1361 | { | |
1362 | struct df_link *def_link = DF_REF_CHAIN (use); | |
1363 | gcc_assert (def_link && !def_link->next); | |
1364 | load_insn = DF_REF_INSN (def_link->ref); | |
1365 | break; | |
1366 | } | |
1367 | gcc_assert (load_insn); | |
1368 | ||
1369 | /* Find the TOC-relative symbol access. */ | |
1370 | insn_info = DF_INSN_INFO_GET (load_insn); | |
1371 | rtx_insn *tocrel_insn = 0; | |
1372 | FOR_EACH_INSN_INFO_USE (use, insn_info) | |
1373 | { | |
1374 | struct df_link *def_link = DF_REF_CHAIN (use); | |
1375 | gcc_assert (def_link && !def_link->next); | |
1376 | tocrel_insn = DF_REF_INSN (def_link->ref); | |
1377 | break; | |
1378 | } | |
1379 | gcc_assert (tocrel_insn); | |
1380 | ||
1381 | /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p | |
1382 | to set tocrel_base; otherwise it would be unnecessary as we've | |
1383 | already established it will return true. */ | |
1384 | rtx base, offset; | |
1385 | const_rtx tocrel_base; | |
1386 | rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn)); | |
1387 | /* There is an extra level of indirection for small/large code models. */ | |
2e42a52f | 1388 | if (MEM_P (tocrel_expr)) |
0dc6645f AS |
1389 | tocrel_expr = XEXP (tocrel_expr, 0); |
1390 | if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL)) | |
1391 | gcc_unreachable (); | |
1392 | split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset); | |
1393 | rtx const_vector = get_pool_constant (base); | |
1394 | /* With the extra indirection, get_pool_constant will produce the | |
1395 | real constant from the reg_equal expression, so get the real | |
1396 | constant. */ | |
2e42a52f | 1397 | if (SYMBOL_REF_P (const_vector)) |
0dc6645f AS |
1398 | const_vector = get_pool_constant (const_vector); |
1399 | gcc_assert (GET_CODE (const_vector) == CONST_VECTOR); | |
1400 | ||
1401 | /* Create an adjusted mask from the initial mask. */ | |
1402 | unsigned int new_mask[16], i, val; | |
1403 | for (i = 0; i < 16; ++i) { | |
1404 | val = INTVAL (XVECEXP (const_vector, 0, i)); | |
1405 | if (val < 16) | |
1406 | new_mask[i] = (val + 8) % 16; | |
1407 | else | |
1408 | new_mask[i] = ((val + 8) % 16) + 16; | |
1409 | } | |
1410 | ||
1411 | /* Create a new CONST_VECTOR and a MEM that references it. */ | |
1412 | rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16)); | |
1413 | for (i = 0; i < 16; ++i) | |
1414 | XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]); | |
1415 | rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0)); | |
1416 | rtx new_mem = force_const_mem (V16QImode, new_const_vector); | |
1417 | /* This gives us a MEM whose base operand is a SYMBOL_REF, which we | |
1418 | can't recognize. Force the SYMBOL_REF into a register. */ | |
1419 | if (!REG_P (XEXP (new_mem, 0))) { | |
1420 | rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0)); | |
1421 | XEXP (new_mem, 0) = base_reg; | |
1422 | /* Move the newly created insn ahead of the load insn. */ | |
1423 | rtx_insn *force_insn = get_last_insn (); | |
1424 | remove_insn (force_insn); | |
1425 | rtx_insn *before_load_insn = PREV_INSN (load_insn); | |
1426 | add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn)); | |
1427 | df_insn_rescan (before_load_insn); | |
1428 | df_insn_rescan (force_insn); | |
1429 | } | |
1430 | ||
1431 | /* Replace the MEM in the load instruction and rescan it. */ | |
1432 | XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem; | |
1433 | INSN_CODE (load_insn) = -1; /* Force re-recognition. */ | |
1434 | df_insn_rescan (load_insn); | |
1435 | ||
1436 | if (dump_file) | |
1437 | fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn)); | |
1438 | } | |
1439 | ||
1440 | /* The insn described by INSN_ENTRY[I] can be swapped, but only | |
1441 | with special handling. Take care of that here. */ | |
1442 | static void | |
1443 | handle_special_swappables (swap_web_entry *insn_entry, unsigned i) | |
1444 | { | |
1445 | rtx_insn *insn = insn_entry[i].insn; | |
1446 | rtx body = PATTERN (insn); | |
1447 | ||
1448 | switch (insn_entry[i].special_handling) | |
1449 | { | |
1450 | default: | |
1451 | gcc_unreachable (); | |
1452 | case SH_CONST_VECTOR: | |
1453 | { | |
1454 | /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */ | |
1455 | gcc_assert (GET_CODE (body) == SET); | |
3877c560 | 1456 | swap_const_vector_halves (&SET_SRC (body)); |
0dc6645f AS |
1457 | if (dump_file) |
1458 | fprintf (dump_file, "Swapping constant halves in insn %d\n", i); | |
1459 | break; | |
1460 | } | |
1461 | case SH_SUBREG: | |
1462 | /* A subreg of the same size is already safe. For subregs that | |
1463 | select a smaller portion of a reg, adjust the index for | |
1464 | swapped doublewords. */ | |
1465 | adjust_subreg_index (body); | |
1466 | if (dump_file) | |
1467 | fprintf (dump_file, "Adjusting subreg in insn %d\n", i); | |
1468 | break; | |
1469 | case SH_NOSWAP_LD: | |
1470 | /* Convert a non-permuting load to a permuting one. */ | |
1471 | permute_load (insn); | |
1472 | break; | |
1473 | case SH_NOSWAP_ST: | |
1474 | /* Convert a non-permuting store to a permuting one. */ | |
1475 | permute_store (insn); | |
1476 | break; | |
1477 | case SH_EXTRACT: | |
1478 | /* Change the lane on an extract operation. */ | |
1479 | adjust_extract (insn); | |
1480 | break; | |
1481 | case SH_SPLAT: | |
1482 | /* Change the lane on a direct-splat operation. */ | |
1483 | adjust_splat (insn); | |
1484 | break; | |
1485 | case SH_XXPERMDI: | |
1486 | /* Change the lanes on an XXPERMDI operation. */ | |
1487 | adjust_xxpermdi (insn); | |
1488 | break; | |
1489 | case SH_CONCAT: | |
1490 | /* Reverse the order of a concatenation operation. */ | |
1491 | adjust_concat (insn); | |
1492 | break; | |
1493 | case SH_VPERM: | |
1494 | /* Change the mask loaded from the constant pool for a VPERM. */ | |
1495 | adjust_vperm (insn); | |
1496 | break; | |
1497 | } | |
1498 | } | |
1499 | ||
1500 | /* Find the insn from the Ith table entry, which is known to be a | |
1501 | register swap Y = SWAP(X). Replace it with a copy Y = X. */ | |
1502 | static void | |
1503 | replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i) | |
1504 | { | |
1505 | rtx_insn *insn = insn_entry[i].insn; | |
1506 | rtx body = PATTERN (insn); | |
1507 | rtx src_reg = XEXP (SET_SRC (body), 0); | |
1508 | rtx copy = gen_rtx_SET (SET_DEST (body), src_reg); | |
1509 | rtx_insn *new_insn = emit_insn_before (copy, insn); | |
1510 | set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn)); | |
1511 | df_insn_rescan (new_insn); | |
1512 | ||
1513 | if (dump_file) | |
1514 | { | |
1515 | unsigned int new_uid = INSN_UID (new_insn); | |
1516 | fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid); | |
1517 | } | |
1518 | ||
1519 | df_insn_delete (insn); | |
1520 | remove_insn (insn); | |
1521 | insn->set_deleted (); | |
1522 | } | |
1523 | ||
ad5f8ac1 BS |
1524 | /* INSN is known to contain a SUBREG, which we can normally handle, |
1525 | but if the SUBREG itself contains a MULT then we need to leave it alone | |
1526 | to avoid turning a mult_hipart into a mult_lopart, for example. */ | |
1527 | static bool | |
1528 | has_part_mult (rtx_insn *insn) | |
1529 | { | |
1530 | rtx body = PATTERN (insn); | |
1531 | if (GET_CODE (body) != SET) | |
1532 | return false; | |
1533 | rtx src = SET_SRC (body); | |
1534 | if (GET_CODE (src) != SUBREG) | |
1535 | return false; | |
1536 | rtx inner = XEXP (src, 0); | |
1537 | return (GET_CODE (inner) == MULT); | |
1538 | } | |
1539 | ||
a3a821c9 KN |
1540 | /* Make NEW_MEM_EXP's attributes and flags resemble those of |
1541 | ORIGINAL_MEM_EXP. */ | |
1542 | static void | |
1543 | mimic_memory_attributes_and_flags (rtx new_mem_exp, const_rtx original_mem_exp) | |
1544 | { | |
1545 | RTX_FLAG (new_mem_exp, jump) = RTX_FLAG (original_mem_exp, jump); | |
1546 | RTX_FLAG (new_mem_exp, call) = RTX_FLAG (original_mem_exp, call); | |
1547 | RTX_FLAG (new_mem_exp, unchanging) = RTX_FLAG (original_mem_exp, unchanging); | |
1548 | RTX_FLAG (new_mem_exp, volatil) = RTX_FLAG (original_mem_exp, volatil); | |
1549 | RTX_FLAG (new_mem_exp, frame_related) = | |
1550 | RTX_FLAG (original_mem_exp, frame_related); | |
1551 | ||
1552 | /* The following fields may not be used with MEM subexpressions */ | |
1553 | RTX_FLAG (new_mem_exp, in_struct) = RTX_FLAG (original_mem_exp, in_struct); | |
1554 | RTX_FLAG (new_mem_exp, return_val) = RTX_FLAG (original_mem_exp, return_val); | |
1555 | ||
1556 | struct mem_attrs original_attrs = *get_mem_attrs(original_mem_exp); | |
1557 | ||
1558 | alias_set_type set = original_attrs.alias; | |
1559 | set_mem_alias_set (new_mem_exp, set); | |
1560 | ||
1561 | addr_space_t addrspace = original_attrs.addrspace; | |
1562 | set_mem_addr_space (new_mem_exp, addrspace); | |
1563 | ||
1564 | unsigned int align = original_attrs.align; | |
1565 | set_mem_align (new_mem_exp, align); | |
1566 | ||
1567 | tree expr = original_attrs.expr; | |
1568 | set_mem_expr (new_mem_exp, expr); | |
1569 | ||
1570 | if (original_attrs.offset_known_p) | |
1571 | { | |
1572 | HOST_WIDE_INT offset = original_attrs.offset; | |
1573 | set_mem_offset (new_mem_exp, offset); | |
1574 | } | |
1575 | else | |
1576 | clear_mem_offset (new_mem_exp); | |
1577 | ||
1578 | if (original_attrs.size_known_p) | |
1579 | { | |
1580 | HOST_WIDE_INT size = original_attrs.size; | |
1581 | set_mem_size (new_mem_exp, size); | |
1582 | } | |
1583 | else | |
1584 | clear_mem_size (new_mem_exp); | |
1585 | } | |
1586 | ||
1587 | /* Generate an rtx expression to represent use of the stvx insn to store | |
1588 | the value represented by register SRC_EXP into the memory at address | |
1589 | DEST_EXP, with vector mode MODE. */ | |
1590 | rtx | |
1591 | rs6000_gen_stvx (enum machine_mode mode, rtx dest_exp, rtx src_exp) | |
1592 | { | |
a3a821c9 KN |
1593 | rtx stvx; |
1594 | ||
91d014ff PB |
1595 | if (mode == V16QImode) |
1596 | stvx = gen_altivec_stvx_v16qi (src_exp, dest_exp); | |
1597 | else if (mode == V8HImode) | |
1598 | stvx = gen_altivec_stvx_v8hi (src_exp, dest_exp); | |
a3a821c9 | 1599 | #ifdef HAVE_V8HFmode |
91d014ff PB |
1600 | else if (mode == V8HFmode) |
1601 | stvx = gen_altivec_stvx_v8hf (src_exp, dest_exp); | |
a3a821c9 | 1602 | #endif |
91d014ff PB |
1603 | else if (mode == V4SImode) |
1604 | stvx = gen_altivec_stvx_v4si (src_exp, dest_exp); | |
1605 | else if (mode == V4SFmode) | |
1606 | stvx = gen_altivec_stvx_v4sf (src_exp, dest_exp); | |
1607 | else if (mode == V2DImode) | |
1608 | stvx = gen_altivec_stvx_v2di (src_exp, dest_exp); | |
1609 | else if (mode == V2DFmode) | |
1610 | stvx = gen_altivec_stvx_v2df (src_exp, dest_exp); | |
1611 | else if (mode == V1TImode) | |
1612 | stvx = gen_altivec_stvx_v1ti (src_exp, dest_exp); | |
1613 | else | |
1614 | /* KFmode, TFmode, other modes not expected in this context. */ | |
1615 | gcc_unreachable (); | |
a3a821c9 | 1616 | |
91d014ff | 1617 | rtx new_mem_exp = SET_DEST (PATTERN (stvx)); |
a3a821c9 KN |
1618 | mimic_memory_attributes_and_flags (new_mem_exp, dest_exp); |
1619 | return stvx; | |
1620 | } | |
1621 | ||
/* Given that STORE_INSN represents an aligned store-with-swap of a
   swapped value, replace the store with an aligned store (without
   swap) and replace the swap with a copy insn.  INSN_ENTRY is the
   per-UID table of swap-pass analysis flags.  */
static void
replace_swapped_aligned_store (swap_web_entry *insn_entry,
			       rtx_insn *store_insn)
{
  unsigned uid = INSN_UID (store_insn);
  /* Callers guarantee this insn was classified as a store-with-swap.  */
  gcc_assert (insn_entry[uid].is_swap && insn_entry[uid].is_store);

  rtx body = PATTERN (store_insn);
  rtx dest_address = XEXP (SET_DEST (body), 0);
  rtx swap_reg = XEXP (SET_SRC (body), 0);
  /* The aligned store forms only take (reg) or (plus reg reg)
     addresses.  */
  gcc_assert (REG_P (dest_address)
	      || rs6000_sum_of_two_registers_p (dest_address));

  /* Find the swap instruction that provides the value to be stored by
     this store-with-swap instruction.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (store_insn);
  df_ref use;
  rtx_insn *swap_insn = NULL;
  unsigned uid2 = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);

      /* If this is not the definition of the candidate swap register,
	 then skip it.  We are only interested in the swap insn.  */
      if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
	continue;

      /* If there is no def or the def is artificial or there are
	 multiple defs, we should not be here.  */
      gcc_assert (def_link && def_link->ref && !def_link->next
		  && !DF_REF_IS_ARTIFICIAL (def_link->ref));

      swap_insn = DF_REF_INSN (def_link->ref);
      uid2 = INSN_UID (swap_insn);

      /* If this source value is not a simple swap, we should not be here.  */
      gcc_assert (insn_entry[uid2].is_swap && !insn_entry[uid2].is_load
		  && !insn_entry[uid2].is_store);

      /* We've processed the use we care about, so break out of
	 this loop.  */
      break;
    }

  /* At this point, swap_insn and uid2 represent the swap instruction
     that feeds the store.  */
  gcc_assert (swap_insn);
  rtx set = single_set (store_insn);
  gcc_assert (set);
  rtx dest_exp = SET_DEST (set);
  rtx src_exp = XEXP (SET_SRC (body), 0);
  enum machine_mode mode = GET_MODE (dest_exp);
  /* The destination must be a quad-aligned MEM for an stvx to be a
     correct replacement.  */
  gcc_assert (MEM_P (dest_exp));
  gcc_assert (MEM_ALIGN (dest_exp) >= 128);

  /* Replace the store-with-swap with a plain aligned store (stvx).  */
  rtx stvx;
  stvx = rs6000_gen_stvx (mode, dest_exp, src_exp);

  rtx_insn *new_insn = emit_insn_before (stvx, store_insn);
  rtx new_body = PATTERN (new_insn);

  gcc_assert ((GET_CODE (new_body) == SET)
	      && MEM_P (SET_DEST (new_body)));

  set_block_for_insn (new_insn, BLOCK_FOR_INSN (store_insn));
  df_insn_rescan (new_insn);

  /* Remove the old store completely; dataflow info first, then the
     insn chain entry.  */
  df_insn_delete (store_insn);
  remove_insn (store_insn);
  store_insn->set_deleted ();

  /* Replace the feeding swap with a copy.  */
  uid2 = INSN_UID (swap_insn);
  mark_swaps_for_removal (insn_entry, uid2);
  replace_swap_with_copy (insn_entry, uid2);
}
1703 | ||
1704 | /* Generate an rtx expression to represent use of the lvx insn to load | |
1705 | from memory SRC_EXP into register DEST_EXP with vector mode MODE. */ | |
1706 | rtx | |
1707 | rs6000_gen_lvx (enum machine_mode mode, rtx dest_exp, rtx src_exp) | |
1708 | { | |
a3a821c9 KN |
1709 | rtx lvx; |
1710 | ||
91d014ff PB |
1711 | if (mode == V16QImode) |
1712 | lvx = gen_altivec_lvx_v16qi (dest_exp, src_exp); | |
1713 | else if (mode == V8HImode) | |
1714 | lvx = gen_altivec_lvx_v8hi (dest_exp, src_exp); | |
a3a821c9 | 1715 | #ifdef HAVE_V8HFmode |
91d014ff PB |
1716 | else if (mode == V8HFmode) |
1717 | lvx = gen_altivec_lvx_v8hf (dest_exp, src_exp); | |
a3a821c9 | 1718 | #endif |
91d014ff PB |
1719 | else if (mode == V4SImode) |
1720 | lvx = gen_altivec_lvx_v4si (dest_exp, src_exp); | |
1721 | else if (mode == V4SFmode) | |
1722 | lvx = gen_altivec_lvx_v4sf (dest_exp, src_exp); | |
1723 | else if (mode == V2DImode) | |
1724 | lvx = gen_altivec_lvx_v2di (dest_exp, src_exp); | |
1725 | else if (mode == V2DFmode) | |
1726 | lvx = gen_altivec_lvx_v2df (dest_exp, src_exp); | |
1727 | else if (mode == V1TImode) | |
1728 | lvx = gen_altivec_lvx_v1ti (dest_exp, src_exp); | |
1729 | else | |
1730 | /* KFmode, TFmode, other modes not expected in this context. */ | |
1731 | gcc_unreachable (); | |
a3a821c9 | 1732 | |
91d014ff | 1733 | rtx new_mem_exp = SET_SRC (PATTERN (lvx)); |
a3a821c9 KN |
1734 | mimic_memory_attributes_and_flags (new_mem_exp, src_exp); |
1735 | ||
1736 | return lvx; | |
1737 | } | |
1738 | ||
1739 | /* Given that SWAP_INSN represents a swap of an aligned | |
1740 | load-with-swap, replace the load with an aligned load (without | |
1741 | swap) and replace the swap with a copy insn. */ | |
1742 | static void | |
1743 | replace_swapped_aligned_load (swap_web_entry *insn_entry, rtx swap_insn) | |
1744 | { | |
1745 | /* Find the load. */ | |
1746 | unsigned uid = INSN_UID (swap_insn); | |
1747 | /* Only call this if quad_aligned_load_p (swap_insn). */ | |
1748 | gcc_assert (insn_entry[uid].is_swap && !insn_entry[uid].is_load); | |
1749 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn); | |
1750 | ||
1751 | /* Since insn is known to represent a swap instruction, we know it | |
1752 | "uses" only one input variable. */ | |
1753 | df_ref use = DF_INSN_INFO_USES (insn_info); | |
1754 | ||
1755 | /* Figure out where this input variable is defined. */ | |
1756 | struct df_link *def_link = DF_REF_CHAIN (use); | |
1757 | gcc_assert (def_link && !def_link->next); | |
1758 | gcc_assert (def_link && def_link->ref && | |
1759 | !DF_REF_IS_ARTIFICIAL (def_link->ref) && !def_link->next); | |
1760 | ||
1761 | rtx_insn *def_insn = DF_REF_INSN (def_link->ref); | |
1762 | unsigned uid2 = INSN_UID (def_insn); | |
1763 | ||
1764 | /* We're expecting a load-with-swap insn. */ | |
1765 | gcc_assert (insn_entry[uid2].is_load && insn_entry[uid2].is_swap); | |
1766 | ||
1767 | /* We expect this to be a set to memory, with source representing a | |
1768 | swap (indicated by code VEC_SELECT). */ | |
1769 | rtx body = PATTERN (def_insn); | |
1770 | gcc_assert ((GET_CODE (body) == SET) | |
f700e4b0 XL |
1771 | && (GET_CODE (SET_SRC (body)) == VEC_SELECT |
1772 | || pattern_is_rotate64 (body)) | |
2e42a52f | 1773 | && MEM_P (XEXP (SET_SRC (body), 0))); |
a3a821c9 KN |
1774 | |
1775 | rtx src_exp = XEXP (SET_SRC (body), 0); | |
1776 | enum machine_mode mode = GET_MODE (src_exp); | |
1777 | rtx lvx = rs6000_gen_lvx (mode, SET_DEST (body), src_exp); | |
1778 | ||
1779 | rtx_insn *new_insn = emit_insn_before (lvx, def_insn); | |
1780 | rtx new_body = PATTERN (new_insn); | |
1781 | ||
1782 | gcc_assert ((GET_CODE (new_body) == SET) | |
2e42a52f | 1783 | && MEM_P (SET_SRC (new_body))); |
a3a821c9 KN |
1784 | |
1785 | set_block_for_insn (new_insn, BLOCK_FOR_INSN (def_insn)); | |
1786 | df_insn_rescan (new_insn); | |
1787 | ||
1788 | df_insn_delete (def_insn); | |
1789 | remove_insn (def_insn); | |
1790 | def_insn->set_deleted (); | |
1791 | ||
1792 | /* Replace the swap with a copy. */ | |
1793 | mark_swaps_for_removal (insn_entry, uid); | |
1794 | replace_swap_with_copy (insn_entry, uid); | |
1795 | } | |
1796 | ||
1797 | /* Given that SWAP_INSN represents a swap of a load of a constant | |
6e0cc90b KN |
1798 | vector value, replace with a single instruction that loads a |
1799 | swapped variant of the original constant. | |
1800 | ||
1801 | The "natural" representation of a byte array in memory is the same | |
1802 | for big endian and little endian. | |
1803 | ||
1804 | unsigned char byte_array[] = | |
1805 | { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f }; | |
1806 | ||
1807 | However, when loaded into a vector register, the representation | |
1808 | depends on endian conventions. | |
1809 | ||
1810 | In big-endian mode, the register holds: | |
1811 | ||
1812 | MSB LSB | |
1813 | [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ] | |
1814 | ||
1815 | In little-endian mode, the register holds: | |
1816 | ||
1817 | MSB LSB | |
1818 | [ f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ] | |
1819 | ||
1820 | Word arrays require different handling. Consider the word array: | |
1821 | ||
1822 | unsigned int word_array[] = | |
1823 | { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f }; | |
1824 | ||
1825 | The in-memory representation depends on endian configuration. The | |
1826 | equivalent array, declared as a byte array, in memory would be: | |
1827 | ||
1828 | unsigned char big_endian_word_array_data[] = | |
1829 | { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f } | |
1830 | ||
1831 | unsigned char little_endian_word_array_data[] = | |
1832 | { 3, 2, 1, 0, 7, 6, 5, 4, b, a, 9, 8, f, e, d, c } | |
1833 | ||
1834 | In big-endian mode, the register holds: | |
1835 | ||
1836 | MSB LSB | |
1837 | [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ] | |
1838 | ||
1839 | In little-endian mode, the register holds: | |
1840 | ||
1841 | MSB LSB | |
1842 | [ c, d, e, f, 8, 9, a, b, 4, 5, 6, 7, 0, 1, 2, 3 ] | |
1843 | ||
1844 | ||
1845 | Similar transformations apply to the vector of half-word and vector | |
1846 | of double-word representations. | |
1847 | ||
1848 | For now, don't handle vectors of quad-precision values. Just return. | |
1849 | A better solution is to fix the code generator to emit lvx/stvx for | |
1850 | those. */ | |
1851 | static void | |
1852 | replace_swapped_load_constant (swap_web_entry *insn_entry, rtx swap_insn) | |
1853 | { | |
1854 | /* Find the load. */ | |
1855 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn); | |
1856 | rtx_insn *load_insn; | |
1857 | df_ref use = DF_INSN_INFO_USES (insn_info); | |
1858 | struct df_link *def_link = DF_REF_CHAIN (use); | |
1859 | gcc_assert (def_link && !def_link->next); | |
1860 | ||
1861 | load_insn = DF_REF_INSN (def_link->ref); | |
1862 | gcc_assert (load_insn); | |
1863 | ||
1864 | /* Find the TOC-relative symbol access. */ | |
1865 | insn_info = DF_INSN_INFO_GET (load_insn); | |
1866 | use = DF_INSN_INFO_USES (insn_info); | |
1867 | ||
1868 | def_link = DF_REF_CHAIN (use); | |
1869 | gcc_assert (def_link && !def_link->next); | |
1870 | ||
1871 | rtx_insn *tocrel_insn = DF_REF_INSN (def_link->ref); | |
1872 | gcc_assert (tocrel_insn); | |
1873 | ||
1874 | /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p | |
1875 | to set tocrel_base; otherwise it would be unnecessary as we've | |
1876 | already established it will return true. */ | |
1877 | rtx base, offset; | |
1878 | rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn)); | |
1879 | const_rtx tocrel_base; | |
1880 | ||
1881 | /* There is an extra level of indirection for small/large code models. */ | |
2e42a52f | 1882 | if (MEM_P (tocrel_expr)) |
6e0cc90b KN |
1883 | tocrel_expr = XEXP (tocrel_expr, 0); |
1884 | ||
1885 | if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL)) | |
1886 | gcc_unreachable (); | |
1887 | ||
1888 | split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset); | |
1889 | rtx const_vector = get_pool_constant (base); | |
1890 | ||
1891 | /* With the extra indirection, get_pool_constant will produce the | |
1892 | real constant from the reg_equal expression, so get the real | |
1893 | constant. */ | |
2e42a52f | 1894 | if (SYMBOL_REF_P (const_vector)) |
6e0cc90b KN |
1895 | const_vector = get_pool_constant (const_vector); |
1896 | gcc_assert (GET_CODE (const_vector) == CONST_VECTOR); | |
1897 | ||
1898 | rtx new_mem; | |
1899 | enum machine_mode mode = GET_MODE (const_vector); | |
1900 | ||
1901 | /* Create an adjusted constant from the original constant. */ | |
1902 | if (mode == V1TImode) | |
1903 | /* Leave this code as is. */ | |
1904 | return; | |
1905 | else if (mode == V16QImode) | |
1906 | { | |
1907 | rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (16)); | |
1908 | int i; | |
1909 | ||
1910 | for (i = 0; i < 16; i++) | |
1911 | XVECEXP (vals, 0, ((i+8) % 16)) = XVECEXP (const_vector, 0, i); | |
1912 | rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); | |
1913 | new_mem = force_const_mem (mode, new_const_vector); | |
1914 | } | |
1915 | else if ((mode == V8HImode) | |
1916 | #ifdef HAVE_V8HFmode | |
1917 | || (mode == V8HFmode) | |
1918 | #endif | |
1919 | ) | |
1920 | { | |
1921 | rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (8)); | |
1922 | int i; | |
1923 | ||
1924 | for (i = 0; i < 8; i++) | |
1925 | XVECEXP (vals, 0, ((i+4) % 8)) = XVECEXP (const_vector, 0, i); | |
1926 | rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); | |
1927 | new_mem = force_const_mem (mode, new_const_vector); | |
1928 | } | |
1929 | else if ((mode == V4SImode) || (mode == V4SFmode)) | |
1930 | { | |
1931 | rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (4)); | |
1932 | int i; | |
1933 | ||
1934 | for (i = 0; i < 4; i++) | |
1935 | XVECEXP (vals, 0, ((i+2) % 4)) = XVECEXP (const_vector, 0, i); | |
1936 | rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); | |
1937 | new_mem = force_const_mem (mode, new_const_vector); | |
1938 | } | |
1939 | else if ((mode == V2DImode) || (mode == V2DFmode)) | |
1940 | { | |
1941 | rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (2)); | |
1942 | int i; | |
1943 | ||
1944 | for (i = 0; i < 2; i++) | |
1945 | XVECEXP (vals, 0, ((i+1) % 2)) = XVECEXP (const_vector, 0, i); | |
1946 | rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); | |
1947 | new_mem = force_const_mem (mode, new_const_vector); | |
1948 | } | |
1949 | else | |
1950 | { | |
1951 | /* We do not expect other modes to be constant-load-swapped. */ | |
1952 | gcc_unreachable (); | |
1953 | } | |
1954 | ||
1955 | /* This gives us a MEM whose base operand is a SYMBOL_REF, which we | |
1956 | can't recognize. Force the SYMBOL_REF into a register. */ | |
1957 | if (!REG_P (XEXP (new_mem, 0))) { | |
1958 | rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0)); | |
1959 | XEXP (new_mem, 0) = base_reg; | |
1960 | ||
1961 | /* Move the newly created insn ahead of the load insn. */ | |
700d4cb0 | 1962 | /* The last insn is the insn that forced new_mem into a register. */ |
6e0cc90b KN |
1963 | rtx_insn *force_insn = get_last_insn (); |
1964 | /* Remove this insn from the end of the instruction sequence. */ | |
1965 | remove_insn (force_insn); | |
1966 | rtx_insn *before_load_insn = PREV_INSN (load_insn); | |
1967 | ||
1968 | /* And insert this insn back into the sequence before the previous | |
1969 | load insn so this new expression will be available when the | |
1970 | existing load is modified to load the swapped constant. */ | |
1971 | add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn)); | |
1972 | df_insn_rescan (before_load_insn); | |
1973 | df_insn_rescan (force_insn); | |
1974 | } | |
1975 | ||
1976 | /* Replace the MEM in the load instruction and rescan it. */ | |
1977 | XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem; | |
1978 | INSN_CODE (load_insn) = -1; /* Force re-recognition. */ | |
1979 | df_insn_rescan (load_insn); | |
1980 | ||
1981 | unsigned int uid = INSN_UID (swap_insn); | |
1982 | mark_swaps_for_removal (insn_entry, uid); | |
1983 | replace_swap_with_copy (insn_entry, uid); | |
1984 | } | |
1985 | ||
0dc6645f AS |
1986 | /* Dump the swap table to DUMP_FILE. */ |
1987 | static void | |
1988 | dump_swap_insn_table (swap_web_entry *insn_entry) | |
1989 | { | |
1990 | int e = get_max_uid (); | |
1991 | fprintf (dump_file, "\nRelevant insns with their flag settings\n\n"); | |
1992 | ||
1993 | for (int i = 0; i < e; ++i) | |
1994 | if (insn_entry[i].is_relevant) | |
1995 | { | |
1996 | swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred (); | |
1997 | fprintf (dump_file, "%6d %6d ", i, | |
1998 | pred_entry && pred_entry->insn | |
1999 | ? INSN_UID (pred_entry->insn) : 0); | |
2000 | if (insn_entry[i].is_load) | |
2001 | fputs ("load ", dump_file); | |
2002 | if (insn_entry[i].is_store) | |
2003 | fputs ("store ", dump_file); | |
2004 | if (insn_entry[i].is_swap) | |
2005 | fputs ("swap ", dump_file); | |
2006 | if (insn_entry[i].is_live_in) | |
2007 | fputs ("live-in ", dump_file); | |
2008 | if (insn_entry[i].is_live_out) | |
2009 | fputs ("live-out ", dump_file); | |
2010 | if (insn_entry[i].contains_subreg) | |
2011 | fputs ("subreg ", dump_file); | |
2012 | if (insn_entry[i].is_128_int) | |
2013 | fputs ("int128 ", dump_file); | |
2014 | if (insn_entry[i].is_call) | |
2015 | fputs ("call ", dump_file); | |
2016 | if (insn_entry[i].is_swappable) | |
2017 | { | |
2018 | fputs ("swappable ", dump_file); | |
2019 | if (insn_entry[i].special_handling == SH_CONST_VECTOR) | |
2020 | fputs ("special:constvec ", dump_file); | |
2021 | else if (insn_entry[i].special_handling == SH_SUBREG) | |
2022 | fputs ("special:subreg ", dump_file); | |
2023 | else if (insn_entry[i].special_handling == SH_NOSWAP_LD) | |
2024 | fputs ("special:load ", dump_file); | |
2025 | else if (insn_entry[i].special_handling == SH_NOSWAP_ST) | |
2026 | fputs ("special:store ", dump_file); | |
2027 | else if (insn_entry[i].special_handling == SH_EXTRACT) | |
2028 | fputs ("special:extract ", dump_file); | |
2029 | else if (insn_entry[i].special_handling == SH_SPLAT) | |
2030 | fputs ("special:splat ", dump_file); | |
2031 | else if (insn_entry[i].special_handling == SH_XXPERMDI) | |
2032 | fputs ("special:xxpermdi ", dump_file); | |
2033 | else if (insn_entry[i].special_handling == SH_CONCAT) | |
2034 | fputs ("special:concat ", dump_file); | |
2035 | else if (insn_entry[i].special_handling == SH_VPERM) | |
2036 | fputs ("special:vperm ", dump_file); | |
2037 | } | |
2038 | if (insn_entry[i].web_not_optimizable) | |
2039 | fputs ("unoptimizable ", dump_file); | |
2040 | if (insn_entry[i].will_delete) | |
2041 | fputs ("delete ", dump_file); | |
2042 | fputs ("\n", dump_file); | |
2043 | } | |
2044 | fputs ("\n", dump_file); | |
2045 | } | |
2046 | ||
2047 | /* Return RTX with its address canonicalized to (reg) or (+ reg reg). | |
2048 | Here RTX is an (& addr (const_int -16)). Always return a new copy | |
2049 | to avoid problems with combine. */ | |
2050 | static rtx | |
2051 | alignment_with_canonical_addr (rtx align) | |
2052 | { | |
2053 | rtx canon; | |
2054 | rtx addr = XEXP (align, 0); | |
2055 | ||
2056 | if (REG_P (addr)) | |
2057 | canon = addr; | |
2058 | ||
2059 | else if (GET_CODE (addr) == PLUS) | |
2060 | { | |
2061 | rtx addrop0 = XEXP (addr, 0); | |
2062 | rtx addrop1 = XEXP (addr, 1); | |
2063 | ||
2064 | if (!REG_P (addrop0)) | |
2065 | addrop0 = force_reg (GET_MODE (addrop0), addrop0); | |
2066 | ||
2067 | if (!REG_P (addrop1)) | |
2068 | addrop1 = force_reg (GET_MODE (addrop1), addrop1); | |
2069 | ||
2070 | canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1); | |
2071 | } | |
2072 | ||
2073 | else | |
2074 | canon = force_reg (GET_MODE (addr), addr); | |
2075 | ||
2076 | return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16)); | |
2077 | } | |
2078 | ||
/* Check whether an rtx is an alignment mask, and if so, return
   a fully-expanded rtx for the masking operation.  Returns 0 when
   INSN is not a 16-byte alignment masking of a register.  */
static rtx
alignment_mask (rtx_insn *insn)
{
  rtx body = PATTERN (insn);

  /* Only a single SET of the form (set x (and (reg) mask)) qualifies.  */
  if (GET_CODE (body) != SET
      || GET_CODE (SET_SRC (body)) != AND
      || !REG_P (XEXP (SET_SRC (body), 0)))
    return 0;

  rtx mask = XEXP (SET_SRC (body), 1);

  /* Immediate mask: accept only -16, which clears the low four bits
     and thus aligns to a 16-byte boundary.  */
  if (CONST_INT_P (mask))
    {
      if (INTVAL (mask) == -16)
	return alignment_with_canonical_addr (SET_SRC (body));
      else
	return 0;
    }

  if (!REG_P (mask))
    return 0;

  /* The mask is in a register; trace it to its definition and require
     that to be a SET of the constant -16.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  rtx real_mask = 0;

  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      if (!rtx_equal_p (DF_REF_REG (use), mask))
	continue;

      /* Exactly one reaching definition is required.  */
      struct df_link *def_link = DF_REF_CHAIN (use);
      if (!def_link || def_link->next)
	return 0;

      rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
      rtx const_body = PATTERN (const_insn);
      if (GET_CODE (const_body) != SET)
	return 0;

      real_mask = SET_SRC (const_body);

      if (!CONST_INT_P (real_mask)
	  || INTVAL (real_mask) != -16)
	return 0;
    }

  /* If no matching use was found, we could not verify the mask.  */
  if (real_mask == 0)
    return 0;

  return alignment_with_canonical_addr (SET_SRC (body));
}
2134 | ||
e0d98ffd KL |
/* Given INSN that's a load or store based at BASE_REG, check if
   all of its feeding computations align its address on a 16-byte
   boundary.  If so, return true and add all definition insns into
   AND_INSNS and their corresponding fully-expanded rtxes for the
   masking operations into AND_OPS.  */

static bool
find_alignment_op (rtx_insn *insn, rtx base_reg, vec<rtx_insn *> *and_insns,
		   vec<rtx> *and_ops)
{
  df_ref base_use;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  rtx and_operation = 0;

  FOR_EACH_INSN_INFO_USE (base_use, insn_info)
    {
      /* Only the use of BASE_REG itself is of interest.  */
      if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
	continue;

      struct df_link *base_def_link = DF_REF_CHAIN (base_use);
      if (!base_def_link)
	return false;

      /* Every reaching definition of the base register must be an
	 alignment-masking (AND -16) operation for the address to be
	 provably 16-byte aligned.  */
      while (base_def_link)
	{
	  /* With stack-protector code enabled, and possibly in other
	     circumstances, there may not be an associated insn for
	     the def.  */
	  if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
	    return false;

	  rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
	  and_operation = alignment_mask (and_insn);

	  /* Stop if we find any one which doesn't align.  */
	  if (!and_operation)
	    return false;

	  and_insns->safe_push (and_insn);
	  and_ops->safe_push (and_operation);
	  base_def_link = base_def_link->next;
	}
    }

  /* Nonnull only if at least one aligning definition was recorded;
     the rtx converts to the bool result.  */
  return and_operation;
}
2181 | ||
/* Deferred-replacement record, indexed by insn UID: when REPLACE is
   set, REPLACE_INSN is a swap insn to be rewritten as a copy after
   the insn walk completes.  */
struct del_info { bool replace; rtx_insn *replace_insn; };
2183 | ||
/* If INSN is the load for an lvx pattern, put it in canonical form.
   Swaps that become redundant are recorded in TO_DELETE (indexed by
   insn UID) for later replacement by copies.  */
static void
recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
{
  rtx body = PATTERN (insn);
  /* The caller guarantees INSN is a load-with-swap: a SET whose source
     is a swap (VEC_SELECT or the rotate-64 form) of a MEM.  */
  gcc_assert (GET_CODE (body) == SET
	      && (GET_CODE (SET_SRC (body)) == VEC_SELECT
		  || pattern_is_rotate64 (body))
	      && MEM_P (XEXP (SET_SRC (body), 0)));

  rtx mem = XEXP (SET_SRC (body), 0);
  rtx base_reg = XEXP (mem, 0);

  /* Collect every insn that aligns the base address, with the
     fully-expanded (and reg -16) rtx for each.  */
  auto_vec<rtx_insn *> and_insns;
  auto_vec<rtx> and_ops;
  bool is_any_def_and
    = find_alignment_op (insn, base_reg, &and_insns, &and_ops);

  if (is_any_def_and)
    {
      gcc_assert (and_insns.length () == and_ops.length ());
      df_ref def;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      FOR_EACH_INSN_INFO_DEF (def, insn_info)
	{
	  /* The loaded value must have a unique use, and that use must
	     be a pure swap (not itself a load or store).  */
	  struct df_link *link = DF_REF_CHAIN (def);
	  if (!link || link->next)
	    break;

	  rtx_insn *swap_insn = DF_REF_INSN (link->ref);
	  if (!insn_is_swap_p (swap_insn)
	      || insn_is_load_p (swap_insn)
	      || insn_is_store_p (swap_insn))
	    break;

	  /* Expected lvx pattern found.  Change the swap to
	     a copy, and propagate the AND operation into the
	     load.  */
	  to_delete[INSN_UID (swap_insn)].replace = true;
	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;

	  rtx new_reg = 0;
	  rtx and_mask = 0;
	  for (unsigned i = 0; i < and_insns.length (); i++)
	    {
	      /* However, first we must be sure that we make the
		 base register from the AND operation available
		 in case the register has been overwritten.  Copy
		 the base register to a new pseudo and use that
		 as the base register of the AND operation in
		 the new LVX instruction.  */
	      rtx_insn *and_insn = and_insns[i];
	      rtx and_op = and_ops[i];
	      rtx and_base = XEXP (and_op, 0);
	      if (!new_reg)
		{
		  new_reg = gen_reg_rtx (GET_MODE (and_base));
		  and_mask = XEXP (and_op, 1);
		}
	      rtx copy = gen_rtx_SET (new_reg, and_base);
	      rtx_insn *new_insn = emit_insn_after (copy, and_insn);
	      set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
	      df_insn_rescan (new_insn);
	    }

	  /* Rewrite the load's address as (and new_reg -16) so it is
	     recognized as the canonical lvx form.  */
	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, and_mask);
	  SET_SRC (body) = mem;
	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
	  df_insn_rescan (insn);

	  if (dump_file)
	    fprintf (dump_file, "lvx opportunity found at %d\n",
		     INSN_UID (insn));
	}
    }
}
2260 | ||
/* If INSN is the store for an stvx pattern, put it in canonical form.
   Swaps that become redundant are recorded in TO_DELETE (indexed by
   insn UID) for later replacement by copies.  */
static void
recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
{
  rtx body = PATTERN (insn);
  /* The caller guarantees INSN is a store-with-swap: a SET of a MEM
     from a swap (VEC_SELECT or the rotate-64 form).  */
  gcc_assert (GET_CODE (body) == SET
	      && MEM_P (SET_DEST (body))
	      && (GET_CODE (SET_SRC (body)) == VEC_SELECT
		  || pattern_is_rotate64 (body)))
  rtx mem = SET_DEST (body);
  rtx base_reg = XEXP (mem, 0);

  /* Collect every insn that aligns the base address, with the
     fully-expanded (and reg -16) rtx for each.  */
  auto_vec<rtx_insn *> and_insns;
  auto_vec<rtx> and_ops;
  bool is_any_def_and
    = find_alignment_op (insn, base_reg, &and_insns, &and_ops);

  if (is_any_def_and)
    {
      gcc_assert (and_insns.length () == and_ops.length ());
      rtx src_reg = XEXP (SET_SRC (body), 0);
      df_ref src_use;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      FOR_EACH_INSN_INFO_USE (src_use, insn_info)
	{
	  if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
	    continue;

	  /* The stored value must have a unique definition, and that
	     definition must be a pure swap (not a load or store).  */
	  struct df_link *link = DF_REF_CHAIN (src_use);
	  if (!link || link->next)
	    break;

	  rtx_insn *swap_insn = DF_REF_INSN (link->ref);
	  if (!insn_is_swap_p (swap_insn)
	      || insn_is_load_p (swap_insn)
	      || insn_is_store_p (swap_insn))
	    break;

	  /* Expected stvx pattern found.  Change the swap to
	     a copy, and propagate the AND operation into the
	     store.  */
	  to_delete[INSN_UID (swap_insn)].replace = true;
	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;

	  rtx new_reg = 0;
	  rtx and_mask = 0;
	  for (unsigned i = 0; i < and_insns.length (); i++)
	    {
	      /* However, first we must be sure that we make the
		 base register from the AND operation available
		 in case the register has been overwritten.  Copy
		 the base register to a new pseudo and use that
		 as the base register of the AND operation in
		 the new STVX instruction.  */
	      rtx_insn *and_insn = and_insns[i];
	      rtx and_op = and_ops[i];
	      rtx and_base = XEXP (and_op, 0);
	      if (!new_reg)
		{
		  new_reg = gen_reg_rtx (GET_MODE (and_base));
		  and_mask = XEXP (and_op, 1);
		}
	      rtx copy = gen_rtx_SET (new_reg, and_base);
	      rtx_insn *new_insn = emit_insn_after (copy, and_insn);
	      set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
	      df_insn_rescan (new_insn);
	    }

	  /* Rewrite the store's address as (and new_reg -16) so it is
	     recognized as the canonical stvx form.  */
	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, and_mask);
	  SET_SRC (body) = src_reg;
	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
	  df_insn_rescan (insn);

	  if (dump_file)
	    fprintf (dump_file, "stvx opportunity found at %d\n",
		     INSN_UID (insn));
	}
    }
}
2340 | ||
/* Look for patterns created from builtin lvx and stvx calls, and
   canonicalize them to be properly recognized as such.  Works in two
   phases: first walk all insns and record work in TO_DELETE (indexed
   by insn UID), then perform the delayed swap-to-copy replacements.
   The replacement is delayed so that no insn is deleted while the
   FOR_BB_INSNS walk is still iterating over the insn chain.  */
static void
recombine_lvx_stvx_patterns (function *fun)
{
  int i;
  basic_block bb;
  rtx_insn *insn;

  /* TO_DELETE is indexed by insn UID; XCNEWVEC zero-initializes, so
     entries with a null 'replace' field are simply skipped below.  */
  int num_insns = get_max_uid ();
  del_info *to_delete = XCNEWVEC (del_info, num_insns);

  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS (bb, insn)
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* A load or store that is also a doubleword swap is a candidate
	 lvx/stvx pattern; the recombine helpers decide whether it can
	 actually be canonicalized and record any swap to remove.  */
      if (insn_is_load_p (insn) && insn_is_swap_p (insn))
	recombine_lvx_pattern (insn, to_delete);
      else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
	recombine_stvx_pattern (insn, to_delete);
    }

  /* Turning swaps into copies is delayed until now, to avoid problems
     with deleting instructions during the insn walk.  */
  for (i = 0; i < num_insns; i++)
    if (to_delete[i].replace)
      {
	/* The recorded insn is a register swap; its SET_SRC is a
	   VEC_SELECT whose operand 0 is the swapped source register.
	   Replace the whole swap with a simple reg-to-reg copy.  */
	rtx swap_body = PATTERN (to_delete[i].replace_insn);
	rtx src_reg = XEXP (SET_SRC (swap_body), 0);
	rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
	rtx_insn *new_insn = emit_insn_before (copy,
					       to_delete[i].replace_insn);
	set_block_for_insn (new_insn,
			    BLOCK_FOR_INSN (to_delete[i].replace_insn));
	/* Keep the df information in sync with the insn-stream edits.  */
	df_insn_rescan (new_insn);
	df_insn_delete (to_delete[i].replace_insn);
	remove_insn (to_delete[i].replace_insn);
	to_delete[i].replace_insn->set_deleted ();
      }

  free (to_delete);
}
2385 | ||
/* Main entry point for this pass.  FUN is the function being compiled.

   Structure of the pass:
     1. Build def-use/use-def chains and canonicalize lvx/stvx patterns.
     2. Walk all insns, building webs (union-find) of connected vector
        computations and recording per-insn properties in INSN_ENTRY.
     3. Mark webs that cannot be optimized; for the rest, mark the
        doubleword swaps for removal and apply special handling.
     4. Two additional rtl passes: convert quad-aligned load/store-swap
        sequences to lvx/stvx, and fold swaps of loaded vector constants
        into swapped constants.
   Always returns 0 (no TODO flags beyond the pass descriptor's).  */
unsigned int
rs6000_analyze_swaps (function *fun)
{
  swap_web_entry *insn_entry;
  basic_block bb;
  rtx_insn *insn, *curr_insn = 0;

  /* Dataflow analysis for use-def chains.  */
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Pre-pass to recombine lvx and stvx patterns so we don't lose info.  */
  recombine_lvx_stvx_patterns (fun);

  /* Rebuild ud- and du-chains: the pre-pass edited the insn stream, so
     the chains built above are stale.  */
  df_remove_problem (df_chain);
  df_process_deferred_rescans ();
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Allocate structure to represent webs of insns.  Indexed by insn
     UID; XCNEWVEC zero-initializes all fields.  */
  insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
    {
      unsigned int uid = INSN_UID (insn);
      if (NONDEBUG_INSN_P (insn))
	{
	  insn_entry[uid].insn = insn;

	  if (GET_CODE (insn) == CALL_INSN)
	    insn_entry[uid].is_call = 1;

	  /* Walk the uses and defs to see if we mention vector regs.
	     Record any constraints on optimization of such mentions.  */
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref mention;
	  FOR_EACH_INSN_INFO_USE (mention, insn_info)
	    {
	      /* We use DF_REF_REAL_REG here to get inside any subregs.  */
	      machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

	      /* If a use gets its value from a call insn, it will be
		 a hard register and will look like (reg:V4SI 3 3).
		 The df analysis creates two mentions for GPR3 and GPR4,
		 both DImode.  We must recognize this and treat it as a
		 vector mention to ensure the call is unioned with this
		 use.  */
	      if (mode == DImode && DF_REF_INSN_INFO (mention))
		{
		  rtx feeder = DF_REF_INSN (mention);
		  /* FIXME: It is pretty hard to get from the df mention
		     to the mode of the use in the insn.  We arbitrarily
		     pick a vector mode here, even though the use might
		     be a real DImode.  We can be too conservative
		     (create a web larger than necessary) because of
		     this, so consider eventually fixing this.  */
		  if (GET_CODE (feeder) == CALL_INSN)
		    mode = V4SImode;
		}

	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
		{
		  insn_entry[uid].is_relevant = 1;
		  /* 128-bit integer/float128 webs are never optimized;
		     see the "Record unoptimizable webs" loop below.  */
		  if (mode == TImode || mode == V1TImode
		      || FLOAT128_VECTOR_P (mode))
		    insn_entry[uid].is_128_int = 1;
		  if (DF_REF_INSN_INFO (mention))
		    insn_entry[uid].contains_subreg
		      = !rtx_equal_p (DF_REF_REG (mention),
				      DF_REF_REAL_REG (mention));
		  /* Union this insn with the insns defining the value
		     it uses, growing the web.  */
		  union_defs (insn_entry, insn, mention);
		}
	    }
	  FOR_EACH_INSN_INFO_DEF (mention, insn_info)
	    {
	      /* We use DF_REF_REAL_REG here to get inside any subregs.  */
	      machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

	      /* If we're loading up a hard vector register for a call,
		 it looks like (set (reg:V4SI 9 9) (...)).  The df
		 analysis creates two mentions for GPR9 and GPR10, both
		 DImode.  So relying on the mode from the mentions
		 isn't sufficient to ensure we union the call into the
		 web with the parameter setup code.  */
	      /* NOTE(review): INSN here is an rtx_insn *, whose code is
		 INSN/CALL_INSN/etc., never SET, so this condition looks
		 always-false and the branch dead; the intent per the
		 comment above suggests testing PATTERN (insn) instead.
		 Verify before changing -- making the branch live would
		 alter which insns are considered relevant.  */
	      if (mode == DImode && GET_CODE (insn) == SET
		  && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (insn))))
		mode = GET_MODE (SET_DEST (insn));

	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
		{
		  insn_entry[uid].is_relevant = 1;
		  if (mode == TImode || mode == V1TImode
		      || FLOAT128_VECTOR_P (mode))
		    insn_entry[uid].is_128_int = 1;
		  if (DF_REF_INSN_INFO (mention))
		    insn_entry[uid].contains_subreg
		      = !rtx_equal_p (DF_REF_REG (mention),
				      DF_REF_REAL_REG (mention));
		  /* REG_FUNCTION_VALUE_P is not valid for subregs.  */
		  else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
		    insn_entry[uid].is_live_out = 1;
		  union_uses (insn_entry, insn, mention);
		}
	    }

	  if (insn_entry[uid].is_relevant)
	    {
	      /* Determine if this is a load or store.  */
	      insn_entry[uid].is_load = insn_is_load_p (insn);
	      insn_entry[uid].is_store = insn_is_store_p (insn);

	      /* Determine if this is a doubleword swap.  If not,
		 determine whether it can legally be swapped.  */
	      if (insn_is_swap_p (insn))
		insn_entry[uid].is_swap = 1;
	      else
		{
		  unsigned int special = SH_NONE;
		  insn_entry[uid].is_swappable
		    = insn_is_swappable_p (insn_entry, insn, &special);
		  /* Special handling and subregs don't mix; treat such
		     insns as unswappable rather than risk a wrong
		     transformation.  */
		  if (special != SH_NONE && insn_entry[uid].contains_subreg)
		    insn_entry[uid].is_swappable = 0;
		  else if (special != SH_NONE)
		    insn_entry[uid].special_handling = special;
		  else if (insn_entry[uid].contains_subreg
			   && has_part_mult (insn))
		    insn_entry[uid].is_swappable = 0;
		  else if (insn_entry[uid].contains_subreg)
		    insn_entry[uid].special_handling = SH_SUBREG;
		}
	    }
	}
    }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table when first built\n");
      dump_swap_insn_table (insn_entry);
    }

  /* Record unoptimizable webs.  A web is marked on its union-find
     root; any disqualifying member poisons the whole web.  */
  unsigned e = get_max_uid (), i;
  for (i = 0; i < e; ++i)
    {
      if (!insn_entry[i].is_relevant)
	continue;

      swap_web_entry *root
	= (swap_web_entry*)(&insn_entry[i])->unionfind_root ();

      if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
	  || (insn_entry[i].contains_subreg
	      && insn_entry[i].special_handling != SH_SUBREG)
	  || insn_entry[i].is_128_int || insn_entry[i].is_call
	  || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
	root->web_not_optimizable = 1;

      /* If we have loads or stores that aren't permuting then the
	 optimization isn't appropriate.  */
      else if ((insn_entry[i].is_load || insn_entry[i].is_store)
	       && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
	root->web_not_optimizable = 1;

      /* If we have a swap that is both fed by a permuting load
	 and a feeder of a permuting store, then the optimization
	 isn't appropriate.  (Consider vec_xl followed by vec_xst_be.)  */
      else if (insn_entry[i].is_swap && !insn_entry[i].is_load
	       && !insn_entry[i].is_store
	       && swap_feeds_both_load_and_store (&insn_entry[i]))
	root->web_not_optimizable = 1;

      /* If we have permuting loads or stores that are not accompanied
	 by a register swap, the optimization isn't appropriate.  */
      else if (insn_entry[i].is_load && insn_entry[i].is_swap)
	{
	  rtx insn = insn_entry[i].insn;
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref def;

	  FOR_EACH_INSN_INFO_DEF (def, insn_info)
	    {
	      struct df_link *link = DF_REF_CHAIN (def);

	      if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
		{
		  root->web_not_optimizable = 1;
		  break;
		}
	    }
	}
      else if (insn_entry[i].is_store && insn_entry[i].is_swap)
	{
	  rtx insn = insn_entry[i].insn;
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref use;

	  FOR_EACH_INSN_INFO_USE (use, insn_info)
	    {
	      struct df_link *link = DF_REF_CHAIN (use);

	      if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
		{
		  root->web_not_optimizable = 1;
		  break;
		}
	    }
	}
    }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
      dump_swap_insn_table (insn_entry);
    }

  /* For each load and store in an optimizable web (which implies
     the loads and stores are permuting), find the associated
     register swaps and mark them for removal.  Due to various
     optimizations we may mark the same swap more than once.  Also
     perform special handling for swappable insns that require it.  */
  for (i = 0; i < e; ++i)
    if ((insn_entry[i].is_load || insn_entry[i].is_store)
	&& insn_entry[i].is_swap)
      {
	swap_web_entry* root_entry
	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
	if (!root_entry->web_not_optimizable)
	  mark_swaps_for_removal (insn_entry, i);
      }
    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
      {
	swap_web_entry* root_entry
	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
	if (!root_entry->web_not_optimizable)
	  handle_special_swappables (insn_entry, i);
      }

  /* Now delete the swaps marked for removal.  */
  for (i = 0; i < e; ++i)
    if (insn_entry[i].will_delete)
      replace_swap_with_copy (insn_entry, i);

  /* Clean up.  */
  free (insn_entry);

  /* Use a second pass over rtl to detect that certain vector values
     fetched from or stored to memory on quad-word aligned addresses
     can use lvx/stvx without swaps.  */

  /* First, rebuild ud chains.  */
  df_remove_problem (df_chain);
  df_process_deferred_rescans ();
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_UD_CHAIN);
  df_analyze ();

  swap_web_entry *pass2_insn_entry;
  pass2_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  This pass does not build
     webs; it only needs the load/store/swap classification bits.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
    {
      unsigned int uid = INSN_UID (insn);
      if (NONDEBUG_INSN_P (insn))
	{
	  pass2_insn_entry[uid].insn = insn;

	  pass2_insn_entry[uid].is_relevant = 1;
	  pass2_insn_entry[uid].is_load = insn_is_load_p (insn);
	  pass2_insn_entry[uid].is_store = insn_is_store_p (insn);

	  /* Determine if this is a doubleword swap.  If not,
	     determine whether it can legally be swapped.  */
	  if (insn_is_swap_p (insn))
	    pass2_insn_entry[uid].is_swap = 1;
	}
    }

  /* Re-read the UID bound: the transformations above may have created
     new insns.  */
  e = get_max_uid ();
  for (unsigned i = 0; i < e; ++i)
    if (pass2_insn_entry[i].is_swap && !pass2_insn_entry[i].is_load
	&& !pass2_insn_entry[i].is_store)
      {
	/* Replace swap of aligned load-swap with aligned unswapped
	   load.  */
	rtx_insn *rtx_insn = pass2_insn_entry[i].insn;
	if (quad_aligned_load_p (pass2_insn_entry, rtx_insn))
	  replace_swapped_aligned_load (pass2_insn_entry, rtx_insn);
      }
    else if (pass2_insn_entry[i].is_swap && pass2_insn_entry[i].is_store)
      {
	/* Replace aligned store-swap of swapped value with aligned
	   unswapped store.  */
	rtx_insn *rtx_insn = pass2_insn_entry[i].insn;
	if (quad_aligned_store_p (pass2_insn_entry, rtx_insn))
	  replace_swapped_aligned_store (pass2_insn_entry, rtx_insn);
      }

  /* Clean up.  */
  free (pass2_insn_entry);

  /* Use a third pass over rtl to replace swap(load(vector constant))
     with load(swapped vector constant).  */

  /* First, rebuild ud chains.  */
  df_remove_problem (df_chain);
  df_process_deferred_rescans ();
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_UD_CHAIN);
  df_analyze ();

  swap_web_entry *pass3_insn_entry;
  pass3_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
    {
      unsigned int uid = INSN_UID (insn);
      if (NONDEBUG_INSN_P (insn))
	{
	  pass3_insn_entry[uid].insn = insn;

	  pass3_insn_entry[uid].is_relevant = 1;
	  pass3_insn_entry[uid].is_load = insn_is_load_p (insn);
	  pass3_insn_entry[uid].is_store = insn_is_store_p (insn);

	  /* Determine if this is a doubleword swap.  If not,
	     determine whether it can legally be swapped.  */
	  if (insn_is_swap_p (insn))
	    pass3_insn_entry[uid].is_swap = 1;
	}
    }

  e = get_max_uid ();
  for (unsigned i = 0; i < e; ++i)
    if (pass3_insn_entry[i].is_swap && !pass3_insn_entry[i].is_load
	&& !pass3_insn_entry[i].is_store)
      {
	insn = pass3_insn_entry[i].insn;
	if (const_load_sequence_p (pass3_insn_entry, insn))
	  replace_swapped_load_constant (pass3_insn_entry, insn);
      }

  /* Clean up.  */
  free (pass3_insn_entry);
  return 0;
}
2743 | ||
/* Pass descriptor for the doubleword-swap removal pass; consumed by
   pass_analyze_swaps below.  TODO_df_finish releases the df problems
   this pass adds (see rs6000_analyze_swaps).  */
const pass_data pass_data_analyze_swaps =
{
  RTL_PASS, /* type */
  "swaps", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
2756 | ||
2757 | class pass_analyze_swaps : public rtl_opt_pass | |
2758 | { | |
2759 | public: | |
2760 | pass_analyze_swaps(gcc::context *ctxt) | |
2761 | : rtl_opt_pass(pass_data_analyze_swaps, ctxt) | |
2762 | {} | |
2763 | ||
2764 | /* opt_pass methods: */ | |
2765 | virtual bool gate (function *) | |
2766 | { | |
2767 | return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX | |
2768 | && !TARGET_P9_VECTOR && rs6000_optimize_swaps); | |
2769 | } | |
2770 | ||
2771 | virtual unsigned int execute (function *fun) | |
2772 | { | |
2773 | return rs6000_analyze_swaps (fun); | |
2774 | } | |
2775 | ||
2776 | opt_pass *clone () | |
2777 | { | |
2778 | return new pass_analyze_swaps (m_ctxt); | |
2779 | } | |
2780 | ||
2781 | }; // class pass_analyze_swaps | |
2782 | ||
/* Factory for the swaps pass, called by the pass-registration
   machinery; ownership of the returned pass object transfers to the
   caller (the pass manager).  */
rtl_opt_pass *
make_pass_analyze_swaps (gcc::context *ctxt)
{
  return new pass_analyze_swaps (ctxt);
}
2788 |