gcc/config/rs6000/rs6000-p8swap.cc
1 /* Subroutines used to remove unnecessary doubleword swaps
2 for p8 little-endian VSX code.
3 Copyright (C) 1991-2024 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #define IN_TARGET_CODE 1
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "df.h"
31 #include "tm_p.h"
32 #include "ira.h"
33 #include "print-tree.h"
34 #include "varasm.h"
35 #include "explow.h"
36 #include "expr.h"
37 #include "output.h"
38 #include "tree-pass.h"
39 #include "rtx-vector-builder.h"
40
41 /* Analyze vector computations and remove unnecessary doubleword
42 swaps (xxswapdi instructions). This pass is performed only
43 for little-endian VSX code generation.
44
45 For this specific case, loads and stores of 4x32 and 2x64 vectors
46      are inefficient.  These are implemented using the lxvd2x and
47      stxvd2x instructions, which invert the order of doublewords in
48 a vector register. Thus the code generation inserts an xxswapdi
49 after each such load, and prior to each such store. (For spill
50 code after register assignment, an additional xxswapdi is inserted
51 following each store in order to return a hard register to its
52 unpermuted value.)
53
54 The extra xxswapdi instructions reduce performance. This can be
55 particularly bad for vectorized code. The purpose of this pass
56 is to reduce the number of xxswapdi instructions required for
57 correctness.
58
59 The primary insight is that much code that operates on vectors
60 does not care about the relative order of elements in a register,
61 so long as the correct memory order is preserved. If we have
62      a computation where all input values are provided by lxvd2x/xxswapdi
63      sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
64 and all intermediate computations are pure SIMD (independent of
65 element order), then all the xxswapdi's associated with the loads
66 and stores may be removed.
67
68 This pass uses some of the infrastructure and logical ideas from
69 the "web" pass in web.cc. We create maximal webs of computations
70 fitting the description above using union-find. Each such web is
71 then optimized by removing its unnecessary xxswapdi instructions.
72
73 The pass is placed prior to global optimization so that we can
74 perform the optimization in the safest and simplest way possible;
75 that is, by replacing each xxswapdi insn with a register copy insn.
76 Subsequent forward propagation will remove copies where possible.
77
78 There are some operations sensitive to element order for which we
79 can still allow the operation, provided we modify those operations.
80 These include CONST_VECTORs, for which we must swap the first and
81 second halves of the constant vector; and SUBREGs, for which we
82 must adjust the byte offset to account for the swapped doublewords.
83 A remaining opportunity would be non-immediate-form splats, for
84 which we should adjust the selected lane of the input. We should
85 also make code generation adjustments for sum-across operations,
86 since this is a common vectorizer reduction.
87
88 Because we run prior to the first split, we can see loads and stores
89 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
90 vector loads and stores that have not yet been split into a permuting
91 load/store and a swap. (One way this can happen is with a builtin
92 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
93 than deleting a swap, we convert the load/store into a permuting
94 load/store (which effectively removes the swap). */
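
/* Illustrative example (the function name and types below are chosen
   arbitrarily, not taken from any test case): for an
   element-order-independent computation such as

     void
     add_vectors (vector int *a, vector int *b, vector int *c)
     {
       *c = *a + *b;
     }

   the little-endian P8 expansion typically loads each operand with an
   lxvd2x followed by an xxswapdi, performs the add, and then emits an
   xxswapdi followed by an stxvd2x for the store.  Since the vector add
   is pure SIMD, this pass replaces each of those xxswapdi insns with a
   register copy, which forward propagation then removes.  */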
95
96 /* Notes on Permutes
97
98 We do not currently handle computations that contain permutes. There
99 is a general transformation that can be performed correctly, but it
100 may introduce more expensive code than it replaces. To handle these
101 would require a cost model to determine when to perform the optimization.
102 This commentary records how this could be done if desired.
103
104 The most general permute is something like this (example for V16QI):
105
106 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
107 (parallel [(const_int a0) (const_int a1)
108 ...
109 (const_int a14) (const_int a15)]))
110
111    where a0,...,a15 are in [0,31] and select the elements of op1 and op2
112    that appear in the result.
113
114 Regardless of mode, we can convert the PARALLEL to a mask of 16
115 byte-element selectors. Let's call this M, with M[i] representing
116 the ith byte-element selector value. Then if we swap doublewords
117 throughout the computation, we can get correct behavior by replacing
118 M with M' as follows:
119
120 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
121 { ((M[i]+8)%16)+16 : M[i] in [16,31]
122
123 This seems promising at first, since we are just replacing one mask
124 with another. But certain masks are preferable to others. If M
125 is a mask that matches a vmrghh pattern, for example, M' certainly
126 will not. Instead of a single vmrghh, we would generate a load of
127 M' and a vperm. So we would need to know how many xxswapd's we can
128 remove as a result of this transformation to determine if it's
129 profitable; and preferably the logic would need to be aware of all
130 the special preferable masks.
131
132 Another form of permute is an UNSPEC_VPERM, in which the mask is
133 already in a register. In some cases, this mask may be a constant
134 that we can discover with ud-chains, in which case the above
135 transformation is ok. However, the common usage here is for the
136 mask to be produced by an UNSPEC_LVSL, in which case the mask
137 cannot be known at compile time. In such a case we would have to
138 generate several instructions to compute M' as above at run time,
139 and a cost model is needed again.
140
141 However, when the mask M for an UNSPEC_VPERM is loaded from the
142 constant pool, we can replace M with M' as above at no cost
143 beyond adding a constant pool entry. */
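
/* A small worked example of the mask adjustment above, for illustration
   only: a selector byte M[i] = 5 (an op1 element) becomes
   M'[i] = (5+8)%16 = 13, while M[i] = 18 (an op2 element) becomes
   M'[i] = ((18+8)%16)+16 = 26.  Each selector stays within its source
   operand; only its doubleword is flipped.  */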
144
145 /* This is based on the union-find logic in web.cc. web_entry_base is
146 defined in df.h. */
147 class swap_web_entry : public web_entry_base
148 {
149 public:
150 /* Pointer to the insn. */
151 rtx_insn *insn;
152 /* Set if insn contains a mention of a vector register. All other
153 fields are undefined if this field is unset. */
154 unsigned int is_relevant : 1;
155 /* Set if insn is a load. */
156 unsigned int is_load : 1;
157 /* Set if insn is a store. */
158 unsigned int is_store : 1;
159 /* Set if insn is a doubleword swap. This can either be a register swap
160 or a permuting load or store (test is_load and is_store for this). */
161 unsigned int is_swap : 1;
162 /* Set if the insn has a live-in use of a parameter register. */
163 unsigned int is_live_in : 1;
164 /* Set if the insn has a live-out def of a return register. */
165 unsigned int is_live_out : 1;
166 /* Set if the insn contains a subreg reference of a vector register. */
167 unsigned int contains_subreg : 1;
168 /* Set if the insn contains a 128-bit integer operand. */
169 unsigned int is_128_int : 1;
170 /* Set if this is a call-insn. */
171 unsigned int is_call : 1;
172 /* Set if this insn does not perform a vector operation for which
173 element order matters, or if we know how to fix it up if it does.
174 Undefined if is_swap is set. */
175 unsigned int is_swappable : 1;
176 /* A nonzero value indicates what kind of special handling for this
177 insn is required if doublewords are swapped. Undefined if
178 is_swappable is not set. */
179 unsigned int special_handling : 4;
180 /* Set if the web represented by this entry cannot be optimized. */
181 unsigned int web_not_optimizable : 1;
182 /* Set if this insn should be deleted. */
183 unsigned int will_delete : 1;
184 };
185
186 enum special_handling_values {
187 SH_NONE = 0,
188 SH_CONST_VECTOR,
189 SH_SUBREG,
190 SH_NOSWAP_LD,
191 SH_NOSWAP_ST,
192 SH_EXTRACT,
193 SH_SPLAT,
194 SH_XXPERMDI,
195 SH_CONCAT,
196 SH_VPERM
197 };
198
199 /* Union INSN with all insns containing definitions that reach USE.
200 Detect whether USE is live-in to the current function. */
201 static void
202 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
203 {
204 struct df_link *link = DF_REF_CHAIN (use);
205
206 if (!link)
207 insn_entry[INSN_UID (insn)].is_live_in = 1;
208
209 while (link)
210 {
211 if (DF_REF_IS_ARTIFICIAL (link->ref))
212 insn_entry[INSN_UID (insn)].is_live_in = 1;
213
214 if (DF_REF_INSN_INFO (link->ref))
215 {
216 rtx def_insn = DF_REF_INSN (link->ref);
217 gcc_assert (NONDEBUG_INSN_P (def_insn));
218 unionfind_union (insn_entry + INSN_UID (insn),
219 insn_entry + INSN_UID (def_insn));
220 }
221
222 link = link->next;
223 }
224 }
225
226 /* Union INSN with all insns containing uses reached from DEF.
227 Detect whether DEF is live-out from the current function. */
228 static void
229 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
230 {
231 struct df_link *link = DF_REF_CHAIN (def);
232
233 if (!link)
234 insn_entry[INSN_UID (insn)].is_live_out = 1;
235
236 while (link)
237 {
238 /* This could be an eh use or some other artificial use;
239 we treat these all the same (killing the optimization). */
240 if (DF_REF_IS_ARTIFICIAL (link->ref))
241 insn_entry[INSN_UID (insn)].is_live_out = 1;
242
243 if (DF_REF_INSN_INFO (link->ref))
244 {
245 rtx use_insn = DF_REF_INSN (link->ref);
246 if (NONDEBUG_INSN_P (use_insn))
247 unionfind_union (insn_entry + INSN_UID (insn),
248 insn_entry + INSN_UID (use_insn));
249 }
250
251 link = link->next;
252 }
253 }
254
255 /* Return true iff the SET_SRC of PAT (a SINGLE_SET) is a rotation of its
256    operand by 64 bits; else return false.  */
257
258 static bool
259 pattern_is_rotate64 (rtx pat)
260 {
261 rtx rot = SET_SRC (pat);
262
263 if (GET_CODE (rot) == ROTATE && CONST_INT_P (XEXP (rot, 1))
264 && INTVAL (XEXP (rot, 1)) == 64)
265 return true;
266
267 return false;
268 }
269
270 /* Return 1 iff INSN is a load insn, including permuting loads that
271    represent an lxvd2x instruction; else return 0.  */
272 static unsigned int
273 insn_is_load_p (rtx insn)
274 {
275 rtx body = PATTERN (insn);
276
277 if (GET_CODE (body) == SET)
278 {
279 if (MEM_P (SET_SRC (body)))
280 return 1;
281
282 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
283 && MEM_P (XEXP (SET_SRC (body), 0)))
284 return 1;
285
286 if (pattern_is_rotate64 (body) && MEM_P (XEXP (SET_SRC (body), 0)))
287 return 1;
288
289 return 0;
290 }
291
292 if (GET_CODE (body) != PARALLEL)
293 return 0;
294
295 rtx set = XVECEXP (body, 0, 0);
296
297 if (GET_CODE (set) == SET && MEM_P (SET_SRC (set)))
298 return 1;
299
300 return 0;
301 }
302
303 /* Return 1 iff INSN is a store insn, including permuting stores that
304    represent an stxvd2x instruction; else return 0.  */
305 static unsigned int
306 insn_is_store_p (rtx insn)
307 {
308 rtx body = PATTERN (insn);
309 if (GET_CODE (body) == SET && MEM_P (SET_DEST (body)))
310 return 1;
311 if (GET_CODE (body) != PARALLEL)
312 return 0;
313 rtx set = XVECEXP (body, 0, 0);
314 if (GET_CODE (set) == SET && MEM_P (SET_DEST (set)))
315 return 1;
316 return 0;
317 }
318
319 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
320 a permuting load, or a permuting store. */
321 static unsigned int
322 insn_is_swap_p (rtx insn)
323 {
324 rtx body = PATTERN (insn);
325 if (GET_CODE (body) != SET)
326 return 0;
327 rtx rhs = SET_SRC (body);
328 if (pattern_is_rotate64 (body))
329 return 1;
330 if (GET_CODE (rhs) != VEC_SELECT)
331 return 0;
332 rtx parallel = XEXP (rhs, 1);
333 if (GET_CODE (parallel) != PARALLEL)
334 return 0;
335 unsigned int len = XVECLEN (parallel, 0);
336 if (len != 2 && len != 4 && len != 8 && len != 16)
337 return 0;
338 for (unsigned int i = 0; i < len / 2; ++i)
339 {
340 rtx op = XVECEXP (parallel, 0, i);
341 if (!CONST_INT_P (op) || INTVAL (op) != len / 2 + i)
342 return 0;
343 }
344 for (unsigned int i = len / 2; i < len; ++i)
345 {
346 rtx op = XVECEXP (parallel, 0, i);
347 if (!CONST_INT_P (op) || INTVAL (op) != i - len / 2)
348 return 0;
349 }
350 return 1;
351 }
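
/* For reference, the reg-reg doubleword swap matched above looks like
   this for V2DImode (an illustrative sketch; other modes use 4, 8, or 16
   selector elements with the two halves exchanged in the same way):

     (set (reg:V2DI d)
          (vec_select:V2DI (reg:V2DI s)
                           (parallel [(const_int 1) (const_int 0)])))

   The ROTATE-by-64 form accepted by pattern_is_rotate64 expresses the
   same operation on a 128-bit value.  */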
352
353 /* Return true iff EXPR represents the sum of two registers. */
354 bool
355 rs6000_sum_of_two_registers_p (const_rtx expr)
356 {
357 if (GET_CODE (expr) == PLUS)
358 {
359 const_rtx operand1 = XEXP (expr, 0);
360 const_rtx operand2 = XEXP (expr, 1);
361 return (REG_P (operand1) && REG_P (operand2));
362 }
363 return false;
364 }
365
366 /* Return true iff EXPR represents an address expression that masks off
367 the low-order 4 bits in the style of an lvx or stvx rtl pattern. */
368 bool
369 rs6000_quadword_masked_address_p (const_rtx expr)
370 {
371 if (GET_CODE (expr) == AND)
372 {
373 const_rtx operand1 = XEXP (expr, 0);
374 const_rtx operand2 = XEXP (expr, 1);
375 if ((REG_P (operand1) || rs6000_sum_of_two_registers_p (operand1))
376 && CONST_SCALAR_INT_P (operand2) && INTVAL (operand2) == -16)
377 return true;
378 }
379 return false;
380 }
381
382 /* Return TRUE if INSN is a register swap fed by a permuting (swapping)
383    load whose memory address is quad-word aligned.  */
384 static bool
385 quad_aligned_load_p (swap_web_entry *insn_entry, rtx_insn *insn)
386 {
387 unsigned uid = INSN_UID (insn);
388 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
389 return false;
390
391 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
392
393 /* Since insn is known to represent a swap instruction, we know it
394 "uses" only one input variable. */
395 df_ref use = DF_INSN_INFO_USES (insn_info);
396
397 /* Figure out where this input variable is defined. */
398 struct df_link *def_link = DF_REF_CHAIN (use);
399
400 /* If there is no definition or the definition is artificial or there are
401 multiple definitions, punt. */
402 if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
403 || def_link->next)
404 return false;
405
406 rtx def_insn = DF_REF_INSN (def_link->ref);
407 unsigned uid2 = INSN_UID (def_insn);
408 /* We're looking for a load-with-swap insn. If this is not that,
409 return false. */
410 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
411 return false;
412
413 /* If the source of the rtl def is not a set from memory, return
414 false. */
415 rtx body = PATTERN (def_insn);
416 if (GET_CODE (body) != SET
417 || !(GET_CODE (SET_SRC (body)) == VEC_SELECT
418 || pattern_is_rotate64 (body))
419 || !MEM_P (XEXP (SET_SRC (body), 0)))
420 return false;
421
422 rtx mem = XEXP (SET_SRC (body), 0);
423 rtx base_reg = XEXP (mem, 0);
424 return ((REG_P (base_reg) || rs6000_sum_of_two_registers_p (base_reg))
425 && MEM_ALIGN (mem) >= 128) ? true : false;
426 }
427
428 /* Return TRUE if INSN represents a store-with-swap of a swapped value
429 and the memory address is quad-word aligned. */
430 static bool
431 quad_aligned_store_p (swap_web_entry *insn_entry, rtx_insn *insn)
432 {
433 unsigned uid = INSN_UID (insn);
434 if (!insn_entry[uid].is_swap || !insn_entry[uid].is_store)
435 return false;
436
437 rtx body = PATTERN (insn);
438 rtx dest_address = XEXP (SET_DEST (body), 0);
439 rtx swap_reg = XEXP (SET_SRC (body), 0);
440
441 /* If the base address for the memory expression is not represented
442 by a single register and is not the sum of two registers, punt. */
443 if (!REG_P (dest_address) && !rs6000_sum_of_two_registers_p (dest_address))
444 return false;
445
446 /* Confirm that the value to be stored is produced by a swap
447 instruction. */
448 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
449 df_ref use;
450 FOR_EACH_INSN_INFO_USE (use, insn_info)
451 {
452 struct df_link *def_link = DF_REF_CHAIN (use);
453
454       /* If this use is not of the candidate swap register, skip it;
455          we only care about the use of the stored value.  */
456 if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
457 continue;
458
459       /* If there is no def or the def is artificial or there are
460 multiple defs, punt. */
461 if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
462 || def_link->next)
463 return false;
464
465 rtx def_insn = DF_REF_INSN (def_link->ref);
466 unsigned uid2 = INSN_UID (def_insn);
467
468       /* If this source value is not a simple swap, return false.  */
469 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load
470 || insn_entry[uid2].is_store)
471 return false;
472
473 /* I've processed the use that I care about, so break out of
474 this loop. */
475 break;
476 }
477
478 /* At this point, we know the source data comes from a swap. The
479 remaining question is whether the memory address is aligned. */
480 rtx set = single_set (insn);
481 if (set)
482 {
483 rtx dest = SET_DEST (set);
484 if (MEM_P (dest))
485 return (MEM_ALIGN (dest) >= 128);
486 }
487 return false;
488 }
489
490 /* Return 1 iff the insn recorded in INSN_ENTRY, known to be a swap,
491    is both fed by a load and a feeder of a store.  */
492 static unsigned int
493 swap_feeds_both_load_and_store (swap_web_entry *insn_entry)
494 {
495 rtx insn = insn_entry->insn;
496 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
497 df_ref def, use;
498 struct df_link *link = 0;
499 rtx_insn *load = 0, *store = 0;
500 bool fed_by_load = 0;
501 bool feeds_store = 0;
502
503 FOR_EACH_INSN_INFO_USE (use, insn_info)
504 {
505 link = DF_REF_CHAIN (use);
506 load = DF_REF_INSN (link->ref);
507 if (insn_is_load_p (load) && insn_is_swap_p (load))
508 fed_by_load = 1;
509 }
510
511 FOR_EACH_INSN_INFO_DEF (def, insn_info)
512 {
513 link = DF_REF_CHAIN (def);
514 store = DF_REF_INSN (link->ref);
515 if (insn_is_store_p (store) && insn_is_swap_p (store))
516 feeds_store = 1;
517 }
518
519 return fed_by_load && feeds_store;
520 }
521
522 /* Return TRUE if INSN is a swap fed by a load from the constant pool.  */
523 static bool
524 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
525 {
526 unsigned uid = INSN_UID (insn);
527 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
528 return false;
529
530 const_rtx tocrel_base;
531
532 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
533 df_ref use;
534
535 /* Iterate over the definitions that are used by this insn. Since
536      this is known to be a swap insn, expect only one used definition.  */
537 FOR_EACH_INSN_INFO_USE (use, insn_info)
538 {
539 struct df_link *def_link = DF_REF_CHAIN (use);
540
541 /* If there is no def or the def is artificial or there are
542 multiple defs, punt. */
543 if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
544 || def_link->next)
545 return false;
546
547 rtx def_insn = DF_REF_INSN (def_link->ref);
548 unsigned uid2 = INSN_UID (def_insn);
549 /* If this is not a load or is not a swap, return false. */
550 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
551 return false;
552
553 /* If the source of the rtl def is not a set from memory, return
554 false. */
555 rtx body = PATTERN (def_insn);
556 if (GET_CODE (body) != SET
557 || !(GET_CODE (SET_SRC (body)) == VEC_SELECT
558 || pattern_is_rotate64 (body))
559 || !MEM_P (XEXP (SET_SRC (body), 0)))
560 return false;
561
562 rtx mem = XEXP (SET_SRC (body), 0);
563 rtx base_reg = XEXP (mem, 0);
564 /* If the base address for the memory expression is not
565 represented by a register, punt. */
566 if (!REG_P (base_reg))
567 return false;
568
569 df_ref base_use;
570 insn_info = DF_INSN_INFO_GET (def_insn);
571 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
572 {
573 /* If base_use does not represent base_reg, look for another
574 use. */
575 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
576 continue;
577
578 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
579 if (!base_def_link || base_def_link->next)
580 return false;
581
582 /* Constants held on the stack are not "true" constants
583 because their values are not part of the static load
584 image. If this constant's base reference is a stack
585 or frame pointer, it is seen as an artificial
586 reference. */
587 if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
588 return false;
589
590 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
591 rtx tocrel_body = PATTERN (tocrel_insn);
592 rtx base, offset;
593 if (GET_CODE (tocrel_body) != SET)
594 return false;
595 /* There is an extra level of indirection for small/large
596 code models. */
597 rtx tocrel_expr = SET_SRC (tocrel_body);
598 if (MEM_P (tocrel_expr))
599 tocrel_expr = XEXP (tocrel_expr, 0);
600 if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
601 return false;
602 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
603
604 if (!SYMBOL_REF_P (base) || !CONSTANT_POOL_ADDRESS_P (base))
605 return false;
606 else
607 {
608 /* FIXME: The conditions under which
609 (SYMBOL_REF_P (const_vector)
610 && !CONSTANT_POOL_ADDRESS_P (const_vector))
611 are not well understood. This code prevents
612 an internal compiler error which will occur in
613 replace_swapped_load_constant () if we were to return
614 true. Some day, we should figure out how to properly
615 handle this condition in
616 replace_swapped_load_constant () and then we can
617 remove this special test. */
618 rtx const_vector = get_pool_constant (base);
619 if (SYMBOL_REF_P (const_vector)
620 && CONSTANT_POOL_ADDRESS_P (const_vector))
621 const_vector = get_pool_constant (const_vector);
622 if (GET_CODE (const_vector) != CONST_VECTOR)
623 return false;
624 }
625 }
626 }
627 return true;
628 }
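
/* The def-use shape const_load_sequence_p looks for is roughly the
   following (an illustrative sketch; the exact address form depends on
   the code model):

     (set (reg:DI base) ... toc-relative reference to .LCn ...)
     (set (reg:V16QI x)
          (vec_select:V16QI (mem:V16QI (reg:DI base)) ...)) ;; permuting load
     (set (reg:V16QI y)
          (vec_select:V16QI (reg:V16QI x) ...))             ;; INSN, the swap

   where .LCn is a constant-pool entry holding a CONST_VECTOR.  */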
629
630 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
631 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
632 static bool
633 v2df_reduction_p (rtx op)
634 {
635 if (GET_MODE (op) != V2DFmode)
636 return false;
637
638 enum rtx_code code = GET_CODE (op);
639 if (code != PLUS && code != SMIN && code != SMAX)
640 return false;
641
642 rtx concat = XEXP (op, 0);
643 if (GET_CODE (concat) != VEC_CONCAT)
644 return false;
645
646 rtx select0 = XEXP (concat, 0);
647 rtx select1 = XEXP (concat, 1);
648 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
649 return false;
650
651 rtx reg0 = XEXP (select0, 0);
652 rtx reg1 = XEXP (select1, 0);
653 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
654 return false;
655
656 rtx parallel0 = XEXP (select0, 1);
657 rtx parallel1 = XEXP (select1, 1);
658 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
659 return false;
660
661 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
662 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
663 return false;
664
665 return true;
666 }
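
/* An illustrative sketch of the shape accepted above for the PLUS case,
   following the vsx_reduc_* patterns referenced in the comment:

     (plus:V2DF
       (vec_concat:V2DF
         (vec_select:DF (reg:V2DF x) (parallel [(const_int 1)]))
         (vec_select:DF (reg:V2DF x) (parallel [(const_int 0)])))
       (reg:V2DF x))

   i.e. the two doublewords of X are exchanged and combined with X
   itself, so each lane of the result holds the full reduction value and
   doubleword-swapping the input or the result changes nothing.  */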
667
668 /* Return 1 iff OP is an operand that will not be affected by having
669 vector doublewords swapped in memory. */
670 static unsigned int
671 rtx_is_swappable_p (rtx op, unsigned int *special)
672 {
673 enum rtx_code code = GET_CODE (op);
674 int i, j;
675 rtx parallel;
676
677 switch (code)
678 {
679 case LABEL_REF:
680 case SYMBOL_REF:
681 case CLOBBER:
682 case REG:
683 return 1;
684
685 case VEC_CONCAT:
686 case ASM_INPUT:
687 case ASM_OPERANDS:
688 return 0;
689
690 case CONST_VECTOR:
691 {
692 *special = SH_CONST_VECTOR;
693 return 1;
694 }
695
696 case VEC_DUPLICATE:
697 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
698 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
699 it represents a vector splat for which we can do special
700 handling. */
701 if (CONST_INT_P (XEXP (op, 0)))
702 return 1;
703 else if (REG_P (XEXP (op, 0))
704 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
705 /* This catches V2DF and V2DI splat, at a minimum. */
706 return 1;
707 else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
708 && REG_P (XEXP (XEXP (op, 0), 0))
709 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
710 /* This catches splat of a truncated value. */
711 return 1;
712 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
713 /* If the duplicated item is from a select, defer to the select
714 processing to see if we can change the lane for the splat. */
715 return rtx_is_swappable_p (XEXP (op, 0), special);
716 else
717 return 0;
718
719 case VEC_SELECT:
720 /* A vec_extract operation is ok if we change the lane. */
721 if (REG_P (XEXP (op, 0))
722 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
723 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
724 && XVECLEN (parallel, 0) == 1
725 && CONST_INT_P (XVECEXP (parallel, 0, 0)))
726 {
727 *special = SH_EXTRACT;
728 return 1;
729 }
730 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
731 XXPERMDI is a swap operation, it will be identified by
732 insn_is_swap_p and therefore we won't get here. */
733 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
734 && (GET_MODE (XEXP (op, 0)) == V4DFmode
735 || GET_MODE (XEXP (op, 0)) == V4DImode)
736 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
737 && XVECLEN (parallel, 0) == 2
738 && CONST_INT_P (XVECEXP (parallel, 0, 0))
739 && CONST_INT_P (XVECEXP (parallel, 0, 1)))
740 {
741 *special = SH_XXPERMDI;
742 return 1;
743 }
744 else if (v2df_reduction_p (op))
745 return 1;
746 else
747 return 0;
748
749 case UNSPEC:
750 {
751 /* Various operations are unsafe for this optimization, at least
752 without significant additional work. Permutes are obviously
753 problematic, as both the permute control vector and the ordering
754 of the target values are invalidated by doubleword swapping.
755 Vector pack and unpack modify the number of vector lanes.
756 Merge-high/low will not operate correctly on swapped operands.
757 Vector shifts across element boundaries are clearly uncool,
758 as are vector select and concatenate operations. Vector
759 sum-across instructions define one operand with a specific
760 order-dependent element, so additional fixup code would be
761 needed to make those work. Vector set and non-immediate-form
762 vector splat are element-order sensitive. A few of these
763 cases might be workable with special handling if required.
764 Adding cost modeling would be appropriate in some cases. */
765 int val = XINT (op, 1);
766 switch (val)
767 {
768 default:
769 break;
770 case UNSPEC_VBPERMQ:
771 case UNSPEC_VPACK_SIGN_SIGN_SAT:
772 case UNSPEC_VPACK_SIGN_UNS_SAT:
773 case UNSPEC_VPACK_UNS_UNS_MOD:
774 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
775 case UNSPEC_VPACK_UNS_UNS_SAT:
776 case UNSPEC_VPERM:
777 case UNSPEC_VPERM_UNS:
778 case UNSPEC_VPERMHI:
779 case UNSPEC_VPERMSI:
780 case UNSPEC_VPERMXOR:
781 case UNSPEC_VPKPX:
782 case UNSPEC_VSLDOI:
783 case UNSPEC_VSLO:
784 case UNSPEC_VSRO:
785 case UNSPEC_VSUM2SWS:
786 case UNSPEC_VSUM4S:
787 case UNSPEC_VSUM4UBS:
788 case UNSPEC_VSUMSWS:
789 case UNSPEC_VSUMSWS_DIRECT:
790 case UNSPEC_VSX_CONCAT:
791 case UNSPEC_VSX_CVDPSPN:
792 case UNSPEC_VSX_CVSPDP:
793 case UNSPEC_VSX_CVSPDPN:
794 case UNSPEC_VSX_EXTRACT:
795 case UNSPEC_VSX_SET:
796 case UNSPEC_VSX_SLDWI:
797 case UNSPEC_VSX_VSLO:
798 case UNSPEC_VUNPACK_HI_SIGN:
799 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
800 case UNSPEC_VUNPACK_LO_SIGN:
801 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
802 case UNSPEC_VUPKHPX:
803 case UNSPEC_VUPKHS_V4SF:
804 case UNSPEC_VUPKHU_V4SF:
805 case UNSPEC_VUPKLPX:
806 case UNSPEC_VUPKLS_V4SF:
807 case UNSPEC_VUPKLU_V4SF:
808 return 0;
809 case UNSPEC_VSPLT_DIRECT:
810 case UNSPEC_VSX_XXSPLTD:
811 *special = SH_SPLAT;
812 return 1;
813 case UNSPEC_REDUC_PLUS:
814 case UNSPEC_REDUC:
815 return 1;
816 case UNSPEC_VPMSUM:
817 /* vpmsumd is not swappable, but vpmsum[bhw] are. */
818 if (GET_MODE (op) == V2DImode)
819 return 0;
820 break;
821 }
822 }
823
824 default:
825 break;
826 }
827
828 const char *fmt = GET_RTX_FORMAT (code);
829 int ok = 1;
830
831 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
832 if (fmt[i] == 'e' || fmt[i] == 'u')
833 {
834 unsigned int special_op = SH_NONE;
835 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
836 if (special_op == SH_NONE)
837 continue;
838 /* Ensure we never have two kinds of special handling
839 for the same insn. */
840 if (*special != SH_NONE && *special != special_op)
841 return 0;
842 *special = special_op;
843 }
844 else if (fmt[i] == 'E')
845 for (j = 0; j < XVECLEN (op, i); ++j)
846 {
847 unsigned int special_op = SH_NONE;
848 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
849 if (special_op == SH_NONE)
850 continue;
851 /* Ensure we never have two kinds of special handling
852 for the same insn. */
853 if (*special != SH_NONE && *special != special_op)
854 return 0;
855 *special = special_op;
856 }
857
858 return ok;
859 }
860
861 /* Return 1 iff INSN is an insn that will not be affected by
862 having vector doublewords swapped in memory (in which case
863 *SPECIAL is unchanged), or that can be modified to be correct
864 if vector doublewords are swapped in memory (in which case
865 *SPECIAL is changed to a value indicating how). */
866 static unsigned int
867 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
868 unsigned int *special)
869 {
870 /* Calls are always bad. */
871 if (GET_CODE (insn) == CALL_INSN)
872 return 0;
873
874 /* Loads and stores seen here are not permuting, but we can still
875 fix them up by converting them to permuting ones. Exceptions:
876 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
877 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
878 for the SET source. Also we must now make an exception for lvx
879 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
880 explicit "& -16") since this leads to unrecognizable insns. */
881 rtx body = PATTERN (insn);
882 int i = INSN_UID (insn);
883
884 if (insn_entry[i].is_load)
885 {
886 if (GET_CODE (body) == SET)
887 {
888 rtx rhs = SET_SRC (body);
889 /* Even without a swap, the RHS might be a vec_select for, say,
890 a byte-reversing load. */
891 if (!MEM_P (rhs))
892 return 0;
893 if (GET_CODE (XEXP (rhs, 0)) == AND)
894 return 0;
895
896 *special = SH_NOSWAP_LD;
897 return 1;
898 }
899 else
900 return 0;
901 }
902
903 if (insn_entry[i].is_store)
904 {
905 if (GET_CODE (body) == SET
906 && GET_CODE (SET_SRC (body)) != UNSPEC
907 && GET_CODE (SET_SRC (body)) != VEC_SELECT)
908 {
909 rtx lhs = SET_DEST (body);
910 /* Even without a swap, the RHS might be a vec_select for, say,
911 a byte-reversing store. */
912 if (!MEM_P (lhs))
913 return 0;
914 if (GET_CODE (XEXP (lhs, 0)) == AND)
915 return 0;
916
917 *special = SH_NOSWAP_ST;
918 return 1;
919 }
920 else
921 return 0;
922 }
923
924 /* A convert to single precision can be left as is provided that
925 all of its uses are in xxspltw instructions that splat BE element
926 zero. */
927 if (GET_CODE (body) == SET
928 && GET_CODE (SET_SRC (body)) == UNSPEC
929 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
930 {
931 df_ref def;
932 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
933
934 FOR_EACH_INSN_INFO_DEF (def, insn_info)
935 {
936 struct df_link *link = DF_REF_CHAIN (def);
937 if (!link)
938 return 0;
939
940 for (; link; link = link->next) {
941 rtx use_insn = DF_REF_INSN (link->ref);
942 rtx use_body = PATTERN (use_insn);
943 if (GET_CODE (use_body) != SET
944 || GET_CODE (SET_SRC (use_body)) != UNSPEC
945 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
946 || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
947 return 0;
948 }
949 }
950
951 return 1;
952 }
953
954 /* A concatenation of two doublewords is ok if we reverse the
955 order of the inputs. */
956 if (GET_CODE (body) == SET
957 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
958 && (GET_MODE (SET_SRC (body)) == V2DFmode
959 || GET_MODE (SET_SRC (body)) == V2DImode))
960 {
961 *special = SH_CONCAT;
962 return 1;
963 }
964
965 /* V2DF reductions are always swappable. */
966 if (GET_CODE (body) == PARALLEL)
967 {
968 rtx expr = XVECEXP (body, 0, 0);
969 if (GET_CODE (expr) == SET
970 && v2df_reduction_p (SET_SRC (expr)))
971 return 1;
972 }
973
974 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
975 constant pool. */
976 if (GET_CODE (body) == SET
977 && GET_CODE (SET_SRC (body)) == UNSPEC
978 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
979 && XVECLEN (SET_SRC (body), 0) == 3
980 && REG_P (XVECEXP (SET_SRC (body), 0, 2)))
981 {
982 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
983 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
984 df_ref use;
985 FOR_EACH_INSN_INFO_USE (use, insn_info)
986 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
987 {
988 struct df_link *def_link = DF_REF_CHAIN (use);
989 /* Punt if multiple definitions for this reg. */
990 if (def_link && !def_link->next &&
991 const_load_sequence_p (insn_entry,
992 DF_REF_INSN (def_link->ref)))
993 {
994 *special = SH_VPERM;
995 return 1;
996 }
997 }
998 }
999
1000 /* Otherwise check the operands for vector lane violations. */
1001 return rtx_is_swappable_p (body, special);
1002 }
1003
1004 enum chain_purpose { FOR_LOADS, FOR_STORES };
1005
1006 /* Return true if the UD or DU chain headed by LINK is non-empty,
1007 and every entry on the chain references an insn that is a
1008 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
1009 register swap must have only permuting loads as reaching defs.
1010 If PURPOSE is FOR_STORES, each such register swap must have only
1011 register swaps or permuting stores as reached uses. */
1012 static bool
1013 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
1014 enum chain_purpose purpose)
1015 {
1016 if (!link)
1017 return false;
1018
1019 for (; link; link = link->next)
1020 {
1021 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
1022 continue;
1023
1024 if (DF_REF_IS_ARTIFICIAL (link->ref))
1025 return false;
1026
1027 rtx reached_insn = DF_REF_INSN (link->ref);
1028 unsigned uid = INSN_UID (reached_insn);
1029 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
1030
1031 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
1032 || insn_entry[uid].is_store)
1033 return false;
1034
1035 if (purpose == FOR_LOADS)
1036 {
1037 df_ref use;
1038 FOR_EACH_INSN_INFO_USE (use, insn_info)
1039 {
1040 struct df_link *swap_link = DF_REF_CHAIN (use);
1041
1042 while (swap_link)
1043 {
1044 if (DF_REF_IS_ARTIFICIAL (link->ref))
1045 return false;
1046
1047 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
1048 unsigned uid2 = INSN_UID (swap_def_insn);
1049
1050 /* Only permuting loads are allowed. */
1051 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
1052 return false;
1053
1054 swap_link = swap_link->next;
1055 }
1056 }
1057 }
1058 else if (purpose == FOR_STORES)
1059 {
1060 df_ref def;
1061 FOR_EACH_INSN_INFO_DEF (def, insn_info)
1062 {
1063 struct df_link *swap_link = DF_REF_CHAIN (def);
1064
1065 while (swap_link)
1066 {
1067 if (DF_REF_IS_ARTIFICIAL (link->ref))
1068 return false;
1069
1070 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
1071 unsigned uid2 = INSN_UID (swap_use_insn);
1072
1073 /* Permuting stores or register swaps are allowed. */
1074 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
1075 return false;
1076
1077 swap_link = swap_link->next;
1078 }
1079 }
1080 }
1081 }
1082
1083 return true;
1084 }
1085
1086 /* Mark the xxswapdi instructions associated with permuting loads and
1087 stores for removal. Note that we only flag them for deletion here,
1088 as there is a possibility of a swap being reached from multiple
1089 loads, etc. */
1090 static void
1091 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
1092 {
1093 rtx insn = insn_entry[i].insn;
1094 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1095
1096 if (insn_entry[i].is_load)
1097 {
1098 df_ref def;
1099 FOR_EACH_INSN_INFO_DEF (def, insn_info)
1100 {
1101 struct df_link *link = DF_REF_CHAIN (def);
1102
1103 /* We know by now that these are swaps, so we can delete
1104 them confidently. */
1105 while (link)
1106 {
1107 rtx use_insn = DF_REF_INSN (link->ref);
1108 insn_entry[INSN_UID (use_insn)].will_delete = 1;
1109 link = link->next;
1110 }
1111 }
1112 }
1113 else if (insn_entry[i].is_store)
1114 {
1115 df_ref use;
1116 FOR_EACH_INSN_INFO_USE (use, insn_info)
1117 {
1118 /* Ignore uses for addressability. */
1119 machine_mode mode = GET_MODE (DF_REF_REG (use));
1120 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
1121 continue;
1122
1123 struct df_link *link = DF_REF_CHAIN (use);
1124
1125 /* We know by now that these are swaps, so we can delete
1126 them confidently. */
1127 while (link)
1128 {
1129 rtx def_insn = DF_REF_INSN (link->ref);
1130 insn_entry[INSN_UID (def_insn)].will_delete = 1;
1131 link = link->next;
1132 }
1133 }
1134 }
1135 }
1136
1137 /* *OP_PTR is either a CONST_VECTOR or an expression containing one.
1138 Swap the first half of the vector with the second in the first
1139 case. Recurse to find it in the second. */
1140 static void
1141 swap_const_vector_halves (rtx *op_ptr)
1142 {
1143 int i;
1144 rtx op = *op_ptr;
1145 enum rtx_code code = GET_CODE (op);
1146 if (GET_CODE (op) == CONST_VECTOR)
1147 {
1148 int units = GET_MODE_NUNITS (GET_MODE (op));
1149 rtx_vector_builder builder (GET_MODE (op), units, 1);
1150 for (i = 0; i < units / 2; ++i)
1151 builder.quick_push (CONST_VECTOR_ELT (op, i + units / 2));
1152 for (i = 0; i < units / 2; ++i)
1153 builder.quick_push (CONST_VECTOR_ELT (op, i));
1154 *op_ptr = builder.build ();
1155 }
1156 else
1157 {
1158 int j;
1159 const char *fmt = GET_RTX_FORMAT (code);
1160 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
1161 if (fmt[i] == 'e' || fmt[i] == 'u')
1162 swap_const_vector_halves (&XEXP (op, i));
1163 else if (fmt[i] == 'E')
1164 for (j = 0; j < XVECLEN (op, i); ++j)
1165 swap_const_vector_halves (&XVECEXP (op, i, j));
1166 }
1167 }
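
/* For example (illustration only): applied to the V4SImode constant
   vector {1, 2, 3, 4}, the routine above produces {3, 4, 1, 2}:
   elements 2 and 3 are pushed first, followed by elements 0 and 1.  */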
1168
1169 /* Find all subregs of a vector expression that perform a narrowing,
1170 and adjust the subreg index to account for doubleword swapping. */
1171 static void
1172 adjust_subreg_index (rtx op)
1173 {
1174 enum rtx_code code = GET_CODE (op);
1175 if (code == SUBREG
1176 && (GET_MODE_SIZE (GET_MODE (op))
1177 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
1178 {
1179 unsigned int index = SUBREG_BYTE (op);
1180 if (index < 8)
1181 index += 8;
1182 else
1183 index -= 8;
1184 SUBREG_BYTE (op) = index;
1185 }
1186
1187 const char *fmt = GET_RTX_FORMAT (code);
1188 int i,j;
1189 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
1190 if (fmt[i] == 'e' || fmt[i] == 'u')
1191 adjust_subreg_index (XEXP (op, i));
1192 else if (fmt[i] == 'E')
1193 for (j = 0; j < XVECLEN (op, i); ++j)
1194 adjust_subreg_index (XVECEXP (op, i, j));
1195 }
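
/* Example (illustration only): a narrowing access such as
   (subreg:DI (reg:V2DI v) 0) is rewritten to (subreg:DI (reg:V2DI v) 8)
   and vice versa, because the doubleword that formerly occupied bytes
   0-7 of the register image now occupies bytes 8-15.  */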
1196
1197 /* Convert the non-permuting load INSN to a permuting one. */
1198 static void
1199 permute_load (rtx_insn *insn)
1200 {
1201 rtx body = PATTERN (insn);
1202 rtx mem_op = SET_SRC (body);
1203 rtx tgt_reg = SET_DEST (body);
1204 machine_mode mode = GET_MODE (tgt_reg);
1205 int n_elts = GET_MODE_NUNITS (mode);
1206 int half_elts = n_elts / 2;
1207 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
1208 int i, j;
1209 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
1210 XVECEXP (par, 0, i) = GEN_INT (j);
1211 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
1212 XVECEXP (par, 0, i) = GEN_INT (j);
1213 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
1214 SET_SRC (body) = sel;
1215 INSN_CODE (insn) = -1; /* Force re-recognition. */
1216 df_insn_rescan (insn);
1217
1218 if (dump_file)
1219 fprintf (dump_file, "Replacing load %d with permuted load\n",
1220 INSN_UID (insn));
1221 }
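
/* E.g. for a V4SImode load the routine above rewrites
   (set (reg:V4SI d) (mem:V4SI addr)) as
   (set (reg:V4SI d)
        (vec_select:V4SI (mem:V4SI addr)
                         (parallel [(const_int 2) (const_int 3)
                                    (const_int 0) (const_int 1)])))
   so the load itself performs the doubleword permutation and no separate
   swap is needed (illustrative sketch only).  */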
1222
1223 /* Convert the non-permuting store INSN to a permuting one. */
1224 static void
1225 permute_store (rtx_insn *insn)
1226 {
1227 rtx body = PATTERN (insn);
1228 rtx src_reg = SET_SRC (body);
1229 machine_mode mode = GET_MODE (src_reg);
1230 int n_elts = GET_MODE_NUNITS (mode);
1231 int half_elts = n_elts / 2;
1232 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
1233 int i, j;
1234 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
1235 XVECEXP (par, 0, i) = GEN_INT (j);
1236 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
1237 XVECEXP (par, 0, i) = GEN_INT (j);
1238 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
1239 SET_SRC (body) = sel;
1240 INSN_CODE (insn) = -1; /* Force re-recognition. */
1241 df_insn_rescan (insn);
1242
1243 if (dump_file)
1244 fprintf (dump_file, "Replacing store %d with permuted store\n",
1245 INSN_UID (insn));
1246 }
1247
1248 /* Given INSN that contains a vector extract operation, adjust the index
1249 of the extracted lane to account for the doubleword swap. */
1250 static void
1251 adjust_extract (rtx_insn *insn)
1252 {
1253 rtx pattern = PATTERN (insn);
1254 if (GET_CODE (pattern) == PARALLEL)
1255 pattern = XVECEXP (pattern, 0, 0);
1256 rtx src = SET_SRC (pattern);
1257 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
1258 account for that. */
1259 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
1260 rtx par = XEXP (sel, 1);
1261 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
1262 int lane = INTVAL (XVECEXP (par, 0, 0));
1263 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
1264 XVECEXP (par, 0, 0) = GEN_INT (lane);
1265 INSN_CODE (insn) = -1; /* Force re-recognition. */
1266 df_insn_rescan (insn);
1267
1268 if (dump_file)
1269 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
1270 }
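
/* For instance (illustration only), extracting lane 0 of a V4SImode
   vector becomes extracting lane 2 once the doublewords are swapped,
   and lane 3 becomes lane 1.  */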
1271
1272 /* Given INSN that contains a vector direct-splat operation, adjust the index
1273 of the source lane to account for the doubleword swap. */
1274 static void
1275 adjust_splat (rtx_insn *insn)
1276 {
1277 rtx body = PATTERN (insn);
1278 rtx unspec = XEXP (body, 1);
1279 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
1280 int lane = INTVAL (XVECEXP (unspec, 0, 1));
1281 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
1282 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
1283 INSN_CODE (insn) = -1; /* Force re-recognition. */
1284 df_insn_rescan (insn);
1285
1286 if (dump_file)
1287 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
1288 }
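
/* Likewise for a direct splat (illustration only): splatting lane 1 of a
   V4SImode source becomes splatting lane 3, and lane 2 becomes lane 0.  */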
1289
1290 /* Given INSN that contains an XXPERMDI operation (that is not a doubleword
1291 swap), reverse the order of the source operands and adjust the indices
1292 of the source lanes to account for doubleword reversal. */
1293 static void
1294 adjust_xxpermdi (rtx_insn *insn)
1295 {
1296 rtx set = PATTERN (insn);
1297 rtx select = XEXP (set, 1);
1298 rtx concat = XEXP (select, 0);
1299 rtx src0 = XEXP (concat, 0);
1300 XEXP (concat, 0) = XEXP (concat, 1);
1301 XEXP (concat, 1) = src0;
1302 rtx parallel = XEXP (select, 1);
1303 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
1304 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
1305 int new_lane0 = 3 - lane1;
1306 int new_lane1 = 3 - lane0;
1307 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
1308 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
1309 INSN_CODE (insn) = -1; /* Force re-recognition. */
1310 df_insn_rescan (insn);
1311
1312 if (dump_file)
1313 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
1314 }
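
/* As an arithmetic example (illustration only): an xxpermdi selecting
   lanes (0, 2) of (vec_concat op1 op2) is rewritten to select lanes
   (3-2, 3-0) = (1, 3) of (vec_concat op2 op1).  */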
1315
1316 /* Given INSN that contains a VEC_CONCAT operation of two doublewords,
1317 reverse the order of those inputs. */
1318 static void
1319 adjust_concat (rtx_insn *insn)
1320 {
1321 rtx set = PATTERN (insn);
1322 rtx concat = XEXP (set, 1);
1323 rtx src0 = XEXP (concat, 0);
1324 XEXP (concat, 0) = XEXP (concat, 1);
1325 XEXP (concat, 1) = src0;
1326 INSN_CODE (insn) = -1; /* Force re-recognition. */
1327 df_insn_rescan (insn);
1328
1329 if (dump_file)
1330 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
1331 }
1332
1333 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
1334 constant pool to reflect swapped doublewords. */
1335 static void
1336 adjust_vperm (rtx_insn *insn)
1337 {
1338 /* We previously determined that the UNSPEC_VPERM was fed by a
1339 swap of a swapping load of a TOC-relative constant pool symbol.
1340 Find the MEM in the swapping load and replace it with a MEM for
1341 the adjusted mask constant. */
1342 rtx set = PATTERN (insn);
1343 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
1344
1345 /* Find the swap. */
1346 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1347 df_ref use;
1348 rtx_insn *swap_insn = 0;
1349 FOR_EACH_INSN_INFO_USE (use, insn_info)
1350 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
1351 {
1352 struct df_link *def_link = DF_REF_CHAIN (use);
1353 gcc_assert (def_link && !def_link->next);
1354 swap_insn = DF_REF_INSN (def_link->ref);
1355 break;
1356 }
1357 gcc_assert (swap_insn);
1358
1359 /* Find the load. */
1360 insn_info = DF_INSN_INFO_GET (swap_insn);
1361 rtx_insn *load_insn = 0;
1362 FOR_EACH_INSN_INFO_USE (use, insn_info)
1363 {
1364 struct df_link *def_link = DF_REF_CHAIN (use);
1365 gcc_assert (def_link && !def_link->next);
1366 load_insn = DF_REF_INSN (def_link->ref);
1367 break;
1368 }
1369 gcc_assert (load_insn);
1370
1371 /* Find the TOC-relative symbol access. */
1372 insn_info = DF_INSN_INFO_GET (load_insn);
1373 rtx_insn *tocrel_insn = 0;
1374 FOR_EACH_INSN_INFO_USE (use, insn_info)
1375 {
1376 struct df_link *def_link = DF_REF_CHAIN (use);
1377 gcc_assert (def_link && !def_link->next);
1378 tocrel_insn = DF_REF_INSN (def_link->ref);
1379 break;
1380 }
1381 gcc_assert (tocrel_insn);
1382
1383 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
1384 to set tocrel_base; otherwise it would be unnecessary as we've
1385 already established it will return true. */
1386 rtx base, offset;
1387 const_rtx tocrel_base;
1388 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
1389 /* There is an extra level of indirection for small/large code models. */
1390 if (MEM_P (tocrel_expr))
1391 tocrel_expr = XEXP (tocrel_expr, 0);
1392 if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
1393 gcc_unreachable ();
1394 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
1395 rtx const_vector = get_pool_constant (base);
1396 /* With the extra indirection, get_pool_constant will produce the
1397 real constant from the reg_equal expression, so get the real
1398 constant. */
1399 if (SYMBOL_REF_P (const_vector))
1400 const_vector = get_pool_constant (const_vector);
1401 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
1402
1403 /* Create an adjusted mask from the initial mask. */
1404 unsigned int new_mask[16], i, val;
1405 for (i = 0; i < 16; ++i) {
1406 val = INTVAL (XVECEXP (const_vector, 0, i));
1407 if (val < 16)
1408 new_mask[i] = (val + 8) % 16;
1409 else
1410 new_mask[i] = ((val + 8) % 16) + 16;
1411 }
1412
1413 /* Create a new CONST_VECTOR and a MEM that references it. */
1414 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
1415 for (i = 0; i < 16; ++i)
1416 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
1417 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
1418 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
1419 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
1420 can't recognize. Force the SYMBOL_REF into a register. */
1421 if (!REG_P (XEXP (new_mem, 0))) {
1422 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
1423 XEXP (new_mem, 0) = base_reg;
1424 /* Move the newly created insn ahead of the load insn. */
1425 rtx_insn *force_insn = get_last_insn ();
1426 remove_insn (force_insn);
1427 rtx_insn *before_load_insn = PREV_INSN (load_insn);
1428 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
1429 df_insn_rescan (before_load_insn);
1430 df_insn_rescan (force_insn);
1431 }
1432
1433 /* Replace the MEM in the load instruction and rescan it. */
1434 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
1435 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
1436 df_insn_rescan (load_insn);
1437
1438 if (dump_file)
1439 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
1440 }
1441
1442 /* The insn described by INSN_ENTRY[I] can be swapped, but only
1443 with special handling. Take care of that here. */
1444 static void
1445 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
1446 {
1447 rtx_insn *insn = insn_entry[i].insn;
1448 rtx body = PATTERN (insn);
1449
1450 switch (insn_entry[i].special_handling)
1451 {
1452 default:
1453 gcc_unreachable ();
1454 case SH_CONST_VECTOR:
1455 {
1456 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
1457 gcc_assert (GET_CODE (body) == SET);
1458 swap_const_vector_halves (&SET_SRC (body));
1459 if (dump_file)
1460 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
1461 break;
1462 }
1463 case SH_SUBREG:
1464 /* A subreg of the same size is already safe. For subregs that
1465 select a smaller portion of a reg, adjust the index for
1466 swapped doublewords. */
1467 adjust_subreg_index (body);
1468 if (dump_file)
1469 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
1470 break;
1471 case SH_NOSWAP_LD:
1472 /* Convert a non-permuting load to a permuting one. */
1473 permute_load (insn);
1474 break;
1475 case SH_NOSWAP_ST:
1476 /* Convert a non-permuting store to a permuting one. */
1477 permute_store (insn);
1478 break;
1479 case SH_EXTRACT:
1480 /* Change the lane on an extract operation. */
1481 adjust_extract (insn);
1482 break;
1483 case SH_SPLAT:
1484 /* Change the lane on a direct-splat operation. */
1485 adjust_splat (insn);
1486 break;
1487 case SH_XXPERMDI:
1488 /* Change the lanes on an XXPERMDI operation. */
1489 adjust_xxpermdi (insn);
1490 break;
1491 case SH_CONCAT:
1492 /* Reverse the order of a concatenation operation. */
1493 adjust_concat (insn);
1494 break;
1495 case SH_VPERM:
1496 /* Change the mask loaded from the constant pool for a VPERM. */
1497 adjust_vperm (insn);
1498 break;
1499 }
1500 }
1501
1502 /* Find the insn from the Ith table entry, which is known to be a
1503 register swap Y = SWAP(X). Replace it with a copy Y = X. */
1504 static void
1505 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
1506 {
1507 rtx_insn *insn = insn_entry[i].insn;
1508 rtx body = PATTERN (insn);
1509 rtx src_reg = XEXP (SET_SRC (body), 0);
1510 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
1511 rtx_insn *new_insn = emit_insn_before (copy, insn);
1512 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
1513 df_insn_rescan (new_insn);
1514
1515 if (dump_file)
1516 {
1517 unsigned int new_uid = INSN_UID (new_insn);
1518 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
1519 }
1520
1521 df_insn_delete (insn);
1522 remove_insn (insn);
1523 insn->set_deleted ();
1524 }
1525
1526 /* INSN is known to contain a SUBREG, which we can normally handle,
1527 but if the SUBREG itself contains a MULT then we need to leave it alone
1528 to avoid turning a mult_hipart into a mult_lopart, for example. */
1529 static bool
1530 has_part_mult (rtx_insn *insn)
1531 {
1532 rtx body = PATTERN (insn);
1533 if (GET_CODE (body) != SET)
1534 return false;
1535 rtx src = SET_SRC (body);
1536 if (GET_CODE (src) != SUBREG)
1537 return false;
1538 rtx inner = XEXP (src, 0);
1539 return (GET_CODE (inner) == MULT);
1540 }
1541
1542 /* Make NEW_MEM_EXP's attributes and flags resemble those of
1543 ORIGINAL_MEM_EXP. */
1544 static void
1545 mimic_memory_attributes_and_flags (rtx new_mem_exp, const_rtx original_mem_exp)
1546 {
1547 RTX_FLAG (new_mem_exp, jump) = RTX_FLAG (original_mem_exp, jump);
1548 RTX_FLAG (new_mem_exp, call) = RTX_FLAG (original_mem_exp, call);
1549 RTX_FLAG (new_mem_exp, unchanging) = RTX_FLAG (original_mem_exp, unchanging);
1550 RTX_FLAG (new_mem_exp, volatil) = RTX_FLAG (original_mem_exp, volatil);
1551 RTX_FLAG (new_mem_exp, frame_related) =
1552 RTX_FLAG (original_mem_exp, frame_related);
1553
1554 /* The following fields may not be used with MEM subexpressions */
1555 RTX_FLAG (new_mem_exp, in_struct) = RTX_FLAG (original_mem_exp, in_struct);
1556 RTX_FLAG (new_mem_exp, return_val) = RTX_FLAG (original_mem_exp, return_val);
1557
1558 struct mem_attrs original_attrs = *get_mem_attrs(original_mem_exp);
1559
1560 alias_set_type set = original_attrs.alias;
1561 set_mem_alias_set (new_mem_exp, set);
1562
1563 addr_space_t addrspace = original_attrs.addrspace;
1564 set_mem_addr_space (new_mem_exp, addrspace);
1565
1566 unsigned int align = original_attrs.align;
1567 set_mem_align (new_mem_exp, align);
1568
1569 tree expr = original_attrs.expr;
1570 set_mem_expr (new_mem_exp, expr);
1571
1572 if (original_attrs.offset_known_p)
1573 {
1574 HOST_WIDE_INT offset = original_attrs.offset;
1575 set_mem_offset (new_mem_exp, offset);
1576 }
1577 else
1578 clear_mem_offset (new_mem_exp);
1579
1580 if (original_attrs.size_known_p)
1581 {
1582 HOST_WIDE_INT size = original_attrs.size;
1583 set_mem_size (new_mem_exp, size);
1584 }
1585 else
1586 clear_mem_size (new_mem_exp);
1587 }
1588
1589 /* Generate an rtx expression to represent use of the stvx insn to store
1590 the value represented by register SRC_EXP into the memory at address
1591 DEST_EXP, with vector mode MODE. */
1592 rtx
1593 rs6000_gen_stvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
1594 {
1595 rtx stvx;
1596
1597 if (mode == V16QImode)
1598 stvx = gen_altivec_stvx_v16qi (src_exp, dest_exp);
1599 else if (mode == V8HImode)
1600 stvx = gen_altivec_stvx_v8hi (src_exp, dest_exp);
1601 #ifdef HAVE_V8HFmode
1602 else if (mode == V8HFmode)
1603 stvx = gen_altivec_stvx_v8hf (src_exp, dest_exp);
1604 #endif
1605 else if (mode == V4SImode)
1606 stvx = gen_altivec_stvx_v4si (src_exp, dest_exp);
1607 else if (mode == V4SFmode)
1608 stvx = gen_altivec_stvx_v4sf (src_exp, dest_exp);
1609 else if (mode == V2DImode)
1610 stvx = gen_altivec_stvx_v2di (src_exp, dest_exp);
1611 else if (mode == V2DFmode)
1612 stvx = gen_altivec_stvx_v2df (src_exp, dest_exp);
1613 else if (mode == V1TImode)
1614 stvx = gen_altivec_stvx_v1ti (src_exp, dest_exp);
1615 else
1616 /* KFmode, TFmode, other modes not expected in this context. */
1617 gcc_unreachable ();
1618
1619 rtx new_mem_exp = SET_DEST (PATTERN (stvx));
1620 mimic_memory_attributes_and_flags (new_mem_exp, dest_exp);
1621 return stvx;
1622 }
1623
1624 /* Given that STORE_INSN represents an aligned store-with-swap of a
1625 swapped value, replace the store with an aligned store (without
1626 swap) and replace the swap with a copy insn. */
1627 static void
1628 replace_swapped_aligned_store (swap_web_entry *insn_entry,
1629 rtx_insn *store_insn)
1630 {
1631 unsigned uid = INSN_UID (store_insn);
1632 gcc_assert (insn_entry[uid].is_swap && insn_entry[uid].is_store);
1633
1634 rtx body = PATTERN (store_insn);
1635 rtx dest_address = XEXP (SET_DEST (body), 0);
1636 rtx swap_reg = XEXP (SET_SRC (body), 0);
1637 gcc_assert (REG_P (dest_address)
1638 || rs6000_sum_of_two_registers_p (dest_address));
1639
1640 /* Find the swap instruction that provides the value to be stored by
1641      this store-with-swap instruction.  */
1642 struct df_insn_info *insn_info = DF_INSN_INFO_GET (store_insn);
1643 df_ref use;
1644 rtx_insn *swap_insn = NULL;
1645 unsigned uid2 = 0;
1646 FOR_EACH_INSN_INFO_USE (use, insn_info)
1647 {
1648 struct df_link *def_link = DF_REF_CHAIN (use);
1649
1650       /* If this use is not of the candidate swap register, skip it;
1651          we are only interested in the feeding swap insn.  */
1652 if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
1653 continue;
1654
1655       /* If there is no def or the def is artificial or there are
1656 multiple defs, we should not be here. */
1657 gcc_assert (def_link && def_link->ref && !def_link->next
1658 && !DF_REF_IS_ARTIFICIAL (def_link->ref));
1659
1660 swap_insn = DF_REF_INSN (def_link->ref);
1661 uid2 = INSN_UID (swap_insn);
1662
1663 /* If this source value is not a simple swap, we should not be here. */
1664 gcc_assert (insn_entry[uid2].is_swap && !insn_entry[uid2].is_load
1665 && !insn_entry[uid2].is_store);
1666
1667 /* We've processed the use we care about, so break out of
1668 this loop. */
1669 break;
1670 }
1671
1672 /* At this point, swap_insn and uid2 represent the swap instruction
1673 that feeds the store. */
1674 gcc_assert (swap_insn);
1675 rtx set = single_set (store_insn);
1676 gcc_assert (set);
1677 rtx dest_exp = SET_DEST (set);
1678 rtx src_exp = XEXP (SET_SRC (body), 0);
1679 enum machine_mode mode = GET_MODE (dest_exp);
1680 gcc_assert (MEM_P (dest_exp));
1681 gcc_assert (MEM_ALIGN (dest_exp) >= 128);
1682
1683   /* Replace the store with an aligned stvx insn.  */
1684 rtx stvx;
1685 stvx = rs6000_gen_stvx (mode, dest_exp, src_exp);
1686
1687 rtx_insn *new_insn = emit_insn_before (stvx, store_insn);
1688 rtx new_body = PATTERN (new_insn);
1689
1690 gcc_assert ((GET_CODE (new_body) == SET)
1691 && MEM_P (SET_DEST (new_body)));
1692
1693 basic_block bb = BLOCK_FOR_INSN (store_insn);
1694 set_block_for_insn (new_insn, bb);
1695 /* Handle REG_EH_REGION note. */
1696 if (cfun->can_throw_non_call_exceptions && BB_END (bb) == store_insn)
1697 {
1698 rtx note = find_reg_note (store_insn, REG_EH_REGION, NULL_RTX);
1699 if (note)
1700 add_reg_note (new_insn, REG_EH_REGION, XEXP (note, 0));
1701 }
1702 df_insn_rescan (new_insn);
1703
1704 df_insn_delete (store_insn);
1705 remove_insn (store_insn);
1706 store_insn->set_deleted ();
1707
1708 /* Replace the swap with a copy. */
1709 uid2 = INSN_UID (swap_insn);
1710 mark_swaps_for_removal (insn_entry, uid2);
1711 replace_swap_with_copy (insn_entry, uid2);
1712 }
1713
1714 /* Generate an rtx expression to represent use of the lvx insn to load
1715 from memory SRC_EXP into register DEST_EXP with vector mode MODE. */
1716 rtx
1717 rs6000_gen_lvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
1718 {
1719 rtx lvx;
1720
1721 if (mode == V16QImode)
1722 lvx = gen_altivec_lvx_v16qi (dest_exp, src_exp);
1723 else if (mode == V8HImode)
1724 lvx = gen_altivec_lvx_v8hi (dest_exp, src_exp);
1725 #ifdef HAVE_V8HFmode
1726 else if (mode == V8HFmode)
1727 lvx = gen_altivec_lvx_v8hf (dest_exp, src_exp);
1728 #endif
1729 else if (mode == V4SImode)
1730 lvx = gen_altivec_lvx_v4si (dest_exp, src_exp);
1731 else if (mode == V4SFmode)
1732 lvx = gen_altivec_lvx_v4sf (dest_exp, src_exp);
1733 else if (mode == V2DImode)
1734 lvx = gen_altivec_lvx_v2di (dest_exp, src_exp);
1735 else if (mode == V2DFmode)
1736 lvx = gen_altivec_lvx_v2df (dest_exp, src_exp);
1737 else if (mode == V1TImode)
1738 lvx = gen_altivec_lvx_v1ti (dest_exp, src_exp);
1739 else
1740 /* KFmode, TFmode, other modes not expected in this context. */
1741 gcc_unreachable ();
1742
1743 rtx new_mem_exp = SET_SRC (PATTERN (lvx));
1744 mimic_memory_attributes_and_flags (new_mem_exp, src_exp);
1745
1746 return lvx;
1747 }
1748
1749 /* Given that SWAP_INSN represents a swap of an aligned
1750 load-with-swap, replace the load with an aligned load (without
1751 swap) and replace the swap with a copy insn. */
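/* This is the load-side mirror of replace_swapped_aligned_store above:
   for a 16-byte-aligned address, lxvd2x followed by xxswapdi yields the
   same result as a single lvx, so the load is replaced by the lvx form
   built by rs6000_gen_lvx and the swap becomes a plain register copy.  */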
1752 static void
1753 replace_swapped_aligned_load (swap_web_entry *insn_entry, rtx swap_insn)
1754 {
1755 /* Find the load. */
1756 unsigned uid = INSN_UID (swap_insn);
1757 /* Only call this if quad_aligned_load_p (swap_insn). */
1758 gcc_assert (insn_entry[uid].is_swap && !insn_entry[uid].is_load);
1759 struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
1760
1761 /* Since insn is known to represent a swap instruction, we know it
1762 "uses" only one input variable. */
1763 df_ref use = DF_INSN_INFO_USES (insn_info);
1764
1765 /* Figure out where this input variable is defined. */
1766 struct df_link *def_link = DF_REF_CHAIN (use);
1767 /* There must be exactly one, non-artificial, reaching definition.  */
1768 gcc_assert (def_link && def_link->ref && !def_link->next
1769 && !DF_REF_IS_ARTIFICIAL (def_link->ref));
1770
1771 rtx_insn *def_insn = DF_REF_INSN (def_link->ref);
1772 unsigned uid2 = INSN_UID (def_insn);
1773
1774 /* We're expecting a load-with-swap insn. */
1775 gcc_assert (insn_entry[uid2].is_load && insn_entry[uid2].is_swap);
1776
1777 /* We expect this to be a set from memory, with the source representing
1778 a swap (indicated by a VEC_SELECT or a 64-bit rotate).  */
1779 rtx body = PATTERN (def_insn);
1780 gcc_assert ((GET_CODE (body) == SET)
1781 && (GET_CODE (SET_SRC (body)) == VEC_SELECT
1782 || pattern_is_rotate64 (body))
1783 && MEM_P (XEXP (SET_SRC (body), 0)));
1784
1785 rtx src_exp = XEXP (SET_SRC (body), 0);
1786 enum machine_mode mode = GET_MODE (src_exp);
1787 rtx lvx = rs6000_gen_lvx (mode, SET_DEST (body), src_exp);
1788
1789 rtx_insn *new_insn = emit_insn_before (lvx, def_insn);
1790 rtx new_body = PATTERN (new_insn);
1791
1792 gcc_assert ((GET_CODE (new_body) == SET)
1793 && MEM_P (SET_SRC (new_body)));
1794
1795 basic_block bb = BLOCK_FOR_INSN (def_insn);
1796 set_block_for_insn (new_insn, bb);
1797 /* Handle REG_EH_REGION note. */
1798 if (cfun->can_throw_non_call_exceptions && BB_END (bb) == def_insn)
1799 {
1800 rtx note = find_reg_note (def_insn, REG_EH_REGION, NULL_RTX);
1801 if (note)
1802 add_reg_note (new_insn, REG_EH_REGION, XEXP (note, 0));
1803 }
1804 df_insn_rescan (new_insn);
1805
1806 df_insn_delete (def_insn);
1807 remove_insn (def_insn);
1808 def_insn->set_deleted ();
1809
1810 /* Replace the swap with a copy. */
1811 mark_swaps_for_removal (insn_entry, uid);
1812 replace_swap_with_copy (insn_entry, uid);
1813 }
1814
1815 /* Given that SWAP_INSN represents a swap of a load of a constant
1816 vector value, replace with a single instruction that loads a
1817 swapped variant of the original constant.
1818
1819 The "natural" representation of a byte array in memory is the same
1820 for big endian and little endian.
1821
1822 unsigned char byte_array[] =
1823 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f };
1824
1825 However, when loaded into a vector register, the representation
1826 depends on endian conventions.
1827
1828 In big-endian mode, the register holds:
1829
1830 MSB LSB
1831 [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]
1832
1833 In little-endian mode, the register holds:
1834
1835 MSB LSB
1836 [ f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ]
1837
1838 Word arrays require different handling. Consider the word array:
1839
1840 unsigned int word_array[] =
1841 { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f };
1842
1843 The in-memory representation depends on the endian configuration.  The
1844 equivalent data, declared as a byte array, would appear in memory as:
1845
1846 unsigned char big_endian_word_array_data[] =
1847 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f }
1848
1849 unsigned char little_endian_word_array_data[] =
1850 { 3, 2, 1, 0, 7, 6, 5, 4, b, a, 9, 8, f, e, d, c }
1851
1852 In big-endian mode, the register holds:
1853
1854 MSB LSB
1855 [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]
1856
1857 In little-endian mode, the register holds:
1858
1859 MSB LSB
1860 [ c, d, e, f, 8, 9, a, b, 4, 5, 6, 7, 0, 1, 2, 3 ]
1861
1862
1863 Similar transformations apply to the vector of half-word and vector
1864 of double-word representations.
1865
1866 For now, don't handle V1TImode constants (a single quadword value);
1867 just return.  A better solution is to fix the code generator to emit
1868 lvx/stvx for those.  */
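/* As a concrete illustration of the constant adjustment performed below,
   using the word_array example above: the V4SImode constant
   { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f } is replaced by
   { 0x08090a0b, 0x0c0d0e0f, 0x00010203, 0x04050607 }, i.e. its two
   doublewords are exchanged.  Loading the adjusted constant with the
   permuting load alone then leaves the register in the same state that
   the permuting load followed by xxswapdi produced for the original
   constant.  */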
1869 static void
1870 replace_swapped_load_constant (swap_web_entry *insn_entry, rtx swap_insn)
1871 {
1872 /* Find the load. */
1873 struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
1874 rtx_insn *load_insn;
1875 df_ref use = DF_INSN_INFO_USES (insn_info);
1876 struct df_link *def_link = DF_REF_CHAIN (use);
1877 gcc_assert (def_link && !def_link->next);
1878
1879 load_insn = DF_REF_INSN (def_link->ref);
1880 gcc_assert (load_insn);
1881
1882 /* Find the TOC-relative symbol access. */
1883 insn_info = DF_INSN_INFO_GET (load_insn);
1884 use = DF_INSN_INFO_USES (insn_info);
1885
1886 def_link = DF_REF_CHAIN (use);
1887 gcc_assert (def_link && !def_link->next);
1888
1889 rtx_insn *tocrel_insn = DF_REF_INSN (def_link->ref);
1890 gcc_assert (tocrel_insn);
1891
1892 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
1893 to set tocrel_base; otherwise it would be unnecessary as we've
1894 already established it will return true. */
1895 rtx base, offset;
1896 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
1897 const_rtx tocrel_base;
1898
1899 /* There is an extra level of indirection for small/large code models. */
1900 if (MEM_P (tocrel_expr))
1901 tocrel_expr = XEXP (tocrel_expr, 0);
1902
1903 if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
1904 gcc_unreachable ();
1905
1906 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
1907 rtx const_vector = get_pool_constant (base);
1908
1909 /* With the extra indirection, get_pool_constant will produce the
1910 real constant from the reg_equal expression, so get the real
1911 constant. */
1912 if (SYMBOL_REF_P (const_vector))
1913 const_vector = get_pool_constant (const_vector);
1914 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
1915
1916 rtx new_mem;
1917 enum machine_mode mode = GET_MODE (const_vector);
1918
1919 /* Create an adjusted constant from the original constant. */
1920 if (mode == V1TImode)
1921 /* Leave this code as is. */
1922 return;
1923 else if (mode == V16QImode)
1924 {
1925 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (16));
1926 int i;
1927
1928 for (i = 0; i < 16; i++)
1929 XVECEXP (vals, 0, ((i+8) % 16)) = XVECEXP (const_vector, 0, i);
1930 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1931 new_mem = force_const_mem (mode, new_const_vector);
1932 }
1933 else if ((mode == V8HImode)
1934 #ifdef HAVE_V8HFmode
1935 || (mode == V8HFmode)
1936 #endif
1937 )
1938 {
1939 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (8));
1940 int i;
1941
1942 for (i = 0; i < 8; i++)
1943 XVECEXP (vals, 0, ((i+4) % 8)) = XVECEXP (const_vector, 0, i);
1944 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1945 new_mem = force_const_mem (mode, new_const_vector);
1946 }
1947 else if ((mode == V4SImode) || (mode == V4SFmode))
1948 {
1949 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (4));
1950 int i;
1951
1952 for (i = 0; i < 4; i++)
1953 XVECEXP (vals, 0, ((i+2) % 4)) = XVECEXP (const_vector, 0, i);
1954 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1955 new_mem = force_const_mem (mode, new_const_vector);
1956 }
1957 else if ((mode == V2DImode) || (mode == V2DFmode))
1958 {
1959 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (2));
1960 int i;
1961
1962 for (i = 0; i < 2; i++)
1963 XVECEXP (vals, 0, ((i+1) % 2)) = XVECEXP (const_vector, 0, i);
1964 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1965 new_mem = force_const_mem (mode, new_const_vector);
1966 }
1967 else
1968 {
1969 /* We do not expect other modes to be constant-load-swapped. */
1970 gcc_unreachable ();
1971 }
1972
1973 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
1974 can't recognize. Force the SYMBOL_REF into a register. */
1975 if (!REG_P (XEXP (new_mem, 0)))
1976 {
1977 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
1978 XEXP (new_mem, 0) = base_reg;
1979 /* Move the newly created insn ahead of the load insn. */
1980 /* The last insn is the insn that forced new_mem into a register. */
1981 rtx_insn *force_insn = get_last_insn ();
1982 /* Remove this insn from the end of the instruction sequence. */
1983 remove_insn (force_insn);
1984 rtx_insn *before_load_insn = PREV_INSN (load_insn);
1985
1986 /* And insert it back into the sequence immediately before the load
1987 insn, so the new base register will be available when the existing
1988 load is modified to load the swapped constant.  */
1989 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
1990 df_insn_rescan (before_load_insn);
1991 df_insn_rescan (force_insn);
1992 }
1993
1994 /* Replace the MEM in the load instruction and rescan it. */
1995 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
1996 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
1997 df_insn_rescan (load_insn);
1998
1999 unsigned int uid = INSN_UID (swap_insn);
2000 mark_swaps_for_removal (insn_entry, uid);
2001 replace_swap_with_copy (insn_entry, uid);
2002 }
2003
2004 /* Dump the swap table to DUMP_FILE. */
2005 static void
2006 dump_swap_insn_table (swap_web_entry *insn_entry)
2007 {
2008 int e = get_max_uid ();
2009 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
2010
2011 for (int i = 0; i < e; ++i)
2012 if (insn_entry[i].is_relevant)
2013 {
2014 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
2015 fprintf (dump_file, "%6d %6d ", i,
2016 pred_entry && pred_entry->insn
2017 ? INSN_UID (pred_entry->insn) : 0);
2018 if (insn_entry[i].is_load)
2019 fputs ("load ", dump_file);
2020 if (insn_entry[i].is_store)
2021 fputs ("store ", dump_file);
2022 if (insn_entry[i].is_swap)
2023 fputs ("swap ", dump_file);
2024 if (insn_entry[i].is_live_in)
2025 fputs ("live-in ", dump_file);
2026 if (insn_entry[i].is_live_out)
2027 fputs ("live-out ", dump_file);
2028 if (insn_entry[i].contains_subreg)
2029 fputs ("subreg ", dump_file);
2030 if (insn_entry[i].is_128_int)
2031 fputs ("int128 ", dump_file);
2032 if (insn_entry[i].is_call)
2033 fputs ("call ", dump_file);
2034 if (insn_entry[i].is_swappable)
2035 {
2036 fputs ("swappable ", dump_file);
2037 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
2038 fputs ("special:constvec ", dump_file);
2039 else if (insn_entry[i].special_handling == SH_SUBREG)
2040 fputs ("special:subreg ", dump_file);
2041 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
2042 fputs ("special:load ", dump_file);
2043 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
2044 fputs ("special:store ", dump_file);
2045 else if (insn_entry[i].special_handling == SH_EXTRACT)
2046 fputs ("special:extract ", dump_file);
2047 else if (insn_entry[i].special_handling == SH_SPLAT)
2048 fputs ("special:splat ", dump_file);
2049 else if (insn_entry[i].special_handling == SH_XXPERMDI)
2050 fputs ("special:xxpermdi ", dump_file);
2051 else if (insn_entry[i].special_handling == SH_CONCAT)
2052 fputs ("special:concat ", dump_file);
2053 else if (insn_entry[i].special_handling == SH_VPERM)
2054 fputs ("special:vperm ", dump_file);
2055 }
2056 if (insn_entry[i].web_not_optimizable)
2057 fputs ("unoptimizable ", dump_file);
2058 if (insn_entry[i].will_delete)
2059 fputs ("delete ", dump_file);
2060 fputs ("\n", dump_file);
2061 }
2062 fputs ("\n", dump_file);
2063 }
2064
2065 /* Given ALIGN, an rtx of the form (and ADDR (const_int -16)), return a
2066 copy with ADDR canonicalized to (reg) or (plus reg reg).  Always return
2067 a new rtx to avoid problems with combine.  */
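/* For example, (and (plus (reg:DI 3) (reg:DI 4)) (const_int -16)) comes
   back as a newly built rtx of the same shape, whereas an address that
   is neither a REG nor a PLUS is first forced into a register.  (The
   register numbers here are purely illustrative.)  */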
2068 static rtx
2069 alignment_with_canonical_addr (rtx align)
2070 {
2071 rtx canon;
2072 rtx addr = XEXP (align, 0);
2073
2074 if (REG_P (addr))
2075 canon = addr;
2076
2077 else if (GET_CODE (addr) == PLUS)
2078 {
2079 rtx addrop0 = XEXP (addr, 0);
2080 rtx addrop1 = XEXP (addr, 1);
2081
2082 if (!REG_P (addrop0))
2083 addrop0 = force_reg (GET_MODE (addrop0), addrop0);
2084
2085 if (!REG_P (addrop1))
2086 addrop1 = force_reg (GET_MODE (addrop1), addrop1);
2087
2088 canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
2089 }
2090
2091 else
2092 canon = force_reg (GET_MODE (addr), addr);
2093
2094 return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
2095 }
2096
2097 /* Check whether an rtx is an alignment mask, and if so, return
2098 a fully-expanded rtx for the masking operation. */
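/* Two shapes are recognized, for example (pseudo numbers arbitrary):

     (set (reg:DI 200) (and:DI (reg:DI 201) (const_int -16)))

   or, with the mask materialized in a separate register,

     (set (reg:DI 202) (const_int -16))
     (set (reg:DI 200) (and:DI (reg:DI 201) (reg:DI 202)))

   In both cases the value returned is the canonicalized AND built by
   alignment_with_canonical_addr.  */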
2099 static rtx
2100 alignment_mask (rtx_insn *insn)
2101 {
2102 rtx body = PATTERN (insn);
2103
2104 if (GET_CODE (body) != SET
2105 || GET_CODE (SET_SRC (body)) != AND
2106 || !REG_P (XEXP (SET_SRC (body), 0)))
2107 return 0;
2108
2109 rtx mask = XEXP (SET_SRC (body), 1);
2110
2111 if (CONST_INT_P (mask))
2112 {
2113 if (INTVAL (mask) == -16)
2114 return alignment_with_canonical_addr (SET_SRC (body));
2115 else
2116 return 0;
2117 }
2118
2119 if (!REG_P (mask))
2120 return 0;
2121
2122 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2123 df_ref use;
2124 rtx real_mask = 0;
2125
2126 FOR_EACH_INSN_INFO_USE (use, insn_info)
2127 {
2128 if (!rtx_equal_p (DF_REF_REG (use), mask))
2129 continue;
2130
2131 struct df_link *def_link = DF_REF_CHAIN (use);
2132 if (!def_link || def_link->next)
2133 return 0;
2134
2135 rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
2136 rtx const_body = PATTERN (const_insn);
2137 if (GET_CODE (const_body) != SET)
2138 return 0;
2139
2140 real_mask = SET_SRC (const_body);
2141
2142 if (!CONST_INT_P (real_mask)
2143 || INTVAL (real_mask) != -16)
2144 return 0;
2145 }
2146
2147 if (real_mask == 0)
2148 return 0;
2149
2150 return alignment_with_canonical_addr (SET_SRC (body));
2151 }
2152
2153 /* Given INSN that's a load or store based at BASE_REG, check if
2154 all of its feeding computations align its address on a 16-byte
2155 boundary. If so, return true and add all definition insns into
2156 AND_INSNS and their corresponding fully-expanded rtxes for the
2157 masking operations into AND_OPS. */
2158
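/* Note that BASE_REG may have several reaching definitions (for example,
   one in each arm of a conditional).  The loop below requires every one
   of them to be an alignment mask; each qualifying AND insn is recorded
   so that the callers can copy its base register next to it.  */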
2159 static bool
2160 find_alignment_op (rtx_insn *insn, rtx base_reg, vec<rtx_insn *> *and_insns,
2161 vec<rtx> *and_ops)
2162 {
2163 df_ref base_use;
2164 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2165 rtx and_operation = 0;
2166
2167 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
2168 {
2169 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
2170 continue;
2171
2172 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
2173 if (!base_def_link)
2174 return false;
2175
2176 while (base_def_link)
2177 {
2178 /* With stack-protector code enabled, and possibly in other
2179 circumstances, there may not be an associated insn for
2180 the def. */
2181 if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
2182 return false;
2183
2184 rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
2185 and_operation = alignment_mask (and_insn);
2186
2187 /* Stop if we find any one which doesn't align. */
2188 if (!and_operation)
2189 return false;
2190
2191 and_insns->safe_push (and_insn);
2192 and_ops->safe_push (and_operation);
2193 base_def_link = base_def_link->next;
2194 }
2195 }
2196
2197 return and_operation != 0;
2198 }
2199
2200 struct del_info { bool replace; rtx_insn *replace_insn; };
2201
2202 /* If INSN is the load for an lvx pattern, put it in canonical form. */
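/* Sketch of the canonicalization (register numbers are illustrative and
   the exact RTL of the permutes is elided):

     rB = rA & -16                     ; alignment insn
     vT = swapping-load (mem[rB])
     vU = doubleword-swap (vT)

   is rewritten so that the load reads mem[rA' & -16] directly into vT
   with no element reordering, where rA' is a fresh copy of rA emitted
   next to the alignment insn, and the trailing swap is later turned
   into the copy vU = vT by recombine_lvx_stvx_patterns.  */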
2203 static void
2204 recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
2205 {
2206 rtx body = PATTERN (insn);
2207 gcc_assert (GET_CODE (body) == SET
2208 && (GET_CODE (SET_SRC (body)) == VEC_SELECT
2209 || pattern_is_rotate64 (body))
2210 && MEM_P (XEXP (SET_SRC (body), 0)));
2211
2212 rtx mem = XEXP (SET_SRC (body), 0);
2213 rtx base_reg = XEXP (mem, 0);
2214
2215 auto_vec<rtx_insn *> and_insns;
2216 auto_vec<rtx> and_ops;
2217 bool is_any_def_and
2218 = find_alignment_op (insn, base_reg, &and_insns, &and_ops);
2219
2220 if (is_any_def_and)
2221 {
2222 gcc_assert (and_insns.length () == and_ops.length ());
2223 df_ref def;
2224 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2225 FOR_EACH_INSN_INFO_DEF (def, insn_info)
2226 {
2227 struct df_link *link = DF_REF_CHAIN (def);
2228 if (!link || link->next)
2229 break;
2230
2231 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
2232 if (!insn_is_swap_p (swap_insn)
2233 || insn_is_load_p (swap_insn)
2234 || insn_is_store_p (swap_insn))
2235 break;
2236
2237 /* Expected lvx pattern found. Change the swap to
2238 a copy, and propagate the AND operation into the
2239 load. */
2240 to_delete[INSN_UID (swap_insn)].replace = true;
2241 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
2242
2243 rtx new_reg = 0;
2244 rtx and_mask = 0;
2245 for (unsigned i = 0; i < and_insns.length (); i++)
2246 {
2247 /* However, first we must be sure that we make the
2248 base register from the AND operation available
2249 in case the register has been overwritten. Copy
2250 the base register to a new pseudo and use that
2251 as the base register of the AND operation in
2252 the new LVX instruction. */
2253 rtx_insn *and_insn = and_insns[i];
2254 rtx and_op = and_ops[i];
2255 rtx and_base = XEXP (and_op, 0);
2256 if (!new_reg)
2257 {
2258 new_reg = gen_reg_rtx (GET_MODE (and_base));
2259 and_mask = XEXP (and_op, 1);
2260 }
2261 rtx copy = gen_rtx_SET (new_reg, and_base);
2262 rtx_insn *new_insn = emit_insn_after (copy, and_insn);
2263 set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
2264 df_insn_rescan (new_insn);
2265 }
2266
2267 XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, and_mask);
2268 SET_SRC (body) = mem;
2269 INSN_CODE (insn) = -1; /* Force re-recognition. */
2270 df_insn_rescan (insn);
2271
2272 if (dump_file)
2273 fprintf (dump_file, "lvx opportunity found at %d\n",
2274 INSN_UID (insn));
2275 }
2276 }
2277 }
2278
2279 /* If INSN is the store for an stvx pattern, put it in canonical form. */
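/* The store-side analogue of recombine_lvx_pattern above: the alignment
   AND feeding the store address is folded into the store's MEM, the
   permuting wrapper is dropped from the stored value, and the swap that
   feeds the store is queued in TO_DELETE to be turned into a plain copy
   by recombine_lvx_stvx_patterns.  */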
2280 static void
2281 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
2282 {
2283 rtx body = PATTERN (insn);
2284 gcc_assert (GET_CODE (body) == SET
2285 && MEM_P (SET_DEST (body))
2286 && (GET_CODE (SET_SRC (body)) == VEC_SELECT
2287 || pattern_is_rotate64 (body)));
2288 rtx mem = SET_DEST (body);
2289 rtx base_reg = XEXP (mem, 0);
2290
2291 auto_vec<rtx_insn *> and_insns;
2292 auto_vec<rtx> and_ops;
2293 bool is_any_def_and
2294 = find_alignment_op (insn, base_reg, &and_insns, &and_ops);
2295
2296 if (is_any_def_and)
2297 {
2298 gcc_assert (and_insns.length () == and_ops.length ());
2299 rtx src_reg = XEXP (SET_SRC (body), 0);
2300 df_ref src_use;
2301 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2302 FOR_EACH_INSN_INFO_USE (src_use, insn_info)
2303 {
2304 if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
2305 continue;
2306
2307 struct df_link *link = DF_REF_CHAIN (src_use);
2308 if (!link || link->next)
2309 break;
2310
2311 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
2312 if (!insn_is_swap_p (swap_insn)
2313 || insn_is_load_p (swap_insn)
2314 || insn_is_store_p (swap_insn))
2315 break;
2316
2317 /* Expected stvx pattern found. Change the swap to
2318 a copy, and propagate the AND operation into the
2319 store. */
2320 to_delete[INSN_UID (swap_insn)].replace = true;
2321 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
2322
2323 rtx new_reg = 0;
2324 rtx and_mask = 0;
2325 for (unsigned i = 0; i < and_insns.length (); i++)
2326 {
2327 /* However, first we must be sure that we make the
2328 base register from the AND operation available
2329 in case the register has been overwritten. Copy
2330 the base register to a new pseudo and use that
2331 as the base register of the AND operation in
2332 the new STVX instruction. */
2333 rtx_insn *and_insn = and_insns[i];
2334 rtx and_op = and_ops[i];
2335 rtx and_base = XEXP (and_op, 0);
2336 if (!new_reg)
2337 {
2338 new_reg = gen_reg_rtx (GET_MODE (and_base));
2339 and_mask = XEXP (and_op, 1);
2340 }
2341 rtx copy = gen_rtx_SET (new_reg, and_base);
2342 rtx_insn *new_insn = emit_insn_after (copy, and_insn);
2343 set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
2344 df_insn_rescan (new_insn);
2345 }
2346
2347 XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, and_mask);
2348 SET_SRC (body) = src_reg;
2349 INSN_CODE (insn) = -1; /* Force re-recognition. */
2350 df_insn_rescan (insn);
2351
2352 if (dump_file)
2353 fprintf (dump_file, "stvx opportunity found at %d\n",
2354 INSN_UID (insn));
2355 }
2356 }
2357 }
2358
2359 /* Look for patterns created from builtin lvx and stvx calls, and
2360 canonicalize them to be properly recognized as such. */
2361 static void
2362 recombine_lvx_stvx_patterns (function *fun)
2363 {
2364 int i;
2365 basic_block bb;
2366 rtx_insn *insn;
2367
2368 int num_insns = get_max_uid ();
2369 del_info *to_delete = XCNEWVEC (del_info, num_insns);
2370
2371 FOR_ALL_BB_FN (bb, fun)
2372 FOR_BB_INSNS (bb, insn)
2373 {
2374 if (!NONDEBUG_INSN_P (insn))
2375 continue;
2376
2377 if (insn_is_load_p (insn) && insn_is_swap_p (insn))
2378 recombine_lvx_pattern (insn, to_delete);
2379 else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
2380 recombine_stvx_pattern (insn, to_delete);
2381 }
2382
2383 /* Turning swaps into copies is delayed until now, to avoid problems
2384 with deleting instructions during the insn walk. */
2385 for (i = 0; i < num_insns; i++)
2386 if (to_delete[i].replace)
2387 {
2388 rtx swap_body = PATTERN (to_delete[i].replace_insn);
2389 rtx src_reg = XEXP (SET_SRC (swap_body), 0);
2390 rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
2391 rtx_insn *new_insn = emit_insn_before (copy,
2392 to_delete[i].replace_insn);
2393 set_block_for_insn (new_insn,
2394 BLOCK_FOR_INSN (to_delete[i].replace_insn));
2395 df_insn_rescan (new_insn);
2396 df_insn_delete (to_delete[i].replace_insn);
2397 remove_insn (to_delete[i].replace_insn);
2398 to_delete[i].replace_insn->set_deleted ();
2399 }
2400
2401 free (to_delete);
2402 }
2403
2404 /* Main entry point for this pass. */
2405 unsigned int
2406 rs6000_analyze_swaps (function *fun)
2407 {
2408 swap_web_entry *insn_entry;
2409 basic_block bb;
2410 rtx_insn *insn, *curr_insn = 0;
2411
2412 /* Dataflow analysis for use-def chains. */
2413 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2414 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2415 df_analyze ();
2416 df_set_flags (DF_DEFER_INSN_RESCAN);
2417
2418 /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */
2419 recombine_lvx_stvx_patterns (fun);
2420
2421 /* Rebuild ud- and du-chains. */
2422 df_remove_problem (df_chain);
2423 df_process_deferred_rescans ();
2424 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2425 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2426 df_analyze ();
2427 df_set_flags (DF_DEFER_INSN_RESCAN);
2428
2429 /* Allocate structure to represent webs of insns. */
2430 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2431
2432 /* Walk the insns to gather basic data. */
2433 FOR_ALL_BB_FN (bb, fun)
2434 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2435 {
2436 unsigned int uid = INSN_UID (insn);
2437 if (NONDEBUG_INSN_P (insn))
2438 {
2439 insn_entry[uid].insn = insn;
2440
2441 if (GET_CODE (insn) == CALL_INSN)
2442 insn_entry[uid].is_call = 1;
2443
2444 /* Walk the uses and defs to see if we mention vector regs.
2445 Record any constraints on optimization of such mentions. */
2446 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2447 df_ref mention;
2448 FOR_EACH_INSN_INFO_USE (mention, insn_info)
2449 {
2450 /* We use DF_REF_REAL_REG here to get inside any subregs. */
2451 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
2452
2453 /* If a use gets its value from a call insn, it will be
2454 a hard register and will look like (reg:V4SI 3 3).
2455 The df analysis creates two mentions for GPR3 and GPR4,
2456 both DImode. We must recognize this and treat it as a
2457 vector mention to ensure the call is unioned with this
2458 use. */
2459 if (mode == DImode && DF_REF_INSN_INFO (mention))
2460 {
2461 rtx feeder = DF_REF_INSN (mention);
2462 /* FIXME: It is pretty hard to get from the df mention
2463 to the mode of the use in the insn. We arbitrarily
2464 pick a vector mode here, even though the use might
2465 be a real DImode. We can be too conservative
2466 (create a web larger than necessary) because of
2467 this, so consider eventually fixing this. */
2468 if (GET_CODE (feeder) == CALL_INSN)
2469 mode = V4SImode;
2470 }
2471
2472 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
2473 {
2474 insn_entry[uid].is_relevant = 1;
2475 if (mode == TImode || mode == V1TImode
2476 || FLOAT128_VECTOR_P (mode))
2477 insn_entry[uid].is_128_int = 1;
2478 if (DF_REF_INSN_INFO (mention))
2479 insn_entry[uid].contains_subreg
2480 = !rtx_equal_p (DF_REF_REG (mention),
2481 DF_REF_REAL_REG (mention));
2482 union_defs (insn_entry, insn, mention);
2483 }
2484 }
2485 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
2486 {
2487 /* We use DF_REF_REAL_REG here to get inside any subregs. */
2488 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
2489
2490 /* If we're loading up a hard vector register for a call,
2491 it looks like (set (reg:V4SI 9 9) (...)). The df
2492 analysis creates two mentions for GPR9 and GPR10, both
2493 DImode. So relying on the mode from the mentions
2494 isn't sufficient to ensure we union the call into the
2495 web with the parameter setup code. */
2496 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
2497 && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (PATTERN (insn)))))
2498 mode = GET_MODE (SET_DEST (PATTERN (insn)));
2499
2500 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
2501 {
2502 insn_entry[uid].is_relevant = 1;
2503 if (mode == TImode || mode == V1TImode
2504 || FLOAT128_VECTOR_P (mode))
2505 insn_entry[uid].is_128_int = 1;
2506 if (DF_REF_INSN_INFO (mention))
2507 insn_entry[uid].contains_subreg
2508 = !rtx_equal_p (DF_REF_REG (mention),
2509 DF_REF_REAL_REG (mention));
2510 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
2511 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
2512 insn_entry[uid].is_live_out = 1;
2513 union_uses (insn_entry, insn, mention);
2514 }
2515 }
2516
2517 if (insn_entry[uid].is_relevant)
2518 {
2519 /* Determine if this is a load or store. */
2520 insn_entry[uid].is_load = insn_is_load_p (insn);
2521 insn_entry[uid].is_store = insn_is_store_p (insn);
2522
2523 /* Determine if this is a doubleword swap. If not,
2524 determine whether it can legally be swapped. */
2525 if (insn_is_swap_p (insn))
2526 insn_entry[uid].is_swap = 1;
2527 else
2528 {
2529 unsigned int special = SH_NONE;
2530 insn_entry[uid].is_swappable
2531 = insn_is_swappable_p (insn_entry, insn, &special);
2532 if (special != SH_NONE && insn_entry[uid].contains_subreg)
2533 insn_entry[uid].is_swappable = 0;
2534 else if (special != SH_NONE)
2535 insn_entry[uid].special_handling = special;
2536 else if (insn_entry[uid].contains_subreg
2537 && has_part_mult (insn))
2538 insn_entry[uid].is_swappable = 0;
2539 else if (insn_entry[uid].contains_subreg)
2540 insn_entry[uid].special_handling = SH_SUBREG;
2541 }
2542 }
2543 }
2544 }
2545
2546 if (dump_file)
2547 {
2548 fprintf (dump_file, "\nSwap insn entry table when first built\n");
2549 dump_swap_insn_table (insn_entry);
2550 }
2551
2552 /* Record unoptimizable webs. */
2553 unsigned e = get_max_uid (), i;
2554 for (i = 0; i < e; ++i)
2555 {
2556 if (!insn_entry[i].is_relevant)
2557 continue;
2558
2559 swap_web_entry *root
2560 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
2561
2562 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
2563 || (insn_entry[i].contains_subreg
2564 && insn_entry[i].special_handling != SH_SUBREG)
2565 || insn_entry[i].is_128_int || insn_entry[i].is_call
2566 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
2567 root->web_not_optimizable = 1;
2568
2569 /* If we have loads or stores that aren't permuting then the
2570 optimization isn't appropriate. */
2571 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
2572 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
2573 root->web_not_optimizable = 1;
2574
2575 /* If we have a swap that is both fed by a permuting load
2576 and a feeder of a permuting store, then the optimization
2577 isn't appropriate. (Consider vec_xl followed by vec_xst_be.) */
2578 else if (insn_entry[i].is_swap && !insn_entry[i].is_load
2579 && !insn_entry[i].is_store
2580 && swap_feeds_both_load_and_store (&insn_entry[i]))
2581 root->web_not_optimizable = 1;
2582
2583 /* If we have permuting loads or stores that are not accompanied
2584 by a register swap, the optimization isn't appropriate. */
2585 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
2586 {
2587 rtx insn = insn_entry[i].insn;
2588 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2589 df_ref def;
2590
2591 FOR_EACH_INSN_INFO_DEF (def, insn_info)
2592 {
2593 struct df_link *link = DF_REF_CHAIN (def);
2594
2595 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
2596 {
2597 root->web_not_optimizable = 1;
2598 break;
2599 }
2600 }
2601 }
2602 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
2603 {
2604 rtx insn = insn_entry[i].insn;
2605 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2606 df_ref use;
2607
2608 FOR_EACH_INSN_INFO_USE (use, insn_info)
2609 {
2610 struct df_link *link = DF_REF_CHAIN (use);
2611
2612 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
2613 {
2614 root->web_not_optimizable = 1;
2615 break;
2616 }
2617 }
2618 }
2619 }
2620
2621 if (dump_file)
2622 {
2623 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
2624 dump_swap_insn_table (insn_entry);
2625 }
2626
2627 /* For each load and store in an optimizable web (which implies
2628 the loads and stores are permuting), find the associated
2629 register swaps and mark them for removal. Due to various
2630 optimizations we may mark the same swap more than once. Also
2631 perform special handling for swappable insns that require it. */
2632 for (i = 0; i < e; ++i)
2633 if ((insn_entry[i].is_load || insn_entry[i].is_store)
2634 && insn_entry[i].is_swap)
2635 {
2636 swap_web_entry* root_entry
2637 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
2638 if (!root_entry->web_not_optimizable)
2639 mark_swaps_for_removal (insn_entry, i);
2640 }
2641 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
2642 {
2643 swap_web_entry* root_entry
2644 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
2645 if (!root_entry->web_not_optimizable)
2646 handle_special_swappables (insn_entry, i);
2647 }
2648
2649 /* Now delete the swaps marked for removal. */
2650 for (i = 0; i < e; ++i)
2651 if (insn_entry[i].will_delete)
2652 replace_swap_with_copy (insn_entry, i);
2653
2654 /* Clean up. */
2655 free (insn_entry);
2656
2657 /* Use a second pass over rtl to detect that certain vector values
2658 fetched from or stored to memory on quad-word aligned addresses
2659 can use lvx/stvx without swaps. */
2660
2661 /* First, rebuild ud chains. */
2662 df_remove_problem (df_chain);
2663 df_process_deferred_rescans ();
2664 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2665 df_chain_add_problem (DF_UD_CHAIN);
2666 df_analyze ();
2667
2668 swap_web_entry *pass2_insn_entry;
2669 pass2_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2670
2671 /* Walk the insns to gather basic data. */
2672 FOR_ALL_BB_FN (bb, fun)
2673 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2674 {
2675 unsigned int uid = INSN_UID (insn);
2676 if (NONDEBUG_INSN_P (insn))
2677 {
2678 pass2_insn_entry[uid].insn = insn;
2679
2680 pass2_insn_entry[uid].is_relevant = 1;
2681 pass2_insn_entry[uid].is_load = insn_is_load_p (insn);
2682 pass2_insn_entry[uid].is_store = insn_is_store_p (insn);
2683
2684 /* Determine if this is a doubleword swap. If not,
2685 determine whether it can legally be swapped. */
2686 if (insn_is_swap_p (insn))
2687 pass2_insn_entry[uid].is_swap = 1;
2688 }
2689 }
2690
2691 e = get_max_uid ();
2692 for (unsigned i = 0; i < e; ++i)
2693 if (pass2_insn_entry[i].is_swap && !pass2_insn_entry[i].is_load
2694 && !pass2_insn_entry[i].is_store)
2695 {
2696 /* Replace swap of aligned load-swap with aligned unswapped
2697 load. */
2698 rtx_insn *swap_insn = pass2_insn_entry[i].insn;
2699 if (quad_aligned_load_p (pass2_insn_entry, swap_insn))
2700 replace_swapped_aligned_load (pass2_insn_entry, swap_insn);
2701 }
2702 else if (pass2_insn_entry[i].is_swap && pass2_insn_entry[i].is_store)
2703 {
2704 /* Replace aligned store-swap of swapped value with aligned
2705 unswapped store. */
2706 rtx_insn *store_insn = pass2_insn_entry[i].insn;
2707 if (quad_aligned_store_p (pass2_insn_entry, store_insn))
2708 replace_swapped_aligned_store (pass2_insn_entry, store_insn);
2709 }
2710
2711 /* Clean up. */
2712 free (pass2_insn_entry);
2713
2714 /* Use a third pass over rtl to replace swap(load(vector constant))
2715 with load(swapped vector constant). */
2716
2717 /* First, rebuild ud chains. */
2718 df_remove_problem (df_chain);
2719 df_process_deferred_rescans ();
2720 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2721 df_chain_add_problem (DF_UD_CHAIN);
2722 df_analyze ();
2723
2724 swap_web_entry *pass3_insn_entry;
2725 pass3_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2726
2727 /* Walk the insns to gather basic data. */
2728 FOR_ALL_BB_FN (bb, fun)
2729 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2730 {
2731 unsigned int uid = INSN_UID (insn);
2732 if (NONDEBUG_INSN_P (insn))
2733 {
2734 pass3_insn_entry[uid].insn = insn;
2735
2736 pass3_insn_entry[uid].is_relevant = 1;
2737 pass3_insn_entry[uid].is_load = insn_is_load_p (insn);
2738 pass3_insn_entry[uid].is_store = insn_is_store_p (insn);
2739
2740 /* Determine if this is a doubleword swap. If not,
2741 determine whether it can legally be swapped. */
2742 if (insn_is_swap_p (insn))
2743 pass3_insn_entry[uid].is_swap = 1;
2744 }
2745 }
2746
2747 e = get_max_uid ();
2748 for (unsigned i = 0; i < e; ++i)
2749 if (pass3_insn_entry[i].is_swap && !pass3_insn_entry[i].is_load
2750 && !pass3_insn_entry[i].is_store)
2751 {
2752 insn = pass3_insn_entry[i].insn;
2753 if (const_load_sequence_p (pass3_insn_entry, insn))
2754 replace_swapped_load_constant (pass3_insn_entry, insn);
2755 }
2756
2757 /* Clean up. */
2758 free (pass3_insn_entry);
2759 return 0;
2760 }
2761
2762 const pass_data pass_data_analyze_swaps =
2763 {
2764 RTL_PASS, /* type */
2765 "swaps", /* name */
2766 OPTGROUP_NONE, /* optinfo_flags */
2767 TV_NONE, /* tv_id */
2768 0, /* properties_required */
2769 0, /* properties_provided */
2770 0, /* properties_destroyed */
2771 0, /* todo_flags_start */
2772 TODO_df_finish, /* todo_flags_finish */
2773 };
2774
2775 class pass_analyze_swaps : public rtl_opt_pass
2776 {
2777 public:
2778 pass_analyze_swaps(gcc::context *ctxt)
2779 : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
2780 {}
2781
2782 /* opt_pass methods: */
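/* The pass is useful only for little-endian VSX code that loads and
   stores vectors with lxvd2x/stxvd2x plus xxswapdi (the Power8
   sequences); with Power9's lxvx/stxvx, which do not permute
   doublewords, the extra swaps are not generated, hence the
   !TARGET_P9_VECTOR test.  rs6000_optimize_swaps is the flag behind
   the -moptimize-swaps option.  */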
2783 virtual bool gate (function *)
2784 {
2785 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
2786 && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
2787 }
2788
2789 virtual unsigned int execute (function *fun)
2790 {
2791 return rs6000_analyze_swaps (fun);
2792 }
2793
2794 opt_pass *clone ()
2795 {
2796 return new pass_analyze_swaps (m_ctxt);
2797 }
2798
2799 }; // class pass_analyze_swaps
2800
2801 rtl_opt_pass *
2802 make_pass_analyze_swaps (gcc::context *ctxt)
2803 {
2804 return new pass_analyze_swaps (ctxt);
2805 }
2806