/* Subroutines used to remove unnecessary doubleword swaps
   for p8 little-endian VSX code.
   Copyright (C) 1991-2022 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "coretypes.h"
#include "print-tree.h"
#include "tree-pass.h"
#include "rtx-vector-builder.h"
/* Analyze vector computations and remove unnecessary doubleword
   swaps (xxswapdi instructions).  This pass is performed only
   for little-endian VSX code generation.

   For this specific case, loads and stores of 4x32 and 2x64 vectors
   are inefficient.  These are implemented using the lxvd2x and
   stxvd2x instructions, which invert the order of doublewords in
   a vector register.  Thus the code generation inserts an xxswapdi
   after each such load, and prior to each such store.  (For spill
   code after register assignment, an additional xxswapdi is inserted
   following each store in order to return a hard register to its
   unpermuted value.)

   The extra xxswapdi instructions reduce performance.  This can be
   particularly bad for vectorized code.  The purpose of this pass
   is to reduce the number of xxswapdi instructions required for
   correctness.
   The primary insight is that much code that operates on vectors
   does not care about the relative order of elements in a register,
   so long as the correct memory order is preserved.  If we have
   a computation where all input values are provided by lxvd2x/xxswapdi
   sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
   and all intermediate computations are pure SIMD (independent of
   element order), then all the xxswapdi's associated with the loads
   and stores may be removed.
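
   As an illustrative sketch (not verbatim compiler output), a simple
   vector copy looks something like

     lxvd2x  0,0,9      # load; doublewords arrive reversed
     xxswapd 0,0        # swap inserted to restore element order
     xxswapd 0,0        # swap inserted to re-reverse the value
     stxvd2x 0,0,10     # store; doublewords are written reversed

   where the two xxswapd instructions cancel, so both can be removed.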
   This pass uses some of the infrastructure and logical ideas from
   the "web" pass in web.c.  We create maximal webs of computations
   fitting the description above using union-find.  Each such web is
   then optimized by removing its unnecessary xxswapdi instructions.

   The pass is placed prior to global optimization so that we can
   perform the optimization in the safest and simplest way possible;
   that is, by replacing each xxswapdi insn with a register copy insn.
   Subsequent forward propagation will remove copies where possible.
   There are some operations sensitive to element order for which we
   can still allow the operation, provided we modify those operations.
   These include CONST_VECTORs, for which we must swap the first and
   second halves of the constant vector; and SUBREGs, for which we
   must adjust the byte offset to account for the swapped doublewords.
   A remaining opportunity would be non-immediate-form splats, for
   which we should adjust the selected lane of the input.  We should
   also make code generation adjustments for sum-across operations,
   since this is a common vectorizer reduction.
   Because we run prior to the first split, we can see loads and stores
   here that match *vsx_le_perm_{load,store}_<mode>.  These are vanilla
   vector loads and stores that have not yet been split into a permuting
   load/store and a swap.  (One way this can happen is with a builtin
   call to vec_vsx_{ld,st}.)  We can handle these as well, but rather
   than deleting a swap, we convert the load/store into a permuting
   load/store (which effectively removes the swap).  */
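
/* As a concrete (purely illustrative) example of the kind of web this
   pass targets, consider a loop such as

     vector double *a, *b, *c;
     for (i = 0; i < n; i++)
       c[i] = a[i] + b[i];

   Every load feeds an xxswapdi, every store is fed by an xxswapdi, and
   the vector add is element-order independent, so the entire web can
   have its swaps removed.  */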
/* We do not currently handle computations that contain permutes.  There
   is a general transformation that can be performed correctly, but it
   may introduce more expensive code than it replaces.  To handle these
   would require a cost model to determine when to perform the optimization.
   This commentary records how this could be done if desired.
   The most general permute is something like this (example for V16QI):

   (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
                     (parallel [(const_int a0) (const_int a1)
                                 ...
                                (const_int a14) (const_int a15)]))

   where a0,...,a15 are in [0,31] and select which elements from op1
   and op2 appear in the result.
   Regardless of mode, we can convert the PARALLEL to a mask of 16
   byte-element selectors.  Let's call this M, with M[i] representing
   the ith byte-element selector value.  Then if we swap doublewords
   throughout the computation, we can get correct behavior by replacing
   M with M' as follows:

     M'[i] = { (M[i]+8)%16      : M[i] in [0,15]
             { ((M[i]+8)%16)+16 : M[i] in [16,31]
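
   As a hedged sketch (illustrative only; the names M and M_prime are
   invented and this code is not part of the pass), the new mask could
   be computed as:

     for (int i = 0; i < 16; ++i)
       M_prime[i] = (M[i] < 16
                     ? (M[i] + 8) % 16
                     : ((M[i] + 8) % 16) + 16);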
   This seems promising at first, since we are just replacing one mask
   with another.  But certain masks are preferable to others.  If M
   is a mask that matches a vmrghh pattern, for example, M' certainly
   will not.  Instead of a single vmrghh, we would generate a load of
   M' and a vperm.  So we would need to know how many xxswapdi's we can
   remove as a result of this transformation to determine if it's
   profitable; and preferably the logic would need to be aware of all
   the special preferable masks.
   Another form of permute is an UNSPEC_VPERM, in which the mask is
   already in a register.  In some cases, this mask may be a constant
   that we can discover with ud-chains, in which case the above
   transformation is ok.  However, the common usage here is for the
   mask to be produced by an UNSPEC_LVSL, in which case the mask
   cannot be known at compile time.  In such a case we would have to
   generate several instructions to compute M' as above at run time,
   and a cost model is needed again.
   However, when the mask M for an UNSPEC_VPERM is loaded from the
   constant pool, we can replace M with M' as above at no cost
   beyond adding a constant pool entry.  */
/* This is based on the union-find logic in web.c.  web_entry_base is
   defined in df.h.  */
class swap_web_entry : public web_entry_base
{
 public:
  /* Pointer to the insn.  */
  rtx_insn *insn;
  /* Set if insn contains a mention of a vector register.  All other
     fields are undefined if this field is unset.  */
  unsigned int is_relevant : 1;
  /* Set if insn is a load.  */
  unsigned int is_load : 1;
  /* Set if insn is a store.  */
  unsigned int is_store : 1;
  /* Set if insn is a doubleword swap.  This can either be a register swap
     or a permuting load or store (test is_load and is_store for this).  */
  unsigned int is_swap : 1;
  /* Set if the insn has a live-in use of a parameter register.  */
  unsigned int is_live_in : 1;
  /* Set if the insn has a live-out def of a return register.  */
  unsigned int is_live_out : 1;
  /* Set if the insn contains a subreg reference of a vector register.  */
  unsigned int contains_subreg : 1;
  /* Set if the insn contains a 128-bit integer operand.  */
  unsigned int is_128_int : 1;
  /* Set if this is a call-insn.  */
  unsigned int is_call : 1;
  /* Set if this insn does not perform a vector operation for which
     element order matters, or if we know how to fix it up if it does.
     Undefined if is_swap is set.  */
  unsigned int is_swappable : 1;
  /* A nonzero value indicates what kind of special handling for this
     insn is required if doublewords are swapped.  Undefined if
     is_swappable is not set.  */
  unsigned int special_handling : 4;
  /* Set if the web represented by this entry cannot be optimized.  */
  unsigned int web_not_optimizable : 1;
  /* Set if this insn should be deleted.  */
  unsigned int will_delete : 1;
};
enum special_handling_values {
  SH_NONE = 0,
  SH_CONST_VECTOR,
  SH_SUBREG,
  SH_NOSWAP_LD,
  SH_NOSWAP_ST,
  SH_EXTRACT,
  SH_SPLAT,
  SH_XXPERMDI,
  SH_CONCAT,
  SH_VPERM
};
/* Union INSN with all insns containing definitions that reach USE.
   Detect whether USE is live-in to the current function.  */
static void
union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
{
  struct df_link *link = DF_REF_CHAIN (use);

  if (!link)
    insn_entry[INSN_UID (insn)].is_live_in = 1;

  while (link)
    {
      if (DF_REF_IS_ARTIFICIAL (link->ref))
	insn_entry[INSN_UID (insn)].is_live_in = 1;

      if (DF_REF_INSN_INFO (link->ref))
	{
	  rtx def_insn = DF_REF_INSN (link->ref);
	  (void)unionfind_union (insn_entry + INSN_UID (insn),
				 insn_entry + INSN_UID (def_insn));
	}

      link = link->next;
    }
}
/* Union INSN with all insns containing uses reached from DEF.
   Detect whether DEF is live-out from the current function.  */
static void
union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
{
  struct df_link *link = DF_REF_CHAIN (def);

  if (!link)
    insn_entry[INSN_UID (insn)].is_live_out = 1;

  while (link)
    {
      /* This could be an eh use or some other artificial use;
	 we treat these all the same (killing the optimization).  */
      if (DF_REF_IS_ARTIFICIAL (link->ref))
	insn_entry[INSN_UID (insn)].is_live_out = 1;

      if (DF_REF_INSN_INFO (link->ref))
	{
	  rtx use_insn = DF_REF_INSN (link->ref);
	  (void)unionfind_union (insn_entry + INSN_UID (insn),
				 insn_entry + INSN_UID (use_insn));
	}

      link = link->next;
    }
}
/* Return 1 iff PAT (a SINGLE_SET) is a 64-bit rotate expression; else
   return 0.  */
static bool
pattern_is_rotate64 (rtx pat)
{
  rtx rot = SET_SRC (pat);

  if (GET_CODE (rot) == ROTATE && CONST_INT_P (XEXP (rot, 1))
      && INTVAL (XEXP (rot, 1)) == 64)
    return true;

  return false;
}
/* Return 1 iff INSN is a load insn, including permuting loads that
   represent an lxvd2x instruction; else return 0.  */
static unsigned int
insn_is_load_p (rtx insn)
{
  rtx body = PATTERN (insn);

  if (GET_CODE (body) == SET)
    {
      if (MEM_P (SET_SRC (body)))
	return 1;

      if (GET_CODE (SET_SRC (body)) == VEC_SELECT
	  && MEM_P (XEXP (SET_SRC (body), 0)))
	return 1;

      if (pattern_is_rotate64 (body) && MEM_P (XEXP (SET_SRC (body), 0)))
	return 1;

      return 0;
    }

  if (GET_CODE (body) != PARALLEL)
    return 0;

  rtx set = XVECEXP (body, 0, 0);

  if (GET_CODE (set) == SET && MEM_P (SET_SRC (set)))
    return 1;

  return 0;
}
/* Return 1 iff INSN is a store insn, including permuting stores that
   represent an stxvd2x instruction; else return 0.  */
static unsigned int
insn_is_store_p (rtx insn)
{
  rtx body = PATTERN (insn);
  if (GET_CODE (body) == SET && MEM_P (SET_DEST (body)))
    return 1;
  if (GET_CODE (body) != PARALLEL)
    return 0;
  rtx set = XVECEXP (body, 0, 0);
  if (GET_CODE (set) == SET && MEM_P (SET_DEST (set)))
    return 1;
  return 0;
}
/* Return 1 iff INSN swaps doublewords.  This may be a reg-reg swap,
   a permuting load, or a permuting store.  */
static unsigned int
insn_is_swap_p (rtx insn)
{
  rtx body = PATTERN (insn);
  if (GET_CODE (body) != SET)
    return 0;
  rtx rhs = SET_SRC (body);
  if (pattern_is_rotate64 (body))
    return 1;
  if (GET_CODE (rhs) != VEC_SELECT)
    return 0;
  rtx parallel = XEXP (rhs, 1);
  if (GET_CODE (parallel) != PARALLEL)
    return 0;
  unsigned int len = XVECLEN (parallel, 0);
  if (len != 2 && len != 4 && len != 8 && len != 16)
    return 0;
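  /* A doubleword swap selects the second half of the elements followed
     by the first half; for V4SI, for example, the selector must be
     [2 3 0 1].  Verify that shape below.  */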
  for (unsigned int i = 0; i < len / 2; ++i)
    {
      rtx op = XVECEXP (parallel, 0, i);
      if (!CONST_INT_P (op) || INTVAL (op) != len / 2 + i)
	return 0;
    }
  for (unsigned int i = len / 2; i < len; ++i)
    {
      rtx op = XVECEXP (parallel, 0, i);
      if (!CONST_INT_P (op) || INTVAL (op) != i - len / 2)
	return 0;
    }
  return 1;
}
/* Return true iff EXPR represents the sum of two registers.  */
static bool
rs6000_sum_of_two_registers_p (const_rtx expr)
{
  if (GET_CODE (expr) == PLUS)
    {
      const_rtx operand1 = XEXP (expr, 0);
      const_rtx operand2 = XEXP (expr, 1);
      return (REG_P (operand1) && REG_P (operand2));
    }
  return false;
}
/* Return true iff EXPR represents an address expression that masks off
   the low-order 4 bits in the style of an lvx or stvx rtl pattern.  */
static bool
rs6000_quadword_masked_address_p (const_rtx expr)
{
  if (GET_CODE (expr) == AND)
    {
      const_rtx operand1 = XEXP (expr, 0);
      const_rtx operand2 = XEXP (expr, 1);
      if ((REG_P (operand1) || rs6000_sum_of_two_registers_p (operand1))
	  && CONST_SCALAR_INT_P (operand2) && INTVAL (operand2) == -16)
	return true;
    }
  return false;
}
/* Return TRUE if INSN represents a swap of a swapped load from memory
   and the memory address is quad-word aligned.  */
static bool
quad_aligned_load_p (swap_web_entry *insn_entry, rtx_insn *insn)
{
  unsigned uid = INSN_UID (insn);
  if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
    return false;

  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);

  /* Since insn is known to represent a swap instruction, we know it
     "uses" only one input variable.  */
  df_ref use = DF_INSN_INFO_USES (insn_info);

  /* Figure out where this input variable is defined.  */
  struct df_link *def_link = DF_REF_CHAIN (use);

  /* If there is no definition or the definition is artificial or there are
     multiple definitions, punt.  */
  if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
      || def_link->next)
    return false;

  rtx def_insn = DF_REF_INSN (def_link->ref);
  unsigned uid2 = INSN_UID (def_insn);
  /* We're looking for a load-with-swap insn.  If this is not that,
     return false.  */
  if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
    return false;

  /* If the source of the rtl def is not a set from memory, return
     false.  */
  rtx body = PATTERN (def_insn);
  if (GET_CODE (body) != SET
      || !(GET_CODE (SET_SRC (body)) == VEC_SELECT
	   || pattern_is_rotate64 (body))
      || !MEM_P (XEXP (SET_SRC (body), 0)))
    return false;

  rtx mem = XEXP (SET_SRC (body), 0);
  rtx base_reg = XEXP (mem, 0);
  return ((REG_P (base_reg) || rs6000_sum_of_two_registers_p (base_reg))
	  && MEM_ALIGN (mem) >= 128);
}
/* Return TRUE if INSN represents a store-with-swap of a swapped value
   and the memory address is quad-word aligned.  */
static bool
quad_aligned_store_p (swap_web_entry *insn_entry, rtx_insn *insn)
{
  unsigned uid = INSN_UID (insn);
  if (!insn_entry[uid].is_swap || !insn_entry[uid].is_store)
    return false;

  rtx body = PATTERN (insn);
  rtx dest_address = XEXP (SET_DEST (body), 0);
  rtx swap_reg = XEXP (SET_SRC (body), 0);

  /* If the base address for the memory expression is not represented
     by a single register and is not the sum of two registers, punt.  */
  if (!REG_P (dest_address) && !rs6000_sum_of_two_registers_p (dest_address))
    return false;

  /* Confirm that the value to be stored is produced by a swap
     instruction.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);

      /* If this is not the definition of the candidate swap register,
	 then skip it.  We are interested in a different definition.  */
      if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
	continue;

      /* If there is no def or the def is artificial or there are
	 multiple defs, punt.  */
      if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
	  || def_link->next)
	return false;

      rtx def_insn = DF_REF_INSN (def_link->ref);
      unsigned uid2 = INSN_UID (def_insn);

      /* If this source value is not a simple swap, return false.  */
      if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load
	  || insn_entry[uid2].is_store)
	return false;

      /* We've processed the use we care about, so break out of
	 this loop.  */
      break;
    }

  /* At this point, we know the source data comes from a swap.  The
     remaining question is whether the memory address is aligned.  */
  rtx set = single_set (insn);

  rtx dest = SET_DEST (set);

  return (MEM_ALIGN (dest) >= 128);
}
/* Return 1 iff UID, known to reference a swap, is both fed by a load
   and a feeder of a store.  */
static bool
swap_feeds_both_load_and_store (swap_web_entry *insn_entry)
{
  rtx insn = insn_entry->insn;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use, def;
  struct df_link *link = 0;
  rtx_insn *load = 0, *store = 0;
  bool fed_by_load = 0;
  bool feeds_store = 0;

  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      link = DF_REF_CHAIN (use);
      load = DF_REF_INSN (link->ref);
      if (insn_is_load_p (load) && insn_is_swap_p (load))
	fed_by_load = 1;
    }

  FOR_EACH_INSN_INFO_DEF (def, insn_info)
    {
      link = DF_REF_CHAIN (def);
      store = DF_REF_INSN (link->ref);
      if (insn_is_store_p (store) && insn_is_swap_p (store))
	feeds_store = 1;
    }

  return fed_by_load && feeds_store;
}
/* Return TRUE if insn is a swap fed by a load from the constant pool.  */
static bool
const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
{
  unsigned uid = INSN_UID (insn);
  if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
    return false;

  const_rtx tocrel_base;

  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;

  /* Iterate over the definitions that are used by this insn.  Since
     this is known to be a swap insn, expect only one used definition.  */
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);

      /* If there is no def or the def is artificial or there are
	 multiple defs, punt.  */
      if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
	  || def_link->next)
	return false;

      rtx def_insn = DF_REF_INSN (def_link->ref);
      unsigned uid2 = INSN_UID (def_insn);
      /* If this is not a load or is not a swap, return false.  */
      if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
	return false;

      /* If the source of the rtl def is not a set from memory, return
	 false.  */
      rtx body = PATTERN (def_insn);
      if (GET_CODE (body) != SET
	  || !(GET_CODE (SET_SRC (body)) == VEC_SELECT
	       || pattern_is_rotate64 (body))
	  || !MEM_P (XEXP (SET_SRC (body), 0)))
	return false;

      rtx mem = XEXP (SET_SRC (body), 0);
      rtx base_reg = XEXP (mem, 0);
      /* If the base address for the memory expression is not
	 represented by a register, punt.  */
      if (!REG_P (base_reg))
	return false;

      df_ref base_use;
      insn_info = DF_INSN_INFO_GET (def_insn);
      FOR_EACH_INSN_INFO_USE (base_use, insn_info)
	{
	  /* If base_use does not represent base_reg, look for another
	     use.  */
	  if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
	    continue;

	  struct df_link *base_def_link = DF_REF_CHAIN (base_use);
	  if (!base_def_link || base_def_link->next)
	    return false;

	  /* Constants held on the stack are not "true" constants
	     because their values are not part of the static load
	     image.  If this constant's base reference is a stack
	     or frame pointer, it is seen as an artificial
	     reference.  */
	  if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
	    return false;

	  rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
	  rtx tocrel_body = PATTERN (tocrel_insn);
	  rtx base, offset;
	  if (GET_CODE (tocrel_body) != SET)
	    return false;
	  /* There is an extra level of indirection for small/large
	     code models.  */
	  rtx tocrel_expr = SET_SRC (tocrel_body);
	  if (MEM_P (tocrel_expr))
	    tocrel_expr = XEXP (tocrel_expr, 0);
	  if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
	    return false;
	  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);

	  if (!SYMBOL_REF_P (base) || !CONSTANT_POOL_ADDRESS_P (base))
	    return false;

	  /* FIXME: The conditions under which
	     (SYMBOL_REF_P (const_vector)
	      && !CONSTANT_POOL_ADDRESS_P (const_vector))
	     are not well understood.  This code prevents
	     an internal compiler error which will occur in
	     replace_swapped_load_constant () if we were to return
	     true.  Some day, we should figure out how to properly
	     handle this condition in
	     replace_swapped_load_constant () and then we can
	     remove this special test.  */
	  rtx const_vector = get_pool_constant (base);
	  if (SYMBOL_REF_P (const_vector)
	      && CONSTANT_POOL_ADDRESS_P (const_vector))
	    const_vector = get_pool_constant (const_vector);
	  if (GET_CODE (const_vector) != CONST_VECTOR)
	    return false;
	}
    }
  return true;
}
/* Return TRUE iff OP matches a V2DF reduction pattern.  See the
   definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md.  */
static bool
v2df_reduction_p (rtx op)
{
  if (GET_MODE (op) != V2DFmode)
    return false;

  enum rtx_code code = GET_CODE (op);
  if (code != PLUS && code != SMIN && code != SMAX)
    return false;

  rtx concat = XEXP (op, 0);
  if (GET_CODE (concat) != VEC_CONCAT)
    return false;

  rtx select0 = XEXP (concat, 0);
  rtx select1 = XEXP (concat, 1);
  if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
    return false;

  rtx reg0 = XEXP (select0, 0);
  rtx reg1 = XEXP (select1, 0);
  if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
    return false;

  rtx parallel0 = XEXP (select0, 1);
  rtx parallel1 = XEXP (select1, 1);
  if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
    return false;

  if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
      || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
    return false;

  return true;
}
/* Return 1 iff OP is an operand that will not be affected by having
   vector doublewords swapped in memory.  */
static unsigned int
rtx_is_swappable_p (rtx op, unsigned int *special)
{
  enum rtx_code code = GET_CODE (op);
  int i, j;
  rtx parallel;

  switch (code)
    {
    case CONST_VECTOR:
      *special = SH_CONST_VECTOR;
      return 1;

    case VEC_DUPLICATE:
      /* Opportunity: If XEXP (op, 0) has the same mode as the result,
	 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
	 it represents a vector splat for which we can do special
	 handling.  */
      if (CONST_INT_P (XEXP (op, 0)))
	return 1;
      else if (REG_P (XEXP (op, 0))
	       && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
	/* This catches V2DF and V2DI splat, at a minimum.  */
	return 1;
      else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
	       && REG_P (XEXP (XEXP (op, 0), 0))
	       && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
	/* This catches splat of a truncated value.  */
	return 1;
      else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
	/* If the duplicated item is from a select, defer to the select
	   processing to see if we can change the lane for the splat.  */
	return rtx_is_swappable_p (XEXP (op, 0), special);
      else
	return 0;
    case VEC_SELECT:
      /* A vec_extract operation is ok if we change the lane.  */
      if (REG_P (XEXP (op, 0))
	  && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
	  && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
	  && XVECLEN (parallel, 0) == 1
	  && CONST_INT_P (XVECEXP (parallel, 0, 0)))
	{
	  *special = SH_EXTRACT;
	  return 1;
	}
      /* An XXPERMDI is ok if we adjust the lanes.  Note that if the
	 XXPERMDI is a swap operation, it will be identified by
	 insn_is_swap_p and therefore we won't get here.  */
      else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
	       && (GET_MODE (XEXP (op, 0)) == V4DFmode
		   || GET_MODE (XEXP (op, 0)) == V4DImode)
	       && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
	       && XVECLEN (parallel, 0) == 2
	       && CONST_INT_P (XVECEXP (parallel, 0, 0))
	       && CONST_INT_P (XVECEXP (parallel, 0, 1)))
	{
	  *special = SH_XXPERMDI;
	  return 1;
	}
      else if (v2df_reduction_p (op))
	return 1;
      else
	return 0;
    case UNSPEC:
      {
	/* Various operations are unsafe for this optimization, at least
	   without significant additional work.  Permutes are obviously
	   problematic, as both the permute control vector and the ordering
	   of the target values are invalidated by doubleword swapping.
	   Vector pack and unpack modify the number of vector lanes.
	   Merge-high/low will not operate correctly on swapped operands.
	   Vector shifts across element boundaries are clearly uncool,
	   as are vector select and concatenate operations.  Vector
	   sum-across instructions define one operand with a specific
	   order-dependent element, so additional fixup code would be
	   needed to make those work.  Vector set and non-immediate-form
	   vector splat are element-order sensitive.  A few of these
	   cases might be workable with special handling if required.
	   Adding cost modeling would be appropriate in some cases.  */
	int val = XINT (op, 1);
	switch (val)
	  {
	  default:
	    break;
	  case UNSPEC_VPACK_SIGN_SIGN_SAT:
	  case UNSPEC_VPACK_SIGN_UNS_SAT:
	  case UNSPEC_VPACK_UNS_UNS_MOD:
	  case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
	  case UNSPEC_VPACK_UNS_UNS_SAT:
	  case UNSPEC_VPERM_UNS:
	  case UNSPEC_VPERMXOR:
	  case UNSPEC_VSUM2SWS:
	  case UNSPEC_VSUM4UBS:
	  case UNSPEC_VSUMSWS_DIRECT:
	  case UNSPEC_VSX_CONCAT:
	  case UNSPEC_VSX_CVDPSPN:
	  case UNSPEC_VSX_CVSPDP:
	  case UNSPEC_VSX_CVSPDPN:
	  case UNSPEC_VSX_EXTRACT:
	  case UNSPEC_VSX_SLDWI:
	  case UNSPEC_VSX_VSLO:
	  case UNSPEC_VUNPACK_HI_SIGN:
	  case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
	  case UNSPEC_VUNPACK_LO_SIGN:
	  case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
	  case UNSPEC_VUPKHS_V4SF:
	  case UNSPEC_VUPKHU_V4SF:
	  case UNSPEC_VUPKLS_V4SF:
	  case UNSPEC_VUPKLU_V4SF:
	    return 0;
	  case UNSPEC_VSPLT_DIRECT:
	  case UNSPEC_VSX_XXSPLTD:
	    *special = SH_SPLAT;
	    return 1;
	  case UNSPEC_REDUC_PLUS:
	    return 1;
	  case UNSPEC_VPMSUM:
	    /* vpmsumd is not swappable, but vpmsum[bhw] are.  */
	    if (GET_MODE (op) == V2DImode)
	      return 0;
	    break;
	  }
	break;
      }

    default:
      break;
    }
  const char *fmt = GET_RTX_FORMAT (code);
  int ok = 1;

  for (i = 0; i < GET_RTX_LENGTH (code); ++i)
    if (fmt[i] == 'e' || fmt[i] == 'u')
      {
	unsigned int special_op = SH_NONE;
	ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
	if (special_op == SH_NONE)
	  continue;
	/* Ensure we never have two kinds of special handling
	   for the same insn.  */
	if (*special != SH_NONE && *special != special_op)
	  return 0;
	*special = special_op;
      }
    else if (fmt[i] == 'E')
      for (j = 0; j < XVECLEN (op, i); ++j)
	{
	  unsigned int special_op = SH_NONE;
	  ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
	  if (special_op == SH_NONE)
	    continue;
	  /* Ensure we never have two kinds of special handling
	     for the same insn.  */
	  if (*special != SH_NONE && *special != special_op)
	    return 0;
	  *special = special_op;
	}

  return ok;
}
/* Return 1 iff INSN is an operand that will not be affected by
   having vector doublewords swapped in memory (in which case
   *SPECIAL is unchanged), or that can be modified to be correct
   if vector doublewords are swapped in memory (in which case
   *SPECIAL is changed to a value indicating how).  */
static unsigned int
insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
		     unsigned int *special)
{
  /* Calls are always bad.  */
  if (GET_CODE (insn) == CALL_INSN)
    return 0;

  /* Loads and stores seen here are not permuting, but we can still
     fix them up by converting them to permuting ones.  Exceptions:
     UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
     body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
     for the SET source.  Also we must now make an exception for lvx
     and stvx when they are not in the UNSPEC_LVX/STVX form (with the
     explicit "& -16") since this leads to unrecognizable insns.  */
  rtx body = PATTERN (insn);
  int i = INSN_UID (insn);

  if (insn_entry[i].is_load)
    {
      if (GET_CODE (body) == SET)
	{
	  rtx rhs = SET_SRC (body);
	  /* Even without a swap, the RHS might be a vec_select for, say,
	     a byte-reversing load.  */
	  if (!MEM_P (rhs))
	    return 0;
	  if (GET_CODE (XEXP (rhs, 0)) == AND)
	    return 0;

	  *special = SH_NOSWAP_LD;
	  return 1;
	}
      else
	return 0;
    }

  if (insn_entry[i].is_store)
    {
      if (GET_CODE (body) == SET
	  && GET_CODE (SET_SRC (body)) != UNSPEC
	  && GET_CODE (SET_SRC (body)) != VEC_SELECT)
	{
	  rtx lhs = SET_DEST (body);
	  /* Even without a swap, the LHS might be a vec_select for, say,
	     a byte-reversing store.  */
	  if (!MEM_P (lhs))
	    return 0;
	  if (GET_CODE (XEXP (lhs, 0)) == AND)
	    return 0;

	  *special = SH_NOSWAP_ST;
	  return 1;
	}
      else
	return 0;
    }

  /* A convert to single precision can be left as is provided that
     all of its uses are in xxspltw instructions that splat BE element
     zero.  */
  if (GET_CODE (body) == SET
      && GET_CODE (SET_SRC (body)) == UNSPEC
      && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
    {
      df_ref def;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);

      FOR_EACH_INSN_INFO_DEF (def, insn_info)
	{
	  struct df_link *link = DF_REF_CHAIN (def);
	  if (!link)
	    return 0;

	  for (; link; link = link->next)
	    {
	      rtx use_insn = DF_REF_INSN (link->ref);
	      rtx use_body = PATTERN (use_insn);
	      if (GET_CODE (use_body) != SET
		  || GET_CODE (SET_SRC (use_body)) != UNSPEC
		  || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
		  || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
		return 0;
	    }
	}

      return 1;
    }

  /* A concatenation of two doublewords is ok if we reverse the
     order of the inputs.  */
  if (GET_CODE (body) == SET
      && GET_CODE (SET_SRC (body)) == VEC_CONCAT
      && (GET_MODE (SET_SRC (body)) == V2DFmode
	  || GET_MODE (SET_SRC (body)) == V2DImode))
    {
      *special = SH_CONCAT;
      return 1;
    }

  /* V2DF reductions are always swappable.  */
  if (GET_CODE (body) == PARALLEL)
    {
      rtx expr = XVECEXP (body, 0, 0);
      if (GET_CODE (expr) == SET
	  && v2df_reduction_p (SET_SRC (expr)))
	return 1;
    }

  /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
     constant pool.  */
  if (GET_CODE (body) == SET
      && GET_CODE (SET_SRC (body)) == UNSPEC
      && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
      && XVECLEN (SET_SRC (body), 0) == 3
      && REG_P (XVECEXP (SET_SRC (body), 0, 2)))
    {
      rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      df_ref use;
      FOR_EACH_INSN_INFO_USE (use, insn_info)
	if (rtx_equal_p (DF_REF_REG (use), mask_reg))
	  {
	    struct df_link *def_link = DF_REF_CHAIN (use);
	    /* Punt if multiple definitions for this reg.  */
	    if (def_link && !def_link->next &&
		const_load_sequence_p (insn_entry,
				       DF_REF_INSN (def_link->ref)))
	      {
		*special = SH_VPERM;
		return 1;
	      }
	  }
    }

  /* Otherwise check the operands for vector lane violations.  */
  return rtx_is_swappable_p (body, special);
}
enum chain_purpose { FOR_LOADS, FOR_STORES };
/* Return true if the UD or DU chain headed by LINK is non-empty,
   and every entry on the chain references an insn that is a
   register swap.  Furthermore, if PURPOSE is FOR_LOADS, each such
   register swap must have only permuting loads as reaching defs.
   If PURPOSE is FOR_STORES, each such register swap must have only
   register swaps or permuting stores as reached uses.  */
static bool
chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
			   enum chain_purpose purpose)
{
  if (!link)
    return false;

  for (; link; link = link->next)
    {
      if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
	continue;

      if (DF_REF_IS_ARTIFICIAL (link->ref))
	return false;

      rtx reached_insn = DF_REF_INSN (link->ref);
      unsigned uid = INSN_UID (reached_insn);
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);

      if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
	  || insn_entry[uid].is_store)
	return false;

      if (purpose == FOR_LOADS)
	{
	  df_ref use;
	  FOR_EACH_INSN_INFO_USE (use, insn_info)
	    {
	      struct df_link *swap_link = DF_REF_CHAIN (use);

	      while (swap_link)
		{
		  if (DF_REF_IS_ARTIFICIAL (link->ref))
		    return false;

		  rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
		  unsigned uid2 = INSN_UID (swap_def_insn);

		  /* Only permuting loads are allowed.  */
		  if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
		    return false;

		  swap_link = swap_link->next;
		}
	    }
	}
      else if (purpose == FOR_STORES)
	{
	  df_ref def;
	  FOR_EACH_INSN_INFO_DEF (def, insn_info)
	    {
	      struct df_link *swap_link = DF_REF_CHAIN (def);

	      while (swap_link)
		{
		  if (DF_REF_IS_ARTIFICIAL (link->ref))
		    return false;

		  rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
		  unsigned uid2 = INSN_UID (swap_use_insn);

		  /* Permuting stores or register swaps are allowed.  */
		  if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
		    return false;

		  swap_link = swap_link->next;
		}
	    }
	}
    }
  return true;
}
/* Mark the xxswapdi instructions associated with permuting loads and
   stores for removal.  Note that we only flag them for deletion here,
   as there is a possibility of a swap being reached from multiple
   loads, etc.  */
static void
mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
{
  rtx insn = insn_entry[i].insn;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);

  if (insn_entry[i].is_load)
    {
      df_ref def;
      FOR_EACH_INSN_INFO_DEF (def, insn_info)
	{
	  struct df_link *link = DF_REF_CHAIN (def);

	  /* We know by now that these are swaps, so we can delete
	     them confidently.  */
	  while (link)
	    {
	      rtx use_insn = DF_REF_INSN (link->ref);
	      insn_entry[INSN_UID (use_insn)].will_delete = 1;
	      link = link->next;
	    }
	}
    }
  else if (insn_entry[i].is_store)
    {
      df_ref use;
      FOR_EACH_INSN_INFO_USE (use, insn_info)
	{
	  /* Ignore uses for addressability.  */
	  machine_mode mode = GET_MODE (DF_REF_REG (use));
	  if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
	    continue;

	  struct df_link *link = DF_REF_CHAIN (use);

	  /* We know by now that these are swaps, so we can delete
	     them confidently.  */
	  while (link)
	    {
	      rtx def_insn = DF_REF_INSN (link->ref);
	      insn_entry[INSN_UID (def_insn)].will_delete = 1;
	      link = link->next;
	    }
	}
    }
}
/* *OP_PTR is either a CONST_VECTOR or an expression containing one.
   Swap the first half of the vector with the second in the first
   case.  Recurse to find it in the second.  */
static void
swap_const_vector_halves (rtx *op_ptr)
{
  int i;
  rtx op = *op_ptr;
  enum rtx_code code = GET_CODE (op);
  if (GET_CODE (op) == CONST_VECTOR)
    {
      int units = GET_MODE_NUNITS (GET_MODE (op));
      rtx_vector_builder builder (GET_MODE (op), units, 1);
      for (i = 0; i < units / 2; ++i)
	builder.quick_push (CONST_VECTOR_ELT (op, i + units / 2));
      for (i = 0; i < units / 2; ++i)
	builder.quick_push (CONST_VECTOR_ELT (op, i));
      *op_ptr = builder.build ();
    }
  else
    {
      int j;
      const char *fmt = GET_RTX_FORMAT (code);
      for (i = 0; i < GET_RTX_LENGTH (code); ++i)
	if (fmt[i] == 'e' || fmt[i] == 'u')
	  swap_const_vector_halves (&XEXP (op, i));
	else if (fmt[i] == 'E')
	  for (j = 0; j < XVECLEN (op, i); ++j)
	    swap_const_vector_halves (&XVECEXP (op, i, j));
    }
}
/* Find all subregs of a vector expression that perform a narrowing,
   and adjust the subreg index to account for doubleword swapping.  */
static void
adjust_subreg_index (rtx op)
{
  enum rtx_code code = GET_CODE (op);
  if (code == SUBREG
      && (GET_MODE_SIZE (GET_MODE (op))
	  < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
    {
      unsigned int index = SUBREG_BYTE (op);
      if (index < 8)
	index += 8;
      else
	index -= 8;
      SUBREG_BYTE (op) = index;
    }

  const char *fmt = GET_RTX_FORMAT (code);
  int i, j;
  for (i = 0; i < GET_RTX_LENGTH (code); ++i)
    if (fmt[i] == 'e' || fmt[i] == 'u')
      adjust_subreg_index (XEXP (op, i));
    else if (fmt[i] == 'E')
      for (j = 0; j < XVECLEN (op, i); ++j)
	adjust_subreg_index (XVECEXP (op, i, j));
}
/* Convert the non-permuting load INSN to a permuting one.  */
static void
permute_load (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  rtx mem_op = SET_SRC (body);
  rtx tgt_reg = SET_DEST (body);
  machine_mode mode = GET_MODE (tgt_reg);
  int n_elts = GET_MODE_NUNITS (mode);
  int half_elts = n_elts / 2;
  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
  int i, j;
  for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
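  /* The selector built above is [half_elts ... n_elts-1, 0 ... half_elts-1],
     so the permuting load itself now performs the doubleword swap.  */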
  rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
  SET_SRC (body) = sel;
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Replacing load %d with permuted load\n",
	     INSN_UID (insn));
}
1223 permute_store (rtx_insn
*insn
)
1225 rtx body
= PATTERN (insn
);
1226 rtx src_reg
= SET_SRC (body
);
1227 machine_mode mode
= GET_MODE (src_reg
);
1228 int n_elts
= GET_MODE_NUNITS (mode
);
1229 int half_elts
= n_elts
/ 2;
1230 rtx par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (n_elts
));
1232 for (i
= 0, j
= half_elts
; i
< half_elts
; ++i
, ++j
)
1233 XVECEXP (par
, 0, i
) = GEN_INT (j
);
1234 for (i
= half_elts
, j
= 0; j
< half_elts
; ++i
, ++j
)
1235 XVECEXP (par
, 0, i
) = GEN_INT (j
);
1236 rtx sel
= gen_rtx_VEC_SELECT (mode
, src_reg
, par
);
1237 SET_SRC (body
) = sel
;
1238 INSN_CODE (insn
) = -1; /* Force re-recognition. */
1239 df_insn_rescan (insn
);
1242 fprintf (dump_file
, "Replacing store %d with permuted store\n",
/* Given OP that contains a vector extract operation, adjust the index
   of the extracted lane to account for the doubleword swap.  */
static void
adjust_extract (rtx_insn *insn)
{
  rtx pattern = PATTERN (insn);
  if (GET_CODE (pattern) == PARALLEL)
    pattern = XVECEXP (pattern, 0, 0);
  rtx src = SET_SRC (pattern);
  /* The vec_select may be wrapped in a vec_duplicate for a splat, so
     account for that.  */
  rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
  rtx par = XEXP (sel, 1);
  int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
1261 lane
= lane
>= half_elts
? lane
- half_elts
: lane
+ half_elts
;
1262 XVECEXP (par
, 0, 0) = GEN_INT (lane
);
1263 INSN_CODE (insn
) = -1; /* Force re-recognition. */
1264 df_insn_rescan (insn
);
1267 fprintf (dump_file
, "Changing lane for extract %d\n", INSN_UID (insn
));
/* Given OP that contains a vector direct-splat operation, adjust the index
   of the source lane to account for the doubleword swap.  */
static void
adjust_splat (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  rtx unspec = XEXP (body, 1);
  int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
  int lane = INTVAL (XVECEXP (unspec, 0, 1));
  lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
  XVECEXP (unspec, 0, 1) = GEN_INT (lane);
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
}
/* Given OP that contains an XXPERMDI operation (that is not a doubleword
   swap), reverse the order of the source operands and adjust the indices
   of the source lanes to account for doubleword reversal.  */
static void
adjust_xxpermdi (rtx_insn *insn)
{
  rtx set = PATTERN (insn);
  rtx select = XEXP (set, 1);
  rtx concat = XEXP (select, 0);
  rtx src0 = XEXP (concat, 0);
  XEXP (concat, 0) = XEXP (concat, 1);
  XEXP (concat, 1) = src0;
  rtx parallel = XEXP (select, 1);
  int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
  int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
  int new_lane0 = 3 - lane1;
  int new_lane1 = 3 - lane0;
, 0, 0) = GEN_INT (new_lane0
);
1306 XVECEXP (parallel
, 0, 1) = GEN_INT (new_lane1
);
1307 INSN_CODE (insn
) = -1; /* Force re-recognition. */
1308 df_insn_rescan (insn
);
1311 fprintf (dump_file
, "Changing lanes for xxpermdi %d\n", INSN_UID (insn
));
/* Given OP that contains a VEC_CONCAT operation of two doublewords,
   reverse the order of those inputs.  */
static void
adjust_concat (rtx_insn *insn)
{
  rtx set = PATTERN (insn);
  rtx concat = XEXP (set, 1);
  rtx src0 = XEXP (concat, 0);
  XEXP (concat, 0) = XEXP (concat, 1);
  XEXP (concat, 1) = src0;
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
}
/* Given an UNSPEC_VPERM insn, modify the mask loaded from the
   constant pool to reflect swapped doublewords.  */
static void
adjust_vperm (rtx_insn *insn)
{
  /* We previously determined that the UNSPEC_VPERM was fed by a
     swap of a swapping load of a TOC-relative constant pool symbol.
     Find the MEM in the swapping load and replace it with a MEM for
     the adjusted mask constant.  */
  rtx set = PATTERN (insn);
  rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);

  /* Find the swap.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  rtx_insn *swap_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    if (rtx_equal_p (DF_REF_REG (use), mask_reg))
      {
	struct df_link *def_link = DF_REF_CHAIN (use);
	gcc_assert (def_link && !def_link->next);
	swap_insn = DF_REF_INSN (def_link->ref);
	break;
      }
  gcc_assert (swap_insn);

  /* Find the load.  */
  insn_info = DF_INSN_INFO_GET (swap_insn);
  rtx_insn *load_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);
      gcc_assert (def_link && !def_link->next);
      load_insn = DF_REF_INSN (def_link->ref);
      break;
    }
  gcc_assert (load_insn);

  /* Find the TOC-relative symbol access.  */
  insn_info = DF_INSN_INFO_GET (load_insn);
  rtx_insn *tocrel_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);
      gcc_assert (def_link && !def_link->next);
      tocrel_insn = DF_REF_INSN (def_link->ref);
      break;
    }
  gcc_assert (tocrel_insn);

  /* Find the embedded CONST_VECTOR.  We have to call toc_relative_expr_p
     to set tocrel_base; otherwise it would be unnecessary as we've
     already established it will return true.  */
  rtx base, offset;
  const_rtx tocrel_base;
  rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
  /* There is an extra level of indirection for small/large code models.  */
  if (MEM_P (tocrel_expr))
    tocrel_expr = XEXP (tocrel_expr, 0);
  if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
    gcc_unreachable ();
  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
  rtx const_vector = get_pool_constant (base);
  /* With the extra indirection, get_pool_constant will produce the
     real constant from the reg_equal expression, so get the real
     constant.  */
  if (SYMBOL_REF_P (const_vector))
    const_vector = get_pool_constant (const_vector);
  gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);

  /* Create an adjusted mask from the initial mask.  */
  unsigned int new_mask[16], i, val;
  for (i = 0; i < 16; ++i)
    {
      val = INTVAL (XVECEXP (const_vector, 0, i));
      if (val < 16)
	new_mask[i] = (val + 8) % 16;
      else
	new_mask[i] = ((val + 8) % 16) + 16;
    }
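  /* For example, selector 0 (byte 0 of the first input) becomes 8, and
     selector 16 (byte 0 of the second input) becomes 24, matching the
     M -> M' transformation described in the commentary at the top of
     this file.  */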
  /* Create a new CONST_VECTOR and a MEM that references it.  */
  rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
  for (i = 0; i < 16; ++i)
    XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
  rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
  rtx new_mem = force_const_mem (V16QImode, new_const_vector);
  /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
     can't recognize.  Force the SYMBOL_REF into a register.  */
  if (!REG_P (XEXP (new_mem, 0)))
    {
      rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
      XEXP (new_mem, 0) = base_reg;
      /* Move the newly created insn ahead of the load insn.  */
      rtx_insn *force_insn = get_last_insn ();
      remove_insn (force_insn);
      rtx_insn *before_load_insn = PREV_INSN (load_insn);
      add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
      df_insn_rescan (before_load_insn);
      df_insn_rescan (force_insn);
    }

  /* Replace the MEM in the load instruction and rescan it.  */
  XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
  INSN_CODE (load_insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (load_insn);

  if (dump_file)
    fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
}
/* The insn described by INSN_ENTRY[I] can be swapped, but only
   with special handling.  Take care of that here.  */
static void
handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
{
  rtx_insn *insn = insn_entry[i].insn;
  rtx body = PATTERN (insn);

  switch (insn_entry[i].special_handling)
    {
    default:
      gcc_unreachable ();
    case SH_CONST_VECTOR:
      {
	/* A CONST_VECTOR will only show up somewhere in the RHS of a SET.  */
	gcc_assert (GET_CODE (body) == SET);
	swap_const_vector_halves (&SET_SRC (body));
	if (dump_file)
	  fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
	break;
      }
    case SH_SUBREG:
      /* A subreg of the same size is already safe.  For subregs that
	 select a smaller portion of a reg, adjust the index for
	 swapped doublewords.  */
      adjust_subreg_index (body);
      if (dump_file)
	fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
      break;
    case SH_NOSWAP_LD:
      /* Convert a non-permuting load to a permuting one.  */
      permute_load (insn);
      break;
    case SH_NOSWAP_ST:
      /* Convert a non-permuting store to a permuting one.  */
      permute_store (insn);
      break;
    case SH_EXTRACT:
      /* Change the lane on an extract operation.  */
      adjust_extract (insn);
      break;
    case SH_SPLAT:
      /* Change the lane on a direct-splat operation.  */
      adjust_splat (insn);
      break;
    case SH_XXPERMDI:
      /* Change the lanes on an XXPERMDI operation.  */
      adjust_xxpermdi (insn);
      break;
    case SH_CONCAT:
      /* Reverse the order of a concatenation operation.  */
      adjust_concat (insn);
      break;
    case SH_VPERM:
      /* Change the mask loaded from the constant pool for a VPERM.  */
      adjust_vperm (insn);
      break;
    }
}
/* Find the insn from the Ith table entry, which is known to be a
   register swap Y = SWAP(X).  Replace it with a copy Y = X.  */
static void
replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
{
  rtx_insn *insn = insn_entry[i].insn;
  rtx body = PATTERN (insn);
  rtx src_reg = XEXP (SET_SRC (body), 0);
  rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
  rtx_insn *new_insn = emit_insn_before (copy, insn);
  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
  df_insn_rescan (new_insn);

  if (dump_file)
    {
      unsigned int new_uid = INSN_UID (new_insn);
      fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
    }

  df_insn_delete (insn);
  remove_insn (insn);
  insn->set_deleted ();
}
/* INSN is known to contain a SUBREG, which we can normally handle,
   but if the SUBREG itself contains a MULT then we need to leave it alone
   to avoid turning a mult_hipart into a mult_lopart, for example.  */
static bool
has_part_mult (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  if (GET_CODE (body) != SET)
    return false;
  rtx src = SET_SRC (body);
  if (GET_CODE (src) != SUBREG)
    return false;
  rtx inner = XEXP (src, 0);
  return (GET_CODE (inner) == MULT);
}
/* Make NEW_MEM_EXP's attributes and flags resemble those of
   ORIGINAL_MEM_EXP.  */
static void
mimic_memory_attributes_and_flags (rtx new_mem_exp, const_rtx original_mem_exp)
{
  RTX_FLAG (new_mem_exp, jump) = RTX_FLAG (original_mem_exp, jump);
  RTX_FLAG (new_mem_exp, call) = RTX_FLAG (original_mem_exp, call);
  RTX_FLAG (new_mem_exp, unchanging) = RTX_FLAG (original_mem_exp, unchanging);
  RTX_FLAG (new_mem_exp, volatil) = RTX_FLAG (original_mem_exp, volatil);
  RTX_FLAG (new_mem_exp, frame_related) =
    RTX_FLAG (original_mem_exp, frame_related);

  /* The following fields may not be used with MEM subexpressions.  */
  RTX_FLAG (new_mem_exp, in_struct) = RTX_FLAG (original_mem_exp, in_struct);
  RTX_FLAG (new_mem_exp, return_val) = RTX_FLAG (original_mem_exp, return_val);

  struct mem_attrs original_attrs = *get_mem_attrs (original_mem_exp);

  alias_set_type set = original_attrs.alias;
  set_mem_alias_set (new_mem_exp, set);

  addr_space_t addrspace = original_attrs.addrspace;
  set_mem_addr_space (new_mem_exp, addrspace);

  unsigned int align = original_attrs.align;
  set_mem_align (new_mem_exp, align);

  tree expr = original_attrs.expr;
  set_mem_expr (new_mem_exp, expr);

  if (original_attrs.offset_known_p)
    {
      HOST_WIDE_INT offset = original_attrs.offset;
      set_mem_offset (new_mem_exp, offset);
    }
  else
    clear_mem_offset (new_mem_exp);

  if (original_attrs.size_known_p)
    {
      HOST_WIDE_INT size = original_attrs.size;
      set_mem_size (new_mem_exp, size);
    }
  else
    clear_mem_size (new_mem_exp);
}
/* Generate an rtx expression to represent use of the stvx insn to store
   the value represented by register SRC_EXP into the memory at address
   DEST_EXP, with vector mode MODE.  */
rtx
rs6000_gen_stvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
{
  rtx stvx;

  if (mode == V16QImode)
    stvx = gen_altivec_stvx_v16qi (src_exp, dest_exp);
  else if (mode == V8HImode)
    stvx = gen_altivec_stvx_v8hi (src_exp, dest_exp);
#ifdef HAVE_V8HFmode
  else if (mode == V8HFmode)
    stvx = gen_altivec_stvx_v8hf (src_exp, dest_exp);
#endif
  else if (mode == V4SImode)
    stvx = gen_altivec_stvx_v4si (src_exp, dest_exp);
  else if (mode == V4SFmode)
    stvx = gen_altivec_stvx_v4sf (src_exp, dest_exp);
  else if (mode == V2DImode)
    stvx = gen_altivec_stvx_v2di (src_exp, dest_exp);
  else if (mode == V2DFmode)
    stvx = gen_altivec_stvx_v2df (src_exp, dest_exp);
  else if (mode == V1TImode)
    stvx = gen_altivec_stvx_v1ti (src_exp, dest_exp);
  else
    /* KFmode, TFmode, other modes not expected in this context.  */
    gcc_unreachable ();

  rtx new_mem_exp = SET_DEST (PATTERN (stvx));
  mimic_memory_attributes_and_flags (new_mem_exp, dest_exp);
  return stvx;
}
/* Given that STORE_INSN represents an aligned store-with-swap of a
   swapped value, replace the store with an aligned store (without
   swap) and replace the swap with a copy insn.  */
static void
replace_swapped_aligned_store (swap_web_entry *insn_entry,
			       rtx_insn *store_insn)
{
  unsigned uid = INSN_UID (store_insn);
  gcc_assert (insn_entry[uid].is_swap && insn_entry[uid].is_store);

  rtx body = PATTERN (store_insn);
  rtx dest_address = XEXP (SET_DEST (body), 0);
  rtx swap_reg = XEXP (SET_SRC (body), 0);
  gcc_assert (REG_P (dest_address)
	      || rs6000_sum_of_two_registers_p (dest_address));

  /* Find the swap instruction that provides the value to be stored by
     this store-with-swap instruction.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (store_insn);
  df_ref use;
  rtx_insn *swap_insn = NULL;
  unsigned uid2 = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);

      /* If this is not the definition of the candidate swap register,
	 then skip it.  We are only interested in the swap insn.  */
      if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
	continue;

      /* If there is no def or the def is artificial or there are
	 multiple defs, we should not be here.  */
      gcc_assert (def_link && def_link->ref && !def_link->next
		  && !DF_REF_IS_ARTIFICIAL (def_link->ref));

      swap_insn = DF_REF_INSN (def_link->ref);
      uid2 = INSN_UID (swap_insn);

      /* If this source value is not a simple swap, we should not be here.  */
      gcc_assert (insn_entry[uid2].is_swap && !insn_entry[uid2].is_load
		  && !insn_entry[uid2].is_store);

      /* We've processed the use we care about, so break out of
	 this loop.  */
      break;
    }

  /* At this point, swap_insn and uid2 represent the swap instruction
     that feeds the store.  */
  gcc_assert (swap_insn);
  rtx set = single_set (store_insn);

  rtx dest_exp = SET_DEST (set);
  rtx src_exp = XEXP (SET_SRC (body), 0);
  enum machine_mode mode = GET_MODE (dest_exp);
  gcc_assert (MEM_P (dest_exp));
  gcc_assert (MEM_ALIGN (dest_exp) >= 128);

  /* Replace the store with a new insn.  */
  rtx stvx;
  stvx = rs6000_gen_stvx (mode, dest_exp, src_exp);

  rtx_insn *new_insn = emit_insn_before (stvx, store_insn);
  rtx new_body = PATTERN (new_insn);

  gcc_assert ((GET_CODE (new_body) == SET)
	      && MEM_P (SET_DEST (new_body)));

  set_block_for_insn (new_insn, BLOCK_FOR_INSN (store_insn));
  df_insn_rescan (new_insn);

  df_insn_delete (store_insn);
  remove_insn (store_insn);
  store_insn->set_deleted ();

  /* Replace the swap with a copy.  */
  uid2 = INSN_UID (swap_insn);
  mark_swaps_for_removal (insn_entry, uid2);
  replace_swap_with_copy (insn_entry, uid2);
}
/* Generate an rtx expression to represent use of the lvx insn to load
   from memory SRC_EXP into register DEST_EXP with vector mode MODE.  */
rtx
rs6000_gen_lvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
{
  rtx lvx;

  if (mode == V16QImode)
    lvx = gen_altivec_lvx_v16qi (dest_exp, src_exp);
  else if (mode == V8HImode)
    lvx = gen_altivec_lvx_v8hi (dest_exp, src_exp);
#ifdef HAVE_V8HFmode
  else if (mode == V8HFmode)
    lvx = gen_altivec_lvx_v8hf (dest_exp, src_exp);
#endif
  else if (mode == V4SImode)
    lvx = gen_altivec_lvx_v4si (dest_exp, src_exp);
  else if (mode == V4SFmode)
    lvx = gen_altivec_lvx_v4sf (dest_exp, src_exp);
  else if (mode == V2DImode)
    lvx = gen_altivec_lvx_v2di (dest_exp, src_exp);
  else if (mode == V2DFmode)
    lvx = gen_altivec_lvx_v2df (dest_exp, src_exp);
  else if (mode == V1TImode)
    lvx = gen_altivec_lvx_v1ti (dest_exp, src_exp);
  else
    /* KFmode, TFmode, other modes not expected in this context.  */
    gcc_unreachable ();

  rtx new_mem_exp = SET_SRC (PATTERN (lvx));
  mimic_memory_attributes_and_flags (new_mem_exp, src_exp);
  return lvx;
}
/* Given that SWAP_INSN represents a swap of an aligned
   load-with-swap, replace the load with an aligned load (without
   swap) and replace the swap with a copy insn.  */
static void
replace_swapped_aligned_load (swap_web_entry *insn_entry, rtx swap_insn)
{
  /* Find the load.  */
  unsigned uid = INSN_UID (swap_insn);
  /* Only call this if quad_aligned_load_p (swap_insn).  */
  gcc_assert (insn_entry[uid].is_swap && !insn_entry[uid].is_load);
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);

  /* Since insn is known to represent a swap instruction, we know it
     "uses" only one input variable.  */
  df_ref use = DF_INSN_INFO_USES (insn_info);

  /* Figure out where this input variable is defined.  */
  struct df_link *def_link = DF_REF_CHAIN (use);
  gcc_assert (def_link && def_link->ref
	      && !DF_REF_IS_ARTIFICIAL (def_link->ref) && !def_link->next);

  rtx_insn *def_insn = DF_REF_INSN (def_link->ref);
  unsigned uid2 = INSN_UID (def_insn);

  /* We're expecting a load-with-swap insn.  */
  gcc_assert (insn_entry[uid2].is_load && insn_entry[uid2].is_swap);

  /* We expect this to be a set of a register, with source representing
     a swap of memory (indicated by code VEC_SELECT).  */
  rtx body = PATTERN (def_insn);
  gcc_assert ((GET_CODE (body) == SET)
	      && (GET_CODE (SET_SRC (body)) == VEC_SELECT
		  || pattern_is_rotate64 (body))
	      && MEM_P (XEXP (SET_SRC (body), 0)));

  rtx src_exp = XEXP (SET_SRC (body), 0);
  enum machine_mode mode = GET_MODE (src_exp);
  rtx lvx = rs6000_gen_lvx (mode, SET_DEST (body), src_exp);

  rtx_insn *new_insn = emit_insn_before (lvx, def_insn);
  rtx new_body = PATTERN (new_insn);

  gcc_assert ((GET_CODE (new_body) == SET)
	      && MEM_P (SET_SRC (new_body)));

  set_block_for_insn (new_insn, BLOCK_FOR_INSN (def_insn));
  df_insn_rescan (new_insn);

  df_insn_delete (def_insn);
  remove_insn (def_insn);
  def_insn->set_deleted ();

  /* Replace the swap with a copy.  */
  mark_swaps_for_removal (insn_entry, uid);
  replace_swap_with_copy (insn_entry, uid);
}
/* Given that SWAP_INSN represents a swap of a load of a constant
   vector value, replace with a single instruction that loads a
   swapped variant of the original constant.

   The "natural" representation of a byte array in memory is the same
   for big endian and little endian.

   unsigned char byte_array[] =
     { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f };

   However, when loaded into a vector register, the representation
   depends on endian conventions.

   In big-endian mode, the register holds:

     [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]

   In little-endian mode, the register holds:

     [ f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ]

   Word arrays require different handling.  Consider the word array:

   unsigned int word_array[] =
     { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f };

   The in-memory representation depends on endian configuration.  The
   equivalent array, declared as a byte array, in memory would be:

   unsigned char big_endian_word_array_data[] =
     { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f };

   unsigned char little_endian_word_array_data[] =
     { 3, 2, 1, 0, 7, 6, 5, 4, b, a, 9, 8, f, e, d, c };

   In big-endian mode, the register holds:

     [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]

   In little-endian mode, the register holds:

     [ c, d, e, f, 8, 9, a, b, 4, 5, 6, 7, 0, 1, 2, 3 ]

   Similar transformations apply to the vector of half-word and vector
   of double-word representations.

   For now, don't handle vectors of quad-precision values.  Just return.
   A better solution is to fix the code generator to emit lvx/stvx for
   those.  */
static void
replace_swapped_load_constant (swap_web_entry *insn_entry, rtx swap_insn)
{
  /* Find the load.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
  rtx_insn *load_insn;
  df_ref use = DF_INSN_INFO_USES (insn_info);
  struct df_link *def_link = DF_REF_CHAIN (use);
  gcc_assert (def_link && !def_link->next);

  load_insn = DF_REF_INSN (def_link->ref);
  gcc_assert (load_insn);

  /* Find the TOC-relative symbol access.  */
  insn_info = DF_INSN_INFO_GET (load_insn);
  use = DF_INSN_INFO_USES (insn_info);

  def_link = DF_REF_CHAIN (use);
  gcc_assert (def_link && !def_link->next);

  rtx_insn *tocrel_insn = DF_REF_INSN (def_link->ref);
  gcc_assert (tocrel_insn);

  /* Find the embedded CONST_VECTOR.  We have to call toc_relative_expr_p
     to set tocrel_base; otherwise it would be unnecessary as we've
     already established it will return true.  */
  rtx base, offset;
  rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
  const_rtx tocrel_base;

  /* There is an extra level of indirection for small/large code models.  */
  if (MEM_P (tocrel_expr))
    tocrel_expr = XEXP (tocrel_expr, 0);

  if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
    gcc_unreachable ();

  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
  rtx const_vector = get_pool_constant (base);

  /* With the extra indirection, get_pool_constant will produce the
     real constant from the reg_equal expression, so get the real
     constant.  */
  if (SYMBOL_REF_P (const_vector))
    const_vector = get_pool_constant (const_vector);
  gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);

  rtx new_mem;
  enum machine_mode mode = GET_MODE (const_vector);

  /* Create an adjusted constant from the original constant.  */
  if (mode == V1TImode)
    /* Leave this code as is.  */
    return;
  else if (mode == V16QImode)
    {
1907 rtx vals
= gen_rtx_PARALLEL (mode
, rtvec_alloc (16));
1910 for (i
= 0; i
< 16; i
++)
1911 XVECEXP (vals
, 0, ((i
+8) % 16)) = XVECEXP (const_vector
, 0, i
);
1912 rtx new_const_vector
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
1913 new_mem
= force_const_mem (mode
, new_const_vector
);
1915 else if ((mode
== V8HImode
)
1916 #ifdef HAVE_V8HFmode
1917 || (mode
== V8HFmode
)
1921 rtx vals
= gen_rtx_PARALLEL (mode
, rtvec_alloc (8));
1924 for (i
= 0; i
< 8; i
++)
1925 XVECEXP (vals
, 0, ((i
+4) % 8)) = XVECEXP (const_vector
, 0, i
);
1926 rtx new_const_vector
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
1927 new_mem
= force_const_mem (mode
, new_const_vector
);
1929 else if ((mode
== V4SImode
) || (mode
== V4SFmode
))
1931 rtx vals
= gen_rtx_PARALLEL (mode
, rtvec_alloc (4));
1934 for (i
= 0; i
< 4; i
++)
1935 XVECEXP (vals
, 0, ((i
+2) % 4)) = XVECEXP (const_vector
, 0, i
);
1936 rtx new_const_vector
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
1937 new_mem
= force_const_mem (mode
, new_const_vector
);
1939 else if ((mode
== V2DImode
) || (mode
== V2DFmode
))
1941 rtx vals
= gen_rtx_PARALLEL (mode
, rtvec_alloc (2));
1944 for (i
= 0; i
< 2; i
++)
1945 XVECEXP (vals
, 0, ((i
+1) % 2)) = XVECEXP (const_vector
, 0, i
);
1946 rtx new_const_vector
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
1947 new_mem
= force_const_mem (mode
, new_const_vector
);
1951 /* We do not expect other modes to be constant-load-swapped. */
1955 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
1956 can't recognize. Force the SYMBOL_REF into a register. */
1957 if (!REG_P (XEXP (new_mem
, 0))) {
1958 rtx base_reg
= force_reg (Pmode
, XEXP (new_mem
, 0));
1959 XEXP (new_mem
, 0) = base_reg
;
1961 /* Move the newly created insn ahead of the load insn. */
1962 /* The last insn is the insn that forced new_mem into a register. */
1963 rtx_insn
*force_insn
= get_last_insn ();
1964 /* Remove this insn from the end of the instruction sequence. */
1965 remove_insn (force_insn
);
1966 rtx_insn
*before_load_insn
= PREV_INSN (load_insn
);
1968 /* And insert this insn back into the sequence before the previous
1969 load insn so this new expression will be available when the
1970 existing load is modified to load the swapped constant. */
1971 add_insn_after (force_insn
, before_load_insn
, BLOCK_FOR_INSN (load_insn
));
1972 df_insn_rescan (before_load_insn
);
1973 df_insn_rescan (force_insn
);
1976 /* Replace the MEM in the load instruction and rescan it. */
1977 XEXP (SET_SRC (PATTERN (load_insn
)), 0) = new_mem
;
1978 INSN_CODE (load_insn
) = -1; /* Force re-recognition. */
1979 df_insn_rescan (load_insn
);
1981 unsigned int uid
= INSN_UID (swap_insn
);
1982 mark_swaps_for_removal (insn_entry
, uid
);
1983 replace_swap_with_copy (insn_entry
, uid
);

/* Dump the swap table to DUMP_FILE.  */
static void
dump_swap_insn_table (swap_web_entry *insn_entry)
{
  int e = get_max_uid ();
  fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");

  for (int i = 0; i < e; ++i)
    if (insn_entry[i].is_relevant)
      {
        swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
        fprintf (dump_file, "%6d %6d ", i,
                 pred_entry && pred_entry->insn
                 ? INSN_UID (pred_entry->insn) : 0);
        if (insn_entry[i].is_load)
          fputs ("load ", dump_file);
        if (insn_entry[i].is_store)
          fputs ("store ", dump_file);
        if (insn_entry[i].is_swap)
          fputs ("swap ", dump_file);
        if (insn_entry[i].is_live_in)
          fputs ("live-in ", dump_file);
        if (insn_entry[i].is_live_out)
          fputs ("live-out ", dump_file);
        if (insn_entry[i].contains_subreg)
          fputs ("subreg ", dump_file);
        if (insn_entry[i].is_128_int)
          fputs ("int128 ", dump_file);
        if (insn_entry[i].is_call)
          fputs ("call ", dump_file);
        if (insn_entry[i].is_swappable)
          {
            fputs ("swappable ", dump_file);
            if (insn_entry[i].special_handling == SH_CONST_VECTOR)
              fputs ("special:constvec ", dump_file);
            else if (insn_entry[i].special_handling == SH_SUBREG)
              fputs ("special:subreg ", dump_file);
            else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
              fputs ("special:load ", dump_file);
            else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
              fputs ("special:store ", dump_file);
            else if (insn_entry[i].special_handling == SH_EXTRACT)
              fputs ("special:extract ", dump_file);
            else if (insn_entry[i].special_handling == SH_SPLAT)
              fputs ("special:splat ", dump_file);
            else if (insn_entry[i].special_handling == SH_XXPERMDI)
              fputs ("special:xxpermdi ", dump_file);
            else if (insn_entry[i].special_handling == SH_CONCAT)
              fputs ("special:concat ", dump_file);
            else if (insn_entry[i].special_handling == SH_VPERM)
              fputs ("special:vperm ", dump_file);
          }
        if (insn_entry[i].web_not_optimizable)
          fputs ("unoptimizable ", dump_file);
        if (insn_entry[i].will_delete)
          fputs ("delete ", dump_file);
        fputs ("\n", dump_file);
      }
  fputs ("\n", dump_file);
}
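
/* For reference, a table entry in the dump looks roughly like
   (an illustrative sketch, not captured from a real run):

       24     17 load swap delete

   i.e. the insn uid, the uid of the web's representative insn, and
   then whichever of the flags above are set for this entry.  */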

/* Return RTX with its address canonicalized to (reg) or (+ reg reg).
   Here RTX is an (& addr (const_int -16)).  Always return a new copy
   to avoid problems with combine.  */
static rtx
alignment_with_canonical_addr (rtx align)
{
  rtx canon;
  rtx addr = XEXP (align, 0);

  if (REG_P (addr))
    canon = addr;

  else if (GET_CODE (addr) == PLUS)
    {
      rtx addrop0 = XEXP (addr, 0);
      rtx addrop1 = XEXP (addr, 1);

      if (!REG_P (addrop0))
        addrop0 = force_reg (GET_MODE (addrop0), addrop0);

      if (!REG_P (addrop1))
        addrop1 = force_reg (GET_MODE (addrop1), addrop1);

      canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
    }

  else
    canon = force_reg (GET_MODE (addr), addr);

  return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
}
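
/* An illustrative example of the canonicalization (assumed RTL and
   register numbers): an alignment computed as

       (and:DI (plus:DI (reg:DI 3) (const_int 48)) (const_int -16))

   has its non-register addend forced into a fresh pseudo, giving

       (and:DI (plus:DI (reg:DI 3) (reg:DI 125)) (const_int -16))

   so the address inside the AND has the (reg) or (+ reg reg) shape
   that the lvx/stvx patterns expect.  */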

/* Check whether an rtx is an alignment mask, and if so, return
   a fully-expanded rtx for the masking operation.  */
static rtx
alignment_mask (rtx_insn *insn)
{
  rtx body = PATTERN (insn);

  if (GET_CODE (body) != SET
      || GET_CODE (SET_SRC (body)) != AND
      || !REG_P (XEXP (SET_SRC (body), 0)))
    return 0;

  rtx mask = XEXP (SET_SRC (body), 1);

  if (CONST_INT_P (mask))
    {
      if (INTVAL (mask) == -16)
        return alignment_with_canonical_addr (SET_SRC (body));
      else
        return 0;
    }

  if (!REG_P (mask))
    return 0;

  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  rtx real_mask = 0;

  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      if (!rtx_equal_p (DF_REF_REG (use), mask))
        continue;

      struct df_link *def_link = DF_REF_CHAIN (use);
      if (!def_link || def_link->next)
        return 0;

      rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
      rtx const_body = PATTERN (const_insn);
      if (GET_CODE (const_body) != SET)
        return 0;

      real_mask = SET_SRC (const_body);

      if (!CONST_INT_P (real_mask)
          || INTVAL (real_mask) != -16)
        return 0;
    }

  if (real_mask == 0)
    return 0;

  return alignment_with_canonical_addr (SET_SRC (body));
}
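
/* Two shapes qualify as alignment masks (illustrative sketches with
   assumed register numbers):

       (set (reg:DI 120) (and:DI (reg:DI 3) (const_int -16)))

   directly, or

       (set (reg:DI 121) (const_int -16))
       ...
       (set (reg:DI 122) (and:DI (reg:DI 3) (reg:DI 121)))

   where the mask register's single reaching definition is -16.  */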

/* Given INSN that's a load or store based at BASE_REG, check if
   all of its feeding computations align its address on a 16-byte
   boundary.  If so, return true and add all definition insns into
   AND_INSNS and their corresponding fully-expanded rtxes for the
   masking operations into AND_OPS.  */

static bool
find_alignment_op (rtx_insn *insn, rtx base_reg, vec<rtx_insn *> *and_insns,
                   vec<rtx> *and_ops)
{
  df_ref base_use;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  rtx and_operation = 0;

  FOR_EACH_INSN_INFO_USE (base_use, insn_info)
    {
      if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
        continue;

      struct df_link *base_def_link = DF_REF_CHAIN (base_use);
      if (!base_def_link)
        break;

      while (base_def_link)
        {
          /* With stack-protector code enabled, and possibly in other
             circumstances, there may not be an associated insn for
             the def.  */
          if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
            return false;

          rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
          and_operation = alignment_mask (and_insn);

          /* Stop if we find any one which doesn't align.  */
          if (!and_operation)
            break;

          and_insns->safe_push (and_insn);
          and_ops->safe_push (and_operation);
          base_def_link = base_def_link->next;
        }
    }

  return and_operation;
}
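
/* The base register can have several reaching definitions, e.g. when
   the address is computed differently on the two arms of a
   conditional (an illustrative sketch):

       bb 1:  (set (reg:DI 130) (and:DI (reg:DI 3) (const_int -16)))
       bb 2:  (set (reg:DI 130) (and:DI (reg:DI 4) (const_int -16)))
       join:  load or store based at (reg:DI 130)

   Every definition must be an alignment mask for the address to be
   known 16-byte aligned; if any one is not, we give up.  */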

struct del_info { bool replace; rtx_insn *replace_insn; };

/* If INSN is the load for an lvx pattern, put it in canonical form.  */
static void
recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
{
  rtx body = PATTERN (insn);
  gcc_assert (GET_CODE (body) == SET
              && (GET_CODE (SET_SRC (body)) == VEC_SELECT
                  || pattern_is_rotate64 (body))
              && MEM_P (XEXP (SET_SRC (body), 0)));

  rtx mem = XEXP (SET_SRC (body), 0);
  rtx base_reg = XEXP (mem, 0);

  auto_vec<rtx_insn *> and_insns;
  auto_vec<rtx> and_ops;
  bool is_any_def_and
    = find_alignment_op (insn, base_reg, &and_insns, &and_ops);

  if (is_any_def_and)
    {
      gcc_assert (and_insns.length () == and_ops.length ());
      df_ref def;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      FOR_EACH_INSN_INFO_DEF (def, insn_info)
        {
          struct df_link *link = DF_REF_CHAIN (def);
          if (!link || link->next)
            break;

          rtx_insn *swap_insn = DF_REF_INSN (link->ref);
          if (!insn_is_swap_p (swap_insn)
              || insn_is_load_p (swap_insn)
              || insn_is_store_p (swap_insn))
            break;

          /* Expected lvx pattern found.  Change the swap to
             a copy, and propagate the AND operation into the
             load.  */
          to_delete[INSN_UID (swap_insn)].replace = true;
          to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;

          rtx new_reg = 0;
          rtx and_mask = 0;
          for (unsigned i = 0; i < and_insns.length (); i++)
            {
              /* However, first we must be sure that we make the
                 base register from the AND operation available
                 in case the register has been overwritten.  Copy
                 the base register to a new pseudo and use that
                 as the base register of the AND operation in
                 the new LVX instruction.  */
              rtx_insn *and_insn = and_insns[i];
              rtx and_op = and_ops[i];
              rtx and_base = XEXP (and_op, 0);
              new_reg = gen_reg_rtx (GET_MODE (and_base));
              and_mask = XEXP (and_op, 1);

              rtx copy = gen_rtx_SET (new_reg, and_base);
              rtx_insn *new_insn = emit_insn_after (copy, and_insn);
              set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
              df_insn_rescan (new_insn);
            }

          XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, and_mask);
          SET_SRC (body) = mem;
          INSN_CODE (insn) = -1; /* Force re-recognition.  */
          df_insn_rescan (insn);

          if (dump_file)
            fprintf (dump_file, "lvx opportunity found at %d\n",
                     INSN_UID (insn));
        }
    }
}

/* If INSN is the store for an stvx pattern, put it in canonical form.  */
static void
recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
{
  rtx body = PATTERN (insn);
  gcc_assert (GET_CODE (body) == SET
              && MEM_P (SET_DEST (body))
              && (GET_CODE (SET_SRC (body)) == VEC_SELECT
                  || pattern_is_rotate64 (body)));
  rtx mem = SET_DEST (body);
  rtx base_reg = XEXP (mem, 0);

  auto_vec<rtx_insn *> and_insns;
  auto_vec<rtx> and_ops;
  bool is_any_def_and
    = find_alignment_op (insn, base_reg, &and_insns, &and_ops);

  if (is_any_def_and)
    {
      gcc_assert (and_insns.length () == and_ops.length ());
      rtx src_reg = XEXP (SET_SRC (body), 0);
      df_ref src_use;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      FOR_EACH_INSN_INFO_USE (src_use, insn_info)
        {
          if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
            continue;

          struct df_link *link = DF_REF_CHAIN (src_use);
          if (!link || link->next)
            break;

          rtx_insn *swap_insn = DF_REF_INSN (link->ref);
          if (!insn_is_swap_p (swap_insn)
              || insn_is_load_p (swap_insn)
              || insn_is_store_p (swap_insn))
            break;

          /* Expected stvx pattern found.  Change the swap to
             a copy, and propagate the AND operation into the
             store.  */
          to_delete[INSN_UID (swap_insn)].replace = true;
          to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;

          rtx new_reg = 0;
          rtx and_mask = 0;
          for (unsigned i = 0; i < and_insns.length (); i++)
            {
              /* However, first we must be sure that we make the
                 base register from the AND operation available
                 in case the register has been overwritten.  Copy
                 the base register to a new pseudo and use that
                 as the base register of the AND operation in
                 the new STVX instruction.  */
              rtx_insn *and_insn = and_insns[i];
              rtx and_op = and_ops[i];
              rtx and_base = XEXP (and_op, 0);
              new_reg = gen_reg_rtx (GET_MODE (and_base));
              and_mask = XEXP (and_op, 1);

              rtx copy = gen_rtx_SET (new_reg, and_base);
              rtx_insn *new_insn = emit_insn_after (copy, and_insn);
              set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
              df_insn_rescan (new_insn);
            }

          XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, and_mask);
          SET_SRC (body) = src_reg;
          INSN_CODE (insn) = -1; /* Force re-recognition.  */
          df_insn_rescan (insn);

          if (dump_file)
            fprintf (dump_file, "stvx opportunity found at %d\n",
                     INSN_UID (insn));
        }
    }
}

/* Look for patterns created from builtin lvx and stvx calls, and
   canonicalize them to be properly recognized as such.  */
static void
recombine_lvx_stvx_patterns (function *fun)
{
  int i;
  basic_block bb;
  rtx_insn *insn;

  int num_insns = get_max_uid ();
  del_info *to_delete = XCNEWVEC (del_info, num_insns);

  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS (bb, insn)
      {
        if (!NONDEBUG_INSN_P (insn))
          continue;

        if (insn_is_load_p (insn) && insn_is_swap_p (insn))
          recombine_lvx_pattern (insn, to_delete);
        else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
          recombine_stvx_pattern (insn, to_delete);
      }

  /* Turning swaps into copies is delayed until now, to avoid problems
     with deleting instructions during the insn walk.  */
  for (i = 0; i < num_insns; i++)
    if (to_delete[i].replace)
      {
        rtx swap_body = PATTERN (to_delete[i].replace_insn);
        rtx src_reg = XEXP (SET_SRC (swap_body), 0);
        rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
        rtx_insn *new_insn = emit_insn_before (copy,
                                               to_delete[i].replace_insn);
        set_block_for_insn (new_insn,
                            BLOCK_FOR_INSN (to_delete[i].replace_insn));
        df_insn_rescan (new_insn);
        df_insn_delete (to_delete[i].replace_insn);
        remove_insn (to_delete[i].replace_insn);
        to_delete[i].replace_insn->set_deleted ();
      }

  free (to_delete);
}

/* Main entry point for this pass.  */
unsigned int
rs6000_analyze_swaps (function *fun)
{
  swap_web_entry *insn_entry;
  basic_block bb;
  rtx_insn *insn, *curr_insn = 0;

  /* Dataflow analysis for use-def chains.  */
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Pre-pass to recombine lvx and stvx patterns so we don't lose info.  */
  recombine_lvx_stvx_patterns (fun);

  /* Rebuild ud- and du-chains.  */
  df_remove_problem (df_chain);
  df_process_deferred_rescans ();
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Allocate structure to represent webs of insns.  */
  insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
      {
        unsigned int uid = INSN_UID (insn);
        if (NONDEBUG_INSN_P (insn))
          {
            insn_entry[uid].insn = insn;

            if (GET_CODE (insn) == CALL_INSN)
              insn_entry[uid].is_call = 1;

            /* Walk the uses and defs to see if we mention vector regs.
               Record any constraints on optimization of such mentions.  */
            struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
            df_ref mention;
            FOR_EACH_INSN_INFO_USE (mention, insn_info)
              {
                /* We use DF_REF_REAL_REG here to get inside any subregs.  */
                machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

                /* If a use gets its value from a call insn, it will be
                   a hard register and will look like (reg:V4SI 3 3).
                   The df analysis creates two mentions for GPR3 and GPR4,
                   both DImode.  We must recognize this and treat it as a
                   vector mention to ensure the call is unioned with this
                   use.  */
                if (mode == DImode && DF_REF_INSN_INFO (mention))
                  {
                    rtx feeder = DF_REF_INSN (mention);
                    /* FIXME: It is pretty hard to get from the df mention
                       to the mode of the use in the insn.  We arbitrarily
                       pick a vector mode here, even though the use might
                       be a real DImode.  We can be too conservative
                       (create a web larger than necessary) because of
                       this, so consider eventually fixing this.  */
                    if (GET_CODE (feeder) == CALL_INSN)
                      mode = V4SImode;
                  }

                if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
                  {
                    insn_entry[uid].is_relevant = 1;
                    if (mode == TImode || mode == V1TImode
                        || FLOAT128_VECTOR_P (mode))
                      insn_entry[uid].is_128_int = 1;
                    if (DF_REF_INSN_INFO (mention))
                      insn_entry[uid].contains_subreg
                        = !rtx_equal_p (DF_REF_REG (mention),
                                        DF_REF_REAL_REG (mention));
                    union_defs (insn_entry, insn, mention);
                  }
              }
            FOR_EACH_INSN_INFO_DEF (mention, insn_info)
              {
                /* We use DF_REF_REAL_REG here to get inside any subregs.  */
                machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

                /* If we're loading up a hard vector register for a call,
                   it looks like (set (reg:V4SI 9 9) (...)).  The df
                   analysis creates two mentions for GPR9 and GPR10, both
                   DImode.  So relying on the mode from the mentions
                   isn't sufficient to ensure we union the call into the
                   web with the parameter setup code.  */
                if (mode == DImode && GET_CODE (insn) == SET
                    && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (insn))))
                  mode = GET_MODE (SET_DEST (insn));

                if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
                  {
                    insn_entry[uid].is_relevant = 1;
                    if (mode == TImode || mode == V1TImode
                        || FLOAT128_VECTOR_P (mode))
                      insn_entry[uid].is_128_int = 1;
                    if (DF_REF_INSN_INFO (mention))
                      insn_entry[uid].contains_subreg
                        = !rtx_equal_p (DF_REF_REG (mention),
                                        DF_REF_REAL_REG (mention));
                    /* REG_FUNCTION_VALUE_P is not valid for subregs.  */
                    else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
                      insn_entry[uid].is_live_out = 1;
                    union_uses (insn_entry, insn, mention);
                  }
              }

            if (insn_entry[uid].is_relevant)
              {
                /* Determine if this is a load or store.  */
                insn_entry[uid].is_load = insn_is_load_p (insn);
                insn_entry[uid].is_store = insn_is_store_p (insn);

                /* Determine if this is a doubleword swap.  If not,
                   determine whether it can legally be swapped.  */
                if (insn_is_swap_p (insn))
                  insn_entry[uid].is_swap = 1;
                else
                  {
                    unsigned int special = SH_NONE;
                    insn_entry[uid].is_swappable
                      = insn_is_swappable_p (insn_entry, insn, &special);
                    if (special != SH_NONE && insn_entry[uid].contains_subreg)
                      insn_entry[uid].is_swappable = 0;
                    else if (special != SH_NONE)
                      insn_entry[uid].special_handling = special;
                    else if (insn_entry[uid].contains_subreg
                             && has_part_mult (insn))
                      insn_entry[uid].is_swappable = 0;
                    else if (insn_entry[uid].contains_subreg)
                      insn_entry[uid].special_handling = SH_SUBREG;
                  }
              }
          }
      }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table when first built\n");
      dump_swap_insn_table (insn_entry);
    }

  /* Record unoptimizable webs.  */
  unsigned e = get_max_uid (), i;
  for (i = 0; i < e; ++i)
    {
      if (!insn_entry[i].is_relevant)
        continue;

      swap_web_entry *root
        = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();

      if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
          || (insn_entry[i].contains_subreg
              && insn_entry[i].special_handling != SH_SUBREG)
          || insn_entry[i].is_128_int || insn_entry[i].is_call
          || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
        root->web_not_optimizable = 1;

      /* If we have loads or stores that aren't permuting then the
         optimization isn't appropriate.  */
      else if ((insn_entry[i].is_load || insn_entry[i].is_store)
               && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
        root->web_not_optimizable = 1;

      /* If we have a swap that is both fed by a permuting load
         and a feeder of a permuting store, then the optimization
         isn't appropriate.  (Consider vec_xl followed by vec_xst_be.)  */
      else if (insn_entry[i].is_swap && !insn_entry[i].is_load
               && !insn_entry[i].is_store
               && swap_feeds_both_load_and_store (&insn_entry[i]))
        root->web_not_optimizable = 1;

      /* If we have permuting loads or stores that are not accompanied
         by a register swap, the optimization isn't appropriate.  */
      else if (insn_entry[i].is_load && insn_entry[i].is_swap)
        {
          rtx insn = insn_entry[i].insn;
          struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
          df_ref def;

          FOR_EACH_INSN_INFO_DEF (def, insn_info)
            {
              struct df_link *link = DF_REF_CHAIN (def);

              if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
                {
                  root->web_not_optimizable = 1;
                  break;
                }
            }
        }
      else if (insn_entry[i].is_store && insn_entry[i].is_swap)
        {
          rtx insn = insn_entry[i].insn;
          struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
          df_ref use;

          FOR_EACH_INSN_INFO_USE (use, insn_info)
            {
              struct df_link *link = DF_REF_CHAIN (use);

              if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
                {
                  root->web_not_optimizable = 1;
                  break;
                }
            }
        }
    }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
      dump_swap_insn_table (insn_entry);
    }

  /* For each load and store in an optimizable web (which implies
     the loads and stores are permuting), find the associated
     register swaps and mark them for removal.  Due to various
     optimizations we may mark the same swap more than once.  Also
     perform special handling for swappable insns that require it.  */
  for (i = 0; i < e; ++i)
    if ((insn_entry[i].is_load || insn_entry[i].is_store)
        && insn_entry[i].is_swap)
      {
        swap_web_entry* root_entry
          = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
        if (!root_entry->web_not_optimizable)
          mark_swaps_for_removal (insn_entry, i);
      }
    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
      {
        swap_web_entry* root_entry
          = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
        if (!root_entry->web_not_optimizable)
          handle_special_swappables (insn_entry, i);
      }

  /* Now delete the swaps marked for removal.  */
  for (i = 0; i < e; ++i)
    if (insn_entry[i].will_delete)
      replace_swap_with_copy (insn_entry, i);

  /* Clean up.  */
  free (insn_entry);

  /* Use a second pass over rtl to detect that certain vector values
     fetched from or stored to memory on quad-word aligned addresses
     can use lvx/stvx without swaps.  */

  /* First, rebuild ud chains.  */
  df_remove_problem (df_chain);
  df_process_deferred_rescans ();
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_UD_CHAIN);
  df_analyze ();

  swap_web_entry *pass2_insn_entry;
  pass2_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
      {
        unsigned int uid = INSN_UID (insn);
        if (NONDEBUG_INSN_P (insn))
          {
            pass2_insn_entry[uid].insn = insn;

            pass2_insn_entry[uid].is_relevant = 1;
            pass2_insn_entry[uid].is_load = insn_is_load_p (insn);
            pass2_insn_entry[uid].is_store = insn_is_store_p (insn);

            /* Determine if this is a doubleword swap.  If not,
               determine whether it can legally be swapped.  */
            if (insn_is_swap_p (insn))
              pass2_insn_entry[uid].is_swap = 1;
          }
      }

  e = get_max_uid ();
  for (unsigned i = 0; i < e; ++i)
    if (pass2_insn_entry[i].is_swap && !pass2_insn_entry[i].is_load
        && !pass2_insn_entry[i].is_store)
      {
        /* Replace swap of aligned load-swap with aligned unswapped
           load.  */
        rtx_insn *rtx_insn = pass2_insn_entry[i].insn;
        if (quad_aligned_load_p (pass2_insn_entry, rtx_insn))
          replace_swapped_aligned_load (pass2_insn_entry, rtx_insn);
      }
    else if (pass2_insn_entry[i].is_swap && pass2_insn_entry[i].is_store)
      {
        /* Replace aligned store-swap of swapped value with aligned
           unswapped store.  */
        rtx_insn *rtx_insn = pass2_insn_entry[i].insn;
        if (quad_aligned_store_p (pass2_insn_entry, rtx_insn))
          replace_swapped_aligned_store (pass2_insn_entry, rtx_insn);
      }

  /* Clean up.  */
  free (pass2_insn_entry);

  /* Use a third pass over rtl to replace swap(load(vector constant))
     with load(swapped vector constant).  */

  /* First, rebuild ud chains.  */
  df_remove_problem (df_chain);
  df_process_deferred_rescans ();
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_UD_CHAIN);
  df_analyze ();

  swap_web_entry *pass3_insn_entry;
  pass3_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
      {
        unsigned int uid = INSN_UID (insn);
        if (NONDEBUG_INSN_P (insn))
          {
            pass3_insn_entry[uid].insn = insn;

            pass3_insn_entry[uid].is_relevant = 1;
            pass3_insn_entry[uid].is_load = insn_is_load_p (insn);
            pass3_insn_entry[uid].is_store = insn_is_store_p (insn);

            /* Determine if this is a doubleword swap.  If not,
               determine whether it can legally be swapped.  */
            if (insn_is_swap_p (insn))
              pass3_insn_entry[uid].is_swap = 1;
          }
      }

  e = get_max_uid ();
  for (unsigned i = 0; i < e; ++i)
    if (pass3_insn_entry[i].is_swap && !pass3_insn_entry[i].is_load
        && !pass3_insn_entry[i].is_store)
      {
        insn = pass3_insn_entry[i].insn;
        if (const_load_sequence_p (pass3_insn_entry, insn))
          replace_swapped_load_constant (pass3_insn_entry, insn);
      }

  /* Clean up.  */
  free (pass3_insn_entry);
  return 0;
}

const pass_data pass_data_analyze_swaps =
{
  RTL_PASS, /* type */
  "swaps", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_analyze_swaps : public rtl_opt_pass
{
public:
  pass_analyze_swaps(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
            && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
  }

  virtual unsigned int execute (function *fun)
  {
    return rs6000_analyze_swaps (fun);
  }

  opt_pass *clone ()
  {
    return new pass_analyze_swaps (m_ctxt);
  }

}; // class pass_analyze_swaps

rtl_opt_pass *
make_pass_analyze_swaps (gcc::context *ctxt)
{
  return new pass_analyze_swaps (ctxt);
}