1 /* Subroutines used to remove unnecessary doubleword swaps
2 for p8 little-endian VSX code.
3 Copyright (C) 1991-2022 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #define IN_TARGET_CODE 1
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "df.h"
31 #include "tm_p.h"
32 #include "ira.h"
33 #include "print-tree.h"
34 #include "varasm.h"
35 #include "explow.h"
36 #include "expr.h"
37 #include "output.h"
38 #include "tree-pass.h"
39 #include "rtx-vector-builder.h"
40
41 /* Analyze vector computations and remove unnecessary doubleword
42 swaps (xxswapdi instructions). This pass is performed only
43 for little-endian VSX code generation.
44
45 For this specific case, loads and stores of 4x32 and 2x64 vectors
46 are inefficient. These are implemented using the lxvd2x and
47 stxvd2x instructions, which invert the order of doublewords in
48 a vector register. Thus the code generation inserts an xxswapdi
49 after each such load, and prior to each such store. (For spill
50 code after register assignment, an additional xxswapdi is inserted
51 following each store in order to return a hard register to its
52 unpermuted value.)
53
54 The extra xxswapdi instructions reduce performance. This can be
55 particularly bad for vectorized code. The purpose of this pass
56 is to reduce the number of xxswapdi instructions required for
57 correctness.
58
59 The primary insight is that much code that operates on vectors
60 does not care about the relative order of elements in a register,
61 so long as the correct memory order is preserved. If we have
62 a computation where all input values are provided by lxvd2x/xxswapdi
63 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
64 and all intermediate computations are pure SIMD (independent of
65 element order), then all the xxswapdi's associated with the loads
66 and stores may be removed.
67
68 This pass uses some of the infrastructure and logical ideas from
69 the "web" pass in web.c. We create maximal webs of computations
70 fitting the description above using union-find. Each such web is
71 then optimized by removing its unnecessary xxswapdi instructions.
72
73 The pass is placed prior to global optimization so that we can
74 perform the optimization in the safest and simplest way possible;
75 that is, by replacing each xxswapdi insn with a register copy insn.
76 Subsequent forward propagation will remove copies where possible.
77
78 There are some operations sensitive to element order for which we
79 can still allow the operation, provided we modify those operations.
80 These include CONST_VECTORs, for which we must swap the first and
81 second halves of the constant vector; and SUBREGs, for which we
82 must adjust the byte offset to account for the swapped doublewords.
83 A remaining opportunity would be non-immediate-form splats, for
84 which we should adjust the selected lane of the input. We should
85 also make code generation adjustments for sum-across operations,
86 since this is a common vectorizer reduction.
87
88 Because we run prior to the first split, we can see loads and stores
89 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
90 vector loads and stores that have not yet been split into a permuting
91 load/store and a swap. (One way this can happen is with a builtin
92 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
93 than deleting a swap, we convert the load/store into a permuting
94 load/store (which effectively removes the swap). */
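
/* For illustration only (register numbers are arbitrary and this is
   not compiler output), a little-endian load/compute/store sequence
   before this pass looks roughly like:

       lxvd2x  0,0,9        # load; doublewords arrive reversed
       xxswapd 0,0          # swap them back
       ...                  # element-order-independent computation
       xxswapd 0,0          # swap again before the store
       stxvd2x 0,0,10       # store; doublewords reversed on the way out

   When every input and output of a web has this form, the xxswapd's
   cancel against the reversals done by lxvd2x/stxvd2x and can be
   removed:

       lxvd2x  0,0,9
       ...
       stxvd2x 0,0,10  */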
95
96 /* Notes on Permutes
97
98 We do not currently handle computations that contain permutes. There
99 is a general transformation that can be performed correctly, but it
100 may introduce more expensive code than it replaces. To handle these
101 would require a cost model to determine when to perform the optimization.
102 This commentary records how this could be done if desired.
103
104 The most general permute is something like this (example for V16QI):
105
106 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
107 (parallel [(const_int a0) (const_int a1)
108 ...
109 (const_int a14) (const_int a15)]))
110
111 where a0,...,a15 are in [0,31] and select which elements of op1 and op2
112 appear in the result.
113
114 Regardless of mode, we can convert the PARALLEL to a mask of 16
115 byte-element selectors. Let's call this M, with M[i] representing
116 the ith byte-element selector value. Then if we swap doublewords
117 throughout the computation, we can get correct behavior by replacing
118 M with M' as follows:
119
120 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
121 { ((M[i]+8)%16)+16 : M[i] in [16,31]
122
123 This seems promising at first, since we are just replacing one mask
124 with another. But certain masks are preferable to others. If M
125 is a mask that matches a vmrghh pattern, for example, M' certainly
126 will not. Instead of a single vmrghh, we would generate a load of
127 M' and a vperm. So we would need to know how many xxswapd's we can
128 remove as a result of this transformation to determine if it's
129 profitable; and preferably the logic would need to be aware of all
130 the special preferable masks.
131
132 Another form of permute is an UNSPEC_VPERM, in which the mask is
133 already in a register. In some cases, this mask may be a constant
134 that we can discover with ud-chains, in which case the above
135 transformation is ok. However, the common usage here is for the
136 mask to be produced by an UNSPEC_LVSL, in which case the mask
137 cannot be known at compile time. In such a case we would have to
138 generate several instructions to compute M' as above at run time,
139 and a cost model is needed again.
140
141 However, when the mask M for an UNSPEC_VPERM is loaded from the
142 constant pool, we can replace M with M' as above at no cost
143 beyond adding a constant pool entry. */
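
/* A small worked example of the M -> M' rule above (illustrative
   only): the mask M = {0,1,...,15}, which selects all of op1 in
   order, becomes M' = {8,9,...,15,0,1,...,7}; a mask drawn entirely
   from op2, say M = {16,17,...,31}, becomes M' = {24,...,31,16,...,23}.
   Each selector continues to name the same operand, with its byte
   index rotated by 8 (mod 16) within that operand.  */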
144
145 /* This is based on the union-find logic in web.c. web_entry_base is
146 defined in df.h. */
147 class swap_web_entry : public web_entry_base
148 {
149 public:
150 /* Pointer to the insn. */
151 rtx_insn *insn;
152 /* Set if insn contains a mention of a vector register. All other
153 fields are undefined if this field is unset. */
154 unsigned int is_relevant : 1;
155 /* Set if insn is a load. */
156 unsigned int is_load : 1;
157 /* Set if insn is a store. */
158 unsigned int is_store : 1;
159 /* Set if insn is a doubleword swap. This can either be a register swap
160 or a permuting load or store (test is_load and is_store for this). */
161 unsigned int is_swap : 1;
162 /* Set if the insn has a live-in use of a parameter register. */
163 unsigned int is_live_in : 1;
164 /* Set if the insn has a live-out def of a return register. */
165 unsigned int is_live_out : 1;
166 /* Set if the insn contains a subreg reference of a vector register. */
167 unsigned int contains_subreg : 1;
168 /* Set if the insn contains a 128-bit integer operand. */
169 unsigned int is_128_int : 1;
170 /* Set if this is a call-insn. */
171 unsigned int is_call : 1;
172 /* Set if this insn does not perform a vector operation for which
173 element order matters, or if we know how to fix it up if it does.
174 Undefined if is_swap is set. */
175 unsigned int is_swappable : 1;
176 /* A nonzero value indicates what kind of special handling for this
177 insn is required if doublewords are swapped. Undefined if
178 is_swappable is not set. */
179 unsigned int special_handling : 4;
180 /* Set if the web represented by this entry cannot be optimized. */
181 unsigned int web_not_optimizable : 1;
182 /* Set if this insn should be deleted. */
183 unsigned int will_delete : 1;
184 };
185
186 enum special_handling_values {
187 SH_NONE = 0,
188 SH_CONST_VECTOR,
189 SH_SUBREG,
190 SH_NOSWAP_LD,
191 SH_NOSWAP_ST,
192 SH_EXTRACT,
193 SH_SPLAT,
194 SH_XXPERMDI,
195 SH_CONCAT,
196 SH_VPERM
197 };
198
199 /* Union INSN with all insns containing definitions that reach USE.
200 Detect whether USE is live-in to the current function. */
201 static void
202 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
203 {
204 struct df_link *link = DF_REF_CHAIN (use);
205
206 if (!link)
207 insn_entry[INSN_UID (insn)].is_live_in = 1;
208
209 while (link)
210 {
211 if (DF_REF_IS_ARTIFICIAL (link->ref))
212 insn_entry[INSN_UID (insn)].is_live_in = 1;
213
214 if (DF_REF_INSN_INFO (link->ref))
215 {
216 rtx def_insn = DF_REF_INSN (link->ref);
217 (void)unionfind_union (insn_entry + INSN_UID (insn),
218 insn_entry + INSN_UID (def_insn));
219 }
220
221 link = link->next;
222 }
223 }
224
225 /* Union INSN with all insns containing uses reached from DEF.
226 Detect whether DEF is live-out from the current function. */
227 static void
228 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
229 {
230 struct df_link *link = DF_REF_CHAIN (def);
231
232 if (!link)
233 insn_entry[INSN_UID (insn)].is_live_out = 1;
234
235 while (link)
236 {
237 /* This could be an eh use or some other artificial use;
238 we treat these all the same (killing the optimization). */
239 if (DF_REF_IS_ARTIFICIAL (link->ref))
240 insn_entry[INSN_UID (insn)].is_live_out = 1;
241
242 if (DF_REF_INSN_INFO (link->ref))
243 {
244 rtx use_insn = DF_REF_INSN (link->ref);
245 (void)unionfind_union (insn_entry + INSN_UID (insn),
246 insn_entry + INSN_UID (use_insn));
247 }
248
249 link = link->next;
250 }
251 }
252
253 /* Return true iff PAT (a SINGLE_SET) is a rotate-by-64-bits expression,
254 which represents a doubleword swap; otherwise return false. */
255
256 static bool
257 pattern_is_rotate64 (rtx pat)
258 {
259 rtx rot = SET_SRC (pat);
260
261 if (GET_CODE (rot) == ROTATE && CONST_INT_P (XEXP (rot, 1))
262 && INTVAL (XEXP (rot, 1)) == 64)
263 return true;
264
265 return false;
266 }
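
/* Illustrative only (not copied from the machine description): the
   SETs accepted above have a source of the form

       (rotate:V1TI (reg:V1TI x) (const_int 64))

   which is how a doubleword swap of a 128-bit value is written.  */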
267
268 /* Return 1 iff INSN is a load insn, including permuting loads that
269 represent an lxvd2x instruction; else return 0. */
270 static unsigned int
271 insn_is_load_p (rtx insn)
272 {
273 rtx body = PATTERN (insn);
274
275 if (GET_CODE (body) == SET)
276 {
277 if (MEM_P (SET_SRC (body)))
278 return 1;
279
280 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
281 && MEM_P (XEXP (SET_SRC (body), 0)))
282 return 1;
283
284 if (pattern_is_rotate64 (body) && MEM_P (XEXP (SET_SRC (body), 0)))
285 return 1;
286
287 return 0;
288 }
289
290 if (GET_CODE (body) != PARALLEL)
291 return 0;
292
293 rtx set = XVECEXP (body, 0, 0);
294
295 if (GET_CODE (set) == SET && MEM_P (SET_SRC (set)))
296 return 1;
297
298 return 0;
299 }
300
301 /* Return 1 iff INSN is a store insn, including permuting stores that
302 represent an stxvd2x instruction; else return 0. */
303 static unsigned int
304 insn_is_store_p (rtx insn)
305 {
306 rtx body = PATTERN (insn);
307 if (GET_CODE (body) == SET && MEM_P (SET_DEST (body)))
308 return 1;
309 if (GET_CODE (body) != PARALLEL)
310 return 0;
311 rtx set = XVECEXP (body, 0, 0);
312 if (GET_CODE (set) == SET && MEM_P (SET_DEST (set)))
313 return 1;
314 return 0;
315 }
316
317 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
318 a permuting load, or a permuting store. */
319 static unsigned int
320 insn_is_swap_p (rtx insn)
321 {
322 rtx body = PATTERN (insn);
323 if (GET_CODE (body) != SET)
324 return 0;
325 rtx rhs = SET_SRC (body);
326 if (pattern_is_rotate64 (body))
327 return 1;
328 if (GET_CODE (rhs) != VEC_SELECT)
329 return 0;
330 rtx parallel = XEXP (rhs, 1);
331 if (GET_CODE (parallel) != PARALLEL)
332 return 0;
333 unsigned int len = XVECLEN (parallel, 0);
334 if (len != 2 && len != 4 && len != 8 && len != 16)
335 return 0;
336 for (unsigned int i = 0; i < len / 2; ++i)
337 {
338 rtx op = XVECEXP (parallel, 0, i);
339 if (!CONST_INT_P (op) || INTVAL (op) != len / 2 + i)
340 return 0;
341 }
342 for (unsigned int i = len / 2; i < len; ++i)
343 {
344 rtx op = XVECEXP (parallel, 0, i);
345 if (!CONST_INT_P (op) || INTVAL (op) != i - len / 2)
346 return 0;
347 }
348 return 1;
349 }
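
/* To make the selector check above concrete (illustrative only): for
   a V2DI swap the PARALLEL must be [1 0], for V4SI it must be
   [2 3 0 1], and for V16QI it must be
   [8 9 10 11 12 13 14 15 0 1 2 3 4 5 6 7], i.e. the second half of
   the element indices followed by the first half.  */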
350
351 /* Return true iff EXPR represents the sum of two registers. */
352 bool
353 rs6000_sum_of_two_registers_p (const_rtx expr)
354 {
355 if (GET_CODE (expr) == PLUS)
356 {
357 const_rtx operand1 = XEXP (expr, 0);
358 const_rtx operand2 = XEXP (expr, 1);
359 return (REG_P (operand1) && REG_P (operand2));
360 }
361 return false;
362 }
363
364 /* Return true iff EXPR represents an address expression that masks off
365 the low-order 4 bits in the style of an lvx or stvx rtl pattern. */
366 bool
367 rs6000_quadword_masked_address_p (const_rtx expr)
368 {
369 if (GET_CODE (expr) == AND)
370 {
371 const_rtx operand1 = XEXP (expr, 0);
372 const_rtx operand2 = XEXP (expr, 1);
373 if ((REG_P (operand1) || rs6000_sum_of_two_registers_p (operand1))
374 && CONST_SCALAR_INT_P (operand2) && INTVAL (operand2) == -16)
375 return true;
376 }
377 return false;
378 }
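
/* Illustrative only: the addresses accepted above look like

       (and:DI (reg:DI 9) (const_int -16))
       (and:DI (plus:DI (reg:DI 9) (reg:DI 10)) (const_int -16))

   i.e. a register or register+register base with the low-order four
   bits masked off, as in the lvx/stvx patterns.  */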
379
380 /* Return TRUE if INSN represents a swap of a swapped load from memory
381 and the memory address is quad-word aligned. */
382 static bool
383 quad_aligned_load_p (swap_web_entry *insn_entry, rtx_insn *insn)
384 {
385 unsigned uid = INSN_UID (insn);
386 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
387 return false;
388
389 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
390
391 /* Since insn is known to represent a swap instruction, we know it
392 "uses" only one input variable. */
393 df_ref use = DF_INSN_INFO_USES (insn_info);
394
395 /* Figure out where this input variable is defined. */
396 struct df_link *def_link = DF_REF_CHAIN (use);
397
398 /* If there is no definition or the definition is artificial or there are
399 multiple definitions, punt. */
400 if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
401 || def_link->next)
402 return false;
403
404 rtx def_insn = DF_REF_INSN (def_link->ref);
405 unsigned uid2 = INSN_UID (def_insn);
406 /* We're looking for a load-with-swap insn. If this is not that,
407 return false. */
408 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
409 return false;
410
411 /* If the source of the rtl def is not a set from memory, return
412 false. */
413 rtx body = PATTERN (def_insn);
414 if (GET_CODE (body) != SET
415 || !(GET_CODE (SET_SRC (body)) == VEC_SELECT
416 || pattern_is_rotate64 (body))
417 || !MEM_P (XEXP (SET_SRC (body), 0)))
418 return false;
419
420 rtx mem = XEXP (SET_SRC (body), 0);
421 rtx base_reg = XEXP (mem, 0);
422 return ((REG_P (base_reg) || rs6000_sum_of_two_registers_p (base_reg))
423 && MEM_ALIGN (mem) >= 128) ? true : false;
424 }
425
426 /* Return TRUE if INSN represents a store-with-swap of a swapped value
427 and the memory address is quad-word aligned. */
428 static bool
429 quad_aligned_store_p (swap_web_entry *insn_entry, rtx_insn *insn)
430 {
431 unsigned uid = INSN_UID (insn);
432 if (!insn_entry[uid].is_swap || !insn_entry[uid].is_store)
433 return false;
434
435 rtx body = PATTERN (insn);
436 rtx dest_address = XEXP (SET_DEST (body), 0);
437 rtx swap_reg = XEXP (SET_SRC (body), 0);
438
439 /* If the base address for the memory expression is not represented
440 by a single register and is not the sum of two registers, punt. */
441 if (!REG_P (dest_address) && !rs6000_sum_of_two_registers_p (dest_address))
442 return false;
443
444 /* Confirm that the value to be stored is produced by a swap
445 instruction. */
446 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
447 df_ref use;
448 FOR_EACH_INSN_INFO_USE (use, insn_info)
449 {
450 struct df_link *def_link = DF_REF_CHAIN (use);
451
452 /* If this use is not of the candidate swap register, skip it;
453 we are looking for the use whose definition is the swap. */
454 if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
455 continue;
456
457 /* If there is no def or the def is artificial or there are
458 multiple defs, punt. */
459 if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
460 || def_link->next)
461 return false;
462
463 rtx def_insn = DF_REF_INSN (def_link->ref);
464 unsigned uid2 = INSN_UID (def_insn);
465
466 /* If this source value is not a simple swap, return false. */
467 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load
468 || insn_entry[uid2].is_store)
469 return false;
470
471 /* I've processed the use that I care about, so break out of
472 this loop. */
473 break;
474 }
475
476 /* At this point, we know the source data comes from a swap. The
477 remaining question is whether the memory address is aligned. */
478 rtx set = single_set (insn);
479 if (set)
480 {
481 rtx dest = SET_DEST (set);
482 if (MEM_P (dest))
483 return (MEM_ALIGN (dest) >= 128);
484 }
485 return false;
486 }
487
488 /* Return 1 iff INSN_ENTRY's insn, known to reference a swap, is both fed
489 by a permuting load and a feeder of a permuting store. */
490 static unsigned int
491 swap_feeds_both_load_and_store (swap_web_entry *insn_entry)
492 {
493 rtx insn = insn_entry->insn;
494 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
495 df_ref def, use;
496 struct df_link *link = 0;
497 rtx_insn *load = 0, *store = 0;
498 bool fed_by_load = 0;
499 bool feeds_store = 0;
500
501 FOR_EACH_INSN_INFO_USE (use, insn_info)
502 {
503 link = DF_REF_CHAIN (use);
504 load = DF_REF_INSN (link->ref);
505 if (insn_is_load_p (load) && insn_is_swap_p (load))
506 fed_by_load = 1;
507 }
508
509 FOR_EACH_INSN_INFO_DEF (def, insn_info)
510 {
511 link = DF_REF_CHAIN (def);
512 store = DF_REF_INSN (link->ref);
513 if (insn_is_store_p (store) && insn_is_swap_p (store))
514 feeds_store = 1;
515 }
516
517 return fed_by_load && feeds_store;
518 }
519
520 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
521 static bool
522 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
523 {
524 unsigned uid = INSN_UID (insn);
525 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
526 return false;
527
528 const_rtx tocrel_base;
529
530 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
531 df_ref use;
532
533 /* Iterate over the definitions that are used by this insn. Since
534 this is known to be a swap insn, expect only one used definition. */
535 FOR_EACH_INSN_INFO_USE (use, insn_info)
536 {
537 struct df_link *def_link = DF_REF_CHAIN (use);
538
539 /* If there is no def or the def is artificial or there are
540 multiple defs, punt. */
541 if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
542 || def_link->next)
543 return false;
544
545 rtx def_insn = DF_REF_INSN (def_link->ref);
546 unsigned uid2 = INSN_UID (def_insn);
547 /* If this is not a load or is not a swap, return false. */
548 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
549 return false;
550
551 /* If the source of the rtl def is not a set from memory, return
552 false. */
553 rtx body = PATTERN (def_insn);
554 if (GET_CODE (body) != SET
555 || !(GET_CODE (SET_SRC (body)) == VEC_SELECT
556 || pattern_is_rotate64 (body))
557 || !MEM_P (XEXP (SET_SRC (body), 0)))
558 return false;
559
560 rtx mem = XEXP (SET_SRC (body), 0);
561 rtx base_reg = XEXP (mem, 0);
562 /* If the base address for the memory expression is not
563 represented by a register, punt. */
564 if (!REG_P (base_reg))
565 return false;
566
567 df_ref base_use;
568 insn_info = DF_INSN_INFO_GET (def_insn);
569 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
570 {
571 /* If base_use does not represent base_reg, look for another
572 use. */
573 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
574 continue;
575
576 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
577 if (!base_def_link || base_def_link->next)
578 return false;
579
580 /* Constants held on the stack are not "true" constants
581 because their values are not part of the static load
582 image. If this constant's base reference is a stack
583 or frame pointer, it is seen as an artificial
584 reference. */
585 if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
586 return false;
587
588 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
589 rtx tocrel_body = PATTERN (tocrel_insn);
590 rtx base, offset;
591 if (GET_CODE (tocrel_body) != SET)
592 return false;
593 /* There is an extra level of indirection for small/large
594 code models. */
595 rtx tocrel_expr = SET_SRC (tocrel_body);
596 if (MEM_P (tocrel_expr))
597 tocrel_expr = XEXP (tocrel_expr, 0);
598 if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
599 return false;
600 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
601
602 if (!SYMBOL_REF_P (base) || !CONSTANT_POOL_ADDRESS_P (base))
603 return false;
604 else
605 {
606 /* FIXME: The conditions under which
607 (SYMBOL_REF_P (const_vector)
608 && !CONSTANT_POOL_ADDRESS_P (const_vector))
609 are not well understood. This code prevents
610 an internal compiler error which will occur in
611 replace_swapped_load_constant () if we were to return
612 true. Some day, we should figure out how to properly
613 handle this condition in
614 replace_swapped_load_constant () and then we can
615 remove this special test. */
616 rtx const_vector = get_pool_constant (base);
617 if (SYMBOL_REF_P (const_vector)
618 && CONSTANT_POOL_ADDRESS_P (const_vector))
619 const_vector = get_pool_constant (const_vector);
620 if (GET_CODE (const_vector) != CONST_VECTOR)
621 return false;
622 }
623 }
624 }
625 return true;
626 }
627
628 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
629 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
630 static bool
631 v2df_reduction_p (rtx op)
632 {
633 if (GET_MODE (op) != V2DFmode)
634 return false;
635
636 enum rtx_code code = GET_CODE (op);
637 if (code != PLUS && code != SMIN && code != SMAX)
638 return false;
639
640 rtx concat = XEXP (op, 0);
641 if (GET_CODE (concat) != VEC_CONCAT)
642 return false;
643
644 rtx select0 = XEXP (concat, 0);
645 rtx select1 = XEXP (concat, 1);
646 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
647 return false;
648
649 rtx reg0 = XEXP (select0, 0);
650 rtx reg1 = XEXP (select1, 0);
651 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
652 return false;
653
654 rtx parallel0 = XEXP (select0, 1);
655 rtx parallel1 = XEXP (select1, 1);
656 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
657 return false;
658
659 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
660 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
661 return false;
662
663 return true;
664 }
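
/* A sketch of the shape accepted above (simplified; see
   vsx_reduc_<VEC_reduc_name>_v2df in vsx.md for the real patterns):

       (plus:V2DF
         (vec_concat:V2DF
           (vec_select:DF (reg:V2DF x) (parallel [(const_int 1)]))
           (vec_select:DF (reg:V2DF x) (parallel [(const_int 0)])))
         ...)

   i.e. the two lanes of x exchanged and then combined, where the
   outer code may also be SMIN or SMAX.  */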
665
666 /* Return 1 iff OP is an operand that will not be affected by having
667 vector doublewords swapped in memory. */
668 static unsigned int
669 rtx_is_swappable_p (rtx op, unsigned int *special)
670 {
671 enum rtx_code code = GET_CODE (op);
672 int i, j;
673 rtx parallel;
674
675 switch (code)
676 {
677 case LABEL_REF:
678 case SYMBOL_REF:
679 case CLOBBER:
680 case REG:
681 return 1;
682
683 case VEC_CONCAT:
684 case ASM_INPUT:
685 case ASM_OPERANDS:
686 return 0;
687
688 case CONST_VECTOR:
689 {
690 *special = SH_CONST_VECTOR;
691 return 1;
692 }
693
694 case VEC_DUPLICATE:
695 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
696 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
697 it represents a vector splat for which we can do special
698 handling. */
699 if (CONST_INT_P (XEXP (op, 0)))
700 return 1;
701 else if (REG_P (XEXP (op, 0))
702 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
703 /* This catches V2DF and V2DI splat, at a minimum. */
704 return 1;
705 else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
706 && REG_P (XEXP (XEXP (op, 0), 0))
707 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
708 /* This catches splat of a truncated value. */
709 return 1;
710 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
711 /* If the duplicated item is from a select, defer to the select
712 processing to see if we can change the lane for the splat. */
713 return rtx_is_swappable_p (XEXP (op, 0), special);
714 else
715 return 0;
716
717 case VEC_SELECT:
718 /* A vec_extract operation is ok if we change the lane. */
719 if (REG_P (XEXP (op, 0))
720 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
721 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
722 && XVECLEN (parallel, 0) == 1
723 && CONST_INT_P (XVECEXP (parallel, 0, 0)))
724 {
725 *special = SH_EXTRACT;
726 return 1;
727 }
728 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
729 XXPERMDI is a swap operation, it will be identified by
730 insn_is_swap_p and therefore we won't get here. */
731 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
732 && (GET_MODE (XEXP (op, 0)) == V4DFmode
733 || GET_MODE (XEXP (op, 0)) == V4DImode)
734 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
735 && XVECLEN (parallel, 0) == 2
736 && CONST_INT_P (XVECEXP (parallel, 0, 0))
737 && CONST_INT_P (XVECEXP (parallel, 0, 1)))
738 {
739 *special = SH_XXPERMDI;
740 return 1;
741 }
742 else if (v2df_reduction_p (op))
743 return 1;
744 else
745 return 0;
746
747 case UNSPEC:
748 {
749 /* Various operations are unsafe for this optimization, at least
750 without significant additional work. Permutes are obviously
751 problematic, as both the permute control vector and the ordering
752 of the target values are invalidated by doubleword swapping.
753 Vector pack and unpack modify the number of vector lanes.
754 Merge-high/low will not operate correctly on swapped operands.
755 Vector shifts across element boundaries are clearly uncool,
756 as are vector select and concatenate operations. Vector
757 sum-across instructions define one operand with a specific
758 order-dependent element, so additional fixup code would be
759 needed to make those work. Vector set and non-immediate-form
760 vector splat are element-order sensitive. A few of these
761 cases might be workable with special handling if required.
762 Adding cost modeling would be appropriate in some cases. */
763 int val = XINT (op, 1);
764 switch (val)
765 {
766 default:
767 break;
768 case UNSPEC_VBPERMQ:
769 case UNSPEC_VPACK_SIGN_SIGN_SAT:
770 case UNSPEC_VPACK_SIGN_UNS_SAT:
771 case UNSPEC_VPACK_UNS_UNS_MOD:
772 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
773 case UNSPEC_VPACK_UNS_UNS_SAT:
774 case UNSPEC_VPERM:
775 case UNSPEC_VPERM_UNS:
776 case UNSPEC_VPERMHI:
777 case UNSPEC_VPERMSI:
778 case UNSPEC_VPERMXOR:
779 case UNSPEC_VPKPX:
780 case UNSPEC_VSLDOI:
781 case UNSPEC_VSLO:
782 case UNSPEC_VSRO:
783 case UNSPEC_VSUM2SWS:
784 case UNSPEC_VSUM4S:
785 case UNSPEC_VSUM4UBS:
786 case UNSPEC_VSUMSWS:
787 case UNSPEC_VSUMSWS_DIRECT:
788 case UNSPEC_VSX_CONCAT:
789 case UNSPEC_VSX_CVDPSPN:
790 case UNSPEC_VSX_CVSPDP:
791 case UNSPEC_VSX_CVSPDPN:
792 case UNSPEC_VSX_EXTRACT:
793 case UNSPEC_VSX_SET:
794 case UNSPEC_VSX_SLDWI:
795 case UNSPEC_VSX_VSLO:
796 case UNSPEC_VUNPACK_HI_SIGN:
797 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
798 case UNSPEC_VUNPACK_LO_SIGN:
799 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
800 case UNSPEC_VUPKHPX:
801 case UNSPEC_VUPKHS_V4SF:
802 case UNSPEC_VUPKHU_V4SF:
803 case UNSPEC_VUPKLPX:
804 case UNSPEC_VUPKLS_V4SF:
805 case UNSPEC_VUPKLU_V4SF:
806 return 0;
807 case UNSPEC_VSPLT_DIRECT:
808 case UNSPEC_VSX_XXSPLTD:
809 *special = SH_SPLAT;
810 return 1;
811 case UNSPEC_REDUC_PLUS:
812 case UNSPEC_REDUC:
813 return 1;
814 case UNSPEC_VPMSUM:
815 /* vpmsumd is not swappable, but vpmsum[bhw] are. */
816 if (GET_MODE (op) == V2DImode)
817 return 0;
818 break;
819 }
820 }
821
822 default:
823 break;
824 }
825
826 const char *fmt = GET_RTX_FORMAT (code);
827 int ok = 1;
828
829 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
830 if (fmt[i] == 'e' || fmt[i] == 'u')
831 {
832 unsigned int special_op = SH_NONE;
833 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
834 if (special_op == SH_NONE)
835 continue;
836 /* Ensure we never have two kinds of special handling
837 for the same insn. */
838 if (*special != SH_NONE && *special != special_op)
839 return 0;
840 *special = special_op;
841 }
842 else if (fmt[i] == 'E')
843 for (j = 0; j < XVECLEN (op, i); ++j)
844 {
845 unsigned int special_op = SH_NONE;
846 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
847 if (special_op == SH_NONE)
848 continue;
849 /* Ensure we never have two kinds of special handling
850 for the same insn. */
851 if (*special != SH_NONE && *special != special_op)
852 return 0;
853 *special = special_op;
854 }
855
856 return ok;
857 }
858
859 /* Return 1 iff INSN is an operation that will not be affected by
860 having vector doublewords swapped in memory (in which case
861 *SPECIAL is unchanged), or that can be modified to be correct
862 if vector doublewords are swapped in memory (in which case
863 *SPECIAL is changed to a value indicating how). */
864 static unsigned int
865 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
866 unsigned int *special)
867 {
868 /* Calls are always bad. */
869 if (GET_CODE (insn) == CALL_INSN)
870 return 0;
871
872 /* Loads and stores seen here are not permuting, but we can still
873 fix them up by converting them to permuting ones. Exceptions:
874 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
875 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
876 for the SET source. Also we must now make an exception for lvx
877 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
878 explicit "& -16") since this leads to unrecognizable insns. */
879 rtx body = PATTERN (insn);
880 int i = INSN_UID (insn);
881
882 if (insn_entry[i].is_load)
883 {
884 if (GET_CODE (body) == SET)
885 {
886 rtx rhs = SET_SRC (body);
887 /* Even without a swap, the RHS might be a vec_select for, say,
888 a byte-reversing load. */
889 if (!MEM_P (rhs))
890 return 0;
891 if (GET_CODE (XEXP (rhs, 0)) == AND)
892 return 0;
893
894 *special = SH_NOSWAP_LD;
895 return 1;
896 }
897 else
898 return 0;
899 }
900
901 if (insn_entry[i].is_store)
902 {
903 if (GET_CODE (body) == SET
904 && GET_CODE (SET_SRC (body)) != UNSPEC
905 && GET_CODE (SET_SRC (body)) != VEC_SELECT)
906 {
907 rtx lhs = SET_DEST (body);
908 /* Even without a swap, the RHS might be a vec_select for, say,
909 a byte-reversing store. */
910 if (!MEM_P (lhs))
911 return 0;
912 if (GET_CODE (XEXP (lhs, 0)) == AND)
913 return 0;
914
915 *special = SH_NOSWAP_ST;
916 return 1;
917 }
918 else
919 return 0;
920 }
921
922 /* A convert to single precision can be left as is provided that
923 all of its uses are in xxspltw instructions that splat BE element
924 zero. */
925 if (GET_CODE (body) == SET
926 && GET_CODE (SET_SRC (body)) == UNSPEC
927 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
928 {
929 df_ref def;
930 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
931
932 FOR_EACH_INSN_INFO_DEF (def, insn_info)
933 {
934 struct df_link *link = DF_REF_CHAIN (def);
935 if (!link)
936 return 0;
937
938 for (; link; link = link->next) {
939 rtx use_insn = DF_REF_INSN (link->ref);
940 rtx use_body = PATTERN (use_insn);
941 if (GET_CODE (use_body) != SET
942 || GET_CODE (SET_SRC (use_body)) != UNSPEC
943 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
944 || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
945 return 0;
946 }
947 }
948
949 return 1;
950 }
951
952 /* A concatenation of two doublewords is ok if we reverse the
953 order of the inputs. */
954 if (GET_CODE (body) == SET
955 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
956 && (GET_MODE (SET_SRC (body)) == V2DFmode
957 || GET_MODE (SET_SRC (body)) == V2DImode))
958 {
959 *special = SH_CONCAT;
960 return 1;
961 }
962
963 /* V2DF reductions are always swappable. */
964 if (GET_CODE (body) == PARALLEL)
965 {
966 rtx expr = XVECEXP (body, 0, 0);
967 if (GET_CODE (expr) == SET
968 && v2df_reduction_p (SET_SRC (expr)))
969 return 1;
970 }
971
972 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
973 constant pool. */
974 if (GET_CODE (body) == SET
975 && GET_CODE (SET_SRC (body)) == UNSPEC
976 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
977 && XVECLEN (SET_SRC (body), 0) == 3
978 && REG_P (XVECEXP (SET_SRC (body), 0, 2)))
979 {
980 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
981 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
982 df_ref use;
983 FOR_EACH_INSN_INFO_USE (use, insn_info)
984 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
985 {
986 struct df_link *def_link = DF_REF_CHAIN (use);
987 /* Punt if multiple definitions for this reg. */
988 if (def_link && !def_link->next &&
989 const_load_sequence_p (insn_entry,
990 DF_REF_INSN (def_link->ref)))
991 {
992 *special = SH_VPERM;
993 return 1;
994 }
995 }
996 }
997
998 /* Otherwise check the operands for vector lane violations. */
999 return rtx_is_swappable_p (body, special);
1000 }
1001
1002 enum chain_purpose { FOR_LOADS, FOR_STORES };
1003
1004 /* Return true if the UD or DU chain headed by LINK is non-empty,
1005 and every entry on the chain references an insn that is a
1006 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
1007 register swap must have only permuting loads as reaching defs.
1008 If PURPOSE is FOR_STORES, each such register swap must have only
1009 register swaps or permuting stores as reached uses. */
1010 static bool
1011 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
1012 enum chain_purpose purpose)
1013 {
1014 if (!link)
1015 return false;
1016
1017 for (; link; link = link->next)
1018 {
1019 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
1020 continue;
1021
1022 if (DF_REF_IS_ARTIFICIAL (link->ref))
1023 return false;
1024
1025 rtx reached_insn = DF_REF_INSN (link->ref);
1026 unsigned uid = INSN_UID (reached_insn);
1027 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
1028
1029 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
1030 || insn_entry[uid].is_store)
1031 return false;
1032
1033 if (purpose == FOR_LOADS)
1034 {
1035 df_ref use;
1036 FOR_EACH_INSN_INFO_USE (use, insn_info)
1037 {
1038 struct df_link *swap_link = DF_REF_CHAIN (use);
1039
1040 while (swap_link)
1041 {
1042 if (DF_REF_IS_ARTIFICIAL (link->ref))
1043 return false;
1044
1045 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
1046 unsigned uid2 = INSN_UID (swap_def_insn);
1047
1048 /* Only permuting loads are allowed. */
1049 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
1050 return false;
1051
1052 swap_link = swap_link->next;
1053 }
1054 }
1055 }
1056 else if (purpose == FOR_STORES)
1057 {
1058 df_ref def;
1059 FOR_EACH_INSN_INFO_DEF (def, insn_info)
1060 {
1061 struct df_link *swap_link = DF_REF_CHAIN (def);
1062
1063 while (swap_link)
1064 {
1065 if (DF_REF_IS_ARTIFICIAL (link->ref))
1066 return false;
1067
1068 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
1069 unsigned uid2 = INSN_UID (swap_use_insn);
1070
1071 /* Permuting stores or register swaps are allowed. */
1072 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
1073 return false;
1074
1075 swap_link = swap_link->next;
1076 }
1077 }
1078 }
1079 }
1080
1081 return true;
1082 }
1083
1084 /* Mark the xxswapdi instructions associated with permuting loads and
1085 stores for removal. Note that we only flag them for deletion here,
1086 as there is a possibility of a swap being reached from multiple
1087 loads, etc. */
1088 static void
1089 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
1090 {
1091 rtx insn = insn_entry[i].insn;
1092 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1093
1094 if (insn_entry[i].is_load)
1095 {
1096 df_ref def;
1097 FOR_EACH_INSN_INFO_DEF (def, insn_info)
1098 {
1099 struct df_link *link = DF_REF_CHAIN (def);
1100
1101 /* We know by now that these are swaps, so we can delete
1102 them confidently. */
1103 while (link)
1104 {
1105 rtx use_insn = DF_REF_INSN (link->ref);
1106 insn_entry[INSN_UID (use_insn)].will_delete = 1;
1107 link = link->next;
1108 }
1109 }
1110 }
1111 else if (insn_entry[i].is_store)
1112 {
1113 df_ref use;
1114 FOR_EACH_INSN_INFO_USE (use, insn_info)
1115 {
1116 /* Ignore uses for addressability. */
1117 machine_mode mode = GET_MODE (DF_REF_REG (use));
1118 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
1119 continue;
1120
1121 struct df_link *link = DF_REF_CHAIN (use);
1122
1123 /* We know by now that these are swaps, so we can delete
1124 them confidently. */
1125 while (link)
1126 {
1127 rtx def_insn = DF_REF_INSN (link->ref);
1128 insn_entry[INSN_UID (def_insn)].will_delete = 1;
1129 link = link->next;
1130 }
1131 }
1132 }
1133 }
1134
1135 /* *OP_PTR is either a CONST_VECTOR or an expression containing one.
1136 Swap the first half of the vector with the second in the first
1137 case. Recurse to find it in the second. */
1138 static void
1139 swap_const_vector_halves (rtx *op_ptr)
1140 {
1141 int i;
1142 rtx op = *op_ptr;
1143 enum rtx_code code = GET_CODE (op);
1144 if (GET_CODE (op) == CONST_VECTOR)
1145 {
1146 int units = GET_MODE_NUNITS (GET_MODE (op));
1147 rtx_vector_builder builder (GET_MODE (op), units, 1);
1148 for (i = 0; i < units / 2; ++i)
1149 builder.quick_push (CONST_VECTOR_ELT (op, i + units / 2));
1150 for (i = 0; i < units / 2; ++i)
1151 builder.quick_push (CONST_VECTOR_ELT (op, i));
1152 *op_ptr = builder.build ();
1153 }
1154 else
1155 {
1156 int j;
1157 const char *fmt = GET_RTX_FORMAT (code);
1158 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
1159 if (fmt[i] == 'e' || fmt[i] == 'u')
1160 swap_const_vector_halves (&XEXP (op, i));
1161 else if (fmt[i] == 'E')
1162 for (j = 0; j < XVECLEN (op, i); ++j)
1163 swap_const_vector_halves (&XVECEXP (op, i, j));
1164 }
1165 }
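
/* For example (illustrative only), the V4SI constant { 1, 2, 3, 4 }
   is rebuilt above as { 3, 4, 1, 2 }: elements units/2 .. units-1 are
   pushed first, followed by elements 0 .. units/2-1.  */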
1166
1167 /* Find all subregs of a vector expression that perform a narrowing,
1168 and adjust the subreg index to account for doubleword swapping. */
1169 static void
1170 adjust_subreg_index (rtx op)
1171 {
1172 enum rtx_code code = GET_CODE (op);
1173 if (code == SUBREG
1174 && (GET_MODE_SIZE (GET_MODE (op))
1175 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
1176 {
1177 unsigned int index = SUBREG_BYTE (op);
1178 if (index < 8)
1179 index += 8;
1180 else
1181 index -= 8;
1182 SUBREG_BYTE (op) = index;
1183 }
1184
1185 const char *fmt = GET_RTX_FORMAT (code);
1186 int i,j;
1187 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
1188 if (fmt[i] == 'e' || fmt[i] == 'u')
1189 adjust_subreg_index (XEXP (op, i));
1190 else if (fmt[i] == 'E')
1191 for (j = 0; j < XVECLEN (op, i); ++j)
1192 adjust_subreg_index (XVECEXP (op, i, j));
1193 }
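
/* For example (illustrative only): a narrowing (subreg:DI (reg:V2DI) 0)
   becomes (subreg:DI (reg:V2DI) 8) and vice versa, so the subreg still
   refers to the same doubleword once the register's halves are swapped.  */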
1194
1195 /* Convert the non-permuting load INSN to a permuting one. */
1196 static void
1197 permute_load (rtx_insn *insn)
1198 {
1199 rtx body = PATTERN (insn);
1200 rtx mem_op = SET_SRC (body);
1201 rtx tgt_reg = SET_DEST (body);
1202 machine_mode mode = GET_MODE (tgt_reg);
1203 int n_elts = GET_MODE_NUNITS (mode);
1204 int half_elts = n_elts / 2;
1205 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
1206 int i, j;
1207 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
1208 XVECEXP (par, 0, i) = GEN_INT (j);
1209 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
1210 XVECEXP (par, 0, i) = GEN_INT (j);
1211 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
1212 SET_SRC (body) = sel;
1213 INSN_CODE (insn) = -1; /* Force re-recognition. */
1214 df_insn_rescan (insn);
1215
1216 if (dump_file)
1217 fprintf (dump_file, "Replacing load %d with permuted load\n",
1218 INSN_UID (insn));
1219 }
1220
1221 /* Convert the non-permuting store INSN to a permuting one. */
1222 static void
1223 permute_store (rtx_insn *insn)
1224 {
1225 rtx body = PATTERN (insn);
1226 rtx src_reg = SET_SRC (body);
1227 machine_mode mode = GET_MODE (src_reg);
1228 int n_elts = GET_MODE_NUNITS (mode);
1229 int half_elts = n_elts / 2;
1230 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
1231 int i, j;
1232 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
1233 XVECEXP (par, 0, i) = GEN_INT (j);
1234 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
1235 XVECEXP (par, 0, i) = GEN_INT (j);
1236 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
1237 SET_SRC (body) = sel;
1238 INSN_CODE (insn) = -1; /* Force re-recognition. */
1239 df_insn_rescan (insn);
1240
1241 if (dump_file)
1242 fprintf (dump_file, "Replacing store %d with permuted store\n",
1243 INSN_UID (insn));
1244 }
1245
1246 /* Given INSN that contains a vector extract operation, adjust the index
1247 of the extracted lane to account for the doubleword swap. */
1248 static void
1249 adjust_extract (rtx_insn *insn)
1250 {
1251 rtx pattern = PATTERN (insn);
1252 if (GET_CODE (pattern) == PARALLEL)
1253 pattern = XVECEXP (pattern, 0, 0);
1254 rtx src = SET_SRC (pattern);
1255 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
1256 account for that. */
1257 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
1258 rtx par = XEXP (sel, 1);
1259 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
1260 int lane = INTVAL (XVECEXP (par, 0, 0));
1261 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
1262 XVECEXP (par, 0, 0) = GEN_INT (lane);
1263 INSN_CODE (insn) = -1; /* Force re-recognition. */
1264 df_insn_rescan (insn);
1265
1266 if (dump_file)
1267 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
1268 }
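
/* For example (illustrative only): extracting lane 0 of a V4SI value
   becomes an extract of lane 2, and lane 3 becomes lane 1, because the
   doubleword containing the element has moved to the other half of the
   register.  */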
1269
1270 /* Given INSN that contains a vector direct-splat operation, adjust the index
1271 of the source lane to account for the doubleword swap. */
1272 static void
1273 adjust_splat (rtx_insn *insn)
1274 {
1275 rtx body = PATTERN (insn);
1276 rtx unspec = XEXP (body, 1);
1277 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
1278 int lane = INTVAL (XVECEXP (unspec, 0, 1));
1279 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
1280 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
1281 INSN_CODE (insn) = -1; /* Force re-recognition. */
1282 df_insn_rescan (insn);
1283
1284 if (dump_file)
1285 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
1286 }
1287
1288 /* Given INSN that contains an XXPERMDI operation (that is not a doubleword
1289 swap), reverse the order of the source operands and adjust the indices
1290 of the source lanes to account for doubleword reversal. */
1291 static void
1292 adjust_xxpermdi (rtx_insn *insn)
1293 {
1294 rtx set = PATTERN (insn);
1295 rtx select = XEXP (set, 1);
1296 rtx concat = XEXP (select, 0);
1297 rtx src0 = XEXP (concat, 0);
1298 XEXP (concat, 0) = XEXP (concat, 1);
1299 XEXP (concat, 1) = src0;
1300 rtx parallel = XEXP (select, 1);
1301 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
1302 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
1303 int new_lane0 = 3 - lane1;
1304 int new_lane1 = 3 - lane0;
1305 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
1306 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
1307 INSN_CODE (insn) = -1; /* Force re-recognition. */
1308 df_insn_rescan (insn);
1309
1310 if (dump_file)
1311 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
1312 }
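
/* For example (illustrative only): an xxpermdi that selects lanes
   [0 2] from (vec_concat x y) is rewritten above to select lanes
   [1 3] from (vec_concat y x): the sources are exchanged and each new
   lane index is 3 minus the other original lane index.  */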
1313
1314 /* Given INSN that contains a VEC_CONCAT operation of two doublewords,
1315 reverse the order of those inputs. */
1316 static void
1317 adjust_concat (rtx_insn *insn)
1318 {
1319 rtx set = PATTERN (insn);
1320 rtx concat = XEXP (set, 1);
1321 rtx src0 = XEXP (concat, 0);
1322 XEXP (concat, 0) = XEXP (concat, 1);
1323 XEXP (concat, 1) = src0;
1324 INSN_CODE (insn) = -1; /* Force re-recognition. */
1325 df_insn_rescan (insn);
1326
1327 if (dump_file)
1328 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
1329 }
1330
1331 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
1332 constant pool to reflect swapped doublewords. */
1333 static void
1334 adjust_vperm (rtx_insn *insn)
1335 {
1336 /* We previously determined that the UNSPEC_VPERM was fed by a
1337 swap of a swapping load of a TOC-relative constant pool symbol.
1338 Find the MEM in the swapping load and replace it with a MEM for
1339 the adjusted mask constant. */
1340 rtx set = PATTERN (insn);
1341 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
1342
1343 /* Find the swap. */
1344 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1345 df_ref use;
1346 rtx_insn *swap_insn = 0;
1347 FOR_EACH_INSN_INFO_USE (use, insn_info)
1348 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
1349 {
1350 struct df_link *def_link = DF_REF_CHAIN (use);
1351 gcc_assert (def_link && !def_link->next);
1352 swap_insn = DF_REF_INSN (def_link->ref);
1353 break;
1354 }
1355 gcc_assert (swap_insn);
1356
1357 /* Find the load. */
1358 insn_info = DF_INSN_INFO_GET (swap_insn);
1359 rtx_insn *load_insn = 0;
1360 FOR_EACH_INSN_INFO_USE (use, insn_info)
1361 {
1362 struct df_link *def_link = DF_REF_CHAIN (use);
1363 gcc_assert (def_link && !def_link->next);
1364 load_insn = DF_REF_INSN (def_link->ref);
1365 break;
1366 }
1367 gcc_assert (load_insn);
1368
1369 /* Find the TOC-relative symbol access. */
1370 insn_info = DF_INSN_INFO_GET (load_insn);
1371 rtx_insn *tocrel_insn = 0;
1372 FOR_EACH_INSN_INFO_USE (use, insn_info)
1373 {
1374 struct df_link *def_link = DF_REF_CHAIN (use);
1375 gcc_assert (def_link && !def_link->next);
1376 tocrel_insn = DF_REF_INSN (def_link->ref);
1377 break;
1378 }
1379 gcc_assert (tocrel_insn);
1380
1381 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
1382 to set tocrel_base; otherwise it would be unnecessary as we've
1383 already established it will return true. */
1384 rtx base, offset;
1385 const_rtx tocrel_base;
1386 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
1387 /* There is an extra level of indirection for small/large code models. */
1388 if (MEM_P (tocrel_expr))
1389 tocrel_expr = XEXP (tocrel_expr, 0);
1390 if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
1391 gcc_unreachable ();
1392 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
1393 rtx const_vector = get_pool_constant (base);
1394 /* With the extra indirection, get_pool_constant will produce the
1395 real constant from the reg_equal expression, so get the real
1396 constant. */
1397 if (SYMBOL_REF_P (const_vector))
1398 const_vector = get_pool_constant (const_vector);
1399 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
1400
1401 /* Create an adjusted mask from the initial mask. */
1402 unsigned int new_mask[16], i, val;
1403 for (i = 0; i < 16; ++i) {
1404 val = INTVAL (XVECEXP (const_vector, 0, i));
1405 if (val < 16)
1406 new_mask[i] = (val + 8) % 16;
1407 else
1408 new_mask[i] = ((val + 8) % 16) + 16;
1409 }
1410
1411 /* Create a new CONST_VECTOR and a MEM that references it. */
1412 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
1413 for (i = 0; i < 16; ++i)
1414 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
1415 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
1416 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
1417 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
1418 can't recognize. Force the SYMBOL_REF into a register. */
1419 if (!REG_P (XEXP (new_mem, 0))) {
1420 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
1421 XEXP (new_mem, 0) = base_reg;
1422 /* Move the newly created insn ahead of the load insn. */
1423 rtx_insn *force_insn = get_last_insn ();
1424 remove_insn (force_insn);
1425 rtx_insn *before_load_insn = PREV_INSN (load_insn);
1426 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
1427 df_insn_rescan (before_load_insn);
1428 df_insn_rescan (force_insn);
1429 }
1430
1431 /* Replace the MEM in the load instruction and rescan it. */
1432 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
1433 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
1434 df_insn_rescan (load_insn);
1435
1436 if (dump_file)
1437 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
1438 }
1439
1440 /* The insn described by INSN_ENTRY[I] can be swapped, but only
1441 with special handling. Take care of that here. */
1442 static void
1443 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
1444 {
1445 rtx_insn *insn = insn_entry[i].insn;
1446 rtx body = PATTERN (insn);
1447
1448 switch (insn_entry[i].special_handling)
1449 {
1450 default:
1451 gcc_unreachable ();
1452 case SH_CONST_VECTOR:
1453 {
1454 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
1455 gcc_assert (GET_CODE (body) == SET);
1456 swap_const_vector_halves (&SET_SRC (body));
1457 if (dump_file)
1458 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
1459 break;
1460 }
1461 case SH_SUBREG:
1462 /* A subreg of the same size is already safe. For subregs that
1463 select a smaller portion of a reg, adjust the index for
1464 swapped doublewords. */
1465 adjust_subreg_index (body);
1466 if (dump_file)
1467 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
1468 break;
1469 case SH_NOSWAP_LD:
1470 /* Convert a non-permuting load to a permuting one. */
1471 permute_load (insn);
1472 break;
1473 case SH_NOSWAP_ST:
1474 /* Convert a non-permuting store to a permuting one. */
1475 permute_store (insn);
1476 break;
1477 case SH_EXTRACT:
1478 /* Change the lane on an extract operation. */
1479 adjust_extract (insn);
1480 break;
1481 case SH_SPLAT:
1482 /* Change the lane on a direct-splat operation. */
1483 adjust_splat (insn);
1484 break;
1485 case SH_XXPERMDI:
1486 /* Change the lanes on an XXPERMDI operation. */
1487 adjust_xxpermdi (insn);
1488 break;
1489 case SH_CONCAT:
1490 /* Reverse the order of a concatenation operation. */
1491 adjust_concat (insn);
1492 break;
1493 case SH_VPERM:
1494 /* Change the mask loaded from the constant pool for a VPERM. */
1495 adjust_vperm (insn);
1496 break;
1497 }
1498 }
1499
1500 /* Find the insn from the Ith table entry, which is known to be a
1501 register swap Y = SWAP(X). Replace it with a copy Y = X. */
1502 static void
1503 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
1504 {
1505 rtx_insn *insn = insn_entry[i].insn;
1506 rtx body = PATTERN (insn);
1507 rtx src_reg = XEXP (SET_SRC (body), 0);
1508 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
1509 rtx_insn *new_insn = emit_insn_before (copy, insn);
1510 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
1511 df_insn_rescan (new_insn);
1512
1513 if (dump_file)
1514 {
1515 unsigned int new_uid = INSN_UID (new_insn);
1516 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
1517 }
1518
1519 df_insn_delete (insn);
1520 remove_insn (insn);
1521 insn->set_deleted ();
1522 }
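
/* For example (illustrative only): a register swap such as

       (set (reg:V4SI y)
            (vec_select:V4SI (reg:V4SI x)
                             (parallel [(const_int 2) (const_int 3)
                                        (const_int 0) (const_int 1)])))

   is replaced above by the plain copy (set (reg:V4SI y) (reg:V4SI x)),
   which subsequent forward propagation can usually remove entirely.  */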
1523
1524 /* INSN is known to contain a SUBREG, which we can normally handle,
1525 but if the SUBREG itself contains a MULT then we need to leave it alone
1526 to avoid turning a mult_hipart into a mult_lopart, for example. */
1527 static bool
1528 has_part_mult (rtx_insn *insn)
1529 {
1530 rtx body = PATTERN (insn);
1531 if (GET_CODE (body) != SET)
1532 return false;
1533 rtx src = SET_SRC (body);
1534 if (GET_CODE (src) != SUBREG)
1535 return false;
1536 rtx inner = XEXP (src, 0);
1537 return (GET_CODE (inner) == MULT);
1538 }
1539
1540 /* Make NEW_MEM_EXP's attributes and flags resemble those of
1541 ORIGINAL_MEM_EXP. */
1542 static void
1543 mimic_memory_attributes_and_flags (rtx new_mem_exp, const_rtx original_mem_exp)
1544 {
1545 RTX_FLAG (new_mem_exp, jump) = RTX_FLAG (original_mem_exp, jump);
1546 RTX_FLAG (new_mem_exp, call) = RTX_FLAG (original_mem_exp, call);
1547 RTX_FLAG (new_mem_exp, unchanging) = RTX_FLAG (original_mem_exp, unchanging);
1548 RTX_FLAG (new_mem_exp, volatil) = RTX_FLAG (original_mem_exp, volatil);
1549 RTX_FLAG (new_mem_exp, frame_related) =
1550 RTX_FLAG (original_mem_exp, frame_related);
1551
1552 /* The following fields may not be used with MEM subexpressions. */
1553 RTX_FLAG (new_mem_exp, in_struct) = RTX_FLAG (original_mem_exp, in_struct);
1554 RTX_FLAG (new_mem_exp, return_val) = RTX_FLAG (original_mem_exp, return_val);
1555
1556 struct mem_attrs original_attrs = *get_mem_attrs(original_mem_exp);
1557
1558 alias_set_type set = original_attrs.alias;
1559 set_mem_alias_set (new_mem_exp, set);
1560
1561 addr_space_t addrspace = original_attrs.addrspace;
1562 set_mem_addr_space (new_mem_exp, addrspace);
1563
1564 unsigned int align = original_attrs.align;
1565 set_mem_align (new_mem_exp, align);
1566
1567 tree expr = original_attrs.expr;
1568 set_mem_expr (new_mem_exp, expr);
1569
1570 if (original_attrs.offset_known_p)
1571 {
1572 HOST_WIDE_INT offset = original_attrs.offset;
1573 set_mem_offset (new_mem_exp, offset);
1574 }
1575 else
1576 clear_mem_offset (new_mem_exp);
1577
1578 if (original_attrs.size_known_p)
1579 {
1580 HOST_WIDE_INT size = original_attrs.size;
1581 set_mem_size (new_mem_exp, size);
1582 }
1583 else
1584 clear_mem_size (new_mem_exp);
1585 }
1586
1587 /* Generate an rtx expression to represent use of the stvx insn to store
1588 the value in register SRC_EXP into the memory represented by
1589 DEST_EXP, with vector mode MODE. */
1590 rtx
1591 rs6000_gen_stvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
1592 {
1593 rtx stvx;
1594
1595 if (mode == V16QImode)
1596 stvx = gen_altivec_stvx_v16qi (src_exp, dest_exp);
1597 else if (mode == V8HImode)
1598 stvx = gen_altivec_stvx_v8hi (src_exp, dest_exp);
1599 #ifdef HAVE_V8HFmode
1600 else if (mode == V8HFmode)
1601 stvx = gen_altivec_stvx_v8hf (src_exp, dest_exp);
1602 #endif
1603 else if (mode == V4SImode)
1604 stvx = gen_altivec_stvx_v4si (src_exp, dest_exp);
1605 else if (mode == V4SFmode)
1606 stvx = gen_altivec_stvx_v4sf (src_exp, dest_exp);
1607 else if (mode == V2DImode)
1608 stvx = gen_altivec_stvx_v2di (src_exp, dest_exp);
1609 else if (mode == V2DFmode)
1610 stvx = gen_altivec_stvx_v2df (src_exp, dest_exp);
1611 else if (mode == V1TImode)
1612 stvx = gen_altivec_stvx_v1ti (src_exp, dest_exp);
1613 else
1614 /* KFmode, TFmode, other modes not expected in this context. */
1615 gcc_unreachable ();
1616
1617 rtx new_mem_exp = SET_DEST (PATTERN (stvx));
1618 mimic_memory_attributes_and_flags (new_mem_exp, dest_exp);
1619 return stvx;
1620 }
1621
1622 /* Given that STORE_INSN represents an aligned store-with-swap of a
1623 swapped value, replace the store with an aligned store (without
1624 swap) and replace the swap with a copy insn. */
1625 static void
1626 replace_swapped_aligned_store (swap_web_entry *insn_entry,
1627 rtx_insn *store_insn)
1628 {
1629 unsigned uid = INSN_UID (store_insn);
1630 gcc_assert (insn_entry[uid].is_swap && insn_entry[uid].is_store);
1631
1632 rtx body = PATTERN (store_insn);
1633 rtx dest_address = XEXP (SET_DEST (body), 0);
1634 rtx swap_reg = XEXP (SET_SRC (body), 0);
1635 gcc_assert (REG_P (dest_address)
1636 || rs6000_sum_of_two_registers_p (dest_address));
1637
1638 /* Find the swap instruction that provides the value to be stored by
1639 this store-with-swap instruction. */
1640 struct df_insn_info *insn_info = DF_INSN_INFO_GET (store_insn);
1641 df_ref use;
1642 rtx_insn *swap_insn = NULL;
1643 unsigned uid2 = 0;
1644 FOR_EACH_INSN_INFO_USE (use, insn_info)
1645 {
1646 struct df_link *def_link = DF_REF_CHAIN (use);
1647
1648 /* If this use is not of the candidate swap register, skip it;
1649 we are only interested in the use fed by the swap insn. */
1650 if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
1651 continue;
1652
1653 /* If there is no def or the def is artificial or there are
1654 multiple defs, we should not be here. */
1655 gcc_assert (def_link && def_link->ref && !def_link->next
1656 && !DF_REF_IS_ARTIFICIAL (def_link->ref));
1657
1658 swap_insn = DF_REF_INSN (def_link->ref);
1659 uid2 = INSN_UID (swap_insn);
1660
1661 /* If this source value is not a simple swap, we should not be here. */
1662 gcc_assert (insn_entry[uid2].is_swap && !insn_entry[uid2].is_load
1663 && !insn_entry[uid2].is_store);
1664
1665 /* We've processed the use we care about, so break out of
1666 this loop. */
1667 break;
1668 }
1669
1670 /* At this point, swap_insn and uid2 represent the swap instruction
1671 that feeds the store. */
1672 gcc_assert (swap_insn);
1673 rtx set = single_set (store_insn);
1674 gcc_assert (set);
1675 rtx dest_exp = SET_DEST (set);
1676 rtx src_exp = XEXP (SET_SRC (body), 0);
1677 enum machine_mode mode = GET_MODE (dest_exp);
1678 gcc_assert (MEM_P (dest_exp));
1679 gcc_assert (MEM_ALIGN (dest_exp) >= 128);
1680
1681 /* Replace the store with an aligned stvx insn. */
1682 rtx stvx;
1683 stvx = rs6000_gen_stvx (mode, dest_exp, src_exp);
1684
1685 rtx_insn *new_insn = emit_insn_before (stvx, store_insn);
1686 rtx new_body = PATTERN (new_insn);
1687
1688 gcc_assert ((GET_CODE (new_body) == SET)
1689 && MEM_P (SET_DEST (new_body)));
1690
1691 set_block_for_insn (new_insn, BLOCK_FOR_INSN (store_insn));
1692 df_insn_rescan (new_insn);
1693
1694 df_insn_delete (store_insn);
1695 remove_insn (store_insn);
1696 store_insn->set_deleted ();
1697
1698 /* Replace the swap with a copy. */
1699 uid2 = INSN_UID (swap_insn);
1700 mark_swaps_for_removal (insn_entry, uid2);
1701 replace_swap_with_copy (insn_entry, uid2);
1702 }
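
/* The following is an illustrative sketch only; it is not used by the
   pass, and the helper name is hypothetical.  The replacements above and
   below rely on the fact that a doubleword swap is an involution:
   exchanging the two 8-byte halves of a 16-byte value twice restores the
   original.  Hence a swap whose value is consumed only by a swapping
   store (or produced only by a swapping load) cancels against that
   memory access, and the swap can become a simple register copy.  */

static void ATTRIBUTE_UNUSED
p8swap_example_swap_doublewords (unsigned char buf[16])
{
  /* Exchange bytes 0..7 with bytes 8..15.  Applying this twice is the
     identity, which is what makes the swap removal above correct.  */
  for (int i = 0; i < 8; i++)
    {
      unsigned char tmp = buf[i];
      buf[i] = buf[i + 8];
      buf[i + 8] = tmp;
    }
}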
1703
1704 /* Generate an rtx expression to represent use of the lvx insn to load
1705 from memory SRC_EXP into register DEST_EXP with vector mode MODE. */
1706 rtx
1707 rs6000_gen_lvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
1708 {
1709 rtx lvx;
1710
1711 if (mode == V16QImode)
1712 lvx = gen_altivec_lvx_v16qi (dest_exp, src_exp);
1713 else if (mode == V8HImode)
1714 lvx = gen_altivec_lvx_v8hi (dest_exp, src_exp);
1715 #ifdef HAVE_V8HFmode
1716 else if (mode == V8HFmode)
1717 lvx = gen_altivec_lvx_v8hf (dest_exp, src_exp);
1718 #endif
1719 else if (mode == V4SImode)
1720 lvx = gen_altivec_lvx_v4si (dest_exp, src_exp);
1721 else if (mode == V4SFmode)
1722 lvx = gen_altivec_lvx_v4sf (dest_exp, src_exp);
1723 else if (mode == V2DImode)
1724 lvx = gen_altivec_lvx_v2di (dest_exp, src_exp);
1725 else if (mode == V2DFmode)
1726 lvx = gen_altivec_lvx_v2df (dest_exp, src_exp);
1727 else if (mode == V1TImode)
1728 lvx = gen_altivec_lvx_v1ti (dest_exp, src_exp);
1729 else
1730 /* KFmode, TFmode, other modes not expected in this context. */
1731 gcc_unreachable ();
1732
1733 rtx new_mem_exp = SET_SRC (PATTERN (lvx));
1734 mimic_memory_attributes_and_flags (new_mem_exp, src_exp);
1735
1736 return lvx;
1737 }
1738
1739 /* Given that SWAP_INSN represents a swap of an aligned
1740 load-with-swap, replace the load with an aligned load (without
1741 swap) and replace the swap with a copy insn. */
1742 static void
1743 replace_swapped_aligned_load (swap_web_entry *insn_entry, rtx swap_insn)
1744 {
1745 /* Find the load. */
1746 unsigned uid = INSN_UID (swap_insn);
1747 /* Only call this if quad_aligned_load_p (swap_insn). */
1748 gcc_assert (insn_entry[uid].is_swap && !insn_entry[uid].is_load);
1749 struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
1750
1751   /* Since SWAP_INSN is known to represent a swap instruction, we know
1752      it "uses" only one input variable.  */
1753 df_ref use = DF_INSN_INFO_USES (insn_info);
1754
1755 /* Figure out where this input variable is defined. */
1756 struct df_link *def_link = DF_REF_CHAIN (use);
1757   gcc_assert (def_link && def_link->ref
1758               && !DF_REF_IS_ARTIFICIAL (def_link->ref)
1759               && !def_link->next);
1760
1761 rtx_insn *def_insn = DF_REF_INSN (def_link->ref);
1762 unsigned uid2 = INSN_UID (def_insn);
1763
1764 /* We're expecting a load-with-swap insn. */
1765 gcc_assert (insn_entry[uid2].is_load && insn_entry[uid2].is_swap);
1766
1767   /* We expect this to be a set from memory, with the source representing
1768      a swap (indicated by code VEC_SELECT or a doubleword rotate).  */
1769 rtx body = PATTERN (def_insn);
1770 gcc_assert ((GET_CODE (body) == SET)
1771 && (GET_CODE (SET_SRC (body)) == VEC_SELECT
1772 || pattern_is_rotate64 (body))
1773 && MEM_P (XEXP (SET_SRC (body), 0)));
1774
1775 rtx src_exp = XEXP (SET_SRC (body), 0);
1776 enum machine_mode mode = GET_MODE (src_exp);
1777 rtx lvx = rs6000_gen_lvx (mode, SET_DEST (body), src_exp);
1778
1779 rtx_insn *new_insn = emit_insn_before (lvx, def_insn);
1780 rtx new_body = PATTERN (new_insn);
1781
1782 gcc_assert ((GET_CODE (new_body) == SET)
1783 && MEM_P (SET_SRC (new_body)));
1784
1785 set_block_for_insn (new_insn, BLOCK_FOR_INSN (def_insn));
1786 df_insn_rescan (new_insn);
1787
1788 df_insn_delete (def_insn);
1789 remove_insn (def_insn);
1790 def_insn->set_deleted ();
1791
1792 /* Replace the swap with a copy. */
1793 mark_swaps_for_removal (insn_entry, uid);
1794 replace_swap_with_copy (insn_entry, uid);
1795 }
1796
1797 /* Given that SWAP_INSN represents a swap of a load of a constant
1798 vector value, replace with a single instruction that loads a
1799 swapped variant of the original constant.
1800
1801 The "natural" representation of a byte array in memory is the same
1802 for big endian and little endian.
1803
1804 unsigned char byte_array[] =
1805 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f };
1806
1807 However, when loaded into a vector register, the representation
1808 depends on endian conventions.
1809
1810 In big-endian mode, the register holds:
1811
1812 MSB LSB
1813 [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]
1814
1815 In little-endian mode, the register holds:
1816
1817 MSB LSB
1818 [ f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ]
1819
1820 Word arrays require different handling. Consider the word array:
1821
1822 unsigned int word_array[] =
1823 { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f };
1824
1825 The in-memory representation depends on endian configuration. The
1826 equivalent array, declared as a byte array, in memory would be:
1827
1828 unsigned char big_endian_word_array_data[] =
1829 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f }
1830
1831 unsigned char little_endian_word_array_data[] =
1832 { 3, 2, 1, 0, 7, 6, 5, 4, b, a, 9, 8, f, e, d, c }
1833
1834 In big-endian mode, the register holds:
1835
1836 MSB LSB
1837 [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]
1838
1839 In little-endian mode, the register holds:
1840
1841 MSB LSB
1842 [ c, d, e, f, 8, 9, a, b, 4, 5, 6, 7, 0, 1, 2, 3 ]
1843
1844
1845    Similar transformations apply to vectors of half-words and vectors
1846    of double-words.
1847
1848 For now, don't handle vectors of quad-precision values. Just return.
1849 A better solution is to fix the code generator to emit lvx/stvx for
1850 those. */
1851 static void
1852 replace_swapped_load_constant (swap_web_entry *insn_entry, rtx swap_insn)
1853 {
1854 /* Find the load. */
1855 struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
1856 rtx_insn *load_insn;
1857 df_ref use = DF_INSN_INFO_USES (insn_info);
1858 struct df_link *def_link = DF_REF_CHAIN (use);
1859 gcc_assert (def_link && !def_link->next);
1860
1861 load_insn = DF_REF_INSN (def_link->ref);
1862 gcc_assert (load_insn);
1863
1864 /* Find the TOC-relative symbol access. */
1865 insn_info = DF_INSN_INFO_GET (load_insn);
1866 use = DF_INSN_INFO_USES (insn_info);
1867
1868 def_link = DF_REF_CHAIN (use);
1869 gcc_assert (def_link && !def_link->next);
1870
1871 rtx_insn *tocrel_insn = DF_REF_INSN (def_link->ref);
1872 gcc_assert (tocrel_insn);
1873
1874   /* Find the embedded CONST_VECTOR.  We must call toc_relative_expr_p
1875      to set tocrel_base; the call would otherwise be unnecessary, since
1876      we have already established that it will return true.  */
1877 rtx base, offset;
1878 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
1879 const_rtx tocrel_base;
1880
1881 /* There is an extra level of indirection for small/large code models. */
1882 if (MEM_P (tocrel_expr))
1883 tocrel_expr = XEXP (tocrel_expr, 0);
1884
1885 if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
1886 gcc_unreachable ();
1887
1888 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
1889 rtx const_vector = get_pool_constant (base);
1890
1891   /* With the extra indirection, the first call to get_pool_constant
1892      produces a SYMBOL_REF rather than the constant itself, so call it
1893      again to retrieve the real constant.  */
1894 if (SYMBOL_REF_P (const_vector))
1895 const_vector = get_pool_constant (const_vector);
1896 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
1897
1898 rtx new_mem;
1899 enum machine_mode mode = GET_MODE (const_vector);
1900
1901 /* Create an adjusted constant from the original constant. */
1902 if (mode == V1TImode)
1903     /* V1TImode is not handled; leave the existing sequence as is.  */
1904 return;
1905 else if (mode == V16QImode)
1906 {
1907 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (16));
1908 int i;
1909
1910 for (i = 0; i < 16; i++)
1911 XVECEXP (vals, 0, ((i+8) % 16)) = XVECEXP (const_vector, 0, i);
1912 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1913 new_mem = force_const_mem (mode, new_const_vector);
1914 }
1915 else if ((mode == V8HImode)
1916 #ifdef HAVE_V8HFmode
1917 || (mode == V8HFmode)
1918 #endif
1919 )
1920 {
1921 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (8));
1922 int i;
1923
1924 for (i = 0; i < 8; i++)
1925 XVECEXP (vals, 0, ((i+4) % 8)) = XVECEXP (const_vector, 0, i);
1926 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1927 new_mem = force_const_mem (mode, new_const_vector);
1928 }
1929 else if ((mode == V4SImode) || (mode == V4SFmode))
1930 {
1931 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (4));
1932 int i;
1933
1934 for (i = 0; i < 4; i++)
1935 XVECEXP (vals, 0, ((i+2) % 4)) = XVECEXP (const_vector, 0, i);
1936 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1937 new_mem = force_const_mem (mode, new_const_vector);
1938 }
1939 else if ((mode == V2DImode) || (mode == V2DFmode))
1940 {
1941 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (2));
1942 int i;
1943
1944 for (i = 0; i < 2; i++)
1945 XVECEXP (vals, 0, ((i+1) % 2)) = XVECEXP (const_vector, 0, i);
1946 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1947 new_mem = force_const_mem (mode, new_const_vector);
1948 }
1949 else
1950 {
1951 /* We do not expect other modes to be constant-load-swapped. */
1952 gcc_unreachable ();
1953 }
1954
1955 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
1956 can't recognize. Force the SYMBOL_REF into a register. */
1957 if (!REG_P (XEXP (new_mem, 0))) {
1958 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
1959 XEXP (new_mem, 0) = base_reg;
1960
1961     /* Move the newly created insn ahead of the load insn.  The last
1962        insn emitted is the one that forced new_mem into a register.  */
1963 rtx_insn *force_insn = get_last_insn ();
1964 /* Remove this insn from the end of the instruction sequence. */
1965 remove_insn (force_insn);
1966 rtx_insn *before_load_insn = PREV_INSN (load_insn);
1967
1968     /* And insert it back into the sequence immediately before the load
1969        insn, so the new expression will be available when the existing
1970        load is modified to load the swapped constant.  */
1971 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
1972 df_insn_rescan (before_load_insn);
1973 df_insn_rescan (force_insn);
1974 }
1975
1976 /* Replace the MEM in the load instruction and rescan it. */
1977 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
1978 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
1979 df_insn_rescan (load_insn);
1980
1981 unsigned int uid = INSN_UID (swap_insn);
1982 mark_swaps_for_removal (insn_entry, uid);
1983 replace_swap_with_copy (insn_entry, uid);
1984 }
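
/* Illustrative sketch only (hypothetical helper, not called by the
   pass): the index arithmetic used above is a rotation of the constant's
   elements by half the vector length, which is exactly a doubleword swap
   expressed on element indices.  Element I of the original constant
   lands at position (I + NELTS / 2) % NELTS of the new constant, for
   NELTS of 16, 8, 4 or 2.  */

static void ATTRIBUTE_UNUSED
p8swap_example_rotate_elements (const int *orig, int *swapped, int nelts)
{
  /* NELTS is the number of elements in the vector constant.  */
  for (int i = 0; i < nelts; i++)
    swapped[(i + nelts / 2) % nelts] = orig[i];
}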
1985
1986 /* Dump the swap table to DUMP_FILE. */
1987 static void
1988 dump_swap_insn_table (swap_web_entry *insn_entry)
1989 {
1990 int e = get_max_uid ();
1991 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
1992
1993 for (int i = 0; i < e; ++i)
1994 if (insn_entry[i].is_relevant)
1995 {
1996 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
1997 fprintf (dump_file, "%6d %6d ", i,
1998 pred_entry && pred_entry->insn
1999 ? INSN_UID (pred_entry->insn) : 0);
2000 if (insn_entry[i].is_load)
2001 fputs ("load ", dump_file);
2002 if (insn_entry[i].is_store)
2003 fputs ("store ", dump_file);
2004 if (insn_entry[i].is_swap)
2005 fputs ("swap ", dump_file);
2006 if (insn_entry[i].is_live_in)
2007 fputs ("live-in ", dump_file);
2008 if (insn_entry[i].is_live_out)
2009 fputs ("live-out ", dump_file);
2010 if (insn_entry[i].contains_subreg)
2011 fputs ("subreg ", dump_file);
2012 if (insn_entry[i].is_128_int)
2013 fputs ("int128 ", dump_file);
2014 if (insn_entry[i].is_call)
2015 fputs ("call ", dump_file);
2016 if (insn_entry[i].is_swappable)
2017 {
2018 fputs ("swappable ", dump_file);
2019 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
2020 fputs ("special:constvec ", dump_file);
2021 else if (insn_entry[i].special_handling == SH_SUBREG)
2022 fputs ("special:subreg ", dump_file);
2023 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
2024 fputs ("special:load ", dump_file);
2025 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
2026 fputs ("special:store ", dump_file);
2027 else if (insn_entry[i].special_handling == SH_EXTRACT)
2028 fputs ("special:extract ", dump_file);
2029 else if (insn_entry[i].special_handling == SH_SPLAT)
2030 fputs ("special:splat ", dump_file);
2031 else if (insn_entry[i].special_handling == SH_XXPERMDI)
2032 fputs ("special:xxpermdi ", dump_file);
2033 else if (insn_entry[i].special_handling == SH_CONCAT)
2034 fputs ("special:concat ", dump_file);
2035 else if (insn_entry[i].special_handling == SH_VPERM)
2036 fputs ("special:vperm ", dump_file);
2037 }
2038 if (insn_entry[i].web_not_optimizable)
2039 fputs ("unoptimizable ", dump_file);
2040 if (insn_entry[i].will_delete)
2041 fputs ("delete ", dump_file);
2042 fputs ("\n", dump_file);
2043 }
2044 fputs ("\n", dump_file);
2045 }
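
/* For reference, a (purely hypothetical) line of the dump produced above
   might look like

      47     12 load swap

   i.e. the insn's uid, the uid of its predecessor in the union-find
   structure (0 if none), followed by the names of the flags that are set
   for that insn.  */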
2046
2047 /* ALIGN is an rtx of the form (and ADDR (const_int -16)).  Return a
2048    copy of it with ADDR canonicalized to (reg) or (plus reg reg).
2049    Always return a new copy to avoid problems with combine.  */
2050 static rtx
2051 alignment_with_canonical_addr (rtx align)
2052 {
2053 rtx canon;
2054 rtx addr = XEXP (align, 0);
2055
2056 if (REG_P (addr))
2057 canon = addr;
2058
2059 else if (GET_CODE (addr) == PLUS)
2060 {
2061 rtx addrop0 = XEXP (addr, 0);
2062 rtx addrop1 = XEXP (addr, 1);
2063
2064 if (!REG_P (addrop0))
2065 addrop0 = force_reg (GET_MODE (addrop0), addrop0);
2066
2067 if (!REG_P (addrop1))
2068 addrop1 = force_reg (GET_MODE (addrop1), addrop1);
2069
2070 canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
2071 }
2072
2073 else
2074 canon = force_reg (GET_MODE (addr), addr);
2075
2076 return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
2077 }
2078
2079 /* Check whether INSN computes an alignment mask, and if so, return
2080    a fully-expanded rtx for the masking operation; otherwise return 0.  */
2081 static rtx
2082 alignment_mask (rtx_insn *insn)
2083 {
2084 rtx body = PATTERN (insn);
2085
2086 if (GET_CODE (body) != SET
2087 || GET_CODE (SET_SRC (body)) != AND
2088 || !REG_P (XEXP (SET_SRC (body), 0)))
2089 return 0;
2090
2091 rtx mask = XEXP (SET_SRC (body), 1);
2092
2093 if (CONST_INT_P (mask))
2094 {
2095 if (INTVAL (mask) == -16)
2096 return alignment_with_canonical_addr (SET_SRC (body));
2097 else
2098 return 0;
2099 }
2100
2101 if (!REG_P (mask))
2102 return 0;
2103
2104 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2105 df_ref use;
2106 rtx real_mask = 0;
2107
2108 FOR_EACH_INSN_INFO_USE (use, insn_info)
2109 {
2110 if (!rtx_equal_p (DF_REF_REG (use), mask))
2111 continue;
2112
2113 struct df_link *def_link = DF_REF_CHAIN (use);
2114 if (!def_link || def_link->next)
2115 return 0;
2116
2117 rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
2118 rtx const_body = PATTERN (const_insn);
2119 if (GET_CODE (const_body) != SET)
2120 return 0;
2121
2122 real_mask = SET_SRC (const_body);
2123
2124 if (!CONST_INT_P (real_mask)
2125 || INTVAL (real_mask) != -16)
2126 return 0;
2127 }
2128
2129 if (real_mask == 0)
2130 return 0;
2131
2132 return alignment_with_canonical_addr (SET_SRC (body));
2133 }
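
/* Illustrative sketch only (hypothetical helper, not used here): a mask
   of -16 clears the low four bits of an address, which mirrors the
   truncation the lvx/stvx hardware instructions apply to their effective
   address.  For example, an address of 0x1007 becomes 0x1000.  */

static unsigned HOST_WIDE_INT ATTRIBUTE_UNUSED
p8swap_example_align_down_16 (unsigned HOST_WIDE_INT addr)
{
  return addr & ~(unsigned HOST_WIDE_INT) 15;
}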
2134
2135 /* Given INSN that's a load or store based at BASE_REG, check if
2136 all of its feeding computations align its address on a 16-byte
2137 boundary. If so, return true and add all definition insns into
2138 AND_INSNS and their corresponding fully-expanded rtxes for the
2139 masking operations into AND_OPS. */
2140
2141 static bool
2142 find_alignment_op (rtx_insn *insn, rtx base_reg, vec<rtx_insn *> *and_insns,
2143 vec<rtx> *and_ops)
2144 {
2145 df_ref base_use;
2146 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2147 rtx and_operation = 0;
2148
2149 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
2150 {
2151 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
2152 continue;
2153
2154 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
2155 if (!base_def_link)
2156 return false;
2157
2158 while (base_def_link)
2159 {
2160 /* With stack-protector code enabled, and possibly in other
2161 circumstances, there may not be an associated insn for
2162 the def. */
2163 if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
2164 return false;
2165
2166 rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
2167 and_operation = alignment_mask (and_insn);
2168
2169           /* Stop if we find any def which doesn't align the address.  */
2170 if (!and_operation)
2171 return false;
2172
2173 and_insns->safe_push (and_insn);
2174 and_ops->safe_push (and_operation);
2175 base_def_link = base_def_link->next;
2176 }
2177 }
2178
2179 return and_operation;
2180 }
2181
2182 struct del_info { bool replace; rtx_insn *replace_insn; };
2183
2184 /* If INSN is the load for an lvx pattern, put it in canonical form. */
2185 static void
2186 recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
2187 {
2188 rtx body = PATTERN (insn);
2189 gcc_assert (GET_CODE (body) == SET
2190 && (GET_CODE (SET_SRC (body)) == VEC_SELECT
2191 || pattern_is_rotate64 (body))
2192 && MEM_P (XEXP (SET_SRC (body), 0)));
2193
2194 rtx mem = XEXP (SET_SRC (body), 0);
2195 rtx base_reg = XEXP (mem, 0);
2196
2197 auto_vec<rtx_insn *> and_insns;
2198 auto_vec<rtx> and_ops;
2199 bool is_any_def_and
2200 = find_alignment_op (insn, base_reg, &and_insns, &and_ops);
2201
2202 if (is_any_def_and)
2203 {
2204 gcc_assert (and_insns.length () == and_ops.length ());
2205 df_ref def;
2206 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2207 FOR_EACH_INSN_INFO_DEF (def, insn_info)
2208 {
2209 struct df_link *link = DF_REF_CHAIN (def);
2210 if (!link || link->next)
2211 break;
2212
2213 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
2214 if (!insn_is_swap_p (swap_insn)
2215 || insn_is_load_p (swap_insn)
2216 || insn_is_store_p (swap_insn))
2217 break;
2218
2219 /* Expected lvx pattern found. Change the swap to
2220 a copy, and propagate the AND operation into the
2221 load. */
2222 to_delete[INSN_UID (swap_insn)].replace = true;
2223 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
2224
2225 rtx new_reg = 0;
2226 rtx and_mask = 0;
2227 for (unsigned i = 0; i < and_insns.length (); i++)
2228 {
2229 /* However, first we must be sure that we make the
2230 base register from the AND operation available
2231 in case the register has been overwritten. Copy
2232 the base register to a new pseudo and use that
2233 as the base register of the AND operation in
2234 the new LVX instruction. */
2235 rtx_insn *and_insn = and_insns[i];
2236 rtx and_op = and_ops[i];
2237 rtx and_base = XEXP (and_op, 0);
2238 if (!new_reg)
2239 {
2240 new_reg = gen_reg_rtx (GET_MODE (and_base));
2241 and_mask = XEXP (and_op, 1);
2242 }
2243 rtx copy = gen_rtx_SET (new_reg, and_base);
2244 rtx_insn *new_insn = emit_insn_after (copy, and_insn);
2245 set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
2246 df_insn_rescan (new_insn);
2247 }
2248
2249 XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, and_mask);
2250 SET_SRC (body) = mem;
2251 INSN_CODE (insn) = -1; /* Force re-recognition. */
2252 df_insn_rescan (insn);
2253
2254 if (dump_file)
2255 fprintf (dump_file, "lvx opportunity found at %d\n",
2256 INSN_UID (insn));
2257 }
2258 }
2259 }
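
/* A sketch of the recombination above, with hypothetical pseudo-register
   numbers.  Before the transformation, the masked base register feeds a
   load-with-swap whose only use is another swap:

     (set (reg:DI 120) (and:DI (reg:DI 119) (const_int -16)))
     (set (reg:V4SI 121)
	  (vec_select:V4SI (mem:V4SI (reg:DI 120)) (parallel [2 3 0 1])))
     (set (reg:V4SI 122)
	  (vec_select:V4SI (reg:V4SI 121) (parallel [2 3 0 1])))

   Afterwards, the AND is folded into the load address so that it matches
   the canonical lvx form, the pre-mask base is copied to a fresh pseudo
   in case register 119 is later overwritten, and the trailing swap is
   replaced by a copy (the original AND may then become dead):

     (set (reg:DI 123) (reg:DI 119))
     (set (reg:V4SI 121) (mem:V4SI (and:DI (reg:DI 123) (const_int -16))))
     (set (reg:V4SI 122) (reg:V4SI 121))  */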
2260
2261 /* If INSN is the store for an stvx pattern, put it in canonical form. */
2262 static void
2263 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
2264 {
2265 rtx body = PATTERN (insn);
2266 gcc_assert (GET_CODE (body) == SET
2267 && MEM_P (SET_DEST (body))
2268 && (GET_CODE (SET_SRC (body)) == VEC_SELECT
2269 || pattern_is_rotate64 (body)));
2270 rtx mem = SET_DEST (body);
2271 rtx base_reg = XEXP (mem, 0);
2272
2273 auto_vec<rtx_insn *> and_insns;
2274 auto_vec<rtx> and_ops;
2275 bool is_any_def_and
2276 = find_alignment_op (insn, base_reg, &and_insns, &and_ops);
2277
2278 if (is_any_def_and)
2279 {
2280 gcc_assert (and_insns.length () == and_ops.length ());
2281 rtx src_reg = XEXP (SET_SRC (body), 0);
2282 df_ref src_use;
2283 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2284 FOR_EACH_INSN_INFO_USE (src_use, insn_info)
2285 {
2286 if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
2287 continue;
2288
2289 struct df_link *link = DF_REF_CHAIN (src_use);
2290 if (!link || link->next)
2291 break;
2292
2293 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
2294 if (!insn_is_swap_p (swap_insn)
2295 || insn_is_load_p (swap_insn)
2296 || insn_is_store_p (swap_insn))
2297 break;
2298
2299 /* Expected stvx pattern found. Change the swap to
2300 a copy, and propagate the AND operation into the
2301 store. */
2302 to_delete[INSN_UID (swap_insn)].replace = true;
2303 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
2304
2305 rtx new_reg = 0;
2306 rtx and_mask = 0;
2307 for (unsigned i = 0; i < and_insns.length (); i++)
2308 {
2309 /* However, first we must be sure that we make the
2310 base register from the AND operation available
2311 in case the register has been overwritten. Copy
2312 the base register to a new pseudo and use that
2313 as the base register of the AND operation in
2314 the new STVX instruction. */
2315 rtx_insn *and_insn = and_insns[i];
2316 rtx and_op = and_ops[i];
2317 rtx and_base = XEXP (and_op, 0);
2318 if (!new_reg)
2319 {
2320 new_reg = gen_reg_rtx (GET_MODE (and_base));
2321 and_mask = XEXP (and_op, 1);
2322 }
2323 rtx copy = gen_rtx_SET (new_reg, and_base);
2324 rtx_insn *new_insn = emit_insn_after (copy, and_insn);
2325 set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
2326 df_insn_rescan (new_insn);
2327 }
2328
2329 XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, and_mask);
2330 SET_SRC (body) = src_reg;
2331 INSN_CODE (insn) = -1; /* Force re-recognition. */
2332 df_insn_rescan (insn);
2333
2334 if (dump_file)
2335 fprintf (dump_file, "stvx opportunity found at %d\n",
2336 INSN_UID (insn));
2337 }
2338 }
2339 }
2340
2341 /* Look for patterns created from builtin lvx and stvx calls, and
2342 canonicalize them to be properly recognized as such. */
2343 static void
2344 recombine_lvx_stvx_patterns (function *fun)
2345 {
2346 int i;
2347 basic_block bb;
2348 rtx_insn *insn;
2349
2350 int num_insns = get_max_uid ();
2351 del_info *to_delete = XCNEWVEC (del_info, num_insns);
2352
2353 FOR_ALL_BB_FN (bb, fun)
2354 FOR_BB_INSNS (bb, insn)
2355 {
2356 if (!NONDEBUG_INSN_P (insn))
2357 continue;
2358
2359 if (insn_is_load_p (insn) && insn_is_swap_p (insn))
2360 recombine_lvx_pattern (insn, to_delete);
2361 else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
2362 recombine_stvx_pattern (insn, to_delete);
2363 }
2364
2365 /* Turning swaps into copies is delayed until now, to avoid problems
2366 with deleting instructions during the insn walk. */
2367 for (i = 0; i < num_insns; i++)
2368 if (to_delete[i].replace)
2369 {
2370 rtx swap_body = PATTERN (to_delete[i].replace_insn);
2371 rtx src_reg = XEXP (SET_SRC (swap_body), 0);
2372 rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
2373 rtx_insn *new_insn = emit_insn_before (copy,
2374 to_delete[i].replace_insn);
2375 set_block_for_insn (new_insn,
2376 BLOCK_FOR_INSN (to_delete[i].replace_insn));
2377 df_insn_rescan (new_insn);
2378 df_insn_delete (to_delete[i].replace_insn);
2379 remove_insn (to_delete[i].replace_insn);
2380 to_delete[i].replace_insn->set_deleted ();
2381 }
2382
2383 free (to_delete);
2384 }
2385
2386 /* Main entry point for this pass. */
2387 unsigned int
2388 rs6000_analyze_swaps (function *fun)
2389 {
2390 swap_web_entry *insn_entry;
2391 basic_block bb;
2392 rtx_insn *insn, *curr_insn = 0;
2393
2394 /* Dataflow analysis for use-def chains. */
2395 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2396 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2397 df_analyze ();
2398 df_set_flags (DF_DEFER_INSN_RESCAN);
2399
2400 /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */
2401 recombine_lvx_stvx_patterns (fun);
2402
2403 /* Rebuild ud- and du-chains. */
2404 df_remove_problem (df_chain);
2405 df_process_deferred_rescans ();
2406 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2407 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2408 df_analyze ();
2409 df_set_flags (DF_DEFER_INSN_RESCAN);
2410
2411 /* Allocate structure to represent webs of insns. */
2412 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2413
2414 /* Walk the insns to gather basic data. */
2415 FOR_ALL_BB_FN (bb, fun)
2416 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2417 {
2418 unsigned int uid = INSN_UID (insn);
2419 if (NONDEBUG_INSN_P (insn))
2420 {
2421 insn_entry[uid].insn = insn;
2422
2423 if (GET_CODE (insn) == CALL_INSN)
2424 insn_entry[uid].is_call = 1;
2425
2426 /* Walk the uses and defs to see if we mention vector regs.
2427 Record any constraints on optimization of such mentions. */
2428 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2429 df_ref mention;
2430 FOR_EACH_INSN_INFO_USE (mention, insn_info)
2431 {
2432 /* We use DF_REF_REAL_REG here to get inside any subregs. */
2433 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
2434
2435 /* If a use gets its value from a call insn, it will be
2436 a hard register and will look like (reg:V4SI 3 3).
2437 The df analysis creates two mentions for GPR3 and GPR4,
2438 both DImode. We must recognize this and treat it as a
2439 vector mention to ensure the call is unioned with this
2440 use. */
2441 if (mode == DImode && DF_REF_INSN_INFO (mention))
2442 {
2443 rtx feeder = DF_REF_INSN (mention);
2444 /* FIXME: It is pretty hard to get from the df mention
2445 to the mode of the use in the insn. We arbitrarily
2446 pick a vector mode here, even though the use might
2447 be a real DImode. We can be too conservative
2448 (create a web larger than necessary) because of
2449 this, so consider eventually fixing this. */
2450 if (GET_CODE (feeder) == CALL_INSN)
2451 mode = V4SImode;
2452 }
2453
2454 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
2455 {
2456 insn_entry[uid].is_relevant = 1;
2457 if (mode == TImode || mode == V1TImode
2458 || FLOAT128_VECTOR_P (mode))
2459 insn_entry[uid].is_128_int = 1;
2460 if (DF_REF_INSN_INFO (mention))
2461 insn_entry[uid].contains_subreg
2462 = !rtx_equal_p (DF_REF_REG (mention),
2463 DF_REF_REAL_REG (mention));
2464 union_defs (insn_entry, insn, mention);
2465 }
2466 }
2467 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
2468 {
2469 /* We use DF_REF_REAL_REG here to get inside any subregs. */
2470 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
2471
2472 /* If we're loading up a hard vector register for a call,
2473 it looks like (set (reg:V4SI 9 9) (...)). The df
2474 analysis creates two mentions for GPR9 and GPR10, both
2475 DImode. So relying on the mode from the mentions
2476 isn't sufficient to ensure we union the call into the
2477 web with the parameter setup code. */
2478               if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
2479                   && ALTIVEC_OR_VSX_VECTOR_MODE
2480                        (GET_MODE (SET_DEST (PATTERN (insn)))))
2481                 mode = GET_MODE (SET_DEST (PATTERN (insn)));
2481
2482 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
2483 {
2484 insn_entry[uid].is_relevant = 1;
2485 if (mode == TImode || mode == V1TImode
2486 || FLOAT128_VECTOR_P (mode))
2487 insn_entry[uid].is_128_int = 1;
2488 if (DF_REF_INSN_INFO (mention))
2489 insn_entry[uid].contains_subreg
2490 = !rtx_equal_p (DF_REF_REG (mention),
2491 DF_REF_REAL_REG (mention));
2492 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
2493 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
2494 insn_entry[uid].is_live_out = 1;
2495 union_uses (insn_entry, insn, mention);
2496 }
2497 }
2498
2499 if (insn_entry[uid].is_relevant)
2500 {
2501 /* Determine if this is a load or store. */
2502 insn_entry[uid].is_load = insn_is_load_p (insn);
2503 insn_entry[uid].is_store = insn_is_store_p (insn);
2504
2505 /* Determine if this is a doubleword swap. If not,
2506 determine whether it can legally be swapped. */
2507 if (insn_is_swap_p (insn))
2508 insn_entry[uid].is_swap = 1;
2509 else
2510 {
2511 unsigned int special = SH_NONE;
2512 insn_entry[uid].is_swappable
2513 = insn_is_swappable_p (insn_entry, insn, &special);
2514 if (special != SH_NONE && insn_entry[uid].contains_subreg)
2515 insn_entry[uid].is_swappable = 0;
2516 else if (special != SH_NONE)
2517 insn_entry[uid].special_handling = special;
2518 else if (insn_entry[uid].contains_subreg
2519 && has_part_mult (insn))
2520 insn_entry[uid].is_swappable = 0;
2521 else if (insn_entry[uid].contains_subreg)
2522 insn_entry[uid].special_handling = SH_SUBREG;
2523 }
2524 }
2525 }
2526 }
2527
2528 if (dump_file)
2529 {
2530 fprintf (dump_file, "\nSwap insn entry table when first built\n");
2531 dump_swap_insn_table (insn_entry);
2532 }
2533
2534 /* Record unoptimizable webs. */
2535 unsigned e = get_max_uid (), i;
2536 for (i = 0; i < e; ++i)
2537 {
2538 if (!insn_entry[i].is_relevant)
2539 continue;
2540
2541 swap_web_entry *root
2542 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
2543
2544 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
2545 || (insn_entry[i].contains_subreg
2546 && insn_entry[i].special_handling != SH_SUBREG)
2547 || insn_entry[i].is_128_int || insn_entry[i].is_call
2548 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
2549 root->web_not_optimizable = 1;
2550
2551       /* If we have loads or stores that aren't permuting, then the
2552          optimization isn't appropriate.  */
2553 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
2554 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
2555 root->web_not_optimizable = 1;
2556
2557 /* If we have a swap that is both fed by a permuting load
2558 and a feeder of a permuting store, then the optimization
2559 isn't appropriate. (Consider vec_xl followed by vec_xst_be.) */
2560 else if (insn_entry[i].is_swap && !insn_entry[i].is_load
2561 && !insn_entry[i].is_store
2562 && swap_feeds_both_load_and_store (&insn_entry[i]))
2563 root->web_not_optimizable = 1;
2564
2565 /* If we have permuting loads or stores that are not accompanied
2566 by a register swap, the optimization isn't appropriate. */
2567 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
2568 {
2569 rtx insn = insn_entry[i].insn;
2570 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2571 df_ref def;
2572
2573 FOR_EACH_INSN_INFO_DEF (def, insn_info)
2574 {
2575 struct df_link *link = DF_REF_CHAIN (def);
2576
2577 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
2578 {
2579 root->web_not_optimizable = 1;
2580 break;
2581 }
2582 }
2583 }
2584 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
2585 {
2586 rtx insn = insn_entry[i].insn;
2587 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2588 df_ref use;
2589
2590 FOR_EACH_INSN_INFO_USE (use, insn_info)
2591 {
2592 struct df_link *link = DF_REF_CHAIN (use);
2593
2594 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
2595 {
2596 root->web_not_optimizable = 1;
2597 break;
2598 }
2599 }
2600 }
2601 }
2602
2603 if (dump_file)
2604 {
2605 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
2606 dump_swap_insn_table (insn_entry);
2607 }
2608
2609 /* For each load and store in an optimizable web (which implies
2610 the loads and stores are permuting), find the associated
2611 register swaps and mark them for removal. Due to various
2612 optimizations we may mark the same swap more than once. Also
2613 perform special handling for swappable insns that require it. */
2614 for (i = 0; i < e; ++i)
2615 if ((insn_entry[i].is_load || insn_entry[i].is_store)
2616 && insn_entry[i].is_swap)
2617 {
2618 swap_web_entry* root_entry
2619 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
2620 if (!root_entry->web_not_optimizable)
2621 mark_swaps_for_removal (insn_entry, i);
2622 }
2623 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
2624 {
2625 swap_web_entry* root_entry
2626 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
2627 if (!root_entry->web_not_optimizable)
2628 handle_special_swappables (insn_entry, i);
2629 }
2630
2631 /* Now delete the swaps marked for removal. */
2632 for (i = 0; i < e; ++i)
2633 if (insn_entry[i].will_delete)
2634 replace_swap_with_copy (insn_entry, i);
2635
2636 /* Clean up. */
2637 free (insn_entry);
2638
2639 /* Use a second pass over rtl to detect that certain vector values
2640 fetched from or stored to memory on quad-word aligned addresses
2641 can use lvx/stvx without swaps. */
2642
2643 /* First, rebuild ud chains. */
2644 df_remove_problem (df_chain);
2645 df_process_deferred_rescans ();
2646 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2647 df_chain_add_problem (DF_UD_CHAIN);
2648 df_analyze ();
2649
2650 swap_web_entry *pass2_insn_entry;
2651 pass2_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2652
2653 /* Walk the insns to gather basic data. */
2654 FOR_ALL_BB_FN (bb, fun)
2655 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2656 {
2657 unsigned int uid = INSN_UID (insn);
2658 if (NONDEBUG_INSN_P (insn))
2659 {
2660 pass2_insn_entry[uid].insn = insn;
2661
2662 pass2_insn_entry[uid].is_relevant = 1;
2663 pass2_insn_entry[uid].is_load = insn_is_load_p (insn);
2664 pass2_insn_entry[uid].is_store = insn_is_store_p (insn);
2665
2666 /* Determine if this is a doubleword swap. If not,
2667 determine whether it can legally be swapped. */
2668 if (insn_is_swap_p (insn))
2669 pass2_insn_entry[uid].is_swap = 1;
2670 }
2671 }
2672
2673 e = get_max_uid ();
2674 for (unsigned i = 0; i < e; ++i)
2675 if (pass2_insn_entry[i].is_swap && !pass2_insn_entry[i].is_load
2676 && !pass2_insn_entry[i].is_store)
2677 {
2678 /* Replace swap of aligned load-swap with aligned unswapped
2679 load. */
2680 rtx_insn *rtx_insn = pass2_insn_entry[i].insn;
2681 if (quad_aligned_load_p (pass2_insn_entry, rtx_insn))
2682 replace_swapped_aligned_load (pass2_insn_entry, rtx_insn);
2683 }
2684 else if (pass2_insn_entry[i].is_swap && pass2_insn_entry[i].is_store)
2685 {
2686 /* Replace aligned store-swap of swapped value with aligned
2687 unswapped store. */
2688 rtx_insn *rtx_insn = pass2_insn_entry[i].insn;
2689 if (quad_aligned_store_p (pass2_insn_entry, rtx_insn))
2690 replace_swapped_aligned_store (pass2_insn_entry, rtx_insn);
2691 }
2692
2693 /* Clean up. */
2694 free (pass2_insn_entry);
2695
2696 /* Use a third pass over rtl to replace swap(load(vector constant))
2697 with load(swapped vector constant). */
2698
2699 /* First, rebuild ud chains. */
2700 df_remove_problem (df_chain);
2701 df_process_deferred_rescans ();
2702 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2703 df_chain_add_problem (DF_UD_CHAIN);
2704 df_analyze ();
2705
2706 swap_web_entry *pass3_insn_entry;
2707 pass3_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2708
2709 /* Walk the insns to gather basic data. */
2710 FOR_ALL_BB_FN (bb, fun)
2711 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2712 {
2713 unsigned int uid = INSN_UID (insn);
2714 if (NONDEBUG_INSN_P (insn))
2715 {
2716 pass3_insn_entry[uid].insn = insn;
2717
2718 pass3_insn_entry[uid].is_relevant = 1;
2719 pass3_insn_entry[uid].is_load = insn_is_load_p (insn);
2720 pass3_insn_entry[uid].is_store = insn_is_store_p (insn);
2721
2722 /* Determine if this is a doubleword swap. If not,
2723 determine whether it can legally be swapped. */
2724 if (insn_is_swap_p (insn))
2725 pass3_insn_entry[uid].is_swap = 1;
2726 }
2727 }
2728
2729 e = get_max_uid ();
2730 for (unsigned i = 0; i < e; ++i)
2731 if (pass3_insn_entry[i].is_swap && !pass3_insn_entry[i].is_load
2732 && !pass3_insn_entry[i].is_store)
2733 {
2734 insn = pass3_insn_entry[i].insn;
2735 if (const_load_sequence_p (pass3_insn_entry, insn))
2736 replace_swapped_load_constant (pass3_insn_entry, insn);
2737 }
2738
2739 /* Clean up. */
2740 free (pass3_insn_entry);
2741 return 0;
2742 }
2743
2744 const pass_data pass_data_analyze_swaps =
2745 {
2746 RTL_PASS, /* type */
2747 "swaps", /* name */
2748 OPTGROUP_NONE, /* optinfo_flags */
2749 TV_NONE, /* tv_id */
2750 0, /* properties_required */
2751 0, /* properties_provided */
2752 0, /* properties_destroyed */
2753 0, /* todo_flags_start */
2754 TODO_df_finish, /* todo_flags_finish */
2755 };
2756
2757 class pass_analyze_swaps : public rtl_opt_pass
2758 {
2759 public:
2760 pass_analyze_swaps(gcc::context *ctxt)
2761 : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
2762 {}
2763
2764 /* opt_pass methods: */
2765 virtual bool gate (function *)
2766 {
2767 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
2768 && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
2769 }
2770
2771 virtual unsigned int execute (function *fun)
2772 {
2773 return rs6000_analyze_swaps (fun);
2774 }
2775
2776 opt_pass *clone ()
2777 {
2778 return new pass_analyze_swaps (m_ctxt);
2779 }
2780
2781 }; // class pass_analyze_swaps
2782
2783 rtl_opt_pass *
2784 make_pass_analyze_swaps (gcc::context *ctxt)
2785 {
2786 return new pass_analyze_swaps (ctxt);
2787 }
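
/* A minimal sketch of how a pass like this is typically wired into the
   rs6000 pipeline: the target's passes definition file registers it with
   one of the INSERT_PASS_* macros, e.g. (placement shown here is
   illustrative only)

     INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);

   which causes the pass manager to call make_pass_analyze_swaps when it
   builds the pass pipeline.  */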
2788