gcc/config/rs6000/rs6000-p8swap.c
1 /* Subroutines used to remove unnecessary doubleword swaps
2 for p8 little-endian VSX code.
3 Copyright (C) 1991-2019 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #define IN_TARGET_CODE 1
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "df.h"
31 #include "tm_p.h"
32 #include "ira.h"
33 #include "print-tree.h"
34 #include "varasm.h"
35 #include "explow.h"
36 #include "expr.h"
37 #include "output.h"
38 #include "tree-pass.h"
39 #include "rtx-vector-builder.h"
40
41 /* Analyze vector computations and remove unnecessary doubleword
42 swaps (xxswapdi instructions). This pass is performed only
43 for little-endian VSX code generation.
44
45 For this specific case, loads and stores of 4x32 and 2x64 vectors
46      are inefficient. These are implemented using the lxvd2x and
47      stxvd2x instructions, which invert the order of doublewords in
48 a vector register. Thus the code generation inserts an xxswapdi
49 after each such load, and prior to each such store. (For spill
50 code after register assignment, an additional xxswapdi is inserted
51 following each store in order to return a hard register to its
52 unpermuted value.)
53
54 The extra xxswapdi instructions reduce performance. This can be
55 particularly bad for vectorized code. The purpose of this pass
56 is to reduce the number of xxswapdi instructions required for
57 correctness.
58
59 The primary insight is that much code that operates on vectors
60 does not care about the relative order of elements in a register,
61 so long as the correct memory order is preserved. If we have
62      a computation where all input values are provided by lxvd2x/xxswapdi
63      sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
64 and all intermediate computations are pure SIMD (independent of
65 element order), then all the xxswapdi's associated with the loads
66 and stores may be removed.
67
68 This pass uses some of the infrastructure and logical ideas from
69 the "web" pass in web.c. We create maximal webs of computations
70 fitting the description above using union-find. Each such web is
71 then optimized by removing its unnecessary xxswapdi instructions.
72
73 The pass is placed prior to global optimization so that we can
74 perform the optimization in the safest and simplest way possible;
75 that is, by replacing each xxswapdi insn with a register copy insn.
76 Subsequent forward propagation will remove copies where possible.
77
78 There are some operations sensitive to element order for which we
79 can still allow the operation, provided we modify those operations.
80 These include CONST_VECTORs, for which we must swap the first and
81 second halves of the constant vector; and SUBREGs, for which we
82 must adjust the byte offset to account for the swapped doublewords.
83 A remaining opportunity would be non-immediate-form splats, for
84 which we should adjust the selected lane of the input. We should
85 also make code generation adjustments for sum-across operations,
86 since this is a common vectorizer reduction.
87
88 Because we run prior to the first split, we can see loads and stores
89 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
90 vector loads and stores that have not yet been split into a permuting
91 load/store and a swap. (One way this can happen is with a builtin
92 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
93 than deleting a swap, we convert the load/store into a permuting
94 load/store (which effectively removes the swap). */
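/* Illustrative sketch (not from the original comments): for a simple
   little-endian VSX kernel such as

     void f (vector double *a, vector double *b, vector double *c)
     {
       *a = *b + *c;
     }

   initial code generation produces, roughly,

     lxvd2x  vs0,0,rB      # doublewords arrive reversed
     xxswapd vs0,vs0
     lxvd2x  vs1,0,rC
     xxswapd vs1,vs1
     xvadddp vs0,vs0,vs1   # element order does not matter here
     xxswapd vs0,vs0
     stxvd2x vs0,0,rA

   (rA, rB, rC stand for the pointer registers.)  Every input comes from
   an lxvd2x/xxswapd pair, every output feeds an xxswapd/stxvd2x pair,
   and the intermediate add is pure SIMD, so this pass rewrites each
   xxswapd as a register copy; forward propagation then removes the
   copies, leaving only the load, add, and store instructions.  */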
95
96 /* Notes on Permutes
97
98 We do not currently handle computations that contain permutes. There
99 is a general transformation that can be performed correctly, but it
100 may introduce more expensive code than it replaces. To handle these
101 would require a cost model to determine when to perform the optimization.
102 This commentary records how this could be done if desired.
103
104 The most general permute is something like this (example for V16QI):
105
106 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
107 (parallel [(const_int a0) (const_int a1)
108 ...
109 (const_int a14) (const_int a15)]))
110
111      where a0,...,a15 are in [0,31] and select which elements of op1
112      and op2 appear in the result.
113
114 Regardless of mode, we can convert the PARALLEL to a mask of 16
115 byte-element selectors. Let's call this M, with M[i] representing
116 the ith byte-element selector value. Then if we swap doublewords
117 throughout the computation, we can get correct behavior by replacing
118 M with M' as follows:
119
120 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
121 { ((M[i]+8)%16)+16 : M[i] in [16,31]
122
123 This seems promising at first, since we are just replacing one mask
124 with another. But certain masks are preferable to others. If M
125 is a mask that matches a vmrghh pattern, for example, M' certainly
126 will not. Instead of a single vmrghh, we would generate a load of
127 M' and a vperm. So we would need to know how many xxswapd's we can
128 remove as a result of this transformation to determine if it's
129 profitable; and preferably the logic would need to be aware of all
130 the special preferable masks.
131
132 Another form of permute is an UNSPEC_VPERM, in which the mask is
133 already in a register. In some cases, this mask may be a constant
134 that we can discover with ud-chains, in which case the above
135 transformation is ok. However, the common usage here is for the
136 mask to be produced by an UNSPEC_LVSL, in which case the mask
137 cannot be known at compile time. In such a case we would have to
138 generate several instructions to compute M' as above at run time,
139 and a cost model is needed again.
140
141 However, when the mask M for an UNSPEC_VPERM is loaded from the
142 constant pool, we can replace M with M' as above at no cost
143 beyond adding a constant pool entry. */
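/* As a concrete sketch (not part of the original commentary), the mask
   remapping M -> M' described above is simply

     unsigned char M_prime[16];
     for (unsigned int i = 0; i < 16; ++i)
       M_prime[i] = (M[i] < 16
		     ? (M[i] + 8) % 16
		     : ((M[i] + 8) % 16) + 16);

   which is the same adjustment adjust_vperm performs later in this
   file when the mask is known to be loaded from the constant pool.  */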
144
145 /* This is based on the union-find logic in web.c. web_entry_base is
146 defined in df.h. */
147 class swap_web_entry : public web_entry_base
148 {
149 public:
150 /* Pointer to the insn. */
151 rtx_insn *insn;
152 /* Set if insn contains a mention of a vector register. All other
153 fields are undefined if this field is unset. */
154 unsigned int is_relevant : 1;
155 /* Set if insn is a load. */
156 unsigned int is_load : 1;
157 /* Set if insn is a store. */
158 unsigned int is_store : 1;
159 /* Set if insn is a doubleword swap. This can either be a register swap
160 or a permuting load or store (test is_load and is_store for this). */
161 unsigned int is_swap : 1;
162 /* Set if the insn has a live-in use of a parameter register. */
163 unsigned int is_live_in : 1;
164 /* Set if the insn has a live-out def of a return register. */
165 unsigned int is_live_out : 1;
166 /* Set if the insn contains a subreg reference of a vector register. */
167 unsigned int contains_subreg : 1;
168 /* Set if the insn contains a 128-bit integer operand. */
169 unsigned int is_128_int : 1;
170 /* Set if this is a call-insn. */
171 unsigned int is_call : 1;
172 /* Set if this insn does not perform a vector operation for which
173 element order matters, or if we know how to fix it up if it does.
174 Undefined if is_swap is set. */
175 unsigned int is_swappable : 1;
176 /* A nonzero value indicates what kind of special handling for this
177 insn is required if doublewords are swapped. Undefined if
178 is_swappable is not set. */
179 unsigned int special_handling : 4;
180 /* Set if the web represented by this entry cannot be optimized. */
181 unsigned int web_not_optimizable : 1;
182 /* Set if this insn should be deleted. */
183 unsigned int will_delete : 1;
184 };
185
186 enum special_handling_values {
187 SH_NONE = 0,
188 SH_CONST_VECTOR,
189 SH_SUBREG,
190 SH_NOSWAP_LD,
191 SH_NOSWAP_ST,
192 SH_EXTRACT,
193 SH_SPLAT,
194 SH_XXPERMDI,
195 SH_CONCAT,
196 SH_VPERM
197 };
198
199 /* Union INSN with all insns containing definitions that reach USE.
200 Detect whether USE is live-in to the current function. */
201 static void
202 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
203 {
204 struct df_link *link = DF_REF_CHAIN (use);
205
206 if (!link)
207 insn_entry[INSN_UID (insn)].is_live_in = 1;
208
209 while (link)
210 {
211 if (DF_REF_IS_ARTIFICIAL (link->ref))
212 insn_entry[INSN_UID (insn)].is_live_in = 1;
213
214 if (DF_REF_INSN_INFO (link->ref))
215 {
216 rtx def_insn = DF_REF_INSN (link->ref);
217 (void)unionfind_union (insn_entry + INSN_UID (insn),
218 insn_entry + INSN_UID (def_insn));
219 }
220
221 link = link->next;
222 }
223 }
224
225 /* Union INSN with all insns containing uses reached from DEF.
226 Detect whether DEF is live-out from the current function. */
227 static void
228 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
229 {
230 struct df_link *link = DF_REF_CHAIN (def);
231
232 if (!link)
233 insn_entry[INSN_UID (insn)].is_live_out = 1;
234
235 while (link)
236 {
237 /* This could be an eh use or some other artificial use;
238 we treat these all the same (killing the optimization). */
239 if (DF_REF_IS_ARTIFICIAL (link->ref))
240 insn_entry[INSN_UID (insn)].is_live_out = 1;
241
242 if (DF_REF_INSN_INFO (link->ref))
243 {
244 rtx use_insn = DF_REF_INSN (link->ref);
245 (void)unionfind_union (insn_entry + INSN_UID (insn),
246 insn_entry + INSN_UID (use_insn));
247 }
248
249 link = link->next;
250 }
251 }
252
253 /* Return 1 iff INSN is a load insn, including permuting loads that
254      represent an lxvd2x instruction; else return 0. */
255 static unsigned int
256 insn_is_load_p (rtx insn)
257 {
258 rtx body = PATTERN (insn);
259
260 if (GET_CODE (body) == SET)
261 {
262 if (GET_CODE (SET_SRC (body)) == MEM)
263 return 1;
264
265 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
266 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
267 return 1;
268
269 return 0;
270 }
271
272 if (GET_CODE (body) != PARALLEL)
273 return 0;
274
275 rtx set = XVECEXP (body, 0, 0);
276
277 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
278 return 1;
279
280 return 0;
281 }
282
283 /* Return 1 iff INSN is a store insn, including permuting stores that
284      represent an stxvd2x instruction; else return 0. */
285 static unsigned int
286 insn_is_store_p (rtx insn)
287 {
288 rtx body = PATTERN (insn);
289 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
290 return 1;
291 if (GET_CODE (body) != PARALLEL)
292 return 0;
293 rtx set = XVECEXP (body, 0, 0);
294 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
295 return 1;
296 return 0;
297 }
298
299 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
300 a permuting load, or a permuting store. */
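/* For example (illustrative, not from the original comments), the
   V2DI register swap recognized here has the form

     (set (reg:V2DI 123)
	  (vec_select:V2DI (reg:V2DI 124)
			   (parallel [(const_int 1) (const_int 0)])))

   i.e. the first half of the selectors is len/2, ..., len-1 and the
   second half is 0, ..., len/2-1.  */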
301 static unsigned int
302 insn_is_swap_p (rtx insn)
303 {
304 rtx body = PATTERN (insn);
305 if (GET_CODE (body) != SET)
306 return 0;
307 rtx rhs = SET_SRC (body);
308 if (GET_CODE (rhs) != VEC_SELECT)
309 return 0;
310 rtx parallel = XEXP (rhs, 1);
311 if (GET_CODE (parallel) != PARALLEL)
312 return 0;
313 unsigned int len = XVECLEN (parallel, 0);
314 if (len != 2 && len != 4 && len != 8 && len != 16)
315 return 0;
316 for (unsigned int i = 0; i < len / 2; ++i)
317 {
318 rtx op = XVECEXP (parallel, 0, i);
319 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
320 return 0;
321 }
322 for (unsigned int i = len / 2; i < len; ++i)
323 {
324 rtx op = XVECEXP (parallel, 0, i);
325 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
326 return 0;
327 }
328 return 1;
329 }
330
331 /* Return true iff EXPR represents the sum of two registers. */
332 bool
333 rs6000_sum_of_two_registers_p (const_rtx expr)
334 {
335 if (GET_CODE (expr) == PLUS)
336 {
337 const_rtx operand1 = XEXP (expr, 0);
338 const_rtx operand2 = XEXP (expr, 1);
339 return (REG_P (operand1) && REG_P (operand2));
340 }
341 return false;
342 }
343
344 /* Return true iff EXPR represents an address expression that masks off
345 the low-order 4 bits in the style of an lvx or stvx rtl pattern. */
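/* For instance (illustrative), both of these address forms are
   accepted:

     (and:DI (reg:DI rA) (const_int -16))
     (and:DI (plus:DI (reg:DI rA) (reg:DI rB)) (const_int -16))

   where rA and rB stand for arbitrary registers.  */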
346 bool
347 rs6000_quadword_masked_address_p (const_rtx expr)
348 {
349 if (GET_CODE (expr) == AND)
350 {
351 const_rtx operand1 = XEXP (expr, 0);
352 const_rtx operand2 = XEXP (expr, 1);
353 if ((REG_P (operand1) || rs6000_sum_of_two_registers_p (operand1))
354 && CONST_SCALAR_INT_P (operand2) && INTVAL (operand2) == -16)
355 return true;
356 }
357 return false;
358 }
359
360 /* Return TRUE if INSN represents a swap of a swapped load from memory
361 and the memory address is quad-word aligned. */
362 static bool
363 quad_aligned_load_p (swap_web_entry *insn_entry, rtx_insn *insn)
364 {
365 unsigned uid = INSN_UID (insn);
366 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
367 return false;
368
369 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
370
371 /* Since insn is known to represent a swap instruction, we know it
372 "uses" only one input variable. */
373 df_ref use = DF_INSN_INFO_USES (insn_info);
374
375 /* Figure out where this input variable is defined. */
376 struct df_link *def_link = DF_REF_CHAIN (use);
377
378 /* If there is no definition or the definition is artificial or there are
379 multiple definitions, punt. */
380 if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
381 || def_link->next)
382 return false;
383
384 rtx def_insn = DF_REF_INSN (def_link->ref);
385 unsigned uid2 = INSN_UID (def_insn);
386 /* We're looking for a load-with-swap insn. If this is not that,
387 return false. */
388 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
389 return false;
390
391 /* If the source of the rtl def is not a set from memory, return
392 false. */
393 rtx body = PATTERN (def_insn);
394 if (GET_CODE (body) != SET
395 || GET_CODE (SET_SRC (body)) != VEC_SELECT
396 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
397 return false;
398
399 rtx mem = XEXP (SET_SRC (body), 0);
400 rtx base_reg = XEXP (mem, 0);
401 return ((REG_P (base_reg) || rs6000_sum_of_two_registers_p (base_reg))
402              && MEM_ALIGN (mem) >= 128);
403 }
404
405 /* Return TRUE if INSN represents a store-with-swap of a swapped value
406 and the memory address is quad-word aligned. */
407 static bool
408 quad_aligned_store_p (swap_web_entry *insn_entry, rtx_insn *insn)
409 {
410 unsigned uid = INSN_UID (insn);
411 if (!insn_entry[uid].is_swap || !insn_entry[uid].is_store)
412 return false;
413
414 rtx body = PATTERN (insn);
415 rtx dest_address = XEXP (SET_DEST (body), 0);
416 rtx swap_reg = XEXP (SET_SRC (body), 0);
417
418 /* If the base address for the memory expression is not represented
419 by a single register and is not the sum of two registers, punt. */
420 if (!REG_P (dest_address) && !rs6000_sum_of_two_registers_p (dest_address))
421 return false;
422
423 /* Confirm that the value to be stored is produced by a swap
424 instruction. */
425 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
426 df_ref use;
427 FOR_EACH_INSN_INFO_USE (use, insn_info)
428 {
429 struct df_link *def_link = DF_REF_CHAIN (use);
430
431 /* If this is not the definition of the candidate swap register,
432 then skip it. I am interested in a different definition. */
433 if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
434 continue;
435
436      /* If there is no def or the def is artificial or there are
437 multiple defs, punt. */
438 if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
439 || def_link->next)
440 return false;
441
442 rtx def_insn = DF_REF_INSN (def_link->ref);
443 unsigned uid2 = INSN_UID (def_insn);
444
445      /* If this source value is not a simple swap, return false. */
446 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load
447 || insn_entry[uid2].is_store)
448 return false;
449
450 /* I've processed the use that I care about, so break out of
451 this loop. */
452 break;
453 }
454
455 /* At this point, we know the source data comes from a swap. The
456 remaining question is whether the memory address is aligned. */
457 rtx set = single_set (insn);
458 if (set)
459 {
460 rtx dest = SET_DEST (set);
461 if (MEM_P (dest))
462 return (MEM_ALIGN (dest) >= 128);
463 }
464 return false;
465 }
466
467 /* Return 1 iff UID, known to reference a swap, is both fed by a load
468 and a feeder of a store. */
469 static unsigned int
470 swap_feeds_both_load_and_store (swap_web_entry *insn_entry)
471 {
472 rtx insn = insn_entry->insn;
473 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
474 df_ref def, use;
475 struct df_link *link = 0;
476 rtx_insn *load = 0, *store = 0;
477 bool fed_by_load = 0;
478 bool feeds_store = 0;
479
480 FOR_EACH_INSN_INFO_USE (use, insn_info)
481 {
482 link = DF_REF_CHAIN (use);
483 load = DF_REF_INSN (link->ref);
484 if (insn_is_load_p (load) && insn_is_swap_p (load))
485 fed_by_load = 1;
486 }
487
488 FOR_EACH_INSN_INFO_DEF (def, insn_info)
489 {
490 link = DF_REF_CHAIN (def);
491 store = DF_REF_INSN (link->ref);
492 if (insn_is_store_p (store) && insn_is_swap_p (store))
493 feeds_store = 1;
494 }
495
496 return fed_by_load && feeds_store;
497 }
498
499 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
500 static bool
501 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
502 {
503 unsigned uid = INSN_UID (insn);
504 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
505 return false;
506
507 const_rtx tocrel_base;
508
509 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
510 df_ref use;
511
512 /* Iterate over the definitions that are used by this insn. Since
513      this is known to be a swap insn, expect only one used definition. */
514 FOR_EACH_INSN_INFO_USE (use, insn_info)
515 {
516 struct df_link *def_link = DF_REF_CHAIN (use);
517
518 /* If there is no def or the def is artificial or there are
519 multiple defs, punt. */
520 if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
521 || def_link->next)
522 return false;
523
524 rtx def_insn = DF_REF_INSN (def_link->ref);
525 unsigned uid2 = INSN_UID (def_insn);
526 /* If this is not a load or is not a swap, return false. */
527 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
528 return false;
529
530 /* If the source of the rtl def is not a set from memory, return
531 false. */
532 rtx body = PATTERN (def_insn);
533 if (GET_CODE (body) != SET
534 || GET_CODE (SET_SRC (body)) != VEC_SELECT
535 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
536 return false;
537
538 rtx mem = XEXP (SET_SRC (body), 0);
539 rtx base_reg = XEXP (mem, 0);
540 /* If the base address for the memory expression is not
541 represented by a register, punt. */
542 if (!REG_P (base_reg))
543 return false;
544
545 df_ref base_use;
546 insn_info = DF_INSN_INFO_GET (def_insn);
547 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
548 {
549 /* If base_use does not represent base_reg, look for another
550 use. */
551 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
552 continue;
553
554 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
555 if (!base_def_link || base_def_link->next)
556 return false;
557
558 /* Constants held on the stack are not "true" constants
559 because their values are not part of the static load
560 image. If this constant's base reference is a stack
561 or frame pointer, it is seen as an artificial
562 reference. */
563 if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
564 return false;
565
566 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
567 rtx tocrel_body = PATTERN (tocrel_insn);
568 rtx base, offset;
569 if (GET_CODE (tocrel_body) != SET)
570 return false;
571 /* There is an extra level of indirection for small/large
572 code models. */
573 rtx tocrel_expr = SET_SRC (tocrel_body);
574 if (GET_CODE (tocrel_expr) == MEM)
575 tocrel_expr = XEXP (tocrel_expr, 0);
576 if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
577 return false;
578 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
579
580 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
581 return false;
582 else
583 {
584 /* FIXME: The conditions under which
585 ((GET_CODE (const_vector) == SYMBOL_REF) &&
586 !CONSTANT_POOL_ADDRESS_P (const_vector))
587 are not well understood. This code prevents
588 an internal compiler error which will occur in
589 replace_swapped_load_constant () if we were to return
590 true. Some day, we should figure out how to properly
591 handle this condition in
592 replace_swapped_load_constant () and then we can
593 remove this special test. */
594 rtx const_vector = get_pool_constant (base);
595 if (GET_CODE (const_vector) == SYMBOL_REF
596 && CONSTANT_POOL_ADDRESS_P (const_vector))
597 const_vector = get_pool_constant (const_vector);
598 if (GET_CODE (const_vector) != CONST_VECTOR)
599 return false;
600 }
601 }
602 }
603 return true;
604 }
605
606 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
607 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
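/* For example (illustrative), the shape checked here for a PLUS
   reduction is

     (plus:V2DF
       (vec_concat:V2DF
	 (vec_select:DF (reg:V2DF x) (parallel [(const_int 1)]))
	 (vec_select:DF (reg:V2DF x) (parallel [(const_int 0)])))
       ...)

   with the same register feeding both selects; SMIN and SMAX
   reductions have the same shape.  */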
608 static bool
609 v2df_reduction_p (rtx op)
610 {
611 if (GET_MODE (op) != V2DFmode)
612 return false;
613
614 enum rtx_code code = GET_CODE (op);
615 if (code != PLUS && code != SMIN && code != SMAX)
616 return false;
617
618 rtx concat = XEXP (op, 0);
619 if (GET_CODE (concat) != VEC_CONCAT)
620 return false;
621
622 rtx select0 = XEXP (concat, 0);
623 rtx select1 = XEXP (concat, 1);
624 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
625 return false;
626
627 rtx reg0 = XEXP (select0, 0);
628 rtx reg1 = XEXP (select1, 0);
629 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
630 return false;
631
632 rtx parallel0 = XEXP (select0, 1);
633 rtx parallel1 = XEXP (select1, 1);
634 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
635 return false;
636
637 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
638 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
639 return false;
640
641 return true;
642 }
643
644 /* Return 1 iff OP is an operand that will not be affected by having
645 vector doublewords swapped in memory. */
646 static unsigned int
647 rtx_is_swappable_p (rtx op, unsigned int *special)
648 {
649 enum rtx_code code = GET_CODE (op);
650 int i, j;
651 rtx parallel;
652
653 switch (code)
654 {
655 case LABEL_REF:
656 case SYMBOL_REF:
657 case CLOBBER:
658 case REG:
659 return 1;
660
661 case VEC_CONCAT:
662 case ASM_INPUT:
663 case ASM_OPERANDS:
664 return 0;
665
666 case CONST_VECTOR:
667 {
668 *special = SH_CONST_VECTOR;
669 return 1;
670 }
671
672 case VEC_DUPLICATE:
673 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
674 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
675 it represents a vector splat for which we can do special
676 handling. */
677 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
678 return 1;
679 else if (REG_P (XEXP (op, 0))
680 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
681 /* This catches V2DF and V2DI splat, at a minimum. */
682 return 1;
683 else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
684 && REG_P (XEXP (XEXP (op, 0), 0))
685 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
686 /* This catches splat of a truncated value. */
687 return 1;
688 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
689 /* If the duplicated item is from a select, defer to the select
690 processing to see if we can change the lane for the splat. */
691 return rtx_is_swappable_p (XEXP (op, 0), special);
692 else
693 return 0;
694
695 case VEC_SELECT:
696 /* A vec_extract operation is ok if we change the lane. */
697 if (GET_CODE (XEXP (op, 0)) == REG
698 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
699 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
700 && XVECLEN (parallel, 0) == 1
701 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
702 {
703 *special = SH_EXTRACT;
704 return 1;
705 }
706 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
707 XXPERMDI is a swap operation, it will be identified by
708 insn_is_swap_p and therefore we won't get here. */
709 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
710 && (GET_MODE (XEXP (op, 0)) == V4DFmode
711 || GET_MODE (XEXP (op, 0)) == V4DImode)
712 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
713 && XVECLEN (parallel, 0) == 2
714 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
715 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
716 {
717 *special = SH_XXPERMDI;
718 return 1;
719 }
720 else if (v2df_reduction_p (op))
721 return 1;
722 else
723 return 0;
724
725 case UNSPEC:
726 {
727 /* Various operations are unsafe for this optimization, at least
728 without significant additional work. Permutes are obviously
729 problematic, as both the permute control vector and the ordering
730 of the target values are invalidated by doubleword swapping.
731 Vector pack and unpack modify the number of vector lanes.
732 Merge-high/low will not operate correctly on swapped operands.
733 Vector shifts across element boundaries are clearly uncool,
734 as are vector select and concatenate operations. Vector
735 sum-across instructions define one operand with a specific
736 order-dependent element, so additional fixup code would be
737 needed to make those work. Vector set and non-immediate-form
738 vector splat are element-order sensitive. A few of these
739 cases might be workable with special handling if required.
740 Adding cost modeling would be appropriate in some cases. */
741 int val = XINT (op, 1);
742 switch (val)
743 {
744 default:
745 break;
746 case UNSPEC_VBPERMQ:
747 case UNSPEC_VMRGH_DIRECT:
748 case UNSPEC_VMRGL_DIRECT:
749 case UNSPEC_VPACK_SIGN_SIGN_SAT:
750 case UNSPEC_VPACK_SIGN_UNS_SAT:
751 case UNSPEC_VPACK_UNS_UNS_MOD:
752 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
753 case UNSPEC_VPACK_UNS_UNS_SAT:
754 case UNSPEC_VPERM:
755 case UNSPEC_VPERM_UNS:
756 case UNSPEC_VPERMHI:
757 case UNSPEC_VPERMSI:
758 case UNSPEC_VPERMXOR:
759 case UNSPEC_VPKPX:
760 case UNSPEC_VSLDOI:
761 case UNSPEC_VSLO:
762 case UNSPEC_VSRO:
763 case UNSPEC_VSUM2SWS:
764 case UNSPEC_VSUM4S:
765 case UNSPEC_VSUM4UBS:
766 case UNSPEC_VSUMSWS:
767 case UNSPEC_VSUMSWS_DIRECT:
768 case UNSPEC_VSX_CONCAT:
769 case UNSPEC_VSX_CVDPSPN:
770 case UNSPEC_VSX_CVSPDP:
771 case UNSPEC_VSX_CVSPDPN:
772 case UNSPEC_VSX_EXTRACT:
773 case UNSPEC_VSX_SET:
774 case UNSPEC_VSX_SLDWI:
775 case UNSPEC_VSX_VSLO:
776 case UNSPEC_VUNPACK_HI_SIGN:
777 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
778 case UNSPEC_VUNPACK_LO_SIGN:
779 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
780 case UNSPEC_VUPKHPX:
781 case UNSPEC_VUPKHS_V4SF:
782 case UNSPEC_VUPKHU_V4SF:
783 case UNSPEC_VUPKLPX:
784 case UNSPEC_VUPKLS_V4SF:
785 case UNSPEC_VUPKLU_V4SF:
786 return 0;
787 case UNSPEC_VSPLT_DIRECT:
788 case UNSPEC_VSX_XXSPLTD:
789 *special = SH_SPLAT;
790 return 1;
791 case UNSPEC_REDUC_PLUS:
792 case UNSPEC_REDUC:
793 return 1;
794 }
795 }
796
797 default:
798 break;
799 }
800
801 const char *fmt = GET_RTX_FORMAT (code);
802 int ok = 1;
803
804 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
805 if (fmt[i] == 'e' || fmt[i] == 'u')
806 {
807 unsigned int special_op = SH_NONE;
808 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
809 if (special_op == SH_NONE)
810 continue;
811 /* Ensure we never have two kinds of special handling
812 for the same insn. */
813 if (*special != SH_NONE && *special != special_op)
814 return 0;
815 *special = special_op;
816 }
817 else if (fmt[i] == 'E')
818 for (j = 0; j < XVECLEN (op, i); ++j)
819 {
820 unsigned int special_op = SH_NONE;
821 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
822 if (special_op == SH_NONE)
823 continue;
824 /* Ensure we never have two kinds of special handling
825 for the same insn. */
826 if (*special != SH_NONE && *special != special_op)
827 return 0;
828 *special = special_op;
829 }
830
831 return ok;
832 }
833
834 /* Return 1 iff INSN is an operand that will not be affected by
835 having vector doublewords swapped in memory (in which case
836 *SPECIAL is unchanged), or that can be modified to be correct
837 if vector doublewords are swapped in memory (in which case
838 *SPECIAL is changed to a value indicating how). */
839 static unsigned int
840 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
841 unsigned int *special)
842 {
843 /* Calls are always bad. */
844 if (GET_CODE (insn) == CALL_INSN)
845 return 0;
846
847 /* Loads and stores seen here are not permuting, but we can still
848 fix them up by converting them to permuting ones. Exceptions:
849 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
850 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
851 for the SET source. Also we must now make an exception for lvx
852 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
853 explicit "& -16") since this leads to unrecognizable insns. */
854 rtx body = PATTERN (insn);
855 int i = INSN_UID (insn);
856
857 if (insn_entry[i].is_load)
858 {
859 if (GET_CODE (body) == SET)
860 {
861 rtx rhs = SET_SRC (body);
862 /* Even without a swap, the RHS might be a vec_select for, say,
863 a byte-reversing load. */
864 if (GET_CODE (rhs) != MEM)
865 return 0;
866 if (GET_CODE (XEXP (rhs, 0)) == AND)
867 return 0;
868
869 *special = SH_NOSWAP_LD;
870 return 1;
871 }
872 else
873 return 0;
874 }
875
876 if (insn_entry[i].is_store)
877 {
878 if (GET_CODE (body) == SET
879 && GET_CODE (SET_SRC (body)) != UNSPEC
880 && GET_CODE (SET_SRC (body)) != VEC_SELECT)
881 {
882 rtx lhs = SET_DEST (body);
883 /* Even without a swap, the RHS might be a vec_select for, say,
884 a byte-reversing store. */
885 if (GET_CODE (lhs) != MEM)
886 return 0;
887 if (GET_CODE (XEXP (lhs, 0)) == AND)
888 return 0;
889
890 *special = SH_NOSWAP_ST;
891 return 1;
892 }
893 else
894 return 0;
895 }
896
897 /* A convert to single precision can be left as is provided that
898 all of its uses are in xxspltw instructions that splat BE element
899 zero. */
900 if (GET_CODE (body) == SET
901 && GET_CODE (SET_SRC (body)) == UNSPEC
902 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
903 {
904 df_ref def;
905 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
906
907 FOR_EACH_INSN_INFO_DEF (def, insn_info)
908 {
909 struct df_link *link = DF_REF_CHAIN (def);
910 if (!link)
911 return 0;
912
913 for (; link; link = link->next) {
914 rtx use_insn = DF_REF_INSN (link->ref);
915 rtx use_body = PATTERN (use_insn);
916 if (GET_CODE (use_body) != SET
917 || GET_CODE (SET_SRC (use_body)) != UNSPEC
918 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
919 || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
920 return 0;
921 }
922 }
923
924 return 1;
925 }
926
927 /* A concatenation of two doublewords is ok if we reverse the
928 order of the inputs. */
929 if (GET_CODE (body) == SET
930 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
931 && (GET_MODE (SET_SRC (body)) == V2DFmode
932 || GET_MODE (SET_SRC (body)) == V2DImode))
933 {
934 *special = SH_CONCAT;
935 return 1;
936 }
937
938 /* V2DF reductions are always swappable. */
939 if (GET_CODE (body) == PARALLEL)
940 {
941 rtx expr = XVECEXP (body, 0, 0);
942 if (GET_CODE (expr) == SET
943 && v2df_reduction_p (SET_SRC (expr)))
944 return 1;
945 }
946
947 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
948 constant pool. */
949 if (GET_CODE (body) == SET
950 && GET_CODE (SET_SRC (body)) == UNSPEC
951 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
952 && XVECLEN (SET_SRC (body), 0) == 3
953 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
954 {
955 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
956 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
957 df_ref use;
958 FOR_EACH_INSN_INFO_USE (use, insn_info)
959 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
960 {
961 struct df_link *def_link = DF_REF_CHAIN (use);
962 /* Punt if multiple definitions for this reg. */
963 if (def_link && !def_link->next &&
964 const_load_sequence_p (insn_entry,
965 DF_REF_INSN (def_link->ref)))
966 {
967 *special = SH_VPERM;
968 return 1;
969 }
970 }
971 }
972
973 /* Otherwise check the operands for vector lane violations. */
974 return rtx_is_swappable_p (body, special);
975 }
976
977 enum chain_purpose { FOR_LOADS, FOR_STORES };
978
979 /* Return true if the UD or DU chain headed by LINK is non-empty,
980 and every entry on the chain references an insn that is a
981 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
982 register swap must have only permuting loads as reaching defs.
983 If PURPOSE is FOR_STORES, each such register swap must have only
984 register swaps or permuting stores as reached uses. */
985 static bool
986 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
987 enum chain_purpose purpose)
988 {
989 if (!link)
990 return false;
991
992 for (; link; link = link->next)
993 {
994 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
995 continue;
996
997 if (DF_REF_IS_ARTIFICIAL (link->ref))
998 return false;
999
1000 rtx reached_insn = DF_REF_INSN (link->ref);
1001 unsigned uid = INSN_UID (reached_insn);
1002 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
1003
1004 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
1005 || insn_entry[uid].is_store)
1006 return false;
1007
1008 if (purpose == FOR_LOADS)
1009 {
1010 df_ref use;
1011 FOR_EACH_INSN_INFO_USE (use, insn_info)
1012 {
1013 struct df_link *swap_link = DF_REF_CHAIN (use);
1014
1015 while (swap_link)
1016 {
1017 if (DF_REF_IS_ARTIFICIAL (link->ref))
1018 return false;
1019
1020 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
1021 unsigned uid2 = INSN_UID (swap_def_insn);
1022
1023 /* Only permuting loads are allowed. */
1024 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
1025 return false;
1026
1027 swap_link = swap_link->next;
1028 }
1029 }
1030 }
1031 else if (purpose == FOR_STORES)
1032 {
1033 df_ref def;
1034 FOR_EACH_INSN_INFO_DEF (def, insn_info)
1035 {
1036 struct df_link *swap_link = DF_REF_CHAIN (def);
1037
1038 while (swap_link)
1039 {
1040 if (DF_REF_IS_ARTIFICIAL (link->ref))
1041 return false;
1042
1043 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
1044 unsigned uid2 = INSN_UID (swap_use_insn);
1045
1046 /* Permuting stores or register swaps are allowed. */
1047 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
1048 return false;
1049
1050 swap_link = swap_link->next;
1051 }
1052 }
1053 }
1054 }
1055
1056 return true;
1057 }
1058
1059 /* Mark the xxswapdi instructions associated with permuting loads and
1060 stores for removal. Note that we only flag them for deletion here,
1061 as there is a possibility of a swap being reached from multiple
1062 loads, etc. */
1063 static void
1064 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
1065 {
1066 rtx insn = insn_entry[i].insn;
1067 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1068
1069 if (insn_entry[i].is_load)
1070 {
1071 df_ref def;
1072 FOR_EACH_INSN_INFO_DEF (def, insn_info)
1073 {
1074 struct df_link *link = DF_REF_CHAIN (def);
1075
1076 /* We know by now that these are swaps, so we can delete
1077 them confidently. */
1078 while (link)
1079 {
1080 rtx use_insn = DF_REF_INSN (link->ref);
1081 insn_entry[INSN_UID (use_insn)].will_delete = 1;
1082 link = link->next;
1083 }
1084 }
1085 }
1086 else if (insn_entry[i].is_store)
1087 {
1088 df_ref use;
1089 FOR_EACH_INSN_INFO_USE (use, insn_info)
1090 {
1091 /* Ignore uses for addressability. */
1092 machine_mode mode = GET_MODE (DF_REF_REG (use));
1093 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
1094 continue;
1095
1096 struct df_link *link = DF_REF_CHAIN (use);
1097
1098 /* We know by now that these are swaps, so we can delete
1099 them confidently. */
1100 while (link)
1101 {
1102 rtx def_insn = DF_REF_INSN (link->ref);
1103 insn_entry[INSN_UID (def_insn)].will_delete = 1;
1104 link = link->next;
1105 }
1106 }
1107 }
1108 }
1109
1110 /* *OP_PTR is either a CONST_VECTOR or an expression containing one.
1111 Swap the first half of the vector with the second in the first
1112 case. Recurse to find it in the second. */
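/* For example (illustrative), a V4SI constant { 0, 1, 2, 3 } becomes
   { 2, 3, 0, 1 }: the two doubleword halves trade places while the
   element order within each half is preserved.  */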
1113 static void
1114 swap_const_vector_halves (rtx *op_ptr)
1115 {
1116 int i;
1117 rtx op = *op_ptr;
1118 enum rtx_code code = GET_CODE (op);
1119 if (GET_CODE (op) == CONST_VECTOR)
1120 {
1121 int units = GET_MODE_NUNITS (GET_MODE (op));
1122 rtx_vector_builder builder (GET_MODE (op), units, 1);
1123 for (i = 0; i < units / 2; ++i)
1124 builder.quick_push (CONST_VECTOR_ELT (op, i + units / 2));
1125 for (i = 0; i < units / 2; ++i)
1126 builder.quick_push (CONST_VECTOR_ELT (op, i));
1127 *op_ptr = builder.build ();
1128 }
1129 else
1130 {
1131 int j;
1132 const char *fmt = GET_RTX_FORMAT (code);
1133 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
1134 if (fmt[i] == 'e' || fmt[i] == 'u')
1135 swap_const_vector_halves (&XEXP (op, i));
1136 else if (fmt[i] == 'E')
1137 for (j = 0; j < XVECLEN (op, i); ++j)
1138 swap_const_vector_halves (&XVECEXP (op, i, j));
1139 }
1140 }
1141
1142 /* Find all subregs of a vector expression that perform a narrowing,
1143 and adjust the subreg index to account for doubleword swapping. */
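/* For example (illustrative), (subreg:DI (reg:V2DI ...) 0) becomes
   (subreg:DI (reg:V2DI ...) 8) and vice versa, since the doubleword
   formerly at byte offset 0 now lives at byte offset 8.  */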
1144 static void
1145 adjust_subreg_index (rtx op)
1146 {
1147 enum rtx_code code = GET_CODE (op);
1148 if (code == SUBREG
1149 && (GET_MODE_SIZE (GET_MODE (op))
1150 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
1151 {
1152 unsigned int index = SUBREG_BYTE (op);
1153 if (index < 8)
1154 index += 8;
1155 else
1156 index -= 8;
1157 SUBREG_BYTE (op) = index;
1158 }
1159
1160 const char *fmt = GET_RTX_FORMAT (code);
1161 int i,j;
1162 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
1163 if (fmt[i] == 'e' || fmt[i] == 'u')
1164 adjust_subreg_index (XEXP (op, i));
1165 else if (fmt[i] == 'E')
1166 for (j = 0; j < XVECLEN (op, i); ++j)
1167 adjust_subreg_index (XVECEXP (op, i, j));
1168 }
1169
1170 /* Convert the non-permuting load INSN to a permuting one. */
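/* Illustrative example (not from the original comments): for V4SI,

     (set (reg:V4SI 123) (mem:V4SI (reg:DI 124)))

   becomes

     (set (reg:V4SI 123)
	  (vec_select:V4SI (mem:V4SI (reg:DI 124))
			   (parallel [(const_int 2) (const_int 3)
				      (const_int 0) (const_int 1)])))

   which matches the permuting (lxvd2x-style) load pattern.  */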
1171 static void
1172 permute_load (rtx_insn *insn)
1173 {
1174 rtx body = PATTERN (insn);
1175 rtx mem_op = SET_SRC (body);
1176 rtx tgt_reg = SET_DEST (body);
1177 machine_mode mode = GET_MODE (tgt_reg);
1178 int n_elts = GET_MODE_NUNITS (mode);
1179 int half_elts = n_elts / 2;
1180 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
1181 int i, j;
1182 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
1183 XVECEXP (par, 0, i) = GEN_INT (j);
1184 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
1185 XVECEXP (par, 0, i) = GEN_INT (j);
1186 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
1187 SET_SRC (body) = sel;
1188 INSN_CODE (insn) = -1; /* Force re-recognition. */
1189 df_insn_rescan (insn);
1190
1191 if (dump_file)
1192 fprintf (dump_file, "Replacing load %d with permuted load\n",
1193 INSN_UID (insn));
1194 }
1195
1196 /* Convert the non-permuting store INSN to a permuting one. */
1197 static void
1198 permute_store (rtx_insn *insn)
1199 {
1200 rtx body = PATTERN (insn);
1201 rtx src_reg = SET_SRC (body);
1202 machine_mode mode = GET_MODE (src_reg);
1203 int n_elts = GET_MODE_NUNITS (mode);
1204 int half_elts = n_elts / 2;
1205 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
1206 int i, j;
1207 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
1208 XVECEXP (par, 0, i) = GEN_INT (j);
1209 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
1210 XVECEXP (par, 0, i) = GEN_INT (j);
1211 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
1212 SET_SRC (body) = sel;
1213 INSN_CODE (insn) = -1; /* Force re-recognition. */
1214 df_insn_rescan (insn);
1215
1216 if (dump_file)
1217 fprintf (dump_file, "Replacing store %d with permuted store\n",
1218 INSN_UID (insn));
1219 }
1220
1221 /* Given INSN that contains a vector extract operation, adjust the index
1222 of the extracted lane to account for the doubleword swap. */
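/* For example (illustrative), extracting lane 1 of a V4SI value
   becomes extracting lane 3, and vice versa, since the doubleword
   holding that element has moved to the other half of the register.  */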
1223 static void
1224 adjust_extract (rtx_insn *insn)
1225 {
1226 rtx pattern = PATTERN (insn);
1227 if (GET_CODE (pattern) == PARALLEL)
1228 pattern = XVECEXP (pattern, 0, 0);
1229 rtx src = SET_SRC (pattern);
1230 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
1231 account for that. */
1232 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
1233 rtx par = XEXP (sel, 1);
1234 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
1235 int lane = INTVAL (XVECEXP (par, 0, 0));
1236 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
1237 XVECEXP (par, 0, 0) = GEN_INT (lane);
1238 INSN_CODE (insn) = -1; /* Force re-recognition. */
1239 df_insn_rescan (insn);
1240
1241 if (dump_file)
1242 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
1243 }
1244
1245 /* Given INSN that contains a vector direct-splat operation, adjust the index
1246 of the source lane to account for the doubleword swap. */
1247 static void
1248 adjust_splat (rtx_insn *insn)
1249 {
1250 rtx body = PATTERN (insn);
1251 rtx unspec = XEXP (body, 1);
1252 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
1253 int lane = INTVAL (XVECEXP (unspec, 0, 1));
1254 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
1255 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
1256 INSN_CODE (insn) = -1; /* Force re-recognition. */
1257 df_insn_rescan (insn);
1258
1259 if (dump_file)
1260 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
1261 }
1262
1263 /* Given INSN that contains an XXPERMDI operation (that is not a doubleword
1264 swap), reverse the order of the source operands and adjust the indices
1265 of the source lanes to account for doubleword reversal. */
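/* For example (illustrative), an xxpermdi that selects lanes { 0, 2 }
   from (vec_concat op1 op2) becomes one that selects lanes { 1, 3 }
   from (vec_concat op2 op1); in general the operands are exchanged
   and the lane pair (a, b) becomes (3 - b, 3 - a).  */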
1266 static void
1267 adjust_xxpermdi (rtx_insn *insn)
1268 {
1269 rtx set = PATTERN (insn);
1270 rtx select = XEXP (set, 1);
1271 rtx concat = XEXP (select, 0);
1272 rtx src0 = XEXP (concat, 0);
1273 XEXP (concat, 0) = XEXP (concat, 1);
1274 XEXP (concat, 1) = src0;
1275 rtx parallel = XEXP (select, 1);
1276 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
1277 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
1278 int new_lane0 = 3 - lane1;
1279 int new_lane1 = 3 - lane0;
1280 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
1281 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
1282 INSN_CODE (insn) = -1; /* Force re-recognition. */
1283 df_insn_rescan (insn);
1284
1285 if (dump_file)
1286 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
1287 }
1288
1289 /* Given INSN that contains a VEC_CONCAT operation of two doublewords,
1290 reverse the order of those inputs. */
1291 static void
1292 adjust_concat (rtx_insn *insn)
1293 {
1294 rtx set = PATTERN (insn);
1295 rtx concat = XEXP (set, 1);
1296 rtx src0 = XEXP (concat, 0);
1297 XEXP (concat, 0) = XEXP (concat, 1);
1298 XEXP (concat, 1) = src0;
1299 INSN_CODE (insn) = -1; /* Force re-recognition. */
1300 df_insn_rescan (insn);
1301
1302 if (dump_file)
1303 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
1304 }
1305
1306 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
1307 constant pool to reflect swapped doublewords. */
1308 static void
1309 adjust_vperm (rtx_insn *insn)
1310 {
1311 /* We previously determined that the UNSPEC_VPERM was fed by a
1312 swap of a swapping load of a TOC-relative constant pool symbol.
1313 Find the MEM in the swapping load and replace it with a MEM for
1314 the adjusted mask constant. */
1315 rtx set = PATTERN (insn);
1316 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
1317
1318 /* Find the swap. */
1319 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
1320 df_ref use;
1321 rtx_insn *swap_insn = 0;
1322 FOR_EACH_INSN_INFO_USE (use, insn_info)
1323 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
1324 {
1325 struct df_link *def_link = DF_REF_CHAIN (use);
1326 gcc_assert (def_link && !def_link->next);
1327 swap_insn = DF_REF_INSN (def_link->ref);
1328 break;
1329 }
1330 gcc_assert (swap_insn);
1331
1332 /* Find the load. */
1333 insn_info = DF_INSN_INFO_GET (swap_insn);
1334 rtx_insn *load_insn = 0;
1335 FOR_EACH_INSN_INFO_USE (use, insn_info)
1336 {
1337 struct df_link *def_link = DF_REF_CHAIN (use);
1338 gcc_assert (def_link && !def_link->next);
1339 load_insn = DF_REF_INSN (def_link->ref);
1340 break;
1341 }
1342 gcc_assert (load_insn);
1343
1344 /* Find the TOC-relative symbol access. */
1345 insn_info = DF_INSN_INFO_GET (load_insn);
1346 rtx_insn *tocrel_insn = 0;
1347 FOR_EACH_INSN_INFO_USE (use, insn_info)
1348 {
1349 struct df_link *def_link = DF_REF_CHAIN (use);
1350 gcc_assert (def_link && !def_link->next);
1351 tocrel_insn = DF_REF_INSN (def_link->ref);
1352 break;
1353 }
1354 gcc_assert (tocrel_insn);
1355
1356 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
1357 to set tocrel_base; otherwise it would be unnecessary as we've
1358 already established it will return true. */
1359 rtx base, offset;
1360 const_rtx tocrel_base;
1361 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
1362 /* There is an extra level of indirection for small/large code models. */
1363 if (GET_CODE (tocrel_expr) == MEM)
1364 tocrel_expr = XEXP (tocrel_expr, 0);
1365 if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
1366 gcc_unreachable ();
1367 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
1368 rtx const_vector = get_pool_constant (base);
1369 /* With the extra indirection, get_pool_constant will produce the
1370 real constant from the reg_equal expression, so get the real
1371 constant. */
1372 if (GET_CODE (const_vector) == SYMBOL_REF)
1373 const_vector = get_pool_constant (const_vector);
1374 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
1375
1376 /* Create an adjusted mask from the initial mask. */
1377 unsigned int new_mask[16], i, val;
1378 for (i = 0; i < 16; ++i) {
1379 val = INTVAL (XVECEXP (const_vector, 0, i));
1380 if (val < 16)
1381 new_mask[i] = (val + 8) % 16;
1382 else
1383 new_mask[i] = ((val + 8) % 16) + 16;
1384 }
1385
1386 /* Create a new CONST_VECTOR and a MEM that references it. */
1387 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
1388 for (i = 0; i < 16; ++i)
1389 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
1390 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
1391 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
1392 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
1393 can't recognize. Force the SYMBOL_REF into a register. */
1394 if (!REG_P (XEXP (new_mem, 0))) {
1395 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
1396 XEXP (new_mem, 0) = base_reg;
1397 /* Move the newly created insn ahead of the load insn. */
1398 rtx_insn *force_insn = get_last_insn ();
1399 remove_insn (force_insn);
1400 rtx_insn *before_load_insn = PREV_INSN (load_insn);
1401 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
1402 df_insn_rescan (before_load_insn);
1403 df_insn_rescan (force_insn);
1404 }
1405
1406 /* Replace the MEM in the load instruction and rescan it. */
1407 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
1408 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
1409 df_insn_rescan (load_insn);
1410
1411 if (dump_file)
1412 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
1413 }
1414
1415 /* The insn described by INSN_ENTRY[I] can be swapped, but only
1416 with special handling. Take care of that here. */
1417 static void
1418 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
1419 {
1420 rtx_insn *insn = insn_entry[i].insn;
1421 rtx body = PATTERN (insn);
1422
1423 switch (insn_entry[i].special_handling)
1424 {
1425 default:
1426 gcc_unreachable ();
1427 case SH_CONST_VECTOR:
1428 {
1429 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
1430 gcc_assert (GET_CODE (body) == SET);
1431 swap_const_vector_halves (&SET_SRC (body));
1432 if (dump_file)
1433 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
1434 break;
1435 }
1436 case SH_SUBREG:
1437 /* A subreg of the same size is already safe. For subregs that
1438 select a smaller portion of a reg, adjust the index for
1439 swapped doublewords. */
1440 adjust_subreg_index (body);
1441 if (dump_file)
1442 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
1443 break;
1444 case SH_NOSWAP_LD:
1445 /* Convert a non-permuting load to a permuting one. */
1446 permute_load (insn);
1447 break;
1448 case SH_NOSWAP_ST:
1449 /* Convert a non-permuting store to a permuting one. */
1450 permute_store (insn);
1451 break;
1452 case SH_EXTRACT:
1453 /* Change the lane on an extract operation. */
1454 adjust_extract (insn);
1455 break;
1456 case SH_SPLAT:
1457 /* Change the lane on a direct-splat operation. */
1458 adjust_splat (insn);
1459 break;
1460 case SH_XXPERMDI:
1461 /* Change the lanes on an XXPERMDI operation. */
1462 adjust_xxpermdi (insn);
1463 break;
1464 case SH_CONCAT:
1465 /* Reverse the order of a concatenation operation. */
1466 adjust_concat (insn);
1467 break;
1468 case SH_VPERM:
1469 /* Change the mask loaded from the constant pool for a VPERM. */
1470 adjust_vperm (insn);
1471 break;
1472 }
1473 }
1474
1475 /* Find the insn from the Ith table entry, which is known to be a
1476 register swap Y = SWAP(X). Replace it with a copy Y = X. */
1477 static void
1478 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
1479 {
1480 rtx_insn *insn = insn_entry[i].insn;
1481 rtx body = PATTERN (insn);
1482 rtx src_reg = XEXP (SET_SRC (body), 0);
1483 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
1484 rtx_insn *new_insn = emit_insn_before (copy, insn);
1485 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
1486 df_insn_rescan (new_insn);
1487
1488 if (dump_file)
1489 {
1490 unsigned int new_uid = INSN_UID (new_insn);
1491 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
1492 }
1493
1494 df_insn_delete (insn);
1495 remove_insn (insn);
1496 insn->set_deleted ();
1497 }
1498
1499 /* Make NEW_MEM_EXP's attributes and flags resemble those of
1500 ORIGINAL_MEM_EXP. */
1501 static void
1502 mimic_memory_attributes_and_flags (rtx new_mem_exp, const_rtx original_mem_exp)
1503 {
1504 RTX_FLAG (new_mem_exp, jump) = RTX_FLAG (original_mem_exp, jump);
1505 RTX_FLAG (new_mem_exp, call) = RTX_FLAG (original_mem_exp, call);
1506 RTX_FLAG (new_mem_exp, unchanging) = RTX_FLAG (original_mem_exp, unchanging);
1507 RTX_FLAG (new_mem_exp, volatil) = RTX_FLAG (original_mem_exp, volatil);
1508 RTX_FLAG (new_mem_exp, frame_related) =
1509 RTX_FLAG (original_mem_exp, frame_related);
1510
1511   /* The following fields may not be used with MEM subexpressions.  */
1512 RTX_FLAG (new_mem_exp, in_struct) = RTX_FLAG (original_mem_exp, in_struct);
1513 RTX_FLAG (new_mem_exp, return_val) = RTX_FLAG (original_mem_exp, return_val);
1514
1515 struct mem_attrs original_attrs = *get_mem_attrs(original_mem_exp);
1516
1517 alias_set_type set = original_attrs.alias;
1518 set_mem_alias_set (new_mem_exp, set);
1519
1520 addr_space_t addrspace = original_attrs.addrspace;
1521 set_mem_addr_space (new_mem_exp, addrspace);
1522
1523 unsigned int align = original_attrs.align;
1524 set_mem_align (new_mem_exp, align);
1525
1526 tree expr = original_attrs.expr;
1527 set_mem_expr (new_mem_exp, expr);
1528
1529 if (original_attrs.offset_known_p)
1530 {
1531 HOST_WIDE_INT offset = original_attrs.offset;
1532 set_mem_offset (new_mem_exp, offset);
1533 }
1534 else
1535 clear_mem_offset (new_mem_exp);
1536
1537 if (original_attrs.size_known_p)
1538 {
1539 HOST_WIDE_INT size = original_attrs.size;
1540 set_mem_size (new_mem_exp, size);
1541 }
1542 else
1543 clear_mem_size (new_mem_exp);
1544 }
1545
1546 /* Generate an rtx expression to represent use of the stvx insn to store
1547 the value represented by register SRC_EXP into the memory at address
1548 DEST_EXP, with vector mode MODE. */
1549 rtx
1550 rs6000_gen_stvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
1551 {
1552 rtx stvx;
1553
1554 if (mode == V16QImode)
1555 stvx = gen_altivec_stvx_v16qi (src_exp, dest_exp);
1556 else if (mode == V8HImode)
1557 stvx = gen_altivec_stvx_v8hi (src_exp, dest_exp);
1558 #ifdef HAVE_V8HFmode
1559 else if (mode == V8HFmode)
1560 stvx = gen_altivec_stvx_v8hf (src_exp, dest_exp);
1561 #endif
1562 else if (mode == V4SImode)
1563 stvx = gen_altivec_stvx_v4si (src_exp, dest_exp);
1564 else if (mode == V4SFmode)
1565 stvx = gen_altivec_stvx_v4sf (src_exp, dest_exp);
1566 else if (mode == V2DImode)
1567 stvx = gen_altivec_stvx_v2di (src_exp, dest_exp);
1568 else if (mode == V2DFmode)
1569 stvx = gen_altivec_stvx_v2df (src_exp, dest_exp);
1570 else if (mode == V1TImode)
1571 stvx = gen_altivec_stvx_v1ti (src_exp, dest_exp);
1572 else
1573 /* KFmode, TFmode, other modes not expected in this context. */
1574 gcc_unreachable ();
1575
1576 rtx new_mem_exp = SET_DEST (PATTERN (stvx));
1577 mimic_memory_attributes_and_flags (new_mem_exp, dest_exp);
1578 return stvx;
1579 }
1580
1581 /* Given that STORE_INSN represents an aligned store-with-swap of a
1582 swapped value, replace the store with an aligned store (without
1583 swap) and replace the swap with a copy insn. */
1584 static void
1585 replace_swapped_aligned_store (swap_web_entry *insn_entry,
1586 rtx_insn *store_insn)
1587 {
1588 unsigned uid = INSN_UID (store_insn);
1589 gcc_assert (insn_entry[uid].is_swap && insn_entry[uid].is_store);
1590
1591 rtx body = PATTERN (store_insn);
1592 rtx dest_address = XEXP (SET_DEST (body), 0);
1593 rtx swap_reg = XEXP (SET_SRC (body), 0);
1594 gcc_assert (REG_P (dest_address)
1595 || rs6000_sum_of_two_registers_p (dest_address));
1596
1597 /* Find the swap instruction that provides the value to be stored by
1598      this store-with-swap instruction. */
1599 struct df_insn_info *insn_info = DF_INSN_INFO_GET (store_insn);
1600 df_ref use;
1601 rtx_insn *swap_insn = NULL;
1602 unsigned uid2 = 0;
1603 FOR_EACH_INSN_INFO_USE (use, insn_info)
1604 {
1605 struct df_link *def_link = DF_REF_CHAIN (use);
1606
1607      /* If this is not the definition of the candidate swap register,
1608      then skip it. I am only interested in the swap insn. */
1609 if (!rtx_equal_p (DF_REF_REG (use), swap_reg))
1610 continue;
1611
1612      /* If there is no def or the def is artificial or there are
1613 multiple defs, we should not be here. */
1614 gcc_assert (def_link && def_link->ref && !def_link->next
1615 && !DF_REF_IS_ARTIFICIAL (def_link->ref));
1616
1617 swap_insn = DF_REF_INSN (def_link->ref);
1618 uid2 = INSN_UID (swap_insn);
1619
1620 /* If this source value is not a simple swap, we should not be here. */
1621 gcc_assert (insn_entry[uid2].is_swap && !insn_entry[uid2].is_load
1622 && !insn_entry[uid2].is_store);
1623
1624 /* We've processed the use we care about, so break out of
1625 this loop. */
1626 break;
1627 }
1628
1629 /* At this point, swap_insn and uid2 represent the swap instruction
1630 that feeds the store. */
1631 gcc_assert (swap_insn);
1632 rtx set = single_set (store_insn);
1633 gcc_assert (set);
1634 rtx dest_exp = SET_DEST (set);
1635 rtx src_exp = XEXP (SET_SRC (body), 0);
1636 enum machine_mode mode = GET_MODE (dest_exp);
1637 gcc_assert (MEM_P (dest_exp));
1638 gcc_assert (MEM_ALIGN (dest_exp) >= 128);
1639
1640   /* Replace the store with a new stvx insn. */
1641 rtx stvx;
1642 stvx = rs6000_gen_stvx (mode, dest_exp, src_exp);
1643
1644 rtx_insn *new_insn = emit_insn_before (stvx, store_insn);
1645 rtx new_body = PATTERN (new_insn);
1646
1647 gcc_assert ((GET_CODE (new_body) == SET)
1648 && (GET_CODE (SET_DEST (new_body)) == MEM));
1649
1650 set_block_for_insn (new_insn, BLOCK_FOR_INSN (store_insn));
1651 df_insn_rescan (new_insn);
1652
1653 df_insn_delete (store_insn);
1654 remove_insn (store_insn);
1655 store_insn->set_deleted ();
1656
1657 /* Replace the swap with a copy. */
1658 uid2 = INSN_UID (swap_insn);
1659 mark_swaps_for_removal (insn_entry, uid2);
1660 replace_swap_with_copy (insn_entry, uid2);
1661 }
1662
1663 /* Generate an rtx expression to represent use of the lvx insn to load
1664 from memory SRC_EXP into register DEST_EXP with vector mode MODE. */
1665 rtx
1666 rs6000_gen_lvx (enum machine_mode mode, rtx dest_exp, rtx src_exp)
1667 {
1668 rtx lvx;
1669
1670 if (mode == V16QImode)
1671 lvx = gen_altivec_lvx_v16qi (dest_exp, src_exp);
1672 else if (mode == V8HImode)
1673 lvx = gen_altivec_lvx_v8hi (dest_exp, src_exp);
1674 #ifdef HAVE_V8HFmode
1675 else if (mode == V8HFmode)
1676 lvx = gen_altivec_lvx_v8hf (dest_exp, src_exp);
1677 #endif
1678 else if (mode == V4SImode)
1679 lvx = gen_altivec_lvx_v4si (dest_exp, src_exp);
1680 else if (mode == V4SFmode)
1681 lvx = gen_altivec_lvx_v4sf (dest_exp, src_exp);
1682 else if (mode == V2DImode)
1683 lvx = gen_altivec_lvx_v2di (dest_exp, src_exp);
1684 else if (mode == V2DFmode)
1685 lvx = gen_altivec_lvx_v2df (dest_exp, src_exp);
1686 else if (mode == V1TImode)
1687 lvx = gen_altivec_lvx_v1ti (dest_exp, src_exp);
1688 else
1689 /* KFmode, TFmode, other modes not expected in this context. */
1690 gcc_unreachable ();
1691
1692 rtx new_mem_exp = SET_SRC (PATTERN (lvx));
1693 mimic_memory_attributes_and_flags (new_mem_exp, src_exp);
1694
1695 return lvx;
1696 }
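
/* Note that lvx and stvx ignore the low-order four bits of the effective
   address, so the replacements generated here are only valid when the
   original access is known to be 16-byte aligned (see quad_aligned_load_p
   and quad_aligned_store_p).  A minimal illustrative use, with placeholder
   operands dest_reg, aligned_mem and old_insn:

     rtx lvx = rs6000_gen_lvx (V4SImode, dest_reg, aligned_mem);
     emit_insn_before (lvx, old_insn);

   mirroring what replace_swapped_aligned_load does below.  */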
1697
1698 /* Given that SWAP_INSN represents a swap of an aligned
1699 load-with-swap, replace the load with an aligned load (without
1700 swap) and replace the swap with a copy insn. */
1701 static void
1702 replace_swapped_aligned_load (swap_web_entry *insn_entry, rtx swap_insn)
1703 {
1704 /* Find the load. */
1705 unsigned uid = INSN_UID (swap_insn);
1706 /* Only call this if quad_aligned_load_p (swap_insn). */
1707 gcc_assert (insn_entry[uid].is_swap && !insn_entry[uid].is_load);
1708 struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
1709
1710 /* Since SWAP_INSN is known to represent a swap instruction, we know
1711 it "uses" only one input variable. */
1712 df_ref use = DF_INSN_INFO_USES (insn_info);
1713
1714 /* Figure out where this input variable is defined. */
1715 struct df_link *def_link = DF_REF_CHAIN (use);
1716 /* There must be exactly one non-artificial def feeding the swap. */
1717 gcc_assert (def_link && def_link->ref && !def_link->next
1718 && !DF_REF_IS_ARTIFICIAL (def_link->ref));
1719
1720 rtx_insn *def_insn = DF_REF_INSN (def_link->ref);
1721 unsigned uid2 = INSN_UID (def_insn);
1722
1723 /* We're expecting a load-with-swap insn. */
1724 gcc_assert (insn_entry[uid2].is_load && insn_entry[uid2].is_swap);
1725
1726 /* We expect this to be a set from memory, with the source representing
1727 a permuting load (indicated by code VEC_SELECT around the MEM). */
1728 rtx body = PATTERN (def_insn);
1729 gcc_assert ((GET_CODE (body) == SET)
1730 && (GET_CODE (SET_SRC (body)) == VEC_SELECT)
1731 && (GET_CODE (XEXP (SET_SRC (body), 0)) == MEM));
1732
1733 rtx src_exp = XEXP (SET_SRC (body), 0);
1734 enum machine_mode mode = GET_MODE (src_exp);
1735 rtx lvx = rs6000_gen_lvx (mode, SET_DEST (body), src_exp);
1736
1737 rtx_insn *new_insn = emit_insn_before (lvx, def_insn);
1738 rtx new_body = PATTERN (new_insn);
1739
1740 gcc_assert ((GET_CODE (new_body) == SET)
1741 && (GET_CODE (SET_SRC (new_body)) == MEM));
1742
1743 set_block_for_insn (new_insn, BLOCK_FOR_INSN (def_insn));
1744 df_insn_rescan (new_insn);
1745
1746 df_insn_delete (def_insn);
1747 remove_insn (def_insn);
1748 def_insn->set_deleted ();
1749
1750 /* Replace the swap with a copy. */
1751 mark_swaps_for_removal (insn_entry, uid);
1752 replace_swap_with_copy (insn_entry, uid);
1753 }
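
/* The transformation above is the mirror image of the store case: a
   load-with-swap followed by a swap of its result, schematically (for
   V4SImode)

     (set (reg:V4SI X) (vec_select:V4SI (mem:V4SI A) (parallel [2 3 0 1])))
     (set (reg:V4SI Y) (vec_select:V4SI (reg:V4SI X) (parallel [2 3 0 1])))

   becomes

     <lvx pattern loading X from (mem:V4SI ...)>
     (set (reg:V4SI Y) (reg:V4SI X))  */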
1754
1755 /* Given that SWAP_INSN represents a swap of a load of a constant
1756 vector value, replace with a single instruction that loads a
1757 swapped variant of the original constant.
1758
1759 The "natural" representation of a byte array in memory is the same
1760 for big endian and little endian.
1761
1762 unsigned char byte_array[] =
1763 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f };
1764
1765 However, when loaded into a vector register, the representation
1766 depends on endian conventions.
1767
1768 In big-endian mode, the register holds:
1769
1770 MSB LSB
1771 [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]
1772
1773 In little-endian mode, the register holds:
1774
1775 MSB LSB
1776 [ f, e, d, c, b, a, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ]
1777
1778 Word arrays require different handling. Consider the word array:
1779
1780 unsigned int word_array[] =
1781 { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f };
1782
1783 The in-memory representation depends on endian configuration. The
1784 equivalent array, declared as a byte array, in memory would be:
1785
1786 unsigned char big_endian_word_array_data[] =
1787 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f }
1788
1789 unsigned char little_endian_word_array_data[] =
1790 { 3, 2, 1, 0, 7, 6, 5, 4, b, a, 9, 8, f, e, d, c }
1791
1792 In big-endian mode, the register holds:
1793
1794 MSB LSB
1795 [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]
1796
1797 In little-endian mode, the register holds:
1798
1799 MSB LSB
1800 [ c, d, e, f, 8, 9, a, b, 4, 5, 6, 7, 0, 1, 2, 3 ]
1801
1802
1803 Similar transformations apply to the vector of half-word and vector
1804 of double-word representations.
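
   For example, the double-word array

     unsigned long long dword_array[] =
       { 0x0001020304050607, 0x08090a0b0c0d0e0f };

   is held in a big-endian register as

     MSB                                            LSB
     [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f ]

   and in a little-endian register as

     MSB                                            LSB
     [ 8, 9, a, b, c, d, e, f, 0, 1, 2, 3, 4, 5, 6, 7 ]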
1805
1806 For now, don't handle vectors of quad-precision values. Just return.
1807 A better solution is to fix the code generator to emit lvx/stvx for
1808 those. */
1809 static void
1810 replace_swapped_load_constant (swap_web_entry *insn_entry, rtx swap_insn)
1811 {
1812 /* Find the load. */
1813 struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
1814 rtx_insn *load_insn;
1815 df_ref use = DF_INSN_INFO_USES (insn_info);
1816 struct df_link *def_link = DF_REF_CHAIN (use);
1817 gcc_assert (def_link && !def_link->next);
1818
1819 load_insn = DF_REF_INSN (def_link->ref);
1820 gcc_assert (load_insn);
1821
1822 /* Find the TOC-relative symbol access. */
1823 insn_info = DF_INSN_INFO_GET (load_insn);
1824 use = DF_INSN_INFO_USES (insn_info);
1825
1826 def_link = DF_REF_CHAIN (use);
1827 gcc_assert (def_link && !def_link->next);
1828
1829 rtx_insn *tocrel_insn = DF_REF_INSN (def_link->ref);
1830 gcc_assert (tocrel_insn);
1831
1832 /* Find the embedded CONST_VECTOR. We must call toc_relative_expr_p
1833 here because it sets tocrel_base as a side effect; we have already
1834 established that it will return true. */
1835 rtx base, offset;
1836 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
1837 const_rtx tocrel_base;
1838
1839 /* There is an extra level of indirection for small/large code models. */
1840 if (GET_CODE (tocrel_expr) == MEM)
1841 tocrel_expr = XEXP (tocrel_expr, 0);
1842
1843 if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
1844 gcc_unreachable ();
1845
1846 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
1847 rtx const_vector = get_pool_constant (base);
1848
1849 /* With the extra indirection, get_pool_constant will produce the
1850 real constant from the reg_equal expression, so get the real
1851 constant. */
1852 if (GET_CODE (const_vector) == SYMBOL_REF)
1853 const_vector = get_pool_constant (const_vector);
1854 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
1855
1856 rtx new_mem;
1857 enum machine_mode mode = GET_MODE (const_vector);
1858
1859 /* Create an adjusted constant from the original constant. */
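 /* The adjusted constant is the original constant with its two
    doublewords interchanged: element I of an N-element vector moves to
    position (I + N/2) % N.  Loading the adjusted constant with a
    permuting (doubleword-swapping) load then leaves the original value
    in the register, so the following swap can become a simple copy.  */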
1860 if (mode == V1TImode)
1861 /* Quad-precision values are not handled; leave the insn alone. */
1862 return;
1863 else if (mode == V16QImode)
1864 {
1865 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (16));
1866 int i;
1867
1868 for (i = 0; i < 16; i++)
1869 XVECEXP (vals, 0, ((i+8) % 16)) = XVECEXP (const_vector, 0, i);
1870 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1871 new_mem = force_const_mem (mode, new_const_vector);
1872 }
1873 else if ((mode == V8HImode)
1874 #ifdef HAVE_V8HFmode
1875 || (mode == V8HFmode)
1876 #endif
1877 )
1878 {
1879 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (8));
1880 int i;
1881
1882 for (i = 0; i < 8; i++)
1883 XVECEXP (vals, 0, ((i+4) % 8)) = XVECEXP (const_vector, 0, i);
1884 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1885 new_mem = force_const_mem (mode, new_const_vector);
1886 }
1887 else if ((mode == V4SImode) || (mode == V4SFmode))
1888 {
1889 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (4));
1890 int i;
1891
1892 for (i = 0; i < 4; i++)
1893 XVECEXP (vals, 0, ((i+2) % 4)) = XVECEXP (const_vector, 0, i);
1894 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1895 new_mem = force_const_mem (mode, new_const_vector);
1896 }
1897 else if ((mode == V2DImode) || (mode == V2DFmode))
1898 {
1899 rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (2));
1900 int i;
1901
1902 for (i = 0; i < 2; i++)
1903 XVECEXP (vals, 0, ((i+1) % 2)) = XVECEXP (const_vector, 0, i);
1904 rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
1905 new_mem = force_const_mem (mode, new_const_vector);
1906 }
1907 else
1908 {
1909 /* We do not expect other modes to be constant-load-swapped. */
1910 gcc_unreachable ();
1911 }
1912
1913 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
1914 can't recognize. Force the SYMBOL_REF into a register. */
1915 if (!REG_P (XEXP (new_mem, 0))) {
1916 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
1917 XEXP (new_mem, 0) = base_reg;
1918
1919 /* Move the newly created insn ahead of the load insn. */
1920 /* The last insn is the insn that forced new_mem into a register. */
1921 rtx_insn *force_insn = get_last_insn ();
1922 /* Remove this insn from the end of the instruction sequence. */
1923 remove_insn (force_insn);
1924 rtx_insn *before_load_insn = PREV_INSN (load_insn);
1925
1926 /* And insert it back into the sequence immediately before the load
1927 insn, so the new base register will be available when the existing
1928 load is modified to load the swapped constant. */
1929 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
1930 df_insn_rescan (before_load_insn);
1931 df_insn_rescan (force_insn);
1932 }
1933
1934 /* Replace the MEM in the load instruction and rescan it. */
1935 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
1936 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
1937 df_insn_rescan (load_insn);
1938
1939 unsigned int uid = INSN_UID (swap_insn);
1940 mark_swaps_for_removal (insn_entry, uid);
1941 replace_swap_with_copy (insn_entry, uid);
1942 }
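
/* As an illustration (not part of the compiler), a function such as

     vector int
     get_k (void)
     {
       return (vector int) { 1, 2, 3, 4 };
     }

   compiled for powerpc64le with -mcpu=power8 would naively load the
   constant from the TOC with lxvd2x and then correct the element order
   with xxswapdi.  The transformation above instead places a
   doubleword-swapped copy of the constant in the constant pool, so the
   permuting load alone produces the desired register image and the
   xxswapdi degenerates into a copy.  */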
1943
1944 /* Dump the swap table to DUMP_FILE. */
1945 static void
1946 dump_swap_insn_table (swap_web_entry *insn_entry)
1947 {
1948 int e = get_max_uid ();
1949 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
1950
1951 for (int i = 0; i < e; ++i)
1952 if (insn_entry[i].is_relevant)
1953 {
1954 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
1955 fprintf (dump_file, "%6d %6d ", i,
1956 pred_entry && pred_entry->insn
1957 ? INSN_UID (pred_entry->insn) : 0);
1958 if (insn_entry[i].is_load)
1959 fputs ("load ", dump_file);
1960 if (insn_entry[i].is_store)
1961 fputs ("store ", dump_file);
1962 if (insn_entry[i].is_swap)
1963 fputs ("swap ", dump_file);
1964 if (insn_entry[i].is_live_in)
1965 fputs ("live-in ", dump_file);
1966 if (insn_entry[i].is_live_out)
1967 fputs ("live-out ", dump_file);
1968 if (insn_entry[i].contains_subreg)
1969 fputs ("subreg ", dump_file);
1970 if (insn_entry[i].is_128_int)
1971 fputs ("int128 ", dump_file);
1972 if (insn_entry[i].is_call)
1973 fputs ("call ", dump_file);
1974 if (insn_entry[i].is_swappable)
1975 {
1976 fputs ("swappable ", dump_file);
1977 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
1978 fputs ("special:constvec ", dump_file);
1979 else if (insn_entry[i].special_handling == SH_SUBREG)
1980 fputs ("special:subreg ", dump_file);
1981 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
1982 fputs ("special:load ", dump_file);
1983 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
1984 fputs ("special:store ", dump_file);
1985 else if (insn_entry[i].special_handling == SH_EXTRACT)
1986 fputs ("special:extract ", dump_file);
1987 else if (insn_entry[i].special_handling == SH_SPLAT)
1988 fputs ("special:splat ", dump_file);
1989 else if (insn_entry[i].special_handling == SH_XXPERMDI)
1990 fputs ("special:xxpermdi ", dump_file);
1991 else if (insn_entry[i].special_handling == SH_CONCAT)
1992 fputs ("special:concat ", dump_file);
1993 else if (insn_entry[i].special_handling == SH_VPERM)
1994 fputs ("special:vperm ", dump_file);
1995 }
1996 if (insn_entry[i].web_not_optimizable)
1997 fputs ("unoptimizable ", dump_file);
1998 if (insn_entry[i].will_delete)
1999 fputs ("delete ", dump_file);
2000 fputs ("\n", dump_file);
2001 }
2002 fputs ("\n", dump_file);
2003 }
2004
2005 /* Return ALIGN with its address canonicalized to (reg) or (plus reg reg).
2006 Here ALIGN is an (and addr (const_int -16)). Always return a new copy
2007 to avoid problems with combine. */
2008 static rtx
2009 alignment_with_canonical_addr (rtx align)
2010 {
2011 rtx canon;
2012 rtx addr = XEXP (align, 0);
2013
2014 if (REG_P (addr))
2015 canon = addr;
2016
2017 else if (GET_CODE (addr) == PLUS)
2018 {
2019 rtx addrop0 = XEXP (addr, 0);
2020 rtx addrop1 = XEXP (addr, 1);
2021
2022 if (!REG_P (addrop0))
2023 addrop0 = force_reg (GET_MODE (addrop0), addrop0);
2024
2025 if (!REG_P (addrop1))
2026 addrop1 = force_reg (GET_MODE (addrop1), addrop1);
2027
2028 canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
2029 }
2030
2031 else
2032 canon = force_reg (GET_MODE (addr), addr);
2033
2034 return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
2035 }
2036
2037 /* Check whether INSN computes an alignment mask (an AND with -16),
2038 and if so, return a fully-expanded rtx for the masking operation. */
2039 static rtx
2040 alignment_mask (rtx_insn *insn)
2041 {
2042 rtx body = PATTERN (insn);
2043
2044 if (GET_CODE (body) != SET
2045 || GET_CODE (SET_SRC (body)) != AND
2046 || !REG_P (XEXP (SET_SRC (body), 0)))
2047 return 0;
2048
2049 rtx mask = XEXP (SET_SRC (body), 1);
2050
2051 if (GET_CODE (mask) == CONST_INT)
2052 {
2053 if (INTVAL (mask) == -16)
2054 return alignment_with_canonical_addr (SET_SRC (body));
2055 else
2056 return 0;
2057 }
2058
2059 if (!REG_P (mask))
2060 return 0;
2061
2062 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2063 df_ref use;
2064 rtx real_mask = 0;
2065
2066 FOR_EACH_INSN_INFO_USE (use, insn_info)
2067 {
2068 if (!rtx_equal_p (DF_REF_REG (use), mask))
2069 continue;
2070
2071 struct df_link *def_link = DF_REF_CHAIN (use);
2072 if (!def_link || def_link->next)
2073 return 0;
2074
2075 rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
2076 rtx const_body = PATTERN (const_insn);
2077 if (GET_CODE (const_body) != SET)
2078 return 0;
2079
2080 real_mask = SET_SRC (const_body);
2081
2082 if (GET_CODE (real_mask) != CONST_INT
2083 || INTVAL (real_mask) != -16)
2084 return 0;
2085 }
2086
2087 if (real_mask == 0)
2088 return 0;
2089
2090 return alignment_with_canonical_addr (SET_SRC (body));
2091 }
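
/* For example, alignment_mask accepts an insn of the form

     (set (reg:DI T) (and:DI (reg:DI A) (const_int -16)))

   and also the variant in which the -16 is supplied through a register
   whose single definition is a (const_int -16); in both cases the value
   returned is the canonicalized (and (reg) (const_int -16)) operation.  */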
2092
2093 /* Given INSN that's a load or store based at BASE_REG, look for a
2094 feeding computation that aligns its address on a 16-byte boundary.
2095 Return the masking rtx, and store its containing insn in *AND_INSN. */
2096 static rtx
2097 find_alignment_op (rtx_insn *insn, rtx base_reg, rtx_insn **and_insn)
2098 {
2099 df_ref base_use;
2100 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2101 rtx and_operation = 0;
2102
2103 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
2104 {
2105 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
2106 continue;
2107
2108 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
2109 if (!base_def_link || base_def_link->next)
2110 break;
2111
2112 /* With stack-protector code enabled, and possibly in other
2113 circumstances, there may not be an associated insn for
2114 the def. */
2115 if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
2116 break;
2117
2118 *and_insn = DF_REF_INSN (base_def_link->ref);
2119 and_operation = alignment_mask (*and_insn);
2120 if (and_operation != 0)
2121 break;
2122 }
2123
2124 return and_operation;
2125 }
2126
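/* Bookkeeping for a swap insn that recombine_lvx_pattern or
   recombine_stvx_pattern has decided to turn into a copy; the actual
   replacement is deferred until the insn walk is finished.  */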
2127 struct del_info { bool replace; rtx_insn *replace_insn; };
2128
2129 /* If INSN is the load for an lvx pattern, put it in canonical form. */
2130 static void
2131 recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
2132 {
2133 rtx body = PATTERN (insn);
2134 gcc_assert (GET_CODE (body) == SET
2135 && GET_CODE (SET_SRC (body)) == VEC_SELECT
2136 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);
2137
2138 rtx mem = XEXP (SET_SRC (body), 0);
2139 rtx base_reg = XEXP (mem, 0);
2140
2141 rtx_insn *and_insn;
2142 rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
2143
2144 if (and_operation != 0)
2145 {
2146 df_ref def;
2147 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2148 FOR_EACH_INSN_INFO_DEF (def, insn_info)
2149 {
2150 struct df_link *link = DF_REF_CHAIN (def);
2151 if (!link || link->next)
2152 break;
2153
2154 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
2155 if (!insn_is_swap_p (swap_insn)
2156 || insn_is_load_p (swap_insn)
2157 || insn_is_store_p (swap_insn))
2158 break;
2159
2160 /* Expected lvx pattern found. Change the swap to
2161 a copy, and propagate the AND operation into the
2162 load. */
2163 to_delete[INSN_UID (swap_insn)].replace = true;
2164 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
2165
2166 /* However, first we must be sure that we make the
2167 base register from the AND operation available
2168 in case the register has been overwritten. Copy
2169 the base register to a new pseudo and use that
2170 as the base register of the AND operation in
2171 the new LVX instruction. */
2172 rtx and_base = XEXP (and_operation, 0);
2173 rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
2174 rtx copy = gen_rtx_SET (new_reg, and_base);
2175 rtx_insn *new_insn = emit_insn_after (copy, and_insn);
2176 set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
2177 df_insn_rescan (new_insn);
2178
2179 XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
2180 XEXP (and_operation, 1));
2181 SET_SRC (body) = mem;
2182 INSN_CODE (insn) = -1; /* Force re-recognition. */
2183 df_insn_rescan (insn);
2184
2185 if (dump_file)
2186 fprintf (dump_file, "lvx opportunity found at %d\n",
2187 INSN_UID (insn));
2188 }
2189 }
2190 }
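
/* Schematically, recombine_lvx_pattern rewrites

     (set (reg:DI B) (and:DI (reg:DI A) (const_int -16)))
     (set (reg:V4SI X) (vec_select:V4SI (mem:V4SI (reg:DI B)) (...)))
     (set (reg:V4SI Y) (vec_select:V4SI (reg:V4SI X) (...)))      ; swap

   so that the load reads directly through a masked copy of the base:

     (set (reg:DI B') (reg:DI A))                                 ; new copy
     (set (reg:V4SI X) (mem:V4SI (and:DI (reg:DI B') (const_int -16))))

   which is the form the altivec lvx patterns expect; the trailing swap is
   later replaced with a copy of X into Y.  This sketch is approximate;
   see the code above for the exact operands used.  */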
2191
2192 /* If INSN is the store for an stvx pattern, put it in canonical form. */
2193 static void
2194 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
2195 {
2196 rtx body = PATTERN (insn);
2197 gcc_assert (GET_CODE (body) == SET
2198 && GET_CODE (SET_DEST (body)) == MEM
2199 && GET_CODE (SET_SRC (body)) == VEC_SELECT);
2200 rtx mem = SET_DEST (body);
2201 rtx base_reg = XEXP (mem, 0);
2202
2203 rtx_insn *and_insn;
2204 rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
2205
2206 if (and_operation != 0)
2207 {
2208 rtx src_reg = XEXP (SET_SRC (body), 0);
2209 df_ref src_use;
2210 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2211 FOR_EACH_INSN_INFO_USE (src_use, insn_info)
2212 {
2213 if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
2214 continue;
2215
2216 struct df_link *link = DF_REF_CHAIN (src_use);
2217 if (!link || link->next)
2218 break;
2219
2220 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
2221 if (!insn_is_swap_p (swap_insn)
2222 || insn_is_load_p (swap_insn)
2223 || insn_is_store_p (swap_insn))
2224 break;
2225
2226 /* Expected stvx pattern found. Change the swap to
2227 a copy, and propagate the AND operation into the
2228 store. */
2229 to_delete[INSN_UID (swap_insn)].replace = true;
2230 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
2231
2232 /* However, first we must be sure that we make the
2233 base register from the AND operation available
2234 in case the register has been overwritten. Copy
2235 the base register to a new pseudo and use that
2236 as the base register of the AND operation in
2237 the new STVX instruction. */
2238 rtx and_base = XEXP (and_operation, 0);
2239 rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
2240 rtx copy = gen_rtx_SET (new_reg, and_base);
2241 rtx_insn *new_insn = emit_insn_after (copy, and_insn);
2242 set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
2243 df_insn_rescan (new_insn);
2244
2245 XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
2246 XEXP (and_operation, 1));
2247 SET_SRC (body) = src_reg;
2248 INSN_CODE (insn) = -1; /* Force re-recognition. */
2249 df_insn_rescan (insn);
2250
2251 if (dump_file)
2252 fprintf (dump_file, "stvx opportunity found at %d\n",
2253 INSN_UID (insn));
2254 }
2255 }
2256 }
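
/* The stvx case is the mirror image: after recombination the store has,
   schematically, the shape

     (set (mem:V4SI (and:DI (reg:DI B') (const_int -16))) (reg:V4SI S))

   which matches the altivec stvx patterns, and the swap that fed the
   stored value is later turned into a copy.  */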
2257
2258 /* Look for patterns created from builtin lvx and stvx calls, and
2259 canonicalize them to be properly recognized as such. */
2260 static void
2261 recombine_lvx_stvx_patterns (function *fun)
2262 {
2263 int i;
2264 basic_block bb;
2265 rtx_insn *insn;
2266
2267 int num_insns = get_max_uid ();
2268 del_info *to_delete = XCNEWVEC (del_info, num_insns);
2269
2270 FOR_ALL_BB_FN (bb, fun)
2271 FOR_BB_INSNS (bb, insn)
2272 {
2273 if (!NONDEBUG_INSN_P (insn))
2274 continue;
2275
2276 if (insn_is_load_p (insn) && insn_is_swap_p (insn))
2277 recombine_lvx_pattern (insn, to_delete);
2278 else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
2279 recombine_stvx_pattern (insn, to_delete);
2280 }
2281
2282 /* Turning swaps into copies is delayed until now, to avoid problems
2283 with deleting instructions during the insn walk. */
2284 for (i = 0; i < num_insns; i++)
2285 if (to_delete[i].replace)
2286 {
2287 rtx swap_body = PATTERN (to_delete[i].replace_insn);
2288 rtx src_reg = XEXP (SET_SRC (swap_body), 0);
2289 rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
2290 rtx_insn *new_insn = emit_insn_before (copy,
2291 to_delete[i].replace_insn);
2292 set_block_for_insn (new_insn,
2293 BLOCK_FOR_INSN (to_delete[i].replace_insn));
2294 df_insn_rescan (new_insn);
2295 df_insn_delete (to_delete[i].replace_insn);
2296 remove_insn (to_delete[i].replace_insn);
2297 to_delete[i].replace_insn->set_deleted ();
2298 }
2299
2300 free (to_delete);
2301 }
2302
2303 /* Main entry point for this pass. */
2304 unsigned int
2305 rs6000_analyze_swaps (function *fun)
2306 {
2307 swap_web_entry *insn_entry;
2308 basic_block bb;
2309 rtx_insn *insn, *curr_insn = 0;
2310
2311 /* Dataflow analysis for use-def chains. */
2312 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2313 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2314 df_analyze ();
2315 df_set_flags (DF_DEFER_INSN_RESCAN);
2316
2317 /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */
2318 recombine_lvx_stvx_patterns (fun);
2319 df_process_deferred_rescans ();
2320
2321 /* Allocate structure to represent webs of insns. */
2322 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2323
2324 /* Walk the insns to gather basic data. */
2325 FOR_ALL_BB_FN (bb, fun)
2326 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2327 {
2328 unsigned int uid = INSN_UID (insn);
2329 if (NONDEBUG_INSN_P (insn))
2330 {
2331 insn_entry[uid].insn = insn;
2332
2333 if (GET_CODE (insn) == CALL_INSN)
2334 insn_entry[uid].is_call = 1;
2335
2336 /* Walk the uses and defs to see if we mention vector regs.
2337 Record any constraints on optimization of such mentions. */
2338 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2339 df_ref mention;
2340 FOR_EACH_INSN_INFO_USE (mention, insn_info)
2341 {
2342 /* We use DF_REF_REAL_REG here to get inside any subregs. */
2343 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
2344
2345 /* If a use gets its value from a call insn, it will be
2346 a hard register and will look like (reg:V4SI 3 3).
2347 The df analysis creates two mentions for GPR3 and GPR4,
2348 both DImode. We must recognize this and treat it as a
2349 vector mention to ensure the call is unioned with this
2350 use. */
2351 if (mode == DImode && DF_REF_INSN_INFO (mention))
2352 {
2353 rtx feeder = DF_REF_INSN (mention);
2354 /* FIXME: It is pretty hard to get from the df mention
2355 to the mode of the use in the insn. We arbitrarily
2356 pick a vector mode here, even though the use might
2357 be a real DImode. We can be too conservative
2358 (create a web larger than necessary) because of
2359 this, so consider eventually fixing this. */
2360 if (GET_CODE (feeder) == CALL_INSN)
2361 mode = V4SImode;
2362 }
2363
2364 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
2365 {
2366 insn_entry[uid].is_relevant = 1;
2367 if (mode == TImode || mode == V1TImode
2368 || FLOAT128_VECTOR_P (mode))
2369 insn_entry[uid].is_128_int = 1;
2370 if (DF_REF_INSN_INFO (mention))
2371 insn_entry[uid].contains_subreg
2372 = !rtx_equal_p (DF_REF_REG (mention),
2373 DF_REF_REAL_REG (mention));
2374 union_defs (insn_entry, insn, mention);
2375 }
2376 }
2377 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
2378 {
2379 /* We use DF_REF_REAL_REG here to get inside any subregs. */
2380 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
2381
2382 /* If we're loading up a hard vector register for a call,
2383 it looks like (set (reg:V4SI 9 9) (...)). The df
2384 analysis creates two mentions for GPR9 and GPR10, both
2385 DImode. So relying on the mode from the mentions
2386 isn't sufficient to ensure we union the call into the
2387 web with the parameter setup code. */
2388 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
2389 && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (PATTERN (insn)))))
2390 mode = GET_MODE (SET_DEST (PATTERN (insn)));
2391
2392 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
2393 {
2394 insn_entry[uid].is_relevant = 1;
2395 if (mode == TImode || mode == V1TImode
2396 || FLOAT128_VECTOR_P (mode))
2397 insn_entry[uid].is_128_int = 1;
2398 if (DF_REF_INSN_INFO (mention))
2399 insn_entry[uid].contains_subreg
2400 = !rtx_equal_p (DF_REF_REG (mention),
2401 DF_REF_REAL_REG (mention));
2402 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
2403 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
2404 insn_entry[uid].is_live_out = 1;
2405 union_uses (insn_entry, insn, mention);
2406 }
2407 }
2408
2409 if (insn_entry[uid].is_relevant)
2410 {
2411 /* Determine if this is a load or store. */
2412 insn_entry[uid].is_load = insn_is_load_p (insn);
2413 insn_entry[uid].is_store = insn_is_store_p (insn);
2414
2415 /* Determine if this is a doubleword swap. If not,
2416 determine whether it can legally be swapped. */
2417 if (insn_is_swap_p (insn))
2418 insn_entry[uid].is_swap = 1;
2419 else
2420 {
2421 unsigned int special = SH_NONE;
2422 insn_entry[uid].is_swappable
2423 = insn_is_swappable_p (insn_entry, insn, &special);
2424 if (special != SH_NONE && insn_entry[uid].contains_subreg)
2425 insn_entry[uid].is_swappable = 0;
2426 else if (special != SH_NONE)
2427 insn_entry[uid].special_handling = special;
2428 else if (insn_entry[uid].contains_subreg)
2429 insn_entry[uid].special_handling = SH_SUBREG;
2430 }
2431 }
2432 }
2433 }
2434
2435 if (dump_file)
2436 {
2437 fprintf (dump_file, "\nSwap insn entry table when first built\n");
2438 dump_swap_insn_table (insn_entry);
2439 }
2440
2441 /* Record unoptimizable webs. */
2442 unsigned e = get_max_uid (), i;
2443 for (i = 0; i < e; ++i)
2444 {
2445 if (!insn_entry[i].is_relevant)
2446 continue;
2447
2448 swap_web_entry *root
2449 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
2450
2451 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
2452 || (insn_entry[i].contains_subreg
2453 && insn_entry[i].special_handling != SH_SUBREG)
2454 || insn_entry[i].is_128_int || insn_entry[i].is_call
2455 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
2456 root->web_not_optimizable = 1;
2457
2458 /* If we have loads or stores that are neither permuting nor
2459 otherwise swappable, the optimization isn't appropriate. */
2460 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
2461 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
2462 root->web_not_optimizable = 1;
2463
2464 /* If we have a swap that is both fed by a permuting load
2465 and a feeder of a permuting store, then the optimization
2466 isn't appropriate. (Consider vec_xl followed by vec_xst_be.) */
2467 else if (insn_entry[i].is_swap && !insn_entry[i].is_load
2468 && !insn_entry[i].is_store
2469 && swap_feeds_both_load_and_store (&insn_entry[i]))
2470 root->web_not_optimizable = 1;
2471
2472 /* If we have permuting loads or stores that are not accompanied
2473 by a register swap, the optimization isn't appropriate. */
2474 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
2475 {
2476 rtx insn = insn_entry[i].insn;
2477 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2478 df_ref def;
2479
2480 FOR_EACH_INSN_INFO_DEF (def, insn_info)
2481 {
2482 struct df_link *link = DF_REF_CHAIN (def);
2483
2484 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
2485 {
2486 root->web_not_optimizable = 1;
2487 break;
2488 }
2489 }
2490 }
2491 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
2492 {
2493 rtx insn = insn_entry[i].insn;
2494 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
2495 df_ref use;
2496
2497 FOR_EACH_INSN_INFO_USE (use, insn_info)
2498 {
2499 struct df_link *link = DF_REF_CHAIN (use);
2500
2501 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
2502 {
2503 root->web_not_optimizable = 1;
2504 break;
2505 }
2506 }
2507 }
2508 }
2509
2510 if (dump_file)
2511 {
2512 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
2513 dump_swap_insn_table (insn_entry);
2514 }
2515
2516 /* For each load and store in an optimizable web (which implies
2517 the loads and stores are permuting), find the associated
2518 register swaps and mark them for removal. Due to various
2519 optimizations we may mark the same swap more than once. Also
2520 perform special handling for swappable insns that require it. */
2521 for (i = 0; i < e; ++i)
2522 if ((insn_entry[i].is_load || insn_entry[i].is_store)
2523 && insn_entry[i].is_swap)
2524 {
2525 swap_web_entry* root_entry
2526 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
2527 if (!root_entry->web_not_optimizable)
2528 mark_swaps_for_removal (insn_entry, i);
2529 }
2530 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
2531 {
2532 swap_web_entry* root_entry
2533 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
2534 if (!root_entry->web_not_optimizable)
2535 handle_special_swappables (insn_entry, i);
2536 }
2537
2538 /* Now replace the swaps marked for removal with register copies. */
2539 for (i = 0; i < e; ++i)
2540 if (insn_entry[i].will_delete)
2541 replace_swap_with_copy (insn_entry, i);
2542
2543 /* Clean up. */
2544 free (insn_entry);
2545
2546 /* Use a second pass over rtl to detect that certain vector values
2547 fetched from or stored to memory on quad-word aligned addresses
2548 can use lvx/stvx without swaps. */
2549
2550 /* First, rebuild ud chains. */
2551 df_remove_problem (df_chain);
2552 df_process_deferred_rescans ();
2553 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2554 df_chain_add_problem (DF_UD_CHAIN);
2555 df_analyze ();
2556
2557 swap_web_entry *pass2_insn_entry;
2558 pass2_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2559
2560 /* Walk the insns to gather basic data. */
2561 FOR_ALL_BB_FN (bb, fun)
2562 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2563 {
2564 unsigned int uid = INSN_UID (insn);
2565 if (NONDEBUG_INSN_P (insn))
2566 {
2567 pass2_insn_entry[uid].insn = insn;
2568
2569 pass2_insn_entry[uid].is_relevant = 1;
2570 pass2_insn_entry[uid].is_load = insn_is_load_p (insn);
2571 pass2_insn_entry[uid].is_store = insn_is_store_p (insn);
2572
2573 /* Determine if this is a doubleword swap. If not,
2574 determine whether it can legally be swapped. */
2575 if (insn_is_swap_p (insn))
2576 pass2_insn_entry[uid].is_swap = 1;
2577 }
2578 }
2579
2580 e = get_max_uid ();
2581 for (unsigned i = 0; i < e; ++i)
2582 if (pass2_insn_entry[i].is_swap && !pass2_insn_entry[i].is_load
2583 && !pass2_insn_entry[i].is_store)
2584 {
2585 /* Replace swap of aligned load-swap with aligned unswapped
2586 load. */
2587 rtx_insn *swap_insn = pass2_insn_entry[i].insn;
2588 if (quad_aligned_load_p (pass2_insn_entry, swap_insn))
2589 replace_swapped_aligned_load (pass2_insn_entry, swap_insn);
2590 }
2591 else if (pass2_insn_entry[i].is_swap && pass2_insn_entry[i].is_store)
2592 {
2593 /* Replace aligned store-swap of swapped value with aligned
2594 unswapped store. */
2595 rtx_insn *swap_insn = pass2_insn_entry[i].insn;
2596 if (quad_aligned_store_p (pass2_insn_entry, swap_insn))
2597 replace_swapped_aligned_store (pass2_insn_entry, swap_insn);
2598 }
2599
2600 /* Clean up. */
2601 free (pass2_insn_entry);
2602
2603 /* Use a third pass over rtl to replace swap(load(vector constant))
2604 with load(swapped vector constant). */
2605
2606 /* First, rebuild ud chains. */
2607 df_remove_problem (df_chain);
2608 df_process_deferred_rescans ();
2609 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
2610 df_chain_add_problem (DF_UD_CHAIN);
2611 df_analyze ();
2612
2613 swap_web_entry *pass3_insn_entry;
2614 pass3_insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
2615
2616 /* Walk the insns to gather basic data. */
2617 FOR_ALL_BB_FN (bb, fun)
2618 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
2619 {
2620 unsigned int uid = INSN_UID (insn);
2621 if (NONDEBUG_INSN_P (insn))
2622 {
2623 pass3_insn_entry[uid].insn = insn;
2624
2625 pass3_insn_entry[uid].is_relevant = 1;
2626 pass3_insn_entry[uid].is_load = insn_is_load_p (insn);
2627 pass3_insn_entry[uid].is_store = insn_is_store_p (insn);
2628
2629 /* Determine if this is a doubleword swap. If not,
2630 determine whether it can legally be swapped. */
2631 if (insn_is_swap_p (insn))
2632 pass3_insn_entry[uid].is_swap = 1;
2633 }
2634 }
2635
2636 e = get_max_uid ();
2637 for (unsigned i = 0; i < e; ++i)
2638 if (pass3_insn_entry[i].is_swap && !pass3_insn_entry[i].is_load
2639 && !pass3_insn_entry[i].is_store)
2640 {
2641 insn = pass3_insn_entry[i].insn;
2642 if (const_load_sequence_p (pass3_insn_entry, insn))
2643 replace_swapped_load_constant (pass3_insn_entry, insn);
2644 }
2645
2646 /* Clean up. */
2647 free (pass3_insn_entry);
2648 return 0;
2649 }
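
/* As an end-to-end illustration (not compiled here), consider

     #include <altivec.h>

     void
     vadd (vector int *dst, const vector int *a, const vector int *b)
     {
       *dst = vec_add (*a, *b);
     }

   At -O2 -mcpu=power8 on powerpc64le this would naively be emitted as
   two lxvd2x/xxswapdi pairs, a vadduwm, and an xxswapdi/stxvd2x pair.
   Since vadduwm is insensitive to doubleword order, the web built above
   contains only permuting loads/stores and swaps, so all three xxswapdi
   instructions are marked for removal and replaced with copies that
   later passes delete.  */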
2650
2651 const pass_data pass_data_analyze_swaps =
2652 {
2653 RTL_PASS, /* type */
2654 "swaps", /* name */
2655 OPTGROUP_NONE, /* optinfo_flags */
2656 TV_NONE, /* tv_id */
2657 0, /* properties_required */
2658 0, /* properties_provided */
2659 0, /* properties_destroyed */
2660 0, /* todo_flags_start */
2661 TODO_df_finish, /* todo_flags_finish */
2662 };
2663
2664 class pass_analyze_swaps : public rtl_opt_pass
2665 {
2666 public:
2667 pass_analyze_swaps (gcc::context *ctxt)
2668 : rtl_opt_pass (pass_data_analyze_swaps, ctxt)
2669 {}
2670
2671 /* opt_pass methods: */
2672 virtual bool gate (function *)
2673 {
2674 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
2675 && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
2676 }
2677
2678 virtual unsigned int execute (function *fun)
2679 {
2680 return rs6000_analyze_swaps (fun);
2681 }
2682
2683 opt_pass *clone ()
2684 {
2685 return new pass_analyze_swaps (m_ctxt);
2686 }
2687
2688 }; // class pass_analyze_swaps
2689
2690 rtl_opt_pass *
2691 make_pass_analyze_swaps (gcc::context *ctxt)
2692 {
2693 return new pass_analyze_swaps (ctxt);
2694 }
2695