]>
Commit | Line | Data |
---|---|---|
a98824ac SP |
1 | /* Tag Collision Avoidance pass for Falkor. |
2 | Copyright (C) 2018 Free Software Foundation, Inc. | |
3 | ||
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it | |
7 | under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 3, or (at your option) | |
9 | any later version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but | |
12 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #define IN_TARGET_CODE 1 | |
21 | ||
22 | #include "config.h" | |
23 | #define INCLUDE_LIST | |
24 | #include "system.h" | |
25 | #include "coretypes.h" | |
26 | #include "backend.h" | |
27 | #include "target.h" | |
28 | #include "rtl.h" | |
29 | #include "tree.h" | |
30 | #include "tree-pass.h" | |
31 | #include "aarch64-protos.h" | |
32 | #include "hash-map.h" | |
33 | #include "cfgloop.h" | |
34 | #include "cfgrtl.h" | |
35 | #include "rtl-iter.h" | |
36 | #include "df.h" | |
37 | #include "memmodel.h" | |
38 | #include "optabs.h" | |
39 | #include "regs.h" | |
40 | #include "recog.h" | |
41 | #include "regrename.h" | |
42 | #include "print-rtl.h" | |
43 | ||
44 | /* The Falkor hardware prefetching system uses the encoding of the registers | |
45 | and offsets of loads to decide which of the multiple hardware prefetchers to | |
46 | assign the load to. This has the positive effect of accelerating prefetches | |
47 | when all related loads with uniform strides are assigned to the same | |
48 | prefetcher unit. The down side is that because of the way the assignment | |
49 | works, multiple unrelated loads may end up on the same prefetch unit, thus | |
50 | causing the unit to bounce between different sets of addresses and never | |
51 | train correctly. The point of this pass is to avoid such collisions so that | |
52 | unrelated loads are spread out to different prefetchers. It also makes a | |
53 | rudimentary attempt to ensure that related loads with the same tags don't | |
54 | get moved out unnecessarily. | |
55 | ||
56 | Perhaps a future enhancement would be to make a more concerted attempt to | |
57 | get related loads under the same tag. See the memcpy/memset implementation | |
58 | for falkor in glibc to understand the kind of impact this can have on | |
59 | falkor. | |
60 | ||
61 | The assignment of loads is based on a tag that is computed from the encoding | |
62 | of the first destination register (the only destination in case of LDR), the | |
63 | base register and the offset (either the register or the immediate value, as | |
64 | encoded in the instruction). This is what the 14 bit tag looks like: | |
65 | ||
66 | |<- 6 bits ->|<- 4b ->|<- 4b ->| | |
67 | -------------------------------- | |
68 | | OFFSET | SRC | DST | | |
69 | -------------------------------- | |
70 | ||
71 | For all cases, the SRC and DST are the 4 LSB of the encoding of the register | |
72 | in the instruction. Offset computation is more involved and is as follows: | |
73 | ||
74 | - For register offset addressing: 4 LSB of the offset register with the MSB | |
75 | of the 6 bits set to 1. | |
76 | ||
77 | - For immediate offset: 4 LSB of the encoded immediate offset. The encoding | |
78 | depends on the width of the load and is expressed as multiples of the | |
79 | width. | |
80 | ||
81 | - For loads with update: 4 LSB of the offset. The encoding here is the | |
82 | exact number by which the base is offset and incremented. | |
83 | ||
84 | Based on the above it is clear that registers 0 and 16 will result in | |
85 | collisions, 1 and 17 and so on. This pass detects such collisions within a | |
86 | def/use chain of the source register in a loop and tries to resolve the | |
87 | collision by renaming one of the destination registers. */ | |
88 | ||
/* Get the destination (DST) part of the tag, i.e. its low 4 bits.  */
#define TAG_GET_DEST(__tag) ((__tag) & 0xf)

/* Get the tag with the destination part updated to the low 4 bits of
   __DEST.  __DEST is parenthesized so that expression arguments
   (e.g. A | B) are masked as a whole rather than by operator
   precedence on the expansion.  */
#define TAG_UPDATE_DEST(__tag, __dest) (((__tag) & ~0xf) | ((__dest) & 0xf))

/* Strides beyond this size are not worth tracking; the hardware
   prefetcher cannot train on them anyway.  */
#define MAX_PREFETCH_STRIDE 2048
96 | ||
/* The instruction information structure.  This is used to cache information
   about the INSN that we derive when traversing through all of the insns in
   loops.  */
class tag_insn_info
{
public:
  /* The load insn this record describes.  */
  rtx_insn *insn;
  /* First (or only) destination register of the load.  */
  rtx dest;
  /* Base address register.  */
  rtx base;
  /* Offset part of the address: a register, a CONST_INT, or NULL_RTX.  */
  rtx offset;
  /* True for pre/post-modify (writeback) addressing.  */
  bool writeback;
  /* True if this is a load pair.  */
  bool ldp;

  tag_insn_info (rtx_insn *i, rtx d, rtx b, rtx o, bool w, bool p)
    : insn (i), dest (d), base (b), offset (o), writeback (w), ldp (p)
  {}

  /* Compute the tag based on BASE, DEST and OFFSET of the load, following
     the layout in the file header comment: bits [3:0] DST, bits [7:4] SRC
     (base), bits [13:8] OFFSET.  */
  unsigned tag ()
  {
    unsigned int_offset = 0;
    rtx offset = this->offset;
    unsigned dest = REGNO (this->dest);
    unsigned base = REGNO (this->base);
    machine_mode dest_mode = GET_MODE (this->dest);

    /* Falkor does not support SVE; GET_LOAD_INFO ensures that the
       destination mode is constant here.  */
    unsigned dest_mode_size = GET_MODE_SIZE (dest_mode).to_constant ();

    /* For loads of larger than 16 bytes, the DEST part of the tag is 0.
       A load pair counts as twice the single-load width, hence the shift
       by this->ldp (0 or 1).  */
    if ((dest_mode_size << this->ldp) > 16)
      dest = 0;

    /* Register offset: 4 LSB of the offset register with the MSB of the
       6-bit field set (1 << 5).  */
    if (offset && REG_P (offset))
      int_offset = (1 << 5) | REGNO (offset);
    else if (offset && CONST_INT_P (offset))
      {
	/* Immediate offset: the hardware sees the offset in multiples of
	   the access width; non-writeback forms are further scaled down
	   by 4 — presumably matching the instruction encoding (see the
	   file header comment).  */
	int_offset = INTVAL (offset);
	int_offset /= dest_mode_size;
	if (!this->writeback)
	  int_offset >>= 2;
      }

    /* Assemble the 14-bit tag.  */
    return ((dest & 0xf)
	    | ((base & 0xf) << 4)
	    | ((int_offset & 0x3f) << 8));
  }
};
145 | ||
/* Hash map from a tag (represented as a CONST_INT rtx) to the list of load
   insns that compute that tag; used to traverse and process instructions
   with colliding tags.  */
typedef hash_map <rtx, auto_vec <tag_insn_info *> > tag_map_t;

/* Vector of instructions with colliding tags.  */
typedef auto_vec <tag_insn_info *> insn_info_list_t;

/* Pair of instruction information and unavailable register set to pass to
   CHECK_COLLIDING_TAGS.  */
typedef std::pair <tag_insn_info *, HARD_REG_SET *> arg_pair_t;
155 | ||
156 | ||
157 | /* Callback to free all tag_insn_info objects. */ | |
158 | bool | |
159 | free_insn_info (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v, | |
160 | void *arg ATTRIBUTE_UNUSED) | |
161 | { | |
162 | while (v->length () > 0) | |
163 | delete v->pop (); | |
164 | ||
165 | return true; | |
166 | } | |
167 | ||
168 | ||
169 | /* Add all aliases of the register to the unavailable register set. REG is the | |
170 | smallest register number that can then be used to reference its aliases. | |
171 | UNAVAILABLE is the hard register set to add the ignored register numbers to | |
172 | and MODE is the mode in which the registers would have been used. */ | |
173 | static void | |
174 | ignore_all_aliases (HARD_REG_SET *unavailable, machine_mode mode, unsigned reg) | |
175 | { | |
176 | add_to_hard_reg_set (unavailable, mode, reg); | |
177 | add_to_hard_reg_set (unavailable, mode, reg + 16); | |
178 | add_to_hard_reg_set (unavailable, mode, reg + 32); | |
179 | add_to_hard_reg_set (unavailable, mode, reg + 48); | |
180 | } | |
181 | ||
182 | ||
183 | /* Callback to check which destination registers are unavailable to us for | |
184 | renaming because of the base and offset colliding. This is a callback that | |
185 | gets called for every name value pair (T, V) in the TAG_MAP. The ARG is an | |
186 | std::pair of the tag_insn_info of the original insn and the hard register | |
187 | set UNAVAILABLE that is used to record hard register numbers that cannot be | |
188 | used for the renaming. This always returns true since we want to traverse | |
189 | through the entire TAG_MAP. */ | |
190 | bool | |
191 | check_colliding_tags (const rtx &t, const insn_info_list_t &v, arg_pair_t *arg) | |
192 | { | |
193 | HARD_REG_SET *unavailable = arg->second; | |
194 | unsigned orig_tag = arg->first->tag (); | |
195 | unsigned tag = INTVAL (t); | |
196 | machine_mode mode = GET_MODE (arg->first->dest); | |
197 | ||
198 | /* Can't collide with emptiness. */ | |
199 | if (v.length () == 0) | |
200 | return true; | |
201 | ||
202 | /* Drop all aliased destination registers that result in the same | |
203 | tag. It is not necessary to drop all of them but we do anyway | |
204 | because it is quicker than checking ranges. */ | |
205 | if (TAG_UPDATE_DEST (tag, 0) == TAG_UPDATE_DEST (orig_tag, 0)) | |
206 | ignore_all_aliases (unavailable, mode, TAG_GET_DEST (tag)); | |
207 | ||
208 | return true; | |
209 | } | |
210 | ||
211 | ||
/* Initialize and build a set of hard register numbers UNAVAILABLE to avoid for
   renaming.  INSN_INFO is the original insn, TAG_MAP is the map of the list of
   insns indexed by their tags, HEAD is the def/use chain head of the
   destination register of the original insn.  The routine returns the super
   class of register classes that may be used during the renaming.  */
static enum reg_class
init_unavailable (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head,
		  HARD_REG_SET *unavailable)
{
  unsigned dest = head->regno;
  enum reg_class super_class = NO_REGS;
  machine_mode mode = GET_MODE (insn_info->dest);

  CLEAR_HARD_REG_SET (*unavailable);

  /* Walk every use in the chain; registers outside the class a use
     requires are unavailable, and the union of all the classes bounds the
     rename candidates.  */
  for (struct du_chain *tmp = head->first; tmp; tmp = tmp->next_use)
    {
      if (DEBUG_INSN_P (tmp->insn))
	continue;

      IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]);
      super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
    }

  /* Fixed and global registers can never be used for renaming.  */
  for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (fixed_regs[i] || global_regs[i])
      add_to_hard_reg_set (unavailable, mode, i);

  arg_pair_t arg = arg_pair_t (insn_info, unavailable);

  /* Exclude all registers that would lead to collisions with other loads.  */
  tag_map.traverse <arg_pair_t *, check_colliding_tags> (&arg);

  /* Finally, also ignore all aliases of the current reg.  */
  ignore_all_aliases (unavailable, mode, dest & 0xf);

  return super_class;
}
250 | ||
251 | ||
/* Find a suitable and available register and rename the chain of occurrences
   of the register defined in the def/use chain headed by HEAD in which INSN
   exists.  CUR_TAG, TAGS and TAG_MAP are used to determine which registers are
   unavailable due to a potential collision due to the rename.  The routine
   returns the register number in case of a successful rename or -1 to indicate
   failure.  */
static int
rename_chain (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head)
{
  unsigned dest_regno = head->regno;

  /* Respect regrename's own judgement about chains that must not or need
     not be touched.  */
  if (head->cannot_rename || head->renamed)
    return -1;

  HARD_REG_SET unavailable;

  /* Collect the registers we must avoid (colliding tags, fixed/global
     regs, class constraints) and the class to pick from.  */
  enum reg_class super_class = init_unavailable (insn_info, tag_map, head,
						 &unavailable);

  unsigned new_regno = find_rename_reg (head, super_class, &unavailable,
					dest_regno, false);

  /* Attempt to rename as long as regrename doesn't just throw the same
     register at us.  */
  if (new_regno != dest_regno && regrename_do_replace (head, new_regno))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "\tInsn %d: Renamed %d to %d\n",
		 INSN_UID (insn_info->insn), dest_regno, new_regno);

      return new_regno;
    }

  return -1;
}
287 | ||
288 | ||
289 | /* Return true if REGNO is not safe to rename. */ | |
290 | static bool | |
291 | unsafe_rename_p (unsigned regno) | |
292 | { | |
293 | /* Avoid renaming registers used for argument passing and return value. In | |
294 | future we could be a little less conservative and walk through the basic | |
295 | blocks to see if there are any call or syscall sites. */ | |
296 | if (regno <= R8_REGNUM | |
297 | || (regno >= V0_REGNUM && regno < V8_REGNUM)) | |
298 | return true; | |
299 | ||
300 | /* Don't attempt to rename registers that may have specific meanings. */ | |
301 | switch (regno) | |
302 | { | |
303 | case LR_REGNUM: | |
304 | case HARD_FRAME_POINTER_REGNUM: | |
305 | case FRAME_POINTER_REGNUM: | |
306 | case STACK_POINTER_REGNUM: | |
307 | return true; | |
308 | } | |
309 | ||
310 | return false; | |
311 | } | |
312 | ||
313 | ||
/* Go through the def/use chains for the register and find the chain for this
   insn to rename.  The function returns the hard register number in case of a
   successful rename and -1 otherwise.  */
static int
rename_dest (tag_insn_info *insn_info, tag_map_t &tag_map)
{
  struct du_chain *chain = NULL;
  du_head_p head = NULL;
  int i;

  unsigned dest_regno = REGNO (insn_info->dest);

  /* Registers with ABI or frame significance are off limits.  */
  if (unsafe_rename_p (dest_regno))
    return -1;

  /* Search the chain where this instruction is (one of) the root.  */
  rtx_insn *insn = insn_info->insn;
  operand_rr_info *dest_op_info = insn_rr[INSN_UID (insn)].op_info;

  for (i = 0; i < dest_op_info->n_chains; i++)
    {
      /* The register tracked by this chain does not match the
	 destination register of insn.  */
      if (dest_op_info->heads[i]->regno != dest_regno)
	continue;

      head = dest_op_info->heads[i];
      /* The chain was merged in another, find the new head.  */
      if (!head->first)
	head = regrename_chain_from_id (head->id);

      for (chain = head->first; chain; chain = chain->next_use)
	/* Found the insn in the chain, so try renaming the register in this
	   chain.  */
	if (chain->insn == insn)
	  return rename_chain (insn_info, tag_map, head);
    }

  /* INSN's chain was not found (or could not be renamed).  */
  return -1;
}
354 | ||
355 | ||
/* Flag to track if the map has changed during the last traversal.  File-scope
   because the hash_map::traverse callback has no other channel to report it
   through.  */
static bool map_changed = false;

/* The actual reallocation logic.  For each vector of collisions V, try to
   resolve the collision by attempting to rename the destination register of
   all but one of the loads.  This is a callback that is invoked for each
   name-value pair (T, V) in TAG_MAP.  The function returns true whenever it
   returns unchanged and false otherwise to halt traversal.  */
bool
avoid_collisions_1 (const rtx &t, insn_info_list_t *v, tag_map_t *tag_map)
{
  /* We need at least two loads to cause a tag collision, return unchanged.  */
  if (v->length () < 2)
    return true;

  tag_insn_info *vec_start = v->pop ();
  tag_insn_info *insn_info = vec_start;

  /* Try to rename at least one register to reduce the collision.  If we
     iterate all the way through, we end up dropping one of the loads from the
     list.  This is fine because we want at most one element to ensure that a
     subsequent rename attempt does not end up worsening the collision.  */
  do
    {
      int new_regno;

      if ((new_regno = rename_dest (insn_info, *tag_map)) != -1)
	{
	  /* Rename succeeded: re-file the insn under its new tag, then
	     return false to halt this traversal; the caller restarts it
	     because map_changed is set.  */
	  rtx new_tag = GEN_INT (TAG_UPDATE_DEST (INTVAL (t), new_regno));

	  tag_map->get_or_insert (new_tag).safe_push (insn_info);
	  df_set_regs_ever_live (new_regno, true);
	  map_changed = true;
	  return false;
	}

      /* Rename failed: rotate the vector — push the failed insn onto the
	 front and pop the next candidate off the back — until we come back
	 around to where we started.  */
      v->safe_insert (0, insn_info);
      insn_info = v->pop ();
    }
  while (insn_info != vec_start);

  if (dump_file)
    fprintf (dump_file, "\t>> Failed to rename destination in insn %d\n\t>>",
	     INSN_UID (insn_info->insn));

  /* Drop the last element and move on to the next tag.  */
  delete insn_info;
  return true;
}
405 | ||
406 | ||
407 | /* For each set of collisions, attempt to rename the registers or insert a move | |
408 | to avoid the collision. We repeatedly traverse through TAG_MAP using | |
409 | AVOID_COLLISIONS_1 trying to rename registers to avoid collisions until a | |
410 | full traversal results in no change in the map. */ | |
411 | static void | |
412 | avoid_collisions (tag_map_t &tag_map) | |
413 | { | |
414 | do | |
415 | { | |
416 | map_changed = false; | |
417 | tag_map.traverse <tag_map_t *, avoid_collisions_1> (&tag_map); | |
418 | } | |
419 | while (map_changed); | |
420 | } | |
421 | \f | |
422 | ||
423 | ||
/* Find the use def chain in which INSN exists and then see if there is a
   definition inside the loop and outside it.  We use this as a simple
   approximation to determine whether the base register is an IV.  The basic
   idea is to find INSN in the use-def chains for its base register and find
   all definitions that reach it.  Of all these definitions, there should be at
   least one definition that is a simple addition of a constant value, either
   as a binary operation or a pre or post update.

   The function returns true if the base register is estimated to be an IV.  */
static bool
iv_p (rtx_insn *insn, rtx reg, struct loop *loop)
{
  df_ref ause;
  unsigned regno = REGNO (reg);

  /* Ignore loads from the stack.  */
  if (regno == SP_REGNUM)
    return false;

  /* Walk all uses of REGNO looking for the one that belongs to INSN.  */
  for (ause = DF_REG_USE_CHAIN (regno); ause; ause = DF_REF_NEXT_REG (ause))
    {
      if (!DF_REF_INSN_INFO (ause)
	  || !NONDEBUG_INSN_P (DF_REF_INSN (ause)))
	continue;

      if (insn != DF_REF_INSN (ause))
	continue;

      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      df_ref def_rec;

      /* Inspect each definition record.  NOTE(review): the loop variable
	 INSN below deliberately shadows the INSN parameter with the def's
	 insn.  */
      FOR_EACH_INSN_INFO_DEF (def_rec, insn_info)
	{
	  rtx_insn *insn = DF_REF_INSN (def_rec);
	  basic_block bb = BLOCK_FOR_INSN (insn);

	  /* Only consider definitions inside this loop.  */
	  if (dominated_by_p (CDI_DOMINATORS, bb, loop->header)
	      && bb->loop_father == loop)
	    {
	      if (recog_memoized (insn) < 0)
		continue;

	      rtx pat = PATTERN (insn);

	      /* Prefetch or clobber; unlikely to be a constant stride.  The
		 falkor software prefetcher tuning is pretty conservative, so
		 its presence indicates that the access pattern is probably
		 strided but most likely with an unknown stride size or a
		 stride size that is quite large.  */
	      if (GET_CODE (pat) != SET)
		continue;

	      rtx x = SET_SRC (pat);
	      /* Look through extends/extracts at the definition's value.  */
	      if (GET_CODE (x) == ZERO_EXTRACT
		  || GET_CODE (x) == ZERO_EXTEND
		  || GET_CODE (x) == SIGN_EXTEND)
		x = XEXP (x, 0);

	      /* Loading the value from memory; unlikely to be a constant
		 stride.  */
	      if (MEM_P (x))
		continue;

	      /* An increment or decrement by a constant MODE_SIZE amount or
		 the result of a binary expression is likely to be an IV.  */
	      if (GET_CODE (x) == POST_INC
		  || GET_CODE (x) == POST_DEC
		  || GET_CODE (x) == PRE_INC
		  || GET_CODE (x) == PRE_DEC)
		return true;
	      else if (BINARY_P (x)
		       && (CONST_INT_P (XEXP (x, 0))
			   || CONST_INT_P (XEXP (x, 1))))
		{
		  rtx stride = (CONST_INT_P (XEXP (x, 0))
				? XEXP (x, 0) : XEXP (x, 1));

		  /* Don't bother with very long strides because the prefetcher
		     is unable to train on them anyway.  */
		  if (INTVAL (stride) < MAX_PREFETCH_STRIDE)
		    return true;
		}
	    }
	}
      /* Found INSN's use but no qualifying definition.  */
      return false;
    }
  return false;
}
512 | ||
513 | ||
/* Return true if SRC is a strided load in the LOOP, false otherwise.
   If it is a strided load, set the BASE and OFFSET.  Also, if this is
   a pre/post increment load, set PRE_POST to true.  */
static bool
valid_src_p (rtx src, rtx_insn *insn, struct loop *loop, bool *pre_post,
	     rtx *base, rtx *offset, bool load_pair)
{
  subrtx_var_iterator::array_type array;
  rtx x = NULL_RTX;

  /* Find the (first) memory reference inside SRC.  */
  FOR_EACH_SUBRTX_VAR (iter, array, src, NONCONST)
    if (MEM_P (*iter))
      {
	x = *iter;
	break;
      }

  /* No memory access: not a load we care about.  */
  if (!x)
    return false;

  struct aarch64_address_info addr;
  machine_mode mode = GET_MODE (x);

  if (!aarch64_classify_address (&addr, XEXP (x, 0), mode, true))
    return false;

  /* Fixed and global base registers cannot be part of a renameable
     stream.  */
  unsigned regno = REGNO (addr.base);
  if (global_regs[regno] || fixed_regs[regno])
    return false;

  if (addr.type == ADDRESS_REG_WB)
    {
      unsigned code = GET_CODE (XEXP (x, 0));

      *pre_post = true;
      *base = addr.base;

      if (code == PRE_MODIFY || code == POST_MODIFY)
	*offset = addr.offset;
      else
	{
	  /* Writeback is only supported for fixed-width modes.  */
	  unsigned int_offset = GET_MODE_SIZE (mode).to_constant ();

	  /* For post-incremented load pairs we would increment the base twice
	     over, so make that adjustment.  */
	  if (load_pair && (code == POST_INC || code == POST_DEC))
	    int_offset *= 2;

	  *offset = GEN_INT (int_offset);
	}
      return true;
    }
  else if (addr.type == ADDRESS_REG_IMM || addr.type == ADDRESS_REG_REG)
    {
      /* Check if the load is strided, i.e. the base looks like an IV.  */
      if (!iv_p (insn, addr.base, loop))
	return false;

      *base = addr.base;
      *offset = addr.offset;
      return true;
    }

  return false;
}
580 | ||
581 | ||
/* Return true if INSN is a strided load in LOOP.  If it is a strided load, set
   the DEST, BASE and OFFSET.  Also, if this is a pre/post increment load, set
   PRE_POST to true.

   The routine does checks on the destination of the insn and depends on
   VALID_SRC_P to check the source and fill in the BASE and OFFSET.  */
static bool
get_load_info (rtx_insn *insn, struct loop *loop, rtx *dest, rtx *base,
	       rtx *offset, bool *pre_post, bool *ldp)
{
  if (!INSN_P (insn) || recog_memoized (insn) < 0)
    return false;

  rtx pat = PATTERN (insn);
  unsigned code = GET_CODE (pat);
  bool load_pair = (code == PARALLEL);

  /* For a load pair we need only the first base and destination
     registers.  We however need to ensure that our pre/post increment
     offset is doubled; we do that in VALID_SRC_P.  */
  if (load_pair)
    {
      pat = XVECEXP (pat, 0, 0);
      code = GET_CODE (pat);
    }

  if (code != SET)
    return false;

  rtx dest_rtx = SET_DEST (pat);

  if (!REG_P (dest_rtx))
    return false;

  unsigned regno = REGNO (dest_rtx);
  machine_mode mode = GET_MODE (dest_rtx);
  machine_mode inner_mode = GET_MODE_INNER (mode);

  /* Falkor does not support SVE vectors.  */
  if (!GET_MODE_SIZE (mode).is_constant ())
    return false;

  /* Ignore vector struct or lane loads.  */
  if (GET_MODE_SIZE (mode).to_constant ()
      != GET_MODE_SIZE (inner_mode).to_constant ())
    return false;

  /* The largest width we want to bother with is a load of a pair of
     quad-words.  */
  if ((GET_MODE_SIZE (mode).to_constant () << load_pair)
      > GET_MODE_SIZE (OImode))
    return false;

  /* Ignore loads into the stack pointer because it is unlikely to be a
     stream.  */
  if (regno == SP_REGNUM)
    return false;

  /* Finally vet the source; on success fill in the remaining outputs.  */
  if (valid_src_p (SET_SRC (pat), insn, loop, pre_post, base, offset,
		   load_pair))
    {
      *dest = dest_rtx;
      *ldp = load_pair;

      return true;
    }

  return false;
}
651 | ||
652 | ||
/* Return whether INSN and CAND are in the same def/use chain for hard
   register REGNO.  */
static bool
in_same_chain (rtx_insn *insn, rtx_insn *cand, unsigned regno)
{
  struct du_chain *chain = NULL;
  du_head_p head = NULL;
  int i;

  /* Search the chain where this instruction is (one of) the root.  */
  operand_rr_info *op_info = insn_rr[INSN_UID (insn)].op_info;

  for (i = 0; i < op_info->n_chains; i++)
    {
      /* The register tracked by this chain does not match the
	 dest register of insn.  */
      if (op_info->heads[i]->regno != regno)
	continue;

      head = op_info->heads[i];
      /* The chain was merged in another, find the new head.  */
      if (!head->first)
	head = regrename_chain_from_id (head->id);

      bool found_insn = false, found_cand = false;

      for (chain = head->first; chain; chain = chain->next_use)
	{
	  /* Only look at occurrences where the chain references the insn's
	     SET destination, i.e. defs rather than uses.  */
	  rtx *loc = &SET_DEST (PATTERN (chain->insn));

	  if (chain->loc != loc)
	    continue;

	  if (chain->insn == insn)
	    found_insn = true;

	  if (chain->insn == cand)
	    found_cand = true;

	  /* Both insns define in this chain: they move together.  */
	  if (found_insn && found_cand)
	    return true;
	}
    }

  return false;
}
698 | ||
699 | ||
/* Callback function to traverse the tag map and drop loads that have the same
   destination and are in the same chain of occurrence.  Routine always returns
   true to allow traversal through all of TAG_MAP.  */
bool
single_dest_per_chain (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v,
		       void *arg ATTRIBUTE_UNUSED)
{
  for (int i = v->length () - 1; i >= 1; i--)
    {
      tag_insn_info *insn_info = (*v)[i];

      for (int j = v->length () - 2; j >= 0; j--)
	{
	  /* Filter out destinations in the same chain.  */
	  if (in_same_chain (insn_info->insn, (*v)[j]->insn,
			     REGNO (insn_info->dest)))
	    {
	      /* Removing element J shifts the vector; restart the outer
		 scan from the (new) end.  */
	      v->ordered_remove (j);
	      i = v->length ();
	      break;
	    }
	}
    }

  return true;
}
726 | ||
727 | ||
728 | /* Callback invoked for each name-value pair (T, INSN_INFO) to dump the insn | |
729 | list INSN_INFO for tag T. */ | |
730 | bool | |
731 | dump_insn_list (const rtx &t, const insn_info_list_t &insn_info, | |
732 | void *unused ATTRIBUTE_UNUSED) | |
733 | { | |
734 | gcc_assert (dump_file); | |
735 | fprintf (dump_file, "Tag 0x%lx ::\n", INTVAL (t)); | |
736 | ||
737 | for (unsigned i = 0; i < insn_info.length (); i++) | |
738 | dump_insn_slim (dump_file, insn_info[i]->insn); | |
739 | ||
740 | fprintf (dump_file, "\n"); | |
741 | ||
742 | return true; | |
743 | } | |
744 | ||
745 | ||
746 | /* Record all loads in LOOP into TAG_MAP indexed by the falkor hardware | |
747 | prefetcher memory tags. */ | |
748 | static void | |
749 | record_loads (tag_map_t &tag_map, struct loop *loop) | |
750 | { | |
751 | rtx_insn *insn; | |
752 | basic_block *body, bb; | |
753 | ||
754 | body = get_loop_body (loop); | |
755 | ||
756 | for (unsigned i = 0; i < loop->num_nodes; i++) | |
757 | { | |
758 | bb = body[i]; | |
759 | FOR_BB_INSNS (bb, insn) | |
760 | { | |
761 | rtx base = NULL_RTX; | |
762 | rtx dest = NULL_RTX; | |
763 | rtx offset = NULL_RTX; | |
764 | bool writeback = false; | |
765 | bool ldp = false; | |
766 | ||
767 | if (!INSN_P (insn) || DEBUG_INSN_P (insn)) | |
768 | continue; | |
769 | ||
770 | if (get_load_info (insn, loop, &dest, &base, &offset, &writeback, | |
771 | &ldp)) | |
772 | { | |
773 | tag_insn_info *i = new tag_insn_info (insn, dest, base, offset, | |
774 | writeback, ldp); | |
775 | rtx tag = GEN_INT (i->tag ()); | |
776 | tag_map.get_or_insert (tag).safe_push (i); | |
777 | } | |
778 | } | |
779 | } | |
780 | ||
781 | if (dump_file) | |
782 | { | |
783 | fprintf (dump_file, "Loop %d: Tag map generated.\n", loop->num); | |
784 | tag_map.traverse <void *, dump_insn_list> (NULL); | |
785 | } | |
786 | ||
787 | /* Try to reduce the dataset before launching into the rename attempt. Drop | |
788 | destinations in the same collision chain that appear in the same def/use | |
789 | chain, all as defs. These chains will move together in a rename so | |
790 | there's no point in keeping both in there. */ | |
791 | tag_map.traverse <void *, single_dest_per_chain> (NULL); | |
792 | } | |
793 | ||
794 | ||
/* Tag collision avoidance pass for Falkor.  The pass runs in two phases for
   each loop; the first phase collects all loads that we consider as
   interesting for renaming into a tag-indexed map of lists.  The second phase
   renames the destination register of the loads in an attempt to spread out
   the loads into different tags.  */
void
execute_tag_collision_avoidance ()
{
  struct loop *loop;

  /* Set up the df framework for use-def chains on hard registers.  */
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_UD_CHAIN);
  df_compute_regs_ever_live (true);
  df_note_add_problem ();
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Build regrename's def/use chains, which we use to pick and apply the
     actual renames.  */
  regrename_init (true);
  regrename_analyze (NULL);

  /* Dominators and loop structure are needed by iv_p/record_loads.  */
  compute_bb_for_insn ();
  calculate_dominance_info (CDI_DOMINATORS);
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      tag_map_t tag_map (512);

      /* Phase 1: collect loads; phase 2: rename to spread out tags.  */
      record_loads (tag_map, loop);
      avoid_collisions (tag_map);
      if (dump_file)
	{
	  fprintf (dump_file, "Loop %d: Completed rename.\n", loop->num);
	  tag_map.traverse <void *, dump_insn_list> (NULL);
	}
      tag_map.traverse <void *, free_insn_info> (NULL);
    }

  loop_optimizer_finalize ();
  free_dominance_info (CDI_DOMINATORS);
  regrename_finish ();
}
837 | ||
838 | ||
/* Pass registration data for the Falkor tag collision avoidance RTL pass.  */
const pass_data pass_data_tag_collision_avoidance =
{
  RTL_PASS, /* type */
  "tag_collision_avoidance", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
851 | ||
852 | ||
/* The opt_pass wrapper for the Falkor tag collision avoidance pass.  */
class pass_tag_collision_avoidance : public rtl_opt_pass
{
public:
  pass_tag_collision_avoidance (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_tag_collision_avoidance, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when the selected tuning asks for load-register renaming
     (i.e. Falkor) and we are optimizing.  */
  virtual bool gate (function *)
  {
    return ((aarch64_tune_params.extra_tuning_flags
	     & AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS)
	    && optimize >= 2);
  }

  virtual unsigned int execute (function *)
  {
    execute_tag_collision_avoidance ();
    return 0;
  }

}; // class pass_tag_collision_avoidance
875 | ||
876 | ||
877 | /* Create a new pass instance. */ | |
878 | rtl_opt_pass * | |
879 | make_pass_tag_collision_avoidance (gcc::context *ctxt) | |
880 | { | |
881 | return new pass_tag_collision_avoidance (ctxt); | |
882 | } |