]>
Commit | Line | Data |
---|---|---|
ee7ef7ab | 1 | /* FMA steering optimization pass for Cortex-A57. |
aad93da1 | 2 | Copyright (C) 2015-2017 Free Software Foundation, Inc. |
ee7ef7ab | 3 | Contributed by ARM Ltd. |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but | |
13 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
21 | #include "config.h" | |
d637913b | 22 | #define INCLUDE_LIST |
ee7ef7ab | 23 | #include "system.h" |
24 | #include "coretypes.h" | |
9ef16211 | 25 | #include "backend.h" |
c1eb80de | 26 | #include "target.h" |
9ef16211 | 27 | #include "rtl.h" |
28 | #include "df.h" | |
ee7ef7ab | 29 | #include "insn-config.h" |
c1eb80de | 30 | #include "regs.h" |
ad7b10a2 | 31 | #include "memmodel.h" |
c1eb80de | 32 | #include "emit-rtl.h" |
33 | #include "recog.h" | |
ee7ef7ab | 34 | #include "cfganal.h" |
ee7ef7ab | 35 | #include "insn-attr.h" |
ee7ef7ab | 36 | #include "context.h" |
37 | #include "tree-pass.h" | |
38 | #include "regrename.h" | |
5e9fcc70 | 39 | #include "aarch64-protos.h" |
ee7ef7ab | 40 | |
ee7ef7ab | 41 | /* For better performance, the destination of FMADD/FMSUB instructions should |
42 | have the same parity as their accumulator register if the accumulator | |
43 | contains the result of a previous FMUL or FMADD/FMSUB instruction if | |
44 | targetting Cortex-A57 processors. Performance is also increased by | |
45 | otherwise keeping a good balance in the parity of the destination register | |
46 | of FMUL or FMADD/FMSUB. | |
47 | ||
48 | This pass ensure that registers are renamed so that these conditions hold. | |
49 | We reuse the existing register renaming facility from regrename.c to build | |
50 | dependency chains and expose candidate registers for renaming. | |
51 | ||
52 | ||
53 | The algorithm has three steps: | |
54 | ||
55 | First, the functions of the register renaming pass are called. These | |
56 | analyze the instructions and produce a list of def/use chains of | |
57 | instructions. | |
58 | ||
59 | Next, this information is used to build trees of multiply and | |
60 | multiply-accumulate instructions. The roots of these trees are any | |
61 | multiply, or any multiply-accumulate whose accumulator is not dependent on | |
62 | a multiply or multiply-accumulate instruction. A child is added to the | |
63 | tree where a dependency chain exists between the result of the parent | |
64 | instruction and the accumulator operand of the child, as in the diagram | |
65 | below: | |
66 | ||
67 | fmul s2, s0, s1 | |
68 | / \ | |
69 | fmadd s0, s1, s1, s2 fmadd s4, s1, s1 s2 | |
70 | | | |
71 | fmadd s3, s1, s1, s0 | |
72 | ||
73 | Trees made of a single instruction are permitted. | |
74 | ||
75 | Finally, renaming is performed. The parity of the destination register at | |
76 | the root of a tree is checked against the current balance of multiply and | |
77 | multiply-accumulate on each pipeline. If necessary, the root of a tree is | |
78 | renamed, in which case the rest of the tree is then renamed to keep the same | |
79 | parity in the destination registers of all instructions in the tree. */ | |
80 | ||
81 | ||
82 | ||
83 | /* Forward declarations. */ | |
84 | class fma_node; | |
85 | class fma_root_node; | |
86 | class func_fma_steering; | |
87 | ||
88 | /* Dependencies between FMUL or FMADD/FMSUB instructions and subsequent | |
89 | FMADD/FMSUB instructions form a graph. This is because alternatives can | |
90 | make a register be set by several FMUL or FMADD/FMSUB instructions in | |
91 | different basic blocks and because of loops. For ease of browsing, the | |
92 | connected components of this graph are broken up into forests of trees. | |
93 | Forests are represented by fma_forest objects, contained in the fma_forests | |
94 | list. Using a separate object for the forests allows for a better use of | |
95 | memory as there is some information that is global to each forest, such as | |
96 | the number of FMSUB and FMADD/FMSUB instructions currently scheduled on each | |
97 | floating-point execution pipelines. */ | |
98 | ||
99 | class fma_forest | |
100 | { | |
101 | public: | |
102 | fma_forest (func_fma_steering *, fma_root_node *, int); | |
103 | ~fma_forest (); | |
104 | ||
105 | int get_id (); | |
106 | std::list<fma_root_node *> *get_roots (); | |
107 | func_fma_steering *get_globals (); | |
108 | int get_target_parity (); | |
109 | void fma_node_created (fma_node *); | |
110 | void merge_forest (fma_forest *); | |
111 | void dump_info (); | |
112 | void dispatch (); | |
113 | ||
114 | private: | |
115 | /* The list of roots that form this forest. */ | |
116 | std::list<fma_root_node *> *m_roots; | |
117 | ||
118 | /* Target parity the destination register of all FMUL and FMADD/FMSUB | |
119 | instructions in this forest should have. */ | |
120 | int m_target_parity; | |
121 | ||
122 | /* Link to the instance of func_fma_steering holding data related to the | |
123 | FMA steering of the current function (cfun). */ | |
124 | func_fma_steering *m_globals; | |
125 | ||
126 | /* Identifier for the forest (used for dumps). */ | |
127 | int m_id; | |
128 | ||
129 | /* Total number of nodes in the forest (for statistics). */ | |
130 | int m_nb_nodes; | |
131 | }; | |
132 | ||
133 | class fma_node | |
134 | { | |
135 | public: | |
136 | fma_node (fma_node *parent, du_chain *chain); | |
137 | ~fma_node (); | |
138 | ||
139 | bool root_p (); | |
140 | fma_forest *get_forest (); | |
141 | std::list<fma_node *> *get_children (); | |
142 | rtx_insn *get_insn (); | |
143 | void add_child (fma_node *); | |
144 | int get_parity (); | |
145 | void set_head (du_head *); | |
146 | void rename (fma_forest *); | |
147 | void dump_info (fma_forest *); | |
148 | ||
149 | protected: | |
150 | /* Root node that lead to this node. */ | |
151 | fma_root_node *m_root; | |
152 | ||
153 | /* The parent node of this node. If the node belong to a chain with several | |
154 | parent nodes, the first one encountered in a depth-first search is chosen | |
155 | as canonical parent. */ | |
156 | fma_node *m_parent; | |
157 | ||
158 | /* The list of child nodes. If a chain contains several parent nodes, one is | |
159 | chosen as canonical parent and the others will have no children. */ | |
160 | std::list<fma_node *> *m_children; | |
161 | ||
162 | /* The associated DU_HEAD chain that the insn represented by this object | |
163 | is (one of) the root of. When a chain contains several roots, the non | |
164 | canonical ones have this field set to NULL. */ | |
165 | struct du_head *m_head; | |
166 | ||
167 | /* The FMUL or FMADD/FMSUB instruction this object corresponds to. */ | |
168 | rtx_insn *m_insn; | |
169 | }; | |
170 | ||
171 | class fma_root_node : public fma_node | |
172 | { | |
173 | public: | |
174 | fma_root_node (func_fma_steering *, du_chain *, int); | |
175 | ||
176 | fma_forest *get_forest (); | |
177 | void set_forest (fma_forest *); | |
178 | void dump_info (fma_forest *); | |
179 | ||
180 | private: | |
181 | /* The forest this node belonged to when it was created. */ | |
182 | fma_forest *m_forest; | |
183 | }; | |
184 | ||
185 | /* Class holding all data and methods relative to the FMA steering of a given | |
186 | function. The FMA steering pass could then run in parallel for different | |
187 | functions. */ | |
188 | ||
189 | class func_fma_steering | |
190 | { | |
191 | public: | |
192 | func_fma_steering (); | |
193 | ~func_fma_steering (); | |
194 | ||
195 | int get_fpu_balance (); | |
196 | void remove_forest (fma_forest *); | |
197 | bool put_node (fma_node *); | |
198 | void update_balance (int); | |
199 | fma_node *get_fma_node (rtx_insn *); | |
200 | void analyze_fma_fmul_insn (fma_forest *, du_chain *, du_head_p); | |
201 | void execute_fma_steering (); | |
202 | ||
203 | private: | |
204 | void dfs (void (*) (fma_forest *), void (*) (fma_forest *, fma_root_node *), | |
205 | void (*) (fma_forest *, fma_node *), bool); | |
206 | void analyze (); | |
207 | void rename_fma_trees (); | |
208 | ||
209 | /* Mapping between FMUL or FMADD/FMSUB instructions and the associated | |
210 | fma_node object. Used when analyzing an instruction that is a root of | |
211 | a chain to find if such an object was created because this instruction | |
212 | is also a use in another chain. */ | |
213 | hash_map<rtx_insn *, fma_node *> *m_insn_fma_head_map; | |
214 | ||
215 | /* A list of all the forests in a given function. */ | |
216 | std::list<fma_forest *> m_fma_forests; | |
217 | ||
218 | /* Balance of FMUL and FMADD/FMSUB instructions between the two FPU | |
219 | pipelines: | |
220 | < 0: more instruction dispatched to the first pipeline | |
221 | == 0: perfect balance | |
222 | > 0: more instruction dispatched to the second pipeline. */ | |
223 | int m_fpu_balance; | |
224 | ||
225 | /* Identifier for the next forest created. */ | |
226 | int m_next_forest_id; | |
227 | }; | |
228 | ||
229 | /* Rename the register HEAD->regno in all the insns in the chain HEAD to any | |
230 | register not in the set UNAVAILABLE. Adapted from rename_chains in | |
231 | regrename.c. */ | |
232 | ||
233 | static bool | |
234 | rename_single_chain (du_head_p head, HARD_REG_SET *unavailable) | |
235 | { | |
236 | int best_new_reg; | |
237 | int n_uses = 0; | |
238 | struct du_chain *tmp; | |
239 | int reg = head->regno; | |
240 | enum reg_class super_class = NO_REGS; | |
241 | ||
242 | if (head->cannot_rename) | |
243 | return false; | |
244 | ||
245 | if (fixed_regs[reg] || global_regs[reg] | |
246 | || (frame_pointer_needed && reg == HARD_FRAME_POINTER_REGNUM)) | |
247 | return false; | |
248 | ||
249 | /* Iterate over elements in the chain in order to: | |
250 | 1. Count number of uses, and narrow the set of registers we can | |
251 | use for renaming. | |
252 | 2. Compute the superunion of register classes in this chain. */ | |
253 | for (tmp = head->first; tmp; tmp = tmp->next_use) | |
254 | { | |
255 | if (DEBUG_INSN_P (tmp->insn)) | |
256 | continue; | |
257 | n_uses++; | |
258 | IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]); | |
259 | super_class = reg_class_superunion[(int) super_class][(int) tmp->cl]; | |
260 | } | |
261 | ||
262 | if (n_uses < 1) | |
263 | return false; | |
264 | ||
265 | best_new_reg = find_rename_reg (head, super_class, unavailable, reg, | |
266 | false); | |
267 | ||
268 | if (dump_file) | |
269 | { | |
270 | fprintf (dump_file, "Register %s in insn %d", reg_names[reg], | |
271 | INSN_UID (head->first->insn)); | |
272 | if (head->need_caller_save_reg) | |
273 | fprintf (dump_file, " crosses a call"); | |
274 | } | |
275 | ||
276 | if (best_new_reg == reg) | |
277 | { | |
278 | if (dump_file) | |
279 | fprintf (dump_file, "; no available better choice\n"); | |
280 | return false; | |
281 | } | |
282 | ||
688586d5 | 283 | if (regrename_do_replace (head, best_new_reg)) |
284 | { | |
285 | if (dump_file) | |
286 | fprintf (dump_file, ", renamed as %s\n", reg_names[best_new_reg]); | |
287 | df_set_regs_ever_live (best_new_reg, true); | |
288 | } | |
289 | else | |
290 | { | |
291 | if (dump_file) | |
292 | fprintf (dump_file, ", renaming as %s failed\n", | |
293 | reg_names[best_new_reg]); | |
294 | return false; | |
295 | } | |
ee7ef7ab | 296 | return true; |
297 | } | |
298 | ||
299 | /* Return whether T is the attribute of a FMADD/FMSUB-like instruction. */ | |
300 | ||
301 | static bool | |
302 | is_fmac_op (enum attr_type t) | |
303 | { | |
304 | return (t == TYPE_FMACS) || (t == TYPE_FMACD) || (t == TYPE_NEON_FP_MLA_S); | |
305 | } | |
306 | ||
307 | /* Return whether T is the attribute of a FMUL instruction. */ | |
308 | ||
309 | static bool | |
310 | is_fmul_op (enum attr_type t) | |
311 | { | |
312 | return (t == TYPE_FMULS) || (t == TYPE_FMULD) || (t == TYPE_NEON_FP_MUL_S); | |
313 | } | |
314 | ||
315 | /* Return whether INSN is an FMUL (if FMUL_OK is true) or FMADD/FMSUB | |
316 | instruction. */ | |
317 | ||
318 | static bool | |
319 | is_fmul_fmac_insn (rtx_insn *insn, bool fmul_ok) | |
320 | { | |
321 | enum attr_type t; | |
322 | ||
323 | if (!NONDEBUG_INSN_P (insn)) | |
324 | return false; | |
325 | ||
326 | if (recog_memoized (insn) < 0) | |
327 | return false; | |
328 | ||
329 | /* Only consider chain(s) this instruction is a root of if this is an FMUL or | |
330 | FMADD/FMSUB instruction. This allows to avoid browsing chains of all | |
331 | instructions for FMUL or FMADD/FMSUB in them. */ | |
332 | t = get_attr_type (insn); | |
333 | return is_fmac_op (t) || (fmul_ok && is_fmul_op (t)); | |
334 | } | |
335 | ||
336 | ||
337 | /* | |
338 | * Class fma_forest method definitions. | |
339 | */ | |
340 | ||
341 | fma_forest::fma_forest (func_fma_steering *fma_steer, fma_root_node *fma_root, | |
342 | int id) | |
343 | { | |
344 | memset (this, 0, sizeof (*this)); | |
345 | this->m_globals = fma_steer; | |
346 | this->m_roots = new std::list<fma_root_node *>; | |
347 | this->m_roots->push_back (fma_root); | |
348 | this->m_id = id; | |
349 | } | |
350 | ||
351 | fma_forest::~fma_forest () | |
352 | { | |
353 | delete this->m_roots; | |
354 | } | |
355 | ||
356 | int | |
357 | fma_forest::get_id () | |
358 | { | |
359 | return this->m_id; | |
360 | } | |
361 | ||
362 | std::list<fma_root_node *> * | |
363 | fma_forest::get_roots () | |
364 | { | |
365 | return this->m_roots; | |
366 | } | |
367 | ||
368 | func_fma_steering * | |
369 | fma_forest::get_globals () | |
370 | { | |
371 | return this->m_globals; | |
372 | } | |
373 | ||
374 | int | |
375 | fma_forest::get_target_parity () | |
376 | { | |
377 | return this->m_target_parity; | |
378 | } | |
379 | ||
380 | /* Act on the creation of NODE by updating statistics in FOREST and adding an | |
381 | entry for it in the func_fma_steering hashmap. */ | |
382 | ||
383 | void fma_forest::fma_node_created (fma_node *node) | |
384 | { | |
385 | bool created = !this->m_globals->put_node (node); | |
386 | ||
387 | gcc_assert (created); | |
388 | this->m_nb_nodes++; | |
389 | } | |
390 | ||
391 | /* Merge REF_FOREST and OTHER_FOREST together, making REF_FOREST the canonical | |
392 | fma_forest object to represent both. */ | |
393 | ||
394 | void | |
395 | fma_forest::merge_forest (fma_forest *other_forest) | |
396 | { | |
397 | std::list<fma_root_node *> *other_roots; | |
398 | std::list<fma_root_node *>::iterator other_root_iter; | |
399 | ||
400 | if (this == other_forest) | |
401 | return; | |
402 | ||
403 | other_roots = other_forest->m_roots; | |
404 | ||
405 | /* Update root nodes' pointer to forest. */ | |
406 | for (other_root_iter = other_roots->begin (); | |
407 | other_root_iter != other_roots->end (); other_root_iter++) | |
408 | (*other_root_iter)->set_forest (this); | |
409 | ||
410 | /* Remove other_forest from the list of forests and move its tree roots in | |
411 | the list of tree roots of ref_forest. */ | |
412 | this->m_globals->remove_forest (other_forest); | |
413 | this->m_roots->splice (this->m_roots->begin (), *other_roots); | |
ee7ef7ab | 414 | this->m_nb_nodes += other_forest->m_nb_nodes; |
98bf84bc | 415 | |
416 | delete other_forest; | |
ee7ef7ab | 417 | } |
418 | ||
419 | /* Dump information about the forest FOREST. */ | |
420 | ||
421 | void | |
422 | fma_forest::dump_info () | |
423 | { | |
424 | gcc_assert (dump_file); | |
425 | ||
426 | fprintf (dump_file, "Forest #%d has %d nodes\n", this->m_id, | |
427 | this->m_nb_nodes); | |
428 | } | |
429 | ||
430 | /* Wrapper around fma_forest::dump_info for use as parameter of function | |
431 | pointer type in func_fma_steering::dfs. */ | |
432 | ||
433 | static void | |
434 | dump_forest_info (fma_forest *forest) | |
435 | { | |
436 | forest->dump_info (); | |
437 | } | |
438 | ||
439 | /* Dispatch forest to the least utilized pipeline. */ | |
440 | ||
441 | void | |
442 | fma_forest::dispatch () | |
443 | { | |
444 | this->m_target_parity = this->m_roots->front ()->get_parity (); | |
445 | int fpu_balance = this->m_globals->get_fpu_balance (); | |
446 | if (fpu_balance != 0) | |
447 | this->m_target_parity = (fpu_balance < 0); | |
448 | ||
449 | if (dump_file) | |
450 | fprintf (dump_file, "Target parity for forest #%d: %s\n", this->m_id, | |
451 | this->m_target_parity ? "odd" : "even"); | |
452 | } | |
453 | ||
454 | /* Wrapper around fma_forest::dispatch for use as parameter of function pointer | |
455 | type in func_fma_steering::dfs. */ | |
456 | ||
457 | static void | |
458 | dispatch_forest (fma_forest *forest) | |
459 | { | |
460 | forest->dispatch (); | |
461 | } | |
462 | ||
463 | fma_node::fma_node (fma_node *parent, du_chain *chain) | |
464 | { | |
465 | memset (this, 0, sizeof (*this)); | |
466 | this->m_parent = parent; | |
467 | this->m_children = new std::list<fma_node *>; | |
468 | this->m_insn = chain->insn; | |
469 | /* root_p () cannot be used to check for root before root is set. */ | |
470 | if (this->m_parent == this) | |
471 | this->m_root = static_cast<fma_root_node *> (parent); | |
472 | else | |
473 | { | |
474 | this->m_root = parent->m_root; | |
475 | this->get_forest ()->fma_node_created (this); | |
476 | } | |
477 | } | |
478 | ||
479 | fma_node::~fma_node () | |
480 | { | |
481 | delete this->m_children; | |
482 | } | |
483 | ||
484 | std::list<fma_node *> * | |
485 | fma_node::get_children () | |
486 | { | |
487 | return this->m_children; | |
488 | } | |
489 | ||
490 | rtx_insn * | |
491 | fma_node::get_insn () | |
492 | { | |
493 | return this->m_insn; | |
494 | } | |
495 | ||
496 | void | |
497 | fma_node::set_head (du_head *head) | |
498 | { | |
499 | gcc_assert (!this->m_head); | |
500 | this->m_head = head; | |
501 | } | |
502 | ||
503 | /* Add a child to this node in the list of children. */ | |
504 | ||
505 | void | |
506 | fma_node::add_child (fma_node *child) | |
507 | { | |
508 | this->m_children->push_back (child); | |
509 | } | |
510 | ||
511 | /* Return the parity of the destination register of the instruction represented | |
512 | by this node. */ | |
513 | ||
514 | int | |
515 | fma_node::get_parity () | |
516 | { | |
517 | return this->m_head->regno % 2; | |
518 | } | |
519 | ||
520 | /* Get the actual forest associated with a non root node as the one the node | |
521 | points to might have been merged into another one. In that case the pointer | |
522 | in the root nodes are updated so we return the forest pointer of a root node | |
523 | pointed to by the initial forest. Despite being a oneliner, this method is | |
524 | defined here as it references a method from fma_root_node. */ | |
525 | ||
526 | fma_forest * | |
527 | fma_node::get_forest () | |
528 | { | |
529 | return this->m_root->get_forest (); | |
530 | } | |
531 | ||
532 | /* Return whether a node is a root node. */ | |
533 | ||
534 | bool | |
535 | fma_node::root_p () | |
536 | { | |
537 | return this->m_root == this; | |
538 | } | |
539 | ||
540 | /* Dump information about the children of node FMA_NODE in forest FOREST. */ | |
541 | ||
542 | void | |
543 | fma_node::dump_info (ATTRIBUTE_UNUSED fma_forest *forest) | |
544 | { | |
545 | struct du_chain *chain; | |
546 | std::list<fma_node *>::iterator fma_child; | |
547 | ||
548 | gcc_assert (dump_file); | |
549 | ||
550 | if (this->get_children ()->empty ()) | |
551 | return; | |
552 | ||
553 | fprintf (dump_file, "Instruction(s)"); | |
554 | for (chain = this->m_head->first; chain; chain = chain->next_use) | |
555 | { | |
556 | if (!is_fmul_fmac_insn (chain->insn, true)) | |
557 | continue; | |
558 | ||
559 | if (chain->loc != &SET_DEST (PATTERN (chain->insn))) | |
560 | continue; | |
561 | ||
562 | fprintf (dump_file, " %d", INSN_UID (chain->insn)); | |
563 | } | |
564 | ||
565 | fprintf (dump_file, " is(are) accumulator dependency of instructions"); | |
566 | for (fma_child = this->get_children ()->begin (); | |
567 | fma_child != this->get_children ()->end (); fma_child++) | |
568 | fprintf (dump_file, " %d", INSN_UID ((*fma_child)->m_insn)); | |
569 | fprintf (dump_file, "\n"); | |
570 | } | |
571 | ||
572 | /* Wrapper around fma_node::dump_info for use as parameter of function pointer | |
573 | type in func_fma_steering::dfs. */ | |
574 | ||
575 | static void | |
576 | dump_tree_node_info (fma_forest *forest, fma_node *node) | |
577 | { | |
578 | node->dump_info (forest); | |
579 | } | |
580 | ||
581 | /* Rename the destination register of a single FMUL or FMADD/FMSUB instruction | |
582 | represented by FMA_NODE to a register that respect the target parity for | |
583 | FOREST or with same parity of the instruction represented by its parent node | |
584 | if it has one. */ | |
585 | ||
586 | void | |
587 | fma_node::rename (fma_forest *forest) | |
588 | { | |
589 | int cur_parity, target_parity; | |
590 | ||
591 | /* This is alternate root of a chain and thus has no children. It will be | |
592 | renamed when processing the canonical root for that chain. */ | |
593 | if (!this->m_head) | |
594 | return; | |
595 | ||
596 | target_parity = forest->get_target_parity (); | |
597 | if (this->m_parent) | |
598 | target_parity = this->m_parent->get_parity (); | |
599 | cur_parity = this->get_parity (); | |
600 | ||
601 | /* Rename if parity differs. */ | |
602 | if (cur_parity != target_parity) | |
603 | { | |
604 | rtx_insn *insn = this->m_insn; | |
605 | HARD_REG_SET unavailable; | |
582adad1 | 606 | machine_mode mode; |
ee7ef7ab | 607 | int reg; |
608 | ||
609 | if (dump_file) | |
610 | { | |
611 | unsigned cur_dest_reg = this->m_head->regno; | |
612 | ||
613 | fprintf (dump_file, "FMA or FMUL at insn %d but destination " | |
614 | "register (%s) has different parity from expected to " | |
615 | "maximize FPU pipeline utilization\n", INSN_UID (insn), | |
616 | reg_names[cur_dest_reg]); | |
617 | } | |
618 | ||
619 | /* Don't clobber traceback for noreturn functions. */ | |
620 | CLEAR_HARD_REG_SET (unavailable); | |
621 | if (frame_pointer_needed) | |
622 | { | |
623 | add_to_hard_reg_set (&unavailable, Pmode, FRAME_POINTER_REGNUM); | |
624 | add_to_hard_reg_set (&unavailable, Pmode, HARD_FRAME_POINTER_REGNUM); | |
625 | } | |
626 | ||
627 | /* Exclude registers with wrong parity. */ | |
628 | mode = GET_MODE (SET_DEST (PATTERN (insn))); | |
629 | for (reg = cur_parity; reg < FIRST_PSEUDO_REGISTER; reg += 2) | |
630 | add_to_hard_reg_set (&unavailable, mode, reg); | |
631 | ||
632 | if (!rename_single_chain (this->m_head, &unavailable)) | |
633 | { | |
634 | if (dump_file) | |
635 | fprintf (dump_file, "Destination register of insn %d could not be " | |
636 | "renamed. Dependent FMA insns will use this parity from " | |
637 | "there on.\n", INSN_UID (insn)); | |
638 | } | |
639 | else | |
640 | cur_parity = target_parity; | |
641 | } | |
642 | ||
643 | forest->get_globals ()->update_balance (cur_parity); | |
644 | } | |
645 | ||
646 | /* Wrapper around fma_node::dump_info for use as parameter of function pointer | |
647 | type in func_fma_steering::dfs. */ | |
648 | ||
649 | static void | |
650 | rename_fma_node (fma_forest *forest, fma_node *node) | |
651 | { | |
652 | node->rename (forest); | |
653 | } | |
654 | ||
655 | fma_root_node::fma_root_node (func_fma_steering *globals, du_chain *chain, | |
656 | int id) : fma_node (this, chain) | |
657 | { | |
658 | this->m_forest = new fma_forest (globals, this, id); | |
659 | this->m_forest->fma_node_created (this); | |
660 | } | |
661 | ||
662 | fma_forest * | |
663 | fma_root_node::get_forest () | |
664 | { | |
665 | return this->m_forest; | |
666 | } | |
667 | ||
668 | void | |
669 | fma_root_node::set_forest (fma_forest *ref_forest) | |
670 | { | |
671 | this->m_forest = ref_forest; | |
672 | } | |
673 | ||
674 | /* Dump information about the roots of forest FOREST. */ | |
675 | ||
676 | void | |
677 | fma_root_node::dump_info (fma_forest *forest) | |
678 | { | |
679 | gcc_assert (dump_file); | |
680 | ||
681 | if (this == forest->get_roots ()->front ()) | |
682 | fprintf (dump_file, "Instruction(s) at root of forest #%d:", | |
683 | forest->get_id ()); | |
684 | fprintf (dump_file, " %d", INSN_UID (this->m_insn)); | |
685 | if (this == forest->get_roots ()->back ()) | |
686 | fprintf (dump_file, "\n"); | |
687 | } | |
688 | ||
689 | /* Wrapper around fma_root_node::dump_info for use as parameter of function | |
690 | pointer type in func_fma_steering::dfs. */ | |
691 | ||
692 | static void | |
693 | dump_tree_root_info (fma_forest *forest, fma_root_node *node) | |
694 | { | |
695 | node->dump_info (forest); | |
696 | } | |
697 | ||
698 | func_fma_steering::func_fma_steering () : m_fpu_balance (0) | |
699 | { | |
700 | this->m_insn_fma_head_map = new hash_map<rtx_insn *, fma_node *>; | |
701 | this->m_fma_forests.clear (); | |
702 | this->m_next_forest_id = 0; | |
703 | } | |
704 | ||
705 | func_fma_steering::~func_fma_steering () | |
706 | { | |
707 | delete this->m_insn_fma_head_map; | |
708 | } | |
709 | ||
710 | int | |
711 | func_fma_steering::get_fpu_balance () | |
712 | { | |
713 | return this->m_fpu_balance; | |
714 | } | |
715 | ||
716 | void | |
717 | func_fma_steering::remove_forest (fma_forest *forest) | |
718 | { | |
719 | this->m_fma_forests.remove (forest); | |
720 | } | |
721 | ||
722 | /* Memorize the mapping of this instruction to its fma_node object and return | |
723 | whether such a mapping existed. */ | |
724 | ||
725 | bool | |
726 | func_fma_steering::put_node (fma_node *node) | |
727 | { | |
728 | return this->m_insn_fma_head_map->put (node->get_insn (), node); | |
729 | } | |
730 | ||
731 | /* Update the current balance considering a node with the given PARITY. */ | |
732 | ||
733 | void | |
734 | func_fma_steering::update_balance (int parity) | |
735 | { | |
736 | this->m_fpu_balance = parity ? this->m_fpu_balance + 1 | |
737 | : this->m_fpu_balance - 1; | |
738 | } | |
739 | ||
740 | /* Return whether an fma_node object exists for instruction INSN and, if not, | |
741 | allocate one in *RET. */ | |
742 | ||
743 | fma_node * | |
744 | func_fma_steering::get_fma_node (rtx_insn *insn) | |
745 | { | |
746 | fma_node **fma_slot; | |
747 | ||
748 | fma_slot = this->m_insn_fma_head_map->get (insn); | |
749 | if (fma_slot) | |
750 | return *fma_slot; | |
751 | return NULL; | |
752 | } | |
753 | ||
754 | /* Allocate and initialize fma_node objects for the FMUL or FMADD/FMSUB | |
755 | instruction in CHAIN->insn and its dependent FMADD/FMSUB instructions, all | |
756 | part of FOREST. For the children, the associated head is left untouched | |
757 | (and thus null) as this function will be called again when considering the | |
758 | chain where they are def. For the parent, the chain is given in HEAD. */ | |
759 | ||
760 | void | |
761 | func_fma_steering::analyze_fma_fmul_insn (fma_forest *ref_forest, | |
762 | du_chain *chain, du_head_p head) | |
763 | { | |
764 | fma_forest *forest; | |
765 | fma_node *node = this->get_fma_node (chain->insn); | |
766 | ||
767 | /* This is a root node. */ | |
768 | if (!node) | |
769 | { | |
770 | fma_root_node *root_node; | |
771 | ||
772 | root_node = new fma_root_node (this, chain, this->m_next_forest_id++); | |
773 | forest = root_node->get_forest (); | |
774 | node = root_node; | |
775 | ||
776 | /* Until proved otherwise, assume this root is not part of an existing | |
777 | forest and thus add its forest to the list of forests. */ | |
778 | this->m_fma_forests.push_back (forest); | |
779 | } | |
780 | else | |
781 | forest = node->get_forest (); | |
782 | ||
783 | node->set_head (head); | |
784 | ||
785 | /* fma_node is part of a chain with several defs, one of them having already | |
786 | been processed. The root of that already processed def is the canonical | |
787 | one and the root of fma_node is added to its forest. No need to process | |
788 | the children nodes as they were already processed when the other def was | |
789 | processed. */ | |
790 | if (ref_forest) | |
791 | { | |
792 | ref_forest->merge_forest (forest); | |
793 | return; | |
794 | } | |
795 | ||
796 | for (chain = head->first; chain; chain = chain->next_use) | |
797 | { | |
798 | fma_node *child_fma; | |
799 | rtx fma_rtx, *accum_rtx_p; | |
800 | ||
801 | if (!is_fmul_fmac_insn (chain->insn, false)) | |
802 | continue; | |
803 | ||
804 | /* Get FMA rtx. */ | |
805 | fma_rtx = SET_SRC (PATTERN (chain->insn)); | |
806 | /* FMA is negated. */ | |
807 | if (GET_CODE (fma_rtx) == NEG) | |
808 | fma_rtx = XEXP (fma_rtx, 0); | |
809 | /* Get accumulator rtx. */ | |
810 | accum_rtx_p = &XEXP (fma_rtx, 2); | |
811 | /* Accumulator is negated. */ | |
812 | if (!REG_P (*accum_rtx_p)) | |
813 | accum_rtx_p = &XEXP (*accum_rtx_p, 0); | |
814 | ||
815 | /* This du_chain structure is not for the accumulator register. */ | |
816 | if (accum_rtx_p != chain->loc) | |
817 | continue; | |
818 | ||
819 | /* If object already created, this is a loop carried dependency. We | |
820 | don't include this object in the children as we want trees for | |
821 | rename_fma_trees to not be an infinite loop. */ | |
822 | if (this->get_fma_node (chain->insn)) | |
823 | continue; | |
824 | ||
825 | child_fma = new fma_node (node, chain); | |
826 | ||
827 | /* Memorize the mapping of this instruction to its fma_node object | |
828 | as it will be processed for the chain starting at its destination | |
829 | register later. */ | |
830 | ||
831 | /* Link to siblings. */ | |
832 | node->add_child (child_fma); | |
833 | } | |
834 | } | |
835 | ||
836 | /* Perform a depth-first search of the forests of fma_node in | |
837 | THIS->m_fma_forests, calling PROCESS_FOREST () on each fma_forest object in | |
838 | THIS->m_fma_forests list, PROCESS_ROOT () on each tree root and | |
839 | PROCESS_NODE () on each node. If FREE is true, free all std::list in the | |
840 | same dfs. */ | |
841 | ||
842 | void | |
843 | func_fma_steering::dfs (void (*process_forest) (fma_forest *), | |
844 | void (*process_root) (fma_forest *, fma_root_node *), | |
845 | void (*process_node) (fma_forest *, fma_node *), | |
846 | bool free) | |
847 | { | |
848 | vec<fma_node *> to_process; | |
849 | std::list<fma_forest *>::iterator forest_iter; | |
850 | ||
851 | to_process.create (0); | |
852 | ||
853 | /* For each forest. */ | |
854 | for (forest_iter = this->m_fma_forests.begin (); | |
855 | forest_iter != this->m_fma_forests.end (); forest_iter++) | |
856 | { | |
857 | std::list<fma_root_node *>::iterator root_iter; | |
858 | ||
859 | if (process_forest) | |
860 | process_forest (*forest_iter); | |
861 | ||
862 | /* For each tree root in this forest. */ | |
863 | for (root_iter = (*forest_iter)->get_roots ()->begin (); | |
864 | root_iter != (*forest_iter)->get_roots ()->end (); root_iter++) | |
865 | { | |
866 | if (process_root) | |
867 | process_root (*forest_iter, *root_iter); | |
868 | to_process.safe_push (*root_iter); | |
869 | } | |
870 | ||
871 | /* For each tree node in this forest. */ | |
872 | while (!to_process.is_empty ()) | |
873 | { | |
874 | fma_node *node; | |
875 | std::list<fma_node *>::iterator child_iter; | |
876 | ||
877 | node = to_process.pop (); | |
878 | ||
879 | if (process_node) | |
880 | process_node (*forest_iter, node); | |
881 | ||
882 | /* Absence of children might indicate an alternate root of a *chain*. | |
883 | It's ok to skip it here as the chain will be renamed when | |
884 | processing the canonical root for that chain. */ | |
885 | if (node->get_children ()->empty ()) | |
886 | continue; | |
887 | ||
888 | for (child_iter = node->get_children ()->begin (); | |
889 | child_iter != node->get_children ()->end (); child_iter++) | |
890 | to_process.safe_push (*child_iter); | |
891 | if (free) | |
892 | { | |
893 | if (node->root_p ()) | |
894 | delete static_cast<fma_root_node *> (node); | |
895 | else | |
896 | delete node; | |
897 | } | |
898 | } | |
899 | if (free) | |
900 | delete *forest_iter; | |
901 | } | |
902 | ||
903 | to_process.release (); | |
904 | } | |
905 | ||
906 | /* Build the dependency trees of FMUL and FMADD/FMSUB instructions. */ | |
907 | ||
908 | void | |
909 | func_fma_steering::analyze () | |
910 | { | |
911 | int i, n_blocks, *bb_dfs_preorder; | |
912 | basic_block bb; | |
913 | rtx_insn *insn; | |
914 | ||
915 | bb_dfs_preorder = XNEWVEC (int, last_basic_block_for_fn (cfun)); | |
916 | n_blocks = pre_and_rev_post_order_compute (bb_dfs_preorder, NULL, false); | |
917 | ||
918 | /* Browse the graph of basic blocks looking for FMUL or FMADD/FMSUB | |
919 | instructions. */ | |
920 | for (i = 0; i < n_blocks; i++) | |
921 | { | |
922 | bb = BASIC_BLOCK_FOR_FN (cfun, bb_dfs_preorder[i]); | |
923 | FOR_BB_INSNS (bb, insn) | |
924 | { | |
925 | operand_rr_info *dest_op_info; | |
20930113 | 926 | struct du_chain *chain = NULL; |
ee7ef7ab | 927 | unsigned dest_regno; |
20930113 | 928 | fma_forest *forest = NULL; |
929 | du_head_p head = NULL; | |
ee7ef7ab | 930 | int i; |
931 | ||
932 | if (!is_fmul_fmac_insn (insn, true)) | |
933 | continue; | |
934 | ||
935 | /* Search the chain where this instruction is (one of) the root. */ | |
936 | dest_op_info = insn_rr[INSN_UID (insn)].op_info; | |
937 | dest_regno = REGNO (SET_DEST (PATTERN (insn))); | |
938 | for (i = 0; i < dest_op_info->n_chains; i++) | |
939 | { | |
940 | /* The register tracked by this chain does not match the | |
941 | destination register of insn. */ | |
942 | if (dest_op_info->heads[i]->regno != dest_regno) | |
943 | continue; | |
944 | ||
945 | head = dest_op_info->heads[i]; | |
946 | /* The chain was merged in another, find the new head. */ | |
947 | if (!head->first) | |
948 | head = regrename_chain_from_id (head->id); | |
949 | ||
950 | /* Search the chain element for this instruction and, if another | |
951 | FMUL or FMADD/FMSUB instruction was already processed, note | |
952 | the forest of its tree. */ | |
953 | forest = NULL; | |
954 | for (chain = head->first; chain; chain = chain->next_use) | |
955 | { | |
956 | fma_node **fma_slot; | |
957 | ||
958 | if (!is_fmul_fmac_insn (chain->insn, true)) | |
959 | continue; | |
960 | ||
961 | /* This is a use, continue. */ | |
962 | if (chain->loc != &SET_DEST (PATTERN (chain->insn))) | |
963 | continue; | |
964 | ||
965 | if (chain->insn == insn) | |
966 | break; | |
967 | ||
968 | fma_slot = this->m_insn_fma_head_map->get (chain->insn); | |
969 | if (fma_slot && (*fma_slot)->get_children ()) | |
970 | forest = (*fma_slot)->get_forest (); | |
971 | } | |
972 | if (chain) | |
973 | break; | |
974 | } | |
975 | ||
976 | /* We didn't find a chain with a def for this instruction. */ | |
977 | gcc_assert (i < dest_op_info->n_chains); | |
978 | ||
979 | this->analyze_fma_fmul_insn (forest, chain, head); | |
980 | } | |
981 | } | |
982 | free (bb_dfs_preorder); | |
983 | ||
984 | if (dump_file) | |
985 | this->dfs (dump_forest_info, dump_tree_root_info, dump_tree_node_info, | |
986 | false); | |
987 | } | |
988 | ||
989 | /* Perform the renaming of all chains with FMUL or FMADD/FMSUB involved with | |
990 | the objective of keeping FPU pipeline balanced in term of instructions and | |
991 | having FMADD/FMSUB with dependencies on previous FMUL or FMADD/FMSUB be | |
992 | scheduled on the same pipeline. */ | |
993 | ||
994 | void | |
995 | func_fma_steering::rename_fma_trees () | |
996 | { | |
997 | this->dfs (dispatch_forest, NULL, rename_fma_node, true); | |
998 | ||
999 | if (dump_file && !this->m_fma_forests.empty ()) | |
1000 | { | |
1001 | fprintf (dump_file, "Function %s has ", current_function_name ()); | |
1002 | if (this->m_fpu_balance == 0) | |
1003 | fprintf (dump_file, "perfect balance of FMUL/FMA chains between the " | |
1004 | "two FPU pipelines\n"); | |
1005 | else if (this->m_fpu_balance > 0) | |
1006 | fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the second " | |
1007 | "FPU pipeline\n", this->m_fpu_balance); | |
1008 | else /* this->m_fpu_balance < 0 */ | |
1009 | fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the first " | |
1010 | "FPU pipeline\n", - this->m_fpu_balance); | |
1011 | } | |
1012 | } | |
1013 | ||
1014 | /* Execute FMA steering pass. */ | |
1015 | ||
1016 | void | |
1017 | func_fma_steering::execute_fma_steering () | |
1018 | { | |
1019 | df_set_flags (DF_LR_RUN_DCE); | |
1020 | df_note_add_problem (); | |
1021 | df_analyze (); | |
1022 | df_set_flags (DF_DEFER_INSN_RESCAN); | |
1023 | ||
1024 | regrename_init (true); | |
1025 | regrename_analyze (NULL); | |
1026 | this->analyze (); | |
1027 | this->rename_fma_trees (); | |
1028 | regrename_finish (); | |
1029 | } | |
1030 | ||
1031 | const pass_data pass_data_fma_steering = | |
1032 | { | |
1033 | RTL_PASS, /* type */ | |
1034 | "fma_steering", /* name */ | |
1035 | OPTGROUP_NONE, /* optinfo_flags */ | |
1036 | TV_NONE, /* tv_id */ | |
1037 | 0, /* properties_required */ | |
1038 | 0, /* properties_provided */ | |
1039 | 0, /* properties_destroyed */ | |
1040 | 0, /* todo_flags_start */ | |
1041 | TODO_df_finish, /* todo_flags_finish */ | |
1042 | }; | |
1043 | ||
1044 | class pass_fma_steering : public rtl_opt_pass | |
1045 | { | |
1046 | public: | |
1047 | pass_fma_steering (gcc::context *ctxt) | |
1048 | : rtl_opt_pass (pass_data_fma_steering, ctxt) | |
1049 | {} | |
1050 | ||
1051 | /* opt_pass methods: */ | |
1052 | virtual bool gate (function *) | |
1053 | { | |
14677da9 | 1054 | return (aarch64_tune_params.extra_tuning_flags |
5e9fcc70 | 1055 | & AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) |
1056 | && optimize >= 2; | |
ee7ef7ab | 1057 | } |
1058 | ||
1059 | virtual unsigned int execute (function *) | |
1060 | { | |
1061 | func_fma_steering *fma_steering = new func_fma_steering; | |
1062 | fma_steering->execute_fma_steering (); | |
1063 | delete fma_steering; | |
1064 | return 0; | |
1065 | } | |
1066 | ||
1067 | }; // class pass_fma_steering | |
1068 | ||
1069 | /* Create a new fma steering pass instance. */ | |
1070 | ||
e8ebfb45 | 1071 | rtl_opt_pass * |
ee7ef7ab | 1072 | make_pass_fma_steering (gcc::context *ctxt) |
1073 | { | |
1074 | return new pass_fma_steering (ctxt); | |
1075 | } |