]>
Commit | Line | Data |
---|---|---|
fde9b31b | 1 | /* FMA steering optimization pass for Cortex-A57. |
85ec4feb | 2 | Copyright (C) 2015-2018 Free Software Foundation, Inc. |
fde9b31b TP |
3 | Contributed by ARM Ltd. |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but | |
13 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
8fcc61f8 RS |
21 | #define IN_TARGET_CODE 1 |
22 | ||
fde9b31b | 23 | #include "config.h" |
2c384ad8 | 24 | #define INCLUDE_LIST |
fde9b31b TP |
25 | #include "system.h" |
26 | #include "coretypes.h" | |
c7131fb2 | 27 | #include "backend.h" |
e11c4407 | 28 | #include "target.h" |
c7131fb2 AM |
29 | #include "rtl.h" |
30 | #include "df.h" | |
fde9b31b | 31 | #include "insn-config.h" |
e11c4407 | 32 | #include "regs.h" |
4d0cdd0c | 33 | #include "memmodel.h" |
e11c4407 AM |
34 | #include "emit-rtl.h" |
35 | #include "recog.h" | |
fde9b31b | 36 | #include "cfganal.h" |
fde9b31b | 37 | #include "insn-attr.h" |
fde9b31b TP |
38 | #include "context.h" |
39 | #include "tree-pass.h" | |
40 | #include "regrename.h" | |
dfba575f | 41 | #include "aarch64-protos.h" |
fde9b31b | 42 | |
fde9b31b TP |
43 | /* For better performance, the destination of FMADD/FMSUB instructions should |
44 | have the same parity as their accumulator register if the accumulator | |
45 | contains the result of a previous FMUL or FMADD/FMSUB instruction if | |
46 | targetting Cortex-A57 processors. Performance is also increased by | |
47 | otherwise keeping a good balance in the parity of the destination register | |
48 | of FMUL or FMADD/FMSUB. | |
49 | ||
50 | This pass ensure that registers are renamed so that these conditions hold. | |
51 | We reuse the existing register renaming facility from regrename.c to build | |
52 | dependency chains and expose candidate registers for renaming. | |
53 | ||
54 | ||
55 | The algorithm has three steps: | |
56 | ||
57 | First, the functions of the register renaming pass are called. These | |
58 | analyze the instructions and produce a list of def/use chains of | |
59 | instructions. | |
60 | ||
61 | Next, this information is used to build trees of multiply and | |
62 | multiply-accumulate instructions. The roots of these trees are any | |
63 | multiply, or any multiply-accumulate whose accumulator is not dependent on | |
64 | a multiply or multiply-accumulate instruction. A child is added to the | |
65 | tree where a dependency chain exists between the result of the parent | |
66 | instruction and the accumulator operand of the child, as in the diagram | |
67 | below: | |
68 | ||
69 | fmul s2, s0, s1 | |
70 | / \ | |
71 | fmadd s0, s1, s1, s2 fmadd s4, s1, s1 s2 | |
72 | | | |
73 | fmadd s3, s1, s1, s0 | |
74 | ||
75 | Trees made of a single instruction are permitted. | |
76 | ||
77 | Finally, renaming is performed. The parity of the destination register at | |
78 | the root of a tree is checked against the current balance of multiply and | |
79 | multiply-accumulate on each pipeline. If necessary, the root of a tree is | |
80 | renamed, in which case the rest of the tree is then renamed to keep the same | |
81 | parity in the destination registers of all instructions in the tree. */ | |
82 | ||
83 | ||
84 | ||
85 | /* Forward declarations. */ | |
86 | class fma_node; | |
87 | class fma_root_node; | |
88 | class func_fma_steering; | |
89 | ||
90 | /* Dependencies between FMUL or FMADD/FMSUB instructions and subsequent | |
91 | FMADD/FMSUB instructions form a graph. This is because alternatives can | |
92 | make a register be set by several FMUL or FMADD/FMSUB instructions in | |
93 | different basic blocks and because of loops. For ease of browsing, the | |
94 | connected components of this graph are broken up into forests of trees. | |
95 | Forests are represented by fma_forest objects, contained in the fma_forests | |
96 | list. Using a separate object for the forests allows for a better use of | |
97 | memory as there is some information that is global to each forest, such as | |
98 | the number of FMSUB and FMADD/FMSUB instructions currently scheduled on each | |
99 | floating-point execution pipelines. */ | |
100 | ||
101 | class fma_forest | |
102 | { | |
103 | public: | |
104 | fma_forest (func_fma_steering *, fma_root_node *, int); | |
105 | ~fma_forest (); | |
106 | ||
107 | int get_id (); | |
108 | std::list<fma_root_node *> *get_roots (); | |
109 | func_fma_steering *get_globals (); | |
110 | int get_target_parity (); | |
111 | void fma_node_created (fma_node *); | |
112 | void merge_forest (fma_forest *); | |
113 | void dump_info (); | |
114 | void dispatch (); | |
115 | ||
116 | private: | |
117 | /* The list of roots that form this forest. */ | |
118 | std::list<fma_root_node *> *m_roots; | |
119 | ||
120 | /* Target parity the destination register of all FMUL and FMADD/FMSUB | |
121 | instructions in this forest should have. */ | |
122 | int m_target_parity; | |
123 | ||
124 | /* Link to the instance of func_fma_steering holding data related to the | |
125 | FMA steering of the current function (cfun). */ | |
126 | func_fma_steering *m_globals; | |
127 | ||
128 | /* Identifier for the forest (used for dumps). */ | |
129 | int m_id; | |
130 | ||
131 | /* Total number of nodes in the forest (for statistics). */ | |
132 | int m_nb_nodes; | |
133 | }; | |
134 | ||
135 | class fma_node | |
136 | { | |
137 | public: | |
138 | fma_node (fma_node *parent, du_chain *chain); | |
139 | ~fma_node (); | |
140 | ||
141 | bool root_p (); | |
142 | fma_forest *get_forest (); | |
143 | std::list<fma_node *> *get_children (); | |
144 | rtx_insn *get_insn (); | |
145 | void add_child (fma_node *); | |
146 | int get_parity (); | |
147 | void set_head (du_head *); | |
148 | void rename (fma_forest *); | |
149 | void dump_info (fma_forest *); | |
150 | ||
151 | protected: | |
152 | /* Root node that lead to this node. */ | |
153 | fma_root_node *m_root; | |
154 | ||
155 | /* The parent node of this node. If the node belong to a chain with several | |
156 | parent nodes, the first one encountered in a depth-first search is chosen | |
157 | as canonical parent. */ | |
158 | fma_node *m_parent; | |
159 | ||
160 | /* The list of child nodes. If a chain contains several parent nodes, one is | |
161 | chosen as canonical parent and the others will have no children. */ | |
162 | std::list<fma_node *> *m_children; | |
163 | ||
164 | /* The associated DU_HEAD chain that the insn represented by this object | |
165 | is (one of) the root of. When a chain contains several roots, the non | |
166 | canonical ones have this field set to NULL. */ | |
167 | struct du_head *m_head; | |
168 | ||
169 | /* The FMUL or FMADD/FMSUB instruction this object corresponds to. */ | |
170 | rtx_insn *m_insn; | |
171 | }; | |
172 | ||
173 | class fma_root_node : public fma_node | |
174 | { | |
175 | public: | |
176 | fma_root_node (func_fma_steering *, du_chain *, int); | |
177 | ||
178 | fma_forest *get_forest (); | |
179 | void set_forest (fma_forest *); | |
180 | void dump_info (fma_forest *); | |
181 | ||
182 | private: | |
183 | /* The forest this node belonged to when it was created. */ | |
184 | fma_forest *m_forest; | |
185 | }; | |
186 | ||
187 | /* Class holding all data and methods relative to the FMA steering of a given | |
188 | function. The FMA steering pass could then run in parallel for different | |
189 | functions. */ | |
190 | ||
191 | class func_fma_steering | |
192 | { | |
193 | public: | |
194 | func_fma_steering (); | |
195 | ~func_fma_steering (); | |
196 | ||
197 | int get_fpu_balance (); | |
198 | void remove_forest (fma_forest *); | |
199 | bool put_node (fma_node *); | |
200 | void update_balance (int); | |
201 | fma_node *get_fma_node (rtx_insn *); | |
202 | void analyze_fma_fmul_insn (fma_forest *, du_chain *, du_head_p); | |
203 | void execute_fma_steering (); | |
204 | ||
205 | private: | |
206 | void dfs (void (*) (fma_forest *), void (*) (fma_forest *, fma_root_node *), | |
207 | void (*) (fma_forest *, fma_node *), bool); | |
208 | void analyze (); | |
209 | void rename_fma_trees (); | |
210 | ||
211 | /* Mapping between FMUL or FMADD/FMSUB instructions and the associated | |
212 | fma_node object. Used when analyzing an instruction that is a root of | |
213 | a chain to find if such an object was created because this instruction | |
214 | is also a use in another chain. */ | |
215 | hash_map<rtx_insn *, fma_node *> *m_insn_fma_head_map; | |
216 | ||
217 | /* A list of all the forests in a given function. */ | |
218 | std::list<fma_forest *> m_fma_forests; | |
219 | ||
220 | /* Balance of FMUL and FMADD/FMSUB instructions between the two FPU | |
221 | pipelines: | |
222 | < 0: more instruction dispatched to the first pipeline | |
223 | == 0: perfect balance | |
224 | > 0: more instruction dispatched to the second pipeline. */ | |
225 | int m_fpu_balance; | |
226 | ||
227 | /* Identifier for the next forest created. */ | |
228 | int m_next_forest_id; | |
229 | }; | |
230 | ||
231 | /* Rename the register HEAD->regno in all the insns in the chain HEAD to any | |
232 | register not in the set UNAVAILABLE. Adapted from rename_chains in | |
233 | regrename.c. */ | |
234 | ||
235 | static bool | |
236 | rename_single_chain (du_head_p head, HARD_REG_SET *unavailable) | |
237 | { | |
238 | int best_new_reg; | |
239 | int n_uses = 0; | |
240 | struct du_chain *tmp; | |
241 | int reg = head->regno; | |
242 | enum reg_class super_class = NO_REGS; | |
243 | ||
244 | if (head->cannot_rename) | |
245 | return false; | |
246 | ||
247 | if (fixed_regs[reg] || global_regs[reg] | |
248 | || (frame_pointer_needed && reg == HARD_FRAME_POINTER_REGNUM)) | |
249 | return false; | |
250 | ||
251 | /* Iterate over elements in the chain in order to: | |
252 | 1. Count number of uses, and narrow the set of registers we can | |
253 | use for renaming. | |
254 | 2. Compute the superunion of register classes in this chain. */ | |
255 | for (tmp = head->first; tmp; tmp = tmp->next_use) | |
256 | { | |
257 | if (DEBUG_INSN_P (tmp->insn)) | |
258 | continue; | |
259 | n_uses++; | |
260 | IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]); | |
261 | super_class = reg_class_superunion[(int) super_class][(int) tmp->cl]; | |
262 | } | |
263 | ||
264 | if (n_uses < 1) | |
265 | return false; | |
266 | ||
267 | best_new_reg = find_rename_reg (head, super_class, unavailable, reg, | |
268 | false); | |
269 | ||
270 | if (dump_file) | |
271 | { | |
272 | fprintf (dump_file, "Register %s in insn %d", reg_names[reg], | |
273 | INSN_UID (head->first->insn)); | |
274 | if (head->need_caller_save_reg) | |
275 | fprintf (dump_file, " crosses a call"); | |
276 | } | |
277 | ||
278 | if (best_new_reg == reg) | |
279 | { | |
280 | if (dump_file) | |
281 | fprintf (dump_file, "; no available better choice\n"); | |
282 | return false; | |
283 | } | |
284 | ||
17369fbf CLT |
285 | if (regrename_do_replace (head, best_new_reg)) |
286 | { | |
287 | if (dump_file) | |
288 | fprintf (dump_file, ", renamed as %s\n", reg_names[best_new_reg]); | |
289 | df_set_regs_ever_live (best_new_reg, true); | |
290 | } | |
291 | else | |
292 | { | |
293 | if (dump_file) | |
294 | fprintf (dump_file, ", renaming as %s failed\n", | |
295 | reg_names[best_new_reg]); | |
296 | return false; | |
297 | } | |
fde9b31b TP |
298 | return true; |
299 | } | |
300 | ||
301 | /* Return whether T is the attribute of a FMADD/FMSUB-like instruction. */ | |
302 | ||
303 | static bool | |
304 | is_fmac_op (enum attr_type t) | |
305 | { | |
306 | return (t == TYPE_FMACS) || (t == TYPE_FMACD) || (t == TYPE_NEON_FP_MLA_S); | |
307 | } | |
308 | ||
309 | /* Return whether T is the attribute of a FMUL instruction. */ | |
310 | ||
311 | static bool | |
312 | is_fmul_op (enum attr_type t) | |
313 | { | |
314 | return (t == TYPE_FMULS) || (t == TYPE_FMULD) || (t == TYPE_NEON_FP_MUL_S); | |
315 | } | |
316 | ||
317 | /* Return whether INSN is an FMUL (if FMUL_OK is true) or FMADD/FMSUB | |
318 | instruction. */ | |
319 | ||
320 | static bool | |
321 | is_fmul_fmac_insn (rtx_insn *insn, bool fmul_ok) | |
322 | { | |
323 | enum attr_type t; | |
324 | ||
325 | if (!NONDEBUG_INSN_P (insn)) | |
326 | return false; | |
327 | ||
328 | if (recog_memoized (insn) < 0) | |
329 | return false; | |
330 | ||
331 | /* Only consider chain(s) this instruction is a root of if this is an FMUL or | |
332 | FMADD/FMSUB instruction. This allows to avoid browsing chains of all | |
333 | instructions for FMUL or FMADD/FMSUB in them. */ | |
334 | t = get_attr_type (insn); | |
335 | return is_fmac_op (t) || (fmul_ok && is_fmul_op (t)); | |
336 | } | |
337 | ||
338 | ||
339 | /* | |
340 | * Class fma_forest method definitions. | |
341 | */ | |
342 | ||
343 | fma_forest::fma_forest (func_fma_steering *fma_steer, fma_root_node *fma_root, | |
344 | int id) | |
345 | { | |
346 | memset (this, 0, sizeof (*this)); | |
347 | this->m_globals = fma_steer; | |
348 | this->m_roots = new std::list<fma_root_node *>; | |
349 | this->m_roots->push_back (fma_root); | |
350 | this->m_id = id; | |
351 | } | |
352 | ||
353 | fma_forest::~fma_forest () | |
354 | { | |
355 | delete this->m_roots; | |
356 | } | |
357 | ||
358 | int | |
359 | fma_forest::get_id () | |
360 | { | |
361 | return this->m_id; | |
362 | } | |
363 | ||
364 | std::list<fma_root_node *> * | |
365 | fma_forest::get_roots () | |
366 | { | |
367 | return this->m_roots; | |
368 | } | |
369 | ||
370 | func_fma_steering * | |
371 | fma_forest::get_globals () | |
372 | { | |
373 | return this->m_globals; | |
374 | } | |
375 | ||
376 | int | |
377 | fma_forest::get_target_parity () | |
378 | { | |
379 | return this->m_target_parity; | |
380 | } | |
381 | ||
382 | /* Act on the creation of NODE by updating statistics in FOREST and adding an | |
383 | entry for it in the func_fma_steering hashmap. */ | |
384 | ||
385 | void fma_forest::fma_node_created (fma_node *node) | |
386 | { | |
387 | bool created = !this->m_globals->put_node (node); | |
388 | ||
389 | gcc_assert (created); | |
390 | this->m_nb_nodes++; | |
391 | } | |
392 | ||
393 | /* Merge REF_FOREST and OTHER_FOREST together, making REF_FOREST the canonical | |
394 | fma_forest object to represent both. */ | |
395 | ||
396 | void | |
397 | fma_forest::merge_forest (fma_forest *other_forest) | |
398 | { | |
399 | std::list<fma_root_node *> *other_roots; | |
400 | std::list<fma_root_node *>::iterator other_root_iter; | |
401 | ||
402 | if (this == other_forest) | |
403 | return; | |
404 | ||
405 | other_roots = other_forest->m_roots; | |
406 | ||
407 | /* Update root nodes' pointer to forest. */ | |
408 | for (other_root_iter = other_roots->begin (); | |
409 | other_root_iter != other_roots->end (); other_root_iter++) | |
410 | (*other_root_iter)->set_forest (this); | |
411 | ||
412 | /* Remove other_forest from the list of forests and move its tree roots in | |
413 | the list of tree roots of ref_forest. */ | |
414 | this->m_globals->remove_forest (other_forest); | |
415 | this->m_roots->splice (this->m_roots->begin (), *other_roots); | |
fde9b31b | 416 | this->m_nb_nodes += other_forest->m_nb_nodes; |
33e2d6f8 WD |
417 | |
418 | delete other_forest; | |
fde9b31b TP |
419 | } |
420 | ||
421 | /* Dump information about the forest FOREST. */ | |
422 | ||
423 | void | |
424 | fma_forest::dump_info () | |
425 | { | |
426 | gcc_assert (dump_file); | |
427 | ||
428 | fprintf (dump_file, "Forest #%d has %d nodes\n", this->m_id, | |
429 | this->m_nb_nodes); | |
430 | } | |
431 | ||
432 | /* Wrapper around fma_forest::dump_info for use as parameter of function | |
433 | pointer type in func_fma_steering::dfs. */ | |
434 | ||
435 | static void | |
436 | dump_forest_info (fma_forest *forest) | |
437 | { | |
438 | forest->dump_info (); | |
439 | } | |
440 | ||
441 | /* Dispatch forest to the least utilized pipeline. */ | |
442 | ||
443 | void | |
444 | fma_forest::dispatch () | |
445 | { | |
446 | this->m_target_parity = this->m_roots->front ()->get_parity (); | |
447 | int fpu_balance = this->m_globals->get_fpu_balance (); | |
448 | if (fpu_balance != 0) | |
449 | this->m_target_parity = (fpu_balance < 0); | |
450 | ||
451 | if (dump_file) | |
452 | fprintf (dump_file, "Target parity for forest #%d: %s\n", this->m_id, | |
453 | this->m_target_parity ? "odd" : "even"); | |
454 | } | |
455 | ||
456 | /* Wrapper around fma_forest::dispatch for use as parameter of function pointer | |
457 | type in func_fma_steering::dfs. */ | |
458 | ||
459 | static void | |
460 | dispatch_forest (fma_forest *forest) | |
461 | { | |
462 | forest->dispatch (); | |
463 | } | |
464 | ||
465 | fma_node::fma_node (fma_node *parent, du_chain *chain) | |
466 | { | |
467 | memset (this, 0, sizeof (*this)); | |
468 | this->m_parent = parent; | |
469 | this->m_children = new std::list<fma_node *>; | |
470 | this->m_insn = chain->insn; | |
471 | /* root_p () cannot be used to check for root before root is set. */ | |
472 | if (this->m_parent == this) | |
473 | this->m_root = static_cast<fma_root_node *> (parent); | |
474 | else | |
475 | { | |
476 | this->m_root = parent->m_root; | |
477 | this->get_forest ()->fma_node_created (this); | |
478 | } | |
479 | } | |
480 | ||
481 | fma_node::~fma_node () | |
482 | { | |
483 | delete this->m_children; | |
484 | } | |
485 | ||
486 | std::list<fma_node *> * | |
487 | fma_node::get_children () | |
488 | { | |
489 | return this->m_children; | |
490 | } | |
491 | ||
492 | rtx_insn * | |
493 | fma_node::get_insn () | |
494 | { | |
495 | return this->m_insn; | |
496 | } | |
497 | ||
498 | void | |
499 | fma_node::set_head (du_head *head) | |
500 | { | |
501 | gcc_assert (!this->m_head); | |
502 | this->m_head = head; | |
503 | } | |
504 | ||
505 | /* Add a child to this node in the list of children. */ | |
506 | ||
507 | void | |
508 | fma_node::add_child (fma_node *child) | |
509 | { | |
510 | this->m_children->push_back (child); | |
511 | } | |
512 | ||
513 | /* Return the parity of the destination register of the instruction represented | |
514 | by this node. */ | |
515 | ||
516 | int | |
517 | fma_node::get_parity () | |
518 | { | |
519 | return this->m_head->regno % 2; | |
520 | } | |
521 | ||
522 | /* Get the actual forest associated with a non root node as the one the node | |
523 | points to might have been merged into another one. In that case the pointer | |
524 | in the root nodes are updated so we return the forest pointer of a root node | |
525 | pointed to by the initial forest. Despite being a oneliner, this method is | |
526 | defined here as it references a method from fma_root_node. */ | |
527 | ||
528 | fma_forest * | |
529 | fma_node::get_forest () | |
530 | { | |
531 | return this->m_root->get_forest (); | |
532 | } | |
533 | ||
534 | /* Return whether a node is a root node. */ | |
535 | ||
536 | bool | |
537 | fma_node::root_p () | |
538 | { | |
539 | return this->m_root == this; | |
540 | } | |
541 | ||
542 | /* Dump information about the children of node FMA_NODE in forest FOREST. */ | |
543 | ||
544 | void | |
545 | fma_node::dump_info (ATTRIBUTE_UNUSED fma_forest *forest) | |
546 | { | |
547 | struct du_chain *chain; | |
548 | std::list<fma_node *>::iterator fma_child; | |
549 | ||
550 | gcc_assert (dump_file); | |
551 | ||
552 | if (this->get_children ()->empty ()) | |
553 | return; | |
554 | ||
555 | fprintf (dump_file, "Instruction(s)"); | |
556 | for (chain = this->m_head->first; chain; chain = chain->next_use) | |
557 | { | |
558 | if (!is_fmul_fmac_insn (chain->insn, true)) | |
559 | continue; | |
560 | ||
561 | if (chain->loc != &SET_DEST (PATTERN (chain->insn))) | |
562 | continue; | |
563 | ||
564 | fprintf (dump_file, " %d", INSN_UID (chain->insn)); | |
565 | } | |
566 | ||
567 | fprintf (dump_file, " is(are) accumulator dependency of instructions"); | |
568 | for (fma_child = this->get_children ()->begin (); | |
569 | fma_child != this->get_children ()->end (); fma_child++) | |
570 | fprintf (dump_file, " %d", INSN_UID ((*fma_child)->m_insn)); | |
571 | fprintf (dump_file, "\n"); | |
572 | } | |
573 | ||
574 | /* Wrapper around fma_node::dump_info for use as parameter of function pointer | |
575 | type in func_fma_steering::dfs. */ | |
576 | ||
577 | static void | |
578 | dump_tree_node_info (fma_forest *forest, fma_node *node) | |
579 | { | |
580 | node->dump_info (forest); | |
581 | } | |
582 | ||
583 | /* Rename the destination register of a single FMUL or FMADD/FMSUB instruction | |
584 | represented by FMA_NODE to a register that respect the target parity for | |
585 | FOREST or with same parity of the instruction represented by its parent node | |
586 | if it has one. */ | |
587 | ||
588 | void | |
589 | fma_node::rename (fma_forest *forest) | |
590 | { | |
591 | int cur_parity, target_parity; | |
592 | ||
593 | /* This is alternate root of a chain and thus has no children. It will be | |
594 | renamed when processing the canonical root for that chain. */ | |
595 | if (!this->m_head) | |
596 | return; | |
597 | ||
598 | target_parity = forest->get_target_parity (); | |
599 | if (this->m_parent) | |
600 | target_parity = this->m_parent->get_parity (); | |
601 | cur_parity = this->get_parity (); | |
602 | ||
603 | /* Rename if parity differs. */ | |
604 | if (cur_parity != target_parity) | |
605 | { | |
606 | rtx_insn *insn = this->m_insn; | |
607 | HARD_REG_SET unavailable; | |
b8506a8a | 608 | machine_mode mode; |
fde9b31b TP |
609 | int reg; |
610 | ||
611 | if (dump_file) | |
612 | { | |
613 | unsigned cur_dest_reg = this->m_head->regno; | |
614 | ||
615 | fprintf (dump_file, "FMA or FMUL at insn %d but destination " | |
616 | "register (%s) has different parity from expected to " | |
617 | "maximize FPU pipeline utilization\n", INSN_UID (insn), | |
618 | reg_names[cur_dest_reg]); | |
619 | } | |
620 | ||
621 | /* Don't clobber traceback for noreturn functions. */ | |
622 | CLEAR_HARD_REG_SET (unavailable); | |
623 | if (frame_pointer_needed) | |
624 | { | |
625 | add_to_hard_reg_set (&unavailable, Pmode, FRAME_POINTER_REGNUM); | |
626 | add_to_hard_reg_set (&unavailable, Pmode, HARD_FRAME_POINTER_REGNUM); | |
627 | } | |
628 | ||
629 | /* Exclude registers with wrong parity. */ | |
630 | mode = GET_MODE (SET_DEST (PATTERN (insn))); | |
631 | for (reg = cur_parity; reg < FIRST_PSEUDO_REGISTER; reg += 2) | |
632 | add_to_hard_reg_set (&unavailable, mode, reg); | |
633 | ||
634 | if (!rename_single_chain (this->m_head, &unavailable)) | |
635 | { | |
636 | if (dump_file) | |
637 | fprintf (dump_file, "Destination register of insn %d could not be " | |
638 | "renamed. Dependent FMA insns will use this parity from " | |
639 | "there on.\n", INSN_UID (insn)); | |
640 | } | |
641 | else | |
642 | cur_parity = target_parity; | |
643 | } | |
644 | ||
645 | forest->get_globals ()->update_balance (cur_parity); | |
646 | } | |
647 | ||
648 | /* Wrapper around fma_node::dump_info for use as parameter of function pointer | |
649 | type in func_fma_steering::dfs. */ | |
650 | ||
651 | static void | |
652 | rename_fma_node (fma_forest *forest, fma_node *node) | |
653 | { | |
654 | node->rename (forest); | |
655 | } | |
656 | ||
657 | fma_root_node::fma_root_node (func_fma_steering *globals, du_chain *chain, | |
658 | int id) : fma_node (this, chain) | |
659 | { | |
660 | this->m_forest = new fma_forest (globals, this, id); | |
661 | this->m_forest->fma_node_created (this); | |
662 | } | |
663 | ||
664 | fma_forest * | |
665 | fma_root_node::get_forest () | |
666 | { | |
667 | return this->m_forest; | |
668 | } | |
669 | ||
670 | void | |
671 | fma_root_node::set_forest (fma_forest *ref_forest) | |
672 | { | |
673 | this->m_forest = ref_forest; | |
674 | } | |
675 | ||
676 | /* Dump information about the roots of forest FOREST. */ | |
677 | ||
678 | void | |
679 | fma_root_node::dump_info (fma_forest *forest) | |
680 | { | |
681 | gcc_assert (dump_file); | |
682 | ||
683 | if (this == forest->get_roots ()->front ()) | |
684 | fprintf (dump_file, "Instruction(s) at root of forest #%d:", | |
685 | forest->get_id ()); | |
686 | fprintf (dump_file, " %d", INSN_UID (this->m_insn)); | |
687 | if (this == forest->get_roots ()->back ()) | |
688 | fprintf (dump_file, "\n"); | |
689 | } | |
690 | ||
691 | /* Wrapper around fma_root_node::dump_info for use as parameter of function | |
692 | pointer type in func_fma_steering::dfs. */ | |
693 | ||
694 | static void | |
695 | dump_tree_root_info (fma_forest *forest, fma_root_node *node) | |
696 | { | |
697 | node->dump_info (forest); | |
698 | } | |
699 | ||
700 | func_fma_steering::func_fma_steering () : m_fpu_balance (0) | |
701 | { | |
702 | this->m_insn_fma_head_map = new hash_map<rtx_insn *, fma_node *>; | |
703 | this->m_fma_forests.clear (); | |
704 | this->m_next_forest_id = 0; | |
705 | } | |
706 | ||
707 | func_fma_steering::~func_fma_steering () | |
708 | { | |
709 | delete this->m_insn_fma_head_map; | |
710 | } | |
711 | ||
712 | int | |
713 | func_fma_steering::get_fpu_balance () | |
714 | { | |
715 | return this->m_fpu_balance; | |
716 | } | |
717 | ||
718 | void | |
719 | func_fma_steering::remove_forest (fma_forest *forest) | |
720 | { | |
721 | this->m_fma_forests.remove (forest); | |
722 | } | |
723 | ||
724 | /* Memorize the mapping of this instruction to its fma_node object and return | |
725 | whether such a mapping existed. */ | |
726 | ||
727 | bool | |
728 | func_fma_steering::put_node (fma_node *node) | |
729 | { | |
730 | return this->m_insn_fma_head_map->put (node->get_insn (), node); | |
731 | } | |
732 | ||
733 | /* Update the current balance considering a node with the given PARITY. */ | |
734 | ||
735 | void | |
736 | func_fma_steering::update_balance (int parity) | |
737 | { | |
738 | this->m_fpu_balance = parity ? this->m_fpu_balance + 1 | |
739 | : this->m_fpu_balance - 1; | |
740 | } | |
741 | ||
742 | /* Return whether an fma_node object exists for instruction INSN and, if not, | |
743 | allocate one in *RET. */ | |
744 | ||
745 | fma_node * | |
746 | func_fma_steering::get_fma_node (rtx_insn *insn) | |
747 | { | |
748 | fma_node **fma_slot; | |
749 | ||
750 | fma_slot = this->m_insn_fma_head_map->get (insn); | |
751 | if (fma_slot) | |
752 | return *fma_slot; | |
753 | return NULL; | |
754 | } | |
755 | ||
756 | /* Allocate and initialize fma_node objects for the FMUL or FMADD/FMSUB | |
757 | instruction in CHAIN->insn and its dependent FMADD/FMSUB instructions, all | |
758 | part of FOREST. For the children, the associated head is left untouched | |
759 | (and thus null) as this function will be called again when considering the | |
760 | chain where they are def. For the parent, the chain is given in HEAD. */ | |
761 | ||
762 | void | |
763 | func_fma_steering::analyze_fma_fmul_insn (fma_forest *ref_forest, | |
764 | du_chain *chain, du_head_p head) | |
765 | { | |
766 | fma_forest *forest; | |
767 | fma_node *node = this->get_fma_node (chain->insn); | |
768 | ||
769 | /* This is a root node. */ | |
770 | if (!node) | |
771 | { | |
772 | fma_root_node *root_node; | |
773 | ||
774 | root_node = new fma_root_node (this, chain, this->m_next_forest_id++); | |
775 | forest = root_node->get_forest (); | |
776 | node = root_node; | |
777 | ||
778 | /* Until proved otherwise, assume this root is not part of an existing | |
779 | forest and thus add its forest to the list of forests. */ | |
780 | this->m_fma_forests.push_back (forest); | |
781 | } | |
782 | else | |
783 | forest = node->get_forest (); | |
784 | ||
785 | node->set_head (head); | |
786 | ||
787 | /* fma_node is part of a chain with several defs, one of them having already | |
788 | been processed. The root of that already processed def is the canonical | |
789 | one and the root of fma_node is added to its forest. No need to process | |
790 | the children nodes as they were already processed when the other def was | |
791 | processed. */ | |
792 | if (ref_forest) | |
793 | { | |
794 | ref_forest->merge_forest (forest); | |
795 | return; | |
796 | } | |
797 | ||
798 | for (chain = head->first; chain; chain = chain->next_use) | |
799 | { | |
800 | fma_node *child_fma; | |
801 | rtx fma_rtx, *accum_rtx_p; | |
802 | ||
803 | if (!is_fmul_fmac_insn (chain->insn, false)) | |
804 | continue; | |
805 | ||
806 | /* Get FMA rtx. */ | |
807 | fma_rtx = SET_SRC (PATTERN (chain->insn)); | |
808 | /* FMA is negated. */ | |
809 | if (GET_CODE (fma_rtx) == NEG) | |
810 | fma_rtx = XEXP (fma_rtx, 0); | |
811 | /* Get accumulator rtx. */ | |
812 | accum_rtx_p = &XEXP (fma_rtx, 2); | |
813 | /* Accumulator is negated. */ | |
814 | if (!REG_P (*accum_rtx_p)) | |
815 | accum_rtx_p = &XEXP (*accum_rtx_p, 0); | |
816 | ||
817 | /* This du_chain structure is not for the accumulator register. */ | |
818 | if (accum_rtx_p != chain->loc) | |
819 | continue; | |
820 | ||
821 | /* If object already created, this is a loop carried dependency. We | |
822 | don't include this object in the children as we want trees for | |
823 | rename_fma_trees to not be an infinite loop. */ | |
824 | if (this->get_fma_node (chain->insn)) | |
825 | continue; | |
826 | ||
827 | child_fma = new fma_node (node, chain); | |
828 | ||
829 | /* Memorize the mapping of this instruction to its fma_node object | |
830 | as it will be processed for the chain starting at its destination | |
831 | register later. */ | |
832 | ||
833 | /* Link to siblings. */ | |
834 | node->add_child (child_fma); | |
835 | } | |
836 | } | |
837 | ||
838 | /* Perform a depth-first search of the forests of fma_node in | |
839 | THIS->m_fma_forests, calling PROCESS_FOREST () on each fma_forest object in | |
840 | THIS->m_fma_forests list, PROCESS_ROOT () on each tree root and | |
841 | PROCESS_NODE () on each node. If FREE is true, free all std::list in the | |
842 | same dfs. */ | |
843 | ||
844 | void | |
845 | func_fma_steering::dfs (void (*process_forest) (fma_forest *), | |
846 | void (*process_root) (fma_forest *, fma_root_node *), | |
847 | void (*process_node) (fma_forest *, fma_node *), | |
848 | bool free) | |
849 | { | |
850 | vec<fma_node *> to_process; | |
851 | std::list<fma_forest *>::iterator forest_iter; | |
852 | ||
853 | to_process.create (0); | |
854 | ||
855 | /* For each forest. */ | |
856 | for (forest_iter = this->m_fma_forests.begin (); | |
857 | forest_iter != this->m_fma_forests.end (); forest_iter++) | |
858 | { | |
859 | std::list<fma_root_node *>::iterator root_iter; | |
860 | ||
861 | if (process_forest) | |
862 | process_forest (*forest_iter); | |
863 | ||
864 | /* For each tree root in this forest. */ | |
865 | for (root_iter = (*forest_iter)->get_roots ()->begin (); | |
866 | root_iter != (*forest_iter)->get_roots ()->end (); root_iter++) | |
867 | { | |
868 | if (process_root) | |
869 | process_root (*forest_iter, *root_iter); | |
870 | to_process.safe_push (*root_iter); | |
871 | } | |
872 | ||
873 | /* For each tree node in this forest. */ | |
874 | while (!to_process.is_empty ()) | |
875 | { | |
876 | fma_node *node; | |
877 | std::list<fma_node *>::iterator child_iter; | |
878 | ||
879 | node = to_process.pop (); | |
880 | ||
881 | if (process_node) | |
882 | process_node (*forest_iter, node); | |
883 | ||
884 | /* Absence of children might indicate an alternate root of a *chain*. | |
885 | It's ok to skip it here as the chain will be renamed when | |
886 | processing the canonical root for that chain. */ | |
887 | if (node->get_children ()->empty ()) | |
888 | continue; | |
889 | ||
890 | for (child_iter = node->get_children ()->begin (); | |
891 | child_iter != node->get_children ()->end (); child_iter++) | |
892 | to_process.safe_push (*child_iter); | |
893 | if (free) | |
894 | { | |
895 | if (node->root_p ()) | |
896 | delete static_cast<fma_root_node *> (node); | |
897 | else | |
898 | delete node; | |
899 | } | |
900 | } | |
901 | if (free) | |
902 | delete *forest_iter; | |
903 | } | |
904 | ||
905 | to_process.release (); | |
906 | } | |
907 | ||
908 | /* Build the dependency trees of FMUL and FMADD/FMSUB instructions. */ | |
909 | ||
910 | void | |
911 | func_fma_steering::analyze () | |
912 | { | |
913 | int i, n_blocks, *bb_dfs_preorder; | |
914 | basic_block bb; | |
915 | rtx_insn *insn; | |
916 | ||
917 | bb_dfs_preorder = XNEWVEC (int, last_basic_block_for_fn (cfun)); | |
918 | n_blocks = pre_and_rev_post_order_compute (bb_dfs_preorder, NULL, false); | |
919 | ||
920 | /* Browse the graph of basic blocks looking for FMUL or FMADD/FMSUB | |
921 | instructions. */ | |
922 | for (i = 0; i < n_blocks; i++) | |
923 | { | |
924 | bb = BASIC_BLOCK_FOR_FN (cfun, bb_dfs_preorder[i]); | |
925 | FOR_BB_INSNS (bb, insn) | |
926 | { | |
927 | operand_rr_info *dest_op_info; | |
8fd23c8e | 928 | struct du_chain *chain = NULL; |
fde9b31b | 929 | unsigned dest_regno; |
8fd23c8e ML |
930 | fma_forest *forest = NULL; |
931 | du_head_p head = NULL; | |
fde9b31b TP |
932 | int i; |
933 | ||
934 | if (!is_fmul_fmac_insn (insn, true)) | |
935 | continue; | |
936 | ||
937 | /* Search the chain where this instruction is (one of) the root. */ | |
938 | dest_op_info = insn_rr[INSN_UID (insn)].op_info; | |
939 | dest_regno = REGNO (SET_DEST (PATTERN (insn))); | |
940 | for (i = 0; i < dest_op_info->n_chains; i++) | |
941 | { | |
942 | /* The register tracked by this chain does not match the | |
943 | destination register of insn. */ | |
944 | if (dest_op_info->heads[i]->regno != dest_regno) | |
945 | continue; | |
946 | ||
947 | head = dest_op_info->heads[i]; | |
948 | /* The chain was merged in another, find the new head. */ | |
949 | if (!head->first) | |
950 | head = regrename_chain_from_id (head->id); | |
951 | ||
952 | /* Search the chain element for this instruction and, if another | |
953 | FMUL or FMADD/FMSUB instruction was already processed, note | |
954 | the forest of its tree. */ | |
955 | forest = NULL; | |
956 | for (chain = head->first; chain; chain = chain->next_use) | |
957 | { | |
958 | fma_node **fma_slot; | |
959 | ||
960 | if (!is_fmul_fmac_insn (chain->insn, true)) | |
961 | continue; | |
962 | ||
963 | /* This is a use, continue. */ | |
964 | if (chain->loc != &SET_DEST (PATTERN (chain->insn))) | |
965 | continue; | |
966 | ||
967 | if (chain->insn == insn) | |
968 | break; | |
969 | ||
970 | fma_slot = this->m_insn_fma_head_map->get (chain->insn); | |
971 | if (fma_slot && (*fma_slot)->get_children ()) | |
972 | forest = (*fma_slot)->get_forest (); | |
973 | } | |
974 | if (chain) | |
975 | break; | |
976 | } | |
977 | ||
0e82a9b9 BC |
978 | /* Due to implementation of regrename, dest register can slip away |
979 | from regrename's analysis. As a result, there is no chain for | |
980 | the destination register of insn. We simply skip the insn even | |
981 | it is a fmul/fmac instruction. This can happen when the dest | |
982 | register is also a source register of insn and one of the below | |
983 | conditions is satisfied: | |
984 | 1) the source reg is setup in larger mode than this insn; | |
985 | 2) the source reg is uninitialized; | |
986 | 3) the source reg is passed in as parameter. */ | |
987 | if (i < dest_op_info->n_chains) | |
988 | this->analyze_fma_fmul_insn (forest, chain, head); | |
fde9b31b TP |
989 | } |
990 | } | |
991 | free (bb_dfs_preorder); | |
992 | ||
993 | if (dump_file) | |
994 | this->dfs (dump_forest_info, dump_tree_root_info, dump_tree_node_info, | |
995 | false); | |
996 | } | |
997 | ||
998 | /* Perform the renaming of all chains with FMUL or FMADD/FMSUB involved with | |
999 | the objective of keeping FPU pipeline balanced in term of instructions and | |
1000 | having FMADD/FMSUB with dependencies on previous FMUL or FMADD/FMSUB be | |
1001 | scheduled on the same pipeline. */ | |
1002 | ||
1003 | void | |
1004 | func_fma_steering::rename_fma_trees () | |
1005 | { | |
1006 | this->dfs (dispatch_forest, NULL, rename_fma_node, true); | |
1007 | ||
1008 | if (dump_file && !this->m_fma_forests.empty ()) | |
1009 | { | |
1010 | fprintf (dump_file, "Function %s has ", current_function_name ()); | |
1011 | if (this->m_fpu_balance == 0) | |
1012 | fprintf (dump_file, "perfect balance of FMUL/FMA chains between the " | |
1013 | "two FPU pipelines\n"); | |
1014 | else if (this->m_fpu_balance > 0) | |
1015 | fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the second " | |
1016 | "FPU pipeline\n", this->m_fpu_balance); | |
1017 | else /* this->m_fpu_balance < 0 */ | |
1018 | fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the first " | |
1019 | "FPU pipeline\n", - this->m_fpu_balance); | |
1020 | } | |
1021 | } | |
1022 | ||
1023 | /* Execute FMA steering pass. */ | |
1024 | ||
1025 | void | |
1026 | func_fma_steering::execute_fma_steering () | |
1027 | { | |
1028 | df_set_flags (DF_LR_RUN_DCE); | |
1029 | df_note_add_problem (); | |
1030 | df_analyze (); | |
1031 | df_set_flags (DF_DEFER_INSN_RESCAN); | |
1032 | ||
1033 | regrename_init (true); | |
1034 | regrename_analyze (NULL); | |
1035 | this->analyze (); | |
1036 | this->rename_fma_trees (); | |
1037 | regrename_finish (); | |
1038 | } | |
1039 | ||
1040 | const pass_data pass_data_fma_steering = | |
1041 | { | |
1042 | RTL_PASS, /* type */ | |
1043 | "fma_steering", /* name */ | |
1044 | OPTGROUP_NONE, /* optinfo_flags */ | |
1045 | TV_NONE, /* tv_id */ | |
1046 | 0, /* properties_required */ | |
1047 | 0, /* properties_provided */ | |
1048 | 0, /* properties_destroyed */ | |
1049 | 0, /* todo_flags_start */ | |
1050 | TODO_df_finish, /* todo_flags_finish */ | |
1051 | }; | |
1052 | ||
1053 | class pass_fma_steering : public rtl_opt_pass | |
1054 | { | |
1055 | public: | |
1056 | pass_fma_steering (gcc::context *ctxt) | |
1057 | : rtl_opt_pass (pass_data_fma_steering, ctxt) | |
1058 | {} | |
1059 | ||
1060 | /* opt_pass methods: */ | |
1061 | virtual bool gate (function *) | |
1062 | { | |
b175b679 | 1063 | return (aarch64_tune_params.extra_tuning_flags |
dfba575f JG |
1064 | & AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) |
1065 | && optimize >= 2; | |
fde9b31b TP |
1066 | } |
1067 | ||
1068 | virtual unsigned int execute (function *) | |
1069 | { | |
1070 | func_fma_steering *fma_steering = new func_fma_steering; | |
1071 | fma_steering->execute_fma_steering (); | |
1072 | delete fma_steering; | |
1073 | return 0; | |
1074 | } | |
1075 | ||
1076 | }; // class pass_fma_steering | |
1077 | ||
1078 | /* Create a new fma steering pass instance. */ | |
1079 | ||
5d4d7840 | 1080 | rtl_opt_pass * |
fde9b31b TP |
1081 | make_pass_fma_steering (gcc::context *ctxt) |
1082 | { | |
1083 | return new pass_fma_steering (ctxt); | |
1084 | } |