]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/aarch64/cortex-a57-fma-steering.c
Move MEMMODEL_* from coretypes.h to memmodel.h
[thirdparty/gcc.git] / gcc / config / aarch64 / cortex-a57-fma-steering.c
1 /* FMA steering optimization pass for Cortex-A57.
2 Copyright (C) 2015-2016 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #define INCLUDE_LIST
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "df.h"
29 #include "insn-config.h"
30 #include "regs.h"
31 #include "memmodel.h"
32 #include "emit-rtl.h"
33 #include "recog.h"
34 #include "cfganal.h"
35 #include "insn-attr.h"
36 #include "context.h"
37 #include "tree-pass.h"
38 #include "regrename.h"
39 #include "cortex-a57-fma-steering.h"
40 #include "aarch64-protos.h"
41
42 /* For better performance, the destination of FMADD/FMSUB instructions should
43 have the same parity as their accumulator register if the accumulator
44 contains the result of a previous FMUL or FMADD/FMSUB instruction if
45 targetting Cortex-A57 processors. Performance is also increased by
46 otherwise keeping a good balance in the parity of the destination register
47 of FMUL or FMADD/FMSUB.
48
49 This pass ensure that registers are renamed so that these conditions hold.
50 We reuse the existing register renaming facility from regrename.c to build
51 dependency chains and expose candidate registers for renaming.
52
53
54 The algorithm has three steps:
55
56 First, the functions of the register renaming pass are called. These
57 analyze the instructions and produce a list of def/use chains of
58 instructions.
59
60 Next, this information is used to build trees of multiply and
61 multiply-accumulate instructions. The roots of these trees are any
62 multiply, or any multiply-accumulate whose accumulator is not dependent on
63 a multiply or multiply-accumulate instruction. A child is added to the
64 tree where a dependency chain exists between the result of the parent
65 instruction and the accumulator operand of the child, as in the diagram
66 below:
67
68 fmul s2, s0, s1
69 / \
70 fmadd s0, s1, s1, s2 fmadd s4, s1, s1 s2
71 |
72 fmadd s3, s1, s1, s0
73
74 Trees made of a single instruction are permitted.
75
76 Finally, renaming is performed. The parity of the destination register at
77 the root of a tree is checked against the current balance of multiply and
78 multiply-accumulate on each pipeline. If necessary, the root of a tree is
79 renamed, in which case the rest of the tree is then renamed to keep the same
80 parity in the destination registers of all instructions in the tree. */
81
82
83
84 /* Forward declarations. */
85 class fma_node;
86 class fma_root_node;
87 class func_fma_steering;
88
89 /* Dependencies between FMUL or FMADD/FMSUB instructions and subsequent
90 FMADD/FMSUB instructions form a graph. This is because alternatives can
91 make a register be set by several FMUL or FMADD/FMSUB instructions in
92 different basic blocks and because of loops. For ease of browsing, the
93 connected components of this graph are broken up into forests of trees.
94 Forests are represented by fma_forest objects, contained in the fma_forests
95 list. Using a separate object for the forests allows for a better use of
96 memory as there is some information that is global to each forest, such as
97 the number of FMSUB and FMADD/FMSUB instructions currently scheduled on each
98 floating-point execution pipelines. */
99
100 class fma_forest
101 {
102 public:
103 fma_forest (func_fma_steering *, fma_root_node *, int);
104 ~fma_forest ();
105
106 int get_id ();
107 std::list<fma_root_node *> *get_roots ();
108 func_fma_steering *get_globals ();
109 int get_target_parity ();
110 void fma_node_created (fma_node *);
111 void merge_forest (fma_forest *);
112 void dump_info ();
113 void dispatch ();
114
115 private:
116 /* The list of roots that form this forest. */
117 std::list<fma_root_node *> *m_roots;
118
119 /* Target parity the destination register of all FMUL and FMADD/FMSUB
120 instructions in this forest should have. */
121 int m_target_parity;
122
123 /* Link to the instance of func_fma_steering holding data related to the
124 FMA steering of the current function (cfun). */
125 func_fma_steering *m_globals;
126
127 /* Identifier for the forest (used for dumps). */
128 int m_id;
129
130 /* Total number of nodes in the forest (for statistics). */
131 int m_nb_nodes;
132 };
133
134 class fma_node
135 {
136 public:
137 fma_node (fma_node *parent, du_chain *chain);
138 ~fma_node ();
139
140 bool root_p ();
141 fma_forest *get_forest ();
142 std::list<fma_node *> *get_children ();
143 rtx_insn *get_insn ();
144 void add_child (fma_node *);
145 int get_parity ();
146 void set_head (du_head *);
147 void rename (fma_forest *);
148 void dump_info (fma_forest *);
149
150 protected:
151 /* Root node that lead to this node. */
152 fma_root_node *m_root;
153
154 /* The parent node of this node. If the node belong to a chain with several
155 parent nodes, the first one encountered in a depth-first search is chosen
156 as canonical parent. */
157 fma_node *m_parent;
158
159 /* The list of child nodes. If a chain contains several parent nodes, one is
160 chosen as canonical parent and the others will have no children. */
161 std::list<fma_node *> *m_children;
162
163 /* The associated DU_HEAD chain that the insn represented by this object
164 is (one of) the root of. When a chain contains several roots, the non
165 canonical ones have this field set to NULL. */
166 struct du_head *m_head;
167
168 /* The FMUL or FMADD/FMSUB instruction this object corresponds to. */
169 rtx_insn *m_insn;
170 };
171
172 class fma_root_node : public fma_node
173 {
174 public:
175 fma_root_node (func_fma_steering *, du_chain *, int);
176
177 fma_forest *get_forest ();
178 void set_forest (fma_forest *);
179 void dump_info (fma_forest *);
180
181 private:
182 /* The forest this node belonged to when it was created. */
183 fma_forest *m_forest;
184 };
185
186 /* Class holding all data and methods relative to the FMA steering of a given
187 function. The FMA steering pass could then run in parallel for different
188 functions. */
189
190 class func_fma_steering
191 {
192 public:
193 func_fma_steering ();
194 ~func_fma_steering ();
195
196 int get_fpu_balance ();
197 void remove_forest (fma_forest *);
198 bool put_node (fma_node *);
199 void update_balance (int);
200 fma_node *get_fma_node (rtx_insn *);
201 void analyze_fma_fmul_insn (fma_forest *, du_chain *, du_head_p);
202 void execute_fma_steering ();
203
204 private:
205 void dfs (void (*) (fma_forest *), void (*) (fma_forest *, fma_root_node *),
206 void (*) (fma_forest *, fma_node *), bool);
207 void analyze ();
208 void rename_fma_trees ();
209
210 /* Mapping between FMUL or FMADD/FMSUB instructions and the associated
211 fma_node object. Used when analyzing an instruction that is a root of
212 a chain to find if such an object was created because this instruction
213 is also a use in another chain. */
214 hash_map<rtx_insn *, fma_node *> *m_insn_fma_head_map;
215
216 /* A list of all the forests in a given function. */
217 std::list<fma_forest *> m_fma_forests;
218
219 /* Balance of FMUL and FMADD/FMSUB instructions between the two FPU
220 pipelines:
221 < 0: more instruction dispatched to the first pipeline
222 == 0: perfect balance
223 > 0: more instruction dispatched to the second pipeline. */
224 int m_fpu_balance;
225
226 /* Identifier for the next forest created. */
227 int m_next_forest_id;
228 };
229
230 /* Rename the register HEAD->regno in all the insns in the chain HEAD to any
231 register not in the set UNAVAILABLE. Adapted from rename_chains in
232 regrename.c. */
233
234 static bool
235 rename_single_chain (du_head_p head, HARD_REG_SET *unavailable)
236 {
237 int best_new_reg;
238 int n_uses = 0;
239 struct du_chain *tmp;
240 int reg = head->regno;
241 enum reg_class super_class = NO_REGS;
242
243 if (head->cannot_rename)
244 return false;
245
246 if (fixed_regs[reg] || global_regs[reg]
247 || (frame_pointer_needed && reg == HARD_FRAME_POINTER_REGNUM))
248 return false;
249
250 /* Iterate over elements in the chain in order to:
251 1. Count number of uses, and narrow the set of registers we can
252 use for renaming.
253 2. Compute the superunion of register classes in this chain. */
254 for (tmp = head->first; tmp; tmp = tmp->next_use)
255 {
256 if (DEBUG_INSN_P (tmp->insn))
257 continue;
258 n_uses++;
259 IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]);
260 super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
261 }
262
263 if (n_uses < 1)
264 return false;
265
266 best_new_reg = find_rename_reg (head, super_class, unavailable, reg,
267 false);
268
269 if (dump_file)
270 {
271 fprintf (dump_file, "Register %s in insn %d", reg_names[reg],
272 INSN_UID (head->first->insn));
273 if (head->need_caller_save_reg)
274 fprintf (dump_file, " crosses a call");
275 }
276
277 if (best_new_reg == reg)
278 {
279 if (dump_file)
280 fprintf (dump_file, "; no available better choice\n");
281 return false;
282 }
283
284 if (regrename_do_replace (head, best_new_reg))
285 {
286 if (dump_file)
287 fprintf (dump_file, ", renamed as %s\n", reg_names[best_new_reg]);
288 df_set_regs_ever_live (best_new_reg, true);
289 }
290 else
291 {
292 if (dump_file)
293 fprintf (dump_file, ", renaming as %s failed\n",
294 reg_names[best_new_reg]);
295 return false;
296 }
297 return true;
298 }
299
300 /* Return whether T is the attribute of a FMADD/FMSUB-like instruction. */
301
302 static bool
303 is_fmac_op (enum attr_type t)
304 {
305 return (t == TYPE_FMACS) || (t == TYPE_FMACD) || (t == TYPE_NEON_FP_MLA_S);
306 }
307
308 /* Return whether T is the attribute of a FMUL instruction. */
309
310 static bool
311 is_fmul_op (enum attr_type t)
312 {
313 return (t == TYPE_FMULS) || (t == TYPE_FMULD) || (t == TYPE_NEON_FP_MUL_S);
314 }
315
316 /* Return whether INSN is an FMUL (if FMUL_OK is true) or FMADD/FMSUB
317 instruction. */
318
319 static bool
320 is_fmul_fmac_insn (rtx_insn *insn, bool fmul_ok)
321 {
322 enum attr_type t;
323
324 if (!NONDEBUG_INSN_P (insn))
325 return false;
326
327 if (recog_memoized (insn) < 0)
328 return false;
329
330 /* Only consider chain(s) this instruction is a root of if this is an FMUL or
331 FMADD/FMSUB instruction. This allows to avoid browsing chains of all
332 instructions for FMUL or FMADD/FMSUB in them. */
333 t = get_attr_type (insn);
334 return is_fmac_op (t) || (fmul_ok && is_fmul_op (t));
335 }
336
337
338 /*
339 * Class fma_forest method definitions.
340 */
341
342 fma_forest::fma_forest (func_fma_steering *fma_steer, fma_root_node *fma_root,
343 int id)
344 {
345 memset (this, 0, sizeof (*this));
346 this->m_globals = fma_steer;
347 this->m_roots = new std::list<fma_root_node *>;
348 this->m_roots->push_back (fma_root);
349 this->m_id = id;
350 }
351
352 fma_forest::~fma_forest ()
353 {
354 delete this->m_roots;
355 }
356
357 int
358 fma_forest::get_id ()
359 {
360 return this->m_id;
361 }
362
363 std::list<fma_root_node *> *
364 fma_forest::get_roots ()
365 {
366 return this->m_roots;
367 }
368
369 func_fma_steering *
370 fma_forest::get_globals ()
371 {
372 return this->m_globals;
373 }
374
375 int
376 fma_forest::get_target_parity ()
377 {
378 return this->m_target_parity;
379 }
380
381 /* Act on the creation of NODE by updating statistics in FOREST and adding an
382 entry for it in the func_fma_steering hashmap. */
383
384 void fma_forest::fma_node_created (fma_node *node)
385 {
386 bool created = !this->m_globals->put_node (node);
387
388 gcc_assert (created);
389 this->m_nb_nodes++;
390 }
391
392 /* Merge REF_FOREST and OTHER_FOREST together, making REF_FOREST the canonical
393 fma_forest object to represent both. */
394
395 void
396 fma_forest::merge_forest (fma_forest *other_forest)
397 {
398 std::list<fma_root_node *> *other_roots;
399 std::list<fma_root_node *>::iterator other_root_iter;
400
401 if (this == other_forest)
402 return;
403
404 other_roots = other_forest->m_roots;
405
406 /* Update root nodes' pointer to forest. */
407 for (other_root_iter = other_roots->begin ();
408 other_root_iter != other_roots->end (); other_root_iter++)
409 (*other_root_iter)->set_forest (this);
410
411 /* Remove other_forest from the list of forests and move its tree roots in
412 the list of tree roots of ref_forest. */
413 this->m_globals->remove_forest (other_forest);
414 this->m_roots->splice (this->m_roots->begin (), *other_roots);
415 delete other_forest;
416
417 this->m_nb_nodes += other_forest->m_nb_nodes;
418 }
419
420 /* Dump information about the forest FOREST. */
421
422 void
423 fma_forest::dump_info ()
424 {
425 gcc_assert (dump_file);
426
427 fprintf (dump_file, "Forest #%d has %d nodes\n", this->m_id,
428 this->m_nb_nodes);
429 }
430
431 /* Wrapper around fma_forest::dump_info for use as parameter of function
432 pointer type in func_fma_steering::dfs. */
433
434 static void
435 dump_forest_info (fma_forest *forest)
436 {
437 forest->dump_info ();
438 }
439
440 /* Dispatch forest to the least utilized pipeline. */
441
442 void
443 fma_forest::dispatch ()
444 {
445 this->m_target_parity = this->m_roots->front ()->get_parity ();
446 int fpu_balance = this->m_globals->get_fpu_balance ();
447 if (fpu_balance != 0)
448 this->m_target_parity = (fpu_balance < 0);
449
450 if (dump_file)
451 fprintf (dump_file, "Target parity for forest #%d: %s\n", this->m_id,
452 this->m_target_parity ? "odd" : "even");
453 }
454
455 /* Wrapper around fma_forest::dispatch for use as parameter of function pointer
456 type in func_fma_steering::dfs. */
457
458 static void
459 dispatch_forest (fma_forest *forest)
460 {
461 forest->dispatch ();
462 }
463
464 fma_node::fma_node (fma_node *parent, du_chain *chain)
465 {
466 memset (this, 0, sizeof (*this));
467 this->m_parent = parent;
468 this->m_children = new std::list<fma_node *>;
469 this->m_insn = chain->insn;
470 /* root_p () cannot be used to check for root before root is set. */
471 if (this->m_parent == this)
472 this->m_root = static_cast<fma_root_node *> (parent);
473 else
474 {
475 this->m_root = parent->m_root;
476 this->get_forest ()->fma_node_created (this);
477 }
478 }
479
480 fma_node::~fma_node ()
481 {
482 delete this->m_children;
483 }
484
485 std::list<fma_node *> *
486 fma_node::get_children ()
487 {
488 return this->m_children;
489 }
490
491 rtx_insn *
492 fma_node::get_insn ()
493 {
494 return this->m_insn;
495 }
496
497 void
498 fma_node::set_head (du_head *head)
499 {
500 gcc_assert (!this->m_head);
501 this->m_head = head;
502 }
503
504 /* Add a child to this node in the list of children. */
505
506 void
507 fma_node::add_child (fma_node *child)
508 {
509 this->m_children->push_back (child);
510 }
511
512 /* Return the parity of the destination register of the instruction represented
513 by this node. */
514
515 int
516 fma_node::get_parity ()
517 {
518 return this->m_head->regno % 2;
519 }
520
521 /* Get the actual forest associated with a non root node as the one the node
522 points to might have been merged into another one. In that case the pointer
523 in the root nodes are updated so we return the forest pointer of a root node
524 pointed to by the initial forest. Despite being a oneliner, this method is
525 defined here as it references a method from fma_root_node. */
526
527 fma_forest *
528 fma_node::get_forest ()
529 {
530 return this->m_root->get_forest ();
531 }
532
533 /* Return whether a node is a root node. */
534
535 bool
536 fma_node::root_p ()
537 {
538 return this->m_root == this;
539 }
540
541 /* Dump information about the children of node FMA_NODE in forest FOREST. */
542
543 void
544 fma_node::dump_info (ATTRIBUTE_UNUSED fma_forest *forest)
545 {
546 struct du_chain *chain;
547 std::list<fma_node *>::iterator fma_child;
548
549 gcc_assert (dump_file);
550
551 if (this->get_children ()->empty ())
552 return;
553
554 fprintf (dump_file, "Instruction(s)");
555 for (chain = this->m_head->first; chain; chain = chain->next_use)
556 {
557 if (!is_fmul_fmac_insn (chain->insn, true))
558 continue;
559
560 if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
561 continue;
562
563 fprintf (dump_file, " %d", INSN_UID (chain->insn));
564 }
565
566 fprintf (dump_file, " is(are) accumulator dependency of instructions");
567 for (fma_child = this->get_children ()->begin ();
568 fma_child != this->get_children ()->end (); fma_child++)
569 fprintf (dump_file, " %d", INSN_UID ((*fma_child)->m_insn));
570 fprintf (dump_file, "\n");
571 }
572
573 /* Wrapper around fma_node::dump_info for use as parameter of function pointer
574 type in func_fma_steering::dfs. */
575
576 static void
577 dump_tree_node_info (fma_forest *forest, fma_node *node)
578 {
579 node->dump_info (forest);
580 }
581
582 /* Rename the destination register of a single FMUL or FMADD/FMSUB instruction
583 represented by FMA_NODE to a register that respect the target parity for
584 FOREST or with same parity of the instruction represented by its parent node
585 if it has one. */
586
587 void
588 fma_node::rename (fma_forest *forest)
589 {
590 int cur_parity, target_parity;
591
592 /* This is alternate root of a chain and thus has no children. It will be
593 renamed when processing the canonical root for that chain. */
594 if (!this->m_head)
595 return;
596
597 target_parity = forest->get_target_parity ();
598 if (this->m_parent)
599 target_parity = this->m_parent->get_parity ();
600 cur_parity = this->get_parity ();
601
602 /* Rename if parity differs. */
603 if (cur_parity != target_parity)
604 {
605 rtx_insn *insn = this->m_insn;
606 HARD_REG_SET unavailable;
607 enum machine_mode mode;
608 int reg;
609
610 if (dump_file)
611 {
612 unsigned cur_dest_reg = this->m_head->regno;
613
614 fprintf (dump_file, "FMA or FMUL at insn %d but destination "
615 "register (%s) has different parity from expected to "
616 "maximize FPU pipeline utilization\n", INSN_UID (insn),
617 reg_names[cur_dest_reg]);
618 }
619
620 /* Don't clobber traceback for noreturn functions. */
621 CLEAR_HARD_REG_SET (unavailable);
622 if (frame_pointer_needed)
623 {
624 add_to_hard_reg_set (&unavailable, Pmode, FRAME_POINTER_REGNUM);
625 add_to_hard_reg_set (&unavailable, Pmode, HARD_FRAME_POINTER_REGNUM);
626 }
627
628 /* Exclude registers with wrong parity. */
629 mode = GET_MODE (SET_DEST (PATTERN (insn)));
630 for (reg = cur_parity; reg < FIRST_PSEUDO_REGISTER; reg += 2)
631 add_to_hard_reg_set (&unavailable, mode, reg);
632
633 if (!rename_single_chain (this->m_head, &unavailable))
634 {
635 if (dump_file)
636 fprintf (dump_file, "Destination register of insn %d could not be "
637 "renamed. Dependent FMA insns will use this parity from "
638 "there on.\n", INSN_UID (insn));
639 }
640 else
641 cur_parity = target_parity;
642 }
643
644 forest->get_globals ()->update_balance (cur_parity);
645 }
646
647 /* Wrapper around fma_node::dump_info for use as parameter of function pointer
648 type in func_fma_steering::dfs. */
649
650 static void
651 rename_fma_node (fma_forest *forest, fma_node *node)
652 {
653 node->rename (forest);
654 }
655
656 fma_root_node::fma_root_node (func_fma_steering *globals, du_chain *chain,
657 int id) : fma_node (this, chain)
658 {
659 this->m_forest = new fma_forest (globals, this, id);
660 this->m_forest->fma_node_created (this);
661 }
662
663 fma_forest *
664 fma_root_node::get_forest ()
665 {
666 return this->m_forest;
667 }
668
669 void
670 fma_root_node::set_forest (fma_forest *ref_forest)
671 {
672 this->m_forest = ref_forest;
673 }
674
675 /* Dump information about the roots of forest FOREST. */
676
677 void
678 fma_root_node::dump_info (fma_forest *forest)
679 {
680 gcc_assert (dump_file);
681
682 if (this == forest->get_roots ()->front ())
683 fprintf (dump_file, "Instruction(s) at root of forest #%d:",
684 forest->get_id ());
685 fprintf (dump_file, " %d", INSN_UID (this->m_insn));
686 if (this == forest->get_roots ()->back ())
687 fprintf (dump_file, "\n");
688 }
689
690 /* Wrapper around fma_root_node::dump_info for use as parameter of function
691 pointer type in func_fma_steering::dfs. */
692
693 static void
694 dump_tree_root_info (fma_forest *forest, fma_root_node *node)
695 {
696 node->dump_info (forest);
697 }
698
699 func_fma_steering::func_fma_steering () : m_fpu_balance (0)
700 {
701 this->m_insn_fma_head_map = new hash_map<rtx_insn *, fma_node *>;
702 this->m_fma_forests.clear ();
703 this->m_next_forest_id = 0;
704 }
705
706 func_fma_steering::~func_fma_steering ()
707 {
708 delete this->m_insn_fma_head_map;
709 }
710
711 int
712 func_fma_steering::get_fpu_balance ()
713 {
714 return this->m_fpu_balance;
715 }
716
717 void
718 func_fma_steering::remove_forest (fma_forest *forest)
719 {
720 this->m_fma_forests.remove (forest);
721 }
722
723 /* Memorize the mapping of this instruction to its fma_node object and return
724 whether such a mapping existed. */
725
726 bool
727 func_fma_steering::put_node (fma_node *node)
728 {
729 return this->m_insn_fma_head_map->put (node->get_insn (), node);
730 }
731
732 /* Update the current balance considering a node with the given PARITY. */
733
734 void
735 func_fma_steering::update_balance (int parity)
736 {
737 this->m_fpu_balance = parity ? this->m_fpu_balance + 1
738 : this->m_fpu_balance - 1;
739 }
740
741 /* Return whether an fma_node object exists for instruction INSN and, if not,
742 allocate one in *RET. */
743
744 fma_node *
745 func_fma_steering::get_fma_node (rtx_insn *insn)
746 {
747 fma_node **fma_slot;
748
749 fma_slot = this->m_insn_fma_head_map->get (insn);
750 if (fma_slot)
751 return *fma_slot;
752 return NULL;
753 }
754
755 /* Allocate and initialize fma_node objects for the FMUL or FMADD/FMSUB
756 instruction in CHAIN->insn and its dependent FMADD/FMSUB instructions, all
757 part of FOREST. For the children, the associated head is left untouched
758 (and thus null) as this function will be called again when considering the
759 chain where they are def. For the parent, the chain is given in HEAD. */
760
761 void
762 func_fma_steering::analyze_fma_fmul_insn (fma_forest *ref_forest,
763 du_chain *chain, du_head_p head)
764 {
765 fma_forest *forest;
766 fma_node *node = this->get_fma_node (chain->insn);
767
768 /* This is a root node. */
769 if (!node)
770 {
771 fma_root_node *root_node;
772
773 root_node = new fma_root_node (this, chain, this->m_next_forest_id++);
774 forest = root_node->get_forest ();
775 node = root_node;
776
777 /* Until proved otherwise, assume this root is not part of an existing
778 forest and thus add its forest to the list of forests. */
779 this->m_fma_forests.push_back (forest);
780 }
781 else
782 forest = node->get_forest ();
783
784 node->set_head (head);
785
786 /* fma_node is part of a chain with several defs, one of them having already
787 been processed. The root of that already processed def is the canonical
788 one and the root of fma_node is added to its forest. No need to process
789 the children nodes as they were already processed when the other def was
790 processed. */
791 if (ref_forest)
792 {
793 ref_forest->merge_forest (forest);
794 return;
795 }
796
797 for (chain = head->first; chain; chain = chain->next_use)
798 {
799 fma_node *child_fma;
800 rtx fma_rtx, *accum_rtx_p;
801
802 if (!is_fmul_fmac_insn (chain->insn, false))
803 continue;
804
805 /* Get FMA rtx. */
806 fma_rtx = SET_SRC (PATTERN (chain->insn));
807 /* FMA is negated. */
808 if (GET_CODE (fma_rtx) == NEG)
809 fma_rtx = XEXP (fma_rtx, 0);
810 /* Get accumulator rtx. */
811 accum_rtx_p = &XEXP (fma_rtx, 2);
812 /* Accumulator is negated. */
813 if (!REG_P (*accum_rtx_p))
814 accum_rtx_p = &XEXP (*accum_rtx_p, 0);
815
816 /* This du_chain structure is not for the accumulator register. */
817 if (accum_rtx_p != chain->loc)
818 continue;
819
820 /* If object already created, this is a loop carried dependency. We
821 don't include this object in the children as we want trees for
822 rename_fma_trees to not be an infinite loop. */
823 if (this->get_fma_node (chain->insn))
824 continue;
825
826 child_fma = new fma_node (node, chain);
827
828 /* Memorize the mapping of this instruction to its fma_node object
829 as it will be processed for the chain starting at its destination
830 register later. */
831
832 /* Link to siblings. */
833 node->add_child (child_fma);
834 }
835 }
836
837 /* Perform a depth-first search of the forests of fma_node in
838 THIS->m_fma_forests, calling PROCESS_FOREST () on each fma_forest object in
839 THIS->m_fma_forests list, PROCESS_ROOT () on each tree root and
840 PROCESS_NODE () on each node. If FREE is true, free all std::list in the
841 same dfs. */
842
843 void
844 func_fma_steering::dfs (void (*process_forest) (fma_forest *),
845 void (*process_root) (fma_forest *, fma_root_node *),
846 void (*process_node) (fma_forest *, fma_node *),
847 bool free)
848 {
849 vec<fma_node *> to_process;
850 std::list<fma_forest *>::iterator forest_iter;
851
852 to_process.create (0);
853
854 /* For each forest. */
855 for (forest_iter = this->m_fma_forests.begin ();
856 forest_iter != this->m_fma_forests.end (); forest_iter++)
857 {
858 std::list<fma_root_node *>::iterator root_iter;
859
860 if (process_forest)
861 process_forest (*forest_iter);
862
863 /* For each tree root in this forest. */
864 for (root_iter = (*forest_iter)->get_roots ()->begin ();
865 root_iter != (*forest_iter)->get_roots ()->end (); root_iter++)
866 {
867 if (process_root)
868 process_root (*forest_iter, *root_iter);
869 to_process.safe_push (*root_iter);
870 }
871
872 /* For each tree node in this forest. */
873 while (!to_process.is_empty ())
874 {
875 fma_node *node;
876 std::list<fma_node *>::iterator child_iter;
877
878 node = to_process.pop ();
879
880 if (process_node)
881 process_node (*forest_iter, node);
882
883 /* Absence of children might indicate an alternate root of a *chain*.
884 It's ok to skip it here as the chain will be renamed when
885 processing the canonical root for that chain. */
886 if (node->get_children ()->empty ())
887 continue;
888
889 for (child_iter = node->get_children ()->begin ();
890 child_iter != node->get_children ()->end (); child_iter++)
891 to_process.safe_push (*child_iter);
892 if (free)
893 {
894 if (node->root_p ())
895 delete static_cast<fma_root_node *> (node);
896 else
897 delete node;
898 }
899 }
900 if (free)
901 delete *forest_iter;
902 }
903
904 to_process.release ();
905 }
906
907 /* Build the dependency trees of FMUL and FMADD/FMSUB instructions. */
908
909 void
910 func_fma_steering::analyze ()
911 {
912 int i, n_blocks, *bb_dfs_preorder;
913 basic_block bb;
914 rtx_insn *insn;
915
916 bb_dfs_preorder = XNEWVEC (int, last_basic_block_for_fn (cfun));
917 n_blocks = pre_and_rev_post_order_compute (bb_dfs_preorder, NULL, false);
918
919 /* Browse the graph of basic blocks looking for FMUL or FMADD/FMSUB
920 instructions. */
921 for (i = 0; i < n_blocks; i++)
922 {
923 bb = BASIC_BLOCK_FOR_FN (cfun, bb_dfs_preorder[i]);
924 FOR_BB_INSNS (bb, insn)
925 {
926 operand_rr_info *dest_op_info;
927 struct du_chain *chain;
928 unsigned dest_regno;
929 fma_forest *forest;
930 du_head_p head;
931 int i;
932
933 if (!is_fmul_fmac_insn (insn, true))
934 continue;
935
936 /* Search the chain where this instruction is (one of) the root. */
937 dest_op_info = insn_rr[INSN_UID (insn)].op_info;
938 dest_regno = REGNO (SET_DEST (PATTERN (insn)));
939 for (i = 0; i < dest_op_info->n_chains; i++)
940 {
941 /* The register tracked by this chain does not match the
942 destination register of insn. */
943 if (dest_op_info->heads[i]->regno != dest_regno)
944 continue;
945
946 head = dest_op_info->heads[i];
947 /* The chain was merged in another, find the new head. */
948 if (!head->first)
949 head = regrename_chain_from_id (head->id);
950
951 /* Search the chain element for this instruction and, if another
952 FMUL or FMADD/FMSUB instruction was already processed, note
953 the forest of its tree. */
954 forest = NULL;
955 for (chain = head->first; chain; chain = chain->next_use)
956 {
957 fma_node **fma_slot;
958
959 if (!is_fmul_fmac_insn (chain->insn, true))
960 continue;
961
962 /* This is a use, continue. */
963 if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
964 continue;
965
966 if (chain->insn == insn)
967 break;
968
969 fma_slot = this->m_insn_fma_head_map->get (chain->insn);
970 if (fma_slot && (*fma_slot)->get_children ())
971 forest = (*fma_slot)->get_forest ();
972 }
973 if (chain)
974 break;
975 }
976
977 /* We didn't find a chain with a def for this instruction. */
978 gcc_assert (i < dest_op_info->n_chains);
979
980 this->analyze_fma_fmul_insn (forest, chain, head);
981 }
982 }
983 free (bb_dfs_preorder);
984
985 if (dump_file)
986 this->dfs (dump_forest_info, dump_tree_root_info, dump_tree_node_info,
987 false);
988 }
989
990 /* Perform the renaming of all chains with FMUL or FMADD/FMSUB involved with
991 the objective of keeping FPU pipeline balanced in term of instructions and
992 having FMADD/FMSUB with dependencies on previous FMUL or FMADD/FMSUB be
993 scheduled on the same pipeline. */
994
995 void
996 func_fma_steering::rename_fma_trees ()
997 {
998 this->dfs (dispatch_forest, NULL, rename_fma_node, true);
999
1000 if (dump_file && !this->m_fma_forests.empty ())
1001 {
1002 fprintf (dump_file, "Function %s has ", current_function_name ());
1003 if (this->m_fpu_balance == 0)
1004 fprintf (dump_file, "perfect balance of FMUL/FMA chains between the "
1005 "two FPU pipelines\n");
1006 else if (this->m_fpu_balance > 0)
1007 fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the second "
1008 "FPU pipeline\n", this->m_fpu_balance);
1009 else /* this->m_fpu_balance < 0 */
1010 fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the first "
1011 "FPU pipeline\n", - this->m_fpu_balance);
1012 }
1013 }
1014
1015 /* Execute FMA steering pass. */
1016
1017 void
1018 func_fma_steering::execute_fma_steering ()
1019 {
1020 df_set_flags (DF_LR_RUN_DCE);
1021 df_note_add_problem ();
1022 df_analyze ();
1023 df_set_flags (DF_DEFER_INSN_RESCAN);
1024
1025 regrename_init (true);
1026 regrename_analyze (NULL);
1027 this->analyze ();
1028 this->rename_fma_trees ();
1029 regrename_finish ();
1030 }
1031
1032 const pass_data pass_data_fma_steering =
1033 {
1034 RTL_PASS, /* type */
1035 "fma_steering", /* name */
1036 OPTGROUP_NONE, /* optinfo_flags */
1037 TV_NONE, /* tv_id */
1038 0, /* properties_required */
1039 0, /* properties_provided */
1040 0, /* properties_destroyed */
1041 0, /* todo_flags_start */
1042 TODO_df_finish, /* todo_flags_finish */
1043 };
1044
1045 class pass_fma_steering : public rtl_opt_pass
1046 {
1047 public:
1048 pass_fma_steering (gcc::context *ctxt)
1049 : rtl_opt_pass (pass_data_fma_steering, ctxt)
1050 {}
1051
1052 /* opt_pass methods: */
1053 virtual bool gate (function *)
1054 {
1055 return (aarch64_tune_params.extra_tuning_flags
1056 & AARCH64_EXTRA_TUNE_RENAME_FMA_REGS)
1057 && optimize >= 2;
1058 }
1059
1060 virtual unsigned int execute (function *)
1061 {
1062 func_fma_steering *fma_steering = new func_fma_steering;
1063 fma_steering->execute_fma_steering ();
1064 delete fma_steering;
1065 return 0;
1066 }
1067
1068 }; // class pass_fma_steering
1069
1070 /* Create a new fma steering pass instance. */
1071
1072 static rtl_opt_pass *
1073 make_pass_fma_steering (gcc::context *ctxt)
1074 {
1075 return new pass_fma_steering (ctxt);
1076 }
1077
1078 /* Register the FMA steering pass to the pass manager. */
1079
1080 void
1081 aarch64_register_fma_steering ()
1082 {
1083 opt_pass *pass_fma_steering = make_pass_fma_steering (g);
1084
1085 struct register_pass_info fma_steering_info
1086 = { pass_fma_steering, "rnreg", 1, PASS_POS_INSERT_AFTER };
1087
1088 register_pass (&fma_steering_info);
1089 }