]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/aarch64/cortex-a57-fma-steering.c
Remove enum before machine_mode
[thirdparty/gcc.git] / gcc / config / aarch64 / cortex-a57-fma-steering.c
CommitLineData
ee7ef7ab 1/* FMA steering optimization pass for Cortex-A57.
aad93da1 2 Copyright (C) 2015-2017 Free Software Foundation, Inc.
ee7ef7ab 3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21#include "config.h"
d637913b 22#define INCLUDE_LIST
ee7ef7ab 23#include "system.h"
24#include "coretypes.h"
9ef16211 25#include "backend.h"
c1eb80de 26#include "target.h"
9ef16211 27#include "rtl.h"
28#include "df.h"
ee7ef7ab 29#include "insn-config.h"
c1eb80de 30#include "regs.h"
ad7b10a2 31#include "memmodel.h"
c1eb80de 32#include "emit-rtl.h"
33#include "recog.h"
ee7ef7ab 34#include "cfganal.h"
ee7ef7ab 35#include "insn-attr.h"
ee7ef7ab 36#include "context.h"
37#include "tree-pass.h"
38#include "regrename.h"
5e9fcc70 39#include "aarch64-protos.h"
ee7ef7ab 40
ee7ef7ab 41/* For better performance, the destination of FMADD/FMSUB instructions should
42 have the same parity as their accumulator register if the accumulator
43 contains the result of a previous FMUL or FMADD/FMSUB instruction if
44 targetting Cortex-A57 processors. Performance is also increased by
45 otherwise keeping a good balance in the parity of the destination register
46 of FMUL or FMADD/FMSUB.
47
48 This pass ensure that registers are renamed so that these conditions hold.
49 We reuse the existing register renaming facility from regrename.c to build
50 dependency chains and expose candidate registers for renaming.
51
52
53 The algorithm has three steps:
54
55 First, the functions of the register renaming pass are called. These
56 analyze the instructions and produce a list of def/use chains of
57 instructions.
58
59 Next, this information is used to build trees of multiply and
60 multiply-accumulate instructions. The roots of these trees are any
61 multiply, or any multiply-accumulate whose accumulator is not dependent on
62 a multiply or multiply-accumulate instruction. A child is added to the
63 tree where a dependency chain exists between the result of the parent
64 instruction and the accumulator operand of the child, as in the diagram
65 below:
66
67 fmul s2, s0, s1
68 / \
69 fmadd s0, s1, s1, s2 fmadd s4, s1, s1 s2
70 |
71 fmadd s3, s1, s1, s0
72
73 Trees made of a single instruction are permitted.
74
75 Finally, renaming is performed. The parity of the destination register at
76 the root of a tree is checked against the current balance of multiply and
77 multiply-accumulate on each pipeline. If necessary, the root of a tree is
78 renamed, in which case the rest of the tree is then renamed to keep the same
79 parity in the destination registers of all instructions in the tree. */
80
81
82
83/* Forward declarations. */
84class fma_node;
85class fma_root_node;
86class func_fma_steering;
87
88/* Dependencies between FMUL or FMADD/FMSUB instructions and subsequent
89 FMADD/FMSUB instructions form a graph. This is because alternatives can
90 make a register be set by several FMUL or FMADD/FMSUB instructions in
91 different basic blocks and because of loops. For ease of browsing, the
92 connected components of this graph are broken up into forests of trees.
93 Forests are represented by fma_forest objects, contained in the fma_forests
94 list. Using a separate object for the forests allows for a better use of
95 memory as there is some information that is global to each forest, such as
96 the number of FMSUB and FMADD/FMSUB instructions currently scheduled on each
97 floating-point execution pipelines. */
98
99class fma_forest
100{
101public:
102 fma_forest (func_fma_steering *, fma_root_node *, int);
103 ~fma_forest ();
104
105 int get_id ();
106 std::list<fma_root_node *> *get_roots ();
107 func_fma_steering *get_globals ();
108 int get_target_parity ();
109 void fma_node_created (fma_node *);
110 void merge_forest (fma_forest *);
111 void dump_info ();
112 void dispatch ();
113
114private:
115 /* The list of roots that form this forest. */
116 std::list<fma_root_node *> *m_roots;
117
118 /* Target parity the destination register of all FMUL and FMADD/FMSUB
119 instructions in this forest should have. */
120 int m_target_parity;
121
122 /* Link to the instance of func_fma_steering holding data related to the
123 FMA steering of the current function (cfun). */
124 func_fma_steering *m_globals;
125
126 /* Identifier for the forest (used for dumps). */
127 int m_id;
128
129 /* Total number of nodes in the forest (for statistics). */
130 int m_nb_nodes;
131};
132
133class fma_node
134{
135public:
136 fma_node (fma_node *parent, du_chain *chain);
137 ~fma_node ();
138
139 bool root_p ();
140 fma_forest *get_forest ();
141 std::list<fma_node *> *get_children ();
142 rtx_insn *get_insn ();
143 void add_child (fma_node *);
144 int get_parity ();
145 void set_head (du_head *);
146 void rename (fma_forest *);
147 void dump_info (fma_forest *);
148
149protected:
150 /* Root node that lead to this node. */
151 fma_root_node *m_root;
152
153 /* The parent node of this node. If the node belong to a chain with several
154 parent nodes, the first one encountered in a depth-first search is chosen
155 as canonical parent. */
156 fma_node *m_parent;
157
158 /* The list of child nodes. If a chain contains several parent nodes, one is
159 chosen as canonical parent and the others will have no children. */
160 std::list<fma_node *> *m_children;
161
162 /* The associated DU_HEAD chain that the insn represented by this object
163 is (one of) the root of. When a chain contains several roots, the non
164 canonical ones have this field set to NULL. */
165 struct du_head *m_head;
166
167 /* The FMUL or FMADD/FMSUB instruction this object corresponds to. */
168 rtx_insn *m_insn;
169};
170
171class fma_root_node : public fma_node
172{
173public:
174 fma_root_node (func_fma_steering *, du_chain *, int);
175
176 fma_forest *get_forest ();
177 void set_forest (fma_forest *);
178 void dump_info (fma_forest *);
179
180private:
181 /* The forest this node belonged to when it was created. */
182 fma_forest *m_forest;
183};
184
185/* Class holding all data and methods relative to the FMA steering of a given
186 function. The FMA steering pass could then run in parallel for different
187 functions. */
188
189class func_fma_steering
190{
191public:
192 func_fma_steering ();
193 ~func_fma_steering ();
194
195 int get_fpu_balance ();
196 void remove_forest (fma_forest *);
197 bool put_node (fma_node *);
198 void update_balance (int);
199 fma_node *get_fma_node (rtx_insn *);
200 void analyze_fma_fmul_insn (fma_forest *, du_chain *, du_head_p);
201 void execute_fma_steering ();
202
203private:
204 void dfs (void (*) (fma_forest *), void (*) (fma_forest *, fma_root_node *),
205 void (*) (fma_forest *, fma_node *), bool);
206 void analyze ();
207 void rename_fma_trees ();
208
209 /* Mapping between FMUL or FMADD/FMSUB instructions and the associated
210 fma_node object. Used when analyzing an instruction that is a root of
211 a chain to find if such an object was created because this instruction
212 is also a use in another chain. */
213 hash_map<rtx_insn *, fma_node *> *m_insn_fma_head_map;
214
215 /* A list of all the forests in a given function. */
216 std::list<fma_forest *> m_fma_forests;
217
218 /* Balance of FMUL and FMADD/FMSUB instructions between the two FPU
219 pipelines:
220 < 0: more instruction dispatched to the first pipeline
221 == 0: perfect balance
222 > 0: more instruction dispatched to the second pipeline. */
223 int m_fpu_balance;
224
225 /* Identifier for the next forest created. */
226 int m_next_forest_id;
227};
228
229/* Rename the register HEAD->regno in all the insns in the chain HEAD to any
230 register not in the set UNAVAILABLE. Adapted from rename_chains in
231 regrename.c. */
232
233static bool
234rename_single_chain (du_head_p head, HARD_REG_SET *unavailable)
235{
236 int best_new_reg;
237 int n_uses = 0;
238 struct du_chain *tmp;
239 int reg = head->regno;
240 enum reg_class super_class = NO_REGS;
241
242 if (head->cannot_rename)
243 return false;
244
245 if (fixed_regs[reg] || global_regs[reg]
246 || (frame_pointer_needed && reg == HARD_FRAME_POINTER_REGNUM))
247 return false;
248
249 /* Iterate over elements in the chain in order to:
250 1. Count number of uses, and narrow the set of registers we can
251 use for renaming.
252 2. Compute the superunion of register classes in this chain. */
253 for (tmp = head->first; tmp; tmp = tmp->next_use)
254 {
255 if (DEBUG_INSN_P (tmp->insn))
256 continue;
257 n_uses++;
258 IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]);
259 super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
260 }
261
262 if (n_uses < 1)
263 return false;
264
265 best_new_reg = find_rename_reg (head, super_class, unavailable, reg,
266 false);
267
268 if (dump_file)
269 {
270 fprintf (dump_file, "Register %s in insn %d", reg_names[reg],
271 INSN_UID (head->first->insn));
272 if (head->need_caller_save_reg)
273 fprintf (dump_file, " crosses a call");
274 }
275
276 if (best_new_reg == reg)
277 {
278 if (dump_file)
279 fprintf (dump_file, "; no available better choice\n");
280 return false;
281 }
282
688586d5 283 if (regrename_do_replace (head, best_new_reg))
284 {
285 if (dump_file)
286 fprintf (dump_file, ", renamed as %s\n", reg_names[best_new_reg]);
287 df_set_regs_ever_live (best_new_reg, true);
288 }
289 else
290 {
291 if (dump_file)
292 fprintf (dump_file, ", renaming as %s failed\n",
293 reg_names[best_new_reg]);
294 return false;
295 }
ee7ef7ab 296 return true;
297}
298
299/* Return whether T is the attribute of a FMADD/FMSUB-like instruction. */
300
301static bool
302is_fmac_op (enum attr_type t)
303{
304 return (t == TYPE_FMACS) || (t == TYPE_FMACD) || (t == TYPE_NEON_FP_MLA_S);
305}
306
307/* Return whether T is the attribute of a FMUL instruction. */
308
309static bool
310is_fmul_op (enum attr_type t)
311{
312 return (t == TYPE_FMULS) || (t == TYPE_FMULD) || (t == TYPE_NEON_FP_MUL_S);
313}
314
315/* Return whether INSN is an FMUL (if FMUL_OK is true) or FMADD/FMSUB
316 instruction. */
317
318static bool
319is_fmul_fmac_insn (rtx_insn *insn, bool fmul_ok)
320{
321 enum attr_type t;
322
323 if (!NONDEBUG_INSN_P (insn))
324 return false;
325
326 if (recog_memoized (insn) < 0)
327 return false;
328
329 /* Only consider chain(s) this instruction is a root of if this is an FMUL or
330 FMADD/FMSUB instruction. This allows to avoid browsing chains of all
331 instructions for FMUL or FMADD/FMSUB in them. */
332 t = get_attr_type (insn);
333 return is_fmac_op (t) || (fmul_ok && is_fmul_op (t));
334}
335
336
337/*
338 * Class fma_forest method definitions.
339 */
340
341fma_forest::fma_forest (func_fma_steering *fma_steer, fma_root_node *fma_root,
342 int id)
343{
344 memset (this, 0, sizeof (*this));
345 this->m_globals = fma_steer;
346 this->m_roots = new std::list<fma_root_node *>;
347 this->m_roots->push_back (fma_root);
348 this->m_id = id;
349}
350
351fma_forest::~fma_forest ()
352{
353 delete this->m_roots;
354}
355
356int
357fma_forest::get_id ()
358{
359 return this->m_id;
360}
361
362std::list<fma_root_node *> *
363fma_forest::get_roots ()
364{
365 return this->m_roots;
366}
367
368func_fma_steering *
369fma_forest::get_globals ()
370{
371 return this->m_globals;
372}
373
374int
375fma_forest::get_target_parity ()
376{
377 return this->m_target_parity;
378}
379
380/* Act on the creation of NODE by updating statistics in FOREST and adding an
381 entry for it in the func_fma_steering hashmap. */
382
383void fma_forest::fma_node_created (fma_node *node)
384{
385 bool created = !this->m_globals->put_node (node);
386
387 gcc_assert (created);
388 this->m_nb_nodes++;
389}
390
391/* Merge REF_FOREST and OTHER_FOREST together, making REF_FOREST the canonical
392 fma_forest object to represent both. */
393
394void
395fma_forest::merge_forest (fma_forest *other_forest)
396{
397 std::list<fma_root_node *> *other_roots;
398 std::list<fma_root_node *>::iterator other_root_iter;
399
400 if (this == other_forest)
401 return;
402
403 other_roots = other_forest->m_roots;
404
405 /* Update root nodes' pointer to forest. */
406 for (other_root_iter = other_roots->begin ();
407 other_root_iter != other_roots->end (); other_root_iter++)
408 (*other_root_iter)->set_forest (this);
409
410 /* Remove other_forest from the list of forests and move its tree roots in
411 the list of tree roots of ref_forest. */
412 this->m_globals->remove_forest (other_forest);
413 this->m_roots->splice (this->m_roots->begin (), *other_roots);
ee7ef7ab 414 this->m_nb_nodes += other_forest->m_nb_nodes;
98bf84bc 415
416 delete other_forest;
ee7ef7ab 417}
418
419/* Dump information about the forest FOREST. */
420
421void
422fma_forest::dump_info ()
423{
424 gcc_assert (dump_file);
425
426 fprintf (dump_file, "Forest #%d has %d nodes\n", this->m_id,
427 this->m_nb_nodes);
428}
429
430/* Wrapper around fma_forest::dump_info for use as parameter of function
431 pointer type in func_fma_steering::dfs. */
432
433static void
434dump_forest_info (fma_forest *forest)
435{
436 forest->dump_info ();
437}
438
439/* Dispatch forest to the least utilized pipeline. */
440
441void
442fma_forest::dispatch ()
443{
444 this->m_target_parity = this->m_roots->front ()->get_parity ();
445 int fpu_balance = this->m_globals->get_fpu_balance ();
446 if (fpu_balance != 0)
447 this->m_target_parity = (fpu_balance < 0);
448
449 if (dump_file)
450 fprintf (dump_file, "Target parity for forest #%d: %s\n", this->m_id,
451 this->m_target_parity ? "odd" : "even");
452}
453
454/* Wrapper around fma_forest::dispatch for use as parameter of function pointer
455 type in func_fma_steering::dfs. */
456
457static void
458dispatch_forest (fma_forest *forest)
459{
460 forest->dispatch ();
461}
462
463fma_node::fma_node (fma_node *parent, du_chain *chain)
464{
465 memset (this, 0, sizeof (*this));
466 this->m_parent = parent;
467 this->m_children = new std::list<fma_node *>;
468 this->m_insn = chain->insn;
469 /* root_p () cannot be used to check for root before root is set. */
470 if (this->m_parent == this)
471 this->m_root = static_cast<fma_root_node *> (parent);
472 else
473 {
474 this->m_root = parent->m_root;
475 this->get_forest ()->fma_node_created (this);
476 }
477}
478
479fma_node::~fma_node ()
480{
481 delete this->m_children;
482}
483
484std::list<fma_node *> *
485fma_node::get_children ()
486{
487 return this->m_children;
488}
489
490rtx_insn *
491fma_node::get_insn ()
492{
493 return this->m_insn;
494}
495
496void
497fma_node::set_head (du_head *head)
498{
499 gcc_assert (!this->m_head);
500 this->m_head = head;
501}
502
503/* Add a child to this node in the list of children. */
504
505void
506fma_node::add_child (fma_node *child)
507{
508 this->m_children->push_back (child);
509}
510
511/* Return the parity of the destination register of the instruction represented
512 by this node. */
513
514int
515fma_node::get_parity ()
516{
517 return this->m_head->regno % 2;
518}
519
520/* Get the actual forest associated with a non root node as the one the node
521 points to might have been merged into another one. In that case the pointer
522 in the root nodes are updated so we return the forest pointer of a root node
523 pointed to by the initial forest. Despite being a oneliner, this method is
524 defined here as it references a method from fma_root_node. */
525
526fma_forest *
527fma_node::get_forest ()
528{
529 return this->m_root->get_forest ();
530}
531
532/* Return whether a node is a root node. */
533
534bool
535fma_node::root_p ()
536{
537 return this->m_root == this;
538}
539
540/* Dump information about the children of node FMA_NODE in forest FOREST. */
541
542void
543fma_node::dump_info (ATTRIBUTE_UNUSED fma_forest *forest)
544{
545 struct du_chain *chain;
546 std::list<fma_node *>::iterator fma_child;
547
548 gcc_assert (dump_file);
549
550 if (this->get_children ()->empty ())
551 return;
552
553 fprintf (dump_file, "Instruction(s)");
554 for (chain = this->m_head->first; chain; chain = chain->next_use)
555 {
556 if (!is_fmul_fmac_insn (chain->insn, true))
557 continue;
558
559 if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
560 continue;
561
562 fprintf (dump_file, " %d", INSN_UID (chain->insn));
563 }
564
565 fprintf (dump_file, " is(are) accumulator dependency of instructions");
566 for (fma_child = this->get_children ()->begin ();
567 fma_child != this->get_children ()->end (); fma_child++)
568 fprintf (dump_file, " %d", INSN_UID ((*fma_child)->m_insn));
569 fprintf (dump_file, "\n");
570}
571
572/* Wrapper around fma_node::dump_info for use as parameter of function pointer
573 type in func_fma_steering::dfs. */
574
575static void
576dump_tree_node_info (fma_forest *forest, fma_node *node)
577{
578 node->dump_info (forest);
579}
580
581/* Rename the destination register of a single FMUL or FMADD/FMSUB instruction
582 represented by FMA_NODE to a register that respect the target parity for
583 FOREST or with same parity of the instruction represented by its parent node
584 if it has one. */
585
586void
587fma_node::rename (fma_forest *forest)
588{
589 int cur_parity, target_parity;
590
591 /* This is alternate root of a chain and thus has no children. It will be
592 renamed when processing the canonical root for that chain. */
593 if (!this->m_head)
594 return;
595
596 target_parity = forest->get_target_parity ();
597 if (this->m_parent)
598 target_parity = this->m_parent->get_parity ();
599 cur_parity = this->get_parity ();
600
601 /* Rename if parity differs. */
602 if (cur_parity != target_parity)
603 {
604 rtx_insn *insn = this->m_insn;
605 HARD_REG_SET unavailable;
582adad1 606 machine_mode mode;
ee7ef7ab 607 int reg;
608
609 if (dump_file)
610 {
611 unsigned cur_dest_reg = this->m_head->regno;
612
613 fprintf (dump_file, "FMA or FMUL at insn %d but destination "
614 "register (%s) has different parity from expected to "
615 "maximize FPU pipeline utilization\n", INSN_UID (insn),
616 reg_names[cur_dest_reg]);
617 }
618
619 /* Don't clobber traceback for noreturn functions. */
620 CLEAR_HARD_REG_SET (unavailable);
621 if (frame_pointer_needed)
622 {
623 add_to_hard_reg_set (&unavailable, Pmode, FRAME_POINTER_REGNUM);
624 add_to_hard_reg_set (&unavailable, Pmode, HARD_FRAME_POINTER_REGNUM);
625 }
626
627 /* Exclude registers with wrong parity. */
628 mode = GET_MODE (SET_DEST (PATTERN (insn)));
629 for (reg = cur_parity; reg < FIRST_PSEUDO_REGISTER; reg += 2)
630 add_to_hard_reg_set (&unavailable, mode, reg);
631
632 if (!rename_single_chain (this->m_head, &unavailable))
633 {
634 if (dump_file)
635 fprintf (dump_file, "Destination register of insn %d could not be "
636 "renamed. Dependent FMA insns will use this parity from "
637 "there on.\n", INSN_UID (insn));
638 }
639 else
640 cur_parity = target_parity;
641 }
642
643 forest->get_globals ()->update_balance (cur_parity);
644}
645
646/* Wrapper around fma_node::dump_info for use as parameter of function pointer
647 type in func_fma_steering::dfs. */
648
649static void
650rename_fma_node (fma_forest *forest, fma_node *node)
651{
652 node->rename (forest);
653}
654
655fma_root_node::fma_root_node (func_fma_steering *globals, du_chain *chain,
656 int id) : fma_node (this, chain)
657{
658 this->m_forest = new fma_forest (globals, this, id);
659 this->m_forest->fma_node_created (this);
660}
661
662fma_forest *
663fma_root_node::get_forest ()
664{
665 return this->m_forest;
666}
667
668void
669fma_root_node::set_forest (fma_forest *ref_forest)
670{
671 this->m_forest = ref_forest;
672}
673
674/* Dump information about the roots of forest FOREST. */
675
676void
677fma_root_node::dump_info (fma_forest *forest)
678{
679 gcc_assert (dump_file);
680
681 if (this == forest->get_roots ()->front ())
682 fprintf (dump_file, "Instruction(s) at root of forest #%d:",
683 forest->get_id ());
684 fprintf (dump_file, " %d", INSN_UID (this->m_insn));
685 if (this == forest->get_roots ()->back ())
686 fprintf (dump_file, "\n");
687}
688
689/* Wrapper around fma_root_node::dump_info for use as parameter of function
690 pointer type in func_fma_steering::dfs. */
691
692static void
693dump_tree_root_info (fma_forest *forest, fma_root_node *node)
694{
695 node->dump_info (forest);
696}
697
698func_fma_steering::func_fma_steering () : m_fpu_balance (0)
699{
700 this->m_insn_fma_head_map = new hash_map<rtx_insn *, fma_node *>;
701 this->m_fma_forests.clear ();
702 this->m_next_forest_id = 0;
703}
704
705func_fma_steering::~func_fma_steering ()
706{
707 delete this->m_insn_fma_head_map;
708}
709
710int
711func_fma_steering::get_fpu_balance ()
712{
713 return this->m_fpu_balance;
714}
715
716void
717func_fma_steering::remove_forest (fma_forest *forest)
718{
719 this->m_fma_forests.remove (forest);
720}
721
722/* Memorize the mapping of this instruction to its fma_node object and return
723 whether such a mapping existed. */
724
725bool
726func_fma_steering::put_node (fma_node *node)
727{
728 return this->m_insn_fma_head_map->put (node->get_insn (), node);
729}
730
731/* Update the current balance considering a node with the given PARITY. */
732
733void
734func_fma_steering::update_balance (int parity)
735{
736 this->m_fpu_balance = parity ? this->m_fpu_balance + 1
737 : this->m_fpu_balance - 1;
738}
739
740/* Return whether an fma_node object exists for instruction INSN and, if not,
741 allocate one in *RET. */
742
743fma_node *
744func_fma_steering::get_fma_node (rtx_insn *insn)
745{
746 fma_node **fma_slot;
747
748 fma_slot = this->m_insn_fma_head_map->get (insn);
749 if (fma_slot)
750 return *fma_slot;
751 return NULL;
752}
753
754/* Allocate and initialize fma_node objects for the FMUL or FMADD/FMSUB
755 instruction in CHAIN->insn and its dependent FMADD/FMSUB instructions, all
756 part of FOREST. For the children, the associated head is left untouched
757 (and thus null) as this function will be called again when considering the
758 chain where they are def. For the parent, the chain is given in HEAD. */
759
760void
761func_fma_steering::analyze_fma_fmul_insn (fma_forest *ref_forest,
762 du_chain *chain, du_head_p head)
763{
764 fma_forest *forest;
765 fma_node *node = this->get_fma_node (chain->insn);
766
767 /* This is a root node. */
768 if (!node)
769 {
770 fma_root_node *root_node;
771
772 root_node = new fma_root_node (this, chain, this->m_next_forest_id++);
773 forest = root_node->get_forest ();
774 node = root_node;
775
776 /* Until proved otherwise, assume this root is not part of an existing
777 forest and thus add its forest to the list of forests. */
778 this->m_fma_forests.push_back (forest);
779 }
780 else
781 forest = node->get_forest ();
782
783 node->set_head (head);
784
785 /* fma_node is part of a chain with several defs, one of them having already
786 been processed. The root of that already processed def is the canonical
787 one and the root of fma_node is added to its forest. No need to process
788 the children nodes as they were already processed when the other def was
789 processed. */
790 if (ref_forest)
791 {
792 ref_forest->merge_forest (forest);
793 return;
794 }
795
796 for (chain = head->first; chain; chain = chain->next_use)
797 {
798 fma_node *child_fma;
799 rtx fma_rtx, *accum_rtx_p;
800
801 if (!is_fmul_fmac_insn (chain->insn, false))
802 continue;
803
804 /* Get FMA rtx. */
805 fma_rtx = SET_SRC (PATTERN (chain->insn));
806 /* FMA is negated. */
807 if (GET_CODE (fma_rtx) == NEG)
808 fma_rtx = XEXP (fma_rtx, 0);
809 /* Get accumulator rtx. */
810 accum_rtx_p = &XEXP (fma_rtx, 2);
811 /* Accumulator is negated. */
812 if (!REG_P (*accum_rtx_p))
813 accum_rtx_p = &XEXP (*accum_rtx_p, 0);
814
815 /* This du_chain structure is not for the accumulator register. */
816 if (accum_rtx_p != chain->loc)
817 continue;
818
819 /* If object already created, this is a loop carried dependency. We
820 don't include this object in the children as we want trees for
821 rename_fma_trees to not be an infinite loop. */
822 if (this->get_fma_node (chain->insn))
823 continue;
824
825 child_fma = new fma_node (node, chain);
826
827 /* Memorize the mapping of this instruction to its fma_node object
828 as it will be processed for the chain starting at its destination
829 register later. */
830
831 /* Link to siblings. */
832 node->add_child (child_fma);
833 }
834}
835
836/* Perform a depth-first search of the forests of fma_node in
837 THIS->m_fma_forests, calling PROCESS_FOREST () on each fma_forest object in
838 THIS->m_fma_forests list, PROCESS_ROOT () on each tree root and
839 PROCESS_NODE () on each node. If FREE is true, free all std::list in the
840 same dfs. */
841
842void
843func_fma_steering::dfs (void (*process_forest) (fma_forest *),
844 void (*process_root) (fma_forest *, fma_root_node *),
845 void (*process_node) (fma_forest *, fma_node *),
846 bool free)
847{
848 vec<fma_node *> to_process;
849 std::list<fma_forest *>::iterator forest_iter;
850
851 to_process.create (0);
852
853 /* For each forest. */
854 for (forest_iter = this->m_fma_forests.begin ();
855 forest_iter != this->m_fma_forests.end (); forest_iter++)
856 {
857 std::list<fma_root_node *>::iterator root_iter;
858
859 if (process_forest)
860 process_forest (*forest_iter);
861
862 /* For each tree root in this forest. */
863 for (root_iter = (*forest_iter)->get_roots ()->begin ();
864 root_iter != (*forest_iter)->get_roots ()->end (); root_iter++)
865 {
866 if (process_root)
867 process_root (*forest_iter, *root_iter);
868 to_process.safe_push (*root_iter);
869 }
870
871 /* For each tree node in this forest. */
872 while (!to_process.is_empty ())
873 {
874 fma_node *node;
875 std::list<fma_node *>::iterator child_iter;
876
877 node = to_process.pop ();
878
879 if (process_node)
880 process_node (*forest_iter, node);
881
882 /* Absence of children might indicate an alternate root of a *chain*.
883 It's ok to skip it here as the chain will be renamed when
884 processing the canonical root for that chain. */
885 if (node->get_children ()->empty ())
886 continue;
887
888 for (child_iter = node->get_children ()->begin ();
889 child_iter != node->get_children ()->end (); child_iter++)
890 to_process.safe_push (*child_iter);
891 if (free)
892 {
893 if (node->root_p ())
894 delete static_cast<fma_root_node *> (node);
895 else
896 delete node;
897 }
898 }
899 if (free)
900 delete *forest_iter;
901 }
902
903 to_process.release ();
904}
905
906/* Build the dependency trees of FMUL and FMADD/FMSUB instructions. */
907
908void
909func_fma_steering::analyze ()
910{
911 int i, n_blocks, *bb_dfs_preorder;
912 basic_block bb;
913 rtx_insn *insn;
914
915 bb_dfs_preorder = XNEWVEC (int, last_basic_block_for_fn (cfun));
916 n_blocks = pre_and_rev_post_order_compute (bb_dfs_preorder, NULL, false);
917
918 /* Browse the graph of basic blocks looking for FMUL or FMADD/FMSUB
919 instructions. */
920 for (i = 0; i < n_blocks; i++)
921 {
922 bb = BASIC_BLOCK_FOR_FN (cfun, bb_dfs_preorder[i]);
923 FOR_BB_INSNS (bb, insn)
924 {
925 operand_rr_info *dest_op_info;
20930113 926 struct du_chain *chain = NULL;
ee7ef7ab 927 unsigned dest_regno;
20930113 928 fma_forest *forest = NULL;
929 du_head_p head = NULL;
ee7ef7ab 930 int i;
931
932 if (!is_fmul_fmac_insn (insn, true))
933 continue;
934
935 /* Search the chain where this instruction is (one of) the root. */
936 dest_op_info = insn_rr[INSN_UID (insn)].op_info;
937 dest_regno = REGNO (SET_DEST (PATTERN (insn)));
938 for (i = 0; i < dest_op_info->n_chains; i++)
939 {
940 /* The register tracked by this chain does not match the
941 destination register of insn. */
942 if (dest_op_info->heads[i]->regno != dest_regno)
943 continue;
944
945 head = dest_op_info->heads[i];
946 /* The chain was merged in another, find the new head. */
947 if (!head->first)
948 head = regrename_chain_from_id (head->id);
949
950 /* Search the chain element for this instruction and, if another
951 FMUL or FMADD/FMSUB instruction was already processed, note
952 the forest of its tree. */
953 forest = NULL;
954 for (chain = head->first; chain; chain = chain->next_use)
955 {
956 fma_node **fma_slot;
957
958 if (!is_fmul_fmac_insn (chain->insn, true))
959 continue;
960
961 /* This is a use, continue. */
962 if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
963 continue;
964
965 if (chain->insn == insn)
966 break;
967
968 fma_slot = this->m_insn_fma_head_map->get (chain->insn);
969 if (fma_slot && (*fma_slot)->get_children ())
970 forest = (*fma_slot)->get_forest ();
971 }
972 if (chain)
973 break;
974 }
975
976 /* We didn't find a chain with a def for this instruction. */
977 gcc_assert (i < dest_op_info->n_chains);
978
979 this->analyze_fma_fmul_insn (forest, chain, head);
980 }
981 }
982 free (bb_dfs_preorder);
983
984 if (dump_file)
985 this->dfs (dump_forest_info, dump_tree_root_info, dump_tree_node_info,
986 false);
987}
988
989/* Perform the renaming of all chains with FMUL or FMADD/FMSUB involved with
990 the objective of keeping FPU pipeline balanced in term of instructions and
991 having FMADD/FMSUB with dependencies on previous FMUL or FMADD/FMSUB be
992 scheduled on the same pipeline. */
993
994void
995func_fma_steering::rename_fma_trees ()
996{
997 this->dfs (dispatch_forest, NULL, rename_fma_node, true);
998
999 if (dump_file && !this->m_fma_forests.empty ())
1000 {
1001 fprintf (dump_file, "Function %s has ", current_function_name ());
1002 if (this->m_fpu_balance == 0)
1003 fprintf (dump_file, "perfect balance of FMUL/FMA chains between the "
1004 "two FPU pipelines\n");
1005 else if (this->m_fpu_balance > 0)
1006 fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the second "
1007 "FPU pipeline\n", this->m_fpu_balance);
1008 else /* this->m_fpu_balance < 0 */
1009 fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the first "
1010 "FPU pipeline\n", - this->m_fpu_balance);
1011 }
1012}
1013
1014/* Execute FMA steering pass. */
1015
1016void
1017func_fma_steering::execute_fma_steering ()
1018{
1019 df_set_flags (DF_LR_RUN_DCE);
1020 df_note_add_problem ();
1021 df_analyze ();
1022 df_set_flags (DF_DEFER_INSN_RESCAN);
1023
1024 regrename_init (true);
1025 regrename_analyze (NULL);
1026 this->analyze ();
1027 this->rename_fma_trees ();
1028 regrename_finish ();
1029}
1030
1031const pass_data pass_data_fma_steering =
1032{
1033 RTL_PASS, /* type */
1034 "fma_steering", /* name */
1035 OPTGROUP_NONE, /* optinfo_flags */
1036 TV_NONE, /* tv_id */
1037 0, /* properties_required */
1038 0, /* properties_provided */
1039 0, /* properties_destroyed */
1040 0, /* todo_flags_start */
1041 TODO_df_finish, /* todo_flags_finish */
1042};
1043
1044class pass_fma_steering : public rtl_opt_pass
1045{
1046public:
1047 pass_fma_steering (gcc::context *ctxt)
1048 : rtl_opt_pass (pass_data_fma_steering, ctxt)
1049 {}
1050
1051 /* opt_pass methods: */
1052 virtual bool gate (function *)
1053 {
14677da9 1054 return (aarch64_tune_params.extra_tuning_flags
5e9fcc70 1055 & AARCH64_EXTRA_TUNE_RENAME_FMA_REGS)
1056 && optimize >= 2;
ee7ef7ab 1057 }
1058
1059 virtual unsigned int execute (function *)
1060 {
1061 func_fma_steering *fma_steering = new func_fma_steering;
1062 fma_steering->execute_fma_steering ();
1063 delete fma_steering;
1064 return 0;
1065 }
1066
1067}; // class pass_fma_steering
1068
1069/* Create a new fma steering pass instance. */
1070
e8ebfb45 1071rtl_opt_pass *
ee7ef7ab 1072make_pass_fma_steering (gcc::context *ctxt)
1073{
1074 return new pass_fma_steering (ctxt);
1075}