1 // Pass to fuse CC operations with other instructions.
2 // Copyright (C) 2021 Free Software Foundation, Inc.
4 // This file is part of GCC.
6 // GCC is free software; you can redistribute it and/or modify it under
7 // the terms of the GNU General Public License as published by the Free
8 // Software Foundation; either version 3, or (at your option) any later
11 // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 // WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 // You should have received a copy of the GNU General Public License
17 // along with GCC; see the file COPYING3. If not see
18 // <http://www.gnu.org/licenses/>.
20 // This pass looks for sequences of the form:
22 // A: (set (reg R1) X1)
23 // B: ...instructions that might change the value of X1...
24 // C: (set (reg CC) X2) // X2 uses R1
26 // and tries to change them to:
28 // C': [(set (reg CC) X2')
30 // B: ...instructions that might change the value of X1...
32 // where X2' is the result of replacing R1 with X1 in X2.
34 // This sequence occurs in SVE code in two important cases:
36 // (a) Sometimes, to deal correctly with overflow, we need to increment
37 // an IV after a WHILELO rather than before it. In this case:
39 // - B includes an IV increment and
40 // - C is a separate PTEST.
42 // (b) ACLE code of the form:
44 // svbool_t ok = svrdffr ();
45 // if (svptest_last (pg, ok))
48 // must, for performance reasons, be code-generated as:
51 // ...branch on flags result...
53 // without a separate PTEST of Pok. In this case:
54 // - A is an aarch64_rdffr
55 // - B includes an aarch64_update_ffrt
56 // - C is a separate PTEST
58 // Combine can handle this optimization if B doesn't exist and if A and
59 // C are in the same BB. This pass instead handles cases where B does
60 // exist and cases where A and C are in different BBs of the same EBB.
62 #define IN_TARGET_CODE 1
64 #define INCLUDE_ALGORITHM
65 #define INCLUDE_FUNCTIONAL
68 #include "coretypes.h"
73 #include "tree-pass.h"
75 using namespace rtl_ssa
;
78 const pass_data pass_data_cc_fusion
=
82 OPTGROUP_NONE
, // optinfo_flags
84 0, // properties_required
85 0, // properties_provided
86 0, // properties_destroyed
87 0, // todo_flags_start
88 TODO_df_finish
, // todo_flags_finish
91 // Class that represents one run of the pass.
95 cc_fusion () : m_parallel () {}
99 rtx
optimizable_set (const insn_info
*);
100 bool parallelize_insns (def_info
*, rtx
, def_info
*, rtx
);
101 void optimize_cc_setter (def_info
*, rtx
);
103 // A spare PARALLEL rtx, or null if none.
107 // See whether INSN is a single_set that we can optimize. Return the
108 // set if so, otherwise return null.
110 cc_fusion::optimizable_set (const insn_info
*insn
)
112 if (!insn
->can_be_optimized ()
114 || insn
->has_volatile_refs ()
115 || insn
->has_pre_post_modify ())
118 return single_set (insn
->rtl ());
121 // CC_SET is a single_set that sets (only) CC_DEF; OTHER_SET is likewise
122 // a single_set that sets (only) OTHER_DEF. CC_SET is known to set the
123 // CC register and the instruction that contains CC_SET is known to use
124 // OTHER_DEF. Try to do CC_SET and OTHER_SET in parallel.
126 cc_fusion::parallelize_insns (def_info
*cc_def
, rtx cc_set
,
127 def_info
*other_def
, rtx other_set
)
129 auto attempt
= crtl
->ssa
->new_change_attempt ();
131 insn_info
*cc_insn
= cc_def
->insn ();
132 insn_info
*other_insn
= other_def
->insn ();
133 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
134 fprintf (dump_file
, "trying to parallelize insn %d and insn %d\n",
135 other_insn
->uid (), cc_insn
->uid ());
137 // Try to substitute OTHER_SET into CC_INSN.
138 insn_change_watermark rtl_watermark
;
139 rtx_insn
*cc_rtl
= cc_insn
->rtl ();
140 insn_propagation
prop (cc_rtl
, SET_DEST (other_set
),
141 SET_SRC (other_set
));
142 if (!prop
.apply_to_pattern (&PATTERN (cc_rtl
))
143 || prop
.num_replacements
== 0)
145 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
146 fprintf (dump_file
, "-- failed to substitute all uses of r%d\n",
147 other_def
->regno ());
151 // Restrict the uses to those outside notes.
152 use_array cc_uses
= remove_note_accesses (attempt
, cc_insn
->uses ());
153 use_array other_set_uses
= remove_note_accesses (attempt
,
154 other_insn
->uses ());
156 // Remove the use of the substituted value.
157 access_array_builder
uses_builder (attempt
);
158 uses_builder
.reserve (cc_uses
.size ());
159 for (use_info
*use
: cc_uses
)
160 if (use
->def () != other_def
)
161 uses_builder
.quick_push (use
);
162 cc_uses
= use_array (uses_builder
.finish ());
164 // Get the list of uses for the new instruction.
165 insn_change
cc_change (cc_insn
);
166 cc_change
.new_uses
= merge_access_arrays (attempt
, other_set_uses
, cc_uses
);
167 if (!cc_change
.new_uses
.is_valid ())
169 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
170 fprintf (dump_file
, "-- cannot merge uses\n");
174 // The instruction initially defines just two registers. recog can add
175 // extra clobbers if necessary.
176 auto_vec
<access_info
*, 2> new_defs
;
177 new_defs
.quick_push (cc_def
);
178 new_defs
.quick_push (other_def
);
179 sort_accesses (new_defs
);
180 cc_change
.new_defs
= def_array (access_array (new_defs
));
182 // Make sure there is somewhere that the new instruction could live.
183 auto other_change
= insn_change::delete_insn (other_insn
);
184 insn_change
*changes
[] = { &other_change
, &cc_change
};
185 cc_change
.move_range
= cc_insn
->ebb ()->insn_range ();
186 if (!restrict_movement_ignoring (cc_change
, insn_is_changing (changes
)))
188 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
189 fprintf (dump_file
, "-- cannot satisfy all definitions and uses\n");
193 // Tentatively install the new pattern. By convention, the CC set
197 XVECEXP (m_parallel
, 0, 0) = cc_set
;
198 XVECEXP (m_parallel
, 0, 1) = other_set
;
202 rtvec vec
= gen_rtvec (2, cc_set
, other_set
);
203 m_parallel
= gen_rtx_PARALLEL (VOIDmode
, vec
);
205 validate_change (cc_rtl
, &PATTERN (cc_rtl
), m_parallel
, 1);
207 // These routines report failures themselves.
208 if (!recog_ignoring (attempt
, cc_change
, insn_is_changing (changes
))
209 || !changes_are_worthwhile (changes
)
210 || !crtl
->ssa
->verify_insn_changes (changes
))
213 remove_reg_equal_equiv_notes (cc_rtl
);
214 confirm_change_group ();
215 crtl
->ssa
->change_insns (changes
);
216 m_parallel
= NULL_RTX
;
220 // Try to optimize the instruction that contains CC_DEF, where CC_DEF describes
221 // a definition of the CC register by CC_SET.
223 cc_fusion::optimize_cc_setter (def_info
*cc_def
, rtx cc_set
)
225 // Search the registers used by the CC setter for an easily-substitutable
227 for (use_info
*other_use
: cc_def
->insn ()->uses ())
228 if (def_info
*other_def
= other_use
->def ())
229 if (other_use
->regno () != CC_REGNUM
230 && other_def
->ebb () == cc_def
->ebb ())
231 if (rtx other_set
= optimizable_set (other_def
->insn ()))
233 rtx dest
= SET_DEST (other_set
);
235 && REGNO (dest
) == other_def
->regno ()
236 && REG_NREGS (dest
) == 1
237 && parallelize_insns (cc_def
, cc_set
, other_def
, other_set
))
242 // Run the pass on the current function.
244 cc_fusion::execute ()
247 calculate_dominance_info (CDI_DOMINATORS
);
249 crtl
->ssa
= new rtl_ssa::function_info (cfun
);
251 // Walk through all instructions that set CC. Look for a PTEST instruction
252 // that we can optimize.
254 // ??? The PTEST test isn't needed for correctness, but it ensures that the
255 // pass no effect on non-SVE code.
256 for (def_info
*def
: crtl
->ssa
->reg_defs (CC_REGNUM
))
257 if (rtx cc_set
= optimizable_set (def
->insn ()))
258 if (REG_P (SET_DEST (cc_set
))
259 && REGNO (SET_DEST (cc_set
)) == CC_REGNUM
260 && GET_CODE (SET_SRC (cc_set
)) == UNSPEC
261 && XINT (SET_SRC (cc_set
), 1) == UNSPEC_PTEST
)
262 optimize_cc_setter (def
, cc_set
);
265 crtl
->ssa
->perform_pending_updates ();
266 free_dominance_info (CDI_DOMINATORS
);
269 class pass_cc_fusion
: public rtl_opt_pass
272 pass_cc_fusion (gcc::context
*ctxt
)
273 : rtl_opt_pass (pass_data_cc_fusion
, ctxt
)
277 virtual bool gate (function
*) { return TARGET_SVE
&& optimize
>= 2; }
278 virtual unsigned int execute (function
*);
282 pass_cc_fusion::execute (function
*)
284 cc_fusion ().execute ();
290 // Create a new CC fusion pass instance.
293 make_pass_cc_fusion (gcc::context
*ctxt
)
295 return new pass_cc_fusion (ctxt
);