]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/aarch64/aarch64-cc-fusion.cc
Update copyright years.
[thirdparty/gcc.git] / gcc / config / aarch64 / aarch64-cc-fusion.cc
CommitLineData
5a783f42 1// Pass to fuse CC operations with other instructions.
83ffe9cd 2// Copyright (C) 2021-2023 Free Software Foundation, Inc.
5a783f42
RS
3//
4// This file is part of GCC.
5//
6// GCC is free software; you can redistribute it and/or modify it under
7// the terms of the GNU General Public License as published by the Free
8// Software Foundation; either version 3, or (at your option) any later
9// version.
10//
11// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12// WARRANTY; without even the implied warranty of MERCHANTABILITY or
13// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14// for more details.
15//
16// You should have received a copy of the GNU General Public License
17// along with GCC; see the file COPYING3. If not see
18// <http://www.gnu.org/licenses/>.
19
20// This pass looks for sequences of the form:
21//
22// A: (set (reg R1) X1)
23// B: ...instructions that might change the value of X1...
24// C: (set (reg CC) X2) // X2 uses R1
25//
26// and tries to change them to:
27//
28// C': [(set (reg CC) X2')
29// (set (reg R1) X1)]
30// B: ...instructions that might change the value of X1...
31//
32// where X2' is the result of replacing R1 with X1 in X2.
33//
34// This sequence occurs in SVE code in two important cases:
35//
36// (a) Sometimes, to deal correctly with overflow, we need to increment
37// an IV after a WHILELO rather than before it. In this case:
38// - A is a WHILELO,
39// - B includes an IV increment and
40// - C is a separate PTEST.
41//
42// (b) ACLE code of the form:
43//
44// svbool_t ok = svrdffr ();
45// if (svptest_last (pg, ok))
46// ...
47//
48// must, for performance reasons, be code-generated as:
49//
50// RDFFRS Pok.B, Pg/Z
51// ...branch on flags result...
52//
53// without a separate PTEST of Pok. In this case:
54// - A is an aarch64_rdffr
55// - B includes an aarch64_update_ffrt
56// - C is a separate PTEST
57//
58// Combine can handle this optimization if B doesn't exist and if A and
59// C are in the same BB. This pass instead handles cases where B does
60// exist and cases where A and C are in different BBs of the same EBB.
61
62#define IN_TARGET_CODE 1
63
64#define INCLUDE_ALGORITHM
65#define INCLUDE_FUNCTIONAL
66#include "config.h"
67#include "system.h"
68#include "coretypes.h"
69#include "backend.h"
70#include "rtl.h"
71#include "df.h"
72#include "rtl-ssa.h"
73#include "tree-pass.h"
74
75using namespace rtl_ssa;
76
77namespace {
78const pass_data pass_data_cc_fusion =
79{
80 RTL_PASS, // type
81 "cc_fusion", // name
82 OPTGROUP_NONE, // optinfo_flags
83 TV_NONE, // tv_id
84 0, // properties_required
85 0, // properties_provided
86 0, // properties_destroyed
87 0, // todo_flags_start
88 TODO_df_finish, // todo_flags_finish
89};
90
91// Class that represents one run of the pass.
92class cc_fusion
93{
94public:
95 cc_fusion () : m_parallel () {}
96 void execute ();
97
98private:
99 rtx optimizable_set (const insn_info *);
100 bool parallelize_insns (def_info *, rtx, def_info *, rtx);
101 void optimize_cc_setter (def_info *, rtx);
102
103 // A spare PARALLEL rtx, or null if none.
104 rtx m_parallel;
105};
106
107// See whether INSN is a single_set that we can optimize. Return the
108// set if so, otherwise return null.
109rtx
110cc_fusion::optimizable_set (const insn_info *insn)
111{
112 if (!insn->can_be_optimized ()
113 || insn->is_asm ()
114 || insn->has_volatile_refs ()
115 || insn->has_pre_post_modify ())
116 return NULL_RTX;
117
118 return single_set (insn->rtl ());
119}
120
121// CC_SET is a single_set that sets (only) CC_DEF; OTHER_SET is likewise
122// a single_set that sets (only) OTHER_DEF. CC_SET is known to set the
123// CC register and the instruction that contains CC_SET is known to use
124// OTHER_DEF. Try to do CC_SET and OTHER_SET in parallel.
125bool
126cc_fusion::parallelize_insns (def_info *cc_def, rtx cc_set,
127 def_info *other_def, rtx other_set)
128{
129 auto attempt = crtl->ssa->new_change_attempt ();
130
131 insn_info *cc_insn = cc_def->insn ();
132 insn_info *other_insn = other_def->insn ();
133 if (dump_file && (dump_flags & TDF_DETAILS))
134 fprintf (dump_file, "trying to parallelize insn %d and insn %d\n",
135 other_insn->uid (), cc_insn->uid ());
136
137 // Try to substitute OTHER_SET into CC_INSN.
138 insn_change_watermark rtl_watermark;
139 rtx_insn *cc_rtl = cc_insn->rtl ();
140 insn_propagation prop (cc_rtl, SET_DEST (other_set),
141 SET_SRC (other_set));
142 if (!prop.apply_to_pattern (&PATTERN (cc_rtl))
143 || prop.num_replacements == 0)
144 {
145 if (dump_file && (dump_flags & TDF_DETAILS))
146 fprintf (dump_file, "-- failed to substitute all uses of r%d\n",
147 other_def->regno ());
148 return false;
149 }
150
151 // Restrict the uses to those outside notes.
152 use_array cc_uses = remove_note_accesses (attempt, cc_insn->uses ());
153 use_array other_set_uses = remove_note_accesses (attempt,
154 other_insn->uses ());
155
156 // Remove the use of the substituted value.
157 access_array_builder uses_builder (attempt);
158 uses_builder.reserve (cc_uses.size ());
159 for (use_info *use : cc_uses)
160 if (use->def () != other_def)
161 uses_builder.quick_push (use);
162 cc_uses = use_array (uses_builder.finish ());
163
164 // Get the list of uses for the new instruction.
165 insn_change cc_change (cc_insn);
166 cc_change.new_uses = merge_access_arrays (attempt, other_set_uses, cc_uses);
167 if (!cc_change.new_uses.is_valid ())
168 {
169 if (dump_file && (dump_flags & TDF_DETAILS))
170 fprintf (dump_file, "-- cannot merge uses\n");
171 return false;
172 }
173
174 // The instruction initially defines just two registers. recog can add
175 // extra clobbers if necessary.
176 auto_vec<access_info *, 2> new_defs;
177 new_defs.quick_push (cc_def);
178 new_defs.quick_push (other_def);
179 sort_accesses (new_defs);
180 cc_change.new_defs = def_array (access_array (new_defs));
181
182 // Make sure there is somewhere that the new instruction could live.
183 auto other_change = insn_change::delete_insn (other_insn);
184 insn_change *changes[] = { &other_change, &cc_change };
185 cc_change.move_range = cc_insn->ebb ()->insn_range ();
186 if (!restrict_movement_ignoring (cc_change, insn_is_changing (changes)))
187 {
188 if (dump_file && (dump_flags & TDF_DETAILS))
189 fprintf (dump_file, "-- cannot satisfy all definitions and uses\n");
190 return false;
191 }
192
193 // Tentatively install the new pattern. By convention, the CC set
194 // must be first.
195 if (m_parallel)
196 {
197 XVECEXP (m_parallel, 0, 0) = cc_set;
198 XVECEXP (m_parallel, 0, 1) = other_set;
199 }
200 else
201 {
202 rtvec vec = gen_rtvec (2, cc_set, other_set);
203 m_parallel = gen_rtx_PARALLEL (VOIDmode, vec);
204 }
205 validate_change (cc_rtl, &PATTERN (cc_rtl), m_parallel, 1);
206
207 // These routines report failures themselves.
208 if (!recog_ignoring (attempt, cc_change, insn_is_changing (changes))
209 || !changes_are_worthwhile (changes)
210 || !crtl->ssa->verify_insn_changes (changes))
211 return false;
212
213 remove_reg_equal_equiv_notes (cc_rtl);
214 confirm_change_group ();
215 crtl->ssa->change_insns (changes);
216 m_parallel = NULL_RTX;
217 return true;
218}
219
220// Try to optimize the instruction that contains CC_DEF, where CC_DEF describes
221// a definition of the CC register by CC_SET.
222void
223cc_fusion::optimize_cc_setter (def_info *cc_def, rtx cc_set)
224{
225 // Search the registers used by the CC setter for an easily-substitutable
226 // def-use chain.
227 for (use_info *other_use : cc_def->insn ()->uses ())
228 if (def_info *other_def = other_use->def ())
229 if (other_use->regno () != CC_REGNUM
230 && other_def->ebb () == cc_def->ebb ())
231 if (rtx other_set = optimizable_set (other_def->insn ()))
232 {
233 rtx dest = SET_DEST (other_set);
234 if (REG_P (dest)
235 && REGNO (dest) == other_def->regno ()
236 && REG_NREGS (dest) == 1
237 && parallelize_insns (cc_def, cc_set, other_def, other_set))
238 return;
239 }
240}
241
242// Run the pass on the current function.
243void
244cc_fusion::execute ()
245{
246 // Initialization.
247 calculate_dominance_info (CDI_DOMINATORS);
248 df_analyze ();
249 crtl->ssa = new rtl_ssa::function_info (cfun);
250
251 // Walk through all instructions that set CC. Look for a PTEST instruction
252 // that we can optimize.
253 //
254 // ??? The PTEST test isn't needed for correctness, but it ensures that the
255 // pass no effect on non-SVE code.
256 for (def_info *def : crtl->ssa->reg_defs (CC_REGNUM))
257 if (rtx cc_set = optimizable_set (def->insn ()))
258 if (REG_P (SET_DEST (cc_set))
259 && REGNO (SET_DEST (cc_set)) == CC_REGNUM
260 && GET_CODE (SET_SRC (cc_set)) == UNSPEC
261 && XINT (SET_SRC (cc_set), 1) == UNSPEC_PTEST)
262 optimize_cc_setter (def, cc_set);
263
264 // Finalization.
265 crtl->ssa->perform_pending_updates ();
266 free_dominance_info (CDI_DOMINATORS);
267}
268
269class pass_cc_fusion : public rtl_opt_pass
270{
271public:
272 pass_cc_fusion (gcc::context *ctxt)
273 : rtl_opt_pass (pass_data_cc_fusion, ctxt)
274 {}
275
276 // opt_pass methods:
277 virtual bool gate (function *) { return TARGET_SVE && optimize >= 2; }
278 virtual unsigned int execute (function *);
279};
280
281unsigned int
282pass_cc_fusion::execute (function *)
283{
284 cc_fusion ().execute ();
285 return 0;
286}
287
288} // end namespace
289
290// Create a new CC fusion pass instance.
291
292rtl_opt_pass *
293make_pass_cc_fusion (gcc::context *ctxt)
294{
295 return new pass_cc_fusion (ctxt);
296}