]>
Commit | Line | Data |
---|---|---|
5a783f42 | 1 | // Pass to fuse CC operations with other instructions. |
83ffe9cd | 2 | // Copyright (C) 2021-2023 Free Software Foundation, Inc. |
5a783f42 RS |
3 | // |
4 | // This file is part of GCC. | |
5 | // | |
6 | // GCC is free software; you can redistribute it and/or modify it under | |
7 | // the terms of the GNU General Public License as published by the Free | |
8 | // Software Foundation; either version 3, or (at your option) any later | |
9 | // version. | |
10 | // | |
11 | // GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | // for more details. | |
15 | // | |
16 | // You should have received a copy of the GNU General Public License | |
17 | // along with GCC; see the file COPYING3. If not see | |
18 | // <http://www.gnu.org/licenses/>. | |
19 | ||
20 | // This pass looks for sequences of the form: | |
21 | // | |
22 | // A: (set (reg R1) X1) | |
23 | // B: ...instructions that might change the value of X1... | |
24 | // C: (set (reg CC) X2) // X2 uses R1 | |
25 | // | |
26 | // and tries to change them to: | |
27 | // | |
28 | // C': [(set (reg CC) X2') | |
29 | // (set (reg R1) X1)] | |
30 | // B: ...instructions that might change the value of X1... | |
31 | // | |
32 | // where X2' is the result of replacing R1 with X1 in X2. | |
33 | // | |
34 | // This sequence occurs in SVE code in two important cases: | |
35 | // | |
36 | // (a) Sometimes, to deal correctly with overflow, we need to increment | |
37 | // an IV after a WHILELO rather than before it. In this case: | |
38 | // - A is a WHILELO, | |
39 | // - B includes an IV increment and | |
40 | // - C is a separate PTEST. | |
41 | // | |
42 | // (b) ACLE code of the form: | |
43 | // | |
44 | // svbool_t ok = svrdffr (); | |
45 | // if (svptest_last (pg, ok)) | |
46 | // ... | |
47 | // | |
48 | // must, for performance reasons, be code-generated as: | |
49 | // | |
50 | // RDFFRS Pok.B, Pg/Z | |
51 | // ...branch on flags result... | |
52 | // | |
53 | // without a separate PTEST of Pok. In this case: | |
54 | // - A is an aarch64_rdffr | |
55 | // - B includes an aarch64_update_ffrt | |
56 | // - C is a separate PTEST | |
57 | // | |
58 | // Combine can handle this optimization if B doesn't exist and if A and | |
59 | // C are in the same BB. This pass instead handles cases where B does | |
60 | // exist and cases where A and C are in different BBs of the same EBB. | |
61 | ||
62 | #define IN_TARGET_CODE 1 | |
63 | ||
64 | #define INCLUDE_ALGORITHM | |
65 | #define INCLUDE_FUNCTIONAL | |
66 | #include "config.h" | |
67 | #include "system.h" | |
68 | #include "coretypes.h" | |
69 | #include "backend.h" | |
70 | #include "rtl.h" | |
71 | #include "df.h" | |
72 | #include "rtl-ssa.h" | |
73 | #include "tree-pass.h" | |
74 | ||
75 | using namespace rtl_ssa; | |
76 | ||
77 | namespace { | |
78 | const pass_data pass_data_cc_fusion = | |
79 | { | |
80 | RTL_PASS, // type | |
81 | "cc_fusion", // name | |
82 | OPTGROUP_NONE, // optinfo_flags | |
83 | TV_NONE, // tv_id | |
84 | 0, // properties_required | |
85 | 0, // properties_provided | |
86 | 0, // properties_destroyed | |
87 | 0, // todo_flags_start | |
88 | TODO_df_finish, // todo_flags_finish | |
89 | }; | |
90 | ||
91 | // Class that represents one run of the pass. | |
92 | class cc_fusion | |
93 | { | |
94 | public: | |
95 | cc_fusion () : m_parallel () {} | |
96 | void execute (); | |
97 | ||
98 | private: | |
99 | rtx optimizable_set (const insn_info *); | |
100 | bool parallelize_insns (def_info *, rtx, def_info *, rtx); | |
101 | void optimize_cc_setter (def_info *, rtx); | |
102 | ||
103 | // A spare PARALLEL rtx, or null if none. | |
104 | rtx m_parallel; | |
105 | }; | |
106 | ||
107 | // See whether INSN is a single_set that we can optimize. Return the | |
108 | // set if so, otherwise return null. | |
109 | rtx | |
110 | cc_fusion::optimizable_set (const insn_info *insn) | |
111 | { | |
112 | if (!insn->can_be_optimized () | |
113 | || insn->is_asm () | |
114 | || insn->has_volatile_refs () | |
115 | || insn->has_pre_post_modify ()) | |
116 | return NULL_RTX; | |
117 | ||
118 | return single_set (insn->rtl ()); | |
119 | } | |
120 | ||
121 | // CC_SET is a single_set that sets (only) CC_DEF; OTHER_SET is likewise | |
122 | // a single_set that sets (only) OTHER_DEF. CC_SET is known to set the | |
123 | // CC register and the instruction that contains CC_SET is known to use | |
124 | // OTHER_DEF. Try to do CC_SET and OTHER_SET in parallel. | |
125 | bool | |
126 | cc_fusion::parallelize_insns (def_info *cc_def, rtx cc_set, | |
127 | def_info *other_def, rtx other_set) | |
128 | { | |
129 | auto attempt = crtl->ssa->new_change_attempt (); | |
130 | ||
131 | insn_info *cc_insn = cc_def->insn (); | |
132 | insn_info *other_insn = other_def->insn (); | |
133 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
134 | fprintf (dump_file, "trying to parallelize insn %d and insn %d\n", | |
135 | other_insn->uid (), cc_insn->uid ()); | |
136 | ||
137 | // Try to substitute OTHER_SET into CC_INSN. | |
138 | insn_change_watermark rtl_watermark; | |
139 | rtx_insn *cc_rtl = cc_insn->rtl (); | |
140 | insn_propagation prop (cc_rtl, SET_DEST (other_set), | |
141 | SET_SRC (other_set)); | |
142 | if (!prop.apply_to_pattern (&PATTERN (cc_rtl)) | |
143 | || prop.num_replacements == 0) | |
144 | { | |
145 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
146 | fprintf (dump_file, "-- failed to substitute all uses of r%d\n", | |
147 | other_def->regno ()); | |
148 | return false; | |
149 | } | |
150 | ||
151 | // Restrict the uses to those outside notes. | |
152 | use_array cc_uses = remove_note_accesses (attempt, cc_insn->uses ()); | |
153 | use_array other_set_uses = remove_note_accesses (attempt, | |
154 | other_insn->uses ()); | |
155 | ||
156 | // Remove the use of the substituted value. | |
157 | access_array_builder uses_builder (attempt); | |
158 | uses_builder.reserve (cc_uses.size ()); | |
159 | for (use_info *use : cc_uses) | |
160 | if (use->def () != other_def) | |
161 | uses_builder.quick_push (use); | |
162 | cc_uses = use_array (uses_builder.finish ()); | |
163 | ||
164 | // Get the list of uses for the new instruction. | |
165 | insn_change cc_change (cc_insn); | |
166 | cc_change.new_uses = merge_access_arrays (attempt, other_set_uses, cc_uses); | |
167 | if (!cc_change.new_uses.is_valid ()) | |
168 | { | |
169 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
170 | fprintf (dump_file, "-- cannot merge uses\n"); | |
171 | return false; | |
172 | } | |
173 | ||
174 | // The instruction initially defines just two registers. recog can add | |
175 | // extra clobbers if necessary. | |
176 | auto_vec<access_info *, 2> new_defs; | |
177 | new_defs.quick_push (cc_def); | |
178 | new_defs.quick_push (other_def); | |
179 | sort_accesses (new_defs); | |
180 | cc_change.new_defs = def_array (access_array (new_defs)); | |
181 | ||
182 | // Make sure there is somewhere that the new instruction could live. | |
183 | auto other_change = insn_change::delete_insn (other_insn); | |
184 | insn_change *changes[] = { &other_change, &cc_change }; | |
185 | cc_change.move_range = cc_insn->ebb ()->insn_range (); | |
186 | if (!restrict_movement_ignoring (cc_change, insn_is_changing (changes))) | |
187 | { | |
188 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
189 | fprintf (dump_file, "-- cannot satisfy all definitions and uses\n"); | |
190 | return false; | |
191 | } | |
192 | ||
193 | // Tentatively install the new pattern. By convention, the CC set | |
194 | // must be first. | |
195 | if (m_parallel) | |
196 | { | |
197 | XVECEXP (m_parallel, 0, 0) = cc_set; | |
198 | XVECEXP (m_parallel, 0, 1) = other_set; | |
199 | } | |
200 | else | |
201 | { | |
202 | rtvec vec = gen_rtvec (2, cc_set, other_set); | |
203 | m_parallel = gen_rtx_PARALLEL (VOIDmode, vec); | |
204 | } | |
205 | validate_change (cc_rtl, &PATTERN (cc_rtl), m_parallel, 1); | |
206 | ||
207 | // These routines report failures themselves. | |
208 | if (!recog_ignoring (attempt, cc_change, insn_is_changing (changes)) | |
209 | || !changes_are_worthwhile (changes) | |
210 | || !crtl->ssa->verify_insn_changes (changes)) | |
211 | return false; | |
212 | ||
213 | remove_reg_equal_equiv_notes (cc_rtl); | |
214 | confirm_change_group (); | |
215 | crtl->ssa->change_insns (changes); | |
216 | m_parallel = NULL_RTX; | |
217 | return true; | |
218 | } | |
219 | ||
220 | // Try to optimize the instruction that contains CC_DEF, where CC_DEF describes | |
221 | // a definition of the CC register by CC_SET. | |
222 | void | |
223 | cc_fusion::optimize_cc_setter (def_info *cc_def, rtx cc_set) | |
224 | { | |
225 | // Search the registers used by the CC setter for an easily-substitutable | |
226 | // def-use chain. | |
227 | for (use_info *other_use : cc_def->insn ()->uses ()) | |
228 | if (def_info *other_def = other_use->def ()) | |
229 | if (other_use->regno () != CC_REGNUM | |
230 | && other_def->ebb () == cc_def->ebb ()) | |
231 | if (rtx other_set = optimizable_set (other_def->insn ())) | |
232 | { | |
233 | rtx dest = SET_DEST (other_set); | |
234 | if (REG_P (dest) | |
235 | && REGNO (dest) == other_def->regno () | |
236 | && REG_NREGS (dest) == 1 | |
237 | && parallelize_insns (cc_def, cc_set, other_def, other_set)) | |
238 | return; | |
239 | } | |
240 | } | |
241 | ||
242 | // Run the pass on the current function. | |
243 | void | |
244 | cc_fusion::execute () | |
245 | { | |
246 | // Initialization. | |
247 | calculate_dominance_info (CDI_DOMINATORS); | |
248 | df_analyze (); | |
249 | crtl->ssa = new rtl_ssa::function_info (cfun); | |
250 | ||
251 | // Walk through all instructions that set CC. Look for a PTEST instruction | |
252 | // that we can optimize. | |
253 | // | |
254 | // ??? The PTEST test isn't needed for correctness, but it ensures that the | |
255 | // pass no effect on non-SVE code. | |
256 | for (def_info *def : crtl->ssa->reg_defs (CC_REGNUM)) | |
257 | if (rtx cc_set = optimizable_set (def->insn ())) | |
258 | if (REG_P (SET_DEST (cc_set)) | |
259 | && REGNO (SET_DEST (cc_set)) == CC_REGNUM | |
260 | && GET_CODE (SET_SRC (cc_set)) == UNSPEC | |
261 | && XINT (SET_SRC (cc_set), 1) == UNSPEC_PTEST) | |
262 | optimize_cc_setter (def, cc_set); | |
263 | ||
264 | // Finalization. | |
265 | crtl->ssa->perform_pending_updates (); | |
266 | free_dominance_info (CDI_DOMINATORS); | |
267 | } | |
268 | ||
269 | class pass_cc_fusion : public rtl_opt_pass | |
270 | { | |
271 | public: | |
272 | pass_cc_fusion (gcc::context *ctxt) | |
273 | : rtl_opt_pass (pass_data_cc_fusion, ctxt) | |
274 | {} | |
275 | ||
276 | // opt_pass methods: | |
277 | virtual bool gate (function *) { return TARGET_SVE && optimize >= 2; } | |
278 | virtual unsigned int execute (function *); | |
279 | }; | |
280 | ||
281 | unsigned int | |
282 | pass_cc_fusion::execute (function *) | |
283 | { | |
284 | cc_fusion ().execute (); | |
285 | return 0; | |
286 | } | |
287 | ||
288 | } // end namespace | |
289 | ||
290 | // Create a new CC fusion pass instance. | |
291 | ||
292 | rtl_opt_pass * | |
293 | make_pass_cc_fusion (gcc::context *ctxt) | |
294 | { | |
295 | return new pass_cc_fusion (ctxt); | |
296 | } |