]>
Commit | Line | Data |
---|---|---|
9243c3d1 | 1 | /* VSETVL pass for RISC-V 'V' Extension for GNU compiler. |
c841bde5 | 2 | Copyright (C) 2022-2023 Free Software Foundation, Inc. |
9243c3d1 JZZ |
3 | Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd. |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or(at your option) | |
10 | any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
21 | /* This pass is to Set VL/VTYPE global status for RVV instructions | |
22 | that depend on VL and VTYPE registers by Lazy code motion (LCM). | |
23 | ||
24 | Strategy: | |
25 | ||
26 | - Backward demanded info fusion within block. | |
27 | ||
28 | - Lazy code motion (LCM) based demanded info backward propagation. | |
29 | ||
30 | - RTL_SSA framework for def-use, PHI analysis. | |
31 | ||
32 | - Lazy code motion (LCM) for global VL/VTYPE optimization. | |
33 | ||
34 | Assumption: | |
35 | ||
36 | - Each avl operand is either an immediate (must be in range 0 ~ 31) or reg. | |
37 | ||
38 | This pass consists of 6 phases: | |
39 | ||
40 | - Phase 1 - compute VL/VTYPE demanded information within each block | |
41 | by backward data-flow analysis. | |
42 | ||
43 | - Phase 2 - Emit vsetvl instructions within each basic block according to | |
44 | demand, compute and save ANTLOC && AVLOC of each block. | |
45 | ||
387cd9d3 JZZ |
46 | - Phase 3 - Backward && forward demanded info propagation and fusion across |
47 | blocks. | |
9243c3d1 JZZ |
48 | |
49 | - Phase 4 - Lazy code motion including: compute local properties, | |
50 | pre_edge_lcm and vsetvl insertion && delete edges for LCM results. | |
51 | ||
52 | - Phase 5 - Cleanup AVL operand of RVV instruction since it will not be | |
53 | used any more and VL operand of VSETVL instruction if it is not used by | |
54 | any non-debug instructions. | |
55 | ||
6b6b9c68 JZZ |
56 | - Phase 6 - Propagate AVL between vsetvl instructions. |
57 | ||
9243c3d1 JZZ |
58 | Implementation: |
59 | ||
60 | - The subroutine of optimize == 0 is simple_vsetvl. | |
61 | This function simply inserts a vsetvl for each RVV | |
62 | instruction. No optimization. | |
63 | ||
64 | - The subroutine of optimize > 0 is lazy_vsetvl. | |
65 | This function optimizes the vsetvl insertion process by | |
ec99ffab JZZ |
66 | lazy code motion (LCM) layering on RTL_SSA. |
67 | ||
68 | - get_avl (), get_insn (), get_avl_source (): | |
69 | ||
70 | 1. get_insn () is the current instruction, find_access (get_insn | |
71 | ())->def is the same as get_avl_source () if get_insn () demand VL. | |
72 | 2. If get_avl () is non-VLMAX REG, get_avl () == get_avl_source | |
73 | ()->regno (). | |
74 | 3. get_avl_source ()->regno () is the REGNO that we backward propagate. | |
75 | */ | |
9243c3d1 JZZ |
76 | |
77 | #define IN_TARGET_CODE 1 | |
78 | #define INCLUDE_ALGORITHM | |
79 | #define INCLUDE_FUNCTIONAL | |
80 | ||
81 | #include "config.h" | |
82 | #include "system.h" | |
83 | #include "coretypes.h" | |
84 | #include "tm.h" | |
85 | #include "backend.h" | |
86 | #include "rtl.h" | |
87 | #include "target.h" | |
88 | #include "tree-pass.h" | |
89 | #include "df.h" | |
90 | #include "rtl-ssa.h" | |
91 | #include "cfgcleanup.h" | |
92 | #include "insn-config.h" | |
93 | #include "insn-attr.h" | |
94 | #include "insn-opinit.h" | |
95 | #include "tm-constrs.h" | |
96 | #include "cfgrtl.h" | |
97 | #include "cfganal.h" | |
98 | #include "lcm.h" | |
99 | #include "predict.h" | |
100 | #include "profile-count.h" | |
101 | #include "riscv-vsetvl.h" | |
102 | ||
103 | using namespace rtl_ssa; | |
104 | using namespace riscv_vector; | |
105 | ||
ec99ffab JZZ |
106 | static CONSTEXPR const unsigned ALL_SEW[] = {8, 16, 32, 64}; |
107 | static CONSTEXPR const vlmul_type ALL_LMUL[] | |
108 | = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2}; | |
ec99ffab | 109 | |
9243c3d1 JZZ |
110 | DEBUG_FUNCTION void |
111 | debug (const vector_insn_info *info) | |
112 | { | |
113 | info->dump (stderr); | |
114 | } | |
115 | ||
116 | DEBUG_FUNCTION void | |
117 | debug (const vector_infos_manager *info) | |
118 | { | |
119 | info->dump (stderr); | |
120 | } | |
121 | ||
122 | static bool | |
123 | vlmax_avl_p (rtx x) | |
124 | { | |
125 | return x && rtx_equal_p (x, RVV_VLMAX); | |
126 | } | |
127 | ||
128 | static bool | |
129 | vlmax_avl_insn_p (rtx_insn *rinsn) | |
130 | { | |
85112fbb JZZ |
131 | return (INSN_CODE (rinsn) == CODE_FOR_vlmax_avlsi |
132 | || INSN_CODE (rinsn) == CODE_FOR_vlmax_avldi); | |
9243c3d1 JZZ |
133 | } |
134 | ||
8d8cc482 JZZ |
135 | /* Return true if the block is a loop itself: |
136 | local_dem | |
137 | __________ | |
138 | ____|____ | | |
139 | | | | | |
140 | |________| | | |
141 | |_________| | |
142 | reaching_out | |
143 | */ | |
9243c3d1 JZZ |
144 | static bool |
145 | loop_basic_block_p (const basic_block cfg_bb) | |
146 | { | |
8d8cc482 JZZ |
147 | if (JUMP_P (BB_END (cfg_bb)) && any_condjump_p (BB_END (cfg_bb))) |
148 | { | |
149 | edge e; | |
150 | edge_iterator ei; | |
151 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
152 | if (e->dest->index == cfg_bb->index) | |
153 | return true; | |
154 | } | |
155 | return false; | |
9243c3d1 JZZ |
156 | } |
157 | ||
158 | /* Return true if it is an RVV instruction depends on VTYPE global | |
159 | status register. */ | |
160 | static bool | |
161 | has_vtype_op (rtx_insn *rinsn) | |
162 | { | |
163 | return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn); | |
164 | } | |
165 | ||
166 | /* Return true if it is an RVV instruction depends on VL global | |
167 | status register. */ | |
168 | static bool | |
169 | has_vl_op (rtx_insn *rinsn) | |
170 | { | |
171 | return recog_memoized (rinsn) >= 0 && get_attr_has_vl_op (rinsn); | |
172 | } | |
173 | ||
/* Return true if SEW is a value that can be encoded into the VTYPE format,
   i.e. a power of two in the range [8, 64] (see ALL_SEW).  */
static bool
valid_sew_p (size_t sew)
{
  /* exact_log2 returns -1 for a non-power-of-two, which is "true" when used
     as a boolean, so testing its result for non-zero would accept values
     such as 24 or 48.  Check the power-of-two property directly; SEW is
     known to be non-zero once the lower bound has been checked.  */
  return sew >= 8 && sew <= 64 && (sew & (sew - 1)) == 0;
}
180 | ||
ec99ffab JZZ |
181 | /* Return true if the instruction ignores VLMUL field of VTYPE. */ |
182 | static bool | |
183 | ignore_vlmul_insn_p (rtx_insn *rinsn) | |
184 | { | |
185 | return get_attr_type (rinsn) == TYPE_VIMOVVX | |
186 | || get_attr_type (rinsn) == TYPE_VFMOVVF | |
187 | || get_attr_type (rinsn) == TYPE_VIMOVXV | |
188 | || get_attr_type (rinsn) == TYPE_VFMOVFV; | |
189 | } | |
190 | ||
191 | /* Return true if the instruction is scalar move instruction. */ | |
192 | static bool | |
193 | scalar_move_insn_p (rtx_insn *rinsn) | |
194 | { | |
195 | return get_attr_type (rinsn) == TYPE_VIMOVXV | |
196 | || get_attr_type (rinsn) == TYPE_VFMOVFV; | |
197 | } | |
198 | ||
60bd33bc JZZ |
199 | /* Return true if the instruction is fault first load instruction. */ |
200 | static bool | |
201 | fault_first_load_p (rtx_insn *rinsn) | |
202 | { | |
6313b045 JZZ |
203 | return recog_memoized (rinsn) >= 0 |
204 | && (get_attr_type (rinsn) == TYPE_VLDFF | |
205 | || get_attr_type (rinsn) == TYPE_VLSEGDFF); | |
60bd33bc JZZ |
206 | } |
207 | ||
208 | /* Return true if the instruction is read vl instruction. */ | |
209 | static bool | |
210 | read_vl_insn_p (rtx_insn *rinsn) | |
211 | { | |
212 | return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_RDVL; | |
213 | } | |
214 | ||
9243c3d1 JZZ |
215 | /* Return true if it is a vsetvl instruction. */ |
216 | static bool | |
217 | vector_config_insn_p (rtx_insn *rinsn) | |
218 | { | |
219 | return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL; | |
220 | } | |
221 | ||
222 | /* Return true if it is vsetvldi or vsetvlsi. */ | |
223 | static bool | |
224 | vsetvl_insn_p (rtx_insn *rinsn) | |
225 | { | |
6b6b9c68 JZZ |
226 | if (!vector_config_insn_p (rinsn)) |
227 | return false; | |
85112fbb | 228 | return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi |
6b6b9c68 JZZ |
229 | || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi); |
230 | } | |
231 | ||
232 | /* Return true if it is vsetvl zero, rs1. */ | |
233 | static bool | |
234 | vsetvl_discard_result_insn_p (rtx_insn *rinsn) | |
235 | { | |
236 | if (!vector_config_insn_p (rinsn)) | |
237 | return false; | |
238 | return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi | |
239 | || INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi); | |
9243c3d1 JZZ |
240 | } |
241 | ||
9243c3d1 | 242 | static bool |
4f673c5e | 243 | real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb) |
9243c3d1 | 244 | { |
4f673c5e | 245 | return insn != nullptr && insn->is_real () && insn->bb () == bb; |
9243c3d1 JZZ |
246 | } |
247 | ||
9243c3d1 | 248 | static bool |
4f673c5e | 249 | before_p (const insn_info *insn1, const insn_info *insn2) |
9243c3d1 | 250 | { |
9b9a1ac1 | 251 | return insn1->compare_with (insn2) < 0; |
4f673c5e JZZ |
252 | } |
253 | ||
6b6b9c68 JZZ |
254 | static insn_info * |
255 | find_reg_killed_by (const bb_info *bb, rtx x) | |
4f673c5e | 256 | { |
6b6b9c68 JZZ |
257 | if (!x || vlmax_avl_p (x) || !REG_P (x)) |
258 | return nullptr; | |
259 | for (insn_info *insn : bb->reverse_real_nondebug_insns ()) | |
4f673c5e | 260 | if (find_access (insn->defs (), REGNO (x))) |
6b6b9c68 JZZ |
261 | return insn; |
262 | return nullptr; | |
263 | } | |
264 | ||
265 | /* Helper function to get VL operand. */ | |
266 | static rtx | |
267 | get_vl (rtx_insn *rinsn) | |
268 | { | |
269 | if (has_vl_op (rinsn)) | |
270 | { | |
271 | extract_insn_cached (rinsn); | |
272 | return recog_data.operand[get_attr_vl_op_idx (rinsn)]; | |
273 | } | |
274 | return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0)); | |
4f673c5e JZZ |
275 | } |
276 | ||
277 | static bool | |
278 | has_vsetvl_killed_avl_p (const bb_info *bb, const vector_insn_info &info) | |
279 | { | |
280 | if (info.dirty_with_killed_avl_p ()) | |
281 | { | |
282 | rtx avl = info.get_avl (); | |
6b6b9c68 | 283 | if (vlmax_avl_p (avl)) |
ec99ffab | 284 | return find_reg_killed_by (bb, info.get_avl_reg_rtx ()) != nullptr; |
4f673c5e JZZ |
285 | for (const insn_info *insn : bb->reverse_real_nondebug_insns ()) |
286 | { | |
287 | def_info *def = find_access (insn->defs (), REGNO (avl)); | |
288 | if (def) | |
289 | { | |
290 | set_info *set = safe_dyn_cast<set_info *> (def); | |
291 | if (!set) | |
292 | return false; | |
293 | ||
294 | rtx new_avl = gen_rtx_REG (GET_MODE (avl), REGNO (avl)); | |
295 | gcc_assert (new_avl != avl); | |
296 | if (!info.compatible_avl_p (avl_info (new_avl, set))) | |
297 | return false; | |
298 | ||
299 | return true; | |
300 | } | |
301 | } | |
302 | } | |
303 | return false; | |
304 | } | |
305 | ||
9243c3d1 JZZ |
306 | /* An "anticipatable occurrence" is one that is the first occurrence in the |
307 | basic block, the operands are not modified in the basic block prior | |
308 | to the occurrence and the output is not used between the start of | |
4f673c5e JZZ |
309 | the block and the occurrence. |
310 | ||
311 | For VSETVL instruction, we have these following formats: | |
312 | 1. vsetvl zero, rs1. | |
313 | 2. vsetvl zero, imm. | |
314 | 3. vsetvl rd, rs1. | |
315 | ||
316 | So base on these circumstances, a DEM is considered as a local anticipatable | |
317 | occurrence should satisfy these following conditions: | |
318 | ||
319 | 1). rs1 (avl) are not modified in the basic block prior to the VSETVL. | |
320 | 2). rd (vl) are not modified in the basic block prior to the VSETVL. | |
321 | 3). rd (vl) is not used between the start of the block and the occurrence. | |
322 | ||
323 | Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE | |
324 | is modified prior to the occurrence. This case is already considered as | |
325 | a non-local anticipatable occurrence. | |
326 | */ | |
9243c3d1 | 327 | static bool |
4f673c5e | 328 | anticipatable_occurrence_p (const bb_info *bb, const vector_insn_info dem) |
9243c3d1 | 329 | { |
4f673c5e | 330 | insn_info *insn = dem.get_insn (); |
9243c3d1 JZZ |
331 | /* The only possible operand we care of VSETVL is AVL. */ |
332 | if (dem.has_avl_reg ()) | |
333 | { | |
4f673c5e | 334 | /* rs1 (avl) are not modified in the basic block prior to the VSETVL. */ |
9243c3d1 JZZ |
335 | if (!vlmax_avl_p (dem.get_avl ())) |
336 | { | |
ec99ffab | 337 | set_info *set = dem.get_avl_source (); |
9243c3d1 JZZ |
338 | /* If it's undefined, it's not anticipatable conservatively. */ |
339 | if (!set) | |
340 | return false; | |
4f673c5e JZZ |
341 | if (real_insn_and_same_bb_p (set->insn (), bb) |
342 | && before_p (set->insn (), insn)) | |
9243c3d1 JZZ |
343 | return false; |
344 | } | |
345 | } | |
346 | ||
4f673c5e | 347 | /* rd (vl) is not used between the start of the block and the occurrence. */ |
9243c3d1 JZZ |
348 | if (vsetvl_insn_p (insn->rtl ())) |
349 | { | |
4f673c5e JZZ |
350 | rtx dest = get_vl (insn->rtl ()); |
351 | for (insn_info *i = insn->prev_nondebug_insn (); | |
352 | real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ()) | |
353 | { | |
354 | /* rd (vl) is not used between the start of the block and the | |
355 | * occurrence. */ | |
356 | if (find_access (i->uses (), REGNO (dest))) | |
357 | return false; | |
358 | /* rd (vl) are not modified in the basic block prior to the VSETVL. */ | |
359 | if (find_access (i->defs (), REGNO (dest))) | |
360 | return false; | |
361 | } | |
9243c3d1 JZZ |
362 | } |
363 | ||
364 | return true; | |
365 | } | |
366 | ||
367 | /* An "available occurrence" is one that is the last occurrence in the | |
368 | basic block and the operands are not modified by following statements in | |
4f673c5e JZZ |
369 | the basic block [including this insn]. |
370 | ||
371 | For VSETVL instruction, we have these following formats: | |
372 | 1. vsetvl zero, rs1. | |
373 | 2. vsetvl zero, imm. | |
374 | 3. vsetvl rd, rs1. | |
375 | ||
376 | So base on these circumstances, a DEM is considered as a local available | |
377 | occurrence should satisfy these following conditions: | |
378 | ||
379 | 1). rs1 (avl) are not modified by following statements in | |
380 | the basic block. | |
381 | 2). rd (vl) are not modified by following statements in | |
382 | the basic block. | |
383 | ||
384 | Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE | |
385 | is modified prior to the occurrence. This case is already considered as | |
386 | a non-local available occurrence. | |
387 | */ | |
9243c3d1 | 388 | static bool |
4f673c5e | 389 | available_occurrence_p (const bb_info *bb, const vector_insn_info dem) |
9243c3d1 | 390 | { |
4f673c5e | 391 | insn_info *insn = dem.get_insn (); |
9243c3d1 JZZ |
392 | /* The only possible operand we care of VSETVL is AVL. */ |
393 | if (dem.has_avl_reg ()) | |
394 | { | |
9243c3d1 JZZ |
395 | if (!vlmax_avl_p (dem.get_avl ())) |
396 | { | |
4f673c5e JZZ |
397 | rtx dest = NULL_RTX; |
398 | if (vsetvl_insn_p (insn->rtl ())) | |
399 | dest = get_vl (insn->rtl ()); | |
400 | for (const insn_info *i = insn; real_insn_and_same_bb_p (i, bb); | |
401 | i = i->next_nondebug_insn ()) | |
402 | { | |
60bd33bc JZZ |
403 | if (read_vl_insn_p (i->rtl ())) |
404 | continue; | |
4f673c5e JZZ |
405 | /* rs1 (avl) are not modified by following statements in |
406 | the basic block. */ | |
407 | if (find_access (i->defs (), REGNO (dem.get_avl ()))) | |
408 | return false; | |
409 | /* rd (vl) are not modified by following statements in | |
410 | the basic block. */ | |
411 | if (dest && find_access (i->defs (), REGNO (dest))) | |
412 | return false; | |
413 | } | |
9243c3d1 JZZ |
414 | } |
415 | } | |
416 | return true; | |
417 | } | |
418 | ||
6b6b9c68 JZZ |
419 | static bool |
420 | insn_should_be_added_p (const insn_info *insn, unsigned int types) | |
9243c3d1 | 421 | { |
6b6b9c68 JZZ |
422 | if (insn->is_real () && (types & REAL_SET)) |
423 | return true; | |
424 | if (insn->is_phi () && (types & PHI_SET)) | |
425 | return true; | |
426 | if (insn->is_bb_head () && (types & BB_HEAD_SET)) | |
427 | return true; | |
428 | if (insn->is_bb_end () && (types & BB_END_SET)) | |
429 | return true; | |
430 | return false; | |
9243c3d1 JZZ |
431 | } |
432 | ||
6b6b9c68 JZZ |
433 | /* Recursively find all define instructions. The kind of instruction is |
434 | specified by the DEF_TYPE. */ | |
435 | static hash_set<set_info *> | |
436 | get_all_sets (phi_info *phi, unsigned int types) | |
9243c3d1 | 437 | { |
6b6b9c68 | 438 | hash_set<set_info *> insns; |
4f673c5e JZZ |
439 | auto_vec<phi_info *> work_list; |
440 | hash_set<phi_info *> visited_list; | |
441 | if (!phi) | |
6b6b9c68 | 442 | return hash_set<set_info *> (); |
4f673c5e | 443 | work_list.safe_push (phi); |
9243c3d1 | 444 | |
4f673c5e | 445 | while (!work_list.is_empty ()) |
9243c3d1 | 446 | { |
4f673c5e JZZ |
447 | phi_info *phi = work_list.pop (); |
448 | visited_list.add (phi); | |
449 | for (use_info *use : phi->inputs ()) | |
9243c3d1 | 450 | { |
4f673c5e | 451 | def_info *def = use->def (); |
6b6b9c68 JZZ |
452 | set_info *set = safe_dyn_cast<set_info *> (def); |
453 | if (!set) | |
454 | return hash_set<set_info *> (); | |
9243c3d1 | 455 | |
6b6b9c68 | 456 | gcc_assert (!set->insn ()->is_debug_insn ()); |
9243c3d1 | 457 | |
6b6b9c68 JZZ |
458 | if (insn_should_be_added_p (set->insn (), types)) |
459 | insns.add (set); | |
460 | if (set->insn ()->is_phi ()) | |
4f673c5e | 461 | { |
6b6b9c68 | 462 | phi_info *new_phi = as_a<phi_info *> (set); |
4f673c5e JZZ |
463 | if (!visited_list.contains (new_phi)) |
464 | work_list.safe_push (new_phi); | |
465 | } | |
466 | } | |
9243c3d1 | 467 | } |
4f673c5e JZZ |
468 | return insns; |
469 | } | |
9243c3d1 | 470 | |
6b6b9c68 JZZ |
471 | static hash_set<set_info *> |
472 | get_all_sets (set_info *set, bool /* get_real_inst */ real_p, | |
473 | bool /*get_phi*/ phi_p, bool /* get_function_parameter*/ param_p) | |
474 | { | |
475 | if (real_p && phi_p && param_p) | |
476 | return get_all_sets (safe_dyn_cast<phi_info *> (set), | |
477 | REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET); | |
478 | ||
479 | else if (real_p && param_p) | |
480 | return get_all_sets (safe_dyn_cast<phi_info *> (set), | |
481 | REAL_SET | BB_HEAD_SET | BB_END_SET); | |
482 | ||
483 | else if (real_p) | |
484 | return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET); | |
485 | return hash_set<set_info *> (); | |
486 | } | |
487 | ||
488 | /* Helper function to get AVL operand. */ | |
489 | static rtx | |
490 | get_avl (rtx_insn *rinsn) | |
491 | { | |
492 | if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn)) | |
493 | return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0); | |
494 | ||
495 | if (!has_vl_op (rinsn)) | |
496 | return NULL_RTX; | |
497 | if (get_attr_avl_type (rinsn) == VLMAX) | |
498 | return RVV_VLMAX; | |
499 | extract_insn_cached (rinsn); | |
500 | return recog_data.operand[get_attr_vl_op_idx (rinsn)]; | |
501 | } | |
502 | ||
503 | static set_info * | |
504 | get_same_bb_set (hash_set<set_info *> &sets, const basic_block cfg_bb) | |
505 | { | |
506 | for (set_info *set : sets) | |
507 | if (set->bb ()->cfg_bb () == cfg_bb) | |
508 | return set; | |
509 | return nullptr; | |
510 | } | |
511 | ||
4f673c5e JZZ |
512 | /* Recursively find all predecessor blocks for cfg_bb. */ |
513 | static hash_set<basic_block> | |
514 | get_all_predecessors (basic_block cfg_bb) | |
515 | { | |
516 | hash_set<basic_block> blocks; | |
517 | auto_vec<basic_block> work_list; | |
518 | hash_set<basic_block> visited_list; | |
519 | work_list.safe_push (cfg_bb); | |
9243c3d1 | 520 | |
4f673c5e | 521 | while (!work_list.is_empty ()) |
9243c3d1 | 522 | { |
4f673c5e JZZ |
523 | basic_block new_cfg_bb = work_list.pop (); |
524 | visited_list.add (new_cfg_bb); | |
525 | edge e; | |
526 | edge_iterator ei; | |
527 | FOR_EACH_EDGE (e, ei, new_cfg_bb->preds) | |
528 | { | |
529 | if (!visited_list.contains (e->src)) | |
530 | work_list.safe_push (e->src); | |
531 | blocks.add (e->src); | |
532 | } | |
9243c3d1 | 533 | } |
4f673c5e JZZ |
534 | return blocks; |
535 | } | |
9243c3d1 | 536 | |
4f673c5e JZZ |
537 | /* Return true if there is an INSN in insns staying in the block BB. */ |
538 | static bool | |
6b6b9c68 | 539 | any_set_in_bb_p (hash_set<set_info *> sets, const bb_info *bb) |
4f673c5e | 540 | { |
6b6b9c68 JZZ |
541 | for (const set_info *set : sets) |
542 | if (set->bb ()->index () == bb->index ()) | |
4f673c5e JZZ |
543 | return true; |
544 | return false; | |
9243c3d1 JZZ |
545 | } |
546 | ||
547 | /* Helper function to get SEW operand. We always have SEW value for | |
548 | all RVV instructions that have VTYPE OP. */ | |
549 | static uint8_t | |
550 | get_sew (rtx_insn *rinsn) | |
551 | { | |
552 | return get_attr_sew (rinsn); | |
553 | } | |
554 | ||
555 | /* Helper function to get VLMUL operand. We always have VLMUL value for | |
556 | all RVV instructions that have VTYPE OP. */ | |
557 | static enum vlmul_type | |
558 | get_vlmul (rtx_insn *rinsn) | |
559 | { | |
560 | return (enum vlmul_type) get_attr_vlmul (rinsn); | |
561 | } | |
562 | ||
563 | /* Get default tail policy. */ | |
564 | static bool | |
565 | get_default_ta () | |
566 | { | |
567 | /* For the instruction that doesn't require TA, we still need a default value | |
568 | to emit vsetvl. We pick up the default value according to prefer policy. */ | |
569 | return (bool) (get_prefer_tail_policy () & 0x1 | |
570 | || (get_prefer_tail_policy () >> 1 & 0x1)); | |
571 | } | |
572 | ||
573 | /* Get default mask policy. */ | |
574 | static bool | |
575 | get_default_ma () | |
576 | { | |
577 | /* For the instruction that doesn't require MA, we still need a default value | |
578 | to emit vsetvl. We pick up the default value according to prefer policy. */ | |
579 | return (bool) (get_prefer_mask_policy () & 0x1 | |
580 | || (get_prefer_mask_policy () >> 1 & 0x1)); | |
581 | } | |
582 | ||
583 | /* Helper function to get TA operand. */ | |
584 | static bool | |
585 | tail_agnostic_p (rtx_insn *rinsn) | |
586 | { | |
587 | /* If it doesn't have TA, we return agnostic by default. */ | |
588 | extract_insn_cached (rinsn); | |
589 | int ta = get_attr_ta (rinsn); | |
590 | return ta == INVALID_ATTRIBUTE ? get_default_ta () : IS_AGNOSTIC (ta); | |
591 | } | |
592 | ||
593 | /* Helper function to get MA operand. */ | |
594 | static bool | |
595 | mask_agnostic_p (rtx_insn *rinsn) | |
596 | { | |
597 | /* If it doesn't have MA, we return agnostic by default. */ | |
598 | extract_insn_cached (rinsn); | |
599 | int ma = get_attr_ma (rinsn); | |
600 | return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma); | |
601 | } | |
602 | ||
603 | /* Return true if FN has a vector instruction that use VL/VTYPE. */ | |
604 | static bool | |
605 | has_vector_insn (function *fn) | |
606 | { | |
607 | basic_block cfg_bb; | |
608 | rtx_insn *rinsn; | |
609 | FOR_ALL_BB_FN (cfg_bb, fn) | |
610 | FOR_BB_INSNS (cfg_bb, rinsn) | |
611 | if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn)) | |
612 | return true; | |
613 | return false; | |
614 | } | |
615 | ||
616 | /* Emit vsetvl instruction. */ | |
617 | static rtx | |
e577b91b | 618 | gen_vsetvl_pat (enum vsetvl_type insn_type, const vl_vtype_info &info, rtx vl) |
9243c3d1 JZZ |
619 | { |
620 | rtx avl = info.get_avl (); | |
04655110 LX |
621 | /* if optimization == 0 and the instruction is vmv.x.s/vfmv.f.s, |
622 | set the value of avl to (const_int 0) so that VSETVL PASS will | |
623 | insert vsetvl correctly.*/ | |
624 | if (info.has_avl_no_reg ()) | |
625 | avl = GEN_INT (0); | |
9243c3d1 JZZ |
626 | rtx sew = gen_int_mode (info.get_sew (), Pmode); |
627 | rtx vlmul = gen_int_mode (info.get_vlmul (), Pmode); | |
628 | rtx ta = gen_int_mode (info.get_ta (), Pmode); | |
629 | rtx ma = gen_int_mode (info.get_ma (), Pmode); | |
630 | ||
631 | if (insn_type == VSETVL_NORMAL) | |
632 | { | |
633 | gcc_assert (vl != NULL_RTX); | |
634 | return gen_vsetvl (Pmode, vl, avl, sew, vlmul, ta, ma); | |
635 | } | |
636 | else if (insn_type == VSETVL_VTYPE_CHANGE_ONLY) | |
637 | return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma); | |
638 | else | |
639 | return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma); | |
640 | } | |
641 | ||
642 | static rtx | |
e577b91b | 643 | gen_vsetvl_pat (rtx_insn *rinsn, const vector_insn_info &info) |
9243c3d1 JZZ |
644 | { |
645 | rtx new_pat; | |
60bd33bc JZZ |
646 | vl_vtype_info new_info = info; |
647 | if (info.get_insn () && info.get_insn ()->rtl () | |
648 | && fault_first_load_p (info.get_insn ()->rtl ())) | |
649 | new_info.set_avl_info ( | |
650 | avl_info (get_avl (info.get_insn ()->rtl ()), nullptr)); | |
9243c3d1 JZZ |
651 | if (vsetvl_insn_p (rinsn) || vlmax_avl_p (info.get_avl ())) |
652 | { | |
653 | rtx dest = get_vl (rinsn); | |
60bd33bc | 654 | new_pat = gen_vsetvl_pat (VSETVL_NORMAL, new_info, dest); |
9243c3d1 JZZ |
655 | } |
656 | else if (INSN_CODE (rinsn) == CODE_FOR_vsetvl_vtype_change_only) | |
60bd33bc | 657 | new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, new_info, NULL_RTX); |
9243c3d1 | 658 | else |
60bd33bc | 659 | new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX); |
9243c3d1 JZZ |
660 | return new_pat; |
661 | } | |
662 | ||
663 | static void | |
664 | emit_vsetvl_insn (enum vsetvl_type insn_type, enum emit_type emit_type, | |
e577b91b | 665 | const vl_vtype_info &info, rtx vl, rtx_insn *rinsn) |
9243c3d1 JZZ |
666 | { |
667 | rtx pat = gen_vsetvl_pat (insn_type, info, vl); | |
668 | if (dump_file) | |
669 | { | |
670 | fprintf (dump_file, "\nInsert vsetvl insn PATTERN:\n"); | |
671 | print_rtl_single (dump_file, pat); | |
672 | } | |
673 | ||
674 | if (emit_type == EMIT_DIRECT) | |
675 | emit_insn (pat); | |
676 | else if (emit_type == EMIT_BEFORE) | |
677 | emit_insn_before (pat, rinsn); | |
678 | else | |
679 | emit_insn_after (pat, rinsn); | |
680 | } | |
681 | ||
682 | static void | |
683 | eliminate_insn (rtx_insn *rinsn) | |
684 | { | |
685 | if (dump_file) | |
686 | { | |
687 | fprintf (dump_file, "\nEliminate insn %d:\n", INSN_UID (rinsn)); | |
688 | print_rtl_single (dump_file, rinsn); | |
689 | } | |
690 | if (in_sequence_p ()) | |
691 | remove_insn (rinsn); | |
692 | else | |
693 | delete_insn (rinsn); | |
694 | } | |
695 | ||
a481eed8 | 696 | static vsetvl_type |
9243c3d1 JZZ |
697 | insert_vsetvl (enum emit_type emit_type, rtx_insn *rinsn, |
698 | const vector_insn_info &info, const vector_insn_info &prev_info) | |
699 | { | |
700 | /* Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same | |
701 | VLMAX. */ | |
702 | if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p () | |
4f673c5e | 703 | && info.compatible_avl_p (prev_info) && info.same_vlmax_p (prev_info)) |
9243c3d1 JZZ |
704 | { |
705 | emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX, | |
706 | rinsn); | |
a481eed8 | 707 | return VSETVL_VTYPE_CHANGE_ONLY; |
9243c3d1 JZZ |
708 | } |
709 | ||
710 | if (info.has_avl_imm ()) | |
711 | { | |
712 | emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX, | |
713 | rinsn); | |
a481eed8 | 714 | return VSETVL_DISCARD_RESULT; |
9243c3d1 JZZ |
715 | } |
716 | ||
717 | if (info.has_avl_no_reg ()) | |
718 | { | |
719 | /* We can only use x0, x0 if there's no chance of the vtype change causing | |
720 | the previous vl to become invalid. */ | |
721 | if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p () | |
722 | && info.same_vlmax_p (prev_info)) | |
723 | { | |
724 | emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX, | |
725 | rinsn); | |
a481eed8 | 726 | return VSETVL_VTYPE_CHANGE_ONLY; |
9243c3d1 JZZ |
727 | } |
728 | /* Otherwise use an AVL of 0 to avoid depending on previous vl. */ | |
729 | vl_vtype_info new_info = info; | |
730 | new_info.set_avl_info (avl_info (const0_rtx, nullptr)); | |
731 | emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, new_info, NULL_RTX, | |
732 | rinsn); | |
a481eed8 | 733 | return VSETVL_DISCARD_RESULT; |
9243c3d1 JZZ |
734 | } |
735 | ||
736 | /* Use X0 as the DestReg unless AVLReg is X0. We also need to change the | |
737 | opcode if the AVLReg is X0 as they have different register classes for | |
738 | the AVL operand. */ | |
739 | if (vlmax_avl_p (info.get_avl ())) | |
740 | { | |
741 | gcc_assert (has_vtype_op (rinsn) || vsetvl_insn_p (rinsn)); | |
ec99ffab | 742 | rtx vl_op = info.get_avl_reg_rtx (); |
9243c3d1 JZZ |
743 | gcc_assert (!vlmax_avl_p (vl_op)); |
744 | emit_vsetvl_insn (VSETVL_NORMAL, emit_type, info, vl_op, rinsn); | |
a481eed8 | 745 | return VSETVL_NORMAL; |
9243c3d1 JZZ |
746 | } |
747 | ||
748 | emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX, rinsn); | |
749 | ||
750 | if (dump_file) | |
751 | { | |
752 | fprintf (dump_file, "Update VL/VTYPE info, previous info="); | |
753 | prev_info.dump (dump_file); | |
754 | } | |
a481eed8 | 755 | return VSETVL_DISCARD_RESULT; |
9243c3d1 JZZ |
756 | } |
757 | ||
758 | /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them | |
759 | to INSN. If such notes are added to an insn which references a | |
760 | CODE_LABEL, the LABEL_NUSES count is incremented. We have to add | |
761 | that note, because the following loop optimization pass requires | |
762 | them. */ | |
763 | ||
764 | /* ??? If there was a jump optimization pass after gcse and before loop, | |
765 | then we would not need to do this here, because jump would add the | |
766 | necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */ | |
767 | ||
768 | static void | |
27a2a4b6 | 769 | add_label_notes (rtx x, rtx_insn *rinsn) |
9243c3d1 JZZ |
770 | { |
771 | enum rtx_code code = GET_CODE (x); | |
772 | int i, j; | |
773 | const char *fmt; | |
774 | ||
775 | if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x)) | |
776 | { | |
777 | /* This code used to ignore labels that referred to dispatch tables to | |
778 | avoid flow generating (slightly) worse code. | |
779 | ||
780 | We no longer ignore such label references (see LABEL_REF handling in | |
781 | mark_jump_label for additional information). */ | |
782 | ||
783 | /* There's no reason for current users to emit jump-insns with | |
784 | such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET | |
785 | notes. */ | |
27a2a4b6 JZZ |
786 | gcc_assert (!JUMP_P (rinsn)); |
787 | add_reg_note (rinsn, REG_LABEL_OPERAND, label_ref_label (x)); | |
9243c3d1 JZZ |
788 | |
789 | if (LABEL_P (label_ref_label (x))) | |
790 | LABEL_NUSES (label_ref_label (x))++; | |
791 | ||
792 | return; | |
793 | } | |
794 | ||
795 | for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--) | |
796 | { | |
797 | if (fmt[i] == 'e') | |
27a2a4b6 | 798 | add_label_notes (XEXP (x, i), rinsn); |
9243c3d1 JZZ |
799 | else if (fmt[i] == 'E') |
800 | for (j = XVECLEN (x, i) - 1; j >= 0; j--) | |
27a2a4b6 | 801 | add_label_notes (XVECEXP (x, i, j), rinsn); |
9243c3d1 JZZ |
802 | } |
803 | } | |
804 | ||
805 | /* Add EXPR to the end of basic block BB. | |
806 | ||
807 | This is used by both the PRE and code hoisting. */ | |
808 | ||
809 | static void | |
810 | insert_insn_end_basic_block (rtx_insn *rinsn, basic_block cfg_bb) | |
811 | { | |
812 | rtx_insn *end_rinsn = BB_END (cfg_bb); | |
813 | rtx_insn *new_insn; | |
814 | rtx_insn *pat, *pat_end; | |
815 | ||
816 | pat = rinsn; | |
817 | gcc_assert (pat && INSN_P (pat)); | |
818 | ||
819 | pat_end = pat; | |
820 | while (NEXT_INSN (pat_end) != NULL_RTX) | |
821 | pat_end = NEXT_INSN (pat_end); | |
822 | ||
823 | /* If the last end_rinsn is a jump, insert EXPR in front. Similarly we need | |
824 | to take care of trapping instructions in presence of non-call exceptions. | |
825 | */ | |
826 | ||
827 | if (JUMP_P (end_rinsn) | |
828 | || (NONJUMP_INSN_P (end_rinsn) | |
829 | && (!single_succ_p (cfg_bb) | |
830 | || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL))) | |
831 | { | |
832 | /* FIXME: What if something in jump uses value set in new end_rinsn? */ | |
833 | new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb); | |
834 | } | |
835 | ||
836 | /* Likewise if the last end_rinsn is a call, as will happen in the presence | |
837 | of exception handling. */ | |
838 | else if (CALL_P (end_rinsn) | |
839 | && (!single_succ_p (cfg_bb) | |
840 | || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL)) | |
841 | { | |
842 | /* Keeping in mind targets with small register classes and parameters | |
843 | in registers, we search backward and place the instructions before | |
844 | the first parameter is loaded. Do this for everyone for consistency | |
845 | and a presumption that we'll get better code elsewhere as well. */ | |
846 | ||
847 | /* Since different machines initialize their parameter registers | |
848 | in different orders, assume nothing. Collect the set of all | |
849 | parameter registers. */ | |
850 | end_rinsn = find_first_parameter_load (end_rinsn, BB_HEAD (cfg_bb)); | |
851 | ||
852 | /* If we found all the parameter loads, then we want to insert | |
853 | before the first parameter load. | |
854 | ||
855 | If we did not find all the parameter loads, then we might have | |
856 | stopped on the head of the block, which could be a CODE_LABEL. | |
857 | If we inserted before the CODE_LABEL, then we would be putting | |
858 | the end_rinsn in the wrong basic block. In that case, put the | |
859 | end_rinsn after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. | |
860 | */ | |
861 | while (LABEL_P (end_rinsn) || NOTE_INSN_BASIC_BLOCK_P (end_rinsn)) | |
862 | end_rinsn = NEXT_INSN (end_rinsn); | |
863 | ||
864 | new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb); | |
865 | } | |
866 | else | |
867 | new_insn = emit_insn_after_noloc (pat, end_rinsn, cfg_bb); | |
868 | ||
869 | while (1) | |
870 | { | |
871 | if (INSN_P (pat)) | |
872 | add_label_notes (PATTERN (pat), new_insn); | |
873 | if (pat == pat_end) | |
874 | break; | |
875 | pat = NEXT_INSN (pat); | |
876 | } | |
877 | } | |
878 | ||
879 | /* Get VL/VTYPE information for INSN. */ | |
880 | static vl_vtype_info | |
881 | get_vl_vtype_info (const insn_info *insn) | |
882 | { | |
9243c3d1 JZZ |
883 | set_info *set = nullptr; |
884 | rtx avl = ::get_avl (insn->rtl ()); | |
ec99ffab JZZ |
885 | if (avl && REG_P (avl)) |
886 | { | |
887 | if (vlmax_avl_p (avl) && has_vl_op (insn->rtl ())) | |
888 | set | |
889 | = find_access (insn->uses (), REGNO (get_vl (insn->rtl ())))->def (); | |
890 | else if (!vlmax_avl_p (avl)) | |
891 | set = find_access (insn->uses (), REGNO (avl))->def (); | |
892 | else | |
893 | set = nullptr; | |
894 | } | |
9243c3d1 JZZ |
895 | |
896 | uint8_t sew = get_sew (insn->rtl ()); | |
897 | enum vlmul_type vlmul = get_vlmul (insn->rtl ()); | |
898 | uint8_t ratio = get_attr_ratio (insn->rtl ()); | |
899 | /* when get_attr_ratio is invalid, this kind of instructions | |
900 | doesn't care about ratio. However, we still need this value | |
901 | in demand info backward analysis. */ | |
902 | if (ratio == INVALID_ATTRIBUTE) | |
903 | ratio = calculate_ratio (sew, vlmul); | |
904 | bool ta = tail_agnostic_p (insn->rtl ()); | |
905 | bool ma = mask_agnostic_p (insn->rtl ()); | |
906 | ||
907 | /* If merge operand is undef value, we prefer agnostic. */ | |
908 | int merge_op_idx = get_attr_merge_op_idx (insn->rtl ()); | |
909 | if (merge_op_idx != INVALID_ATTRIBUTE | |
910 | && satisfies_constraint_vu (recog_data.operand[merge_op_idx])) | |
911 | { | |
912 | ta = true; | |
913 | ma = true; | |
914 | } | |
915 | ||
916 | vl_vtype_info info (avl_info (avl, set), sew, vlmul, ratio, ta, ma); | |
917 | return info; | |
918 | } | |
919 | ||
920 | static void | |
921 | change_insn (rtx_insn *rinsn, rtx new_pat) | |
922 | { | |
923 | /* We don't apply change on RTL_SSA here since it's possible a | |
924 | new INSN we add in the PASS before which doesn't have RTL_SSA | |
925 | info yet.*/ | |
926 | if (dump_file) | |
927 | { | |
928 | fprintf (dump_file, "\nChange PATTERN of insn %d from:\n", | |
929 | INSN_UID (rinsn)); | |
930 | print_rtl_single (dump_file, PATTERN (rinsn)); | |
931 | } | |
932 | ||
011ba384 | 933 | validate_change (rinsn, &PATTERN (rinsn), new_pat, false); |
9243c3d1 JZZ |
934 | |
935 | if (dump_file) | |
936 | { | |
937 | fprintf (dump_file, "\nto:\n"); | |
938 | print_rtl_single (dump_file, PATTERN (rinsn)); | |
939 | } | |
940 | } | |
941 | ||
60bd33bc JZZ |
942 | static const insn_info * |
943 | get_forward_read_vl_insn (const insn_info *insn) | |
944 | { | |
945 | const bb_info *bb = insn->bb (); | |
946 | for (const insn_info *i = insn->next_nondebug_insn (); | |
947 | real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ()) | |
948 | { | |
949 | if (find_access (i->defs (), VL_REGNUM)) | |
950 | return nullptr; | |
951 | if (read_vl_insn_p (i->rtl ())) | |
952 | return i; | |
953 | } | |
954 | return nullptr; | |
955 | } | |
956 | ||
957 | static const insn_info * | |
958 | get_backward_fault_first_load_insn (const insn_info *insn) | |
959 | { | |
960 | const bb_info *bb = insn->bb (); | |
961 | for (const insn_info *i = insn->prev_nondebug_insn (); | |
962 | real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ()) | |
963 | { | |
964 | if (fault_first_load_p (i->rtl ())) | |
965 | return i; | |
966 | if (find_access (i->defs (), VL_REGNUM)) | |
967 | return nullptr; | |
968 | } | |
969 | return nullptr; | |
970 | } | |
971 | ||
9243c3d1 JZZ |
972 | static bool |
973 | change_insn (function_info *ssa, insn_change change, insn_info *insn, | |
974 | rtx new_pat) | |
975 | { | |
976 | rtx_insn *rinsn = insn->rtl (); | |
977 | auto attempt = ssa->new_change_attempt (); | |
978 | if (!restrict_movement (change)) | |
979 | return false; | |
980 | ||
981 | if (dump_file) | |
982 | { | |
983 | fprintf (dump_file, "\nChange PATTERN of insn %d from:\n", | |
984 | INSN_UID (rinsn)); | |
985 | print_rtl_single (dump_file, PATTERN (rinsn)); | |
9243c3d1 JZZ |
986 | } |
987 | ||
988 | insn_change_watermark watermark; | |
989 | validate_change (rinsn, &PATTERN (rinsn), new_pat, true); | |
990 | ||
991 | /* These routines report failures themselves. */ | |
992 | if (!recog (attempt, change) || !change_is_worthwhile (change, false)) | |
993 | return false; | |
a1e42094 JZZ |
994 | |
995 | /* Fix bug: | |
996 | (insn 12 34 13 2 (set (reg:VNx8DI 120 v24 [orig:134 _1 ] [134]) | |
997 | (if_then_else:VNx8DI (unspec:VNx8BI [ | |
998 | (const_vector:VNx8BI repeat [ | |
999 | (const_int 1 [0x1]) | |
1000 | ]) | |
1001 | (const_int 0 [0]) | |
1002 | (const_int 2 [0x2]) repeated x2 | |
1003 | (const_int 0 [0]) | |
1004 | (reg:SI 66 vl) | |
1005 | (reg:SI 67 vtype) | |
1006 | ] UNSPEC_VPREDICATE) | |
1007 | (plus:VNx8DI (reg/v:VNx8DI 104 v8 [orig:137 op1 ] [137]) | |
1008 | (sign_extend:VNx8DI (vec_duplicate:VNx8SI (reg:SI 15 a5 | |
1009 | [140])))) (unspec:VNx8DI [ (const_int 0 [0]) ] UNSPEC_VUNDEF))) "rvv.c":8:12 | |
1010 | 2784 {pred_single_widen_addsvnx8di_scalar} (expr_list:REG_EQUIV | |
1011 | (mem/c:VNx8DI (reg:DI 10 a0 [142]) [1 <retval>+0 S[64, 64] A128]) | |
1012 | (expr_list:REG_EQUAL (if_then_else:VNx8DI (unspec:VNx8BI [ | |
1013 | (const_vector:VNx8BI repeat [ | |
1014 | (const_int 1 [0x1]) | |
1015 | ]) | |
1016 | (reg/v:DI 13 a3 [orig:139 vl ] [139]) | |
1017 | (const_int 2 [0x2]) repeated x2 | |
1018 | (const_int 0 [0]) | |
1019 | (reg:SI 66 vl) | |
1020 | (reg:SI 67 vtype) | |
1021 | ] UNSPEC_VPREDICATE) | |
1022 | (plus:VNx8DI (reg/v:VNx8DI 104 v8 [orig:137 op1 ] [137]) | |
1023 | (const_vector:VNx8DI repeat [ | |
1024 | (const_int 2730 [0xaaa]) | |
1025 | ])) | |
1026 | (unspec:VNx8DI [ | |
1027 | (const_int 0 [0]) | |
1028 | ] UNSPEC_VUNDEF)) | |
1029 | (nil)))) | |
1030 | Here we want to remove use "a3". However, the REG_EQUAL/REG_EQUIV note use | |
1031 | "a3" which made us fail in change_insn. We reference to the | |
1032 | 'aarch64-cc-fusion.cc' and add this method. */ | |
1033 | remove_reg_equal_equiv_notes (rinsn); | |
9243c3d1 JZZ |
1034 | confirm_change_group (); |
1035 | ssa->change_insn (change); | |
1036 | ||
1037 | if (dump_file) | |
1038 | { | |
1039 | fprintf (dump_file, "\nto:\n"); | |
1040 | print_rtl_single (dump_file, PATTERN (rinsn)); | |
9243c3d1 JZZ |
1041 | } |
1042 | return true; | |
1043 | } | |
1044 | ||
aef20243 JZZ |
1045 | static void |
1046 | change_vsetvl_insn (const insn_info *insn, const vector_insn_info &info) | |
1047 | { | |
1048 | rtx_insn *rinsn; | |
1049 | if (vector_config_insn_p (insn->rtl ())) | |
1050 | { | |
1051 | rinsn = insn->rtl (); | |
1052 | gcc_assert (vsetvl_insn_p (rinsn) && "Can't handle X0, rs1 vsetvli yet"); | |
1053 | } | |
1054 | else | |
1055 | { | |
1056 | gcc_assert (has_vtype_op (insn->rtl ())); | |
1057 | rinsn = PREV_INSN (insn->rtl ()); | |
1058 | gcc_assert (vector_config_insn_p (rinsn)); | |
1059 | } | |
1060 | rtx new_pat = gen_vsetvl_pat (rinsn, info); | |
1061 | change_insn (rinsn, new_pat); | |
1062 | } | |
1063 | ||
69f39144 JZ |
1064 | static bool |
1065 | avl_source_has_vsetvl_p (set_info *avl_source) | |
1066 | { | |
1067 | if (!avl_source) | |
1068 | return false; | |
1069 | if (!avl_source->insn ()) | |
1070 | return false; | |
1071 | if (avl_source->insn ()->is_real ()) | |
1072 | return vsetvl_insn_p (avl_source->insn ()->rtl ()); | |
1073 | hash_set<set_info *> sets = get_all_sets (avl_source, true, false, true); | |
1074 | for (const auto set : sets) | |
1075 | { | |
1076 | if (set->insn ()->is_real () && vsetvl_insn_p (set->insn ()->rtl ())) | |
1077 | return true; | |
1078 | } | |
1079 | return false; | |
1080 | } | |
1081 | ||
4f673c5e | 1082 | static bool |
6b6b9c68 | 1083 | source_equal_p (insn_info *insn1, insn_info *insn2) |
4f673c5e | 1084 | { |
6b6b9c68 JZZ |
1085 | if (!insn1 || !insn2) |
1086 | return false; | |
1087 | rtx_insn *rinsn1 = insn1->rtl (); | |
1088 | rtx_insn *rinsn2 = insn2->rtl (); | |
4f673c5e JZZ |
1089 | if (!rinsn1 || !rinsn2) |
1090 | return false; | |
1091 | rtx note1 = find_reg_equal_equiv_note (rinsn1); | |
1092 | rtx note2 = find_reg_equal_equiv_note (rinsn2); | |
1093 | rtx single_set1 = single_set (rinsn1); | |
1094 | rtx single_set2 = single_set (rinsn2); | |
60bd33bc JZZ |
1095 | if (read_vl_insn_p (rinsn1) && read_vl_insn_p (rinsn2)) |
1096 | { | |
1097 | const insn_info *load1 = get_backward_fault_first_load_insn (insn1); | |
1098 | const insn_info *load2 = get_backward_fault_first_load_insn (insn2); | |
1099 | return load1 && load2 && load1 == load2; | |
1100 | } | |
4f673c5e JZZ |
1101 | |
1102 | if (note1 && note2 && rtx_equal_p (note1, note2)) | |
1103 | return true; | |
6b6b9c68 JZZ |
1104 | |
1105 | /* Since vsetvl instruction is not single SET. | |
1106 | We handle this case specially here. */ | |
1107 | if (vsetvl_insn_p (insn1->rtl ()) && vsetvl_insn_p (insn2->rtl ())) | |
1108 | { | |
1109 | /* For example: | |
1110 | vsetvl1 a6,a5,e32m1 | |
1111 | RVV 1 (use a6 as AVL) | |
1112 | vsetvl2 a5,a5,e8mf4 | |
1113 | RVV 2 (use a5 as AVL) | |
1114 | We consider AVL of RVV 1 and RVV 2 are same so that we can | |
1115 | gain more optimization opportunities. | |
1116 | ||
1117 | Note: insn1_info.compatible_avl_p (insn2_info) | |
1118 | will make sure there is no instruction between vsetvl1 and vsetvl2 | |
1119 | modify a5 since their def will be different if there is instruction | |
1120 | modify a5 and compatible_avl_p will return false. */ | |
1121 | vector_insn_info insn1_info, insn2_info; | |
1122 | insn1_info.parse_insn (insn1); | |
1123 | insn2_info.parse_insn (insn2); | |
69f39144 JZ |
1124 | |
1125 | /* To avoid dead loop, we don't optimize a vsetvli def has vsetvli | |
1126 | instructions which will complicate the situation. */ | |
1127 | if (avl_source_has_vsetvl_p (insn1_info.get_avl_source ()) | |
1128 | || avl_source_has_vsetvl_p (insn2_info.get_avl_source ())) | |
1129 | return false; | |
1130 | ||
6b6b9c68 JZZ |
1131 | if (insn1_info.same_vlmax_p (insn2_info) |
1132 | && insn1_info.compatible_avl_p (insn2_info)) | |
1133 | return true; | |
1134 | } | |
1135 | ||
1136 | /* We only handle AVL is set by instructions with no side effects. */ | |
1137 | if (!single_set1 || !single_set2) | |
1138 | return false; | |
1139 | if (!rtx_equal_p (SET_SRC (single_set1), SET_SRC (single_set2))) | |
1140 | return false; | |
e9fd9efc JZ |
1141 | /* RTL_SSA uses include REG_NOTE. Consider this following case: |
1142 | ||
1143 | insn1 RTL: | |
1144 | (insn 41 39 42 4 (set (reg:DI 26 s10 [orig:159 loop_len_46 ] [159]) | |
1145 | (umin:DI (reg:DI 15 a5 [orig:201 _149 ] [201]) | |
1146 | (reg:DI 14 a4 [276]))) 408 {*umindi3} | |
1147 | (expr_list:REG_EQUAL (umin:DI (reg:DI 15 a5 [orig:201 _149 ] [201]) | |
1148 | (const_int 2 [0x2])) | |
1149 | (nil))) | |
1150 | The RTL_SSA uses of this instruction has 2 uses: | |
1151 | 1. (reg:DI 15 a5 [orig:201 _149 ] [201]) - twice. | |
1152 | 2. (reg:DI 14 a4 [276]) - once. | |
1153 | ||
1154 | insn2 RTL: | |
1155 | (insn 38 353 351 4 (set (reg:DI 27 s11 [orig:160 loop_len_47 ] [160]) | |
1156 | (umin:DI (reg:DI 15 a5 [orig:199 _146 ] [199]) | |
1157 | (reg:DI 14 a4 [276]))) 408 {*umindi3} | |
1158 | (expr_list:REG_EQUAL (umin:DI (reg:DI 28 t3 [orig:200 ivtmp_147 ] [200]) | |
1159 | (const_int 2 [0x2])) | |
1160 | (nil))) | |
1161 | The RTL_SSA uses of this instruction has 3 uses: | |
1162 | 1. (reg:DI 15 a5 [orig:199 _146 ] [199]) - once | |
1163 | 2. (reg:DI 14 a4 [276]) - once | |
1164 | 3. (reg:DI 28 t3 [orig:200 ivtmp_147 ] [200]) - once | |
1165 | ||
1166 | Return false when insn1->uses ().size () != insn2->uses ().size () | |
1167 | */ | |
1168 | if (insn1->uses ().size () != insn2->uses ().size ()) | |
1169 | return false; | |
6b6b9c68 JZZ |
1170 | for (size_t i = 0; i < insn1->uses ().size (); i++) |
1171 | if (insn1->uses ()[i] != insn2->uses ()[i]) | |
1172 | return false; | |
1173 | return true; | |
4f673c5e JZZ |
1174 | } |
1175 | ||
1176 | /* Helper function to get single same real RTL source. | |
1177 | return NULL if it is not a single real RTL source. */ | |
6b6b9c68 | 1178 | static insn_info * |
4f673c5e JZZ |
1179 | extract_single_source (set_info *set) |
1180 | { | |
1181 | if (!set) | |
1182 | return nullptr; | |
1183 | if (set->insn ()->is_real ()) | |
6b6b9c68 | 1184 | return set->insn (); |
4f673c5e JZZ |
1185 | if (!set->insn ()->is_phi ()) |
1186 | return nullptr; | |
6b6b9c68 | 1187 | hash_set<set_info *> sets = get_all_sets (set, true, false, true); |
4f673c5e | 1188 | |
6b6b9c68 | 1189 | insn_info *first_insn = (*sets.begin ())->insn (); |
4f673c5e JZZ |
1190 | if (first_insn->is_artificial ()) |
1191 | return nullptr; | |
6b6b9c68 | 1192 | for (const set_info *set : sets) |
4f673c5e JZZ |
1193 | { |
1194 | /* If there is a head or end insn, we conservative return | |
1195 | NULL so that VSETVL PASS will insert vsetvl directly. */ | |
6b6b9c68 | 1196 | if (set->insn ()->is_artificial ()) |
4f673c5e | 1197 | return nullptr; |
6b6b9c68 | 1198 | if (!source_equal_p (set->insn (), first_insn)) |
4f673c5e JZZ |
1199 | return nullptr; |
1200 | } | |
1201 | ||
6b6b9c68 | 1202 | return first_insn; |
4f673c5e JZZ |
1203 | } |
1204 | ||
ec99ffab JZZ |
1205 | static unsigned |
1206 | calculate_sew (vlmul_type vlmul, unsigned int ratio) | |
1207 | { | |
1208 | for (const unsigned sew : ALL_SEW) | |
1209 | if (calculate_ratio (sew, vlmul) == ratio) | |
1210 | return sew; | |
1211 | return 0; | |
1212 | } | |
1213 | ||
1214 | static vlmul_type | |
1215 | calculate_vlmul (unsigned int sew, unsigned int ratio) | |
1216 | { | |
1217 | for (const vlmul_type vlmul : ALL_LMUL) | |
1218 | if (calculate_ratio (sew, vlmul) == ratio) | |
1219 | return vlmul; | |
1220 | return LMUL_RESERVED; | |
1221 | } | |
1222 | ||
1223 | static bool | |
1224 | incompatible_avl_p (const vector_insn_info &info1, | |
1225 | const vector_insn_info &info2) | |
1226 | { | |
1227 | return !info1.compatible_avl_p (info2) && !info2.compatible_avl_p (info1); | |
1228 | } | |
1229 | ||
1230 | static bool | |
1231 | different_sew_p (const vector_insn_info &info1, const vector_insn_info &info2) | |
1232 | { | |
1233 | return info1.get_sew () != info2.get_sew (); | |
1234 | } | |
1235 | ||
1236 | static bool | |
1237 | different_lmul_p (const vector_insn_info &info1, const vector_insn_info &info2) | |
1238 | { | |
1239 | return info1.get_vlmul () != info2.get_vlmul (); | |
1240 | } | |
1241 | ||
1242 | static bool | |
1243 | different_ratio_p (const vector_insn_info &info1, const vector_insn_info &info2) | |
1244 | { | |
1245 | return info1.get_ratio () != info2.get_ratio (); | |
1246 | } | |
1247 | ||
1248 | static bool | |
1249 | different_tail_policy_p (const vector_insn_info &info1, | |
1250 | const vector_insn_info &info2) | |
1251 | { | |
1252 | return info1.get_ta () != info2.get_ta (); | |
1253 | } | |
1254 | ||
1255 | static bool | |
1256 | different_mask_policy_p (const vector_insn_info &info1, | |
1257 | const vector_insn_info &info2) | |
1258 | { | |
1259 | return info1.get_ma () != info2.get_ma (); | |
1260 | } | |
1261 | ||
1262 | static bool | |
1263 | possible_zero_avl_p (const vector_insn_info &info1, | |
1264 | const vector_insn_info &info2) | |
1265 | { | |
1266 | return !info1.has_non_zero_avl () || !info2.has_non_zero_avl (); | |
1267 | } | |
1268 | ||
ec99ffab JZZ |
1269 | static bool |
1270 | second_ratio_invalid_for_first_sew_p (const vector_insn_info &info1, | |
1271 | const vector_insn_info &info2) | |
1272 | { | |
1273 | return calculate_vlmul (info1.get_sew (), info2.get_ratio ()) | |
1274 | == LMUL_RESERVED; | |
1275 | } | |
1276 | ||
1277 | static bool | |
1278 | second_ratio_invalid_for_first_lmul_p (const vector_insn_info &info1, | |
1279 | const vector_insn_info &info2) | |
1280 | { | |
1281 | return calculate_sew (info1.get_vlmul (), info2.get_ratio ()) == 0; | |
1282 | } | |
1283 | ||
1284 | static bool | |
1285 | second_sew_less_than_first_sew_p (const vector_insn_info &info1, | |
1286 | const vector_insn_info &info2) | |
1287 | { | |
1288 | return info2.get_sew () < info1.get_sew (); | |
1289 | } | |
1290 | ||
1291 | static bool | |
1292 | first_sew_less_than_second_sew_p (const vector_insn_info &info1, | |
1293 | const vector_insn_info &info2) | |
1294 | { | |
1295 | return info1.get_sew () < info2.get_sew (); | |
1296 | } | |
1297 | ||
1298 | /* return 0 if LMUL1 == LMUL2. | |
1299 | return -1 if LMUL1 < LMUL2. | |
1300 | return 1 if LMUL1 > LMUL2. */ | |
1301 | static int | |
1302 | compare_lmul (vlmul_type vlmul1, vlmul_type vlmul2) | |
1303 | { | |
1304 | if (vlmul1 == vlmul2) | |
1305 | return 0; | |
1306 | ||
1307 | switch (vlmul1) | |
1308 | { | |
1309 | case LMUL_1: | |
1310 | if (vlmul2 == LMUL_2 || vlmul2 == LMUL_4 || vlmul2 == LMUL_8) | |
1311 | return 1; | |
1312 | else | |
1313 | return -1; | |
1314 | case LMUL_2: | |
1315 | if (vlmul2 == LMUL_4 || vlmul2 == LMUL_8) | |
1316 | return 1; | |
1317 | else | |
1318 | return -1; | |
1319 | case LMUL_4: | |
1320 | if (vlmul2 == LMUL_8) | |
1321 | return 1; | |
1322 | else | |
1323 | return -1; | |
1324 | case LMUL_8: | |
1325 | return -1; | |
1326 | case LMUL_F2: | |
1327 | if (vlmul2 == LMUL_1 || vlmul2 == LMUL_2 || vlmul2 == LMUL_4 | |
1328 | || vlmul2 == LMUL_8) | |
1329 | return 1; | |
1330 | else | |
1331 | return -1; | |
1332 | case LMUL_F4: | |
1333 | if (vlmul2 == LMUL_F2 || vlmul2 == LMUL_1 || vlmul2 == LMUL_2 | |
1334 | || vlmul2 == LMUL_4 || vlmul2 == LMUL_8) | |
1335 | return 1; | |
1336 | else | |
1337 | return -1; | |
1338 | case LMUL_F8: | |
1339 | return 0; | |
1340 | default: | |
1341 | gcc_unreachable (); | |
1342 | } | |
1343 | } | |
1344 | ||
1345 | static bool | |
1346 | second_lmul_less_than_first_lmul_p (const vector_insn_info &info1, | |
1347 | const vector_insn_info &info2) | |
1348 | { | |
1349 | return compare_lmul (info2.get_vlmul (), info1.get_vlmul ()) == -1; | |
1350 | } | |
1351 | ||
ec99ffab JZZ |
1352 | static bool |
1353 | second_ratio_less_than_first_ratio_p (const vector_insn_info &info1, | |
1354 | const vector_insn_info &info2) | |
1355 | { | |
1356 | return info2.get_ratio () < info1.get_ratio (); | |
1357 | } | |
1358 | ||
1359 | static CONSTEXPR const demands_cond incompatible_conds[] = { | |
1360 | #define DEF_INCOMPATIBLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, \ | |
1361 | GE_SEW1, TAIL_POLICTY1, MASK_POLICY1, AVL2, \ | |
1362 | SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, \ | |
1363 | TAIL_POLICTY2, MASK_POLICY2, COND) \ | |
1364 | {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1, \ | |
1365 | MASK_POLICY1}, \ | |
1366 | {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \ | |
1367 | MASK_POLICY2}}, \ | |
1368 | COND}, | |
1369 | #include "riscv-vsetvl.def" | |
1370 | }; | |
1371 | ||
1372 | static unsigned | |
1373 | greatest_sew (const vector_insn_info &info1, const vector_insn_info &info2) | |
1374 | { | |
1375 | return std::max (info1.get_sew (), info2.get_sew ()); | |
1376 | } | |
1377 | ||
1378 | static unsigned | |
1379 | first_sew (const vector_insn_info &info1, const vector_insn_info &) | |
1380 | { | |
1381 | return info1.get_sew (); | |
1382 | } | |
1383 | ||
1384 | static unsigned | |
1385 | second_sew (const vector_insn_info &, const vector_insn_info &info2) | |
1386 | { | |
1387 | return info2.get_sew (); | |
1388 | } | |
1389 | ||
1390 | static vlmul_type | |
1391 | first_vlmul (const vector_insn_info &info1, const vector_insn_info &) | |
1392 | { | |
1393 | return info1.get_vlmul (); | |
1394 | } | |
1395 | ||
1396 | static vlmul_type | |
1397 | second_vlmul (const vector_insn_info &, const vector_insn_info &info2) | |
1398 | { | |
1399 | return info2.get_vlmul (); | |
1400 | } | |
1401 | ||
1402 | static unsigned | |
1403 | first_ratio (const vector_insn_info &info1, const vector_insn_info &) | |
1404 | { | |
1405 | return info1.get_ratio (); | |
1406 | } | |
1407 | ||
1408 | static unsigned | |
1409 | second_ratio (const vector_insn_info &, const vector_insn_info &info2) | |
1410 | { | |
1411 | return info2.get_ratio (); | |
1412 | } | |
1413 | ||
1414 | static vlmul_type | |
1415 | vlmul_for_first_sew_second_ratio (const vector_insn_info &info1, | |
1416 | const vector_insn_info &info2) | |
1417 | { | |
1418 | return calculate_vlmul (info1.get_sew (), info2.get_ratio ()); | |
1419 | } | |
1420 | ||
1421 | static unsigned | |
1422 | ratio_for_second_sew_first_vlmul (const vector_insn_info &info1, | |
1423 | const vector_insn_info &info2) | |
1424 | { | |
1425 | return calculate_ratio (info2.get_sew (), info1.get_vlmul ()); | |
1426 | } | |
1427 | ||
1428 | static CONSTEXPR const demands_fuse_rule fuse_rules[] = { | |
1429 | #define DEF_SEW_LMUL_FUSE_RULE(DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1, \ | |
1430 | DEMAND_GE_SEW1, DEMAND_SEW2, DEMAND_LMUL2, \ | |
1431 | DEMAND_RATIO2, DEMAND_GE_SEW2, NEW_DEMAND_SEW, \ | |
1432 | NEW_DEMAND_LMUL, NEW_DEMAND_RATIO, \ | |
1433 | NEW_DEMAND_GE_SEW, NEW_SEW, NEW_VLMUL, \ | |
1434 | NEW_RATIO) \ | |
1435 | {{{DEMAND_ANY, DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1, DEMAND_ANY, \ | |
1436 | DEMAND_GE_SEW1, DEMAND_ANY, DEMAND_ANY}, \ | |
1437 | {DEMAND_ANY, DEMAND_SEW2, DEMAND_LMUL2, DEMAND_RATIO2, DEMAND_ANY, \ | |
1438 | DEMAND_GE_SEW2, DEMAND_ANY, DEMAND_ANY}}, \ | |
1439 | NEW_DEMAND_SEW, \ | |
1440 | NEW_DEMAND_LMUL, \ | |
1441 | NEW_DEMAND_RATIO, \ | |
1442 | NEW_DEMAND_GE_SEW, \ | |
1443 | NEW_SEW, \ | |
1444 | NEW_VLMUL, \ | |
1445 | NEW_RATIO}, | |
1446 | #include "riscv-vsetvl.def" | |
1447 | }; | |
1448 | ||
1449 | static bool | |
1450 | always_unavailable (const vector_insn_info &, const vector_insn_info &) | |
1451 | { | |
1452 | return true; | |
1453 | } | |
1454 | ||
1455 | static bool | |
1456 | avl_unavailable_p (const vector_insn_info &info1, const vector_insn_info &info2) | |
1457 | { | |
1458 | return !info2.compatible_avl_p (info1.get_avl_info ()); | |
1459 | } | |
1460 | ||
1461 | static bool | |
1462 | sew_unavailable_p (const vector_insn_info &info1, const vector_insn_info &info2) | |
1463 | { | |
1464 | if (!info2.demand_p (DEMAND_LMUL) && !info2.demand_p (DEMAND_RATIO)) | |
1465 | { | |
1466 | if (info2.demand_p (DEMAND_GE_SEW)) | |
1467 | return info1.get_sew () < info2.get_sew (); | |
1468 | return info1.get_sew () != info2.get_sew (); | |
1469 | } | |
1470 | return true; | |
1471 | } | |
1472 | ||
1473 | static bool | |
1474 | lmul_unavailable_p (const vector_insn_info &info1, | |
1475 | const vector_insn_info &info2) | |
1476 | { | |
1477 | if (info1.get_vlmul () == info2.get_vlmul () && !info2.demand_p (DEMAND_SEW) | |
1478 | && !info2.demand_p (DEMAND_RATIO)) | |
1479 | return false; | |
1480 | return true; | |
1481 | } | |
1482 | ||
1483 | static bool | |
1484 | ge_sew_unavailable_p (const vector_insn_info &info1, | |
1485 | const vector_insn_info &info2) | |
1486 | { | |
1487 | if (!info2.demand_p (DEMAND_LMUL) && !info2.demand_p (DEMAND_RATIO) | |
1488 | && info2.demand_p (DEMAND_GE_SEW)) | |
1489 | return info1.get_sew () < info2.get_sew (); | |
1490 | return true; | |
1491 | } | |
1492 | ||
1493 | static bool | |
1494 | ge_sew_lmul_unavailable_p (const vector_insn_info &info1, | |
1495 | const vector_insn_info &info2) | |
1496 | { | |
1497 | if (!info2.demand_p (DEMAND_RATIO) && info2.demand_p (DEMAND_GE_SEW)) | |
1498 | return info1.get_sew () < info2.get_sew (); | |
1499 | return true; | |
1500 | } | |
1501 | ||
1502 | static bool | |
1503 | ge_sew_ratio_unavailable_p (const vector_insn_info &info1, | |
1504 | const vector_insn_info &info2) | |
1505 | { | |
1506 | if (!info2.demand_p (DEMAND_LMUL) && info2.demand_p (DEMAND_GE_SEW)) | |
1507 | return info1.get_sew () < info2.get_sew (); | |
1508 | return true; | |
1509 | } | |
1510 | ||
1511 | static CONSTEXPR const demands_cond unavailable_conds[] = { | |
1512 | #define DEF_UNAVAILABLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, \ | |
1513 | TAIL_POLICTY1, MASK_POLICY1, AVL2, SEW2, LMUL2, \ | |
1514 | RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \ | |
1515 | MASK_POLICY2, COND) \ | |
1516 | {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1, \ | |
1517 | MASK_POLICY1}, \ | |
1518 | {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \ | |
1519 | MASK_POLICY2}}, \ | |
1520 | COND}, | |
1521 | #include "riscv-vsetvl.def" | |
1522 | }; | |
1523 | ||
1524 | static bool | |
1525 | same_sew_lmul_demand_p (const bool *dems1, const bool *dems2) | |
1526 | { | |
1527 | return dems1[DEMAND_SEW] == dems2[DEMAND_SEW] | |
1528 | && dems1[DEMAND_LMUL] == dems2[DEMAND_LMUL] | |
1529 | && dems1[DEMAND_RATIO] == dems2[DEMAND_RATIO] && !dems1[DEMAND_GE_SEW] | |
1530 | && !dems2[DEMAND_GE_SEW]; | |
1531 | } | |
1532 | ||
1533 | static bool | |
1534 | propagate_avl_across_demands_p (const vector_insn_info &info1, | |
1535 | const vector_insn_info &info2) | |
1536 | { | |
1537 | if (info2.demand_p (DEMAND_AVL)) | |
1538 | { | |
1539 | if (info2.demand_p (DEMAND_NONZERO_AVL)) | |
1540 | return info1.demand_p (DEMAND_AVL) | |
1541 | && !info1.demand_p (DEMAND_NONZERO_AVL) && info1.has_avl_reg (); | |
1542 | } | |
1543 | else | |
1544 | return info1.demand_p (DEMAND_AVL) && info1.has_avl_reg (); | |
1545 | return false; | |
1546 | } | |
1547 | ||
1548 | static bool | |
a481eed8 | 1549 | reg_available_p (const insn_info *insn, const vector_insn_info &info) |
ec99ffab | 1550 | { |
a481eed8 | 1551 | if (info.has_avl_reg () && !info.get_avl_source ()) |
44c918b5 | 1552 | return false; |
a481eed8 JZZ |
1553 | insn_info *def_insn = info.get_avl_source ()->insn (); |
1554 | if (def_insn->bb () == insn->bb ()) | |
1555 | return before_p (def_insn, insn); | |
ec99ffab | 1556 | else |
a481eed8 JZZ |
1557 | return dominated_by_p (CDI_DOMINATORS, insn->bb ()->cfg_bb (), |
1558 | def_insn->bb ()->cfg_bb ()); | |
ec99ffab JZZ |
1559 | } |
1560 | ||
60bd33bc JZZ |
1561 | /* Return true if the instruction support relaxed compatible check. */ |
1562 | static bool | |
1563 | support_relaxed_compatible_p (const vector_insn_info &info1, | |
1564 | const vector_insn_info &info2) | |
1565 | { | |
1566 | if (fault_first_load_p (info1.get_insn ()->rtl ()) | |
1567 | && info2.demand_p (DEMAND_AVL) && info2.has_avl_reg () | |
1568 | && info2.get_avl_source () && info2.get_avl_source ()->insn ()->is_phi ()) | |
1569 | { | |
1570 | hash_set<set_info *> sets | |
1571 | = get_all_sets (info2.get_avl_source (), true, false, false); | |
1572 | for (set_info *set : sets) | |
1573 | { | |
1574 | if (read_vl_insn_p (set->insn ()->rtl ())) | |
1575 | { | |
1576 | const insn_info *insn | |
1577 | = get_backward_fault_first_load_insn (set->insn ()); | |
1578 | if (insn == info1.get_insn ()) | |
1579 | return info2.compatible_vtype_p (info1); | |
1580 | } | |
1581 | } | |
1582 | } | |
1583 | return false; | |
1584 | } | |
1585 | ||
1586 | /* Return true if the block is worthwhile backward propagation. */ | |
1587 | static bool | |
1588 | backward_propagate_worthwhile_p (const basic_block cfg_bb, | |
1589 | const vector_block_info block_info) | |
1590 | { | |
1591 | if (loop_basic_block_p (cfg_bb)) | |
1592 | { | |
1593 | if (block_info.reaching_out.valid_or_dirty_p ()) | |
1594 | { | |
1595 | if (block_info.local_dem.compatible_p (block_info.reaching_out)) | |
1596 | { | |
1597 | /* Case 1 (Can backward propagate): | |
1598 | .... | |
1599 | bb0: | |
1600 | ... | |
1601 | for (int i = 0; i < n; i++) | |
1602 | { | |
1603 | vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7); | |
1604 | __riscv_vse16_v_i16mf4 (out + i + 5, v, 7); | |
1605 | } | |
1606 | The local_dem is compatible with reaching_out. Such case is | |
1607 | worthwhile backward propagation. */ | |
1608 | return true; | |
1609 | } | |
1610 | else | |
1611 | { | |
1612 | if (support_relaxed_compatible_p (block_info.reaching_out, | |
1613 | block_info.local_dem)) | |
1614 | return true; | |
1615 | /* Case 2 (Don't backward propagate): | |
1616 | .... | |
1617 | bb0: | |
1618 | ... | |
1619 | for (int i = 0; i < n; i++) | |
1620 | { | |
1621 | vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7); | |
1622 | __riscv_vse16_v_i16mf4 (out + i + 5, v, 7); | |
1623 | vint16mf2_t v2 = __riscv_vle16_v_i16mf2 (in + i + 6, 8); | |
1624 | __riscv_vse16_v_i16mf2 (out + i + 6, v, 8); | |
1625 | } | |
1626 | The local_dem is incompatible with reaching_out. | |
1627 | It makes no sense to backward propagate the local_dem since we | |
1628 | can't avoid VSETVL inside the loop. */ | |
1629 | return false; | |
1630 | } | |
1631 | } | |
1632 | else | |
1633 | { | |
1634 | gcc_assert (block_info.reaching_out.unknown_p ()); | |
1635 | /* Case 3 (Don't backward propagate): | |
1636 | .... | |
1637 | bb0: | |
1638 | ... | |
1639 | for (int i = 0; i < n; i++) | |
1640 | { | |
1641 | vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7); | |
1642 | __riscv_vse16_v_i16mf4 (out + i + 5, v, 7); | |
1643 | fn3 (); | |
1644 | } | |
1645 | The local_dem is VALID, but the reaching_out is UNKNOWN. | |
1646 | It makes no sense to backward propagate the local_dem since we | |
1647 | can't avoid VSETVL inside the loop. */ | |
1648 | return false; | |
1649 | } | |
1650 | } | |
1651 | ||
1652 | return true; | |
1653 | } | |
1654 | ||
a2d12abe JZZ |
1655 | /* Count the number of REGNO in RINSN. */ |
1656 | static int | |
1657 | count_regno_occurrences (rtx_insn *rinsn, unsigned int regno) | |
1658 | { | |
1659 | int count = 0; | |
1660 | extract_insn (rinsn); | |
1661 | for (int i = 0; i < recog_data.n_operands; i++) | |
1662 | if (refers_to_regno_p (regno, recog_data.operand[i])) | |
1663 | count++; | |
1664 | return count; | |
1665 | } | |
1666 | ||
12b23c71 JZZ |
1667 | avl_info::avl_info (const avl_info &other) |
1668 | { | |
1669 | m_value = other.get_value (); | |
1670 | m_source = other.get_source (); | |
1671 | } | |
1672 | ||
9243c3d1 JZZ |
1673 | avl_info::avl_info (rtx value_in, set_info *source_in) |
1674 | : m_value (value_in), m_source (source_in) | |
1675 | {} | |
1676 | ||
4f673c5e JZZ |
1677 | bool |
1678 | avl_info::single_source_equal_p (const avl_info &other) const | |
1679 | { | |
1680 | set_info *set1 = m_source; | |
1681 | set_info *set2 = other.get_source (); | |
6b6b9c68 JZZ |
1682 | insn_info *insn1 = extract_single_source (set1); |
1683 | insn_info *insn2 = extract_single_source (set2); | |
1684 | if (!insn1 || !insn2) | |
1685 | return false; | |
1686 | return source_equal_p (insn1, insn2); | |
1687 | } | |
1688 | ||
1689 | bool | |
1690 | avl_info::multiple_source_equal_p (const avl_info &other) const | |
1691 | { | |
d875d756 JZ |
1692 | /* When the def info is same in RTL_SSA namespace, it's safe |
1693 | to consider they are avl compatible. */ | |
1694 | if (m_source == other.get_source ()) | |
1695 | return true; | |
6b6b9c68 | 1696 | |
d875d756 JZ |
1697 | /* We only consider handle PHI node. */ |
1698 | if (!m_source->insn ()->is_phi () || !other.get_source ()->insn ()->is_phi ()) | |
1699 | return false; | |
6b6b9c68 | 1700 | |
d875d756 JZ |
1701 | phi_info *phi1 = as_a<phi_info *> (m_source); |
1702 | phi_info *phi2 = as_a<phi_info *> (other.get_source ()); | |
6b6b9c68 | 1703 | |
d875d756 JZ |
1704 | if (phi1->is_degenerate () && phi2->is_degenerate ()) |
1705 | { | |
1706 | /* Degenerate PHI means the PHI node only have one input. */ | |
1707 | ||
1708 | /* If both PHI nodes have the same single input in use list. | |
1709 | We consider they are AVL compatible. */ | |
1710 | if (phi1->input_value (0) == phi2->input_value (0)) | |
1711 | return true; | |
1712 | } | |
1713 | /* TODO: We can support more optimization cases in the future. */ | |
1714 | return false; | |
4f673c5e JZZ |
1715 | } |
1716 | ||
9243c3d1 JZZ |
1717 | avl_info & |
1718 | avl_info::operator= (const avl_info &other) | |
1719 | { | |
1720 | m_value = other.get_value (); | |
1721 | m_source = other.get_source (); | |
1722 | return *this; | |
1723 | } | |
1724 | ||
1725 | bool | |
1726 | avl_info::operator== (const avl_info &other) const | |
1727 | { | |
1728 | if (!m_value) | |
1729 | return !other.get_value (); | |
1730 | if (!other.get_value ()) | |
1731 | return false; | |
1732 | ||
9243c3d1 JZZ |
1733 | if (GET_CODE (m_value) != GET_CODE (other.get_value ())) |
1734 | return false; | |
1735 | ||
1736 | /* Handle CONST_INT AVL. */ | |
1737 | if (CONST_INT_P (m_value)) | |
1738 | return INTVAL (m_value) == INTVAL (other.get_value ()); | |
1739 | ||
1740 | /* Handle VLMAX AVL. */ | |
1741 | if (vlmax_avl_p (m_value)) | |
1742 | return vlmax_avl_p (other.get_value ()); | |
1743 | ||
4f673c5e JZZ |
1744 | /* If any source is undef value, we think they are not equal. */ |
1745 | if (!m_source || !other.get_source ()) | |
1746 | return false; | |
1747 | ||
1748 | /* If both sources are single source (defined by a single real RTL) | |
1749 | and their definitions are same. */ | |
1750 | if (single_source_equal_p (other)) | |
1751 | return true; | |
1752 | ||
6b6b9c68 | 1753 | return multiple_source_equal_p (other); |
9243c3d1 JZZ |
1754 | } |
1755 | ||
1756 | bool | |
1757 | avl_info::operator!= (const avl_info &other) const | |
1758 | { | |
1759 | return !(*this == other); | |
1760 | } | |
1761 | ||
ec99ffab JZZ |
1762 | bool |
1763 | avl_info::has_non_zero_avl () const | |
1764 | { | |
1765 | if (has_avl_imm ()) | |
1766 | return INTVAL (get_value ()) > 0; | |
1767 | if (has_avl_reg ()) | |
1768 | return vlmax_avl_p (get_value ()); | |
1769 | return false; | |
1770 | } | |
1771 | ||
9243c3d1 JZZ |
1772 | /* Initialize VL/VTYPE information. */ |
1773 | vl_vtype_info::vl_vtype_info (avl_info avl_in, uint8_t sew_in, | |
1774 | enum vlmul_type vlmul_in, uint8_t ratio_in, | |
1775 | bool ta_in, bool ma_in) | |
1776 | : m_avl (avl_in), m_sew (sew_in), m_vlmul (vlmul_in), m_ratio (ratio_in), | |
1777 | m_ta (ta_in), m_ma (ma_in) | |
1778 | { | |
1779 | gcc_assert (valid_sew_p (m_sew) && "Unexpected SEW"); | |
1780 | } | |
1781 | ||
1782 | bool | |
1783 | vl_vtype_info::operator== (const vl_vtype_info &other) const | |
1784 | { | |
6b6b9c68 | 1785 | return same_avl_p (other) && m_sew == other.get_sew () |
9243c3d1 JZZ |
1786 | && m_vlmul == other.get_vlmul () && m_ta == other.get_ta () |
1787 | && m_ma == other.get_ma () && m_ratio == other.get_ratio (); | |
1788 | } | |
1789 | ||
1790 | bool | |
1791 | vl_vtype_info::operator!= (const vl_vtype_info &other) const | |
1792 | { | |
1793 | return !(*this == other); | |
1794 | } | |
1795 | ||
9243c3d1 JZZ |
1796 | bool |
1797 | vl_vtype_info::same_avl_p (const vl_vtype_info &other) const | |
1798 | { | |
6b6b9c68 JZZ |
1799 | /* We need to compare both RTL and SET. If both AVL are CONST_INT. |
1800 | For example, const_int 3 and const_int 4, we need to compare | |
1801 | RTL. If both AVL are REG and their REGNO are same, we need to | |
1802 | compare SET. */ | |
1803 | return get_avl () == other.get_avl () | |
1804 | && get_avl_source () == other.get_avl_source (); | |
9243c3d1 JZZ |
1805 | } |
1806 | ||
1807 | bool | |
1808 | vl_vtype_info::same_vtype_p (const vl_vtype_info &other) const | |
1809 | { | |
1810 | return get_sew () == other.get_sew () && get_vlmul () == other.get_vlmul () | |
1811 | && get_ta () == other.get_ta () && get_ma () == other.get_ma (); | |
1812 | } | |
1813 | ||
1814 | bool | |
1815 | vl_vtype_info::same_vlmax_p (const vl_vtype_info &other) const | |
1816 | { | |
1817 | return get_ratio () == other.get_ratio (); | |
1818 | } | |
1819 | ||
1820 | /* Compare the compatibility between Dem1 and Dem2. | |
1821 | If Dem1 > Dem2, Dem1 has bigger compatibility then Dem2 | |
1822 | meaning Dem1 is easier be compatible with others than Dem2 | |
1823 | or Dem2 is stricter than Dem1. | |
1824 | For example, Dem1 (demand SEW + LMUL) > Dem2 (demand RATIO). */ | |
9243c3d1 JZZ |
1825 | bool |
1826 | vector_insn_info::operator>= (const vector_insn_info &other) const | |
1827 | { | |
60bd33bc JZZ |
1828 | if (support_relaxed_compatible_p (*this, other)) |
1829 | { | |
1830 | unsigned array_size = sizeof (unavailable_conds) / sizeof (demands_cond); | |
1831 | /* Bypass AVL unavailable cases. */ | |
1832 | for (unsigned i = 2; i < array_size; i++) | |
1833 | if (unavailable_conds[i].pair.match_cond_p (this->get_demands (), | |
1834 | other.get_demands ()) | |
1835 | && unavailable_conds[i].incompatible_p (*this, other)) | |
1836 | return false; | |
1837 | return true; | |
1838 | } | |
1839 | ||
1840 | if (!other.compatible_p (static_cast<const vl_vtype_info &> (*this))) | |
1841 | return false; | |
1842 | if (!this->compatible_p (static_cast<const vl_vtype_info &> (other))) | |
9243c3d1 JZZ |
1843 | return true; |
1844 | ||
1845 | if (*this == other) | |
1846 | return true; | |
1847 | ||
ec99ffab JZZ |
1848 | for (const auto &cond : unavailable_conds) |
1849 | if (cond.pair.match_cond_p (this->get_demands (), other.get_demands ()) | |
1850 | && cond.incompatible_p (*this, other)) | |
1851 | return false; | |
9243c3d1 JZZ |
1852 | |
1853 | return true; | |
1854 | } | |
1855 | ||
1856 | bool | |
1857 | vector_insn_info::operator== (const vector_insn_info &other) const | |
1858 | { | |
1859 | gcc_assert (!uninit_p () && !other.uninit_p () | |
1860 | && "Uninitialization should not happen"); | |
1861 | ||
1862 | /* Empty is only equal to another Empty. */ | |
1863 | if (empty_p ()) | |
1864 | return other.empty_p (); | |
1865 | if (other.empty_p ()) | |
1866 | return empty_p (); | |
1867 | ||
1868 | /* Unknown is only equal to another Unknown. */ | |
1869 | if (unknown_p ()) | |
1870 | return other.unknown_p (); | |
1871 | if (other.unknown_p ()) | |
1872 | return unknown_p (); | |
1873 | ||
1874 | for (size_t i = 0; i < NUM_DEMAND; i++) | |
1875 | if (m_demands[i] != other.demand_p ((enum demand_type) i)) | |
1876 | return false; | |
1877 | ||
7ae4d1df JZZ |
1878 | if (vector_config_insn_p (m_insn->rtl ()) |
1879 | || vector_config_insn_p (other.get_insn ()->rtl ())) | |
1880 | if (m_insn != other.get_insn ()) | |
1881 | return false; | |
9243c3d1 JZZ |
1882 | |
1883 | if (!same_avl_p (other)) | |
1884 | return false; | |
1885 | ||
1886 | /* If the full VTYPE is valid, check that it is the same. */ | |
1887 | return same_vtype_p (other); | |
1888 | } | |
1889 | ||
1890 | void | |
1891 | vector_insn_info::parse_insn (rtx_insn *rinsn) | |
1892 | { | |
1893 | *this = vector_insn_info (); | |
1894 | if (!NONDEBUG_INSN_P (rinsn)) | |
1895 | return; | |
1896 | if (!has_vtype_op (rinsn)) | |
1897 | return; | |
1898 | m_state = VALID; | |
1899 | extract_insn_cached (rinsn); | |
1900 | const rtx avl = recog_data.operand[get_attr_vl_op_idx (rinsn)]; | |
1901 | m_avl = avl_info (avl, nullptr); | |
1902 | m_sew = ::get_sew (rinsn); | |
1903 | m_vlmul = ::get_vlmul (rinsn); | |
1904 | m_ta = tail_agnostic_p (rinsn); | |
1905 | m_ma = mask_agnostic_p (rinsn); | |
1906 | } | |
1907 | ||
1908 | void | |
1909 | vector_insn_info::parse_insn (insn_info *insn) | |
1910 | { | |
1911 | *this = vector_insn_info (); | |
1912 | ||
1913 | /* Return if it is debug insn for the consistency with optimize == 0. */ | |
1914 | if (insn->is_debug_insn ()) | |
1915 | return; | |
1916 | ||
1917 | /* We set it as unknown since we don't what will happen in CALL or ASM. */ | |
1918 | if (insn->is_call () || insn->is_asm ()) | |
1919 | { | |
1920 | set_unknown (); | |
1921 | return; | |
1922 | } | |
1923 | ||
1924 | /* If this is something that updates VL/VTYPE that we don't know about, set | |
1925 | the state to unknown. */ | |
60bd33bc | 1926 | if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()) |
9243c3d1 JZZ |
1927 | && (find_access (insn->defs (), VL_REGNUM) |
1928 | || find_access (insn->defs (), VTYPE_REGNUM))) | |
1929 | { | |
1930 | set_unknown (); | |
1931 | return; | |
1932 | } | |
1933 | ||
1934 | if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())) | |
1935 | return; | |
1936 | ||
1937 | /* Warning: This function has to work on both the lowered (i.e. post | |
1938 | emit_local_forward_vsetvls) and pre-lowering forms. The main implication | |
1939 | of this is that it can't use the value of a SEW, VL, or Policy operand as | |
1940 | they might be stale after lowering. */ | |
1941 | vl_vtype_info::operator= (get_vl_vtype_info (insn)); | |
1942 | m_insn = insn; | |
1943 | m_state = VALID; | |
1944 | if (vector_config_insn_p (insn->rtl ())) | |
1945 | { | |
1946 | m_demands[DEMAND_AVL] = true; | |
1947 | m_demands[DEMAND_RATIO] = true; | |
1948 | return; | |
1949 | } | |
1950 | ||
1951 | if (has_vl_op (insn->rtl ())) | |
1952 | m_demands[DEMAND_AVL] = true; | |
1953 | ||
1954 | if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE) | |
1955 | m_demands[DEMAND_RATIO] = true; | |
1956 | else | |
1957 | { | |
1958 | /* TODO: By default, if it doesn't demand RATIO, we set it | |
1959 | demand SEW && LMUL both. Some instructions may demand SEW | |
1960 | only and ignore LMUL, will fix it later. */ | |
1961 | m_demands[DEMAND_SEW] = true; | |
ec99ffab JZZ |
1962 | if (!ignore_vlmul_insn_p (insn->rtl ())) |
1963 | m_demands[DEMAND_LMUL] = true; | |
9243c3d1 JZZ |
1964 | } |
1965 | ||
1966 | if (get_attr_ta (insn->rtl ()) != INVALID_ATTRIBUTE) | |
1967 | m_demands[DEMAND_TAIL_POLICY] = true; | |
1968 | if (get_attr_ma (insn->rtl ()) != INVALID_ATTRIBUTE) | |
1969 | m_demands[DEMAND_MASK_POLICY] = true; | |
6b6b9c68 JZZ |
1970 | |
1971 | if (vector_config_insn_p (insn->rtl ())) | |
1972 | return; | |
1973 | ||
ec99ffab JZZ |
1974 | if (scalar_move_insn_p (insn->rtl ())) |
1975 | { | |
1976 | if (m_avl.has_non_zero_avl ()) | |
1977 | m_demands[DEMAND_NONZERO_AVL] = true; | |
1978 | if (m_ta) | |
1979 | m_demands[DEMAND_GE_SEW] = true; | |
1980 | } | |
1981 | ||
1982 | if (!m_avl.has_avl_reg () || vlmax_avl_p (get_avl ()) || !m_avl.get_source ()) | |
1983 | return; | |
1984 | if (!m_avl.get_source ()->insn ()->is_real () | |
1985 | && !m_avl.get_source ()->insn ()->is_phi ()) | |
6b6b9c68 JZZ |
1986 | return; |
1987 | ||
1988 | insn_info *def_insn = extract_single_source (m_avl.get_source ()); | |
ec99ffab JZZ |
1989 | if (!def_insn || !vsetvl_insn_p (def_insn->rtl ())) |
1990 | return; | |
9243c3d1 | 1991 | |
ec99ffab JZZ |
1992 | vector_insn_info new_info; |
1993 | new_info.parse_insn (def_insn); | |
1994 | if (!same_vlmax_p (new_info) && !scalar_move_insn_p (insn->rtl ())) | |
1995 | return; | |
1996 | /* TODO: Currently, we don't forward AVL for non-VLMAX vsetvl. */ | |
1997 | if (vlmax_avl_p (new_info.get_avl ())) | |
1998 | set_avl_info (avl_info (new_info.get_avl (), get_avl_source ())); | |
1999 | ||
2000 | if (scalar_move_insn_p (insn->rtl ()) && m_avl.has_non_zero_avl ()) | |
2001 | m_demands[DEMAND_NONZERO_AVL] = true; | |
9243c3d1 JZZ |
2002 | } |
2003 | ||
2004 | bool | |
2005 | vector_insn_info::compatible_p (const vector_insn_info &other) const | |
2006 | { | |
2007 | gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p () | |
2008 | && "Can't compare invalid demanded infos"); | |
2009 | ||
ec99ffab | 2010 | for (const auto &cond : incompatible_conds) |
60bd33bc | 2011 | if (cond.dual_incompatible_p (*this, other)) |
ec99ffab | 2012 | return false; |
9243c3d1 JZZ |
2013 | return true; |
2014 | } | |
2015 | ||
d51f2456 JZ |
2016 | bool |
2017 | vector_insn_info::skip_avl_compatible_p (const vector_insn_info &other) const | |
2018 | { | |
2019 | gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p () | |
2020 | && "Can't compare invalid demanded infos"); | |
2021 | unsigned array_size = sizeof (incompatible_conds) / sizeof (demands_cond); | |
2022 | /* Bypass AVL incompatible cases. */ | |
2023 | for (unsigned i = 1; i < array_size; i++) | |
2024 | if (incompatible_conds[i].dual_incompatible_p (*this, other)) | |
2025 | return false; | |
2026 | return true; | |
2027 | } | |
2028 | ||
9243c3d1 JZZ |
2029 | bool |
2030 | vector_insn_info::compatible_avl_p (const vl_vtype_info &other) const | |
2031 | { | |
2032 | gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info"); | |
2033 | gcc_assert (!unknown_p () && "Can't compare AVL in unknown state"); | |
2034 | if (!demand_p (DEMAND_AVL)) | |
2035 | return true; | |
ec99ffab JZZ |
2036 | if (demand_p (DEMAND_NONZERO_AVL) && other.has_non_zero_avl ()) |
2037 | return true; | |
9243c3d1 JZZ |
2038 | return get_avl_info () == other.get_avl_info (); |
2039 | } | |
2040 | ||
4f673c5e JZZ |
2041 | bool |
2042 | vector_insn_info::compatible_avl_p (const avl_info &other) const | |
2043 | { | |
2044 | gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info"); | |
2045 | gcc_assert (!unknown_p () && "Can't compare AVL in unknown state"); | |
2046 | gcc_assert (demand_p (DEMAND_AVL) && "Can't compare AVL undemand state"); | |
ec99ffab JZZ |
2047 | if (!demand_p (DEMAND_AVL)) |
2048 | return true; | |
2049 | if (demand_p (DEMAND_NONZERO_AVL) && other.has_non_zero_avl ()) | |
2050 | return true; | |
4f673c5e JZZ |
2051 | return get_avl_info () == other; |
2052 | } | |
2053 | ||
9243c3d1 JZZ |
2054 | bool |
2055 | vector_insn_info::compatible_vtype_p (const vl_vtype_info &other) const | |
2056 | { | |
2057 | gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info"); | |
2058 | gcc_assert (!unknown_p () && "Can't compare VTYPE in unknown state"); | |
ec99ffab JZZ |
2059 | if (demand_p (DEMAND_SEW)) |
2060 | { | |
2061 | if (!demand_p (DEMAND_GE_SEW) && m_sew != other.get_sew ()) | |
2062 | return false; | |
2063 | if (demand_p (DEMAND_GE_SEW) && m_sew > other.get_sew ()) | |
2064 | return false; | |
2065 | } | |
9243c3d1 JZZ |
2066 | if (demand_p (DEMAND_LMUL) && m_vlmul != other.get_vlmul ()) |
2067 | return false; | |
2068 | if (demand_p (DEMAND_RATIO) && m_ratio != other.get_ratio ()) | |
2069 | return false; | |
2070 | if (demand_p (DEMAND_TAIL_POLICY) && m_ta != other.get_ta ()) | |
2071 | return false; | |
2072 | if (demand_p (DEMAND_MASK_POLICY) && m_ma != other.get_ma ()) | |
2073 | return false; | |
2074 | return true; | |
2075 | } | |
2076 | ||
2077 | /* Determine whether the vector instructions requirements represented by | |
2078 | Require are compatible with the previous vsetvli instruction represented | |
2079 | by this. INSN is the instruction whose requirements we're considering. */ | |
2080 | bool | |
2081 | vector_insn_info::compatible_p (const vl_vtype_info &curr_info) const | |
2082 | { | |
2083 | gcc_assert (!uninit_p () && "Can't handle uninitialized info"); | |
2084 | if (empty_p ()) | |
2085 | return false; | |
2086 | ||
2087 | /* Nothing is compatible with Unknown. */ | |
2088 | if (unknown_p ()) | |
2089 | return false; | |
2090 | ||
2091 | /* If the instruction doesn't need an AVLReg and the SEW matches, consider | |
2092 | it compatible. */ | |
2093 | if (!demand_p (DEMAND_AVL)) | |
2094 | if (m_sew == curr_info.get_sew ()) | |
2095 | return true; | |
2096 | ||
2097 | return compatible_avl_p (curr_info) && compatible_vtype_p (curr_info); | |
2098 | } | |
2099 | ||
6b6b9c68 JZZ |
2100 | bool |
2101 | vector_insn_info::available_p (const vector_insn_info &other) const | |
2102 | { | |
ec99ffab JZZ |
2103 | return *this >= other; |
2104 | } | |
2105 | ||
2106 | void | |
2107 | vector_insn_info::fuse_avl (const vector_insn_info &info1, | |
2108 | const vector_insn_info &info2) | |
2109 | { | |
2110 | set_insn (info1.get_insn ()); | |
2111 | if (info1.demand_p (DEMAND_AVL)) | |
2112 | { | |
2113 | if (info1.demand_p (DEMAND_NONZERO_AVL)) | |
2114 | { | |
2115 | if (info2.demand_p (DEMAND_AVL) | |
2116 | && !info2.demand_p (DEMAND_NONZERO_AVL)) | |
2117 | { | |
2118 | set_avl_info (info2.get_avl_info ()); | |
2119 | set_demand (DEMAND_AVL, true); | |
2120 | set_demand (DEMAND_NONZERO_AVL, false); | |
2121 | return; | |
2122 | } | |
2123 | } | |
2124 | set_avl_info (info1.get_avl_info ()); | |
2125 | set_demand (DEMAND_NONZERO_AVL, info1.demand_p (DEMAND_NONZERO_AVL)); | |
2126 | } | |
2127 | else | |
2128 | { | |
2129 | set_avl_info (info2.get_avl_info ()); | |
2130 | set_demand (DEMAND_NONZERO_AVL, info2.demand_p (DEMAND_NONZERO_AVL)); | |
2131 | } | |
2132 | set_demand (DEMAND_AVL, | |
2133 | info1.demand_p (DEMAND_AVL) || info2.demand_p (DEMAND_AVL)); | |
2134 | } | |
2135 | ||
2136 | void | |
2137 | vector_insn_info::fuse_sew_lmul (const vector_insn_info &info1, | |
2138 | const vector_insn_info &info2) | |
2139 | { | |
2140 | /* We need to fuse sew && lmul according to demand info: | |
2141 | ||
2142 | 1. GE_SEW. | |
2143 | 2. SEW. | |
2144 | 3. LMUL. | |
2145 | 4. RATIO. */ | |
2146 | if (same_sew_lmul_demand_p (info1.get_demands (), info2.get_demands ())) | |
2147 | { | |
2148 | set_demand (DEMAND_SEW, info2.demand_p (DEMAND_SEW)); | |
2149 | set_demand (DEMAND_LMUL, info2.demand_p (DEMAND_LMUL)); | |
2150 | set_demand (DEMAND_RATIO, info2.demand_p (DEMAND_RATIO)); | |
2151 | set_demand (DEMAND_GE_SEW, info2.demand_p (DEMAND_GE_SEW)); | |
2152 | set_sew (info2.get_sew ()); | |
2153 | set_vlmul (info2.get_vlmul ()); | |
2154 | set_ratio (info2.get_ratio ()); | |
2155 | return; | |
2156 | } | |
2157 | for (const auto &rule : fuse_rules) | |
2158 | { | |
2159 | if (rule.pair.match_cond_p (info1.get_demands (), info2.get_demands ())) | |
2160 | { | |
2161 | set_demand (DEMAND_SEW, rule.demand_sew_p); | |
2162 | set_demand (DEMAND_LMUL, rule.demand_lmul_p); | |
2163 | set_demand (DEMAND_RATIO, rule.demand_ratio_p); | |
2164 | set_demand (DEMAND_GE_SEW, rule.demand_ge_sew_p); | |
2165 | set_sew (rule.new_sew (info1, info2)); | |
2166 | set_vlmul (rule.new_vlmul (info1, info2)); | |
2167 | set_ratio (rule.new_ratio (info1, info2)); | |
2168 | return; | |
2169 | } | |
2170 | if (rule.pair.match_cond_p (info2.get_demands (), info1.get_demands ())) | |
2171 | { | |
2172 | set_demand (DEMAND_SEW, rule.demand_sew_p); | |
2173 | set_demand (DEMAND_LMUL, rule.demand_lmul_p); | |
2174 | set_demand (DEMAND_RATIO, rule.demand_ratio_p); | |
2175 | set_demand (DEMAND_GE_SEW, rule.demand_ge_sew_p); | |
2176 | set_sew (rule.new_sew (info2, info1)); | |
2177 | set_vlmul (rule.new_vlmul (info2, info1)); | |
2178 | set_ratio (rule.new_ratio (info2, info1)); | |
2179 | return; | |
2180 | } | |
2181 | } | |
2182 | gcc_unreachable (); | |
2183 | } | |
2184 | ||
2185 | void | |
2186 | vector_insn_info::fuse_tail_policy (const vector_insn_info &info1, | |
2187 | const vector_insn_info &info2) | |
2188 | { | |
2189 | if (info1.demand_p (DEMAND_TAIL_POLICY)) | |
2190 | { | |
2191 | set_ta (info1.get_ta ()); | |
2192 | demand (DEMAND_TAIL_POLICY); | |
2193 | } | |
2194 | else if (info2.demand_p (DEMAND_TAIL_POLICY)) | |
2195 | { | |
2196 | set_ta (info2.get_ta ()); | |
2197 | demand (DEMAND_TAIL_POLICY); | |
2198 | } | |
2199 | else | |
2200 | set_ta (get_default_ta ()); | |
2201 | } | |
2202 | ||
2203 | void | |
2204 | vector_insn_info::fuse_mask_policy (const vector_insn_info &info1, | |
2205 | const vector_insn_info &info2) | |
2206 | { | |
2207 | if (info1.demand_p (DEMAND_MASK_POLICY)) | |
2208 | { | |
2209 | set_ma (info1.get_ma ()); | |
2210 | demand (DEMAND_MASK_POLICY); | |
2211 | } | |
2212 | else if (info2.demand_p (DEMAND_MASK_POLICY)) | |
2213 | { | |
2214 | set_ma (info2.get_ma ()); | |
2215 | demand (DEMAND_MASK_POLICY); | |
2216 | } | |
2217 | else | |
2218 | set_ma (get_default_ma ()); | |
6b6b9c68 JZZ |
2219 | } |
2220 | ||
9243c3d1 JZZ |
2221 | vector_insn_info |
2222 | vector_insn_info::merge (const vector_insn_info &merge_info, | |
d51f2456 | 2223 | enum merge_type type) const |
9243c3d1 | 2224 | { |
6b6b9c68 JZZ |
2225 | if (!vsetvl_insn_p (get_insn ()->rtl ())) |
2226 | gcc_assert (this->compatible_p (merge_info) | |
2227 | && "Can't merge incompatible demanded infos"); | |
9243c3d1 JZZ |
2228 | |
2229 | vector_insn_info new_info; | |
ec99ffab | 2230 | new_info.set_valid (); |
4f673c5e | 2231 | if (type == LOCAL_MERGE) |
9243c3d1 | 2232 | { |
4f673c5e | 2233 | /* For local backward data flow, we always update INSN && AVL as the |
ec99ffab JZZ |
2234 | latest INSN and AVL so that we can keep track status of each INSN. */ |
2235 | new_info.fuse_avl (merge_info, *this); | |
9243c3d1 JZZ |
2236 | } |
2237 | else | |
2238 | { | |
4f673c5e | 2239 | /* For global data flow, we should keep original INSN and AVL if they |
ec99ffab | 2240 | valid since we should keep the life information of each block. |
9243c3d1 | 2241 | |
ec99ffab JZZ |
2242 | For example: |
2243 | bb 0 -> bb 1. | |
2244 | We should keep INSN && AVL of bb 1 since we will eventually emit | |
2245 | vsetvl instruction according to INSN and AVL of bb 1. */ | |
2246 | new_info.fuse_avl (*this, merge_info); | |
2247 | } | |
9243c3d1 | 2248 | |
ec99ffab JZZ |
2249 | new_info.fuse_sew_lmul (*this, merge_info); |
2250 | new_info.fuse_tail_policy (*this, merge_info); | |
2251 | new_info.fuse_mask_policy (*this, merge_info); | |
9243c3d1 JZZ |
2252 | return new_info; |
2253 | } | |
2254 | ||
60bd33bc JZZ |
2255 | bool |
2256 | vector_insn_info::update_fault_first_load_avl (insn_info *insn) | |
2257 | { | |
2258 | // Update AVL to vl-output of the fault first load. | |
2259 | const insn_info *read_vl = get_forward_read_vl_insn (insn); | |
2260 | if (read_vl) | |
2261 | { | |
2262 | rtx vl = SET_DEST (PATTERN (read_vl->rtl ())); | |
2263 | def_info *def = find_access (read_vl->defs (), REGNO (vl)); | |
2264 | set_info *set = safe_dyn_cast<set_info *> (def); | |
2265 | set_avl_info (avl_info (vl, set)); | |
2266 | set_insn (insn); | |
2267 | return true; | |
2268 | } | |
2269 | return false; | |
2270 | } | |
2271 | ||
ea7a9f36 KC |
2272 | static const char * |
2273 | vlmul_to_str (vlmul_type vlmul) | |
2274 | { | |
2275 | switch (vlmul) | |
2276 | { | |
2277 | case LMUL_1: | |
2278 | return "m1"; | |
2279 | case LMUL_2: | |
2280 | return "m2"; | |
2281 | case LMUL_4: | |
2282 | return "m4"; | |
2283 | case LMUL_8: | |
2284 | return "m8"; | |
2285 | case LMUL_RESERVED: | |
2286 | return "INVALID LMUL"; | |
2287 | case LMUL_F8: | |
2288 | return "mf8"; | |
2289 | case LMUL_F4: | |
2290 | return "mf4"; | |
2291 | case LMUL_F2: | |
2292 | return "mf2"; | |
2293 | ||
2294 | default: | |
2295 | gcc_unreachable (); | |
2296 | } | |
2297 | } | |
2298 | ||
/* Return the string used in dumps for a tail/mask policy: "agnostic"
   when AGNOSTIC_P is true, "undisturbed" otherwise.  */
static const char *
policy_to_str (bool agnostic_p)
{
  if (agnostic_p)
    return "agnostic";
  return "undisturbed";
}
2304 | ||
9243c3d1 JZZ |
2305 | void |
2306 | vector_insn_info::dump (FILE *file) const | |
2307 | { | |
2308 | fprintf (file, "["); | |
2309 | if (uninit_p ()) | |
2310 | fprintf (file, "UNINITIALIZED,"); | |
2311 | else if (valid_p ()) | |
2312 | fprintf (file, "VALID,"); | |
2313 | else if (unknown_p ()) | |
2314 | fprintf (file, "UNKNOWN,"); | |
2315 | else if (empty_p ()) | |
2316 | fprintf (file, "EMPTY,"); | |
6b6b9c68 JZZ |
2317 | else if (hard_empty_p ()) |
2318 | fprintf (file, "HARD_EMPTY,"); | |
4f673c5e JZZ |
2319 | else if (dirty_with_killed_avl_p ()) |
2320 | fprintf (file, "DIRTY_WITH_KILLED_AVL,"); | |
9243c3d1 JZZ |
2321 | else |
2322 | fprintf (file, "DIRTY,"); | |
2323 | ||
2324 | fprintf (file, "Demand field={%d(VL),", demand_p (DEMAND_AVL)); | |
ec99ffab | 2325 | fprintf (file, "%d(DEMAND_NONZERO_AVL),", demand_p (DEMAND_NONZERO_AVL)); |
9243c3d1 | 2326 | fprintf (file, "%d(SEW),", demand_p (DEMAND_SEW)); |
ec99ffab | 2327 | fprintf (file, "%d(DEMAND_GE_SEW),", demand_p (DEMAND_GE_SEW)); |
9243c3d1 JZZ |
2328 | fprintf (file, "%d(LMUL),", demand_p (DEMAND_LMUL)); |
2329 | fprintf (file, "%d(RATIO),", demand_p (DEMAND_RATIO)); | |
2330 | fprintf (file, "%d(TAIL_POLICY),", demand_p (DEMAND_TAIL_POLICY)); | |
2331 | fprintf (file, "%d(MASK_POLICY)}\n", demand_p (DEMAND_MASK_POLICY)); | |
2332 | ||
2333 | fprintf (file, "AVL="); | |
2334 | print_rtl_single (file, get_avl ()); | |
2335 | fprintf (file, "SEW=%d,", get_sew ()); | |
ea7a9f36 | 2336 | fprintf (file, "VLMUL=%s,", vlmul_to_str (get_vlmul ())); |
9243c3d1 | 2337 | fprintf (file, "RATIO=%d,", get_ratio ()); |
ea7a9f36 KC |
2338 | fprintf (file, "TAIL_POLICY=%s,", policy_to_str (get_ta ())); |
2339 | fprintf (file, "MASK_POLICY=%s", policy_to_str (get_ma ())); | |
9243c3d1 JZZ |
2340 | fprintf (file, "]\n"); |
2341 | ||
2342 | if (valid_p ()) | |
2343 | { | |
2344 | if (get_insn ()) | |
2345 | { | |
12b23c71 JZZ |
2346 | fprintf (file, "The real INSN="); |
2347 | print_rtl_single (file, get_insn ()->rtl ()); | |
9243c3d1 | 2348 | } |
9243c3d1 JZZ |
2349 | } |
2350 | } | |
2351 | ||
2352 | vector_infos_manager::vector_infos_manager () | |
2353 | { | |
2354 | vector_edge_list = nullptr; | |
2355 | vector_kill = nullptr; | |
2356 | vector_del = nullptr; | |
2357 | vector_insert = nullptr; | |
2358 | vector_antic = nullptr; | |
2359 | vector_transp = nullptr; | |
2360 | vector_comp = nullptr; | |
2361 | vector_avin = nullptr; | |
2362 | vector_avout = nullptr; | |
2363 | vector_insn_infos.safe_grow (get_max_uid ()); | |
2364 | vector_block_infos.safe_grow (last_basic_block_for_fn (cfun)); | |
2365 | if (!optimize) | |
2366 | { | |
2367 | basic_block cfg_bb; | |
2368 | rtx_insn *rinsn; | |
2369 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
2370 | { | |
2371 | vector_block_infos[cfg_bb->index].local_dem = vector_insn_info (); | |
2372 | vector_block_infos[cfg_bb->index].reaching_out = vector_insn_info (); | |
2373 | FOR_BB_INSNS (cfg_bb, rinsn) | |
2374 | vector_insn_infos[INSN_UID (rinsn)].parse_insn (rinsn); | |
2375 | } | |
2376 | } | |
2377 | else | |
2378 | { | |
2379 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2380 | { | |
2381 | vector_block_infos[bb->index ()].local_dem = vector_insn_info (); | |
2382 | vector_block_infos[bb->index ()].reaching_out = vector_insn_info (); | |
2383 | for (insn_info *insn : bb->real_insns ()) | |
2384 | vector_insn_infos[insn->uid ()].parse_insn (insn); | |
acc10c79 | 2385 | vector_block_infos[bb->index ()].probability = profile_probability (); |
9243c3d1 JZZ |
2386 | } |
2387 | } | |
2388 | } | |
2389 | ||
2390 | void | |
2391 | vector_infos_manager::create_expr (vector_insn_info &info) | |
2392 | { | |
2393 | for (size_t i = 0; i < vector_exprs.length (); i++) | |
2394 | if (*vector_exprs[i] == info) | |
2395 | return; | |
2396 | vector_exprs.safe_push (&info); | |
2397 | } | |
2398 | ||
2399 | size_t | |
2400 | vector_infos_manager::get_expr_id (const vector_insn_info &info) const | |
2401 | { | |
2402 | for (size_t i = 0; i < vector_exprs.length (); i++) | |
2403 | if (*vector_exprs[i] == info) | |
2404 | return i; | |
2405 | gcc_unreachable (); | |
2406 | } | |
2407 | ||
2408 | auto_vec<size_t> | |
2409 | vector_infos_manager::get_all_available_exprs ( | |
2410 | const vector_insn_info &info) const | |
2411 | { | |
2412 | auto_vec<size_t> available_list; | |
2413 | for (size_t i = 0; i < vector_exprs.length (); i++) | |
6b6b9c68 | 2414 | if (info.available_p (*vector_exprs[i])) |
9243c3d1 JZZ |
2415 | available_list.safe_push (i); |
2416 | return available_list; | |
2417 | } | |
2418 | ||
d06e9264 JZ |
2419 | bool |
2420 | vector_infos_manager::all_empty_predecessor_p (const basic_block cfg_bb) const | |
2421 | { | |
2422 | hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb); | |
2423 | for (const basic_block pred_cfg_bb : pred_cfg_bbs) | |
2424 | { | |
2425 | const auto &pred_block_info = vector_block_infos[pred_cfg_bb->index]; | |
2426 | if (!pred_block_info.local_dem.valid_or_dirty_p () | |
2427 | && !pred_block_info.reaching_out.valid_or_dirty_p ()) | |
2428 | continue; | |
2429 | return false; | |
2430 | } | |
2431 | return true; | |
2432 | } | |
2433 | ||
9243c3d1 JZZ |
2434 | bool |
2435 | vector_infos_manager::all_same_ratio_p (sbitmap bitdata) const | |
2436 | { | |
2437 | if (bitmap_empty_p (bitdata)) | |
2438 | return false; | |
2439 | ||
2440 | int ratio = -1; | |
2441 | unsigned int bb_index; | |
2442 | sbitmap_iterator sbi; | |
2443 | ||
2444 | EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi) | |
2445 | { | |
2446 | if (ratio == -1) | |
2447 | ratio = vector_exprs[bb_index]->get_ratio (); | |
2448 | else if (vector_exprs[bb_index]->get_ratio () != ratio) | |
2449 | return false; | |
2450 | } | |
2451 | return true; | |
2452 | } | |
2453 | ||
ff8f9544 JZ |
2454 | /* Return TRUE if the incoming vector configuration state |
2455 | to CFG_BB is compatible with the vector configuration | |
2456 | state in CFG_BB, FALSE otherwise. */ | |
2457 | bool | |
2458 | vector_infos_manager::all_avail_in_compatible_p (const basic_block cfg_bb) const | |
2459 | { | |
2460 | const auto &info = vector_block_infos[cfg_bb->index].local_dem; | |
2461 | sbitmap avin = vector_avin[cfg_bb->index]; | |
2462 | unsigned int bb_index; | |
2463 | sbitmap_iterator sbi; | |
2464 | EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi) | |
2465 | { | |
2466 | const auto &avin_info | |
2467 | = static_cast<const vl_vtype_info &> (*vector_exprs[bb_index]); | |
2468 | if (!info.compatible_p (avin_info)) | |
2469 | return false; | |
2470 | } | |
2471 | return true; | |
2472 | } | |
2473 | ||
005fad9d JZZ |
2474 | bool |
2475 | vector_infos_manager::all_same_avl_p (const basic_block cfg_bb, | |
2476 | sbitmap bitdata) const | |
2477 | { | |
2478 | if (bitmap_empty_p (bitdata)) | |
2479 | return false; | |
2480 | ||
2481 | const auto &block_info = vector_block_infos[cfg_bb->index]; | |
2482 | if (!block_info.local_dem.demand_p (DEMAND_AVL)) | |
2483 | return true; | |
2484 | ||
2485 | avl_info avl = block_info.local_dem.get_avl_info (); | |
2486 | unsigned int bb_index; | |
2487 | sbitmap_iterator sbi; | |
2488 | ||
2489 | EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi) | |
2490 | { | |
2491 | if (vector_exprs[bb_index]->get_avl_info () != avl) | |
2492 | return false; | |
2493 | } | |
2494 | return true; | |
2495 | } | |
2496 | ||
9243c3d1 JZZ |
2497 | size_t |
2498 | vector_infos_manager::expr_set_num (sbitmap bitdata) const | |
2499 | { | |
2500 | size_t count = 0; | |
2501 | for (size_t i = 0; i < vector_exprs.length (); i++) | |
2502 | if (bitmap_bit_p (bitdata, i)) | |
2503 | count++; | |
2504 | return count; | |
2505 | } | |
2506 | ||
2507 | void | |
2508 | vector_infos_manager::release (void) | |
2509 | { | |
2510 | if (!vector_insn_infos.is_empty ()) | |
2511 | vector_insn_infos.release (); | |
2512 | if (!vector_block_infos.is_empty ()) | |
2513 | vector_block_infos.release (); | |
2514 | if (!vector_exprs.is_empty ()) | |
2515 | vector_exprs.release (); | |
2516 | ||
ec99ffab JZZ |
2517 | gcc_assert (to_refine_vsetvls.is_empty ()); |
2518 | gcc_assert (to_delete_vsetvls.is_empty ()); | |
9243c3d1 | 2519 | if (optimize > 0) |
cfe3fbc6 JZZ |
2520 | free_bitmap_vectors (); |
2521 | } | |
2522 | ||
2523 | void | |
2524 | vector_infos_manager::create_bitmap_vectors (void) | |
2525 | { | |
2526 | /* Create the bitmap vectors. */ | |
2527 | vector_antic = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2528 | vector_exprs.length ()); | |
2529 | vector_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2530 | vector_exprs.length ()); | |
2531 | vector_comp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2532 | vector_exprs.length ()); | |
2533 | vector_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2534 | vector_exprs.length ()); | |
2535 | vector_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2536 | vector_exprs.length ()); | |
2537 | vector_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2538 | vector_exprs.length ()); | |
2539 | ||
2540 | bitmap_vector_ones (vector_transp, last_basic_block_for_fn (cfun)); | |
2541 | bitmap_vector_clear (vector_antic, last_basic_block_for_fn (cfun)); | |
2542 | bitmap_vector_clear (vector_comp, last_basic_block_for_fn (cfun)); | |
2543 | } | |
2544 | ||
2545 | void | |
2546 | vector_infos_manager::free_bitmap_vectors (void) | |
2547 | { | |
2548 | /* Finished. Free up all the things we've allocated. */ | |
2549 | free_edge_list (vector_edge_list); | |
2550 | if (vector_del) | |
2551 | sbitmap_vector_free (vector_del); | |
2552 | if (vector_insert) | |
2553 | sbitmap_vector_free (vector_insert); | |
2554 | if (vector_kill) | |
2555 | sbitmap_vector_free (vector_kill); | |
2556 | if (vector_antic) | |
2557 | sbitmap_vector_free (vector_antic); | |
2558 | if (vector_transp) | |
2559 | sbitmap_vector_free (vector_transp); | |
2560 | if (vector_comp) | |
2561 | sbitmap_vector_free (vector_comp); | |
2562 | if (vector_avin) | |
2563 | sbitmap_vector_free (vector_avin); | |
2564 | if (vector_avout) | |
2565 | sbitmap_vector_free (vector_avout); | |
2566 | ||
2567 | vector_edge_list = nullptr; | |
2568 | vector_kill = nullptr; | |
2569 | vector_del = nullptr; | |
2570 | vector_insert = nullptr; | |
2571 | vector_antic = nullptr; | |
2572 | vector_transp = nullptr; | |
2573 | vector_comp = nullptr; | |
2574 | vector_avin = nullptr; | |
2575 | vector_avout = nullptr; | |
9243c3d1 JZZ |
2576 | } |
2577 | ||
2578 | void | |
2579 | vector_infos_manager::dump (FILE *file) const | |
2580 | { | |
2581 | basic_block cfg_bb; | |
2582 | rtx_insn *rinsn; | |
2583 | ||
2584 | fprintf (file, "\n"); | |
2585 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
2586 | { | |
2587 | fprintf (file, "Local vector info of <bb %d>:\n", cfg_bb->index); | |
2588 | fprintf (file, "<HEADER>="); | |
2589 | vector_block_infos[cfg_bb->index].local_dem.dump (file); | |
2590 | FOR_BB_INSNS (cfg_bb, rinsn) | |
2591 | { | |
2592 | if (!NONDEBUG_INSN_P (rinsn) || !has_vtype_op (rinsn)) | |
2593 | continue; | |
2594 | fprintf (file, "<insn %d>=", INSN_UID (rinsn)); | |
2595 | const auto &info = vector_insn_infos[INSN_UID (rinsn)]; | |
2596 | info.dump (file); | |
2597 | } | |
2598 | fprintf (file, "<FOOTER>="); | |
2599 | vector_block_infos[cfg_bb->index].reaching_out.dump (file); | |
acc10c79 JZZ |
2600 | fprintf (file, "<Probability>="); |
2601 | vector_block_infos[cfg_bb->index].probability.dump (file); | |
9243c3d1 JZZ |
2602 | fprintf (file, "\n\n"); |
2603 | } | |
2604 | ||
2605 | fprintf (file, "\n"); | |
2606 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
2607 | { | |
2608 | fprintf (file, "Local properties of <bb %d>:\n", cfg_bb->index); | |
2609 | ||
2610 | fprintf (file, "<ANTLOC>="); | |
2611 | if (vector_antic == nullptr) | |
2612 | fprintf (file, "(nil)\n"); | |
2613 | else | |
2614 | dump_bitmap_file (file, vector_antic[cfg_bb->index]); | |
2615 | ||
2616 | fprintf (file, "<AVLOC>="); | |
2617 | if (vector_comp == nullptr) | |
2618 | fprintf (file, "(nil)\n"); | |
2619 | else | |
2620 | dump_bitmap_file (file, vector_comp[cfg_bb->index]); | |
2621 | ||
2622 | fprintf (file, "<TRANSP>="); | |
2623 | if (vector_transp == nullptr) | |
2624 | fprintf (file, "(nil)\n"); | |
2625 | else | |
2626 | dump_bitmap_file (file, vector_transp[cfg_bb->index]); | |
2627 | ||
2628 | fprintf (file, "<KILL>="); | |
2629 | if (vector_kill == nullptr) | |
2630 | fprintf (file, "(nil)\n"); | |
2631 | else | |
2632 | dump_bitmap_file (file, vector_kill[cfg_bb->index]); | |
2633 | } | |
2634 | ||
2635 | fprintf (file, "\n"); | |
2636 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
2637 | { | |
2638 | fprintf (file, "Global LCM (Lazy code motion) result of <bb %d>:\n", | |
2639 | cfg_bb->index); | |
2640 | ||
2641 | fprintf (file, "<AVIN>="); | |
2642 | if (vector_avin == nullptr) | |
2643 | fprintf (file, "(nil)\n"); | |
2644 | else | |
2645 | dump_bitmap_file (file, vector_avin[cfg_bb->index]); | |
2646 | ||
2647 | fprintf (file, "<AVOUT>="); | |
2648 | if (vector_avout == nullptr) | |
2649 | fprintf (file, "(nil)\n"); | |
2650 | else | |
2651 | dump_bitmap_file (file, vector_avout[cfg_bb->index]); | |
2652 | ||
2653 | fprintf (file, "<DELETE>="); | |
2654 | if (vector_del == nullptr) | |
2655 | fprintf (file, "(nil)\n"); | |
2656 | else | |
2657 | dump_bitmap_file (file, vector_del[cfg_bb->index]); | |
2658 | } | |
2659 | ||
2660 | fprintf (file, "\nGlobal LCM (Lazy code motion) INSERT info:\n"); | |
2661 | for (size_t i = 0; i < vector_exprs.length (); i++) | |
2662 | { | |
2663 | for (int ed = 0; ed < NUM_EDGES (vector_edge_list); ed++) | |
2664 | { | |
2665 | edge eg = INDEX_EDGE (vector_edge_list, ed); | |
2666 | if (bitmap_bit_p (vector_insert[ed], i)) | |
2667 | fprintf (dump_file, | |
2668 | "INSERT edge %d from bb %d to bb %d for VSETVL " | |
2669 | "expr[%ld]\n", | |
2670 | ed, eg->src->index, eg->dest->index, i); | |
2671 | } | |
2672 | } | |
2673 | } | |
2674 | ||
2675 | const pass_data pass_data_vsetvl = { | |
2676 | RTL_PASS, /* type */ | |
2677 | "vsetvl", /* name */ | |
2678 | OPTGROUP_NONE, /* optinfo_flags */ | |
2679 | TV_NONE, /* tv_id */ | |
2680 | 0, /* properties_required */ | |
2681 | 0, /* properties_provided */ | |
2682 | 0, /* properties_destroyed */ | |
2683 | 0, /* todo_flags_start */ | |
2684 | 0, /* todo_flags_finish */ | |
2685 | }; | |
2686 | ||
2687 | class pass_vsetvl : public rtl_opt_pass | |
2688 | { | |
2689 | private: | |
c139f5e1 KC |
2690 | vector_infos_manager *m_vector_manager; |
2691 | ||
2692 | const vector_insn_info &get_vector_info (const rtx_insn *) const; | |
2693 | const vector_insn_info &get_vector_info (const insn_info *) const; | |
2694 | const vector_block_info &get_block_info (const basic_block) const; | |
2695 | const vector_block_info &get_block_info (const bb_info *) const; | |
2696 | vector_block_info &get_block_info (const basic_block); | |
2697 | vector_block_info &get_block_info (const bb_info *); | |
2698 | void update_vector_info (const insn_info *, const vector_insn_info &); | |
9243c3d1 JZZ |
2699 | |
2700 | void simple_vsetvl (void) const; | |
2701 | void lazy_vsetvl (void); | |
2702 | ||
2703 | /* Phase 1. */ | |
2704 | void compute_local_backward_infos (const bb_info *); | |
2705 | ||
2706 | /* Phase 2. */ | |
2707 | bool need_vsetvl (const vector_insn_info &, const vector_insn_info &) const; | |
2708 | void transfer_before (vector_insn_info &, insn_info *) const; | |
2709 | void transfer_after (vector_insn_info &, insn_info *) const; | |
2710 | void emit_local_forward_vsetvls (const bb_info *); | |
2711 | ||
2712 | /* Phase 3. */ | |
4f673c5e JZZ |
2713 | enum fusion_type get_backward_fusion_type (const bb_info *, |
2714 | const vector_insn_info &); | |
6b6b9c68 | 2715 | bool hard_empty_block_p (const bb_info *, const vector_insn_info &) const; |
387cd9d3 JZZ |
2716 | bool backward_demand_fusion (void); |
2717 | bool forward_demand_fusion (void); | |
6b6b9c68 | 2718 | bool cleanup_illegal_dirty_blocks (void); |
387cd9d3 | 2719 | void demand_fusion (void); |
9243c3d1 JZZ |
2720 | |
2721 | /* Phase 4. */ | |
2722 | void prune_expressions (void); | |
2723 | void compute_local_properties (void); | |
6b6b9c68 | 2724 | bool can_refine_vsetvl_p (const basic_block, const vector_insn_info &) const; |
9243c3d1 JZZ |
2725 | void refine_vsetvls (void) const; |
2726 | void cleanup_vsetvls (void); | |
2727 | bool commit_vsetvls (void); | |
2728 | void pre_vsetvl (void); | |
2729 | ||
2730 | /* Phase 5. */ | |
c919d059 KC |
2731 | rtx_insn *get_vsetvl_at_end (const bb_info *, vector_insn_info *) const; |
2732 | void local_eliminate_vsetvl_insn (const bb_info *) const; | |
9243c3d1 JZZ |
2733 | void cleanup_insns (void) const; |
2734 | ||
6b6b9c68 JZZ |
2735 | /* Phase 6. */ |
2736 | void propagate_avl (void) const; | |
2737 | ||
9243c3d1 JZZ |
2738 | void init (void); |
2739 | void done (void); | |
acc10c79 | 2740 | void compute_probabilities (void); |
9243c3d1 JZZ |
2741 | |
2742 | public: | |
2743 | pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {} | |
2744 | ||
2745 | /* opt_pass methods: */ | |
2746 | virtual bool gate (function *) final override { return TARGET_VECTOR; } | |
2747 | virtual unsigned int execute (function *) final override; | |
2748 | }; // class pass_vsetvl | |
2749 | ||
c139f5e1 KC |
2750 | const vector_insn_info & |
2751 | pass_vsetvl::get_vector_info (const rtx_insn *i) const | |
2752 | { | |
2753 | return m_vector_manager->vector_insn_infos[INSN_UID (i)]; | |
2754 | } | |
2755 | ||
2756 | const vector_insn_info & | |
2757 | pass_vsetvl::get_vector_info (const insn_info *i) const | |
2758 | { | |
2759 | return m_vector_manager->vector_insn_infos[i->uid ()]; | |
2760 | } | |
2761 | ||
2762 | const vector_block_info & | |
2763 | pass_vsetvl::get_block_info (const basic_block bb) const | |
2764 | { | |
2765 | return m_vector_manager->vector_block_infos[bb->index]; | |
2766 | } | |
2767 | ||
2768 | const vector_block_info & | |
2769 | pass_vsetvl::get_block_info (const bb_info *bb) const | |
2770 | { | |
2771 | return m_vector_manager->vector_block_infos[bb->index ()]; | |
2772 | } | |
2773 | ||
2774 | vector_block_info & | |
2775 | pass_vsetvl::get_block_info (const basic_block bb) | |
2776 | { | |
2777 | return m_vector_manager->vector_block_infos[bb->index]; | |
2778 | } | |
2779 | ||
2780 | vector_block_info & | |
2781 | pass_vsetvl::get_block_info (const bb_info *bb) | |
2782 | { | |
2783 | return m_vector_manager->vector_block_infos[bb->index ()]; | |
2784 | } | |
2785 | ||
2786 | void | |
2787 | pass_vsetvl::update_vector_info (const insn_info *i, | |
2788 | const vector_insn_info &new_info) | |
2789 | { | |
2790 | m_vector_manager->vector_insn_infos[i->uid ()] = new_info; | |
2791 | } | |
2792 | ||
9243c3d1 JZZ |
2793 | /* Simple m_vsetvl_insert vsetvl for optimize == 0. */ |
2794 | void | |
2795 | pass_vsetvl::simple_vsetvl (void) const | |
2796 | { | |
2797 | if (dump_file) | |
2798 | fprintf (dump_file, | |
2799 | "\nEntering Simple VSETVL PASS and Handling %d basic blocks for " | |
2800 | "function:%s\n", | |
2801 | n_basic_blocks_for_fn (cfun), function_name (cfun)); | |
2802 | ||
2803 | basic_block cfg_bb; | |
2804 | rtx_insn *rinsn; | |
2805 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
2806 | { | |
2807 | FOR_BB_INSNS (cfg_bb, rinsn) | |
2808 | { | |
2809 | if (!NONDEBUG_INSN_P (rinsn)) | |
2810 | continue; | |
2811 | if (has_vtype_op (rinsn)) | |
2812 | { | |
c139f5e1 | 2813 | const auto info = get_vector_info (rinsn); |
9243c3d1 JZZ |
2814 | emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_BEFORE, info, |
2815 | NULL_RTX, rinsn); | |
2816 | } | |
2817 | } | |
2818 | } | |
2819 | } | |
2820 | ||
2821 | /* Compute demanded information by backward data-flow analysis. */ | |
2822 | void | |
2823 | pass_vsetvl::compute_local_backward_infos (const bb_info *bb) | |
2824 | { | |
2825 | vector_insn_info change; | |
2826 | change.set_empty (); | |
2827 | ||
2828 | auto &block_info = m_vector_manager->vector_block_infos[bb->index ()]; | |
2829 | block_info.reaching_out = change; | |
2830 | ||
2831 | for (insn_info *insn : bb->reverse_real_nondebug_insns ()) | |
2832 | { | |
c139f5e1 | 2833 | auto &info = get_vector_info (insn); |
9243c3d1 JZZ |
2834 | |
2835 | if (info.uninit_p ()) | |
2836 | /* If it is uninitialized, propagate it directly. */ | |
c139f5e1 | 2837 | update_vector_info (insn, change); |
9243c3d1 JZZ |
2838 | else if (info.unknown_p ()) |
2839 | change = info; | |
2840 | else | |
2841 | { | |
2842 | gcc_assert (info.valid_p () && "Unexpected Invalid demanded info"); | |
ec99ffab JZZ |
2843 | if (change.valid_p ()) |
2844 | { | |
a481eed8 JZZ |
2845 | if (!(propagate_avl_across_demands_p (change, info) |
2846 | && !reg_available_p (insn, change)) | |
ec99ffab | 2847 | && change.compatible_p (info)) |
fdc5abbd | 2848 | { |
c139f5e1 | 2849 | update_vector_info (insn, change.merge (info, LOCAL_MERGE)); |
fdc5abbd JZ |
2850 | /* Fix PR109399, we should update user vsetvl instruction |
2851 | if there is a change in demand fusion. */ | |
2852 | if (vsetvl_insn_p (insn->rtl ())) | |
2853 | change_vsetvl_insn (insn, info); | |
2854 | } | |
ec99ffab | 2855 | } |
9243c3d1 JZZ |
2856 | change = info; |
2857 | } | |
2858 | } | |
2859 | ||
2860 | block_info.local_dem = change; | |
2861 | if (block_info.local_dem.empty_p ()) | |
2862 | block_info.reaching_out = block_info.local_dem; | |
2863 | } | |
2864 | ||
2865 | /* Return true if a dem_info is required to transition from curr_info to | |
2866 | require before INSN. */ | |
2867 | bool | |
2868 | pass_vsetvl::need_vsetvl (const vector_insn_info &require, | |
2869 | const vector_insn_info &curr_info) const | |
2870 | { | |
2871 | if (!curr_info.valid_p () || curr_info.unknown_p () || curr_info.uninit_p ()) | |
2872 | return true; | |
2873 | ||
a481eed8 | 2874 | if (require.compatible_p (static_cast<const vl_vtype_info &> (curr_info))) |
9243c3d1 JZZ |
2875 | return false; |
2876 | ||
2877 | return true; | |
2878 | } | |
2879 | ||
2880 | /* Given an incoming state reaching INSN, modifies that state so that it is | |
2881 | minimally compatible with INSN. The resulting state is guaranteed to be | |
2882 | semantically legal for INSN, but may not be the state requested by INSN. */ | |
2883 | void | |
2884 | pass_vsetvl::transfer_before (vector_insn_info &info, insn_info *insn) const | |
2885 | { | |
2886 | if (!has_vtype_op (insn->rtl ())) | |
2887 | return; | |
2888 | ||
c139f5e1 | 2889 | const vector_insn_info require = get_vector_info (insn); |
9243c3d1 JZZ |
2890 | if (info.valid_p () && !need_vsetvl (require, info)) |
2891 | return; | |
2892 | info = require; | |
2893 | } | |
2894 | ||
2895 | /* Given a state with which we evaluated insn (see transfer_before above for why | |
2896 | this might be different that the state insn requested), modify the state to | |
2897 | reflect the changes insn might make. */ | |
2898 | void | |
2899 | pass_vsetvl::transfer_after (vector_insn_info &info, insn_info *insn) const | |
2900 | { | |
2901 | if (vector_config_insn_p (insn->rtl ())) | |
2902 | { | |
c139f5e1 | 2903 | info = get_vector_info (insn); |
9243c3d1 JZZ |
2904 | return; |
2905 | } | |
2906 | ||
60bd33bc JZZ |
2907 | if (fault_first_load_p (insn->rtl ()) |
2908 | && info.update_fault_first_load_avl (insn)) | |
2909 | return; | |
9243c3d1 JZZ |
2910 | |
2911 | /* If this is something that updates VL/VTYPE that we don't know about, set | |
2912 | the state to unknown. */ | |
2913 | if (insn->is_call () || insn->is_asm () | |
2914 | || find_access (insn->defs (), VL_REGNUM) | |
2915 | || find_access (insn->defs (), VTYPE_REGNUM)) | |
2916 | info = vector_insn_info::get_unknown (); | |
2917 | } | |
2918 | ||
2919 | /* Emit vsetvl within each block by forward data-flow analysis. */ | |
2920 | void | |
2921 | pass_vsetvl::emit_local_forward_vsetvls (const bb_info *bb) | |
2922 | { | |
2923 | auto &block_info = m_vector_manager->vector_block_infos[bb->index ()]; | |
2924 | if (block_info.local_dem.empty_p ()) | |
2925 | return; | |
2926 | ||
2927 | vector_insn_info curr_info; | |
2928 | for (insn_info *insn : bb->real_nondebug_insns ()) | |
2929 | { | |
2930 | const vector_insn_info prev_info = curr_info; | |
a481eed8 | 2931 | enum vsetvl_type type = NUM_VSETVL_TYPE; |
9243c3d1 JZZ |
2932 | transfer_before (curr_info, insn); |
2933 | ||
2934 | if (has_vtype_op (insn->rtl ())) | |
2935 | { | |
2936 | if (static_cast<const vl_vtype_info &> (prev_info) | |
2937 | != static_cast<const vl_vtype_info &> (curr_info)) | |
2938 | { | |
c139f5e1 | 2939 | const auto require = get_vector_info (insn); |
9243c3d1 JZZ |
2940 | if (!require.compatible_p ( |
2941 | static_cast<const vl_vtype_info &> (prev_info))) | |
a481eed8 JZZ |
2942 | type = insert_vsetvl (EMIT_BEFORE, insn->rtl (), require, |
2943 | prev_info); | |
9243c3d1 JZZ |
2944 | } |
2945 | } | |
2946 | ||
a481eed8 JZZ |
2947 | /* Fix the issue of following sequence: |
2948 | vsetivli zero, 5 | |
2949 | .... | |
2950 | vsetvli zero, zero | |
2951 | vmv.x.s (demand AVL = 8). | |
2952 | .... | |
2953 | incorrect: vsetvli zero, zero ===> Since the curr_info is AVL = 8. | |
2954 | correct: vsetivli zero, 8 | |
2955 | vadd (demand AVL = 8). */ | |
2956 | if (type == VSETVL_VTYPE_CHANGE_ONLY) | |
2957 | { | |
2958 | /* Update the curr_info to be real correct AVL. */ | |
2959 | curr_info.set_avl_info (prev_info.get_avl_info ()); | |
2960 | } | |
9243c3d1 JZZ |
2961 | transfer_after (curr_info, insn); |
2962 | } | |
2963 | ||
2964 | block_info.reaching_out = curr_info; | |
2965 | } | |
2966 | ||
4f673c5e JZZ |
2967 | enum fusion_type |
2968 | pass_vsetvl::get_backward_fusion_type (const bb_info *bb, | |
2969 | const vector_insn_info &prop) | |
9243c3d1 | 2970 | { |
4f673c5e | 2971 | insn_info *insn = prop.get_insn (); |
9243c3d1 | 2972 | |
4f673c5e JZZ |
2973 | /* TODO: We don't backward propagate the explict VSETVL here |
2974 | since we will change vsetvl and vsetvlmax intrinsics into | |
2975 | no side effects which can be optimized into optimal location | |
2976 | by GCC internal passes. We only need to support these backward | |
2977 | propagation if vsetvl intrinsics have side effects. */ | |
2978 | if (vsetvl_insn_p (insn->rtl ())) | |
2979 | return INVALID_FUSION; | |
2980 | ||
2981 | gcc_assert (has_vtype_op (insn->rtl ())); | |
2982 | rtx reg = NULL_RTX; | |
2983 | ||
2984 | /* Case 1: Don't need VL. Just let it backward propagate. */ | |
ec99ffab | 2985 | if (!prop.demand_p (DEMAND_AVL)) |
4f673c5e JZZ |
2986 | return VALID_AVL_FUSION; |
2987 | else | |
9243c3d1 | 2988 | { |
4f673c5e JZZ |
2989 | /* Case 2: CONST_INT AVL, we don't need to check def. */ |
2990 | if (prop.has_avl_imm ()) | |
2991 | return VALID_AVL_FUSION; | |
2992 | else | |
2993 | { | |
2994 | /* Case 3: REG AVL, we need to check the distance of def to make | |
2995 | sure we won't backward propagate over the def. */ | |
2996 | gcc_assert (prop.has_avl_reg ()); | |
2997 | if (vlmax_avl_p (prop.get_avl ())) | |
2998 | /* Check VL operand for vsetvl vl,zero. */ | |
ec99ffab | 2999 | reg = prop.get_avl_reg_rtx (); |
4f673c5e JZZ |
3000 | else |
3001 | /* Check AVL operand for vsetvl zero,avl. */ | |
ec99ffab | 3002 | reg = prop.get_avl (); |
4f673c5e JZZ |
3003 | } |
3004 | } | |
9243c3d1 | 3005 | |
4f673c5e | 3006 | gcc_assert (reg); |
ec99ffab JZZ |
3007 | if (!prop.get_avl_source ()->insn ()->is_phi () |
3008 | && prop.get_avl_source ()->insn ()->bb () == insn->bb ()) | |
6b6b9c68 JZZ |
3009 | return INVALID_FUSION; |
3010 | hash_set<set_info *> sets | |
3011 | = get_all_sets (prop.get_avl_source (), true, true, true); | |
3012 | if (any_set_in_bb_p (sets, insn->bb ())) | |
3013 | return INVALID_FUSION; | |
3014 | ||
3015 | if (vlmax_avl_p (prop.get_avl ())) | |
4f673c5e | 3016 | { |
6b6b9c68 | 3017 | if (find_reg_killed_by (bb, reg)) |
4f673c5e | 3018 | return INVALID_FUSION; |
6b6b9c68 JZZ |
3019 | else |
3020 | return VALID_AVL_FUSION; | |
4f673c5e | 3021 | } |
6b6b9c68 JZZ |
3022 | |
3023 | /* By default, we always enable backward fusion so that we can | |
3024 | gain more optimizations. */ | |
3025 | if (!find_reg_killed_by (bb, reg)) | |
3026 | return VALID_AVL_FUSION; | |
3027 | return KILLED_AVL_FUSION; | |
3028 | } | |
3029 | ||
3030 | /* We almost enable all cases in get_backward_fusion_type, this function | |
3031 | disable the backward fusion by changing dirty blocks into hard empty | |
3032 | blocks in forward dataflow. We can have more accurate optimization by | |
3033 | this method. */ | |
3034 | bool | |
3035 | pass_vsetvl::hard_empty_block_p (const bb_info *bb, | |
3036 | const vector_insn_info &info) const | |
3037 | { | |
3038 | if (!info.dirty_p () || !info.has_avl_reg ()) | |
3039 | return false; | |
3040 | ||
3041 | basic_block cfg_bb = bb->cfg_bb (); | |
3042 | sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index]; | |
ec99ffab JZZ |
3043 | set_info *set = info.get_avl_source (); |
3044 | rtx avl = gen_rtx_REG (Pmode, set->regno ()); | |
6b6b9c68 JZZ |
3045 | hash_set<set_info *> sets = get_all_sets (set, true, false, false); |
3046 | hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb); | |
3047 | ||
3048 | if (find_reg_killed_by (bb, avl)) | |
3049 | { | |
3050 | /* Condition 1: | |
3051 | Dirty block with killed AVL means that the empty block (no RVV | |
3052 | instructions) are polluted as Dirty blocks with the value of current | |
3053 | AVL is killed. For example: | |
3054 | bb 0: | |
3055 | ... | |
3056 | bb 1: | |
3057 | def a5 | |
3058 | bb 2: | |
3059 | RVV (use a5) | |
3060 | In backward dataflow, we will polluted BB0 and BB1 as Dirt with AVL | |
3061 | killed. since a5 is killed in BB1. | |
3062 | In this case, let's take a look at this example: | |
3063 | ||
3064 | bb 3: bb 4: | |
3065 | def3 a5 def4 a5 | |
3066 | bb 5: bb 6: | |
3067 | def1 a5 def2 a5 | |
3068 | \ / | |
3069 | \ / | |
3070 | \ / | |
3071 | \ / | |
3072 | bb 7: | |
3073 | RVV (use a5) | |
3074 | In thi case, we can polluted BB5 and BB6 as dirty if get-def | |
3075 | of a5 from RVV instruction in BB7 is the def1 in BB5 and | |
3076 | def2 BB6 so we can return false early here for HARD_EMPTY_BLOCK_P. | |
3077 | However, we are not sure whether BB3 and BB4 can be | |
3078 | polluted as Dirty with AVL killed so we can't return false | |
3079 | for HARD_EMPTY_BLOCK_P here since it's too early which will | |
3080 | potentially produce issues. */ | |
3081 | gcc_assert (info.dirty_with_killed_avl_p ()); | |
3082 | if (info.get_avl_source () | |
3083 | && get_same_bb_set (sets, bb->cfg_bb ()) == info.get_avl_source ()) | |
3084 | return false; | |
4f673c5e | 3085 | } |
9243c3d1 | 3086 | |
6b6b9c68 JZZ |
3087 | /* Condition 2: |
3088 | Suppress the VL/VTYPE info backward propagation too early: | |
3089 | ________ | |
3090 | | BB0 | | |
3091 | |________| | |
3092 | | | |
3093 | ____|____ | |
3094 | | BB1 | | |
3095 | |________| | |
3096 | In this case, suppose BB 1 has multiple predecessors, BB 0 is one | |
3097 | of them. BB1 has VL/VTYPE info (may be VALID or DIRTY) to backward | |
3098 | propagate. | |
3099 | The AVIN (available in) which is calculated by LCM is empty only | |
3100 | in these 2 circumstances: | |
3101 | 1. all predecessors of BB1 are empty (not VALID | |
3102 | and can not be polluted in backward fusion flow) | |
3103 | 2. VL/VTYPE info of BB1 predecessors are conflict. | |
3104 | ||
3105 | We keep it as dirty in 2nd circumstance and set it as HARD_EMPTY | |
3106 | (can not be polluted as DIRTY any more) in 1st circumstance. | |
3107 | We don't backward propagate in 1st circumstance since there is | |
3108 | no VALID RVV instruction and no polluted blocks (dirty blocks) | |
3109 | by backward propagation from other following blocks. | |
3110 | It's meaningless to keep it as Dirty anymore. | |
3111 | ||
3112 | However, since we keep it as dirty in 2nd since there are VALID or | |
3113 | Dirty blocks in predecessors, we can still gain the benefits and | |
3114 | optimization opportunities. For example, in this case: | |
3115 | for (size_t i = 0; i < n; i++) | |
3116 | { | |
3117 | if (i != cond) { | |
3118 | vint8mf8_t v = *(vint8mf8_t*)(in + i + 100); | |
3119 | *(vint8mf8_t*)(out + i + 100) = v; | |
3120 | } else { | |
3121 | vbool1_t v = *(vbool1_t*)(in + i + 400); | |
3122 | *(vbool1_t*)(out + i + 400) = v; | |
3123 | } | |
3124 | } | |
3125 | VL/VTYPE in if-else are conflict which will produce empty AVIN LCM result | |
3126 | but we can still keep dirty blocks if *(i != cond)* is very unlikely then | |
3127 | we can preset vsetvl (VL/VTYPE) info from else (static propability model). | |
3128 | ||
3129 | We don't want to backward propagate VL/VTYPE information too early | |
3130 | which is not the optimal and may potentially produce issues. */ | |
3131 | if (bitmap_empty_p (avin)) | |
4f673c5e | 3132 | { |
6b6b9c68 JZZ |
3133 | bool hard_empty_p = true; |
3134 | for (const basic_block pred_cfg_bb : pred_cfg_bbs) | |
3135 | { | |
3136 | if (pred_cfg_bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)) | |
3137 | continue; | |
3138 | sbitmap avout = m_vector_manager->vector_avout[pred_cfg_bb->index]; | |
3139 | if (!bitmap_empty_p (avout)) | |
3140 | { | |
3141 | hard_empty_p = false; | |
3142 | break; | |
3143 | } | |
3144 | } | |
3145 | if (hard_empty_p) | |
3146 | return true; | |
4f673c5e | 3147 | } |
9243c3d1 | 3148 | |
6b6b9c68 JZZ |
3149 | edge e; |
3150 | edge_iterator ei; | |
3151 | bool has_avl_killed_insn_p = false; | |
3152 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
4f673c5e | 3153 | { |
6b6b9c68 JZZ |
3154 | const auto block_info |
3155 | = m_vector_manager->vector_block_infos[e->dest->index]; | |
3156 | if (block_info.local_dem.dirty_with_killed_avl_p ()) | |
9243c3d1 | 3157 | { |
6b6b9c68 JZZ |
3158 | has_avl_killed_insn_p = true; |
3159 | break; | |
3160 | } | |
3161 | } | |
3162 | if (!has_avl_killed_insn_p) | |
3163 | return false; | |
4f673c5e | 3164 | |
6b6b9c68 JZZ |
3165 | bool any_set_in_bbs_p = false; |
3166 | for (const basic_block pred_cfg_bb : pred_cfg_bbs) | |
3167 | { | |
3168 | insn_info *def_insn = extract_single_source (set); | |
3169 | if (def_insn) | |
3170 | { | |
3171 | /* Condition 3: | |
3172 | ||
3173 | Case 1: Case 2: | |
3174 | bb 0: bb 0: | |
3175 | def a5 101 ... | |
3176 | bb 1: bb 1: | |
3177 | ... ... | |
3178 | bb 2: bb 2: | |
3179 | RVV 1 (use a5 with TAIL ANY) ... | |
3180 | bb 3: bb 3: | |
3181 | def a5 101 def a5 101 | |
3182 | bb 4: bb 4: | |
3183 | ... ... | |
3184 | bb 5: bb 5: | |
3185 | RVV 2 (use a5 with TU) RVV 1 (use a5) | |
3186 | ||
3187 | Case 1: We can pollute BB3,BB2,BB1,BB0 are all Dirt blocks | |
3188 | with killed AVL so that we can merge TU demand info from RVV 2 | |
3189 | into RVV 1 and elide the vsevl instruction in BB5. | |
3190 | ||
3191 | TODO: We only optimize for single source def since multiple source | |
3192 | def is quite complicated. | |
3193 | ||
3194 | Case 2: We only can pollute bb 3 as dirty and it has been accepted | |
3195 | in Condition 2 and we can't pollute BB3,BB2,BB1,BB0 like case 1. */ | |
3196 | insn_info *last_killed_insn | |
3197 | = find_reg_killed_by (crtl->ssa->bb (pred_cfg_bb), avl); | |
3198 | if (!last_killed_insn || pred_cfg_bb == def_insn->bb ()->cfg_bb ()) | |
3199 | continue; | |
3200 | if (source_equal_p (last_killed_insn, def_insn)) | |
4f673c5e | 3201 | { |
6b6b9c68 JZZ |
3202 | any_set_in_bbs_p = true; |
3203 | break; | |
4f673c5e | 3204 | } |
9243c3d1 JZZ |
3205 | } |
3206 | else | |
4f673c5e | 3207 | { |
6b6b9c68 JZZ |
3208 | /* Condition 4: |
3209 | ||
3210 | bb 0: bb 1: bb 3: | |
3211 | def1 a5 def2 a5 ... | |
3212 | \ / / | |
3213 | \ / / | |
3214 | \ / / | |
3215 | \ / / | |
3216 | bb 4: / | |
3217 | | / | |
3218 | | / | |
3219 | bb 5: / | |
3220 | | / | |
3221 | | / | |
3222 | bb 6: / | |
3223 | | / | |
3224 | | / | |
3225 | bb 8: | |
3226 | RVV 1 (use a5) | |
3227 | If we get-def (REAL) of a5 from RVV 1 instruction, we will get | |
3228 | def1 from BB0 and def2 from BB1. So we will pollute BB6,BB5,BB4, | |
3229 | BB0,BB1 with DIRTY and set BB3 as HARD_EMPTY so that we won't | |
3230 | propagate AVL to BB3. */ | |
3231 | if (any_set_in_bb_p (sets, crtl->ssa->bb (pred_cfg_bb))) | |
3232 | { | |
3233 | any_set_in_bbs_p = true; | |
3234 | break; | |
3235 | } | |
4f673c5e | 3236 | } |
9243c3d1 | 3237 | } |
6b6b9c68 JZZ |
3238 | if (!any_set_in_bbs_p) |
3239 | return true; | |
3240 | return false; | |
9243c3d1 JZZ |
3241 | } |
3242 | ||
3243 | /* Compute global backward demanded info. */ | |
387cd9d3 JZZ |
3244 | bool |
3245 | pass_vsetvl::backward_demand_fusion (void) | |
9243c3d1 JZZ |
3246 | { |
3247 | /* We compute global infos by backward propagation. | |
3248 | We want to have better performance in these following cases: | |
3249 | ||
3250 | 1. for (size_t i = 0; i < n; i++) { | |
3251 | if (i != cond) { | |
3252 | vint8mf8_t v = *(vint8mf8_t*)(in + i + 100); | |
3253 | *(vint8mf8_t*)(out + i + 100) = v; | |
3254 | } else { | |
3255 | vbool1_t v = *(vbool1_t*)(in + i + 400); | |
3256 | *(vbool1_t*)(out + i + 400) = v; | |
3257 | } | |
3258 | } | |
3259 | ||
3260 | Since we don't have any RVV instruction in the BEFORE blocks, | |
3261 | LCM fails to optimize such case. We want to backward propagate | |
3262 | them into empty blocks so that we could have better performance | |
3263 | in LCM. | |
3264 | ||
3265 | 2. bb 0: | |
3266 | vsetvl e8,mf8 (demand RATIO) | |
3267 | bb 1: | |
3268 | vsetvl e32,mf2 (demand SEW and LMUL) | |
3269 | We backward propagate the first VSETVL into e32,mf2 so that we | |
3270 | could be able to eliminate the second VSETVL in LCM. */ | |
3271 | ||
387cd9d3 | 3272 | bool changed_p = false; |
9243c3d1 JZZ |
3273 | for (const bb_info *bb : crtl->ssa->reverse_bbs ()) |
3274 | { | |
3275 | basic_block cfg_bb = bb->cfg_bb (); | |
b9b251b7 JZZ |
3276 | const auto &curr_block_info |
3277 | = m_vector_manager->vector_block_infos[cfg_bb->index]; | |
3278 | const auto &prop = curr_block_info.local_dem; | |
9243c3d1 JZZ |
3279 | |
3280 | /* If there is nothing to propagate, just skip it. */ | |
3281 | if (!prop.valid_or_dirty_p ()) | |
3282 | continue; | |
3283 | ||
b9b251b7 | 3284 | if (!backward_propagate_worthwhile_p (cfg_bb, curr_block_info)) |
9243c3d1 JZZ |
3285 | continue; |
3286 | ||
d06e9264 JZ |
3287 | /* Fix PR108270: |
3288 | ||
3289 | bb 0 -> bb 1 | |
3290 | We don't need to backward fuse VL/VTYPE info from bb 1 to bb 0 | |
3291 | if bb 1 is not inside a loop and all predecessors of bb 0 are empty. */ | |
3292 | if (m_vector_manager->all_empty_predecessor_p (cfg_bb)) | |
3293 | continue; | |
3294 | ||
9243c3d1 JZZ |
3295 | edge e; |
3296 | edge_iterator ei; | |
3297 | /* Backward propagate to each predecessor. */ | |
3298 | FOR_EACH_EDGE (e, ei, cfg_bb->preds) | |
3299 | { | |
9243c3d1 JZZ |
3300 | auto &block_info |
3301 | = m_vector_manager->vector_block_infos[e->src->index]; | |
3302 | ||
3303 | /* We don't propagate through critical edges. */ | |
3304 | if (e->flags & EDGE_COMPLEX) | |
3305 | continue; | |
3306 | if (e->src->index == ENTRY_BLOCK_PTR_FOR_FN (cfun)->index) | |
3307 | continue; | |
44c918b5 JZZ |
3308 | /* If prop is demand of vsetvl instruction and reaching doesn't demand |
3309 | AVL. We don't backward propagate since vsetvl instruction has no | |
3310 | side effects. */ | |
3311 | if (vsetvl_insn_p (prop.get_insn ()->rtl ()) | |
3312 | && propagate_avl_across_demands_p (prop, block_info.reaching_out)) | |
3313 | continue; | |
9243c3d1 JZZ |
3314 | |
3315 | if (block_info.reaching_out.unknown_p ()) | |
3316 | continue; | |
6b6b9c68 JZZ |
3317 | else if (block_info.reaching_out.hard_empty_p ()) |
3318 | continue; | |
9243c3d1 JZZ |
3319 | else if (block_info.reaching_out.empty_p ()) |
3320 | { | |
4f673c5e JZZ |
3321 | enum fusion_type type |
3322 | = get_backward_fusion_type (crtl->ssa->bb (e->src), prop); | |
3323 | if (type == INVALID_FUSION) | |
9243c3d1 JZZ |
3324 | continue; |
3325 | ||
4f673c5e JZZ |
3326 | block_info.reaching_out = prop; |
3327 | block_info.reaching_out.set_dirty (type); | |
6b6b9c68 JZZ |
3328 | |
3329 | if (prop.has_avl_reg () && !vlmax_avl_p (prop.get_avl ())) | |
3330 | { | |
3331 | hash_set<set_info *> sets | |
3332 | = get_all_sets (prop.get_avl_source (), true, true, true); | |
3333 | set_info *set = get_same_bb_set (sets, e->src); | |
3334 | if (set) | |
3335 | block_info.reaching_out.set_avl_info ( | |
3336 | avl_info (prop.get_avl (), set)); | |
3337 | } | |
3338 | ||
4f673c5e JZZ |
3339 | block_info.local_dem = block_info.reaching_out; |
3340 | block_info.probability = curr_block_info.probability; | |
3341 | changed_p = true; | |
9243c3d1 JZZ |
3342 | } |
3343 | else if (block_info.reaching_out.dirty_p ()) | |
3344 | { | |
3345 | /* DIRTY -> DIRTY or VALID -> DIRTY. */ | |
3346 | vector_insn_info new_info; | |
3347 | ||
3348 | if (block_info.reaching_out.compatible_p (prop)) | |
3349 | { | |
ec99ffab | 3350 | if (block_info.reaching_out.available_p (prop)) |
9243c3d1 | 3351 | continue; |
4f673c5e | 3352 | new_info = block_info.reaching_out.merge (prop, GLOBAL_MERGE); |
6b6b9c68 JZZ |
3353 | new_info.set_dirty ( |
3354 | block_info.reaching_out.dirty_with_killed_avl_p ()); | |
3355 | block_info.probability += curr_block_info.probability; | |
9243c3d1 JZZ |
3356 | } |
3357 | else | |
3358 | { | |
4f673c5e JZZ |
3359 | if (curr_block_info.probability > block_info.probability) |
3360 | { | |
6b6b9c68 JZZ |
3361 | enum fusion_type type |
3362 | = get_backward_fusion_type (crtl->ssa->bb (e->src), | |
3363 | prop); | |
3364 | if (type == INVALID_FUSION) | |
3365 | continue; | |
4f673c5e | 3366 | new_info = prop; |
6b6b9c68 | 3367 | new_info.set_dirty (type); |
4f673c5e JZZ |
3368 | block_info.probability = curr_block_info.probability; |
3369 | } | |
9243c3d1 JZZ |
3370 | else |
3371 | continue; | |
3372 | } | |
3373 | ||
ec99ffab JZZ |
3374 | if (propagate_avl_across_demands_p (prop, |
3375 | block_info.reaching_out)) | |
3376 | { | |
3377 | rtx reg = new_info.get_avl_reg_rtx (); | |
3378 | if (find_reg_killed_by (crtl->ssa->bb (e->src), reg)) | |
3379 | new_info.set_dirty (true); | |
3380 | } | |
3381 | ||
9243c3d1 JZZ |
3382 | block_info.local_dem = new_info; |
3383 | block_info.reaching_out = new_info; | |
387cd9d3 | 3384 | changed_p = true; |
9243c3d1 JZZ |
3385 | } |
3386 | else | |
3387 | { | |
3388 | /* We not only change the info during backward propagation, | |
3389 | but also change the VSETVL instruction. */ | |
3390 | gcc_assert (block_info.reaching_out.valid_p ()); | |
6b6b9c68 JZZ |
3391 | hash_set<set_info *> sets |
3392 | = get_all_sets (prop.get_avl_source (), true, false, false); | |
3393 | set_info *set = get_same_bb_set (sets, e->src); | |
3394 | if (vsetvl_insn_p (block_info.reaching_out.get_insn ()->rtl ()) | |
3395 | && prop.has_avl_reg () && !vlmax_avl_p (prop.get_avl ())) | |
3396 | { | |
3397 | if (!block_info.reaching_out.same_vlmax_p (prop)) | |
3398 | continue; | |
3399 | if (block_info.reaching_out.same_vtype_p (prop)) | |
3400 | continue; | |
3401 | if (!set) | |
3402 | continue; | |
3403 | if (set->insn () != block_info.reaching_out.get_insn ()) | |
3404 | continue; | |
3405 | } | |
ec99ffab JZZ |
3406 | |
3407 | if (!block_info.reaching_out.compatible_p (prop)) | |
3408 | continue; | |
3409 | if (block_info.reaching_out.available_p (prop)) | |
3410 | continue; | |
9243c3d1 JZZ |
3411 | |
3412 | vector_insn_info be_merged = block_info.reaching_out; | |
3413 | if (block_info.local_dem == block_info.reaching_out) | |
3414 | be_merged = block_info.local_dem; | |
4f673c5e JZZ |
3415 | vector_insn_info new_info = be_merged.merge (prop, GLOBAL_MERGE); |
3416 | ||
3417 | if (curr_block_info.probability > block_info.probability) | |
3418 | block_info.probability = curr_block_info.probability; | |
9243c3d1 | 3419 | |
ec99ffab | 3420 | if (propagate_avl_across_demands_p (prop, block_info.reaching_out) |
a481eed8 JZZ |
3421 | && !reg_available_p (crtl->ssa->bb (e->src)->end_insn (), |
3422 | new_info)) | |
ec99ffab JZZ |
3423 | continue; |
3424 | ||
aef20243 | 3425 | change_vsetvl_insn (new_info.get_insn (), new_info); |
9243c3d1 JZZ |
3426 | if (block_info.local_dem == block_info.reaching_out) |
3427 | block_info.local_dem = new_info; | |
3428 | block_info.reaching_out = new_info; | |
387cd9d3 | 3429 | changed_p = true; |
9243c3d1 JZZ |
3430 | } |
3431 | } | |
3432 | } | |
387cd9d3 JZZ |
3433 | return changed_p; |
3434 | } | |
3435 | ||
3436 | /* Compute global forward demanded info. */ | |
3437 | bool | |
3438 | pass_vsetvl::forward_demand_fusion (void) | |
3439 | { | |
3440 | /* Enhance the global information propagation especially | |
3441 | backward propagation miss the propagation. | |
3442 | Consider such case: | |
3443 | ||
3444 | bb0 | |
3445 | (TU) | |
3446 | / \ | |
3447 | bb1 bb2 | |
3448 | (TU) (ANY) | |
3449 | existing edge -----> \ / (TU) <----- LCM create this edge. | |
3450 | bb3 | |
3451 | (TU) | |
3452 | ||
3453 | Base on the situation, LCM fails to eliminate the VSETVL instruction and | |
3454 | insert an edge from bb2 to bb3 since we can't backward propagate bb3 into | |
3455 | bb2. To avoid this confusing LCM result and non-optimal codegen, we should | |
3456 | forward propagate information from bb0 to bb2 which is friendly to LCM. */ | |
3457 | bool changed_p = false; | |
3458 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
3459 | { | |
3460 | basic_block cfg_bb = bb->cfg_bb (); | |
3461 | const auto &prop | |
3462 | = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out; | |
3463 | ||
3464 | /* If there is nothing to propagate, just skip it. */ | |
3465 | if (!prop.valid_or_dirty_p ()) | |
3466 | continue; | |
3467 | ||
00fb7698 JZZ |
3468 | if (cfg_bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
3469 | continue; | |
3470 | ||
ec99ffab JZZ |
3471 | if (vsetvl_insn_p (prop.get_insn ()->rtl ())) |
3472 | continue; | |
3473 | ||
387cd9d3 JZZ |
3474 | edge e; |
3475 | edge_iterator ei; | |
3476 | /* Forward propagate to each successor. */ | |
3477 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
3478 | { | |
3479 | auto &local_dem | |
3480 | = m_vector_manager->vector_block_infos[e->dest->index].local_dem; | |
3481 | auto &reaching_out | |
3482 | = m_vector_manager->vector_block_infos[e->dest->index].reaching_out; | |
3483 | ||
3484 | /* It's quite obvious, we don't need to propagate itself. */ | |
3485 | if (e->dest->index == cfg_bb->index) | |
3486 | continue; | |
00fb7698 JZZ |
3487 | /* We don't propagate through critical edges. */ |
3488 | if (e->flags & EDGE_COMPLEX) | |
3489 | continue; | |
3490 | if (e->dest->index == EXIT_BLOCK_PTR_FOR_FN (cfun)->index) | |
3491 | continue; | |
387cd9d3 JZZ |
3492 | |
3493 | /* If there is nothing to propagate, just skip it. */ | |
3494 | if (!local_dem.valid_or_dirty_p ()) | |
3495 | continue; | |
ec99ffab | 3496 | if (local_dem.available_p (prop)) |
4f673c5e JZZ |
3497 | continue; |
3498 | if (!local_dem.compatible_p (prop)) | |
3499 | continue; | |
ec99ffab JZZ |
3500 | if (propagate_avl_across_demands_p (prop, local_dem)) |
3501 | continue; | |
387cd9d3 | 3502 | |
4f673c5e JZZ |
3503 | vector_insn_info new_info = local_dem.merge (prop, GLOBAL_MERGE); |
3504 | new_info.set_insn (local_dem.get_insn ()); | |
3505 | if (local_dem.dirty_p ()) | |
387cd9d3 | 3506 | { |
4f673c5e | 3507 | gcc_assert (local_dem == reaching_out); |
6b6b9c68 | 3508 | new_info.set_dirty (local_dem.dirty_with_killed_avl_p ()); |
4f673c5e | 3509 | local_dem = new_info; |
4f673c5e JZZ |
3510 | reaching_out = local_dem; |
3511 | } | |
3512 | else | |
3513 | { | |
3514 | if (reaching_out == local_dem) | |
3515 | reaching_out = new_info; | |
3516 | local_dem = new_info; | |
3517 | change_vsetvl_insn (local_dem.get_insn (), new_info); | |
387cd9d3 | 3518 | } |
4f673c5e JZZ |
3519 | auto &prob |
3520 | = m_vector_manager->vector_block_infos[e->dest->index].probability; | |
3521 | auto &curr_prob | |
3522 | = m_vector_manager->vector_block_infos[cfg_bb->index].probability; | |
3523 | prob = curr_prob * e->probability; | |
3524 | changed_p = true; | |
387cd9d3 JZZ |
3525 | } |
3526 | } | |
3527 | return changed_p; | |
3528 | } | |
3529 | ||
3530 | void | |
3531 | pass_vsetvl::demand_fusion (void) | |
3532 | { | |
3533 | bool changed_p = true; | |
3534 | while (changed_p) | |
3535 | { | |
3536 | changed_p = false; | |
4f673c5e JZZ |
3537 | /* To optimize the case like this: |
3538 | void f2 (int8_t * restrict in, int8_t * restrict out, int n, int cond) | |
3539 | { | |
3540 | size_t vl = 101; | |
3541 | ||
3542 | for (size_t i = 0; i < n; i++) | |
3543 | { | |
3544 | vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl); | |
3545 | __riscv_vse8_v_i8mf8 (out + i + 300, v, vl); | |
3546 | } | |
3547 | ||
3548 | for (size_t i = 0; i < n; i++) | |
3549 | { | |
3550 | vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl); | |
3551 | __riscv_vse8_v_i8mf8 (out + i, v, vl); | |
3552 | ||
3553 | vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl); | |
3554 | __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl); | |
3555 | } | |
3556 | } | |
3557 | ||
3558 | bb 0: li a5, 101 (killed avl) | |
3559 | ... | |
3560 | bb 1: vsetvli zero, a5, ta | |
3561 | ... | |
3562 | bb 2: li a5, 101 (killed avl) | |
3563 | ... | |
3564 | bb 3: vsetvli zero, a3, tu | |
3565 | ||
3566 | We want to fuse VSEVLI instructions on bb 1 and bb 3. However, there is | |
3567 | an AVL kill instruction in bb 2 that we can't backward fuse bb 3 or | |
3568 | forward bb 1 arbitrarily. We need available information of each block to | |
3569 | help for such cases. */ | |
6b6b9c68 JZZ |
3570 | changed_p |= backward_demand_fusion (); |
3571 | changed_p |= forward_demand_fusion (); | |
3572 | } | |
3573 | ||
3574 | changed_p = true; | |
3575 | while (changed_p) | |
3576 | { | |
3577 | changed_p = false; | |
3578 | prune_expressions (); | |
3579 | m_vector_manager->create_bitmap_vectors (); | |
3580 | compute_local_properties (); | |
4f673c5e JZZ |
3581 | compute_available (m_vector_manager->vector_comp, |
3582 | m_vector_manager->vector_kill, | |
3583 | m_vector_manager->vector_avout, | |
3584 | m_vector_manager->vector_avin); | |
6b6b9c68 | 3585 | changed_p |= cleanup_illegal_dirty_blocks (); |
4f673c5e JZZ |
3586 | m_vector_manager->free_bitmap_vectors (); |
3587 | if (!m_vector_manager->vector_exprs.is_empty ()) | |
3588 | m_vector_manager->vector_exprs.release (); | |
387cd9d3 | 3589 | } |
9243c3d1 JZZ |
3590 | |
3591 | if (dump_file) | |
3592 | { | |
3593 | fprintf (dump_file, "\n\nDirty blocks list: "); | |
681a5632 JZZ |
3594 | for (const bb_info *bb : crtl->ssa->bbs ()) |
3595 | if (m_vector_manager->vector_block_infos[bb->index ()] | |
3596 | .reaching_out.dirty_p ()) | |
3597 | fprintf (dump_file, "%d ", bb->index ()); | |
9243c3d1 JZZ |
3598 | fprintf (dump_file, "\n\n"); |
3599 | } | |
3600 | } | |
3601 | ||
6b6b9c68 JZZ |
3602 | /* Cleanup illegal dirty blocks. */ |
3603 | bool | |
3604 | pass_vsetvl::cleanup_illegal_dirty_blocks (void) | |
3605 | { | |
3606 | bool changed_p = false; | |
3607 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
3608 | { | |
3609 | basic_block cfg_bb = bb->cfg_bb (); | |
3610 | const auto &prop | |
3611 | = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out; | |
3612 | ||
3613 | /* If there is nothing to cleanup, just skip it. */ | |
3614 | if (!prop.valid_or_dirty_p ()) | |
3615 | continue; | |
3616 | ||
3617 | if (hard_empty_block_p (bb, prop)) | |
3618 | { | |
3619 | m_vector_manager->vector_block_infos[cfg_bb->index].local_dem | |
3620 | = vector_insn_info::get_hard_empty (); | |
3621 | m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out | |
3622 | = vector_insn_info::get_hard_empty (); | |
3623 | changed_p = true; | |
3624 | continue; | |
3625 | } | |
3626 | } | |
3627 | return changed_p; | |
3628 | } | |
3629 | ||
9243c3d1 JZZ |
3630 | /* Assemble the candidates expressions for LCM. */ |
3631 | void | |
3632 | pass_vsetvl::prune_expressions (void) | |
3633 | { | |
681a5632 | 3634 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 3635 | { |
681a5632 JZZ |
3636 | if (m_vector_manager->vector_block_infos[bb->index ()] |
3637 | .local_dem.valid_or_dirty_p ()) | |
9243c3d1 | 3638 | m_vector_manager->create_expr ( |
681a5632 JZZ |
3639 | m_vector_manager->vector_block_infos[bb->index ()].local_dem); |
3640 | if (m_vector_manager->vector_block_infos[bb->index ()] | |
9243c3d1 JZZ |
3641 | .reaching_out.valid_or_dirty_p ()) |
3642 | m_vector_manager->create_expr ( | |
681a5632 | 3643 | m_vector_manager->vector_block_infos[bb->index ()].reaching_out); |
9243c3d1 JZZ |
3644 | } |
3645 | ||
3646 | if (dump_file) | |
3647 | { | |
3648 | fprintf (dump_file, "\nThe total VSETVL expression num = %d\n", | |
3649 | m_vector_manager->vector_exprs.length ()); | |
3650 | fprintf (dump_file, "Expression List:\n"); | |
3651 | for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) | |
3652 | { | |
3653 | fprintf (dump_file, "Expr[%ld]:\n", i); | |
3654 | m_vector_manager->vector_exprs[i]->dump (dump_file); | |
3655 | fprintf (dump_file, "\n"); | |
3656 | } | |
3657 | } | |
3658 | } | |
3659 | ||
4f673c5e JZZ |
3660 | /* Compute the local properties of each recorded expression. |
3661 | ||
3662 | Local properties are those that are defined by the block, irrespective of | |
3663 | other blocks. | |
3664 | ||
3665 | An expression is transparent in a block if its operands are not modified | |
3666 | in the block. | |
3667 | ||
3668 | An expression is computed (locally available) in a block if it is computed | |
3669 | at least once and expression would contain the same value if the | |
3670 | computation was moved to the end of the block. | |
3671 | ||
3672 | An expression is locally anticipatable in a block if it is computed at | |
3673 | least once and expression would contain the same value if the computation | |
3674 | was moved to the beginning of the block. */ | |
9243c3d1 JZZ |
3675 | void |
3676 | pass_vsetvl::compute_local_properties (void) | |
3677 | { | |
3678 | /* - If T is locally available at the end of a block, then T' must be | |
3679 | available at the end of the same block. Since some optimization has | |
3680 | occurred earlier, T' might not be locally available, however, it must | |
3681 | have been previously computed on all paths. As a formula, T at AVLOC(B) | |
3682 | implies that T' at AVOUT(B). | |
3683 | An "available occurrence" is one that is the last occurrence in the | |
3684 | basic block and the operands are not modified by following statements in | |
3685 | the basic block [including this insn]. | |
3686 | ||
3687 | - If T is locally anticipated at the beginning of a block, then either | |
3688 | T', is locally anticipated or it is already available from previous | |
3689 | blocks. As a formula, this means that T at ANTLOC(B) implies that T' at | |
3690 | ANTLOC(B) at AVIN(B). | |
3691 | An "anticipatable occurrence" is one that is the first occurrence in the | |
3692 | basic block, the operands are not modified in the basic block prior | |
3693 | to the occurrence and the output is not used between the start of | |
3694 | the block and the occurrence. */ | |
3695 | ||
3696 | basic_block cfg_bb; | |
4f673c5e | 3697 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 3698 | { |
4f673c5e | 3699 | unsigned int curr_bb_idx = bb->index (); |
9243c3d1 JZZ |
3700 | const auto local_dem |
3701 | = m_vector_manager->vector_block_infos[curr_bb_idx].local_dem; | |
3702 | const auto reaching_out | |
3703 | = m_vector_manager->vector_block_infos[curr_bb_idx].reaching_out; | |
3704 | ||
4f673c5e JZZ |
3705 | /* Compute transparent. */ |
3706 | for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) | |
9243c3d1 | 3707 | { |
4f673c5e JZZ |
3708 | const vector_insn_info *expr = m_vector_manager->vector_exprs[i]; |
3709 | if (local_dem.real_dirty_p () || local_dem.valid_p () | |
3710 | || local_dem.unknown_p () | |
3711 | || has_vsetvl_killed_avl_p (bb, local_dem)) | |
9243c3d1 | 3712 | bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx], i); |
4f673c5e JZZ |
3713 | /* FIXME: Here we set the block as non-transparent (killed) if there |
3714 | is an instruction killed the value of AVL according to the | |
3715 | definition of Local transparent. This is true for such following | |
3716 | case: | |
3717 | ||
3718 | bb 0 (Loop label): | |
3719 | vsetvl zero, a5, e8, mf8 | |
3720 | bb 1: | |
3721 | def a5 | |
3722 | bb 2: | |
3723 | branch bb 0 (Loop label). | |
3724 | ||
3725 | In this case, we known there is a loop bb 0->bb 1->bb 2. According | |
3726 | to LCM definition, it is correct when we set vsetvl zero, a5, e8, | |
3727 | mf8 as non-transparent (killed) so that LCM will not hoist outside | |
3728 | the bb 0. | |
3729 | ||
3730 | However, such conservative configuration will forbid optimization | |
3731 | on some unlucky case. For example: | |
3732 | ||
3733 | bb 0: | |
3734 | li a5, 101 | |
3735 | bb 1: | |
3736 | vsetvl zero, a5, e8, mf8 | |
3737 | bb 2: | |
3738 | li a5, 101 | |
3739 | bb 3: | |
3740 | vsetvl zero, a5, e8, mf8. | |
3741 | So we also relax def a5 as transparent to gain more optimizations | |
3742 | as long as the all real def insn of avl do not come from this | |
3743 | block. This configuration may be still missing some optimization | |
3744 | opportunities. */ | |
6b6b9c68 | 3745 | if (find_reg_killed_by (bb, expr->get_avl ())) |
4f673c5e | 3746 | { |
6b6b9c68 JZZ |
3747 | hash_set<set_info *> sets |
3748 | = get_all_sets (expr->get_avl_source (), true, false, false); | |
3749 | if (any_set_in_bb_p (sets, bb)) | |
4f673c5e JZZ |
3750 | bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx], |
3751 | i); | |
3752 | } | |
9243c3d1 JZZ |
3753 | } |
3754 | ||
4f673c5e | 3755 | /* Compute anticipatable occurrences. */ |
6b6b9c68 JZZ |
3756 | if (local_dem.valid_p () || local_dem.real_dirty_p () |
3757 | || (has_vsetvl_killed_avl_p (bb, local_dem) | |
3758 | && vlmax_avl_p (local_dem.get_avl ()))) | |
4f673c5e JZZ |
3759 | if (anticipatable_occurrence_p (bb, local_dem)) |
3760 | bitmap_set_bit (m_vector_manager->vector_antic[curr_bb_idx], | |
3761 | m_vector_manager->get_expr_id (local_dem)); | |
9243c3d1 | 3762 | |
4f673c5e | 3763 | /* Compute available occurrences. */ |
9243c3d1 JZZ |
3764 | if (reaching_out.valid_or_dirty_p ()) |
3765 | { | |
9243c3d1 JZZ |
3766 | auto_vec<size_t> available_list |
3767 | = m_vector_manager->get_all_available_exprs (reaching_out); | |
3768 | for (size_t i = 0; i < available_list.length (); i++) | |
4f673c5e JZZ |
3769 | { |
3770 | const vector_insn_info *expr | |
3771 | = m_vector_manager->vector_exprs[available_list[i]]; | |
3772 | if (reaching_out.real_dirty_p () | |
3773 | || has_vsetvl_killed_avl_p (bb, reaching_out) | |
3774 | || available_occurrence_p (bb, *expr)) | |
3775 | bitmap_set_bit (m_vector_manager->vector_comp[curr_bb_idx], | |
3776 | available_list[i]); | |
3777 | } | |
9243c3d1 JZZ |
3778 | } |
3779 | } | |
3780 | ||
3781 | /* Compute kill for each basic block using: | |
3782 | ||
3783 | ~(TRANSP | COMP) | |
3784 | */ | |
3785 | ||
3786 | FOR_EACH_BB_FN (cfg_bb, cfun) | |
3787 | { | |
3788 | bitmap_ior (m_vector_manager->vector_kill[cfg_bb->index], | |
3789 | m_vector_manager->vector_transp[cfg_bb->index], | |
3790 | m_vector_manager->vector_comp[cfg_bb->index]); | |
3791 | bitmap_not (m_vector_manager->vector_kill[cfg_bb->index], | |
3792 | m_vector_manager->vector_kill[cfg_bb->index]); | |
3793 | } | |
3794 | ||
3795 | FOR_EACH_BB_FN (cfg_bb, cfun) | |
3796 | { | |
3797 | edge e; | |
3798 | edge_iterator ei; | |
3799 | ||
3800 | /* If the current block is the destination of an abnormal edge, we | |
3801 | kill all trapping (for PRE) and memory (for hoist) expressions | |
3802 | because we won't be able to properly place the instruction on | |
3803 | the edge. So make them neither anticipatable nor transparent. | |
3804 | This is fairly conservative. | |
3805 | ||
3806 | ??? For hoisting it may be necessary to check for set-and-jump | |
3807 | instructions here, not just for abnormal edges. The general problem | |
3808 | is that when an expression cannot not be placed right at the end of | |
3809 | a basic block we should account for any side-effects of a subsequent | |
3810 | jump instructions that could clobber the expression. It would | |
3811 | be best to implement this check along the lines of | |
3812 | should_hoist_expr_to_dom where the target block is already known | |
3813 | and, hence, there's no need to conservatively prune expressions on | |
3814 | "intermediate" set-and-jump instructions. */ | |
3815 | FOR_EACH_EDGE (e, ei, cfg_bb->preds) | |
3816 | if (e->flags & EDGE_COMPLEX) | |
3817 | { | |
3818 | bitmap_clear (m_vector_manager->vector_antic[cfg_bb->index]); | |
3819 | bitmap_clear (m_vector_manager->vector_transp[cfg_bb->index]); | |
3820 | } | |
3821 | } | |
3822 | } | |
3823 | ||
3824 | /* Return true if VSETVL in the block can be refined as vsetvl zero,zero. */ | |
3825 | bool | |
6b6b9c68 JZZ |
3826 | pass_vsetvl::can_refine_vsetvl_p (const basic_block cfg_bb, |
3827 | const vector_insn_info &info) const | |
9243c3d1 JZZ |
3828 | { |
3829 | if (!m_vector_manager->all_same_ratio_p ( | |
3830 | m_vector_manager->vector_avin[cfg_bb->index])) | |
3831 | return false; | |
3832 | ||
005fad9d JZZ |
3833 | if (!m_vector_manager->all_same_avl_p ( |
3834 | cfg_bb, m_vector_manager->vector_avin[cfg_bb->index])) | |
3835 | return false; | |
3836 | ||
9243c3d1 JZZ |
3837 | size_t expr_id |
3838 | = bitmap_first_set_bit (m_vector_manager->vector_avin[cfg_bb->index]); | |
6b6b9c68 JZZ |
3839 | if (!m_vector_manager->vector_exprs[expr_id]->same_vlmax_p (info)) |
3840 | return false; | |
3841 | if (!m_vector_manager->vector_exprs[expr_id]->compatible_avl_p (info)) | |
9243c3d1 JZZ |
3842 | return false; |
3843 | ||
3844 | edge e; | |
3845 | edge_iterator ei; | |
3846 | bool all_valid_p = true; | |
3847 | FOR_EACH_EDGE (e, ei, cfg_bb->preds) | |
3848 | { | |
3849 | if (bitmap_empty_p (m_vector_manager->vector_avout[e->src->index])) | |
3850 | { | |
3851 | all_valid_p = false; | |
3852 | break; | |
3853 | } | |
3854 | } | |
3855 | ||
3856 | if (!all_valid_p) | |
3857 | return false; | |
3858 | return true; | |
3859 | } | |
3860 | ||
3861 | /* Optimize athe case like this: | |
3862 | ||
3863 | bb 0: | |
3864 | vsetvl 0 a5,zero,e8,mf8 | |
3865 | insn 0 (demand SEW + LMUL) | |
3866 | bb 1: | |
3867 | vsetvl 1 a5,zero,e16,mf4 | |
3868 | insn 1 (demand SEW + LMUL) | |
3869 | ||
3870 | In this case, we should be able to refine | |
3871 | vsetvl 1 into vsetvl zero, zero according AVIN. */ | |
3872 | void | |
3873 | pass_vsetvl::refine_vsetvls (void) const | |
3874 | { | |
3875 | basic_block cfg_bb; | |
3876 | FOR_EACH_BB_FN (cfg_bb, cfun) | |
3877 | { | |
c139f5e1 | 3878 | auto info = get_block_info(cfg_bb).local_dem; |
9243c3d1 JZZ |
3879 | insn_info *insn = info.get_insn (); |
3880 | if (!info.valid_p ()) | |
3881 | continue; | |
3882 | ||
3883 | rtx_insn *rinsn = insn->rtl (); | |
6b6b9c68 | 3884 | if (!can_refine_vsetvl_p (cfg_bb, info)) |
9243c3d1 JZZ |
3885 | continue; |
3886 | ||
ec99ffab JZZ |
3887 | /* We can't refine user vsetvl into vsetvl zero,zero since the dest |
3888 | will be used by the following instructions. */ | |
3889 | if (vector_config_insn_p (rinsn)) | |
3890 | { | |
3891 | m_vector_manager->to_refine_vsetvls.add (rinsn); | |
3892 | continue; | |
3893 | } | |
ff8f9544 JZ |
3894 | |
3895 | /* If all incoming edges to a block have a vector state that is compatbile | |
3896 | with the block. In such a case we need not emit a vsetvl in the current | |
3897 | block. */ | |
3898 | ||
3899 | gcc_assert (has_vtype_op (insn->rtl ())); | |
3900 | rinsn = PREV_INSN (insn->rtl ()); | |
3901 | gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ()))); | |
3902 | if (m_vector_manager->all_avail_in_compatible_p (cfg_bb)) | |
3903 | { | |
3904 | size_t id = m_vector_manager->get_expr_id (info); | |
3905 | if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], id)) | |
3906 | continue; | |
3907 | eliminate_insn (rinsn); | |
3908 | } | |
3909 | else | |
3910 | { | |
3911 | rtx new_pat | |
3912 | = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX); | |
3913 | change_insn (rinsn, new_pat); | |
3914 | } | |
9243c3d1 JZZ |
3915 | } |
3916 | } | |
3917 | ||
3918 | void | |
3919 | pass_vsetvl::cleanup_vsetvls () | |
3920 | { | |
3921 | basic_block cfg_bb; | |
3922 | FOR_EACH_BB_FN (cfg_bb, cfun) | |
3923 | { | |
3924 | auto &info | |
c139f5e1 | 3925 | = get_block_info(cfg_bb).reaching_out; |
9243c3d1 JZZ |
3926 | gcc_assert (m_vector_manager->expr_set_num ( |
3927 | m_vector_manager->vector_del[cfg_bb->index]) | |
3928 | <= 1); | |
3929 | for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) | |
3930 | { | |
3931 | if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], i)) | |
3932 | { | |
3933 | if (info.dirty_p ()) | |
3934 | info.set_unknown (); | |
3935 | else | |
3936 | { | |
4f673c5e | 3937 | const auto dem |
c139f5e1 | 3938 | = get_block_info(cfg_bb) |
4f673c5e JZZ |
3939 | .local_dem; |
3940 | gcc_assert (dem == *m_vector_manager->vector_exprs[i]); | |
3941 | insn_info *insn = dem.get_insn (); | |
9243c3d1 JZZ |
3942 | gcc_assert (insn && insn->rtl ()); |
3943 | rtx_insn *rinsn; | |
ec99ffab JZZ |
3944 | /* We can't eliminate user vsetvl since the dest will be used |
3945 | * by the following instructions. */ | |
9243c3d1 | 3946 | if (vector_config_insn_p (insn->rtl ())) |
9243c3d1 | 3947 | { |
ec99ffab JZZ |
3948 | m_vector_manager->to_delete_vsetvls.add (insn->rtl ()); |
3949 | continue; | |
9243c3d1 | 3950 | } |
ec99ffab JZZ |
3951 | |
3952 | gcc_assert (has_vtype_op (insn->rtl ())); | |
3953 | rinsn = PREV_INSN (insn->rtl ()); | |
3954 | gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ()))); | |
9243c3d1 JZZ |
3955 | eliminate_insn (rinsn); |
3956 | } | |
3957 | } | |
3958 | } | |
3959 | } | |
3960 | } | |
3961 | ||
3962 | bool | |
3963 | pass_vsetvl::commit_vsetvls (void) | |
3964 | { | |
3965 | bool need_commit = false; | |
3966 | ||
3967 | for (int ed = 0; ed < NUM_EDGES (m_vector_manager->vector_edge_list); ed++) | |
3968 | { | |
3969 | for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) | |
3970 | { | |
3971 | edge eg = INDEX_EDGE (m_vector_manager->vector_edge_list, ed); | |
3972 | if (bitmap_bit_p (m_vector_manager->vector_insert[ed], i)) | |
3973 | { | |
3974 | const vector_insn_info *require | |
3975 | = m_vector_manager->vector_exprs[i]; | |
3976 | gcc_assert (require->valid_or_dirty_p ()); | |
3977 | rtl_profile_for_edge (eg); | |
3978 | start_sequence (); | |
3979 | ||
3980 | insn_info *insn = require->get_insn (); | |
3981 | vector_insn_info prev_info = vector_insn_info (); | |
005fad9d JZZ |
3982 | sbitmap bitdata = m_vector_manager->vector_avout[eg->src->index]; |
3983 | if (m_vector_manager->all_same_ratio_p (bitdata) | |
3984 | && m_vector_manager->all_same_avl_p (eg->dest, bitdata)) | |
9243c3d1 | 3985 | { |
005fad9d | 3986 | size_t first = bitmap_first_set_bit (bitdata); |
9243c3d1 JZZ |
3987 | prev_info = *m_vector_manager->vector_exprs[first]; |
3988 | } | |
3989 | ||
3990 | insert_vsetvl (EMIT_DIRECT, insn->rtl (), *require, prev_info); | |
3991 | rtx_insn *rinsn = get_insns (); | |
3992 | end_sequence (); | |
3993 | default_rtl_profile (); | |
3994 | ||
3995 | /* We should not get an abnormal edge here. */ | |
3996 | gcc_assert (!(eg->flags & EDGE_ABNORMAL)); | |
3997 | need_commit = true; | |
3998 | insert_insn_on_edge (rinsn, eg); | |
3999 | } | |
4000 | } | |
4001 | } | |
4002 | ||
4f673c5e | 4003 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 4004 | { |
4f673c5e | 4005 | basic_block cfg_bb = bb->cfg_bb (); |
9243c3d1 | 4006 | const auto reaching_out |
c139f5e1 | 4007 | = get_block_info(cfg_bb).reaching_out; |
9243c3d1 JZZ |
4008 | if (!reaching_out.dirty_p ()) |
4009 | continue; | |
4010 | ||
4f673c5e JZZ |
4011 | if (reaching_out.dirty_with_killed_avl_p ()) |
4012 | { | |
4013 | if (!has_vsetvl_killed_avl_p (bb, reaching_out)) | |
4014 | continue; | |
4015 | ||
4016 | unsigned int bb_index; | |
4017 | sbitmap_iterator sbi; | |
4018 | sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index]; | |
4019 | bool available_p = false; | |
4020 | EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi) | |
4021 | { | |
ec99ffab JZZ |
4022 | if (m_vector_manager->vector_exprs[bb_index]->available_p ( |
4023 | reaching_out)) | |
4f673c5e JZZ |
4024 | { |
4025 | available_p = true; | |
4026 | break; | |
4027 | } | |
4028 | } | |
4029 | if (available_p) | |
4030 | continue; | |
4031 | } | |
7ae4d1df JZZ |
4032 | |
4033 | rtx new_pat; | |
ec99ffab JZZ |
4034 | if (!reaching_out.demand_p (DEMAND_AVL)) |
4035 | { | |
4036 | vl_vtype_info new_info = reaching_out; | |
4037 | new_info.set_avl_info (avl_info (const0_rtx, nullptr)); | |
4038 | new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX); | |
4039 | } | |
4040 | else if (can_refine_vsetvl_p (cfg_bb, reaching_out)) | |
9243c3d1 JZZ |
4041 | new_pat |
4042 | = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, reaching_out, NULL_RTX); | |
7ae4d1df JZZ |
4043 | else if (vlmax_avl_p (reaching_out.get_avl ())) |
4044 | new_pat = gen_vsetvl_pat (VSETVL_NORMAL, reaching_out, | |
ec99ffab | 4045 | reaching_out.get_avl_reg_rtx ()); |
7ae4d1df JZZ |
4046 | else |
4047 | new_pat | |
4048 | = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, reaching_out, NULL_RTX); | |
9243c3d1 JZZ |
4049 | |
4050 | start_sequence (); | |
4051 | emit_insn (new_pat); | |
4052 | rtx_insn *rinsn = get_insns (); | |
4053 | end_sequence (); | |
4054 | insert_insn_end_basic_block (rinsn, cfg_bb); | |
4055 | if (dump_file) | |
4056 | { | |
4057 | fprintf (dump_file, | |
4058 | "\nInsert vsetvl insn %d at the end of <bb %d>:\n", | |
4059 | INSN_UID (rinsn), cfg_bb->index); | |
4060 | print_rtl_single (dump_file, rinsn); | |
4061 | } | |
4062 | } | |
4063 | ||
4064 | return need_commit; | |
4065 | } | |
4066 | ||
4067 | void | |
4068 | pass_vsetvl::pre_vsetvl (void) | |
4069 | { | |
4070 | /* Compute entity list. */ | |
4071 | prune_expressions (); | |
4072 | ||
cfe3fbc6 | 4073 | m_vector_manager->create_bitmap_vectors (); |
9243c3d1 JZZ |
4074 | compute_local_properties (); |
4075 | m_vector_manager->vector_edge_list = pre_edge_lcm_avs ( | |
4076 | m_vector_manager->vector_exprs.length (), m_vector_manager->vector_transp, | |
4077 | m_vector_manager->vector_comp, m_vector_manager->vector_antic, | |
4078 | m_vector_manager->vector_kill, m_vector_manager->vector_avin, | |
4079 | m_vector_manager->vector_avout, &m_vector_manager->vector_insert, | |
4080 | &m_vector_manager->vector_del); | |
4081 | ||
4082 | /* We should dump the information before CFG is changed. Otherwise it will | |
4083 | produce ICE (internal compiler error). */ | |
4084 | if (dump_file) | |
4085 | m_vector_manager->dump (dump_file); | |
4086 | ||
4087 | refine_vsetvls (); | |
4088 | cleanup_vsetvls (); | |
4089 | bool need_commit = commit_vsetvls (); | |
4090 | if (need_commit) | |
4091 | commit_edge_insertions (); | |
4092 | } | |
4093 | ||
c919d059 KC |
4094 | /* Some instruction can not be accessed in RTL_SSA when we don't re-init |
4095 | the new RTL_SSA framework but it is definetely at the END of the block. | |
4096 | ||
4097 | Here we optimize the VSETVL is hoisted by LCM: | |
4098 | ||
4099 | Before LCM: | |
4100 | bb 1: | |
4101 | vsetvli a5,a2,e32,m1,ta,mu | |
4102 | bb 2: | |
4103 | vsetvli zero,a5,e32,m1,ta,mu | |
4104 | ... | |
4105 | ||
4106 | After LCM: | |
4107 | bb 1: | |
4108 | vsetvli a5,a2,e32,m1,ta,mu | |
4109 | LCM INSERTED: vsetvli zero,a5,e32,m1,ta,mu --> eliminate | |
4110 | bb 2: | |
4111 | ... | |
4112 | */ | |
4113 | rtx_insn * | |
4114 | pass_vsetvl::get_vsetvl_at_end (const bb_info *bb, vector_insn_info *dem) const | |
4115 | { | |
4116 | rtx_insn *end_vsetvl = BB_END (bb->cfg_bb ()); | |
4117 | if (end_vsetvl && NONDEBUG_INSN_P (end_vsetvl)) | |
4118 | { | |
4119 | if (JUMP_P (end_vsetvl)) | |
4120 | end_vsetvl = PREV_INSN (end_vsetvl); | |
4121 | ||
4122 | if (NONDEBUG_INSN_P (end_vsetvl) | |
4123 | && vsetvl_discard_result_insn_p (end_vsetvl)) | |
4124 | { | |
4125 | /* Only handle single succ. here, multiple succ. is much | |
4126 | more complicated. */ | |
4127 | if (single_succ_p (bb->cfg_bb ())) | |
4128 | { | |
4129 | edge e = single_succ_edge (bb->cfg_bb ()); | |
4130 | *dem = get_block_info (e->dest).local_dem; | |
4131 | return end_vsetvl; | |
4132 | } | |
4133 | } | |
4134 | } | |
4135 | return nullptr; | |
4136 | } | |
4137 | ||
4138 | /* This predicator should only used within same basic block. */ | |
4139 | static bool | |
4140 | local_avl_compatible_p (rtx avl1, rtx avl2) | |
4141 | { | |
4142 | if (!REG_P (avl1) || !REG_P (avl2)) | |
4143 | return false; | |
4144 | ||
4145 | return REGNO (avl1) == REGNO (avl2); | |
4146 | } | |
4147 | ||
8421f279 JZ |
/* Local user vsetvl optimization:

     Case 1:
       vsetvl a5,a4,e8,mf8
       ...
       vsetvl zero,a5,e8,mf8  --> Eliminate directly.

     Case 2:
       vsetvl a5,a4,e8,mf8    --> vsetvl a5,a4,e32,mf2
       ...
       vsetvl zero,a5,e32,mf2 --> Eliminate directly.

   BB is walked backwards.  The "prev_*" variables describe the vsetvl seen
   most recently in that walk (i.e. the one that is LATER in program order),
   the "curr_*" variables describe the insn being visited.  When the two are
   compatible, the demands are merged into the current insn's vsetvl and the
   later vsetvl is removed.  */
void
pass_vsetvl::local_eliminate_vsetvl_insn (const bb_info *bb) const
{
  rtx_insn *prev_vsetvl = nullptr;
  rtx_insn *curr_vsetvl = nullptr;
  /* RVV_VLMAX is used as a stand-in AVL whenever no real AVL is known; it
     also serves as the "reset" value when the tracking state is
     invalidated.  */
  rtx vl_placeholder = RVV_VLMAX;
  rtx prev_avl = vl_placeholder;
  rtx curr_avl = vl_placeholder;
  vector_insn_info prev_dem;

  /* Instructions inserted by LCM do not appear in RTL-SSA yet, so try to
     find such an instruction at the end of the block and seed the walk
     with it.  */
  if (rtx_insn *end_vsetvl = get_vsetvl_at_end (bb, &prev_dem))
    {
      prev_avl = get_avl (end_vsetvl);
      prev_vsetvl = end_vsetvl;
    }

  bool skip_one = false;
  /* Backward propagate vsetvl info, drop the later one (prev_vsetvl) if it's
     compatible with current vsetvl (curr_avl), and merge the vtype and avl
     info. into current vsetvl.  */
  for (insn_info *insn : bb->reverse_real_nondebug_insns ())
    {
      rtx_insn *rinsn = insn->rtl ();
      const auto &curr_dem = get_vector_info (insn);
      bool need_invalidate = false;

      /* Skip if this insn was already handled in the last iteration.  */
      if (skip_one)
        {
          skip_one = false;
          continue;
        }

      if (vsetvl_insn_p (rinsn))
        {
          curr_vsetvl = rinsn;
          /* vsetvl are using vl rather than avl since it will try to merge
             with other vsetvl_discard_result.

                            v--- avl
             vsetvl a5,a4,e8,mf8   # vsetvl
             ...    ^--- vl
             vsetvl zero,a5,e8,mf8 # vsetvl_discard_result
                         ^--- avl
           */
          curr_avl = get_vl (rinsn);
          /* vsetvl is a cut point of local backward vsetvl elimination.  */
          need_invalidate = true;
        }
      else if (has_vtype_op (rinsn) && NONDEBUG_INSN_P (PREV_INSN (rinsn))
               && (vsetvl_discard_result_insn_p (PREV_INSN (rinsn))
                   || vsetvl_insn_p (PREV_INSN (rinsn))))
        {
          /* A vector insn immediately preceded by its vsetvl: treat the
             pair as one unit whose vsetvl is PREV_INSN (rinsn).  */
          curr_vsetvl = PREV_INSN (rinsn);

          if (vsetvl_insn_p (PREV_INSN (rinsn)))
            {
              /* Need invalidate and skip if it's vsetvl.  */
              need_invalidate = true;
              /* vsetvl_discard_result_insn_p won't appear in RTL-SSA,
               * so only need to skip for vsetvl.  */
              skip_one = true;
            }

          curr_avl = get_avl (rinsn);

          /* Some instructions like pred_extract_first<mode> don't require
             avl, so the avl is null; use vl_placeholder to unify the
             handling logic.  */
          if (!curr_avl)
            curr_avl = vl_placeholder;
        }
      else if (insn->is_call () || insn->is_asm ()
               || find_access (insn->defs (), VL_REGNUM)
               || find_access (insn->defs (), VTYPE_REGNUM)
               || (REG_P (prev_avl)
                   && find_access (insn->defs (), REGNO (prev_avl))))
        {
          /* Invalidate if this insn can't propagate vl, vtype or avl.  */
          need_invalidate = true;
          /* Clearing prev_dem here makes the compatibility check below
             fail for this insn.  */
          prev_dem = vector_insn_info ();
        }
      else
        /* Not interested instruction.  */
        continue;

      /* Local AVL compatibility checking is simpler than global, we only
         need to check the REGNO is same.  */
      if (prev_dem.valid_p () && prev_dem.skip_avl_compatible_p (curr_dem)
          && local_avl_compatible_p (prev_avl, curr_avl))
        {
          /* curr_dem and prev_dem is compatible!  */
          /* Update avl info since we need to make sure they are fully
             compatible before merge.  */
          prev_dem.set_avl_info (curr_dem.get_avl_info ());
          /* Merge both and update into curr_vsetvl.  */
          prev_dem = curr_dem.merge (prev_dem, LOCAL_MERGE);
          change_vsetvl_insn (curr_dem.get_insn (), prev_dem);
          /* Then we can drop prev_vsetvl.  */
          eliminate_insn (prev_vsetvl);
        }

      if (need_invalidate)
        {
          /* Cut point: forget everything tracked so far.  */
          prev_vsetvl = nullptr;
          curr_vsetvl = nullptr;
          prev_avl = vl_placeholder;
          curr_avl = vl_placeholder;
          prev_dem = vector_insn_info ();
        }
      else
        {
          /* The current insn becomes the "later" one for the next (earlier)
             insn visited.  */
          prev_vsetvl = curr_vsetvl;
          prev_avl = curr_avl;
          prev_dem = curr_dem;
        }
    }
}
4279 | ||
c5a1fa59 JZ |
4280 | /* Before VSETVL PASS, RVV instructions pattern is depending on AVL operand |
4281 | implicitly. Since we will emit VSETVL instruction and make RVV instructions | |
4282 | depending on VL/VTYPE global status registers, we remove the such AVL operand | |
4283 | in the RVV instructions pattern here in order to remove AVL dependencies when | |
4284 | AVL operand is a register operand. | |
4285 | ||
4286 | Before the VSETVL PASS: | |
4287 | li a5,32 | |
4288 | ... | |
4289 | vadd.vv (..., a5) | |
4290 | After the VSETVL PASS: | |
4291 | li a5,32 | |
4292 | vsetvli zero, a5, ... | |
4293 | ... | |
4294 | vadd.vv (..., const_int 0). */ | |
9243c3d1 JZZ |
4295 | void |
4296 | pass_vsetvl::cleanup_insns (void) const | |
4297 | { | |
4298 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
4299 | { | |
c919d059 | 4300 | local_eliminate_vsetvl_insn (bb); |
9243c3d1 JZZ |
4301 | for (insn_info *insn : bb->real_nondebug_insns ()) |
4302 | { | |
4303 | rtx_insn *rinsn = insn->rtl (); | |
9243c3d1 JZZ |
4304 | if (vlmax_avl_insn_p (rinsn)) |
4305 | { | |
4306 | eliminate_insn (rinsn); | |
4307 | continue; | |
4308 | } | |
4309 | ||
4310 | /* Erase the AVL operand from the instruction. */ | |
4311 | if (!has_vl_op (rinsn) || !REG_P (get_vl (rinsn))) | |
4312 | continue; | |
4313 | rtx avl = get_vl (rinsn); | |
a2d12abe | 4314 | if (count_regno_occurrences (rinsn, REGNO (avl)) == 1) |
9243c3d1 JZZ |
4315 | { |
4316 | /* Get the list of uses for the new instruction. */ | |
4317 | auto attempt = crtl->ssa->new_change_attempt (); | |
4318 | insn_change change (insn); | |
4319 | /* Remove the use of the substituted value. */ | |
4320 | access_array_builder uses_builder (attempt); | |
4321 | uses_builder.reserve (insn->num_uses () - 1); | |
4322 | for (use_info *use : insn->uses ()) | |
4323 | if (use != find_access (insn->uses (), REGNO (avl))) | |
4324 | uses_builder.quick_push (use); | |
4325 | use_array new_uses = use_array (uses_builder.finish ()); | |
4326 | change.new_uses = new_uses; | |
4327 | change.move_range = insn->ebb ()->insn_range (); | |
60bd33bc JZZ |
4328 | rtx pat; |
4329 | if (fault_first_load_p (rinsn)) | |
4330 | pat = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx); | |
4331 | else | |
4332 | { | |
4333 | rtx set = single_set (rinsn); | |
4334 | rtx src | |
4335 | = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx); | |
4336 | pat = gen_rtx_SET (SET_DEST (set), src); | |
4337 | } | |
1d339ce8 KC |
4338 | bool ok = change_insn (crtl->ssa, change, insn, pat); |
4339 | gcc_assert (ok); | |
9243c3d1 JZZ |
4340 | } |
4341 | } | |
4342 | } | |
4343 | } | |
4344 | ||
6b6b9c68 JZZ |
/* Phase 6: rebuild RTL_SSA on the CFG produced by LCM and then simplify
   vsetvl instructions using the fresh def-use information:

   - A vsetvl (discard result) whose register AVL is defined by another
     vsetvl with the same ratio is rewritten in terms of that defining
     vsetvl's AVL.

   - A vsetvl whose VL result has no nondebug uses is deleted, reduced to a
     vtype-only change, or turned into the discard-result form, depending on
     the to_delete_vsetvls / to_refine_vsetvls sets and its AVL.

   Deletions are queued in a local set and performed after the walk.  */
void
pass_vsetvl::propagate_avl (void) const
{
  /* Rebuild the RTL_SSA according to the new CFG generated by LCM.  */
  /* Finalization of RTL_SSA.  */
  free_dominance_info (CDI_DOMINATORS);
  if (crtl->ssa->perform_pending_updates ())
    cleanup_cfg (0);
  delete crtl->ssa;
  crtl->ssa = nullptr;
  /* Initialization of RTL_SSA.  */
  calculate_dominance_info (CDI_DOMINATORS);
  df_analyze ();
  crtl->ssa = new function_info (cfun);

  /* Insns to remove once the whole function has been scanned.  */
  hash_set<rtx_insn *> to_delete;
  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      for (insn_info *insn : bb->real_nondebug_insns ())
        {
          if (vsetvl_discard_result_insn_p (insn->rtl ()))
            {
              rtx avl = get_avl (insn->rtl ());
              if (!REG_P (avl))
                continue;

              /* NOTE(review): the result of find_access is dereferenced
                 without a null check; this relies on the AVL register
                 always being recorded as a use of INSN -- confirm.  */
              set_info *set = find_access (insn->uses (), REGNO (avl))->def ();
              insn_info *def_insn = extract_single_source (set);
              if (!def_insn)
                continue;

              /* Handle this case:
                 vsetvli a6,zero,e32,m1,ta,mu
                 li a5,4096
                 add a7,a0,a5
                 addi a7,a7,-96
                 vsetvli t1,zero,e8,mf8,ta,ma
                 vle8.v v24,0(a7)
                 add a5,a3,a5
                 addi a5,a5,-96
                 vse8.v v24,0(a5)
                 vsetvli zero,a6,e32,m1,tu,ma
              */
              if (vsetvl_insn_p (def_insn->rtl ()))
                {
                  vl_vtype_info def_info = get_vl_vtype_info (def_insn);
                  vl_vtype_info info = get_vl_vtype_info (insn);
                  /* From here on AVL and VL refer to the operands of the
                     DEFINING vsetvl; this AVL shadows the outer one.  */
                  rtx avl = get_avl (def_insn->rtl ());
                  rtx vl = get_vl (def_insn->rtl ());
                  if (def_info.get_ratio () == info.get_ratio ())
                    {
                      if (vlmax_avl_p (def_info.get_avl ()))
                        {
                          /* The defining vsetvl uses VLMAX: rewrite INSN as
                             a normal vsetvl with that AVL and the defining
                             insn's VL register.  The result of
                             validate_change is not checked.  */
                          info.set_avl_info (
                            avl_info (def_info.get_avl (), nullptr));
                          rtx new_pat
                            = gen_vsetvl_pat (VSETVL_NORMAL, info, vl);
                          validate_change (insn->rtl (),
                                           &PATTERN (insn->rtl ()), new_pat,
                                           false);
                          continue;
                        }
                      if (def_info.has_avl_imm () || rtx_equal_p (avl, vl))
                        {
                          /* Emit a discard-result vsetvl after INSN that
                             uses the defining insn's AVL directly.  INSN and
                             its definition are only queued for deletion
                             when SET has a single nondebug insn use.  */
                          info.set_avl_info (avl_info (avl, nullptr));
                          emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_AFTER,
                                            info, NULL_RTX, insn->rtl ());
                          if (set->single_nondebug_insn_use ())
                            {
                              to_delete.add (insn->rtl ());
                              to_delete.add (def_insn->rtl ());
                            }
                          continue;
                        }
                    }
                }
            }

          /* Change vsetvl rd, rs1 --> vsetvl zero, rs1,
             if rd is not used by any nondebug instructions.
             Even though this PASS runs after RA and it doesn't help for
             reduce register pressure, it can help instructions scheduling
             since we remove the dependencies.  */
          if (vsetvl_insn_p (insn->rtl ()))
            {
              rtx vl = get_vl (insn->rtl ());
              rtx avl = get_avl (insn->rtl ());
              def_info *def = find_access (insn->defs (), REGNO (vl));
              set_info *set = safe_dyn_cast<set_info *> (def);
              vector_insn_info info;
              info.parse_insn (insn);
              gcc_assert (set);
              if (m_vector_manager->to_delete_vsetvls.contains (insn->rtl ()))
                {
                  /* Drop INSN from both bookkeeping sets; it is only
                     actually deleted when its VL result is unused.  */
                  m_vector_manager->to_delete_vsetvls.remove (insn->rtl ());
                  if (m_vector_manager->to_refine_vsetvls.contains (
                        insn->rtl ()))
                    m_vector_manager->to_refine_vsetvls.remove (insn->rtl ());
                  if (!set->has_nondebug_insn_uses ())
                    {
                      to_delete.add (insn->rtl ());
                      continue;
                    }
                }
              if (m_vector_manager->to_refine_vsetvls.contains (insn->rtl ()))
                {
                  /* With an unused VL result the insn is reduced to a
                     vtype-only change.  */
                  m_vector_manager->to_refine_vsetvls.remove (insn->rtl ());
                  if (!set->has_nondebug_insn_uses ())
                    {
                      rtx new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY,
                                                    info, NULL_RTX);
                      change_insn (insn->rtl (), new_pat);
                      continue;
                    }
                }
              /* A VLMAX vsetvl is left in its current form.  */
              if (vlmax_avl_p (avl))
                continue;
              rtx new_pat
                = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, info, NULL_RTX);
              if (!set->has_nondebug_insn_uses ())
                {
                  /* The result of validate_change is not checked.  */
                  validate_change (insn->rtl (), &PATTERN (insn->rtl ()),
                                   new_pat, false);
                  continue;
                }
            }
        }
    }

  for (rtx_insn *rinsn : to_delete)
    eliminate_insn (rinsn);
}
4477 | ||
9243c3d1 JZZ |
4478 | void |
4479 | pass_vsetvl::init (void) | |
4480 | { | |
4481 | if (optimize > 0) | |
4482 | { | |
4483 | /* Initialization of RTL_SSA. */ | |
4484 | calculate_dominance_info (CDI_DOMINATORS); | |
4485 | df_analyze (); | |
4486 | crtl->ssa = new function_info (cfun); | |
4487 | } | |
4488 | ||
4489 | m_vector_manager = new vector_infos_manager (); | |
4f673c5e | 4490 | compute_probabilities (); |
9243c3d1 JZZ |
4491 | |
4492 | if (dump_file) | |
4493 | { | |
4494 | fprintf (dump_file, "\nPrologue: Initialize vector infos\n"); | |
4495 | m_vector_manager->dump (dump_file); | |
4496 | } | |
4497 | } | |
4498 | ||
4499 | void | |
4500 | pass_vsetvl::done (void) | |
4501 | { | |
4502 | if (optimize > 0) | |
4503 | { | |
4504 | /* Finalization of RTL_SSA. */ | |
4505 | free_dominance_info (CDI_DOMINATORS); | |
4506 | if (crtl->ssa->perform_pending_updates ()) | |
4507 | cleanup_cfg (0); | |
4508 | delete crtl->ssa; | |
4509 | crtl->ssa = nullptr; | |
4510 | } | |
4511 | m_vector_manager->release (); | |
4512 | delete m_vector_manager; | |
4513 | m_vector_manager = nullptr; | |
4514 | } | |
4515 | ||
acc10c79 JZZ |
4516 | /* Compute probability for each block. */ |
4517 | void | |
4518 | pass_vsetvl::compute_probabilities (void) | |
4519 | { | |
4520 | /* Don't compute it in -O0 since we don't need it. */ | |
4521 | if (!optimize) | |
4522 | return; | |
4523 | edge e; | |
4524 | edge_iterator ei; | |
4525 | ||
4526 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
4527 | { | |
4528 | basic_block cfg_bb = bb->cfg_bb (); | |
4529 | auto &curr_prob | |
c139f5e1 | 4530 | = get_block_info(cfg_bb).probability; |
c129d22d JZZ |
4531 | |
4532 | /* GCC assume entry block (bb 0) are always so | |
4533 | executed so set its probability as "always". */ | |
acc10c79 JZZ |
4534 | if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb) |
4535 | curr_prob = profile_probability::always (); | |
c129d22d JZZ |
4536 | /* Exit block (bb 1) is the block we don't need to process. */ |
4537 | if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb) | |
4538 | continue; | |
4539 | ||
acc10c79 JZZ |
4540 | gcc_assert (curr_prob.initialized_p ()); |
4541 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
4542 | { | |
4543 | auto &new_prob | |
c139f5e1 | 4544 | = get_block_info(e->dest).probability; |
acc10c79 JZZ |
4545 | if (!new_prob.initialized_p ()) |
4546 | new_prob = curr_prob * e->probability; | |
4547 | else if (new_prob == profile_probability::always ()) | |
4548 | continue; | |
4549 | else | |
4550 | new_prob += curr_prob * e->probability; | |
4551 | } | |
4552 | } | |
acc10c79 JZZ |
4553 | } |
4554 | ||
9243c3d1 JZZ |
4555 | /* Lazy vsetvl insertion for optimize > 0. */ |
4556 | void | |
4557 | pass_vsetvl::lazy_vsetvl (void) | |
4558 | { | |
4559 | if (dump_file) | |
4560 | fprintf (dump_file, | |
4561 | "\nEntering Lazy VSETVL PASS and Handling %d basic blocks for " | |
4562 | "function:%s\n", | |
4563 | n_basic_blocks_for_fn (cfun), function_name (cfun)); | |
4564 | ||
4565 | /* Phase 1 - Compute the local dems within each block. | |
4566 | The data-flow analysis within each block is backward analysis. */ | |
4567 | if (dump_file) | |
4568 | fprintf (dump_file, "\nPhase 1: Compute local backward vector infos\n"); | |
4569 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
4570 | compute_local_backward_infos (bb); | |
4571 | if (dump_file) | |
4572 | m_vector_manager->dump (dump_file); | |
4573 | ||
4574 | /* Phase 2 - Emit vsetvl instructions within each basic block according to | |
4575 | demand, compute and save ANTLOC && AVLOC of each block. */ | |
4576 | if (dump_file) | |
4577 | fprintf (dump_file, | |
4578 | "\nPhase 2: Emit vsetvl instruction within each block\n"); | |
4579 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
4580 | emit_local_forward_vsetvls (bb); | |
4581 | if (dump_file) | |
4582 | m_vector_manager->dump (dump_file); | |
4583 | ||
4584 | /* Phase 3 - Propagate demanded info across blocks. */ | |
4585 | if (dump_file) | |
4586 | fprintf (dump_file, "\nPhase 3: Demands propagation across blocks\n"); | |
387cd9d3 | 4587 | demand_fusion (); |
9243c3d1 JZZ |
4588 | if (dump_file) |
4589 | m_vector_manager->dump (dump_file); | |
4590 | ||
4591 | /* Phase 4 - Lazy code motion. */ | |
4592 | if (dump_file) | |
4593 | fprintf (dump_file, "\nPhase 4: PRE vsetvl by Lazy code motion (LCM)\n"); | |
4594 | pre_vsetvl (); | |
4595 | ||
4596 | /* Phase 5 - Cleanup AVL && VL operand of RVV instruction. */ | |
4597 | if (dump_file) | |
4598 | fprintf (dump_file, "\nPhase 5: Cleanup AVL and VL operands\n"); | |
4599 | cleanup_insns (); | |
6b6b9c68 JZZ |
4600 | |
4601 | /* Phase 6 - Rebuild RTL_SSA to propagate AVL between vsetvls. */ | |
4602 | if (dump_file) | |
4603 | fprintf (dump_file, | |
4604 | "\nPhase 6: Rebuild RTL_SSA to propagate AVL between vsetvls\n"); | |
4605 | propagate_avl (); | |
9243c3d1 JZZ |
4606 | } |
4607 | ||
4608 | /* Main entry point for this pass. */ | |
4609 | unsigned int | |
4610 | pass_vsetvl::execute (function *) | |
4611 | { | |
4612 | if (n_basic_blocks_for_fn (cfun) <= 0) | |
4613 | return 0; | |
4614 | ||
ca8fb009 JZZ |
4615 | /* The RVV instruction may change after split which is not a stable |
4616 | instruction. We need to split it here to avoid potential issue | |
4617 | since the VSETVL PASS is insert before split PASS. */ | |
4618 | split_all_insns (); | |
9243c3d1 JZZ |
4619 | |
4620 | /* Early return for there is no vector instructions. */ | |
4621 | if (!has_vector_insn (cfun)) | |
4622 | return 0; | |
4623 | ||
4624 | init (); | |
4625 | ||
4626 | if (!optimize) | |
4627 | simple_vsetvl (); | |
4628 | else | |
4629 | lazy_vsetvl (); | |
4630 | ||
4631 | done (); | |
4632 | return 0; | |
4633 | } | |
4634 | ||
4635 | rtl_opt_pass * | |
4636 | make_pass_vsetvl (gcc::context *ctxt) | |
4637 | { | |
4638 | return new pass_vsetvl (ctxt); | |
4639 | } |