]>
Commit | Line | Data |
---|---|---|
9243c3d1 | 1 | /* VSETVL pass for RISC-V 'V' Extension for GNU compiler. |
c841bde5 | 2 | Copyright (C) 2022-2023 Free Software Foundation, Inc. |
9243c3d1 JZZ |
3 | Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd. |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or(at your option) | |
10 | any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
21 | /* This pass is to Set VL/VTYPE global status for RVV instructions | |
22 | that depend on VL and VTYPE registers by Lazy code motion (LCM). | |
23 | ||
24 | Strategy: | |
25 | ||
26 | - Backward demanded info fusion within block. | |
27 | ||
28 | - Lazy code motion (LCM) based demanded info backward propagation. | |
29 | ||
30 | - RTL_SSA framework for def-use, PHI analysis. | |
31 | ||
32 | - Lazy code motion (LCM) for global VL/VTYPE optimization. | |
33 | ||
34 | Assumption: | |
35 | ||
36 | - Each avl operand is either an immediate (must be in range 0 ~ 31) or reg. | |
37 | ||
38 | This pass consists of 6 phases: | |
39 | ||
40 | - Phase 1 - compute VL/VTYPE demanded information within each block | |
41 | by backward data-flow analysis. | |
42 | ||
43 | - Phase 2 - Emit vsetvl instructions within each basic block according to | |
44 | demand, compute and save ANTLOC && AVLOC of each block. | |
45 | ||
387cd9d3 JZZ |
46 | - Phase 3 - Backward && forward demanded info propagation and fusion across |
47 | blocks. | |
9243c3d1 JZZ |
48 | |
49 | - Phase 4 - Lazy code motion including: compute local properties, | |
50 | pre_edge_lcm and vsetvl insertion && delete edges for LCM results. | |
51 | ||
52 | - Phase 5 - Cleanup AVL operand of RVV instruction since it will not be | |
53 | used any more and VL operand of VSETVL instruction if it is not used by | |
54 | any non-debug instructions. | |
55 | ||
6b6b9c68 JZZ |
56 | - Phase 6 - Propagate AVL between vsetvl instructions. |
57 | ||
9243c3d1 JZZ |
58 | Implementation: |
59 | ||
60 | - The subroutine of optimize == 0 is simple_vsetvl. | |
61 | This function simply inserts a vsetvl for each RVV | |
62 | instruction. No optimization. | |
63 | ||
64 | - The subroutine of optimize > 0 is lazy_vsetvl. | |
65 | This function optimizes the vsetvl insertion process by | |
ec99ffab JZZ |
66 | lazy code motion (LCM) layering on RTL_SSA. |
67 | ||
68 | - get_avl (), get_insn (), get_avl_source (): | |
69 | ||
70 | 1. get_insn () is the current instruction, find_access (get_insn | |
71 | ())->def is the same as get_avl_source () if get_insn () demand VL. | |
72 | 2. If get_avl () is non-VLMAX REG, get_avl () == get_avl_source | |
73 | ()->regno (). | |
74 | 3. get_avl_source ()->regno () is the REGNO that we backward propagate. | |
75 | */ | |
9243c3d1 JZZ |
76 | |
77 | #define IN_TARGET_CODE 1 | |
78 | #define INCLUDE_ALGORITHM | |
79 | #define INCLUDE_FUNCTIONAL | |
80 | ||
81 | #include "config.h" | |
82 | #include "system.h" | |
83 | #include "coretypes.h" | |
84 | #include "tm.h" | |
85 | #include "backend.h" | |
86 | #include "rtl.h" | |
87 | #include "target.h" | |
88 | #include "tree-pass.h" | |
89 | #include "df.h" | |
90 | #include "rtl-ssa.h" | |
91 | #include "cfgcleanup.h" | |
92 | #include "insn-config.h" | |
93 | #include "insn-attr.h" | |
94 | #include "insn-opinit.h" | |
95 | #include "tm-constrs.h" | |
96 | #include "cfgrtl.h" | |
97 | #include "cfganal.h" | |
98 | #include "lcm.h" | |
99 | #include "predict.h" | |
100 | #include "profile-count.h" | |
101 | #include "riscv-vsetvl.h" | |
102 | ||
103 | using namespace rtl_ssa; | |
104 | using namespace riscv_vector; | |
105 | ||
ec99ffab JZZ |
106 | static CONSTEXPR const unsigned ALL_SEW[] = {8, 16, 32, 64}; |
107 | static CONSTEXPR const vlmul_type ALL_LMUL[] | |
108 | = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2}; | |
ec99ffab | 109 | |
9243c3d1 JZZ |
110 | DEBUG_FUNCTION void |
111 | debug (const vector_insn_info *info) | |
112 | { | |
113 | info->dump (stderr); | |
114 | } | |
115 | ||
116 | DEBUG_FUNCTION void | |
117 | debug (const vector_infos_manager *info) | |
118 | { | |
119 | info->dump (stderr); | |
120 | } | |
121 | ||
122 | static bool | |
123 | vlmax_avl_p (rtx x) | |
124 | { | |
125 | return x && rtx_equal_p (x, RVV_VLMAX); | |
126 | } | |
127 | ||
128 | static bool | |
129 | vlmax_avl_insn_p (rtx_insn *rinsn) | |
130 | { | |
85112fbb JZZ |
131 | return (INSN_CODE (rinsn) == CODE_FOR_vlmax_avlsi |
132 | || INSN_CODE (rinsn) == CODE_FOR_vlmax_avldi); | |
9243c3d1 JZZ |
133 | } |
134 | ||
8d8cc482 JZZ |
135 | /* Return true if the block is a loop itself: |
136 | local_dem | |
137 | __________ | |
138 | ____|____ | | |
139 | | | | | |
140 | |________| | | |
141 | |_________| | |
142 | reaching_out | |
143 | */ | |
9243c3d1 JZZ |
144 | static bool |
145 | loop_basic_block_p (const basic_block cfg_bb) | |
146 | { | |
8d8cc482 JZZ |
147 | if (JUMP_P (BB_END (cfg_bb)) && any_condjump_p (BB_END (cfg_bb))) |
148 | { | |
149 | edge e; | |
150 | edge_iterator ei; | |
151 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
152 | if (e->dest->index == cfg_bb->index) | |
153 | return true; | |
154 | } | |
155 | return false; | |
9243c3d1 JZZ |
156 | } |
157 | ||
158 | /* Return true if it is an RVV instruction depends on VTYPE global | |
159 | status register. */ | |
160 | static bool | |
161 | has_vtype_op (rtx_insn *rinsn) | |
162 | { | |
163 | return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn); | |
164 | } | |
165 | ||
166 | /* Return true if it is an RVV instruction depends on VL global | |
167 | status register. */ | |
168 | static bool | |
169 | has_vl_op (rtx_insn *rinsn) | |
170 | { | |
171 | return recog_memoized (rinsn) >= 0 && get_attr_has_vl_op (rinsn); | |
172 | } | |
173 | ||
/* Is this a SEW value that can be encoded into the VTYPE format?

   SEW must be a power of two in the inclusive range [8, 64], i.e. one of
   8, 16, 32 or 64.  The power-of-two test is spelled out explicitly:
   exact_log2 returns -1 (which is truthy) for values that are not a power
   of two, so using its result directly as a boolean would accept values
   such as 24 or 48.  */
static bool
valid_sew_p (size_t sew)
{
  /* The range check runs first, so SEW is non-zero when the bit trick
     is evaluated.  */
  return sew >= 8 && sew <= 64 && (sew & (sew - 1)) == 0;
}
180 | ||
ec99ffab JZZ |
181 | /* Return true if the instruction ignores VLMUL field of VTYPE. */ |
182 | static bool | |
183 | ignore_vlmul_insn_p (rtx_insn *rinsn) | |
184 | { | |
185 | return get_attr_type (rinsn) == TYPE_VIMOVVX | |
186 | || get_attr_type (rinsn) == TYPE_VFMOVVF | |
187 | || get_attr_type (rinsn) == TYPE_VIMOVXV | |
188 | || get_attr_type (rinsn) == TYPE_VFMOVFV; | |
189 | } | |
190 | ||
191 | /* Return true if the instruction is scalar move instruction. */ | |
192 | static bool | |
193 | scalar_move_insn_p (rtx_insn *rinsn) | |
194 | { | |
195 | return get_attr_type (rinsn) == TYPE_VIMOVXV | |
196 | || get_attr_type (rinsn) == TYPE_VFMOVFV; | |
197 | } | |
198 | ||
60bd33bc JZZ |
199 | /* Return true if the instruction is fault first load instruction. */ |
200 | static bool | |
201 | fault_first_load_p (rtx_insn *rinsn) | |
202 | { | |
203 | return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VLDFF; | |
204 | } | |
205 | ||
206 | /* Return true if the instruction is read vl instruction. */ | |
207 | static bool | |
208 | read_vl_insn_p (rtx_insn *rinsn) | |
209 | { | |
210 | return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_RDVL; | |
211 | } | |
212 | ||
9243c3d1 JZZ |
213 | /* Return true if it is a vsetvl instruction. */ |
214 | static bool | |
215 | vector_config_insn_p (rtx_insn *rinsn) | |
216 | { | |
217 | return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL; | |
218 | } | |
219 | ||
220 | /* Return true if it is vsetvldi or vsetvlsi. */ | |
221 | static bool | |
222 | vsetvl_insn_p (rtx_insn *rinsn) | |
223 | { | |
6b6b9c68 JZZ |
224 | if (!vector_config_insn_p (rinsn)) |
225 | return false; | |
85112fbb | 226 | return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi |
6b6b9c68 JZZ |
227 | || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi); |
228 | } | |
229 | ||
230 | /* Return true if it is vsetvl zero, rs1. */ | |
231 | static bool | |
232 | vsetvl_discard_result_insn_p (rtx_insn *rinsn) | |
233 | { | |
234 | if (!vector_config_insn_p (rinsn)) | |
235 | return false; | |
236 | return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi | |
237 | || INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi); | |
9243c3d1 JZZ |
238 | } |
239 | ||
9243c3d1 | 240 | static bool |
4f673c5e | 241 | real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb) |
9243c3d1 | 242 | { |
4f673c5e | 243 | return insn != nullptr && insn->is_real () && insn->bb () == bb; |
9243c3d1 JZZ |
244 | } |
245 | ||
9243c3d1 | 246 | static bool |
4f673c5e | 247 | before_p (const insn_info *insn1, const insn_info *insn2) |
9243c3d1 | 248 | { |
9b9a1ac1 | 249 | return insn1->compare_with (insn2) < 0; |
4f673c5e JZZ |
250 | } |
251 | ||
6b6b9c68 JZZ |
252 | static insn_info * |
253 | find_reg_killed_by (const bb_info *bb, rtx x) | |
4f673c5e | 254 | { |
6b6b9c68 JZZ |
255 | if (!x || vlmax_avl_p (x) || !REG_P (x)) |
256 | return nullptr; | |
257 | for (insn_info *insn : bb->reverse_real_nondebug_insns ()) | |
4f673c5e | 258 | if (find_access (insn->defs (), REGNO (x))) |
6b6b9c68 JZZ |
259 | return insn; |
260 | return nullptr; | |
261 | } | |
262 | ||
263 | /* Helper function to get VL operand. */ | |
264 | static rtx | |
265 | get_vl (rtx_insn *rinsn) | |
266 | { | |
267 | if (has_vl_op (rinsn)) | |
268 | { | |
269 | extract_insn_cached (rinsn); | |
270 | return recog_data.operand[get_attr_vl_op_idx (rinsn)]; | |
271 | } | |
272 | return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0)); | |
4f673c5e JZZ |
273 | } |
274 | ||
275 | static bool | |
276 | has_vsetvl_killed_avl_p (const bb_info *bb, const vector_insn_info &info) | |
277 | { | |
278 | if (info.dirty_with_killed_avl_p ()) | |
279 | { | |
280 | rtx avl = info.get_avl (); | |
6b6b9c68 | 281 | if (vlmax_avl_p (avl)) |
ec99ffab | 282 | return find_reg_killed_by (bb, info.get_avl_reg_rtx ()) != nullptr; |
4f673c5e JZZ |
283 | for (const insn_info *insn : bb->reverse_real_nondebug_insns ()) |
284 | { | |
285 | def_info *def = find_access (insn->defs (), REGNO (avl)); | |
286 | if (def) | |
287 | { | |
288 | set_info *set = safe_dyn_cast<set_info *> (def); | |
289 | if (!set) | |
290 | return false; | |
291 | ||
292 | rtx new_avl = gen_rtx_REG (GET_MODE (avl), REGNO (avl)); | |
293 | gcc_assert (new_avl != avl); | |
294 | if (!info.compatible_avl_p (avl_info (new_avl, set))) | |
295 | return false; | |
296 | ||
297 | return true; | |
298 | } | |
299 | } | |
300 | } | |
301 | return false; | |
302 | } | |
303 | ||
9243c3d1 JZZ |
304 | /* An "anticipatable occurrence" is one that is the first occurrence in the |
305 | basic block, the operands are not modified in the basic block prior | |
306 | to the occurrence and the output is not used between the start of | |
4f673c5e JZZ |
307 | the block and the occurrence. |
308 | ||
309 | For VSETVL instruction, we have these following formats: | |
310 | 1. vsetvl zero, rs1. | |
311 | 2. vsetvl zero, imm. | |
312 | 3. vsetvl rd, rs1. | |
313 | ||
314 | So base on these circumstances, a DEM is considered as a local anticipatable | |
315 | occurrence should satisfy these following conditions: | |
316 | ||
317 | 1). rs1 (avl) are not modified in the basic block prior to the VSETVL. | |
318 | 2). rd (vl) are not modified in the basic block prior to the VSETVL. | |
319 | 3). rd (vl) is not used between the start of the block and the occurrence. | |
320 | ||
321 | Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE | |
322 | is modified prior to the occurrence. This case is already considered as | |
323 | a non-local anticipatable occurrence. | |
324 | */ | |
9243c3d1 | 325 | static bool |
4f673c5e | 326 | anticipatable_occurrence_p (const bb_info *bb, const vector_insn_info dem) |
9243c3d1 | 327 | { |
4f673c5e | 328 | insn_info *insn = dem.get_insn (); |
9243c3d1 JZZ |
329 | /* The only possible operand we care of VSETVL is AVL. */ |
330 | if (dem.has_avl_reg ()) | |
331 | { | |
4f673c5e | 332 | /* rs1 (avl) are not modified in the basic block prior to the VSETVL. */ |
9243c3d1 JZZ |
333 | if (!vlmax_avl_p (dem.get_avl ())) |
334 | { | |
ec99ffab | 335 | set_info *set = dem.get_avl_source (); |
9243c3d1 JZZ |
336 | /* If it's undefined, it's not anticipatable conservatively. */ |
337 | if (!set) | |
338 | return false; | |
4f673c5e JZZ |
339 | if (real_insn_and_same_bb_p (set->insn (), bb) |
340 | && before_p (set->insn (), insn)) | |
9243c3d1 JZZ |
341 | return false; |
342 | } | |
343 | } | |
344 | ||
4f673c5e | 345 | /* rd (vl) is not used between the start of the block and the occurrence. */ |
9243c3d1 JZZ |
346 | if (vsetvl_insn_p (insn->rtl ())) |
347 | { | |
4f673c5e JZZ |
348 | rtx dest = get_vl (insn->rtl ()); |
349 | for (insn_info *i = insn->prev_nondebug_insn (); | |
350 | real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ()) | |
351 | { | |
352 | /* rd (vl) is not used between the start of the block and the | |
353 | * occurrence. */ | |
354 | if (find_access (i->uses (), REGNO (dest))) | |
355 | return false; | |
356 | /* rd (vl) are not modified in the basic block prior to the VSETVL. */ | |
357 | if (find_access (i->defs (), REGNO (dest))) | |
358 | return false; | |
359 | } | |
9243c3d1 JZZ |
360 | } |
361 | ||
362 | return true; | |
363 | } | |
364 | ||
365 | /* An "available occurrence" is one that is the last occurrence in the | |
366 | basic block and the operands are not modified by following statements in | |
4f673c5e JZZ |
367 | the basic block [including this insn]. |
368 | ||
369 | For VSETVL instruction, we have these following formats: | |
370 | 1. vsetvl zero, rs1. | |
371 | 2. vsetvl zero, imm. | |
372 | 3. vsetvl rd, rs1. | |
373 | ||
374 | So base on these circumstances, a DEM is considered as a local available | |
375 | occurrence should satisfy these following conditions: | |
376 | ||
377 | 1). rs1 (avl) are not modified by following statements in | |
378 | the basic block. | |
379 | 2). rd (vl) are not modified by following statements in | |
380 | the basic block. | |
381 | ||
382 | Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE | |
383 | is modified prior to the occurrence. This case is already considered as | |
384 | a non-local available occurrence. | |
385 | */ | |
9243c3d1 | 386 | static bool |
4f673c5e | 387 | available_occurrence_p (const bb_info *bb, const vector_insn_info dem) |
9243c3d1 | 388 | { |
4f673c5e | 389 | insn_info *insn = dem.get_insn (); |
9243c3d1 JZZ |
390 | /* The only possible operand we care of VSETVL is AVL. */ |
391 | if (dem.has_avl_reg ()) | |
392 | { | |
9243c3d1 JZZ |
393 | if (!vlmax_avl_p (dem.get_avl ())) |
394 | { | |
4f673c5e JZZ |
395 | rtx dest = NULL_RTX; |
396 | if (vsetvl_insn_p (insn->rtl ())) | |
397 | dest = get_vl (insn->rtl ()); | |
398 | for (const insn_info *i = insn; real_insn_and_same_bb_p (i, bb); | |
399 | i = i->next_nondebug_insn ()) | |
400 | { | |
60bd33bc JZZ |
401 | if (read_vl_insn_p (i->rtl ())) |
402 | continue; | |
4f673c5e JZZ |
403 | /* rs1 (avl) are not modified by following statements in |
404 | the basic block. */ | |
405 | if (find_access (i->defs (), REGNO (dem.get_avl ()))) | |
406 | return false; | |
407 | /* rd (vl) are not modified by following statements in | |
408 | the basic block. */ | |
409 | if (dest && find_access (i->defs (), REGNO (dest))) | |
410 | return false; | |
411 | } | |
9243c3d1 JZZ |
412 | } |
413 | } | |
414 | return true; | |
415 | } | |
416 | ||
6b6b9c68 JZZ |
417 | static bool |
418 | insn_should_be_added_p (const insn_info *insn, unsigned int types) | |
9243c3d1 | 419 | { |
6b6b9c68 JZZ |
420 | if (insn->is_real () && (types & REAL_SET)) |
421 | return true; | |
422 | if (insn->is_phi () && (types & PHI_SET)) | |
423 | return true; | |
424 | if (insn->is_bb_head () && (types & BB_HEAD_SET)) | |
425 | return true; | |
426 | if (insn->is_bb_end () && (types & BB_END_SET)) | |
427 | return true; | |
428 | return false; | |
9243c3d1 JZZ |
429 | } |
430 | ||
6b6b9c68 JZZ |
431 | /* Recursively find all define instructions. The kind of instruction is |
432 | specified by the DEF_TYPE. */ | |
433 | static hash_set<set_info *> | |
434 | get_all_sets (phi_info *phi, unsigned int types) | |
9243c3d1 | 435 | { |
6b6b9c68 | 436 | hash_set<set_info *> insns; |
4f673c5e JZZ |
437 | auto_vec<phi_info *> work_list; |
438 | hash_set<phi_info *> visited_list; | |
439 | if (!phi) | |
6b6b9c68 | 440 | return hash_set<set_info *> (); |
4f673c5e | 441 | work_list.safe_push (phi); |
9243c3d1 | 442 | |
4f673c5e | 443 | while (!work_list.is_empty ()) |
9243c3d1 | 444 | { |
4f673c5e JZZ |
445 | phi_info *phi = work_list.pop (); |
446 | visited_list.add (phi); | |
447 | for (use_info *use : phi->inputs ()) | |
9243c3d1 | 448 | { |
4f673c5e | 449 | def_info *def = use->def (); |
6b6b9c68 JZZ |
450 | set_info *set = safe_dyn_cast<set_info *> (def); |
451 | if (!set) | |
452 | return hash_set<set_info *> (); | |
9243c3d1 | 453 | |
6b6b9c68 | 454 | gcc_assert (!set->insn ()->is_debug_insn ()); |
9243c3d1 | 455 | |
6b6b9c68 JZZ |
456 | if (insn_should_be_added_p (set->insn (), types)) |
457 | insns.add (set); | |
458 | if (set->insn ()->is_phi ()) | |
4f673c5e | 459 | { |
6b6b9c68 | 460 | phi_info *new_phi = as_a<phi_info *> (set); |
4f673c5e JZZ |
461 | if (!visited_list.contains (new_phi)) |
462 | work_list.safe_push (new_phi); | |
463 | } | |
464 | } | |
9243c3d1 | 465 | } |
4f673c5e JZZ |
466 | return insns; |
467 | } | |
9243c3d1 | 468 | |
6b6b9c68 JZZ |
469 | static hash_set<set_info *> |
470 | get_all_sets (set_info *set, bool /* get_real_inst */ real_p, | |
471 | bool /*get_phi*/ phi_p, bool /* get_function_parameter*/ param_p) | |
472 | { | |
473 | if (real_p && phi_p && param_p) | |
474 | return get_all_sets (safe_dyn_cast<phi_info *> (set), | |
475 | REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET); | |
476 | ||
477 | else if (real_p && param_p) | |
478 | return get_all_sets (safe_dyn_cast<phi_info *> (set), | |
479 | REAL_SET | BB_HEAD_SET | BB_END_SET); | |
480 | ||
481 | else if (real_p) | |
482 | return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET); | |
483 | return hash_set<set_info *> (); | |
484 | } | |
485 | ||
486 | /* Helper function to get AVL operand. */ | |
487 | static rtx | |
488 | get_avl (rtx_insn *rinsn) | |
489 | { | |
490 | if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn)) | |
491 | return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0); | |
492 | ||
493 | if (!has_vl_op (rinsn)) | |
494 | return NULL_RTX; | |
495 | if (get_attr_avl_type (rinsn) == VLMAX) | |
496 | return RVV_VLMAX; | |
497 | extract_insn_cached (rinsn); | |
498 | return recog_data.operand[get_attr_vl_op_idx (rinsn)]; | |
499 | } | |
500 | ||
501 | static set_info * | |
502 | get_same_bb_set (hash_set<set_info *> &sets, const basic_block cfg_bb) | |
503 | { | |
504 | for (set_info *set : sets) | |
505 | if (set->bb ()->cfg_bb () == cfg_bb) | |
506 | return set; | |
507 | return nullptr; | |
508 | } | |
509 | ||
4f673c5e JZZ |
510 | /* Recursively find all predecessor blocks for cfg_bb. */ |
511 | static hash_set<basic_block> | |
512 | get_all_predecessors (basic_block cfg_bb) | |
513 | { | |
514 | hash_set<basic_block> blocks; | |
515 | auto_vec<basic_block> work_list; | |
516 | hash_set<basic_block> visited_list; | |
517 | work_list.safe_push (cfg_bb); | |
9243c3d1 | 518 | |
4f673c5e | 519 | while (!work_list.is_empty ()) |
9243c3d1 | 520 | { |
4f673c5e JZZ |
521 | basic_block new_cfg_bb = work_list.pop (); |
522 | visited_list.add (new_cfg_bb); | |
523 | edge e; | |
524 | edge_iterator ei; | |
525 | FOR_EACH_EDGE (e, ei, new_cfg_bb->preds) | |
526 | { | |
527 | if (!visited_list.contains (e->src)) | |
528 | work_list.safe_push (e->src); | |
529 | blocks.add (e->src); | |
530 | } | |
9243c3d1 | 531 | } |
4f673c5e JZZ |
532 | return blocks; |
533 | } | |
9243c3d1 | 534 | |
4f673c5e JZZ |
535 | /* Return true if there is an INSN in insns staying in the block BB. */ |
536 | static bool | |
6b6b9c68 | 537 | any_set_in_bb_p (hash_set<set_info *> sets, const bb_info *bb) |
4f673c5e | 538 | { |
6b6b9c68 JZZ |
539 | for (const set_info *set : sets) |
540 | if (set->bb ()->index () == bb->index ()) | |
4f673c5e JZZ |
541 | return true; |
542 | return false; | |
9243c3d1 JZZ |
543 | } |
544 | ||
545 | /* Helper function to get SEW operand. We always have SEW value for | |
546 | all RVV instructions that have VTYPE OP. */ | |
547 | static uint8_t | |
548 | get_sew (rtx_insn *rinsn) | |
549 | { | |
550 | return get_attr_sew (rinsn); | |
551 | } | |
552 | ||
553 | /* Helper function to get VLMUL operand. We always have VLMUL value for | |
554 | all RVV instructions that have VTYPE OP. */ | |
555 | static enum vlmul_type | |
556 | get_vlmul (rtx_insn *rinsn) | |
557 | { | |
558 | return (enum vlmul_type) get_attr_vlmul (rinsn); | |
559 | } | |
560 | ||
561 | /* Get default tail policy. */ | |
562 | static bool | |
563 | get_default_ta () | |
564 | { | |
565 | /* For the instruction that doesn't require TA, we still need a default value | |
566 | to emit vsetvl. We pick up the default value according to prefer policy. */ | |
567 | return (bool) (get_prefer_tail_policy () & 0x1 | |
568 | || (get_prefer_tail_policy () >> 1 & 0x1)); | |
569 | } | |
570 | ||
571 | /* Get default mask policy. */ | |
572 | static bool | |
573 | get_default_ma () | |
574 | { | |
575 | /* For the instruction that doesn't require MA, we still need a default value | |
576 | to emit vsetvl. We pick up the default value according to prefer policy. */ | |
577 | return (bool) (get_prefer_mask_policy () & 0x1 | |
578 | || (get_prefer_mask_policy () >> 1 & 0x1)); | |
579 | } | |
580 | ||
581 | /* Helper function to get TA operand. */ | |
582 | static bool | |
583 | tail_agnostic_p (rtx_insn *rinsn) | |
584 | { | |
585 | /* If it doesn't have TA, we return agnostic by default. */ | |
586 | extract_insn_cached (rinsn); | |
587 | int ta = get_attr_ta (rinsn); | |
588 | return ta == INVALID_ATTRIBUTE ? get_default_ta () : IS_AGNOSTIC (ta); | |
589 | } | |
590 | ||
591 | /* Helper function to get MA operand. */ | |
592 | static bool | |
593 | mask_agnostic_p (rtx_insn *rinsn) | |
594 | { | |
595 | /* If it doesn't have MA, we return agnostic by default. */ | |
596 | extract_insn_cached (rinsn); | |
597 | int ma = get_attr_ma (rinsn); | |
598 | return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma); | |
599 | } | |
600 | ||
601 | /* Return true if FN has a vector instruction that use VL/VTYPE. */ | |
602 | static bool | |
603 | has_vector_insn (function *fn) | |
604 | { | |
605 | basic_block cfg_bb; | |
606 | rtx_insn *rinsn; | |
607 | FOR_ALL_BB_FN (cfg_bb, fn) | |
608 | FOR_BB_INSNS (cfg_bb, rinsn) | |
609 | if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn)) | |
610 | return true; | |
611 | return false; | |
612 | } | |
613 | ||
614 | /* Emit vsetvl instruction. */ | |
615 | static rtx | |
e577b91b | 616 | gen_vsetvl_pat (enum vsetvl_type insn_type, const vl_vtype_info &info, rtx vl) |
9243c3d1 JZZ |
617 | { |
618 | rtx avl = info.get_avl (); | |
619 | rtx sew = gen_int_mode (info.get_sew (), Pmode); | |
620 | rtx vlmul = gen_int_mode (info.get_vlmul (), Pmode); | |
621 | rtx ta = gen_int_mode (info.get_ta (), Pmode); | |
622 | rtx ma = gen_int_mode (info.get_ma (), Pmode); | |
623 | ||
624 | if (insn_type == VSETVL_NORMAL) | |
625 | { | |
626 | gcc_assert (vl != NULL_RTX); | |
627 | return gen_vsetvl (Pmode, vl, avl, sew, vlmul, ta, ma); | |
628 | } | |
629 | else if (insn_type == VSETVL_VTYPE_CHANGE_ONLY) | |
630 | return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma); | |
631 | else | |
632 | return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma); | |
633 | } | |
634 | ||
635 | static rtx | |
e577b91b | 636 | gen_vsetvl_pat (rtx_insn *rinsn, const vector_insn_info &info) |
9243c3d1 JZZ |
637 | { |
638 | rtx new_pat; | |
60bd33bc JZZ |
639 | vl_vtype_info new_info = info; |
640 | if (info.get_insn () && info.get_insn ()->rtl () | |
641 | && fault_first_load_p (info.get_insn ()->rtl ())) | |
642 | new_info.set_avl_info ( | |
643 | avl_info (get_avl (info.get_insn ()->rtl ()), nullptr)); | |
9243c3d1 JZZ |
644 | if (vsetvl_insn_p (rinsn) || vlmax_avl_p (info.get_avl ())) |
645 | { | |
646 | rtx dest = get_vl (rinsn); | |
60bd33bc | 647 | new_pat = gen_vsetvl_pat (VSETVL_NORMAL, new_info, dest); |
9243c3d1 JZZ |
648 | } |
649 | else if (INSN_CODE (rinsn) == CODE_FOR_vsetvl_vtype_change_only) | |
60bd33bc | 650 | new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, new_info, NULL_RTX); |
9243c3d1 | 651 | else |
60bd33bc | 652 | new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX); |
9243c3d1 JZZ |
653 | return new_pat; |
654 | } | |
655 | ||
656 | static void | |
657 | emit_vsetvl_insn (enum vsetvl_type insn_type, enum emit_type emit_type, | |
e577b91b | 658 | const vl_vtype_info &info, rtx vl, rtx_insn *rinsn) |
9243c3d1 JZZ |
659 | { |
660 | rtx pat = gen_vsetvl_pat (insn_type, info, vl); | |
661 | if (dump_file) | |
662 | { | |
663 | fprintf (dump_file, "\nInsert vsetvl insn PATTERN:\n"); | |
664 | print_rtl_single (dump_file, pat); | |
665 | } | |
666 | ||
667 | if (emit_type == EMIT_DIRECT) | |
668 | emit_insn (pat); | |
669 | else if (emit_type == EMIT_BEFORE) | |
670 | emit_insn_before (pat, rinsn); | |
671 | else | |
672 | emit_insn_after (pat, rinsn); | |
673 | } | |
674 | ||
675 | static void | |
676 | eliminate_insn (rtx_insn *rinsn) | |
677 | { | |
678 | if (dump_file) | |
679 | { | |
680 | fprintf (dump_file, "\nEliminate insn %d:\n", INSN_UID (rinsn)); | |
681 | print_rtl_single (dump_file, rinsn); | |
682 | } | |
683 | if (in_sequence_p ()) | |
684 | remove_insn (rinsn); | |
685 | else | |
686 | delete_insn (rinsn); | |
687 | } | |
688 | ||
a481eed8 | 689 | static vsetvl_type |
9243c3d1 JZZ |
690 | insert_vsetvl (enum emit_type emit_type, rtx_insn *rinsn, |
691 | const vector_insn_info &info, const vector_insn_info &prev_info) | |
692 | { | |
693 | /* Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same | |
694 | VLMAX. */ | |
695 | if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p () | |
4f673c5e | 696 | && info.compatible_avl_p (prev_info) && info.same_vlmax_p (prev_info)) |
9243c3d1 JZZ |
697 | { |
698 | emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX, | |
699 | rinsn); | |
a481eed8 | 700 | return VSETVL_VTYPE_CHANGE_ONLY; |
9243c3d1 JZZ |
701 | } |
702 | ||
703 | if (info.has_avl_imm ()) | |
704 | { | |
705 | emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX, | |
706 | rinsn); | |
a481eed8 | 707 | return VSETVL_DISCARD_RESULT; |
9243c3d1 JZZ |
708 | } |
709 | ||
710 | if (info.has_avl_no_reg ()) | |
711 | { | |
712 | /* We can only use x0, x0 if there's no chance of the vtype change causing | |
713 | the previous vl to become invalid. */ | |
714 | if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p () | |
715 | && info.same_vlmax_p (prev_info)) | |
716 | { | |
717 | emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX, | |
718 | rinsn); | |
a481eed8 | 719 | return VSETVL_VTYPE_CHANGE_ONLY; |
9243c3d1 JZZ |
720 | } |
721 | /* Otherwise use an AVL of 0 to avoid depending on previous vl. */ | |
722 | vl_vtype_info new_info = info; | |
723 | new_info.set_avl_info (avl_info (const0_rtx, nullptr)); | |
724 | emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, new_info, NULL_RTX, | |
725 | rinsn); | |
a481eed8 | 726 | return VSETVL_DISCARD_RESULT; |
9243c3d1 JZZ |
727 | } |
728 | ||
729 | /* Use X0 as the DestReg unless AVLReg is X0. We also need to change the | |
730 | opcode if the AVLReg is X0 as they have different register classes for | |
731 | the AVL operand. */ | |
732 | if (vlmax_avl_p (info.get_avl ())) | |
733 | { | |
734 | gcc_assert (has_vtype_op (rinsn) || vsetvl_insn_p (rinsn)); | |
ec99ffab | 735 | rtx vl_op = info.get_avl_reg_rtx (); |
9243c3d1 JZZ |
736 | gcc_assert (!vlmax_avl_p (vl_op)); |
737 | emit_vsetvl_insn (VSETVL_NORMAL, emit_type, info, vl_op, rinsn); | |
a481eed8 | 738 | return VSETVL_NORMAL; |
9243c3d1 JZZ |
739 | } |
740 | ||
741 | emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX, rinsn); | |
742 | ||
743 | if (dump_file) | |
744 | { | |
745 | fprintf (dump_file, "Update VL/VTYPE info, previous info="); | |
746 | prev_info.dump (dump_file); | |
747 | } | |
a481eed8 | 748 | return VSETVL_DISCARD_RESULT; |
9243c3d1 JZZ |
749 | } |
750 | ||
751 | /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them | |
752 | to INSN. If such notes are added to an insn which references a | |
753 | CODE_LABEL, the LABEL_NUSES count is incremented. We have to add | |
754 | that note, because the following loop optimization pass requires | |
755 | them. */ | |
756 | ||
757 | /* ??? If there was a jump optimization pass after gcse and before loop, | |
758 | then we would not need to do this here, because jump would add the | |
759 | necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */ | |
760 | ||
761 | static void | |
27a2a4b6 | 762 | add_label_notes (rtx x, rtx_insn *rinsn) |
9243c3d1 JZZ |
763 | { |
764 | enum rtx_code code = GET_CODE (x); | |
765 | int i, j; | |
766 | const char *fmt; | |
767 | ||
768 | if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x)) | |
769 | { | |
770 | /* This code used to ignore labels that referred to dispatch tables to | |
771 | avoid flow generating (slightly) worse code. | |
772 | ||
773 | We no longer ignore such label references (see LABEL_REF handling in | |
774 | mark_jump_label for additional information). */ | |
775 | ||
776 | /* There's no reason for current users to emit jump-insns with | |
777 | such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET | |
778 | notes. */ | |
27a2a4b6 JZZ |
779 | gcc_assert (!JUMP_P (rinsn)); |
780 | add_reg_note (rinsn, REG_LABEL_OPERAND, label_ref_label (x)); | |
9243c3d1 JZZ |
781 | |
782 | if (LABEL_P (label_ref_label (x))) | |
783 | LABEL_NUSES (label_ref_label (x))++; | |
784 | ||
785 | return; | |
786 | } | |
787 | ||
788 | for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--) | |
789 | { | |
790 | if (fmt[i] == 'e') | |
27a2a4b6 | 791 | add_label_notes (XEXP (x, i), rinsn); |
9243c3d1 JZZ |
792 | else if (fmt[i] == 'E') |
793 | for (j = XVECLEN (x, i) - 1; j >= 0; j--) | |
27a2a4b6 | 794 | add_label_notes (XVECEXP (x, i, j), rinsn); |
9243c3d1 JZZ |
795 | } |
796 | } | |
797 | ||
798 | /* Add EXPR to the end of basic block BB. | |
799 | ||
800 | This is used by both the PRE and code hoisting. */ | |
801 | ||
802 | static void | |
803 | insert_insn_end_basic_block (rtx_insn *rinsn, basic_block cfg_bb) | |
804 | { | |
805 | rtx_insn *end_rinsn = BB_END (cfg_bb); | |
806 | rtx_insn *new_insn; | |
807 | rtx_insn *pat, *pat_end; | |
808 | ||
809 | pat = rinsn; | |
810 | gcc_assert (pat && INSN_P (pat)); | |
811 | ||
812 | pat_end = pat; | |
813 | while (NEXT_INSN (pat_end) != NULL_RTX) | |
814 | pat_end = NEXT_INSN (pat_end); | |
815 | ||
816 | /* If the last end_rinsn is a jump, insert EXPR in front. Similarly we need | |
817 | to take care of trapping instructions in presence of non-call exceptions. | |
818 | */ | |
819 | ||
820 | if (JUMP_P (end_rinsn) | |
821 | || (NONJUMP_INSN_P (end_rinsn) | |
822 | && (!single_succ_p (cfg_bb) | |
823 | || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL))) | |
824 | { | |
825 | /* FIXME: What if something in jump uses value set in new end_rinsn? */ | |
826 | new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb); | |
827 | } | |
828 | ||
829 | /* Likewise if the last end_rinsn is a call, as will happen in the presence | |
830 | of exception handling. */ | |
831 | else if (CALL_P (end_rinsn) | |
832 | && (!single_succ_p (cfg_bb) | |
833 | || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL)) | |
834 | { | |
835 | /* Keeping in mind targets with small register classes and parameters | |
836 | in registers, we search backward and place the instructions before | |
837 | the first parameter is loaded. Do this for everyone for consistency | |
838 | and a presumption that we'll get better code elsewhere as well. */ | |
839 | ||
840 | /* Since different machines initialize their parameter registers | |
841 | in different orders, assume nothing. Collect the set of all | |
842 | parameter registers. */ | |
843 | end_rinsn = find_first_parameter_load (end_rinsn, BB_HEAD (cfg_bb)); | |
844 | ||
845 | /* If we found all the parameter loads, then we want to insert | |
846 | before the first parameter load. | |
847 | ||
848 | If we did not find all the parameter loads, then we might have | |
849 | stopped on the head of the block, which could be a CODE_LABEL. | |
850 | If we inserted before the CODE_LABEL, then we would be putting | |
851 | the end_rinsn in the wrong basic block. In that case, put the | |
852 | end_rinsn after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. | |
853 | */ | |
854 | while (LABEL_P (end_rinsn) || NOTE_INSN_BASIC_BLOCK_P (end_rinsn)) | |
855 | end_rinsn = NEXT_INSN (end_rinsn); | |
856 | ||
857 | new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb); | |
858 | } | |
859 | else | |
860 | new_insn = emit_insn_after_noloc (pat, end_rinsn, cfg_bb); | |
861 | ||
862 | while (1) | |
863 | { | |
864 | if (INSN_P (pat)) | |
865 | add_label_notes (PATTERN (pat), new_insn); | |
866 | if (pat == pat_end) | |
867 | break; | |
868 | pat = NEXT_INSN (pat); | |
869 | } | |
870 | } | |
871 | ||
872 | /* Get VL/VTYPE information for INSN. */ | |
873 | static vl_vtype_info | |
874 | get_vl_vtype_info (const insn_info *insn) | |
875 | { | |
9243c3d1 JZZ |
876 | set_info *set = nullptr; |
877 | rtx avl = ::get_avl (insn->rtl ()); | |
ec99ffab JZZ |
878 | if (avl && REG_P (avl)) |
879 | { | |
880 | if (vlmax_avl_p (avl) && has_vl_op (insn->rtl ())) | |
881 | set | |
882 | = find_access (insn->uses (), REGNO (get_vl (insn->rtl ())))->def (); | |
883 | else if (!vlmax_avl_p (avl)) | |
884 | set = find_access (insn->uses (), REGNO (avl))->def (); | |
885 | else | |
886 | set = nullptr; | |
887 | } | |
9243c3d1 JZZ |
888 | |
889 | uint8_t sew = get_sew (insn->rtl ()); | |
890 | enum vlmul_type vlmul = get_vlmul (insn->rtl ()); | |
891 | uint8_t ratio = get_attr_ratio (insn->rtl ()); | |
892 | /* when get_attr_ratio is invalid, this kind of instructions | |
893 | doesn't care about ratio. However, we still need this value | |
894 | in demand info backward analysis. */ | |
895 | if (ratio == INVALID_ATTRIBUTE) | |
896 | ratio = calculate_ratio (sew, vlmul); | |
897 | bool ta = tail_agnostic_p (insn->rtl ()); | |
898 | bool ma = mask_agnostic_p (insn->rtl ()); | |
899 | ||
900 | /* If merge operand is undef value, we prefer agnostic. */ | |
901 | int merge_op_idx = get_attr_merge_op_idx (insn->rtl ()); | |
902 | if (merge_op_idx != INVALID_ATTRIBUTE | |
903 | && satisfies_constraint_vu (recog_data.operand[merge_op_idx])) | |
904 | { | |
905 | ta = true; | |
906 | ma = true; | |
907 | } | |
908 | ||
909 | vl_vtype_info info (avl_info (avl, set), sew, vlmul, ratio, ta, ma); | |
910 | return info; | |
911 | } | |
912 | ||
913 | static void | |
914 | change_insn (rtx_insn *rinsn, rtx new_pat) | |
915 | { | |
916 | /* We don't apply change on RTL_SSA here since it's possible a | |
917 | new INSN we add in the PASS before which doesn't have RTL_SSA | |
918 | info yet.*/ | |
919 | if (dump_file) | |
920 | { | |
921 | fprintf (dump_file, "\nChange PATTERN of insn %d from:\n", | |
922 | INSN_UID (rinsn)); | |
923 | print_rtl_single (dump_file, PATTERN (rinsn)); | |
924 | } | |
925 | ||
011ba384 | 926 | validate_change (rinsn, &PATTERN (rinsn), new_pat, false); |
9243c3d1 JZZ |
927 | |
928 | if (dump_file) | |
929 | { | |
930 | fprintf (dump_file, "\nto:\n"); | |
931 | print_rtl_single (dump_file, PATTERN (rinsn)); | |
932 | } | |
933 | } | |
934 | ||
60bd33bc JZZ |
935 | static const insn_info * |
936 | get_forward_read_vl_insn (const insn_info *insn) | |
937 | { | |
938 | const bb_info *bb = insn->bb (); | |
939 | for (const insn_info *i = insn->next_nondebug_insn (); | |
940 | real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ()) | |
941 | { | |
942 | if (find_access (i->defs (), VL_REGNUM)) | |
943 | return nullptr; | |
944 | if (read_vl_insn_p (i->rtl ())) | |
945 | return i; | |
946 | } | |
947 | return nullptr; | |
948 | } | |
949 | ||
950 | static const insn_info * | |
951 | get_backward_fault_first_load_insn (const insn_info *insn) | |
952 | { | |
953 | const bb_info *bb = insn->bb (); | |
954 | for (const insn_info *i = insn->prev_nondebug_insn (); | |
955 | real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ()) | |
956 | { | |
957 | if (fault_first_load_p (i->rtl ())) | |
958 | return i; | |
959 | if (find_access (i->defs (), VL_REGNUM)) | |
960 | return nullptr; | |
961 | } | |
962 | return nullptr; | |
963 | } | |
964 | ||
9243c3d1 JZZ |
965 | static bool |
966 | change_insn (function_info *ssa, insn_change change, insn_info *insn, | |
967 | rtx new_pat) | |
968 | { | |
969 | rtx_insn *rinsn = insn->rtl (); | |
970 | auto attempt = ssa->new_change_attempt (); | |
971 | if (!restrict_movement (change)) | |
972 | return false; | |
973 | ||
974 | if (dump_file) | |
975 | { | |
976 | fprintf (dump_file, "\nChange PATTERN of insn %d from:\n", | |
977 | INSN_UID (rinsn)); | |
978 | print_rtl_single (dump_file, PATTERN (rinsn)); | |
9243c3d1 JZZ |
979 | } |
980 | ||
981 | insn_change_watermark watermark; | |
982 | validate_change (rinsn, &PATTERN (rinsn), new_pat, true); | |
983 | ||
984 | /* These routines report failures themselves. */ | |
985 | if (!recog (attempt, change) || !change_is_worthwhile (change, false)) | |
986 | return false; | |
a1e42094 JZZ |
987 | |
988 | /* Fix bug: | |
989 | (insn 12 34 13 2 (set (reg:VNx8DI 120 v24 [orig:134 _1 ] [134]) | |
990 | (if_then_else:VNx8DI (unspec:VNx8BI [ | |
991 | (const_vector:VNx8BI repeat [ | |
992 | (const_int 1 [0x1]) | |
993 | ]) | |
994 | (const_int 0 [0]) | |
995 | (const_int 2 [0x2]) repeated x2 | |
996 | (const_int 0 [0]) | |
997 | (reg:SI 66 vl) | |
998 | (reg:SI 67 vtype) | |
999 | ] UNSPEC_VPREDICATE) | |
1000 | (plus:VNx8DI (reg/v:VNx8DI 104 v8 [orig:137 op1 ] [137]) | |
1001 | (sign_extend:VNx8DI (vec_duplicate:VNx8SI (reg:SI 15 a5 | |
1002 | [140])))) (unspec:VNx8DI [ (const_int 0 [0]) ] UNSPEC_VUNDEF))) "rvv.c":8:12 | |
1003 | 2784 {pred_single_widen_addsvnx8di_scalar} (expr_list:REG_EQUIV | |
1004 | (mem/c:VNx8DI (reg:DI 10 a0 [142]) [1 <retval>+0 S[64, 64] A128]) | |
1005 | (expr_list:REG_EQUAL (if_then_else:VNx8DI (unspec:VNx8BI [ | |
1006 | (const_vector:VNx8BI repeat [ | |
1007 | (const_int 1 [0x1]) | |
1008 | ]) | |
1009 | (reg/v:DI 13 a3 [orig:139 vl ] [139]) | |
1010 | (const_int 2 [0x2]) repeated x2 | |
1011 | (const_int 0 [0]) | |
1012 | (reg:SI 66 vl) | |
1013 | (reg:SI 67 vtype) | |
1014 | ] UNSPEC_VPREDICATE) | |
1015 | (plus:VNx8DI (reg/v:VNx8DI 104 v8 [orig:137 op1 ] [137]) | |
1016 | (const_vector:VNx8DI repeat [ | |
1017 | (const_int 2730 [0xaaa]) | |
1018 | ])) | |
1019 | (unspec:VNx8DI [ | |
1020 | (const_int 0 [0]) | |
1021 | ] UNSPEC_VUNDEF)) | |
1022 | (nil)))) | |
1023 | Here we want to remove use "a3". However, the REG_EQUAL/REG_EQUIV note use | |
1024 | "a3" which made us fail in change_insn. We reference to the | |
1025 | 'aarch64-cc-fusion.cc' and add this method. */ | |
1026 | remove_reg_equal_equiv_notes (rinsn); | |
9243c3d1 JZZ |
1027 | confirm_change_group (); |
1028 | ssa->change_insn (change); | |
1029 | ||
1030 | if (dump_file) | |
1031 | { | |
1032 | fprintf (dump_file, "\nto:\n"); | |
1033 | print_rtl_single (dump_file, PATTERN (rinsn)); | |
9243c3d1 JZZ |
1034 | } |
1035 | return true; | |
1036 | } | |
1037 | ||
aef20243 JZZ |
1038 | static void |
1039 | change_vsetvl_insn (const insn_info *insn, const vector_insn_info &info) | |
1040 | { | |
1041 | rtx_insn *rinsn; | |
1042 | if (vector_config_insn_p (insn->rtl ())) | |
1043 | { | |
1044 | rinsn = insn->rtl (); | |
1045 | gcc_assert (vsetvl_insn_p (rinsn) && "Can't handle X0, rs1 vsetvli yet"); | |
1046 | } | |
1047 | else | |
1048 | { | |
1049 | gcc_assert (has_vtype_op (insn->rtl ())); | |
1050 | rinsn = PREV_INSN (insn->rtl ()); | |
1051 | gcc_assert (vector_config_insn_p (rinsn)); | |
1052 | } | |
1053 | rtx new_pat = gen_vsetvl_pat (rinsn, info); | |
1054 | change_insn (rinsn, new_pat); | |
1055 | } | |
1056 | ||
d51f2456 JZ |
1057 | static void |
1058 | local_eliminate_vsetvl_insn (const vector_insn_info &dem) | |
1059 | { | |
1060 | const insn_info *insn = dem.get_insn (); | |
1061 | if (!insn || insn->is_artificial ()) | |
1062 | return; | |
1063 | rtx_insn *rinsn = insn->rtl (); | |
1064 | const bb_info *bb = insn->bb (); | |
1065 | if (vsetvl_insn_p (rinsn)) | |
1066 | { | |
1067 | rtx vl = get_vl (rinsn); | |
1068 | for (insn_info *i = insn->next_nondebug_insn (); | |
1069 | real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ()) | |
1070 | { | |
1071 | if (i->is_call () || i->is_asm () | |
1072 | || find_access (i->defs (), VL_REGNUM) | |
1073 | || find_access (i->defs (), VTYPE_REGNUM)) | |
1074 | return; | |
1075 | ||
1076 | if (has_vtype_op (i->rtl ())) | |
1077 | { | |
1078 | if (!vsetvl_discard_result_insn_p (PREV_INSN (i->rtl ()))) | |
1079 | return; | |
1080 | rtx avl = get_avl (i->rtl ()); | |
1081 | if (avl != vl) | |
1082 | return; | |
1083 | set_info *def = find_access (i->uses (), REGNO (avl))->def (); | |
1084 | if (def->insn () != insn) | |
1085 | return; | |
1086 | ||
1087 | vector_insn_info new_info; | |
1088 | new_info.parse_insn (i); | |
1089 | if (!new_info.skip_avl_compatible_p (dem)) | |
1090 | return; | |
1091 | ||
1092 | new_info.set_avl_info (dem.get_avl_info ()); | |
1093 | new_info = dem.merge (new_info, LOCAL_MERGE); | |
1094 | change_vsetvl_insn (insn, new_info); | |
1095 | eliminate_insn (PREV_INSN (i->rtl ())); | |
1096 | return; | |
1097 | } | |
1098 | } | |
1099 | } | |
1100 | } | |
1101 | ||
4f673c5e | 1102 | static bool |
6b6b9c68 | 1103 | source_equal_p (insn_info *insn1, insn_info *insn2) |
4f673c5e | 1104 | { |
6b6b9c68 JZZ |
1105 | if (!insn1 || !insn2) |
1106 | return false; | |
1107 | rtx_insn *rinsn1 = insn1->rtl (); | |
1108 | rtx_insn *rinsn2 = insn2->rtl (); | |
4f673c5e JZZ |
1109 | if (!rinsn1 || !rinsn2) |
1110 | return false; | |
1111 | rtx note1 = find_reg_equal_equiv_note (rinsn1); | |
1112 | rtx note2 = find_reg_equal_equiv_note (rinsn2); | |
1113 | rtx single_set1 = single_set (rinsn1); | |
1114 | rtx single_set2 = single_set (rinsn2); | |
60bd33bc JZZ |
1115 | if (read_vl_insn_p (rinsn1) && read_vl_insn_p (rinsn2)) |
1116 | { | |
1117 | const insn_info *load1 = get_backward_fault_first_load_insn (insn1); | |
1118 | const insn_info *load2 = get_backward_fault_first_load_insn (insn2); | |
1119 | return load1 && load2 && load1 == load2; | |
1120 | } | |
4f673c5e JZZ |
1121 | |
1122 | if (note1 && note2 && rtx_equal_p (note1, note2)) | |
1123 | return true; | |
6b6b9c68 JZZ |
1124 | |
1125 | /* Since vsetvl instruction is not single SET. | |
1126 | We handle this case specially here. */ | |
1127 | if (vsetvl_insn_p (insn1->rtl ()) && vsetvl_insn_p (insn2->rtl ())) | |
1128 | { | |
1129 | /* For example: | |
1130 | vsetvl1 a6,a5,e32m1 | |
1131 | RVV 1 (use a6 as AVL) | |
1132 | vsetvl2 a5,a5,e8mf4 | |
1133 | RVV 2 (use a5 as AVL) | |
1134 | We consider AVL of RVV 1 and RVV 2 are same so that we can | |
1135 | gain more optimization opportunities. | |
1136 | ||
1137 | Note: insn1_info.compatible_avl_p (insn2_info) | |
1138 | will make sure there is no instruction between vsetvl1 and vsetvl2 | |
1139 | modify a5 since their def will be different if there is instruction | |
1140 | modify a5 and compatible_avl_p will return false. */ | |
1141 | vector_insn_info insn1_info, insn2_info; | |
1142 | insn1_info.parse_insn (insn1); | |
1143 | insn2_info.parse_insn (insn2); | |
1144 | if (insn1_info.same_vlmax_p (insn2_info) | |
1145 | && insn1_info.compatible_avl_p (insn2_info)) | |
1146 | return true; | |
1147 | } | |
1148 | ||
1149 | /* We only handle AVL is set by instructions with no side effects. */ | |
1150 | if (!single_set1 || !single_set2) | |
1151 | return false; | |
1152 | if (!rtx_equal_p (SET_SRC (single_set1), SET_SRC (single_set2))) | |
1153 | return false; | |
1154 | gcc_assert (insn1->uses ().size () == insn2->uses ().size ()); | |
1155 | for (size_t i = 0; i < insn1->uses ().size (); i++) | |
1156 | if (insn1->uses ()[i] != insn2->uses ()[i]) | |
1157 | return false; | |
1158 | return true; | |
4f673c5e JZZ |
1159 | } |
1160 | ||
1161 | /* Helper function to get single same real RTL source. | |
1162 | return NULL if it is not a single real RTL source. */ | |
6b6b9c68 | 1163 | static insn_info * |
4f673c5e JZZ |
1164 | extract_single_source (set_info *set) |
1165 | { | |
1166 | if (!set) | |
1167 | return nullptr; | |
1168 | if (set->insn ()->is_real ()) | |
6b6b9c68 | 1169 | return set->insn (); |
4f673c5e JZZ |
1170 | if (!set->insn ()->is_phi ()) |
1171 | return nullptr; | |
6b6b9c68 | 1172 | hash_set<set_info *> sets = get_all_sets (set, true, false, true); |
4f673c5e | 1173 | |
6b6b9c68 | 1174 | insn_info *first_insn = (*sets.begin ())->insn (); |
4f673c5e JZZ |
1175 | if (first_insn->is_artificial ()) |
1176 | return nullptr; | |
6b6b9c68 | 1177 | for (const set_info *set : sets) |
4f673c5e JZZ |
1178 | { |
1179 | /* If there is a head or end insn, we conservative return | |
1180 | NULL so that VSETVL PASS will insert vsetvl directly. */ | |
6b6b9c68 | 1181 | if (set->insn ()->is_artificial ()) |
4f673c5e | 1182 | return nullptr; |
6b6b9c68 | 1183 | if (!source_equal_p (set->insn (), first_insn)) |
4f673c5e JZZ |
1184 | return nullptr; |
1185 | } | |
1186 | ||
6b6b9c68 | 1187 | return first_insn; |
4f673c5e JZZ |
1188 | } |
1189 | ||
ec99ffab JZZ |
1190 | static unsigned |
1191 | calculate_sew (vlmul_type vlmul, unsigned int ratio) | |
1192 | { | |
1193 | for (const unsigned sew : ALL_SEW) | |
1194 | if (calculate_ratio (sew, vlmul) == ratio) | |
1195 | return sew; | |
1196 | return 0; | |
1197 | } | |
1198 | ||
1199 | static vlmul_type | |
1200 | calculate_vlmul (unsigned int sew, unsigned int ratio) | |
1201 | { | |
1202 | for (const vlmul_type vlmul : ALL_LMUL) | |
1203 | if (calculate_ratio (sew, vlmul) == ratio) | |
1204 | return vlmul; | |
1205 | return LMUL_RESERVED; | |
1206 | } | |
1207 | ||
1208 | static bool | |
1209 | incompatible_avl_p (const vector_insn_info &info1, | |
1210 | const vector_insn_info &info2) | |
1211 | { | |
1212 | return !info1.compatible_avl_p (info2) && !info2.compatible_avl_p (info1); | |
1213 | } | |
1214 | ||
1215 | static bool | |
1216 | different_sew_p (const vector_insn_info &info1, const vector_insn_info &info2) | |
1217 | { | |
1218 | return info1.get_sew () != info2.get_sew (); | |
1219 | } | |
1220 | ||
1221 | static bool | |
1222 | different_lmul_p (const vector_insn_info &info1, const vector_insn_info &info2) | |
1223 | { | |
1224 | return info1.get_vlmul () != info2.get_vlmul (); | |
1225 | } | |
1226 | ||
1227 | static bool | |
1228 | different_ratio_p (const vector_insn_info &info1, const vector_insn_info &info2) | |
1229 | { | |
1230 | return info1.get_ratio () != info2.get_ratio (); | |
1231 | } | |
1232 | ||
1233 | static bool | |
1234 | different_tail_policy_p (const vector_insn_info &info1, | |
1235 | const vector_insn_info &info2) | |
1236 | { | |
1237 | return info1.get_ta () != info2.get_ta (); | |
1238 | } | |
1239 | ||
1240 | static bool | |
1241 | different_mask_policy_p (const vector_insn_info &info1, | |
1242 | const vector_insn_info &info2) | |
1243 | { | |
1244 | return info1.get_ma () != info2.get_ma (); | |
1245 | } | |
1246 | ||
1247 | static bool | |
1248 | possible_zero_avl_p (const vector_insn_info &info1, | |
1249 | const vector_insn_info &info2) | |
1250 | { | |
1251 | return !info1.has_non_zero_avl () || !info2.has_non_zero_avl (); | |
1252 | } | |
1253 | ||
ec99ffab JZZ |
1254 | static bool |
1255 | second_ratio_invalid_for_first_sew_p (const vector_insn_info &info1, | |
1256 | const vector_insn_info &info2) | |
1257 | { | |
1258 | return calculate_vlmul (info1.get_sew (), info2.get_ratio ()) | |
1259 | == LMUL_RESERVED; | |
1260 | } | |
1261 | ||
1262 | static bool | |
1263 | second_ratio_invalid_for_first_lmul_p (const vector_insn_info &info1, | |
1264 | const vector_insn_info &info2) | |
1265 | { | |
1266 | return calculate_sew (info1.get_vlmul (), info2.get_ratio ()) == 0; | |
1267 | } | |
1268 | ||
1269 | static bool | |
1270 | second_sew_less_than_first_sew_p (const vector_insn_info &info1, | |
1271 | const vector_insn_info &info2) | |
1272 | { | |
1273 | return info2.get_sew () < info1.get_sew (); | |
1274 | } | |
1275 | ||
1276 | static bool | |
1277 | first_sew_less_than_second_sew_p (const vector_insn_info &info1, | |
1278 | const vector_insn_info &info2) | |
1279 | { | |
1280 | return info1.get_sew () < info2.get_sew (); | |
1281 | } | |
1282 | ||
1283 | /* return 0 if LMUL1 == LMUL2. | |
1284 | return -1 if LMUL1 < LMUL2. | |
1285 | return 1 if LMUL1 > LMUL2. */ | |
1286 | static int | |
1287 | compare_lmul (vlmul_type vlmul1, vlmul_type vlmul2) | |
1288 | { | |
1289 | if (vlmul1 == vlmul2) | |
1290 | return 0; | |
1291 | ||
1292 | switch (vlmul1) | |
1293 | { | |
1294 | case LMUL_1: | |
1295 | if (vlmul2 == LMUL_2 || vlmul2 == LMUL_4 || vlmul2 == LMUL_8) | |
1296 | return 1; | |
1297 | else | |
1298 | return -1; | |
1299 | case LMUL_2: | |
1300 | if (vlmul2 == LMUL_4 || vlmul2 == LMUL_8) | |
1301 | return 1; | |
1302 | else | |
1303 | return -1; | |
1304 | case LMUL_4: | |
1305 | if (vlmul2 == LMUL_8) | |
1306 | return 1; | |
1307 | else | |
1308 | return -1; | |
1309 | case LMUL_8: | |
1310 | return -1; | |
1311 | case LMUL_F2: | |
1312 | if (vlmul2 == LMUL_1 || vlmul2 == LMUL_2 || vlmul2 == LMUL_4 | |
1313 | || vlmul2 == LMUL_8) | |
1314 | return 1; | |
1315 | else | |
1316 | return -1; | |
1317 | case LMUL_F4: | |
1318 | if (vlmul2 == LMUL_F2 || vlmul2 == LMUL_1 || vlmul2 == LMUL_2 | |
1319 | || vlmul2 == LMUL_4 || vlmul2 == LMUL_8) | |
1320 | return 1; | |
1321 | else | |
1322 | return -1; | |
1323 | case LMUL_F8: | |
1324 | return 0; | |
1325 | default: | |
1326 | gcc_unreachable (); | |
1327 | } | |
1328 | } | |
1329 | ||
1330 | static bool | |
1331 | second_lmul_less_than_first_lmul_p (const vector_insn_info &info1, | |
1332 | const vector_insn_info &info2) | |
1333 | { | |
1334 | return compare_lmul (info2.get_vlmul (), info1.get_vlmul ()) == -1; | |
1335 | } | |
1336 | ||
ec99ffab JZZ |
1337 | static bool |
1338 | second_ratio_less_than_first_ratio_p (const vector_insn_info &info1, | |
1339 | const vector_insn_info &info2) | |
1340 | { | |
1341 | return info2.get_ratio () < info1.get_ratio (); | |
1342 | } | |
1343 | ||
1344 | static CONSTEXPR const demands_cond incompatible_conds[] = { | |
1345 | #define DEF_INCOMPATIBLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, \ | |
1346 | GE_SEW1, TAIL_POLICTY1, MASK_POLICY1, AVL2, \ | |
1347 | SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, \ | |
1348 | TAIL_POLICTY2, MASK_POLICY2, COND) \ | |
1349 | {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1, \ | |
1350 | MASK_POLICY1}, \ | |
1351 | {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \ | |
1352 | MASK_POLICY2}}, \ | |
1353 | COND}, | |
1354 | #include "riscv-vsetvl.def" | |
1355 | }; | |
1356 | ||
1357 | static unsigned | |
1358 | greatest_sew (const vector_insn_info &info1, const vector_insn_info &info2) | |
1359 | { | |
1360 | return std::max (info1.get_sew (), info2.get_sew ()); | |
1361 | } | |
1362 | ||
1363 | static unsigned | |
1364 | first_sew (const vector_insn_info &info1, const vector_insn_info &) | |
1365 | { | |
1366 | return info1.get_sew (); | |
1367 | } | |
1368 | ||
1369 | static unsigned | |
1370 | second_sew (const vector_insn_info &, const vector_insn_info &info2) | |
1371 | { | |
1372 | return info2.get_sew (); | |
1373 | } | |
1374 | ||
1375 | static vlmul_type | |
1376 | first_vlmul (const vector_insn_info &info1, const vector_insn_info &) | |
1377 | { | |
1378 | return info1.get_vlmul (); | |
1379 | } | |
1380 | ||
1381 | static vlmul_type | |
1382 | second_vlmul (const vector_insn_info &, const vector_insn_info &info2) | |
1383 | { | |
1384 | return info2.get_vlmul (); | |
1385 | } | |
1386 | ||
1387 | static unsigned | |
1388 | first_ratio (const vector_insn_info &info1, const vector_insn_info &) | |
1389 | { | |
1390 | return info1.get_ratio (); | |
1391 | } | |
1392 | ||
1393 | static unsigned | |
1394 | second_ratio (const vector_insn_info &, const vector_insn_info &info2) | |
1395 | { | |
1396 | return info2.get_ratio (); | |
1397 | } | |
1398 | ||
1399 | static vlmul_type | |
1400 | vlmul_for_first_sew_second_ratio (const vector_insn_info &info1, | |
1401 | const vector_insn_info &info2) | |
1402 | { | |
1403 | return calculate_vlmul (info1.get_sew (), info2.get_ratio ()); | |
1404 | } | |
1405 | ||
1406 | static unsigned | |
1407 | ratio_for_second_sew_first_vlmul (const vector_insn_info &info1, | |
1408 | const vector_insn_info &info2) | |
1409 | { | |
1410 | return calculate_ratio (info2.get_sew (), info1.get_vlmul ()); | |
1411 | } | |
1412 | ||
1413 | static CONSTEXPR const demands_fuse_rule fuse_rules[] = { | |
1414 | #define DEF_SEW_LMUL_FUSE_RULE(DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1, \ | |
1415 | DEMAND_GE_SEW1, DEMAND_SEW2, DEMAND_LMUL2, \ | |
1416 | DEMAND_RATIO2, DEMAND_GE_SEW2, NEW_DEMAND_SEW, \ | |
1417 | NEW_DEMAND_LMUL, NEW_DEMAND_RATIO, \ | |
1418 | NEW_DEMAND_GE_SEW, NEW_SEW, NEW_VLMUL, \ | |
1419 | NEW_RATIO) \ | |
1420 | {{{DEMAND_ANY, DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1, DEMAND_ANY, \ | |
1421 | DEMAND_GE_SEW1, DEMAND_ANY, DEMAND_ANY}, \ | |
1422 | {DEMAND_ANY, DEMAND_SEW2, DEMAND_LMUL2, DEMAND_RATIO2, DEMAND_ANY, \ | |
1423 | DEMAND_GE_SEW2, DEMAND_ANY, DEMAND_ANY}}, \ | |
1424 | NEW_DEMAND_SEW, \ | |
1425 | NEW_DEMAND_LMUL, \ | |
1426 | NEW_DEMAND_RATIO, \ | |
1427 | NEW_DEMAND_GE_SEW, \ | |
1428 | NEW_SEW, \ | |
1429 | NEW_VLMUL, \ | |
1430 | NEW_RATIO}, | |
1431 | #include "riscv-vsetvl.def" | |
1432 | }; | |
1433 | ||
1434 | static bool | |
1435 | always_unavailable (const vector_insn_info &, const vector_insn_info &) | |
1436 | { | |
1437 | return true; | |
1438 | } | |
1439 | ||
1440 | static bool | |
1441 | avl_unavailable_p (const vector_insn_info &info1, const vector_insn_info &info2) | |
1442 | { | |
1443 | return !info2.compatible_avl_p (info1.get_avl_info ()); | |
1444 | } | |
1445 | ||
1446 | static bool | |
1447 | sew_unavailable_p (const vector_insn_info &info1, const vector_insn_info &info2) | |
1448 | { | |
1449 | if (!info2.demand_p (DEMAND_LMUL) && !info2.demand_p (DEMAND_RATIO)) | |
1450 | { | |
1451 | if (info2.demand_p (DEMAND_GE_SEW)) | |
1452 | return info1.get_sew () < info2.get_sew (); | |
1453 | return info1.get_sew () != info2.get_sew (); | |
1454 | } | |
1455 | return true; | |
1456 | } | |
1457 | ||
1458 | static bool | |
1459 | lmul_unavailable_p (const vector_insn_info &info1, | |
1460 | const vector_insn_info &info2) | |
1461 | { | |
1462 | if (info1.get_vlmul () == info2.get_vlmul () && !info2.demand_p (DEMAND_SEW) | |
1463 | && !info2.demand_p (DEMAND_RATIO)) | |
1464 | return false; | |
1465 | return true; | |
1466 | } | |
1467 | ||
1468 | static bool | |
1469 | ge_sew_unavailable_p (const vector_insn_info &info1, | |
1470 | const vector_insn_info &info2) | |
1471 | { | |
1472 | if (!info2.demand_p (DEMAND_LMUL) && !info2.demand_p (DEMAND_RATIO) | |
1473 | && info2.demand_p (DEMAND_GE_SEW)) | |
1474 | return info1.get_sew () < info2.get_sew (); | |
1475 | return true; | |
1476 | } | |
1477 | ||
1478 | static bool | |
1479 | ge_sew_lmul_unavailable_p (const vector_insn_info &info1, | |
1480 | const vector_insn_info &info2) | |
1481 | { | |
1482 | if (!info2.demand_p (DEMAND_RATIO) && info2.demand_p (DEMAND_GE_SEW)) | |
1483 | return info1.get_sew () < info2.get_sew (); | |
1484 | return true; | |
1485 | } | |
1486 | ||
1487 | static bool | |
1488 | ge_sew_ratio_unavailable_p (const vector_insn_info &info1, | |
1489 | const vector_insn_info &info2) | |
1490 | { | |
1491 | if (!info2.demand_p (DEMAND_LMUL) && info2.demand_p (DEMAND_GE_SEW)) | |
1492 | return info1.get_sew () < info2.get_sew (); | |
1493 | return true; | |
1494 | } | |
1495 | ||
1496 | static CONSTEXPR const demands_cond unavailable_conds[] = { | |
1497 | #define DEF_UNAVAILABLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, \ | |
1498 | TAIL_POLICTY1, MASK_POLICY1, AVL2, SEW2, LMUL2, \ | |
1499 | RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \ | |
1500 | MASK_POLICY2, COND) \ | |
1501 | {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1, \ | |
1502 | MASK_POLICY1}, \ | |
1503 | {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \ | |
1504 | MASK_POLICY2}}, \ | |
1505 | COND}, | |
1506 | #include "riscv-vsetvl.def" | |
1507 | }; | |
1508 | ||
1509 | static bool | |
1510 | same_sew_lmul_demand_p (const bool *dems1, const bool *dems2) | |
1511 | { | |
1512 | return dems1[DEMAND_SEW] == dems2[DEMAND_SEW] | |
1513 | && dems1[DEMAND_LMUL] == dems2[DEMAND_LMUL] | |
1514 | && dems1[DEMAND_RATIO] == dems2[DEMAND_RATIO] && !dems1[DEMAND_GE_SEW] | |
1515 | && !dems2[DEMAND_GE_SEW]; | |
1516 | } | |
1517 | ||
1518 | static bool | |
1519 | propagate_avl_across_demands_p (const vector_insn_info &info1, | |
1520 | const vector_insn_info &info2) | |
1521 | { | |
1522 | if (info2.demand_p (DEMAND_AVL)) | |
1523 | { | |
1524 | if (info2.demand_p (DEMAND_NONZERO_AVL)) | |
1525 | return info1.demand_p (DEMAND_AVL) | |
1526 | && !info1.demand_p (DEMAND_NONZERO_AVL) && info1.has_avl_reg (); | |
1527 | } | |
1528 | else | |
1529 | return info1.demand_p (DEMAND_AVL) && info1.has_avl_reg (); | |
1530 | return false; | |
1531 | } | |
1532 | ||
1533 | static bool | |
a481eed8 | 1534 | reg_available_p (const insn_info *insn, const vector_insn_info &info) |
ec99ffab | 1535 | { |
a481eed8 | 1536 | if (info.has_avl_reg () && !info.get_avl_source ()) |
44c918b5 | 1537 | return false; |
a481eed8 JZZ |
1538 | insn_info *def_insn = info.get_avl_source ()->insn (); |
1539 | if (def_insn->bb () == insn->bb ()) | |
1540 | return before_p (def_insn, insn); | |
ec99ffab | 1541 | else |
a481eed8 JZZ |
1542 | return dominated_by_p (CDI_DOMINATORS, insn->bb ()->cfg_bb (), |
1543 | def_insn->bb ()->cfg_bb ()); | |
ec99ffab JZZ |
1544 | } |
1545 | ||
60bd33bc JZZ |
1546 | /* Return true if the instruction support relaxed compatible check. */ |
1547 | static bool | |
1548 | support_relaxed_compatible_p (const vector_insn_info &info1, | |
1549 | const vector_insn_info &info2) | |
1550 | { | |
1551 | if (fault_first_load_p (info1.get_insn ()->rtl ()) | |
1552 | && info2.demand_p (DEMAND_AVL) && info2.has_avl_reg () | |
1553 | && info2.get_avl_source () && info2.get_avl_source ()->insn ()->is_phi ()) | |
1554 | { | |
1555 | hash_set<set_info *> sets | |
1556 | = get_all_sets (info2.get_avl_source (), true, false, false); | |
1557 | for (set_info *set : sets) | |
1558 | { | |
1559 | if (read_vl_insn_p (set->insn ()->rtl ())) | |
1560 | { | |
1561 | const insn_info *insn | |
1562 | = get_backward_fault_first_load_insn (set->insn ()); | |
1563 | if (insn == info1.get_insn ()) | |
1564 | return info2.compatible_vtype_p (info1); | |
1565 | } | |
1566 | } | |
1567 | } | |
1568 | return false; | |
1569 | } | |
1570 | ||
1571 | /* Return true if the block is worthwhile backward propagation. */ | |
1572 | static bool | |
1573 | backward_propagate_worthwhile_p (const basic_block cfg_bb, | |
1574 | const vector_block_info block_info) | |
1575 | { | |
1576 | if (loop_basic_block_p (cfg_bb)) | |
1577 | { | |
1578 | if (block_info.reaching_out.valid_or_dirty_p ()) | |
1579 | { | |
1580 | if (block_info.local_dem.compatible_p (block_info.reaching_out)) | |
1581 | { | |
1582 | /* Case 1 (Can backward propagate): | |
1583 | .... | |
1584 | bb0: | |
1585 | ... | |
1586 | for (int i = 0; i < n; i++) | |
1587 | { | |
1588 | vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7); | |
1589 | __riscv_vse16_v_i16mf4 (out + i + 5, v, 7); | |
1590 | } | |
1591 | The local_dem is compatible with reaching_out. Such case is | |
1592 | worthwhile backward propagation. */ | |
1593 | return true; | |
1594 | } | |
1595 | else | |
1596 | { | |
1597 | if (support_relaxed_compatible_p (block_info.reaching_out, | |
1598 | block_info.local_dem)) | |
1599 | return true; | |
1600 | /* Case 2 (Don't backward propagate): | |
1601 | .... | |
1602 | bb0: | |
1603 | ... | |
1604 | for (int i = 0; i < n; i++) | |
1605 | { | |
1606 | vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7); | |
1607 | __riscv_vse16_v_i16mf4 (out + i + 5, v, 7); | |
1608 | vint16mf2_t v2 = __riscv_vle16_v_i16mf2 (in + i + 6, 8); | |
1609 | __riscv_vse16_v_i16mf2 (out + i + 6, v, 8); | |
1610 | } | |
1611 | The local_dem is incompatible with reaching_out. | |
1612 | It makes no sense to backward propagate the local_dem since we | |
1613 | can't avoid VSETVL inside the loop. */ | |
1614 | return false; | |
1615 | } | |
1616 | } | |
1617 | else | |
1618 | { | |
1619 | gcc_assert (block_info.reaching_out.unknown_p ()); | |
1620 | /* Case 3 (Don't backward propagate): | |
1621 | .... | |
1622 | bb0: | |
1623 | ... | |
1624 | for (int i = 0; i < n; i++) | |
1625 | { | |
1626 | vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7); | |
1627 | __riscv_vse16_v_i16mf4 (out + i + 5, v, 7); | |
1628 | fn3 (); | |
1629 | } | |
1630 | The local_dem is VALID, but the reaching_out is UNKNOWN. | |
1631 | It makes no sense to backward propagate the local_dem since we | |
1632 | can't avoid VSETVL inside the loop. */ | |
1633 | return false; | |
1634 | } | |
1635 | } | |
1636 | ||
1637 | return true; | |
1638 | } | |
1639 | ||
a2d12abe JZZ |
1640 | /* Count the number of REGNO in RINSN. */ |
1641 | static int | |
1642 | count_regno_occurrences (rtx_insn *rinsn, unsigned int regno) | |
1643 | { | |
1644 | int count = 0; | |
1645 | extract_insn (rinsn); | |
1646 | for (int i = 0; i < recog_data.n_operands; i++) | |
1647 | if (refers_to_regno_p (regno, recog_data.operand[i])) | |
1648 | count++; | |
1649 | return count; | |
1650 | } | |
1651 | ||
12b23c71 JZZ |
1652 | avl_info::avl_info (const avl_info &other) |
1653 | { | |
1654 | m_value = other.get_value (); | |
1655 | m_source = other.get_source (); | |
1656 | } | |
1657 | ||
9243c3d1 JZZ |
1658 | avl_info::avl_info (rtx value_in, set_info *source_in) |
1659 | : m_value (value_in), m_source (source_in) | |
1660 | {} | |
1661 | ||
4f673c5e JZZ |
1662 | bool |
1663 | avl_info::single_source_equal_p (const avl_info &other) const | |
1664 | { | |
1665 | set_info *set1 = m_source; | |
1666 | set_info *set2 = other.get_source (); | |
6b6b9c68 JZZ |
1667 | insn_info *insn1 = extract_single_source (set1); |
1668 | insn_info *insn2 = extract_single_source (set2); | |
1669 | if (!insn1 || !insn2) | |
1670 | return false; | |
1671 | return source_equal_p (insn1, insn2); | |
1672 | } | |
1673 | ||
1674 | bool | |
1675 | avl_info::multiple_source_equal_p (const avl_info &other) const | |
1676 | { | |
1677 | /* TODO: We don't do too much optimization here since it's | |
1678 | too complicated in case of analyzing the PHI node. | |
1679 | ||
1680 | For example: | |
1681 | void f (void * restrict in, void * restrict out, int n, int m, int cond) | |
1682 | { | |
1683 | size_t vl; | |
1684 | switch (cond) | |
1685 | { | |
1686 | case 1: | |
1687 | vl = 100; | |
1688 | break; | |
1689 | case 2: | |
1690 | vl = *(size_t*)(in + 100); | |
1691 | break; | |
1692 | case 3: | |
1693 | { | |
1694 | size_t new_vl = *(size_t*)(in + 500); | |
1695 | size_t new_vl2 = *(size_t*)(in + 600); | |
1696 | vl = new_vl + new_vl2 + 777; | |
1697 | break; | |
1698 | } | |
1699 | default: | |
1700 | vl = 4000; | |
1701 | break; | |
1702 | } | |
1703 | for (size_t i = 0; i < n; i++) | |
1704 | { | |
1705 | vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl); | |
1706 | __riscv_vse8_v_i8mf8 (out + i, v, vl); | |
1707 | ||
1708 | vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl); | |
1709 | __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl); | |
1710 | } | |
1711 | ||
1712 | size_t vl2; | |
1713 | switch (cond) | |
1714 | { | |
1715 | case 1: | |
1716 | vl2 = 100; | |
1717 | break; | |
1718 | case 2: | |
1719 | vl2 = *(size_t*)(in + 100); | |
1720 | break; | |
1721 | case 3: | |
1722 | { | |
1723 | size_t new_vl = *(size_t*)(in + 500); | |
1724 | size_t new_vl2 = *(size_t*)(in + 600); | |
1725 | vl2 = new_vl + new_vl2 + 777; | |
1726 | break; | |
1727 | } | |
1728 | default: | |
1729 | vl2 = 4000; | |
1730 | break; | |
1731 | } | |
1732 | for (size_t i = 0; i < m; i++) | |
1733 | { | |
1734 | vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl2); | |
1735 | __riscv_vse8_v_i8mf8 (out + i + 300, v, vl2); | |
1736 | vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 200, vl2); | |
1737 | __riscv_vse8_v_i8mf8 (out + i + 200, v2, vl2); | |
1738 | } | |
1739 | } | |
1740 | Such case may not be necessary to optimize since the codes of defining | |
1741 | vl and vl2 are redundant. */ | |
1742 | return m_source == other.get_source (); | |
4f673c5e JZZ |
1743 | } |
1744 | ||
9243c3d1 JZZ |
1745 | avl_info & |
1746 | avl_info::operator= (const avl_info &other) | |
1747 | { | |
1748 | m_value = other.get_value (); | |
1749 | m_source = other.get_source (); | |
1750 | return *this; | |
1751 | } | |
1752 | ||
1753 | bool | |
1754 | avl_info::operator== (const avl_info &other) const | |
1755 | { | |
1756 | if (!m_value) | |
1757 | return !other.get_value (); | |
1758 | if (!other.get_value ()) | |
1759 | return false; | |
1760 | ||
9243c3d1 JZZ |
1761 | if (GET_CODE (m_value) != GET_CODE (other.get_value ())) |
1762 | return false; | |
1763 | ||
1764 | /* Handle CONST_INT AVL. */ | |
1765 | if (CONST_INT_P (m_value)) | |
1766 | return INTVAL (m_value) == INTVAL (other.get_value ()); | |
1767 | ||
1768 | /* Handle VLMAX AVL. */ | |
1769 | if (vlmax_avl_p (m_value)) | |
1770 | return vlmax_avl_p (other.get_value ()); | |
1771 | ||
4f673c5e JZZ |
1772 | /* If any source is undef value, we think they are not equal. */ |
1773 | if (!m_source || !other.get_source ()) | |
1774 | return false; | |
1775 | ||
1776 | /* If both sources are single source (defined by a single real RTL) | |
1777 | and their definitions are same. */ | |
1778 | if (single_source_equal_p (other)) | |
1779 | return true; | |
1780 | ||
6b6b9c68 | 1781 | return multiple_source_equal_p (other); |
9243c3d1 JZZ |
1782 | } |
1783 | ||
1784 | bool | |
1785 | avl_info::operator!= (const avl_info &other) const | |
1786 | { | |
1787 | return !(*this == other); | |
1788 | } | |
1789 | ||
ec99ffab JZZ |
1790 | bool |
1791 | avl_info::has_non_zero_avl () const | |
1792 | { | |
1793 | if (has_avl_imm ()) | |
1794 | return INTVAL (get_value ()) > 0; | |
1795 | if (has_avl_reg ()) | |
1796 | return vlmax_avl_p (get_value ()); | |
1797 | return false; | |
1798 | } | |
1799 | ||
9243c3d1 JZZ |
1800 | /* Initialize VL/VTYPE information. */ |
1801 | vl_vtype_info::vl_vtype_info (avl_info avl_in, uint8_t sew_in, | |
1802 | enum vlmul_type vlmul_in, uint8_t ratio_in, | |
1803 | bool ta_in, bool ma_in) | |
1804 | : m_avl (avl_in), m_sew (sew_in), m_vlmul (vlmul_in), m_ratio (ratio_in), | |
1805 | m_ta (ta_in), m_ma (ma_in) | |
1806 | { | |
1807 | gcc_assert (valid_sew_p (m_sew) && "Unexpected SEW"); | |
1808 | } | |
1809 | ||
1810 | bool | |
1811 | vl_vtype_info::operator== (const vl_vtype_info &other) const | |
1812 | { | |
6b6b9c68 | 1813 | return same_avl_p (other) && m_sew == other.get_sew () |
9243c3d1 JZZ |
1814 | && m_vlmul == other.get_vlmul () && m_ta == other.get_ta () |
1815 | && m_ma == other.get_ma () && m_ratio == other.get_ratio (); | |
1816 | } | |
1817 | ||
1818 | bool | |
1819 | vl_vtype_info::operator!= (const vl_vtype_info &other) const | |
1820 | { | |
1821 | return !(*this == other); | |
1822 | } | |
1823 | ||
9243c3d1 JZZ |
1824 | bool |
1825 | vl_vtype_info::same_avl_p (const vl_vtype_info &other) const | |
1826 | { | |
6b6b9c68 JZZ |
1827 | /* We need to compare both RTL and SET. If both AVL are CONST_INT. |
1828 | For example, const_int 3 and const_int 4, we need to compare | |
1829 | RTL. If both AVL are REG and their REGNO are same, we need to | |
1830 | compare SET. */ | |
1831 | return get_avl () == other.get_avl () | |
1832 | && get_avl_source () == other.get_avl_source (); | |
9243c3d1 JZZ |
1833 | } |
1834 | ||
1835 | bool | |
1836 | vl_vtype_info::same_vtype_p (const vl_vtype_info &other) const | |
1837 | { | |
1838 | return get_sew () == other.get_sew () && get_vlmul () == other.get_vlmul () | |
1839 | && get_ta () == other.get_ta () && get_ma () == other.get_ma (); | |
1840 | } | |
1841 | ||
1842 | bool | |
1843 | vl_vtype_info::same_vlmax_p (const vl_vtype_info &other) const | |
1844 | { | |
1845 | return get_ratio () == other.get_ratio (); | |
1846 | } | |
1847 | ||
1848 | /* Compare the compatibility between Dem1 and Dem2. | |
1849 | If Dem1 > Dem2, Dem1 has bigger compatibility then Dem2 | |
1850 | meaning Dem1 is easier be compatible with others than Dem2 | |
1851 | or Dem2 is stricter than Dem1. | |
1852 | For example, Dem1 (demand SEW + LMUL) > Dem2 (demand RATIO). */ | |
9243c3d1 JZZ |
1853 | bool |
1854 | vector_insn_info::operator>= (const vector_insn_info &other) const | |
1855 | { | |
60bd33bc JZZ |
1856 | if (support_relaxed_compatible_p (*this, other)) |
1857 | { | |
1858 | unsigned array_size = sizeof (unavailable_conds) / sizeof (demands_cond); | |
1859 | /* Bypass AVL unavailable cases. */ | |
1860 | for (unsigned i = 2; i < array_size; i++) | |
1861 | if (unavailable_conds[i].pair.match_cond_p (this->get_demands (), | |
1862 | other.get_demands ()) | |
1863 | && unavailable_conds[i].incompatible_p (*this, other)) | |
1864 | return false; | |
1865 | return true; | |
1866 | } | |
1867 | ||
1868 | if (!other.compatible_p (static_cast<const vl_vtype_info &> (*this))) | |
1869 | return false; | |
1870 | if (!this->compatible_p (static_cast<const vl_vtype_info &> (other))) | |
9243c3d1 JZZ |
1871 | return true; |
1872 | ||
1873 | if (*this == other) | |
1874 | return true; | |
1875 | ||
ec99ffab JZZ |
1876 | for (const auto &cond : unavailable_conds) |
1877 | if (cond.pair.match_cond_p (this->get_demands (), other.get_demands ()) | |
1878 | && cond.incompatible_p (*this, other)) | |
1879 | return false; | |
9243c3d1 JZZ |
1880 | |
1881 | return true; | |
1882 | } | |
1883 | ||
1884 | bool | |
1885 | vector_insn_info::operator== (const vector_insn_info &other) const | |
1886 | { | |
1887 | gcc_assert (!uninit_p () && !other.uninit_p () | |
1888 | && "Uninitialization should not happen"); | |
1889 | ||
1890 | /* Empty is only equal to another Empty. */ | |
1891 | if (empty_p ()) | |
1892 | return other.empty_p (); | |
1893 | if (other.empty_p ()) | |
1894 | return empty_p (); | |
1895 | ||
1896 | /* Unknown is only equal to another Unknown. */ | |
1897 | if (unknown_p ()) | |
1898 | return other.unknown_p (); | |
1899 | if (other.unknown_p ()) | |
1900 | return unknown_p (); | |
1901 | ||
1902 | for (size_t i = 0; i < NUM_DEMAND; i++) | |
1903 | if (m_demands[i] != other.demand_p ((enum demand_type) i)) | |
1904 | return false; | |
1905 | ||
7ae4d1df JZZ |
1906 | if (vector_config_insn_p (m_insn->rtl ()) |
1907 | || vector_config_insn_p (other.get_insn ()->rtl ())) | |
1908 | if (m_insn != other.get_insn ()) | |
1909 | return false; | |
9243c3d1 JZZ |
1910 | |
1911 | if (!same_avl_p (other)) | |
1912 | return false; | |
1913 | ||
1914 | /* If the full VTYPE is valid, check that it is the same. */ | |
1915 | return same_vtype_p (other); | |
1916 | } | |
1917 | ||
1918 | void | |
1919 | vector_insn_info::parse_insn (rtx_insn *rinsn) | |
1920 | { | |
1921 | *this = vector_insn_info (); | |
1922 | if (!NONDEBUG_INSN_P (rinsn)) | |
1923 | return; | |
1924 | if (!has_vtype_op (rinsn)) | |
1925 | return; | |
1926 | m_state = VALID; | |
1927 | extract_insn_cached (rinsn); | |
1928 | const rtx avl = recog_data.operand[get_attr_vl_op_idx (rinsn)]; | |
1929 | m_avl = avl_info (avl, nullptr); | |
1930 | m_sew = ::get_sew (rinsn); | |
1931 | m_vlmul = ::get_vlmul (rinsn); | |
1932 | m_ta = tail_agnostic_p (rinsn); | |
1933 | m_ma = mask_agnostic_p (rinsn); | |
1934 | } | |
1935 | ||
1936 | void | |
1937 | vector_insn_info::parse_insn (insn_info *insn) | |
1938 | { | |
1939 | *this = vector_insn_info (); | |
1940 | ||
1941 | /* Return if it is debug insn for the consistency with optimize == 0. */ | |
1942 | if (insn->is_debug_insn ()) | |
1943 | return; | |
1944 | ||
1945 | /* We set it as unknown since we don't what will happen in CALL or ASM. */ | |
1946 | if (insn->is_call () || insn->is_asm ()) | |
1947 | { | |
1948 | set_unknown (); | |
1949 | return; | |
1950 | } | |
1951 | ||
1952 | /* If this is something that updates VL/VTYPE that we don't know about, set | |
1953 | the state to unknown. */ | |
60bd33bc | 1954 | if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()) |
9243c3d1 JZZ |
1955 | && (find_access (insn->defs (), VL_REGNUM) |
1956 | || find_access (insn->defs (), VTYPE_REGNUM))) | |
1957 | { | |
1958 | set_unknown (); | |
1959 | return; | |
1960 | } | |
1961 | ||
1962 | if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())) | |
1963 | return; | |
1964 | ||
1965 | /* Warning: This function has to work on both the lowered (i.e. post | |
1966 | emit_local_forward_vsetvls) and pre-lowering forms. The main implication | |
1967 | of this is that it can't use the value of a SEW, VL, or Policy operand as | |
1968 | they might be stale after lowering. */ | |
1969 | vl_vtype_info::operator= (get_vl_vtype_info (insn)); | |
1970 | m_insn = insn; | |
1971 | m_state = VALID; | |
1972 | if (vector_config_insn_p (insn->rtl ())) | |
1973 | { | |
1974 | m_demands[DEMAND_AVL] = true; | |
1975 | m_demands[DEMAND_RATIO] = true; | |
1976 | return; | |
1977 | } | |
1978 | ||
1979 | if (has_vl_op (insn->rtl ())) | |
1980 | m_demands[DEMAND_AVL] = true; | |
1981 | ||
1982 | if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE) | |
1983 | m_demands[DEMAND_RATIO] = true; | |
1984 | else | |
1985 | { | |
1986 | /* TODO: By default, if it doesn't demand RATIO, we set it | |
1987 | demand SEW && LMUL both. Some instructions may demand SEW | |
1988 | only and ignore LMUL, will fix it later. */ | |
1989 | m_demands[DEMAND_SEW] = true; | |
ec99ffab JZZ |
1990 | if (!ignore_vlmul_insn_p (insn->rtl ())) |
1991 | m_demands[DEMAND_LMUL] = true; | |
9243c3d1 JZZ |
1992 | } |
1993 | ||
1994 | if (get_attr_ta (insn->rtl ()) != INVALID_ATTRIBUTE) | |
1995 | m_demands[DEMAND_TAIL_POLICY] = true; | |
1996 | if (get_attr_ma (insn->rtl ()) != INVALID_ATTRIBUTE) | |
1997 | m_demands[DEMAND_MASK_POLICY] = true; | |
6b6b9c68 JZZ |
1998 | |
1999 | if (vector_config_insn_p (insn->rtl ())) | |
2000 | return; | |
2001 | ||
ec99ffab JZZ |
2002 | if (scalar_move_insn_p (insn->rtl ())) |
2003 | { | |
2004 | if (m_avl.has_non_zero_avl ()) | |
2005 | m_demands[DEMAND_NONZERO_AVL] = true; | |
2006 | if (m_ta) | |
2007 | m_demands[DEMAND_GE_SEW] = true; | |
2008 | } | |
2009 | ||
2010 | if (!m_avl.has_avl_reg () || vlmax_avl_p (get_avl ()) || !m_avl.get_source ()) | |
2011 | return; | |
2012 | if (!m_avl.get_source ()->insn ()->is_real () | |
2013 | && !m_avl.get_source ()->insn ()->is_phi ()) | |
6b6b9c68 JZZ |
2014 | return; |
2015 | ||
2016 | insn_info *def_insn = extract_single_source (m_avl.get_source ()); | |
ec99ffab JZZ |
2017 | if (!def_insn || !vsetvl_insn_p (def_insn->rtl ())) |
2018 | return; | |
9243c3d1 | 2019 | |
ec99ffab JZZ |
2020 | vector_insn_info new_info; |
2021 | new_info.parse_insn (def_insn); | |
2022 | if (!same_vlmax_p (new_info) && !scalar_move_insn_p (insn->rtl ())) | |
2023 | return; | |
2024 | /* TODO: Currently, we don't forward AVL for non-VLMAX vsetvl. */ | |
2025 | if (vlmax_avl_p (new_info.get_avl ())) | |
2026 | set_avl_info (avl_info (new_info.get_avl (), get_avl_source ())); | |
2027 | ||
2028 | if (scalar_move_insn_p (insn->rtl ()) && m_avl.has_non_zero_avl ()) | |
2029 | m_demands[DEMAND_NONZERO_AVL] = true; | |
9243c3d1 JZZ |
2030 | } |
2031 | ||
2032 | bool | |
2033 | vector_insn_info::compatible_p (const vector_insn_info &other) const | |
2034 | { | |
2035 | gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p () | |
2036 | && "Can't compare invalid demanded infos"); | |
2037 | ||
ec99ffab | 2038 | for (const auto &cond : incompatible_conds) |
60bd33bc | 2039 | if (cond.dual_incompatible_p (*this, other)) |
ec99ffab | 2040 | return false; |
9243c3d1 JZZ |
2041 | return true; |
2042 | } | |
2043 | ||
d51f2456 JZ |
2044 | bool |
2045 | vector_insn_info::skip_avl_compatible_p (const vector_insn_info &other) const | |
2046 | { | |
2047 | gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p () | |
2048 | && "Can't compare invalid demanded infos"); | |
2049 | unsigned array_size = sizeof (incompatible_conds) / sizeof (demands_cond); | |
2050 | /* Bypass AVL incompatible cases. */ | |
2051 | for (unsigned i = 1; i < array_size; i++) | |
2052 | if (incompatible_conds[i].dual_incompatible_p (*this, other)) | |
2053 | return false; | |
2054 | return true; | |
2055 | } | |
2056 | ||
9243c3d1 JZZ |
2057 | bool |
2058 | vector_insn_info::compatible_avl_p (const vl_vtype_info &other) const | |
2059 | { | |
2060 | gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info"); | |
2061 | gcc_assert (!unknown_p () && "Can't compare AVL in unknown state"); | |
2062 | if (!demand_p (DEMAND_AVL)) | |
2063 | return true; | |
ec99ffab JZZ |
2064 | if (demand_p (DEMAND_NONZERO_AVL) && other.has_non_zero_avl ()) |
2065 | return true; | |
9243c3d1 JZZ |
2066 | return get_avl_info () == other.get_avl_info (); |
2067 | } | |
2068 | ||
4f673c5e JZZ |
2069 | bool |
2070 | vector_insn_info::compatible_avl_p (const avl_info &other) const | |
2071 | { | |
2072 | gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info"); | |
2073 | gcc_assert (!unknown_p () && "Can't compare AVL in unknown state"); | |
2074 | gcc_assert (demand_p (DEMAND_AVL) && "Can't compare AVL undemand state"); | |
ec99ffab JZZ |
2075 | if (!demand_p (DEMAND_AVL)) |
2076 | return true; | |
2077 | if (demand_p (DEMAND_NONZERO_AVL) && other.has_non_zero_avl ()) | |
2078 | return true; | |
4f673c5e JZZ |
2079 | return get_avl_info () == other; |
2080 | } | |
2081 | ||
9243c3d1 JZZ |
2082 | bool |
2083 | vector_insn_info::compatible_vtype_p (const vl_vtype_info &other) const | |
2084 | { | |
2085 | gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info"); | |
2086 | gcc_assert (!unknown_p () && "Can't compare VTYPE in unknown state"); | |
ec99ffab JZZ |
2087 | if (demand_p (DEMAND_SEW)) |
2088 | { | |
2089 | if (!demand_p (DEMAND_GE_SEW) && m_sew != other.get_sew ()) | |
2090 | return false; | |
2091 | if (demand_p (DEMAND_GE_SEW) && m_sew > other.get_sew ()) | |
2092 | return false; | |
2093 | } | |
9243c3d1 JZZ |
2094 | if (demand_p (DEMAND_LMUL) && m_vlmul != other.get_vlmul ()) |
2095 | return false; | |
2096 | if (demand_p (DEMAND_RATIO) && m_ratio != other.get_ratio ()) | |
2097 | return false; | |
2098 | if (demand_p (DEMAND_TAIL_POLICY) && m_ta != other.get_ta ()) | |
2099 | return false; | |
2100 | if (demand_p (DEMAND_MASK_POLICY) && m_ma != other.get_ma ()) | |
2101 | return false; | |
2102 | return true; | |
2103 | } | |
2104 | ||
2105 | /* Determine whether the vector instructions requirements represented by | |
2106 | Require are compatible with the previous vsetvli instruction represented | |
2107 | by this. INSN is the instruction whose requirements we're considering. */ | |
2108 | bool | |
2109 | vector_insn_info::compatible_p (const vl_vtype_info &curr_info) const | |
2110 | { | |
2111 | gcc_assert (!uninit_p () && "Can't handle uninitialized info"); | |
2112 | if (empty_p ()) | |
2113 | return false; | |
2114 | ||
2115 | /* Nothing is compatible with Unknown. */ | |
2116 | if (unknown_p ()) | |
2117 | return false; | |
2118 | ||
2119 | /* If the instruction doesn't need an AVLReg and the SEW matches, consider | |
2120 | it compatible. */ | |
2121 | if (!demand_p (DEMAND_AVL)) | |
2122 | if (m_sew == curr_info.get_sew ()) | |
2123 | return true; | |
2124 | ||
2125 | return compatible_avl_p (curr_info) && compatible_vtype_p (curr_info); | |
2126 | } | |
2127 | ||
6b6b9c68 JZZ |
2128 | bool |
2129 | vector_insn_info::available_p (const vector_insn_info &other) const | |
2130 | { | |
ec99ffab JZZ |
2131 | return *this >= other; |
2132 | } | |
2133 | ||
2134 | void | |
2135 | vector_insn_info::fuse_avl (const vector_insn_info &info1, | |
2136 | const vector_insn_info &info2) | |
2137 | { | |
2138 | set_insn (info1.get_insn ()); | |
2139 | if (info1.demand_p (DEMAND_AVL)) | |
2140 | { | |
2141 | if (info1.demand_p (DEMAND_NONZERO_AVL)) | |
2142 | { | |
2143 | if (info2.demand_p (DEMAND_AVL) | |
2144 | && !info2.demand_p (DEMAND_NONZERO_AVL)) | |
2145 | { | |
2146 | set_avl_info (info2.get_avl_info ()); | |
2147 | set_demand (DEMAND_AVL, true); | |
2148 | set_demand (DEMAND_NONZERO_AVL, false); | |
2149 | return; | |
2150 | } | |
2151 | } | |
2152 | set_avl_info (info1.get_avl_info ()); | |
2153 | set_demand (DEMAND_NONZERO_AVL, info1.demand_p (DEMAND_NONZERO_AVL)); | |
2154 | } | |
2155 | else | |
2156 | { | |
2157 | set_avl_info (info2.get_avl_info ()); | |
2158 | set_demand (DEMAND_NONZERO_AVL, info2.demand_p (DEMAND_NONZERO_AVL)); | |
2159 | } | |
2160 | set_demand (DEMAND_AVL, | |
2161 | info1.demand_p (DEMAND_AVL) || info2.demand_p (DEMAND_AVL)); | |
2162 | } | |
2163 | ||
2164 | void | |
2165 | vector_insn_info::fuse_sew_lmul (const vector_insn_info &info1, | |
2166 | const vector_insn_info &info2) | |
2167 | { | |
2168 | /* We need to fuse sew && lmul according to demand info: | |
2169 | ||
2170 | 1. GE_SEW. | |
2171 | 2. SEW. | |
2172 | 3. LMUL. | |
2173 | 4. RATIO. */ | |
2174 | if (same_sew_lmul_demand_p (info1.get_demands (), info2.get_demands ())) | |
2175 | { | |
2176 | set_demand (DEMAND_SEW, info2.demand_p (DEMAND_SEW)); | |
2177 | set_demand (DEMAND_LMUL, info2.demand_p (DEMAND_LMUL)); | |
2178 | set_demand (DEMAND_RATIO, info2.demand_p (DEMAND_RATIO)); | |
2179 | set_demand (DEMAND_GE_SEW, info2.demand_p (DEMAND_GE_SEW)); | |
2180 | set_sew (info2.get_sew ()); | |
2181 | set_vlmul (info2.get_vlmul ()); | |
2182 | set_ratio (info2.get_ratio ()); | |
2183 | return; | |
2184 | } | |
2185 | for (const auto &rule : fuse_rules) | |
2186 | { | |
2187 | if (rule.pair.match_cond_p (info1.get_demands (), info2.get_demands ())) | |
2188 | { | |
2189 | set_demand (DEMAND_SEW, rule.demand_sew_p); | |
2190 | set_demand (DEMAND_LMUL, rule.demand_lmul_p); | |
2191 | set_demand (DEMAND_RATIO, rule.demand_ratio_p); | |
2192 | set_demand (DEMAND_GE_SEW, rule.demand_ge_sew_p); | |
2193 | set_sew (rule.new_sew (info1, info2)); | |
2194 | set_vlmul (rule.new_vlmul (info1, info2)); | |
2195 | set_ratio (rule.new_ratio (info1, info2)); | |
2196 | return; | |
2197 | } | |
2198 | if (rule.pair.match_cond_p (info2.get_demands (), info1.get_demands ())) | |
2199 | { | |
2200 | set_demand (DEMAND_SEW, rule.demand_sew_p); | |
2201 | set_demand (DEMAND_LMUL, rule.demand_lmul_p); | |
2202 | set_demand (DEMAND_RATIO, rule.demand_ratio_p); | |
2203 | set_demand (DEMAND_GE_SEW, rule.demand_ge_sew_p); | |
2204 | set_sew (rule.new_sew (info2, info1)); | |
2205 | set_vlmul (rule.new_vlmul (info2, info1)); | |
2206 | set_ratio (rule.new_ratio (info2, info1)); | |
2207 | return; | |
2208 | } | |
2209 | } | |
2210 | gcc_unreachable (); | |
2211 | } | |
2212 | ||
2213 | void | |
2214 | vector_insn_info::fuse_tail_policy (const vector_insn_info &info1, | |
2215 | const vector_insn_info &info2) | |
2216 | { | |
2217 | if (info1.demand_p (DEMAND_TAIL_POLICY)) | |
2218 | { | |
2219 | set_ta (info1.get_ta ()); | |
2220 | demand (DEMAND_TAIL_POLICY); | |
2221 | } | |
2222 | else if (info2.demand_p (DEMAND_TAIL_POLICY)) | |
2223 | { | |
2224 | set_ta (info2.get_ta ()); | |
2225 | demand (DEMAND_TAIL_POLICY); | |
2226 | } | |
2227 | else | |
2228 | set_ta (get_default_ta ()); | |
2229 | } | |
2230 | ||
2231 | void | |
2232 | vector_insn_info::fuse_mask_policy (const vector_insn_info &info1, | |
2233 | const vector_insn_info &info2) | |
2234 | { | |
2235 | if (info1.demand_p (DEMAND_MASK_POLICY)) | |
2236 | { | |
2237 | set_ma (info1.get_ma ()); | |
2238 | demand (DEMAND_MASK_POLICY); | |
2239 | } | |
2240 | else if (info2.demand_p (DEMAND_MASK_POLICY)) | |
2241 | { | |
2242 | set_ma (info2.get_ma ()); | |
2243 | demand (DEMAND_MASK_POLICY); | |
2244 | } | |
2245 | else | |
2246 | set_ma (get_default_ma ()); | |
6b6b9c68 JZZ |
2247 | } |
2248 | ||
9243c3d1 JZZ |
2249 | vector_insn_info |
2250 | vector_insn_info::merge (const vector_insn_info &merge_info, | |
d51f2456 | 2251 | enum merge_type type) const |
9243c3d1 | 2252 | { |
6b6b9c68 JZZ |
2253 | if (!vsetvl_insn_p (get_insn ()->rtl ())) |
2254 | gcc_assert (this->compatible_p (merge_info) | |
2255 | && "Can't merge incompatible demanded infos"); | |
9243c3d1 JZZ |
2256 | |
2257 | vector_insn_info new_info; | |
ec99ffab | 2258 | new_info.set_valid (); |
4f673c5e | 2259 | if (type == LOCAL_MERGE) |
9243c3d1 | 2260 | { |
4f673c5e | 2261 | /* For local backward data flow, we always update INSN && AVL as the |
ec99ffab JZZ |
2262 | latest INSN and AVL so that we can keep track status of each INSN. */ |
2263 | new_info.fuse_avl (merge_info, *this); | |
9243c3d1 JZZ |
2264 | } |
2265 | else | |
2266 | { | |
4f673c5e | 2267 | /* For global data flow, we should keep original INSN and AVL if they |
ec99ffab | 2268 | valid since we should keep the life information of each block. |
9243c3d1 | 2269 | |
ec99ffab JZZ |
2270 | For example: |
2271 | bb 0 -> bb 1. | |
2272 | We should keep INSN && AVL of bb 1 since we will eventually emit | |
2273 | vsetvl instruction according to INSN and AVL of bb 1. */ | |
2274 | new_info.fuse_avl (*this, merge_info); | |
2275 | } | |
9243c3d1 | 2276 | |
ec99ffab JZZ |
2277 | new_info.fuse_sew_lmul (*this, merge_info); |
2278 | new_info.fuse_tail_policy (*this, merge_info); | |
2279 | new_info.fuse_mask_policy (*this, merge_info); | |
9243c3d1 JZZ |
2280 | return new_info; |
2281 | } | |
2282 | ||
60bd33bc JZZ |
2283 | bool |
2284 | vector_insn_info::update_fault_first_load_avl (insn_info *insn) | |
2285 | { | |
2286 | // Update AVL to vl-output of the fault first load. | |
2287 | const insn_info *read_vl = get_forward_read_vl_insn (insn); | |
2288 | if (read_vl) | |
2289 | { | |
2290 | rtx vl = SET_DEST (PATTERN (read_vl->rtl ())); | |
2291 | def_info *def = find_access (read_vl->defs (), REGNO (vl)); | |
2292 | set_info *set = safe_dyn_cast<set_info *> (def); | |
2293 | set_avl_info (avl_info (vl, set)); | |
2294 | set_insn (insn); | |
2295 | return true; | |
2296 | } | |
2297 | return false; | |
2298 | } | |
2299 | ||
9243c3d1 JZZ |
2300 | void |
2301 | vector_insn_info::dump (FILE *file) const | |
2302 | { | |
2303 | fprintf (file, "["); | |
2304 | if (uninit_p ()) | |
2305 | fprintf (file, "UNINITIALIZED,"); | |
2306 | else if (valid_p ()) | |
2307 | fprintf (file, "VALID,"); | |
2308 | else if (unknown_p ()) | |
2309 | fprintf (file, "UNKNOWN,"); | |
2310 | else if (empty_p ()) | |
2311 | fprintf (file, "EMPTY,"); | |
6b6b9c68 JZZ |
2312 | else if (hard_empty_p ()) |
2313 | fprintf (file, "HARD_EMPTY,"); | |
4f673c5e JZZ |
2314 | else if (dirty_with_killed_avl_p ()) |
2315 | fprintf (file, "DIRTY_WITH_KILLED_AVL,"); | |
9243c3d1 JZZ |
2316 | else |
2317 | fprintf (file, "DIRTY,"); | |
2318 | ||
2319 | fprintf (file, "Demand field={%d(VL),", demand_p (DEMAND_AVL)); | |
ec99ffab | 2320 | fprintf (file, "%d(DEMAND_NONZERO_AVL),", demand_p (DEMAND_NONZERO_AVL)); |
9243c3d1 | 2321 | fprintf (file, "%d(SEW),", demand_p (DEMAND_SEW)); |
ec99ffab | 2322 | fprintf (file, "%d(DEMAND_GE_SEW),", demand_p (DEMAND_GE_SEW)); |
9243c3d1 JZZ |
2323 | fprintf (file, "%d(LMUL),", demand_p (DEMAND_LMUL)); |
2324 | fprintf (file, "%d(RATIO),", demand_p (DEMAND_RATIO)); | |
2325 | fprintf (file, "%d(TAIL_POLICY),", demand_p (DEMAND_TAIL_POLICY)); | |
2326 | fprintf (file, "%d(MASK_POLICY)}\n", demand_p (DEMAND_MASK_POLICY)); | |
2327 | ||
2328 | fprintf (file, "AVL="); | |
2329 | print_rtl_single (file, get_avl ()); | |
2330 | fprintf (file, "SEW=%d,", get_sew ()); | |
2331 | fprintf (file, "VLMUL=%d,", get_vlmul ()); | |
2332 | fprintf (file, "RATIO=%d,", get_ratio ()); | |
2333 | fprintf (file, "TAIL_POLICY=%d,", get_ta ()); | |
2334 | fprintf (file, "MASK_POLICY=%d", get_ma ()); | |
2335 | fprintf (file, "]\n"); | |
2336 | ||
2337 | if (valid_p ()) | |
2338 | { | |
2339 | if (get_insn ()) | |
2340 | { | |
12b23c71 JZZ |
2341 | fprintf (file, "The real INSN="); |
2342 | print_rtl_single (file, get_insn ()->rtl ()); | |
9243c3d1 | 2343 | } |
9243c3d1 JZZ |
2344 | } |
2345 | } | |
2346 | ||
2347 | vector_infos_manager::vector_infos_manager () | |
2348 | { | |
2349 | vector_edge_list = nullptr; | |
2350 | vector_kill = nullptr; | |
2351 | vector_del = nullptr; | |
2352 | vector_insert = nullptr; | |
2353 | vector_antic = nullptr; | |
2354 | vector_transp = nullptr; | |
2355 | vector_comp = nullptr; | |
2356 | vector_avin = nullptr; | |
2357 | vector_avout = nullptr; | |
2358 | vector_insn_infos.safe_grow (get_max_uid ()); | |
2359 | vector_block_infos.safe_grow (last_basic_block_for_fn (cfun)); | |
2360 | if (!optimize) | |
2361 | { | |
2362 | basic_block cfg_bb; | |
2363 | rtx_insn *rinsn; | |
2364 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
2365 | { | |
2366 | vector_block_infos[cfg_bb->index].local_dem = vector_insn_info (); | |
2367 | vector_block_infos[cfg_bb->index].reaching_out = vector_insn_info (); | |
2368 | FOR_BB_INSNS (cfg_bb, rinsn) | |
2369 | vector_insn_infos[INSN_UID (rinsn)].parse_insn (rinsn); | |
2370 | } | |
2371 | } | |
2372 | else | |
2373 | { | |
2374 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2375 | { | |
2376 | vector_block_infos[bb->index ()].local_dem = vector_insn_info (); | |
2377 | vector_block_infos[bb->index ()].reaching_out = vector_insn_info (); | |
2378 | for (insn_info *insn : bb->real_insns ()) | |
2379 | vector_insn_infos[insn->uid ()].parse_insn (insn); | |
acc10c79 | 2380 | vector_block_infos[bb->index ()].probability = profile_probability (); |
9243c3d1 JZZ |
2381 | } |
2382 | } | |
2383 | } | |
2384 | ||
2385 | void | |
2386 | vector_infos_manager::create_expr (vector_insn_info &info) | |
2387 | { | |
2388 | for (size_t i = 0; i < vector_exprs.length (); i++) | |
2389 | if (*vector_exprs[i] == info) | |
2390 | return; | |
2391 | vector_exprs.safe_push (&info); | |
2392 | } | |
2393 | ||
2394 | size_t | |
2395 | vector_infos_manager::get_expr_id (const vector_insn_info &info) const | |
2396 | { | |
2397 | for (size_t i = 0; i < vector_exprs.length (); i++) | |
2398 | if (*vector_exprs[i] == info) | |
2399 | return i; | |
2400 | gcc_unreachable (); | |
2401 | } | |
2402 | ||
2403 | auto_vec<size_t> | |
2404 | vector_infos_manager::get_all_available_exprs ( | |
2405 | const vector_insn_info &info) const | |
2406 | { | |
2407 | auto_vec<size_t> available_list; | |
2408 | for (size_t i = 0; i < vector_exprs.length (); i++) | |
6b6b9c68 | 2409 | if (info.available_p (*vector_exprs[i])) |
9243c3d1 JZZ |
2410 | available_list.safe_push (i); |
2411 | return available_list; | |
2412 | } | |
2413 | ||
d06e9264 JZ |
2414 | bool |
2415 | vector_infos_manager::all_empty_predecessor_p (const basic_block cfg_bb) const | |
2416 | { | |
2417 | hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb); | |
2418 | for (const basic_block pred_cfg_bb : pred_cfg_bbs) | |
2419 | { | |
2420 | const auto &pred_block_info = vector_block_infos[pred_cfg_bb->index]; | |
2421 | if (!pred_block_info.local_dem.valid_or_dirty_p () | |
2422 | && !pred_block_info.reaching_out.valid_or_dirty_p ()) | |
2423 | continue; | |
2424 | return false; | |
2425 | } | |
2426 | return true; | |
2427 | } | |
2428 | ||
9243c3d1 JZZ |
2429 | bool |
2430 | vector_infos_manager::all_same_ratio_p (sbitmap bitdata) const | |
2431 | { | |
2432 | if (bitmap_empty_p (bitdata)) | |
2433 | return false; | |
2434 | ||
2435 | int ratio = -1; | |
2436 | unsigned int bb_index; | |
2437 | sbitmap_iterator sbi; | |
2438 | ||
2439 | EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi) | |
2440 | { | |
2441 | if (ratio == -1) | |
2442 | ratio = vector_exprs[bb_index]->get_ratio (); | |
2443 | else if (vector_exprs[bb_index]->get_ratio () != ratio) | |
2444 | return false; | |
2445 | } | |
2446 | return true; | |
2447 | } | |
2448 | ||
ff8f9544 JZ |
2449 | /* Return TRUE if the incoming vector configuration state |
2450 | to CFG_BB is compatible with the vector configuration | |
2451 | state in CFG_BB, FALSE otherwise. */ | |
2452 | bool | |
2453 | vector_infos_manager::all_avail_in_compatible_p (const basic_block cfg_bb) const | |
2454 | { | |
2455 | const auto &info = vector_block_infos[cfg_bb->index].local_dem; | |
2456 | sbitmap avin = vector_avin[cfg_bb->index]; | |
2457 | unsigned int bb_index; | |
2458 | sbitmap_iterator sbi; | |
2459 | EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi) | |
2460 | { | |
2461 | const auto &avin_info | |
2462 | = static_cast<const vl_vtype_info &> (*vector_exprs[bb_index]); | |
2463 | if (!info.compatible_p (avin_info)) | |
2464 | return false; | |
2465 | } | |
2466 | return true; | |
2467 | } | |
2468 | ||
005fad9d JZZ |
2469 | bool |
2470 | vector_infos_manager::all_same_avl_p (const basic_block cfg_bb, | |
2471 | sbitmap bitdata) const | |
2472 | { | |
2473 | if (bitmap_empty_p (bitdata)) | |
2474 | return false; | |
2475 | ||
2476 | const auto &block_info = vector_block_infos[cfg_bb->index]; | |
2477 | if (!block_info.local_dem.demand_p (DEMAND_AVL)) | |
2478 | return true; | |
2479 | ||
2480 | avl_info avl = block_info.local_dem.get_avl_info (); | |
2481 | unsigned int bb_index; | |
2482 | sbitmap_iterator sbi; | |
2483 | ||
2484 | EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi) | |
2485 | { | |
2486 | if (vector_exprs[bb_index]->get_avl_info () != avl) | |
2487 | return false; | |
2488 | } | |
2489 | return true; | |
2490 | } | |
2491 | ||
9243c3d1 JZZ |
2492 | size_t |
2493 | vector_infos_manager::expr_set_num (sbitmap bitdata) const | |
2494 | { | |
2495 | size_t count = 0; | |
2496 | for (size_t i = 0; i < vector_exprs.length (); i++) | |
2497 | if (bitmap_bit_p (bitdata, i)) | |
2498 | count++; | |
2499 | return count; | |
2500 | } | |
2501 | ||
2502 | void | |
2503 | vector_infos_manager::release (void) | |
2504 | { | |
2505 | if (!vector_insn_infos.is_empty ()) | |
2506 | vector_insn_infos.release (); | |
2507 | if (!vector_block_infos.is_empty ()) | |
2508 | vector_block_infos.release (); | |
2509 | if (!vector_exprs.is_empty ()) | |
2510 | vector_exprs.release (); | |
2511 | ||
ec99ffab JZZ |
2512 | gcc_assert (to_refine_vsetvls.is_empty ()); |
2513 | gcc_assert (to_delete_vsetvls.is_empty ()); | |
9243c3d1 | 2514 | if (optimize > 0) |
cfe3fbc6 JZZ |
2515 | free_bitmap_vectors (); |
2516 | } | |
2517 | ||
2518 | void | |
2519 | vector_infos_manager::create_bitmap_vectors (void) | |
2520 | { | |
2521 | /* Create the bitmap vectors. */ | |
2522 | vector_antic = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2523 | vector_exprs.length ()); | |
2524 | vector_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2525 | vector_exprs.length ()); | |
2526 | vector_comp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2527 | vector_exprs.length ()); | |
2528 | vector_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2529 | vector_exprs.length ()); | |
2530 | vector_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2531 | vector_exprs.length ()); | |
2532 | vector_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2533 | vector_exprs.length ()); | |
2534 | ||
2535 | bitmap_vector_ones (vector_transp, last_basic_block_for_fn (cfun)); | |
2536 | bitmap_vector_clear (vector_antic, last_basic_block_for_fn (cfun)); | |
2537 | bitmap_vector_clear (vector_comp, last_basic_block_for_fn (cfun)); | |
2538 | } | |
2539 | ||
2540 | void | |
2541 | vector_infos_manager::free_bitmap_vectors (void) | |
2542 | { | |
2543 | /* Finished. Free up all the things we've allocated. */ | |
2544 | free_edge_list (vector_edge_list); | |
2545 | if (vector_del) | |
2546 | sbitmap_vector_free (vector_del); | |
2547 | if (vector_insert) | |
2548 | sbitmap_vector_free (vector_insert); | |
2549 | if (vector_kill) | |
2550 | sbitmap_vector_free (vector_kill); | |
2551 | if (vector_antic) | |
2552 | sbitmap_vector_free (vector_antic); | |
2553 | if (vector_transp) | |
2554 | sbitmap_vector_free (vector_transp); | |
2555 | if (vector_comp) | |
2556 | sbitmap_vector_free (vector_comp); | |
2557 | if (vector_avin) | |
2558 | sbitmap_vector_free (vector_avin); | |
2559 | if (vector_avout) | |
2560 | sbitmap_vector_free (vector_avout); | |
2561 | ||
2562 | vector_edge_list = nullptr; | |
2563 | vector_kill = nullptr; | |
2564 | vector_del = nullptr; | |
2565 | vector_insert = nullptr; | |
2566 | vector_antic = nullptr; | |
2567 | vector_transp = nullptr; | |
2568 | vector_comp = nullptr; | |
2569 | vector_avin = nullptr; | |
2570 | vector_avout = nullptr; | |
9243c3d1 JZZ |
2571 | } |
2572 | ||
2573 | void | |
2574 | vector_infos_manager::dump (FILE *file) const | |
2575 | { | |
2576 | basic_block cfg_bb; | |
2577 | rtx_insn *rinsn; | |
2578 | ||
2579 | fprintf (file, "\n"); | |
2580 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
2581 | { | |
2582 | fprintf (file, "Local vector info of <bb %d>:\n", cfg_bb->index); | |
2583 | fprintf (file, "<HEADER>="); | |
2584 | vector_block_infos[cfg_bb->index].local_dem.dump (file); | |
2585 | FOR_BB_INSNS (cfg_bb, rinsn) | |
2586 | { | |
2587 | if (!NONDEBUG_INSN_P (rinsn) || !has_vtype_op (rinsn)) | |
2588 | continue; | |
2589 | fprintf (file, "<insn %d>=", INSN_UID (rinsn)); | |
2590 | const auto &info = vector_insn_infos[INSN_UID (rinsn)]; | |
2591 | info.dump (file); | |
2592 | } | |
2593 | fprintf (file, "<FOOTER>="); | |
2594 | vector_block_infos[cfg_bb->index].reaching_out.dump (file); | |
acc10c79 JZZ |
2595 | fprintf (file, "<Probability>="); |
2596 | vector_block_infos[cfg_bb->index].probability.dump (file); | |
9243c3d1 JZZ |
2597 | fprintf (file, "\n\n"); |
2598 | } | |
2599 | ||
2600 | fprintf (file, "\n"); | |
2601 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
2602 | { | |
2603 | fprintf (file, "Local properties of <bb %d>:\n", cfg_bb->index); | |
2604 | ||
2605 | fprintf (file, "<ANTLOC>="); | |
2606 | if (vector_antic == nullptr) | |
2607 | fprintf (file, "(nil)\n"); | |
2608 | else | |
2609 | dump_bitmap_file (file, vector_antic[cfg_bb->index]); | |
2610 | ||
2611 | fprintf (file, "<AVLOC>="); | |
2612 | if (vector_comp == nullptr) | |
2613 | fprintf (file, "(nil)\n"); | |
2614 | else | |
2615 | dump_bitmap_file (file, vector_comp[cfg_bb->index]); | |
2616 | ||
2617 | fprintf (file, "<TRANSP>="); | |
2618 | if (vector_transp == nullptr) | |
2619 | fprintf (file, "(nil)\n"); | |
2620 | else | |
2621 | dump_bitmap_file (file, vector_transp[cfg_bb->index]); | |
2622 | ||
2623 | fprintf (file, "<KILL>="); | |
2624 | if (vector_kill == nullptr) | |
2625 | fprintf (file, "(nil)\n"); | |
2626 | else | |
2627 | dump_bitmap_file (file, vector_kill[cfg_bb->index]); | |
2628 | } | |
2629 | ||
2630 | fprintf (file, "\n"); | |
2631 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
2632 | { | |
2633 | fprintf (file, "Global LCM (Lazy code motion) result of <bb %d>:\n", | |
2634 | cfg_bb->index); | |
2635 | ||
2636 | fprintf (file, "<AVIN>="); | |
2637 | if (vector_avin == nullptr) | |
2638 | fprintf (file, "(nil)\n"); | |
2639 | else | |
2640 | dump_bitmap_file (file, vector_avin[cfg_bb->index]); | |
2641 | ||
2642 | fprintf (file, "<AVOUT>="); | |
2643 | if (vector_avout == nullptr) | |
2644 | fprintf (file, "(nil)\n"); | |
2645 | else | |
2646 | dump_bitmap_file (file, vector_avout[cfg_bb->index]); | |
2647 | ||
2648 | fprintf (file, "<DELETE>="); | |
2649 | if (vector_del == nullptr) | |
2650 | fprintf (file, "(nil)\n"); | |
2651 | else | |
2652 | dump_bitmap_file (file, vector_del[cfg_bb->index]); | |
2653 | } | |
2654 | ||
2655 | fprintf (file, "\nGlobal LCM (Lazy code motion) INSERT info:\n"); | |
2656 | for (size_t i = 0; i < vector_exprs.length (); i++) | |
2657 | { | |
2658 | for (int ed = 0; ed < NUM_EDGES (vector_edge_list); ed++) | |
2659 | { | |
2660 | edge eg = INDEX_EDGE (vector_edge_list, ed); | |
2661 | if (bitmap_bit_p (vector_insert[ed], i)) | |
2662 | fprintf (dump_file, | |
2663 | "INSERT edge %d from bb %d to bb %d for VSETVL " | |
2664 | "expr[%ld]\n", | |
2665 | ed, eg->src->index, eg->dest->index, i); | |
2666 | } | |
2667 | } | |
2668 | } | |
2669 | ||
/* Descriptor of the "vsetvl" RTL pass.  It is handed to the
   rtl_opt_pass base-class constructor by pass_vsetvl.  The pass
   declares no optinfo group, no timevar, no required/provided/destroyed
   properties and no TODO flags.  */
const pass_data pass_data_vsetvl = {
  RTL_PASS, /* type */
  "vsetvl", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
2681 | ||
/* The VSETVL insertion pass.  The private helpers are grouped by the
   phase they belong to; the pass is gated on TARGET_VECTOR.
   NOTE(review): the file header describes five phases while the
   helpers below are labelled up to Phase 6 -- confirm which is
   current.  */
class pass_vsetvl : public rtl_opt_pass
{
private:
  /* Per-insn / per-block vector information and LCM bitmaps.  */
  class vector_infos_manager *m_vector_manager;

  /* The two top-level strategies; simple_vsetvl is documented at its
     definition as the optimize == 0 path.  */
  void simple_vsetvl (void) const;
  void lazy_vsetvl (void);

  /* Phase 1: compute demanded info within each block by a backward
     walk over its insns.  */
  void compute_local_backward_infos (const bb_info *);

  /* Phase 2: forward walk within each block, emitting vsetvl insns
     where the current state does not satisfy an insn's demand.  */
  bool need_vsetvl (const vector_insn_info &, const vector_insn_info &) const;
  void transfer_before (vector_insn_info &, insn_info *) const;
  void transfer_after (vector_insn_info &, insn_info *) const;
  void emit_local_forward_vsetvls (const bb_info *);

  /* Phase 3: backward and forward demand fusion across blocks.  */
  enum fusion_type get_backward_fusion_type (const bb_info *,
                                             const vector_insn_info &);
  bool hard_empty_block_p (const bb_info *, const vector_insn_info &) const;
  bool backward_demand_fusion (void);
  bool forward_demand_fusion (void);
  bool cleanup_illegal_dirty_blocks (void);
  void demand_fusion (void);

  /* Phase 4.  */
  void prune_expressions (void);
  void compute_local_properties (void);
  bool can_refine_vsetvl_p (const basic_block, const vector_insn_info &) const;
  void refine_vsetvls (void) const;
  void cleanup_vsetvls (void);
  bool commit_vsetvls (void);
  void pre_vsetvl (void);

  /* Phase 5.  */
  void cleanup_insns (void) const;

  /* Phase 6.  */
  void propagate_avl (void) const;

  /* Pass set-up, tear-down and block probability computation.  */
  void init (void);
  void done (void);
  void compute_probabilities (void);

public:
  pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {}

  /* opt_pass methods: */
  /* The pass runs only when TARGET_VECTOR is true.  */
  virtual bool gate (function *) final override { return TARGET_VECTOR; }
  virtual unsigned int execute (function *) final override;
}; // class pass_vsetvl
2734 | ||
2735 | /* Simple m_vsetvl_insert vsetvl for optimize == 0. */ | |
2736 | void | |
2737 | pass_vsetvl::simple_vsetvl (void) const | |
2738 | { | |
2739 | if (dump_file) | |
2740 | fprintf (dump_file, | |
2741 | "\nEntering Simple VSETVL PASS and Handling %d basic blocks for " | |
2742 | "function:%s\n", | |
2743 | n_basic_blocks_for_fn (cfun), function_name (cfun)); | |
2744 | ||
2745 | basic_block cfg_bb; | |
2746 | rtx_insn *rinsn; | |
2747 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
2748 | { | |
2749 | FOR_BB_INSNS (cfg_bb, rinsn) | |
2750 | { | |
2751 | if (!NONDEBUG_INSN_P (rinsn)) | |
2752 | continue; | |
2753 | if (has_vtype_op (rinsn)) | |
2754 | { | |
2755 | const auto info | |
2756 | = m_vector_manager->vector_insn_infos[INSN_UID (rinsn)]; | |
2757 | emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_BEFORE, info, | |
2758 | NULL_RTX, rinsn); | |
2759 | } | |
2760 | } | |
2761 | } | |
2762 | } | |
2763 | ||
2764 | /* Compute demanded information by backward data-flow analysis. */ | |
2765 | void | |
2766 | pass_vsetvl::compute_local_backward_infos (const bb_info *bb) | |
2767 | { | |
2768 | vector_insn_info change; | |
2769 | change.set_empty (); | |
2770 | ||
2771 | auto &block_info = m_vector_manager->vector_block_infos[bb->index ()]; | |
2772 | block_info.reaching_out = change; | |
2773 | ||
2774 | for (insn_info *insn : bb->reverse_real_nondebug_insns ()) | |
2775 | { | |
2776 | auto &info = m_vector_manager->vector_insn_infos[insn->uid ()]; | |
2777 | ||
2778 | if (info.uninit_p ()) | |
2779 | /* If it is uninitialized, propagate it directly. */ | |
2780 | info = change; | |
2781 | else if (info.unknown_p ()) | |
2782 | change = info; | |
2783 | else | |
2784 | { | |
2785 | gcc_assert (info.valid_p () && "Unexpected Invalid demanded info"); | |
ec99ffab JZZ |
2786 | if (change.valid_p ()) |
2787 | { | |
a481eed8 JZZ |
2788 | if (!(propagate_avl_across_demands_p (change, info) |
2789 | && !reg_available_p (insn, change)) | |
ec99ffab | 2790 | && change.compatible_p (info)) |
fdc5abbd | 2791 | { |
d51f2456 | 2792 | info = change.merge (info, LOCAL_MERGE); |
fdc5abbd JZ |
2793 | /* Fix PR109399, we should update user vsetvl instruction |
2794 | if there is a change in demand fusion. */ | |
2795 | if (vsetvl_insn_p (insn->rtl ())) | |
2796 | change_vsetvl_insn (insn, info); | |
2797 | } | |
ec99ffab | 2798 | } |
9243c3d1 JZZ |
2799 | change = info; |
2800 | } | |
2801 | } | |
2802 | ||
2803 | block_info.local_dem = change; | |
2804 | if (block_info.local_dem.empty_p ()) | |
2805 | block_info.reaching_out = block_info.local_dem; | |
2806 | } | |
2807 | ||
2808 | /* Return true if a dem_info is required to transition from curr_info to | |
2809 | require before INSN. */ | |
2810 | bool | |
2811 | pass_vsetvl::need_vsetvl (const vector_insn_info &require, | |
2812 | const vector_insn_info &curr_info) const | |
2813 | { | |
2814 | if (!curr_info.valid_p () || curr_info.unknown_p () || curr_info.uninit_p ()) | |
2815 | return true; | |
2816 | ||
a481eed8 | 2817 | if (require.compatible_p (static_cast<const vl_vtype_info &> (curr_info))) |
9243c3d1 JZZ |
2818 | return false; |
2819 | ||
2820 | return true; | |
2821 | } | |
2822 | ||
2823 | /* Given an incoming state reaching INSN, modifies that state so that it is | |
2824 | minimally compatible with INSN. The resulting state is guaranteed to be | |
2825 | semantically legal for INSN, but may not be the state requested by INSN. */ | |
2826 | void | |
2827 | pass_vsetvl::transfer_before (vector_insn_info &info, insn_info *insn) const | |
2828 | { | |
2829 | if (!has_vtype_op (insn->rtl ())) | |
2830 | return; | |
2831 | ||
2832 | const vector_insn_info require | |
2833 | = m_vector_manager->vector_insn_infos[insn->uid ()]; | |
2834 | if (info.valid_p () && !need_vsetvl (require, info)) | |
2835 | return; | |
2836 | info = require; | |
2837 | } | |
2838 | ||
2839 | /* Given a state with which we evaluated insn (see transfer_before above for why | |
2840 | this might be different that the state insn requested), modify the state to | |
2841 | reflect the changes insn might make. */ | |
2842 | void | |
2843 | pass_vsetvl::transfer_after (vector_insn_info &info, insn_info *insn) const | |
2844 | { | |
2845 | if (vector_config_insn_p (insn->rtl ())) | |
2846 | { | |
2847 | info = m_vector_manager->vector_insn_infos[insn->uid ()]; | |
2848 | return; | |
2849 | } | |
2850 | ||
60bd33bc JZZ |
2851 | if (fault_first_load_p (insn->rtl ()) |
2852 | && info.update_fault_first_load_avl (insn)) | |
2853 | return; | |
9243c3d1 JZZ |
2854 | |
2855 | /* If this is something that updates VL/VTYPE that we don't know about, set | |
2856 | the state to unknown. */ | |
2857 | if (insn->is_call () || insn->is_asm () | |
2858 | || find_access (insn->defs (), VL_REGNUM) | |
2859 | || find_access (insn->defs (), VTYPE_REGNUM)) | |
2860 | info = vector_insn_info::get_unknown (); | |
2861 | } | |
2862 | ||
2863 | /* Emit vsetvl within each block by forward data-flow analysis. */ | |
2864 | void | |
2865 | pass_vsetvl::emit_local_forward_vsetvls (const bb_info *bb) | |
2866 | { | |
2867 | auto &block_info = m_vector_manager->vector_block_infos[bb->index ()]; | |
2868 | if (block_info.local_dem.empty_p ()) | |
2869 | return; | |
2870 | ||
2871 | vector_insn_info curr_info; | |
2872 | for (insn_info *insn : bb->real_nondebug_insns ()) | |
2873 | { | |
2874 | const vector_insn_info prev_info = curr_info; | |
a481eed8 | 2875 | enum vsetvl_type type = NUM_VSETVL_TYPE; |
9243c3d1 JZZ |
2876 | transfer_before (curr_info, insn); |
2877 | ||
2878 | if (has_vtype_op (insn->rtl ())) | |
2879 | { | |
2880 | if (static_cast<const vl_vtype_info &> (prev_info) | |
2881 | != static_cast<const vl_vtype_info &> (curr_info)) | |
2882 | { | |
2883 | const auto require | |
2884 | = m_vector_manager->vector_insn_infos[insn->uid ()]; | |
2885 | if (!require.compatible_p ( | |
2886 | static_cast<const vl_vtype_info &> (prev_info))) | |
a481eed8 JZZ |
2887 | type = insert_vsetvl (EMIT_BEFORE, insn->rtl (), require, |
2888 | prev_info); | |
9243c3d1 JZZ |
2889 | } |
2890 | } | |
2891 | ||
a481eed8 JZZ |
2892 | /* Fix the issue of following sequence: |
2893 | vsetivli zero, 5 | |
2894 | .... | |
2895 | vsetvli zero, zero | |
2896 | vmv.x.s (demand AVL = 8). | |
2897 | .... | |
2898 | incorrect: vsetvli zero, zero ===> Since the curr_info is AVL = 8. | |
2899 | correct: vsetivli zero, 8 | |
2900 | vadd (demand AVL = 8). */ | |
2901 | if (type == VSETVL_VTYPE_CHANGE_ONLY) | |
2902 | { | |
2903 | /* Update the curr_info to be real correct AVL. */ | |
2904 | curr_info.set_avl_info (prev_info.get_avl_info ()); | |
2905 | } | |
9243c3d1 JZZ |
2906 | transfer_after (curr_info, insn); |
2907 | } | |
2908 | ||
2909 | block_info.reaching_out = curr_info; | |
2910 | } | |
2911 | ||
4f673c5e JZZ |
2912 | enum fusion_type |
2913 | pass_vsetvl::get_backward_fusion_type (const bb_info *bb, | |
2914 | const vector_insn_info &prop) | |
9243c3d1 | 2915 | { |
4f673c5e | 2916 | insn_info *insn = prop.get_insn (); |
9243c3d1 | 2917 | |
4f673c5e JZZ |
2918 | /* TODO: We don't backward propagate the explict VSETVL here |
2919 | since we will change vsetvl and vsetvlmax intrinsics into | |
2920 | no side effects which can be optimized into optimal location | |
2921 | by GCC internal passes. We only need to support these backward | |
2922 | propagation if vsetvl intrinsics have side effects. */ | |
2923 | if (vsetvl_insn_p (insn->rtl ())) | |
2924 | return INVALID_FUSION; | |
2925 | ||
2926 | gcc_assert (has_vtype_op (insn->rtl ())); | |
2927 | rtx reg = NULL_RTX; | |
2928 | ||
2929 | /* Case 1: Don't need VL. Just let it backward propagate. */ | |
ec99ffab | 2930 | if (!prop.demand_p (DEMAND_AVL)) |
4f673c5e JZZ |
2931 | return VALID_AVL_FUSION; |
2932 | else | |
9243c3d1 | 2933 | { |
4f673c5e JZZ |
2934 | /* Case 2: CONST_INT AVL, we don't need to check def. */ |
2935 | if (prop.has_avl_imm ()) | |
2936 | return VALID_AVL_FUSION; | |
2937 | else | |
2938 | { | |
2939 | /* Case 3: REG AVL, we need to check the distance of def to make | |
2940 | sure we won't backward propagate over the def. */ | |
2941 | gcc_assert (prop.has_avl_reg ()); | |
2942 | if (vlmax_avl_p (prop.get_avl ())) | |
2943 | /* Check VL operand for vsetvl vl,zero. */ | |
ec99ffab | 2944 | reg = prop.get_avl_reg_rtx (); |
4f673c5e JZZ |
2945 | else |
2946 | /* Check AVL operand for vsetvl zero,avl. */ | |
ec99ffab | 2947 | reg = prop.get_avl (); |
4f673c5e JZZ |
2948 | } |
2949 | } | |
9243c3d1 | 2950 | |
4f673c5e | 2951 | gcc_assert (reg); |
ec99ffab JZZ |
2952 | if (!prop.get_avl_source ()->insn ()->is_phi () |
2953 | && prop.get_avl_source ()->insn ()->bb () == insn->bb ()) | |
6b6b9c68 JZZ |
2954 | return INVALID_FUSION; |
2955 | hash_set<set_info *> sets | |
2956 | = get_all_sets (prop.get_avl_source (), true, true, true); | |
2957 | if (any_set_in_bb_p (sets, insn->bb ())) | |
2958 | return INVALID_FUSION; | |
2959 | ||
2960 | if (vlmax_avl_p (prop.get_avl ())) | |
4f673c5e | 2961 | { |
6b6b9c68 | 2962 | if (find_reg_killed_by (bb, reg)) |
4f673c5e | 2963 | return INVALID_FUSION; |
6b6b9c68 JZZ |
2964 | else |
2965 | return VALID_AVL_FUSION; | |
4f673c5e | 2966 | } |
6b6b9c68 JZZ |
2967 | |
2968 | /* By default, we always enable backward fusion so that we can | |
2969 | gain more optimizations. */ | |
2970 | if (!find_reg_killed_by (bb, reg)) | |
2971 | return VALID_AVL_FUSION; | |
2972 | return KILLED_AVL_FUSION; | |
2973 | } | |
2974 | ||
/* get_backward_fusion_type accepts almost every case.  This function
   restricts backward fusion afterwards by deciding, in the forward
   dataflow, which dirty blocks should become hard empty blocks (blocks
   that can no longer be polluted as dirty).  This gives a more
   accurate result.

   BB is the block being examined and INFO its current info.  Returns
   true if BB should be turned into a hard empty block.  Only dirty
   infos with a register AVL are ever considered.  */
bool
pass_vsetvl::hard_empty_block_p (const bb_info *bb,
                                 const vector_insn_info &info) const
{
  if (!info.dirty_p () || !info.has_avl_reg ())
    return false;

  basic_block cfg_bb = bb->cfg_bb ();
  sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index];
  set_info *set = info.get_avl_source ();
  /* The AVL register, rebuilt from the register number of its source
     set.  */
  rtx avl = gen_rtx_REG (Pmode, set->regno ());
  hash_set<set_info *> sets = get_all_sets (set, true, false, false);
  hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb);

  if (find_reg_killed_by (bb, avl))
    {
      /* Condition 1:
         A dirty block with killed AVL is an empty block (no RVV
         instructions) that was polluted as dirty while the value of
         the current AVL is killed in it.  For example:
           bb 0:
             ...
           bb 1:
             def a5
           bb 2:
             RVV (use a5)
         In backward dataflow, we pollute BB0 and BB1 as dirty with AVL
         killed, since a5 is killed in BB1.
         Now consider this example:

           bb 3:         bb 4:
             def3 a5       def4 a5
           bb 5:         bb 6:
             def1 a5       def2 a5
                 \         /
                  \       /
                   \     /
                    \   /
                    bb 7:
                      RVV (use a5)
         In this case, we can pollute BB5 and BB6 as dirty if the defs
         of a5 reaching the RVV instruction in BB7 are def1 in BB5 and
         def2 in BB6, so we can return false early here for
         HARD_EMPTY_BLOCK_P.
         However, we are not sure whether BB3 and BB4 can be polluted
         as dirty with AVL killed, so we can't return false for
         HARD_EMPTY_BLOCK_P here for them: it is too early and would
         potentially produce issues.  */
      gcc_assert (info.dirty_with_killed_avl_p ());
      if (info.get_avl_source ()
          && get_same_bb_set (sets, bb->cfg_bb ()) == info.get_avl_source ())
        return false;
    }

  /* Condition 2:
     Suppress VL/VTYPE info that is backward propagated too early:
              ________
             |  BB0   |
             |________|
                 |
              ___|____
             |  BB1   |
             |________|
     Suppose BB1 has multiple predecessors and BB0 is one of them.
     BB1 has VL/VTYPE info (may be VALID or DIRTY) to backward
     propagate.
     The AVIN (available in) computed by LCM is empty only in these
     two circumstances:
       1. all predecessors of BB1 are empty (not VALID and cannot be
          polluted in the backward fusion flow);
       2. the VL/VTYPE infos of BB1's predecessors conflict.

     We keep the block dirty in the 2nd circumstance and set it to
     HARD_EMPTY (cannot be polluted as DIRTY any more) in the 1st.
     We don't backward propagate in the 1st circumstance since there is
     no VALID RVV instruction and no block polluted (dirty) by backward
     propagation from following blocks; keeping it dirty is
     meaningless.

     Since we keep it dirty in the 2nd circumstance, where there are
     VALID or dirty blocks among the predecessors, we can still gain
     optimization opportunities.  For example, in this case:
       for (size_t i = 0; i < n; i++)
         {
           if (i != cond) {
             vint8mf8_t v = *(vint8mf8_t*)(in + i + 100);
             *(vint8mf8_t*)(out + i + 100) = v;
           } else {
             vbool1_t v = *(vbool1_t*)(in + i + 400);
             *(vbool1_t*)(out + i + 400) = v;
           }
         }
     the VL/VTYPE of the if and else arms conflict, which produces an
     empty AVIN LCM result, but we can still keep dirty blocks: if
     *(i != cond)* is very unlikely we can preset the vsetvl (VL/VTYPE)
     info from the else arm (static probability model).

     We don't want to backward propagate VL/VTYPE information too
     early, which is not optimal and may potentially produce issues.  */
  if (bitmap_empty_p (avin))
    {
      /* Hard empty unless some predecessor (other than the entry
         block) has a non-empty AVOUT.  */
      bool hard_empty_p = true;
      for (const basic_block pred_cfg_bb : pred_cfg_bbs)
        {
          if (pred_cfg_bb == ENTRY_BLOCK_PTR_FOR_FN (cfun))
            continue;
          sbitmap avout = m_vector_manager->vector_avout[pred_cfg_bb->index];
          if (!bitmap_empty_p (avout))
            {
              hard_empty_p = false;
              break;
            }
        }
      if (hard_empty_p)
        return true;
    }

  /* From here on the block is only a candidate if some successor's
     local demand is dirty with a killed AVL.  */
  edge e;
  edge_iterator ei;
  bool has_avl_killed_insn_p = false;
  FOR_EACH_EDGE (e, ei, cfg_bb->succs)
    {
      const auto block_info
        = m_vector_manager->vector_block_infos[e->dest->index];
      if (block_info.local_dem.dirty_with_killed_avl_p ())
        {
          has_avl_killed_insn_p = true;
          break;
        }
    }
  if (!has_avl_killed_insn_p)
    return false;

  bool any_set_in_bbs_p = false;
  for (const basic_block pred_cfg_bb : pred_cfg_bbs)
    {
      /* NOTE(review): this call does not depend on PRED_CFG_BB and
         looks loop-invariant -- confirm before hoisting.  */
      insn_info *def_insn = extract_single_source (set);
      if (def_insn)
        {
          /* Condition 3:

             Case 1:                            Case 2:
               bb 0:                              bb 0:
                 def a5 101                         ...
               bb 1:                              bb 1:
                 ...                                ...
               bb 2:                              bb 2:
                 RVV 1 (use a5 with TAIL ANY)       ...
               bb 3:                              bb 3:
                 def a5 101                         def a5 101
               bb 4:                              bb 4:
                 ...                                ...
               bb 5:                              bb 5:
                 RVV 2 (use a5 with TU)             RVV 1 (use a5)

             Case 1: We can pollute BB3, BB2, BB1 and BB0 all as dirty
             blocks with killed AVL so that we can merge the TU demand
             info from RVV 2 into RVV 1 and elide the vsetvl
             instruction in BB5.

             TODO: We only optimize for a single source def since
             multiple source defs are quite complicated.

             Case 2: We can only pollute bb 3 as dirty, which has been
             accepted in Condition 2; we can't pollute BB3, BB2, BB1
             and BB0 like in case 1.  */
          insn_info *last_killed_insn
            = find_reg_killed_by (crtl->ssa->bb (pred_cfg_bb), avl);
          if (!last_killed_insn || pred_cfg_bb == def_insn->bb ()->cfg_bb ())
            continue;
          if (source_equal_p (last_killed_insn, def_insn))
            {
              any_set_in_bbs_p = true;
              break;
            }
        }
      else
        {
          /* Condition 4:

               bb 0:      bb 1:      bb 3:
                 def1 a5    def2 a5    ...
                    \       /          /
                     \     /          /
                      \   /          /
                      bb 4:         /
                        |          /
                        |         /
                      bb 5:      /
                        |       /
                        |      /
                      bb 6:   /
                        |    /
                        |   /
                      bb 8:
                        RVV 1 (use a5)
             If we get the (REAL) defs of a5 from the RVV 1
             instruction, we get def1 from BB0 and def2 from BB1.  So
             we pollute BB6, BB5, BB4, BB0 and BB1 as DIRTY and set BB3
             as HARD_EMPTY so that we won't propagate the AVL to
             BB3.  */
          if (any_set_in_bb_p (sets, crtl->ssa->bb (pred_cfg_bb)))
            {
              any_set_in_bbs_p = true;
              break;
            }
        }
    }
  /* Hard empty exactly when no predecessor matched above.  */
  if (!any_set_in_bbs_p)
    return true;
  return false;
}
3187 | ||
/* Compute global backward demanded info: for every block (visited in
   reverse order) whose local demand is valid or dirty, try to fuse
   that demand into the reaching-out info of each predecessor.
   Returns true if any block info was changed.  */
bool
pass_vsetvl::backward_demand_fusion (void)
{
  /* We compute global infos by backward propagation.
     We want to have better performance in these following cases:

        1. for (size_t i = 0; i < n; i++) {
             if (i != cond) {
               vint8mf8_t v = *(vint8mf8_t*)(in + i + 100);
               *(vint8mf8_t*)(out + i + 100) = v;
             } else {
               vbool1_t v = *(vbool1_t*)(in + i + 400);
               *(vbool1_t*)(out + i + 400) = v;
             }
           }

           Since we don't have any RVV instruction in the BEFORE blocks,
           LCM fails to optimize such case.  We want to backward propagate
           them into empty blocks so that we could have better performance
           in LCM.

        2. bb 0:
             vsetvl e8,mf8 (demand RATIO)
           bb 1:
             vsetvl e32,mf2 (demand SEW and LMUL)
           We backward propagate the first VSETVL into e32,mf2 so that we
           could be able to eliminate the second VSETVL in LCM.  */

  bool changed_p = false;
  for (const bb_info *bb : crtl->ssa->reverse_bbs ())
    {
      basic_block cfg_bb = bb->cfg_bb ();
      const auto &curr_block_info
        = m_vector_manager->vector_block_infos[cfg_bb->index];
      /* The demand we try to push into the predecessors.  */
      const auto &prop = curr_block_info.local_dem;

      /* If there is nothing to propagate, just skip it.  */
      if (!prop.valid_or_dirty_p ())
        continue;

      if (!backward_propagate_worthwhile_p (cfg_bb, curr_block_info))
        continue;

      /* Fix PR108270:

                bb 0 -> bb 1
         We don't need to backward fuse VL/VTYPE info from bb 1 to bb 0
         if bb 1 is not inside a loop and all predecessors of bb 0 are empty.  */
      if (m_vector_manager->all_empty_predecessor_p (cfg_bb))
        continue;

      edge e;
      edge_iterator ei;
      /* Backward propagate to each predecessor.  */
      FOR_EACH_EDGE (e, ei, cfg_bb->preds)
        {
          auto &block_info
            = m_vector_manager->vector_block_infos[e->src->index];

          /* Skip edges that have any EDGE_COMPLEX flag set.
             NOTE(review): the original comment called these "critical
             edges"; confirm which is intended.  */
          if (e->flags & EDGE_COMPLEX)
            continue;
          /* Never propagate into the entry block.  */
          if (e->src->index == ENTRY_BLOCK_PTR_FOR_FN (cfun)->index)
            continue;
          /* If prop is the demand of a vsetvl instruction and the AVL
             would have to be propagated across the two demands, we
             don't backward propagate, since the vsetvl instruction has
             no side effects.  */
          if (vsetvl_insn_p (prop.get_insn ()->rtl ())
              && propagate_avl_across_demands_p (prop, block_info.reaching_out))
            continue;

          if (block_info.reaching_out.unknown_p ())
            continue;
          else if (block_info.reaching_out.hard_empty_p ())
            continue;
          else if (block_info.reaching_out.empty_p ())
            {
              /* EMPTY -> DIRTY: the predecessor takes over PROP, marked
                 dirty according to the fusion type.  */
              enum fusion_type type
                = get_backward_fusion_type (crtl->ssa->bb (e->src), prop);
              if (type == INVALID_FUSION)
                continue;

              block_info.reaching_out = prop;
              block_info.reaching_out.set_dirty (type);

              /* For a non-VLMAX register AVL, prefer the set located in
                 the predecessor itself as the AVL source, if any.  */
              if (prop.has_avl_reg () && !vlmax_avl_p (prop.get_avl ()))
                {
                  hash_set<set_info *> sets
                    = get_all_sets (prop.get_avl_source (), true, true, true);
                  set_info *set = get_same_bb_set (sets, e->src);
                  if (set)
                    block_info.reaching_out.set_avl_info (
                      avl_info (prop.get_avl (), set));
                }

              block_info.local_dem = block_info.reaching_out;
              block_info.probability = curr_block_info.probability;
              changed_p = true;
            }
          else if (block_info.reaching_out.dirty_p ())
            {
              /* DIRTY -> DIRTY or VALID -> DIRTY.  */
              vector_insn_info new_info;

              if (block_info.reaching_out.compatible_p (prop))
                {
                  /* Compatible: nothing to do if PROP is already
                     available, otherwise merge and accumulate the
                     probability.  */
                  if (block_info.reaching_out.available_p (prop))
                    continue;
                  new_info = block_info.reaching_out.merge (prop, GLOBAL_MERGE);
                  new_info.set_dirty (
                    block_info.reaching_out.dirty_with_killed_avl_p ());
                  block_info.probability += curr_block_info.probability;
                }
              else
                {
                  /* Incompatible: PROP replaces the existing dirty info
                     only if the current block is more probable.  */
                  if (curr_block_info.probability > block_info.probability)
                    {
                      enum fusion_type type
                        = get_backward_fusion_type (crtl->ssa->bb (e->src),
                                                    prop);
                      if (type == INVALID_FUSION)
                        continue;
                      new_info = prop;
                      new_info.set_dirty (type);
                      block_info.probability = curr_block_info.probability;
                    }
                  else
                    continue;
                }

              /* If the AVL has to be propagated across the demands and
                 its register is killed in the predecessor, record the
                 info as dirty with killed AVL.  */
              if (propagate_avl_across_demands_p (prop,
                                                  block_info.reaching_out))
                {
                  rtx reg = new_info.get_avl_reg_rtx ();
                  if (find_reg_killed_by (crtl->ssa->bb (e->src), reg))
                    new_info.set_dirty (true);
                }

              block_info.local_dem = new_info;
              block_info.reaching_out = new_info;
              changed_p = true;
            }
          else
            {
              /* We not only change the info during backward propagation,
                 but also change the VSETVL instruction.  */
              gcc_assert (block_info.reaching_out.valid_p ());
              /* NOTE(review): the AVL source of PROP is queried here
                 before checking that PROP has a register AVL --
                 confirm this is safe for immediate AVLs.  */
              hash_set<set_info *> sets
                = get_all_sets (prop.get_avl_source (), true, false, false);
              set_info *set = get_same_bb_set (sets, e->src);
              /* When the predecessor's reaching-out info comes from a
                 vsetvl insn and PROP uses a non-VLMAX register AVL,
                 fuse only if VLMAX is the same, VTYPE differs, and the
                 AVL is set in the predecessor by that very vsetvl.  */
              if (vsetvl_insn_p (block_info.reaching_out.get_insn ()->rtl ())
                  && prop.has_avl_reg () && !vlmax_avl_p (prop.get_avl ()))
                {
                  if (!block_info.reaching_out.same_vlmax_p (prop))
                    continue;
                  if (block_info.reaching_out.same_vtype_p (prop))
                    continue;
                  if (!set)
                    continue;
                  if (set->insn () != block_info.reaching_out.get_insn ())
                    continue;
                }

              if (!block_info.reaching_out.compatible_p (prop))
                continue;
              if (block_info.reaching_out.available_p (prop))
                continue;

              /* NOTE(review): both branches yield a value equal to
                 reaching_out; the conditional assignment looks
                 redundant -- confirm.  */
              vector_insn_info be_merged = block_info.reaching_out;
              if (block_info.local_dem == block_info.reaching_out)
                be_merged = block_info.local_dem;
              vector_insn_info new_info = be_merged.merge (prop, GLOBAL_MERGE);

              /* Note that the probability is raised before the final
                 availability check below, i.e. even if that check
                 rejects the fusion.  */
              if (curr_block_info.probability > block_info.probability)
                block_info.probability = curr_block_info.probability;

              if (propagate_avl_across_demands_p (prop, block_info.reaching_out)
                  && !reg_available_p (crtl->ssa->bb (e->src)->end_insn (),
                                       new_info))
                continue;

              change_vsetvl_insn (new_info.get_insn (), new_info);
              /* Keep local_dem in sync only if it was identical to
                 reaching_out before the update.  */
              if (block_info.local_dem == block_info.reaching_out)
                block_info.local_dem = new_info;
              block_info.reaching_out = new_info;
              changed_p = true;
            }
        }
    }
  return changed_p;
}
3380 | ||
3381 | /* Compute global forward demanded info. */ | |
3382 | bool | |
3383 | pass_vsetvl::forward_demand_fusion (void) | |
3384 | { | |
3385 | /* Enhance the global information propagation especially | |
3386 | backward propagation miss the propagation. | |
3387 | Consider such case: | |
3388 | ||
3389 | bb0 | |
3390 | (TU) | |
3391 | / \ | |
3392 | bb1 bb2 | |
3393 | (TU) (ANY) | |
3394 | existing edge -----> \ / (TU) <----- LCM create this edge. | |
3395 | bb3 | |
3396 | (TU) | |
3397 | ||
3398 | Base on the situation, LCM fails to eliminate the VSETVL instruction and | |
3399 | insert an edge from bb2 to bb3 since we can't backward propagate bb3 into | |
3400 | bb2. To avoid this confusing LCM result and non-optimal codegen, we should | |
3401 | forward propagate information from bb0 to bb2 which is friendly to LCM. */ | |
3402 | bool changed_p = false; | |
3403 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
3404 | { | |
3405 | basic_block cfg_bb = bb->cfg_bb (); | |
3406 | const auto &prop | |
3407 | = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out; | |
3408 | ||
3409 | /* If there is nothing to propagate, just skip it. */ | |
3410 | if (!prop.valid_or_dirty_p ()) | |
3411 | continue; | |
3412 | ||
00fb7698 JZZ |
3413 | if (cfg_bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
3414 | continue; | |
3415 | ||
ec99ffab JZZ |
3416 | if (vsetvl_insn_p (prop.get_insn ()->rtl ())) |
3417 | continue; | |
3418 | ||
387cd9d3 JZZ |
3419 | edge e; |
3420 | edge_iterator ei; | |
3421 | /* Forward propagate to each successor. */ | |
3422 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
3423 | { | |
3424 | auto &local_dem | |
3425 | = m_vector_manager->vector_block_infos[e->dest->index].local_dem; | |
3426 | auto &reaching_out | |
3427 | = m_vector_manager->vector_block_infos[e->dest->index].reaching_out; | |
3428 | ||
3429 | /* It's quite obvious, we don't need to propagate itself. */ | |
3430 | if (e->dest->index == cfg_bb->index) | |
3431 | continue; | |
00fb7698 JZZ |
3432 | /* We don't propagate through critical edges. */ |
3433 | if (e->flags & EDGE_COMPLEX) | |
3434 | continue; | |
3435 | if (e->dest->index == EXIT_BLOCK_PTR_FOR_FN (cfun)->index) | |
3436 | continue; | |
387cd9d3 JZZ |
3437 | |
3438 | /* If there is nothing to propagate, just skip it. */ | |
3439 | if (!local_dem.valid_or_dirty_p ()) | |
3440 | continue; | |
ec99ffab | 3441 | if (local_dem.available_p (prop)) |
4f673c5e JZZ |
3442 | continue; |
3443 | if (!local_dem.compatible_p (prop)) | |
3444 | continue; | |
ec99ffab JZZ |
3445 | if (propagate_avl_across_demands_p (prop, local_dem)) |
3446 | continue; | |
387cd9d3 | 3447 | |
4f673c5e JZZ |
3448 | vector_insn_info new_info = local_dem.merge (prop, GLOBAL_MERGE); |
3449 | new_info.set_insn (local_dem.get_insn ()); | |
3450 | if (local_dem.dirty_p ()) | |
387cd9d3 | 3451 | { |
4f673c5e | 3452 | gcc_assert (local_dem == reaching_out); |
6b6b9c68 | 3453 | new_info.set_dirty (local_dem.dirty_with_killed_avl_p ()); |
4f673c5e | 3454 | local_dem = new_info; |
4f673c5e JZZ |
3455 | reaching_out = local_dem; |
3456 | } | |
3457 | else | |
3458 | { | |
3459 | if (reaching_out == local_dem) | |
3460 | reaching_out = new_info; | |
3461 | local_dem = new_info; | |
3462 | change_vsetvl_insn (local_dem.get_insn (), new_info); | |
387cd9d3 | 3463 | } |
4f673c5e JZZ |
3464 | auto &prob |
3465 | = m_vector_manager->vector_block_infos[e->dest->index].probability; | |
3466 | auto &curr_prob | |
3467 | = m_vector_manager->vector_block_infos[cfg_bb->index].probability; | |
3468 | prob = curr_prob * e->probability; | |
3469 | changed_p = true; | |
387cd9d3 JZZ |
3470 | } |
3471 | } | |
3472 | return changed_p; | |
3473 | } | |
3474 | ||
3475 | void | |
3476 | pass_vsetvl::demand_fusion (void) | |
3477 | { | |
3478 | bool changed_p = true; | |
3479 | while (changed_p) | |
3480 | { | |
3481 | changed_p = false; | |
4f673c5e JZZ |
3482 | /* To optimize the case like this: |
3483 | void f2 (int8_t * restrict in, int8_t * restrict out, int n, int cond) | |
3484 | { | |
3485 | size_t vl = 101; | |
3486 | ||
3487 | for (size_t i = 0; i < n; i++) | |
3488 | { | |
3489 | vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl); | |
3490 | __riscv_vse8_v_i8mf8 (out + i + 300, v, vl); | |
3491 | } | |
3492 | ||
3493 | for (size_t i = 0; i < n; i++) | |
3494 | { | |
3495 | vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl); | |
3496 | __riscv_vse8_v_i8mf8 (out + i, v, vl); | |
3497 | ||
3498 | vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl); | |
3499 | __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl); | |
3500 | } | |
3501 | } | |
3502 | ||
3503 | bb 0: li a5, 101 (killed avl) | |
3504 | ... | |
3505 | bb 1: vsetvli zero, a5, ta | |
3506 | ... | |
3507 | bb 2: li a5, 101 (killed avl) | |
3508 | ... | |
3509 | bb 3: vsetvli zero, a3, tu | |
3510 | ||
3511 | We want to fuse VSEVLI instructions on bb 1 and bb 3. However, there is | |
3512 | an AVL kill instruction in bb 2 that we can't backward fuse bb 3 or | |
3513 | forward bb 1 arbitrarily. We need available information of each block to | |
3514 | help for such cases. */ | |
6b6b9c68 JZZ |
3515 | changed_p |= backward_demand_fusion (); |
3516 | changed_p |= forward_demand_fusion (); | |
3517 | } | |
3518 | ||
3519 | changed_p = true; | |
3520 | while (changed_p) | |
3521 | { | |
3522 | changed_p = false; | |
3523 | prune_expressions (); | |
3524 | m_vector_manager->create_bitmap_vectors (); | |
3525 | compute_local_properties (); | |
4f673c5e JZZ |
3526 | compute_available (m_vector_manager->vector_comp, |
3527 | m_vector_manager->vector_kill, | |
3528 | m_vector_manager->vector_avout, | |
3529 | m_vector_manager->vector_avin); | |
6b6b9c68 | 3530 | changed_p |= cleanup_illegal_dirty_blocks (); |
4f673c5e JZZ |
3531 | m_vector_manager->free_bitmap_vectors (); |
3532 | if (!m_vector_manager->vector_exprs.is_empty ()) | |
3533 | m_vector_manager->vector_exprs.release (); | |
387cd9d3 | 3534 | } |
9243c3d1 JZZ |
3535 | |
3536 | if (dump_file) | |
3537 | { | |
3538 | fprintf (dump_file, "\n\nDirty blocks list: "); | |
681a5632 JZZ |
3539 | for (const bb_info *bb : crtl->ssa->bbs ()) |
3540 | if (m_vector_manager->vector_block_infos[bb->index ()] | |
3541 | .reaching_out.dirty_p ()) | |
3542 | fprintf (dump_file, "%d ", bb->index ()); | |
9243c3d1 JZZ |
3543 | fprintf (dump_file, "\n\n"); |
3544 | } | |
3545 | } | |
3546 | ||
6b6b9c68 JZZ |
3547 | /* Cleanup illegal dirty blocks. */ |
3548 | bool | |
3549 | pass_vsetvl::cleanup_illegal_dirty_blocks (void) | |
3550 | { | |
3551 | bool changed_p = false; | |
3552 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
3553 | { | |
3554 | basic_block cfg_bb = bb->cfg_bb (); | |
3555 | const auto &prop | |
3556 | = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out; | |
3557 | ||
3558 | /* If there is nothing to cleanup, just skip it. */ | |
3559 | if (!prop.valid_or_dirty_p ()) | |
3560 | continue; | |
3561 | ||
3562 | if (hard_empty_block_p (bb, prop)) | |
3563 | { | |
3564 | m_vector_manager->vector_block_infos[cfg_bb->index].local_dem | |
3565 | = vector_insn_info::get_hard_empty (); | |
3566 | m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out | |
3567 | = vector_insn_info::get_hard_empty (); | |
3568 | changed_p = true; | |
3569 | continue; | |
3570 | } | |
3571 | } | |
3572 | return changed_p; | |
3573 | } | |
3574 | ||
9243c3d1 JZZ |
3575 | /* Assemble the candidates expressions for LCM. */ |
3576 | void | |
3577 | pass_vsetvl::prune_expressions (void) | |
3578 | { | |
681a5632 | 3579 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 3580 | { |
681a5632 JZZ |
3581 | if (m_vector_manager->vector_block_infos[bb->index ()] |
3582 | .local_dem.valid_or_dirty_p ()) | |
9243c3d1 | 3583 | m_vector_manager->create_expr ( |
681a5632 JZZ |
3584 | m_vector_manager->vector_block_infos[bb->index ()].local_dem); |
3585 | if (m_vector_manager->vector_block_infos[bb->index ()] | |
9243c3d1 JZZ |
3586 | .reaching_out.valid_or_dirty_p ()) |
3587 | m_vector_manager->create_expr ( | |
681a5632 | 3588 | m_vector_manager->vector_block_infos[bb->index ()].reaching_out); |
9243c3d1 JZZ |
3589 | } |
3590 | ||
3591 | if (dump_file) | |
3592 | { | |
3593 | fprintf (dump_file, "\nThe total VSETVL expression num = %d\n", | |
3594 | m_vector_manager->vector_exprs.length ()); | |
3595 | fprintf (dump_file, "Expression List:\n"); | |
3596 | for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) | |
3597 | { | |
3598 | fprintf (dump_file, "Expr[%ld]:\n", i); | |
3599 | m_vector_manager->vector_exprs[i]->dump (dump_file); | |
3600 | fprintf (dump_file, "\n"); | |
3601 | } | |
3602 | } | |
3603 | } | |
3604 | ||
4f673c5e JZZ |
3605 | /* Compute the local properties of each recorded expression. |
3606 | ||
3607 | Local properties are those that are defined by the block, irrespective of | |
3608 | other blocks. | |
3609 | ||
3610 | An expression is transparent in a block if its operands are not modified | |
3611 | in the block. | |
3612 | ||
3613 | An expression is computed (locally available) in a block if it is computed | |
3614 | at least once and expression would contain the same value if the | |
3615 | computation was moved to the end of the block. | |
3616 | ||
3617 | An expression is locally anticipatable in a block if it is computed at | |
3618 | least once and expression would contain the same value if the computation | |
3619 | was moved to the beginning of the block. */ | |
9243c3d1 JZZ |
3620 | void |
3621 | pass_vsetvl::compute_local_properties (void) | |
3622 | { | |
3623 | /* - If T is locally available at the end of a block, then T' must be | |
3624 | available at the end of the same block. Since some optimization has | |
3625 | occurred earlier, T' might not be locally available, however, it must | |
3626 | have been previously computed on all paths. As a formula, T at AVLOC(B) | |
3627 | implies that T' at AVOUT(B). | |
3628 | An "available occurrence" is one that is the last occurrence in the | |
3629 | basic block and the operands are not modified by following statements in | |
3630 | the basic block [including this insn]. | |
3631 | ||
3632 | - If T is locally anticipated at the beginning of a block, then either | |
3633 | T', is locally anticipated or it is already available from previous | |
3634 | blocks. As a formula, this means that T at ANTLOC(B) implies that T' at | |
3635 | ANTLOC(B) at AVIN(B). | |
3636 | An "anticipatable occurrence" is one that is the first occurrence in the | |
3637 | basic block, the operands are not modified in the basic block prior | |
3638 | to the occurrence and the output is not used between the start of | |
3639 | the block and the occurrence. */ | |
3640 | ||
3641 | basic_block cfg_bb; | |
4f673c5e | 3642 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 3643 | { |
4f673c5e | 3644 | unsigned int curr_bb_idx = bb->index (); |
9243c3d1 JZZ |
3645 | const auto local_dem |
3646 | = m_vector_manager->vector_block_infos[curr_bb_idx].local_dem; | |
3647 | const auto reaching_out | |
3648 | = m_vector_manager->vector_block_infos[curr_bb_idx].reaching_out; | |
3649 | ||
4f673c5e JZZ |
3650 | /* Compute transparent. */ |
3651 | for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) | |
9243c3d1 | 3652 | { |
4f673c5e JZZ |
3653 | const vector_insn_info *expr = m_vector_manager->vector_exprs[i]; |
3654 | if (local_dem.real_dirty_p () || local_dem.valid_p () | |
3655 | || local_dem.unknown_p () | |
3656 | || has_vsetvl_killed_avl_p (bb, local_dem)) | |
9243c3d1 | 3657 | bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx], i); |
4f673c5e JZZ |
3658 | /* FIXME: Here we set the block as non-transparent (killed) if there |
3659 | is an instruction killed the value of AVL according to the | |
3660 | definition of Local transparent. This is true for such following | |
3661 | case: | |
3662 | ||
3663 | bb 0 (Loop label): | |
3664 | vsetvl zero, a5, e8, mf8 | |
3665 | bb 1: | |
3666 | def a5 | |
3667 | bb 2: | |
3668 | branch bb 0 (Loop label). | |
3669 | ||
3670 | In this case, we known there is a loop bb 0->bb 1->bb 2. According | |
3671 | to LCM definition, it is correct when we set vsetvl zero, a5, e8, | |
3672 | mf8 as non-transparent (killed) so that LCM will not hoist outside | |
3673 | the bb 0. | |
3674 | ||
3675 | However, such conservative configuration will forbid optimization | |
3676 | on some unlucky case. For example: | |
3677 | ||
3678 | bb 0: | |
3679 | li a5, 101 | |
3680 | bb 1: | |
3681 | vsetvl zero, a5, e8, mf8 | |
3682 | bb 2: | |
3683 | li a5, 101 | |
3684 | bb 3: | |
3685 | vsetvl zero, a5, e8, mf8. | |
3686 | So we also relax def a5 as transparent to gain more optimizations | |
3687 | as long as the all real def insn of avl do not come from this | |
3688 | block. This configuration may be still missing some optimization | |
3689 | opportunities. */ | |
6b6b9c68 | 3690 | if (find_reg_killed_by (bb, expr->get_avl ())) |
4f673c5e | 3691 | { |
6b6b9c68 JZZ |
3692 | hash_set<set_info *> sets |
3693 | = get_all_sets (expr->get_avl_source (), true, false, false); | |
3694 | if (any_set_in_bb_p (sets, bb)) | |
4f673c5e JZZ |
3695 | bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx], |
3696 | i); | |
3697 | } | |
9243c3d1 JZZ |
3698 | } |
3699 | ||
4f673c5e | 3700 | /* Compute anticipatable occurrences. */ |
6b6b9c68 JZZ |
3701 | if (local_dem.valid_p () || local_dem.real_dirty_p () |
3702 | || (has_vsetvl_killed_avl_p (bb, local_dem) | |
3703 | && vlmax_avl_p (local_dem.get_avl ()))) | |
4f673c5e JZZ |
3704 | if (anticipatable_occurrence_p (bb, local_dem)) |
3705 | bitmap_set_bit (m_vector_manager->vector_antic[curr_bb_idx], | |
3706 | m_vector_manager->get_expr_id (local_dem)); | |
9243c3d1 | 3707 | |
4f673c5e | 3708 | /* Compute available occurrences. */ |
9243c3d1 JZZ |
3709 | if (reaching_out.valid_or_dirty_p ()) |
3710 | { | |
9243c3d1 JZZ |
3711 | auto_vec<size_t> available_list |
3712 | = m_vector_manager->get_all_available_exprs (reaching_out); | |
3713 | for (size_t i = 0; i < available_list.length (); i++) | |
4f673c5e JZZ |
3714 | { |
3715 | const vector_insn_info *expr | |
3716 | = m_vector_manager->vector_exprs[available_list[i]]; | |
3717 | if (reaching_out.real_dirty_p () | |
3718 | || has_vsetvl_killed_avl_p (bb, reaching_out) | |
3719 | || available_occurrence_p (bb, *expr)) | |
3720 | bitmap_set_bit (m_vector_manager->vector_comp[curr_bb_idx], | |
3721 | available_list[i]); | |
3722 | } | |
9243c3d1 JZZ |
3723 | } |
3724 | } | |
3725 | ||
3726 | /* Compute kill for each basic block using: | |
3727 | ||
3728 | ~(TRANSP | COMP) | |
3729 | */ | |
3730 | ||
3731 | FOR_EACH_BB_FN (cfg_bb, cfun) | |
3732 | { | |
3733 | bitmap_ior (m_vector_manager->vector_kill[cfg_bb->index], | |
3734 | m_vector_manager->vector_transp[cfg_bb->index], | |
3735 | m_vector_manager->vector_comp[cfg_bb->index]); | |
3736 | bitmap_not (m_vector_manager->vector_kill[cfg_bb->index], | |
3737 | m_vector_manager->vector_kill[cfg_bb->index]); | |
3738 | } | |
3739 | ||
3740 | FOR_EACH_BB_FN (cfg_bb, cfun) | |
3741 | { | |
3742 | edge e; | |
3743 | edge_iterator ei; | |
3744 | ||
3745 | /* If the current block is the destination of an abnormal edge, we | |
3746 | kill all trapping (for PRE) and memory (for hoist) expressions | |
3747 | because we won't be able to properly place the instruction on | |
3748 | the edge. So make them neither anticipatable nor transparent. | |
3749 | This is fairly conservative. | |
3750 | ||
3751 | ??? For hoisting it may be necessary to check for set-and-jump | |
3752 | instructions here, not just for abnormal edges. The general problem | |
3753 | is that when an expression cannot not be placed right at the end of | |
3754 | a basic block we should account for any side-effects of a subsequent | |
3755 | jump instructions that could clobber the expression. It would | |
3756 | be best to implement this check along the lines of | |
3757 | should_hoist_expr_to_dom where the target block is already known | |
3758 | and, hence, there's no need to conservatively prune expressions on | |
3759 | "intermediate" set-and-jump instructions. */ | |
3760 | FOR_EACH_EDGE (e, ei, cfg_bb->preds) | |
3761 | if (e->flags & EDGE_COMPLEX) | |
3762 | { | |
3763 | bitmap_clear (m_vector_manager->vector_antic[cfg_bb->index]); | |
3764 | bitmap_clear (m_vector_manager->vector_transp[cfg_bb->index]); | |
3765 | } | |
3766 | } | |
3767 | } | |
3768 | ||
3769 | /* Return true if VSETVL in the block can be refined as vsetvl zero,zero. */ | |
3770 | bool | |
6b6b9c68 JZZ |
3771 | pass_vsetvl::can_refine_vsetvl_p (const basic_block cfg_bb, |
3772 | const vector_insn_info &info) const | |
9243c3d1 JZZ |
3773 | { |
3774 | if (!m_vector_manager->all_same_ratio_p ( | |
3775 | m_vector_manager->vector_avin[cfg_bb->index])) | |
3776 | return false; | |
3777 | ||
005fad9d JZZ |
3778 | if (!m_vector_manager->all_same_avl_p ( |
3779 | cfg_bb, m_vector_manager->vector_avin[cfg_bb->index])) | |
3780 | return false; | |
3781 | ||
9243c3d1 JZZ |
3782 | size_t expr_id |
3783 | = bitmap_first_set_bit (m_vector_manager->vector_avin[cfg_bb->index]); | |
6b6b9c68 JZZ |
3784 | if (!m_vector_manager->vector_exprs[expr_id]->same_vlmax_p (info)) |
3785 | return false; | |
3786 | if (!m_vector_manager->vector_exprs[expr_id]->compatible_avl_p (info)) | |
9243c3d1 JZZ |
3787 | return false; |
3788 | ||
3789 | edge e; | |
3790 | edge_iterator ei; | |
3791 | bool all_valid_p = true; | |
3792 | FOR_EACH_EDGE (e, ei, cfg_bb->preds) | |
3793 | { | |
3794 | if (bitmap_empty_p (m_vector_manager->vector_avout[e->src->index])) | |
3795 | { | |
3796 | all_valid_p = false; | |
3797 | break; | |
3798 | } | |
3799 | } | |
3800 | ||
3801 | if (!all_valid_p) | |
3802 | return false; | |
3803 | return true; | |
3804 | } | |
3805 | ||
3806 | /* Optimize athe case like this: | |
3807 | ||
3808 | bb 0: | |
3809 | vsetvl 0 a5,zero,e8,mf8 | |
3810 | insn 0 (demand SEW + LMUL) | |
3811 | bb 1: | |
3812 | vsetvl 1 a5,zero,e16,mf4 | |
3813 | insn 1 (demand SEW + LMUL) | |
3814 | ||
3815 | In this case, we should be able to refine | |
3816 | vsetvl 1 into vsetvl zero, zero according AVIN. */ | |
3817 | void | |
3818 | pass_vsetvl::refine_vsetvls (void) const | |
3819 | { | |
3820 | basic_block cfg_bb; | |
3821 | FOR_EACH_BB_FN (cfg_bb, cfun) | |
3822 | { | |
3823 | auto info = m_vector_manager->vector_block_infos[cfg_bb->index].local_dem; | |
3824 | insn_info *insn = info.get_insn (); | |
3825 | if (!info.valid_p ()) | |
3826 | continue; | |
3827 | ||
3828 | rtx_insn *rinsn = insn->rtl (); | |
6b6b9c68 | 3829 | if (!can_refine_vsetvl_p (cfg_bb, info)) |
9243c3d1 JZZ |
3830 | continue; |
3831 | ||
ec99ffab JZZ |
3832 | /* We can't refine user vsetvl into vsetvl zero,zero since the dest |
3833 | will be used by the following instructions. */ | |
3834 | if (vector_config_insn_p (rinsn)) | |
3835 | { | |
3836 | m_vector_manager->to_refine_vsetvls.add (rinsn); | |
3837 | continue; | |
3838 | } | |
ff8f9544 JZ |
3839 | |
3840 | /* If all incoming edges to a block have a vector state that is compatbile | |
3841 | with the block. In such a case we need not emit a vsetvl in the current | |
3842 | block. */ | |
3843 | ||
3844 | gcc_assert (has_vtype_op (insn->rtl ())); | |
3845 | rinsn = PREV_INSN (insn->rtl ()); | |
3846 | gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ()))); | |
3847 | if (m_vector_manager->all_avail_in_compatible_p (cfg_bb)) | |
3848 | { | |
3849 | size_t id = m_vector_manager->get_expr_id (info); | |
3850 | if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], id)) | |
3851 | continue; | |
3852 | eliminate_insn (rinsn); | |
3853 | } | |
3854 | else | |
3855 | { | |
3856 | rtx new_pat | |
3857 | = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX); | |
3858 | change_insn (rinsn, new_pat); | |
3859 | } | |
9243c3d1 JZZ |
3860 | } |
3861 | } | |
3862 | ||
3863 | void | |
3864 | pass_vsetvl::cleanup_vsetvls () | |
3865 | { | |
3866 | basic_block cfg_bb; | |
3867 | FOR_EACH_BB_FN (cfg_bb, cfun) | |
3868 | { | |
3869 | auto &info | |
3870 | = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out; | |
3871 | gcc_assert (m_vector_manager->expr_set_num ( | |
3872 | m_vector_manager->vector_del[cfg_bb->index]) | |
3873 | <= 1); | |
3874 | for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) | |
3875 | { | |
3876 | if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], i)) | |
3877 | { | |
3878 | if (info.dirty_p ()) | |
3879 | info.set_unknown (); | |
3880 | else | |
3881 | { | |
4f673c5e JZZ |
3882 | const auto dem |
3883 | = m_vector_manager->vector_block_infos[cfg_bb->index] | |
3884 | .local_dem; | |
3885 | gcc_assert (dem == *m_vector_manager->vector_exprs[i]); | |
3886 | insn_info *insn = dem.get_insn (); | |
9243c3d1 JZZ |
3887 | gcc_assert (insn && insn->rtl ()); |
3888 | rtx_insn *rinsn; | |
ec99ffab JZZ |
3889 | /* We can't eliminate user vsetvl since the dest will be used |
3890 | * by the following instructions. */ | |
9243c3d1 | 3891 | if (vector_config_insn_p (insn->rtl ())) |
9243c3d1 | 3892 | { |
ec99ffab JZZ |
3893 | m_vector_manager->to_delete_vsetvls.add (insn->rtl ()); |
3894 | continue; | |
9243c3d1 | 3895 | } |
ec99ffab JZZ |
3896 | |
3897 | gcc_assert (has_vtype_op (insn->rtl ())); | |
3898 | rinsn = PREV_INSN (insn->rtl ()); | |
3899 | gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ()))); | |
9243c3d1 JZZ |
3900 | eliminate_insn (rinsn); |
3901 | } | |
3902 | } | |
3903 | } | |
3904 | } | |
3905 | } | |
3906 | ||
3907 | bool | |
3908 | pass_vsetvl::commit_vsetvls (void) | |
3909 | { | |
3910 | bool need_commit = false; | |
3911 | ||
3912 | for (int ed = 0; ed < NUM_EDGES (m_vector_manager->vector_edge_list); ed++) | |
3913 | { | |
3914 | for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) | |
3915 | { | |
3916 | edge eg = INDEX_EDGE (m_vector_manager->vector_edge_list, ed); | |
3917 | if (bitmap_bit_p (m_vector_manager->vector_insert[ed], i)) | |
3918 | { | |
3919 | const vector_insn_info *require | |
3920 | = m_vector_manager->vector_exprs[i]; | |
3921 | gcc_assert (require->valid_or_dirty_p ()); | |
3922 | rtl_profile_for_edge (eg); | |
3923 | start_sequence (); | |
3924 | ||
3925 | insn_info *insn = require->get_insn (); | |
3926 | vector_insn_info prev_info = vector_insn_info (); | |
005fad9d JZZ |
3927 | sbitmap bitdata = m_vector_manager->vector_avout[eg->src->index]; |
3928 | if (m_vector_manager->all_same_ratio_p (bitdata) | |
3929 | && m_vector_manager->all_same_avl_p (eg->dest, bitdata)) | |
9243c3d1 | 3930 | { |
005fad9d | 3931 | size_t first = bitmap_first_set_bit (bitdata); |
9243c3d1 JZZ |
3932 | prev_info = *m_vector_manager->vector_exprs[first]; |
3933 | } | |
3934 | ||
3935 | insert_vsetvl (EMIT_DIRECT, insn->rtl (), *require, prev_info); | |
3936 | rtx_insn *rinsn = get_insns (); | |
3937 | end_sequence (); | |
3938 | default_rtl_profile (); | |
3939 | ||
3940 | /* We should not get an abnormal edge here. */ | |
3941 | gcc_assert (!(eg->flags & EDGE_ABNORMAL)); | |
3942 | need_commit = true; | |
3943 | insert_insn_on_edge (rinsn, eg); | |
3944 | } | |
3945 | } | |
3946 | } | |
3947 | ||
4f673c5e | 3948 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 3949 | { |
4f673c5e | 3950 | basic_block cfg_bb = bb->cfg_bb (); |
9243c3d1 JZZ |
3951 | const auto reaching_out |
3952 | = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out; | |
3953 | if (!reaching_out.dirty_p ()) | |
3954 | continue; | |
3955 | ||
4f673c5e JZZ |
3956 | if (reaching_out.dirty_with_killed_avl_p ()) |
3957 | { | |
3958 | if (!has_vsetvl_killed_avl_p (bb, reaching_out)) | |
3959 | continue; | |
3960 | ||
3961 | unsigned int bb_index; | |
3962 | sbitmap_iterator sbi; | |
3963 | sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index]; | |
3964 | bool available_p = false; | |
3965 | EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi) | |
3966 | { | |
ec99ffab JZZ |
3967 | if (m_vector_manager->vector_exprs[bb_index]->available_p ( |
3968 | reaching_out)) | |
4f673c5e JZZ |
3969 | { |
3970 | available_p = true; | |
3971 | break; | |
3972 | } | |
3973 | } | |
3974 | if (available_p) | |
3975 | continue; | |
3976 | } | |
7ae4d1df JZZ |
3977 | |
3978 | rtx new_pat; | |
ec99ffab JZZ |
3979 | if (!reaching_out.demand_p (DEMAND_AVL)) |
3980 | { | |
3981 | vl_vtype_info new_info = reaching_out; | |
3982 | new_info.set_avl_info (avl_info (const0_rtx, nullptr)); | |
3983 | new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX); | |
3984 | } | |
3985 | else if (can_refine_vsetvl_p (cfg_bb, reaching_out)) | |
9243c3d1 JZZ |
3986 | new_pat |
3987 | = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, reaching_out, NULL_RTX); | |
7ae4d1df JZZ |
3988 | else if (vlmax_avl_p (reaching_out.get_avl ())) |
3989 | new_pat = gen_vsetvl_pat (VSETVL_NORMAL, reaching_out, | |
ec99ffab | 3990 | reaching_out.get_avl_reg_rtx ()); |
7ae4d1df JZZ |
3991 | else |
3992 | new_pat | |
3993 | = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, reaching_out, NULL_RTX); | |
9243c3d1 JZZ |
3994 | |
3995 | start_sequence (); | |
3996 | emit_insn (new_pat); | |
3997 | rtx_insn *rinsn = get_insns (); | |
3998 | end_sequence (); | |
3999 | insert_insn_end_basic_block (rinsn, cfg_bb); | |
4000 | if (dump_file) | |
4001 | { | |
4002 | fprintf (dump_file, | |
4003 | "\nInsert vsetvl insn %d at the end of <bb %d>:\n", | |
4004 | INSN_UID (rinsn), cfg_bb->index); | |
4005 | print_rtl_single (dump_file, rinsn); | |
4006 | } | |
4007 | } | |
4008 | ||
4009 | return need_commit; | |
4010 | } | |
4011 | ||
4012 | void | |
4013 | pass_vsetvl::pre_vsetvl (void) | |
4014 | { | |
4015 | /* Compute entity list. */ | |
4016 | prune_expressions (); | |
4017 | ||
cfe3fbc6 | 4018 | m_vector_manager->create_bitmap_vectors (); |
9243c3d1 JZZ |
4019 | compute_local_properties (); |
4020 | m_vector_manager->vector_edge_list = pre_edge_lcm_avs ( | |
4021 | m_vector_manager->vector_exprs.length (), m_vector_manager->vector_transp, | |
4022 | m_vector_manager->vector_comp, m_vector_manager->vector_antic, | |
4023 | m_vector_manager->vector_kill, m_vector_manager->vector_avin, | |
4024 | m_vector_manager->vector_avout, &m_vector_manager->vector_insert, | |
4025 | &m_vector_manager->vector_del); | |
4026 | ||
4027 | /* We should dump the information before CFG is changed. Otherwise it will | |
4028 | produce ICE (internal compiler error). */ | |
4029 | if (dump_file) | |
4030 | m_vector_manager->dump (dump_file); | |
4031 | ||
4032 | refine_vsetvls (); | |
4033 | cleanup_vsetvls (); | |
4034 | bool need_commit = commit_vsetvls (); | |
4035 | if (need_commit) | |
4036 | commit_edge_insertions (); | |
4037 | } | |
4038 | ||
c5a1fa59 JZ |
4039 | /* Before VSETVL PASS, RVV instructions pattern is depending on AVL operand |
4040 | implicitly. Since we will emit VSETVL instruction and make RVV instructions | |
4041 | depending on VL/VTYPE global status registers, we remove the such AVL operand | |
4042 | in the RVV instructions pattern here in order to remove AVL dependencies when | |
4043 | AVL operand is a register operand. | |
4044 | ||
4045 | Before the VSETVL PASS: | |
4046 | li a5,32 | |
4047 | ... | |
4048 | vadd.vv (..., a5) | |
4049 | After the VSETVL PASS: | |
4050 | li a5,32 | |
4051 | vsetvli zero, a5, ... | |
4052 | ... | |
4053 | vadd.vv (..., const_int 0). */ | |
9243c3d1 JZZ |
4054 | void |
4055 | pass_vsetvl::cleanup_insns (void) const | |
4056 | { | |
4057 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
4058 | { | |
4059 | for (insn_info *insn : bb->real_nondebug_insns ()) | |
4060 | { | |
4061 | rtx_insn *rinsn = insn->rtl (); | |
d51f2456 JZ |
4062 | const auto &dem = m_vector_manager->vector_insn_infos[insn->uid ()]; |
4063 | /* Eliminate local vsetvl: | |
4064 | bb 0: | |
4065 | vsetvl a5,a6,... | |
4066 | vsetvl zero,a5. | |
4067 | ||
4068 | Eliminate vsetvl in bb2 when a5 is only coming from | |
4069 | bb 0. */ | |
4070 | local_eliminate_vsetvl_insn (dem); | |
9243c3d1 JZZ |
4071 | |
4072 | if (vlmax_avl_insn_p (rinsn)) | |
4073 | { | |
4074 | eliminate_insn (rinsn); | |
4075 | continue; | |
4076 | } | |
4077 | ||
4078 | /* Erase the AVL operand from the instruction. */ | |
4079 | if (!has_vl_op (rinsn) || !REG_P (get_vl (rinsn))) | |
4080 | continue; | |
4081 | rtx avl = get_vl (rinsn); | |
a2d12abe | 4082 | if (count_regno_occurrences (rinsn, REGNO (avl)) == 1) |
9243c3d1 JZZ |
4083 | { |
4084 | /* Get the list of uses for the new instruction. */ | |
4085 | auto attempt = crtl->ssa->new_change_attempt (); | |
4086 | insn_change change (insn); | |
4087 | /* Remove the use of the substituted value. */ | |
4088 | access_array_builder uses_builder (attempt); | |
4089 | uses_builder.reserve (insn->num_uses () - 1); | |
4090 | for (use_info *use : insn->uses ()) | |
4091 | if (use != find_access (insn->uses (), REGNO (avl))) | |
4092 | uses_builder.quick_push (use); | |
4093 | use_array new_uses = use_array (uses_builder.finish ()); | |
4094 | change.new_uses = new_uses; | |
4095 | change.move_range = insn->ebb ()->insn_range (); | |
60bd33bc JZZ |
4096 | rtx pat; |
4097 | if (fault_first_load_p (rinsn)) | |
4098 | pat = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx); | |
4099 | else | |
4100 | { | |
4101 | rtx set = single_set (rinsn); | |
4102 | rtx src | |
4103 | = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx); | |
4104 | pat = gen_rtx_SET (SET_DEST (set), src); | |
4105 | } | |
9243c3d1 JZZ |
4106 | gcc_assert (change_insn (crtl->ssa, change, insn, pat)); |
4107 | } | |
4108 | } | |
4109 | } | |
4110 | } | |
4111 | ||
6b6b9c68 JZZ |
4112 | void |
4113 | pass_vsetvl::propagate_avl (void) const | |
4114 | { | |
4115 | /* Rebuild the RTL_SSA according to the new CFG generated by LCM. */ | |
4116 | /* Finalization of RTL_SSA. */ | |
4117 | free_dominance_info (CDI_DOMINATORS); | |
4118 | if (crtl->ssa->perform_pending_updates ()) | |
4119 | cleanup_cfg (0); | |
4120 | delete crtl->ssa; | |
4121 | crtl->ssa = nullptr; | |
4122 | /* Initialization of RTL_SSA. */ | |
4123 | calculate_dominance_info (CDI_DOMINATORS); | |
4124 | df_analyze (); | |
4125 | crtl->ssa = new function_info (cfun); | |
4126 | ||
4127 | hash_set<rtx_insn *> to_delete; | |
4128 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
4129 | { | |
4130 | for (insn_info *insn : bb->real_nondebug_insns ()) | |
4131 | { | |
4132 | if (vsetvl_discard_result_insn_p (insn->rtl ())) | |
4133 | { | |
4134 | rtx avl = get_avl (insn->rtl ()); | |
4135 | if (!REG_P (avl)) | |
4136 | continue; | |
4137 | ||
4138 | set_info *set = find_access (insn->uses (), REGNO (avl))->def (); | |
4139 | insn_info *def_insn = extract_single_source (set); | |
4140 | if (!def_insn) | |
4141 | continue; | |
4142 | ||
4143 | /* Handle this case: | |
4144 | vsetvli a6,zero,e32,m1,ta,mu | |
4145 | li a5,4096 | |
4146 | add a7,a0,a5 | |
4147 | addi a7,a7,-96 | |
4148 | vsetvli t1,zero,e8,mf8,ta,ma | |
4149 | vle8.v v24,0(a7) | |
4150 | add a5,a3,a5 | |
4151 | addi a5,a5,-96 | |
4152 | vse8.v v24,0(a5) | |
4153 | vsetvli zero,a6,e32,m1,tu,ma | |
4154 | */ | |
4155 | if (vsetvl_insn_p (def_insn->rtl ())) | |
4156 | { | |
4157 | vl_vtype_info def_info = get_vl_vtype_info (def_insn); | |
4158 | vl_vtype_info info = get_vl_vtype_info (insn); | |
4159 | rtx avl = get_avl (def_insn->rtl ()); | |
4160 | rtx vl = get_vl (def_insn->rtl ()); | |
4161 | if (def_info.get_ratio () == info.get_ratio ()) | |
4162 | { | |
4163 | if (vlmax_avl_p (def_info.get_avl ())) | |
4164 | { | |
4165 | info.set_avl_info ( | |
4166 | avl_info (def_info.get_avl (), nullptr)); | |
4167 | rtx new_pat | |
4168 | = gen_vsetvl_pat (VSETVL_NORMAL, info, vl); | |
4169 | validate_change (insn->rtl (), | |
4170 | &PATTERN (insn->rtl ()), new_pat, | |
4171 | false); | |
4172 | continue; | |
4173 | } | |
4174 | if (def_info.has_avl_imm () || rtx_equal_p (avl, vl)) | |
4175 | { | |
4176 | info.set_avl_info (avl_info (avl, nullptr)); | |
4177 | emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_AFTER, | |
4178 | info, NULL_RTX, insn->rtl ()); | |
4179 | if (set->single_nondebug_insn_use ()) | |
4180 | { | |
4181 | to_delete.add (insn->rtl ()); | |
4182 | to_delete.add (def_insn->rtl ()); | |
4183 | } | |
4184 | continue; | |
4185 | } | |
4186 | } | |
4187 | } | |
4188 | } | |
4189 | ||
4190 | /* Change vsetvl rd, rs1 --> vsevl zero, rs1, | |
4191 | if rd is not used by any nondebug instructions. | |
4192 | Even though this PASS runs after RA and it doesn't help for | |
4193 | reduce register pressure, it can help instructions scheduling | |
4194 | since we remove the dependencies. */ | |
4195 | if (vsetvl_insn_p (insn->rtl ())) | |
4196 | { | |
4197 | rtx vl = get_vl (insn->rtl ()); | |
4198 | rtx avl = get_avl (insn->rtl ()); | |
6b6b9c68 JZZ |
4199 | def_info *def = find_access (insn->defs (), REGNO (vl)); |
4200 | set_info *set = safe_dyn_cast<set_info *> (def); | |
ec99ffab JZZ |
4201 | vector_insn_info info; |
4202 | info.parse_insn (insn); | |
6b6b9c68 | 4203 | gcc_assert (set); |
ec99ffab JZZ |
4204 | if (m_vector_manager->to_delete_vsetvls.contains (insn->rtl ())) |
4205 | { | |
4206 | m_vector_manager->to_delete_vsetvls.remove (insn->rtl ()); | |
4207 | if (m_vector_manager->to_refine_vsetvls.contains ( | |
4208 | insn->rtl ())) | |
4209 | m_vector_manager->to_refine_vsetvls.remove (insn->rtl ()); | |
4210 | if (!set->has_nondebug_insn_uses ()) | |
4211 | { | |
4212 | to_delete.add (insn->rtl ()); | |
4213 | continue; | |
4214 | } | |
4215 | } | |
4216 | if (m_vector_manager->to_refine_vsetvls.contains (insn->rtl ())) | |
4217 | { | |
4218 | m_vector_manager->to_refine_vsetvls.remove (insn->rtl ()); | |
4219 | if (!set->has_nondebug_insn_uses ()) | |
4220 | { | |
4221 | rtx new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, | |
4222 | info, NULL_RTX); | |
4223 | change_insn (insn->rtl (), new_pat); | |
4224 | continue; | |
4225 | } | |
4226 | } | |
4227 | if (vlmax_avl_p (avl)) | |
4228 | continue; | |
6b6b9c68 JZZ |
4229 | rtx new_pat |
4230 | = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, info, NULL_RTX); | |
4231 | if (!set->has_nondebug_insn_uses ()) | |
4232 | { | |
4233 | validate_change (insn->rtl (), &PATTERN (insn->rtl ()), | |
4234 | new_pat, false); | |
4235 | continue; | |
4236 | } | |
4237 | } | |
4238 | } | |
4239 | } | |
4240 | ||
4241 | for (rtx_insn *rinsn : to_delete) | |
4242 | eliminate_insn (rinsn); | |
4243 | } | |
4244 | ||
9243c3d1 JZZ |
4245 | void |
4246 | pass_vsetvl::init (void) | |
4247 | { | |
4248 | if (optimize > 0) | |
4249 | { | |
4250 | /* Initialization of RTL_SSA. */ | |
4251 | calculate_dominance_info (CDI_DOMINATORS); | |
4252 | df_analyze (); | |
4253 | crtl->ssa = new function_info (cfun); | |
4254 | } | |
4255 | ||
4256 | m_vector_manager = new vector_infos_manager (); | |
4f673c5e | 4257 | compute_probabilities (); |
9243c3d1 JZZ |
4258 | |
4259 | if (dump_file) | |
4260 | { | |
4261 | fprintf (dump_file, "\nPrologue: Initialize vector infos\n"); | |
4262 | m_vector_manager->dump (dump_file); | |
4263 | } | |
4264 | } | |
4265 | ||
4266 | void | |
4267 | pass_vsetvl::done (void) | |
4268 | { | |
4269 | if (optimize > 0) | |
4270 | { | |
4271 | /* Finalization of RTL_SSA. */ | |
4272 | free_dominance_info (CDI_DOMINATORS); | |
4273 | if (crtl->ssa->perform_pending_updates ()) | |
4274 | cleanup_cfg (0); | |
4275 | delete crtl->ssa; | |
4276 | crtl->ssa = nullptr; | |
4277 | } | |
4278 | m_vector_manager->release (); | |
4279 | delete m_vector_manager; | |
4280 | m_vector_manager = nullptr; | |
4281 | } | |
4282 | ||
acc10c79 JZZ |
4283 | /* Compute probability for each block. */ |
4284 | void | |
4285 | pass_vsetvl::compute_probabilities (void) | |
4286 | { | |
4287 | /* Don't compute it in -O0 since we don't need it. */ | |
4288 | if (!optimize) | |
4289 | return; | |
4290 | edge e; | |
4291 | edge_iterator ei; | |
4292 | ||
4293 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
4294 | { | |
4295 | basic_block cfg_bb = bb->cfg_bb (); | |
4296 | auto &curr_prob | |
4297 | = m_vector_manager->vector_block_infos[cfg_bb->index].probability; | |
c129d22d JZZ |
4298 | |
4299 | /* GCC assume entry block (bb 0) are always so | |
4300 | executed so set its probability as "always". */ | |
acc10c79 JZZ |
4301 | if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb) |
4302 | curr_prob = profile_probability::always (); | |
c129d22d JZZ |
4303 | /* Exit block (bb 1) is the block we don't need to process. */ |
4304 | if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb) | |
4305 | continue; | |
4306 | ||
acc10c79 JZZ |
4307 | gcc_assert (curr_prob.initialized_p ()); |
4308 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
4309 | { | |
4310 | auto &new_prob | |
4311 | = m_vector_manager->vector_block_infos[e->dest->index].probability; | |
4312 | if (!new_prob.initialized_p ()) | |
4313 | new_prob = curr_prob * e->probability; | |
4314 | else if (new_prob == profile_probability::always ()) | |
4315 | continue; | |
4316 | else | |
4317 | new_prob += curr_prob * e->probability; | |
4318 | } | |
4319 | } | |
acc10c79 JZZ |
4320 | } |
4321 | ||
9243c3d1 JZZ |
4322 | /* Lazy vsetvl insertion for optimize > 0. */ |
4323 | void | |
4324 | pass_vsetvl::lazy_vsetvl (void) | |
4325 | { | |
4326 | if (dump_file) | |
4327 | fprintf (dump_file, | |
4328 | "\nEntering Lazy VSETVL PASS and Handling %d basic blocks for " | |
4329 | "function:%s\n", | |
4330 | n_basic_blocks_for_fn (cfun), function_name (cfun)); | |
4331 | ||
4332 | /* Phase 1 - Compute the local dems within each block. | |
4333 | The data-flow analysis within each block is backward analysis. */ | |
4334 | if (dump_file) | |
4335 | fprintf (dump_file, "\nPhase 1: Compute local backward vector infos\n"); | |
4336 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
4337 | compute_local_backward_infos (bb); | |
4338 | if (dump_file) | |
4339 | m_vector_manager->dump (dump_file); | |
4340 | ||
4341 | /* Phase 2 - Emit vsetvl instructions within each basic block according to | |
4342 | demand, compute and save ANTLOC && AVLOC of each block. */ | |
4343 | if (dump_file) | |
4344 | fprintf (dump_file, | |
4345 | "\nPhase 2: Emit vsetvl instruction within each block\n"); | |
4346 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
4347 | emit_local_forward_vsetvls (bb); | |
4348 | if (dump_file) | |
4349 | m_vector_manager->dump (dump_file); | |
4350 | ||
4351 | /* Phase 3 - Propagate demanded info across blocks. */ | |
4352 | if (dump_file) | |
4353 | fprintf (dump_file, "\nPhase 3: Demands propagation across blocks\n"); | |
387cd9d3 | 4354 | demand_fusion (); |
9243c3d1 JZZ |
4355 | if (dump_file) |
4356 | m_vector_manager->dump (dump_file); | |
4357 | ||
4358 | /* Phase 4 - Lazy code motion. */ | |
4359 | if (dump_file) | |
4360 | fprintf (dump_file, "\nPhase 4: PRE vsetvl by Lazy code motion (LCM)\n"); | |
4361 | pre_vsetvl (); | |
4362 | ||
4363 | /* Phase 5 - Cleanup AVL && VL operand of RVV instruction. */ | |
4364 | if (dump_file) | |
4365 | fprintf (dump_file, "\nPhase 5: Cleanup AVL and VL operands\n"); | |
4366 | cleanup_insns (); | |
6b6b9c68 JZZ |
4367 | |
4368 | /* Phase 6 - Rebuild RTL_SSA to propagate AVL between vsetvls. */ | |
4369 | if (dump_file) | |
4370 | fprintf (dump_file, | |
4371 | "\nPhase 6: Rebuild RTL_SSA to propagate AVL between vsetvls\n"); | |
4372 | propagate_avl (); | |
9243c3d1 JZZ |
4373 | } |
4374 | ||
4375 | /* Main entry point for this pass. */ | |
4376 | unsigned int | |
4377 | pass_vsetvl::execute (function *) | |
4378 | { | |
4379 | if (n_basic_blocks_for_fn (cfun) <= 0) | |
4380 | return 0; | |
4381 | ||
ca8fb009 JZZ |
4382 | /* The RVV instruction may change after split which is not a stable |
4383 | instruction. We need to split it here to avoid potential issue | |
4384 | since the VSETVL PASS is insert before split PASS. */ | |
4385 | split_all_insns (); | |
9243c3d1 JZZ |
4386 | |
4387 | /* Early return for there is no vector instructions. */ | |
4388 | if (!has_vector_insn (cfun)) | |
4389 | return 0; | |
4390 | ||
4391 | init (); | |
4392 | ||
4393 | if (!optimize) | |
4394 | simple_vsetvl (); | |
4395 | else | |
4396 | lazy_vsetvl (); | |
4397 | ||
4398 | done (); | |
4399 | return 0; | |
4400 | } | |
4401 | ||
4402 | rtl_opt_pass * | |
4403 | make_pass_vsetvl (gcc::context *ctxt) | |
4404 | { | |
4405 | return new pass_vsetvl (ctxt); | |
4406 | } |