]>
Commit | Line | Data |
---|---|---|
9243c3d1 | 1 | /* VSETVL pass for RISC-V 'V' Extension for GNU compiler. |
c841bde5 | 2 | Copyright (C) 2022-2023 Free Software Foundation, Inc. |
9243c3d1 JZZ |
3 | Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd. |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or(at your option) | |
10 | any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
21 | /* This pass is to Set VL/VTYPE global status for RVV instructions | |
22 | that depend on VL and VTYPE registers by Lazy code motion (LCM). | |
23 | ||
24 | Strategy: | |
25 | ||
26 | - Backward demanded info fusion within block. | |
27 | ||
28 | - Lazy code motion (LCM) based demanded info backward propagation. | |
29 | ||
30 | - RTL_SSA framework for def-use, PHI analysis. | |
31 | ||
32 | - Lazy code motion (LCM) for global VL/VTYPE optimization. | |
33 | ||
34 | Assumption: | |
35 | ||
36 | - Each avl operand is either an immediate (must be in range 0 ~ 31) or reg. | |
37 | ||
38 | This pass consists of 6 phases: | |
39 | ||
40 | - Phase 1 - compute VL/VTYPE demanded information within each block | |
41 | by backward data-flow analysis. | |
42 | ||
43 | - Phase 2 - Emit vsetvl instructions within each basic block according to | |
44 | demand, compute and save ANTLOC && AVLOC of each block. | |
45 | ||
387cd9d3 JZZ |
46 | - Phase 3 - Backward && forward demanded info propagation and fusion across |
47 | blocks. | |
9243c3d1 JZZ |
48 | |
49 | - Phase 4 - Lazy code motion including: compute local properties, | |
50 | pre_edge_lcm and vsetvl insertion && delete edges for LCM results. | |
51 | ||
52 | - Phase 5 - Cleanup AVL operand of RVV instruction since it will not be | |
53 | used any more and VL operand of VSETVL instruction if it is not used by | |
54 | any non-debug instructions. | |
55 | ||
6b6b9c68 JZZ |
56 | - Phase 6 - Propagate AVL between vsetvl instructions. |
57 | ||
9243c3d1 JZZ |
58 | Implementation: |
59 | ||
60 | - The subroutine of optimize == 0 is simple_vsetvl. | |
61 | This function simply inserts a vsetvl for each RVV | |
62 | instruction. No optimization. | |
63 | ||
64 | - The subroutine of optimize > 0 is lazy_vsetvl. | |
65 | This function optimizes the vsetvl insertion process by | |
ec99ffab JZZ |
66 | lazy code motion (LCM) layering on RTL_SSA. |
67 | ||
68 | - get_avl (), get_insn (), get_avl_source (): | |
69 | ||
70 | 1. get_insn () is the current instruction, find_access (get_insn | |
71 | ())->def is the same as get_avl_source () if get_insn () demand VL. | |
72 | 2. If get_avl () is non-VLMAX REG, get_avl () == get_avl_source | |
73 | ()->regno (). | |
74 | 3. get_avl_source ()->regno () is the REGNO that we backward propagate. | |
75 | */ | |
9243c3d1 JZZ |
76 | |
77 | #define IN_TARGET_CODE 1 | |
78 | #define INCLUDE_ALGORITHM | |
79 | #define INCLUDE_FUNCTIONAL | |
80 | ||
81 | #include "config.h" | |
82 | #include "system.h" | |
83 | #include "coretypes.h" | |
84 | #include "tm.h" | |
85 | #include "backend.h" | |
86 | #include "rtl.h" | |
87 | #include "target.h" | |
88 | #include "tree-pass.h" | |
89 | #include "df.h" | |
90 | #include "rtl-ssa.h" | |
91 | #include "cfgcleanup.h" | |
92 | #include "insn-config.h" | |
93 | #include "insn-attr.h" | |
94 | #include "insn-opinit.h" | |
95 | #include "tm-constrs.h" | |
96 | #include "cfgrtl.h" | |
97 | #include "cfganal.h" | |
98 | #include "lcm.h" | |
99 | #include "predict.h" | |
100 | #include "profile-count.h" | |
101 | #include "riscv-vsetvl.h" | |
102 | ||
103 | using namespace rtl_ssa; | |
104 | using namespace riscv_vector; | |
105 | ||
ec99ffab JZZ |
106 | static CONSTEXPR const unsigned ALL_SEW[] = {8, 16, 32, 64}; |
107 | static CONSTEXPR const vlmul_type ALL_LMUL[] | |
108 | = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2}; | |
ec99ffab | 109 | |
9243c3d1 JZZ |
110 | DEBUG_FUNCTION void |
111 | debug (const vector_insn_info *info) | |
112 | { | |
113 | info->dump (stderr); | |
114 | } | |
115 | ||
116 | DEBUG_FUNCTION void | |
117 | debug (const vector_infos_manager *info) | |
118 | { | |
119 | info->dump (stderr); | |
120 | } | |
121 | ||
122 | static bool | |
123 | vlmax_avl_p (rtx x) | |
124 | { | |
125 | return x && rtx_equal_p (x, RVV_VLMAX); | |
126 | } | |
127 | ||
128 | static bool | |
129 | vlmax_avl_insn_p (rtx_insn *rinsn) | |
130 | { | |
85112fbb JZZ |
131 | return (INSN_CODE (rinsn) == CODE_FOR_vlmax_avlsi |
132 | || INSN_CODE (rinsn) == CODE_FOR_vlmax_avldi); | |
9243c3d1 JZZ |
133 | } |
134 | ||
8d8cc482 JZZ |
135 | /* Return true if the block is a loop itself: |
136 | local_dem | |
137 | __________ | |
138 | ____|____ | | |
139 | | | | | |
140 | |________| | | |
141 | |_________| | |
142 | reaching_out | |
143 | */ | |
9243c3d1 JZZ |
144 | static bool |
145 | loop_basic_block_p (const basic_block cfg_bb) | |
146 | { | |
8d8cc482 JZZ |
147 | if (JUMP_P (BB_END (cfg_bb)) && any_condjump_p (BB_END (cfg_bb))) |
148 | { | |
149 | edge e; | |
150 | edge_iterator ei; | |
151 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
152 | if (e->dest->index == cfg_bb->index) | |
153 | return true; | |
154 | } | |
155 | return false; | |
9243c3d1 JZZ |
156 | } |
157 | ||
158 | /* Return true if it is an RVV instruction depends on VTYPE global | |
159 | status register. */ | |
160 | static bool | |
161 | has_vtype_op (rtx_insn *rinsn) | |
162 | { | |
163 | return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn); | |
164 | } | |
165 | ||
166 | /* Return true if it is an RVV instruction depends on VL global | |
167 | status register. */ | |
168 | static bool | |
169 | has_vl_op (rtx_insn *rinsn) | |
170 | { | |
171 | return recog_memoized (rinsn) >= 0 && get_attr_has_vl_op (rinsn); | |
172 | } | |
173 | ||
/* Is this a SEW value that can be encoded into the VTYPE format.

   Return true only if SEW is a power of two in the range [8, 64], i.e.
   one of 8, 16, 32 or 64.

   The power-of-two test is done explicitly: using the result of
   exact_log2 directly as a truth value is wrong, since it yields -1
   (which is "true") for values that are not a power of two.  */
static bool
valid_sew_p (size_t sew)
{
  const bool power_of_two_p = sew != 0 && (sew & (sew - 1)) == 0;
  return power_of_two_p && sew >= 8 && sew <= 64;
}
180 | ||
ec99ffab JZZ |
181 | /* Return true if the instruction ignores VLMUL field of VTYPE. */ |
182 | static bool | |
183 | ignore_vlmul_insn_p (rtx_insn *rinsn) | |
184 | { | |
185 | return get_attr_type (rinsn) == TYPE_VIMOVVX | |
186 | || get_attr_type (rinsn) == TYPE_VFMOVVF | |
187 | || get_attr_type (rinsn) == TYPE_VIMOVXV | |
188 | || get_attr_type (rinsn) == TYPE_VFMOVFV; | |
189 | } | |
190 | ||
191 | /* Return true if the instruction is scalar move instruction. */ | |
192 | static bool | |
193 | scalar_move_insn_p (rtx_insn *rinsn) | |
194 | { | |
195 | return get_attr_type (rinsn) == TYPE_VIMOVXV | |
196 | || get_attr_type (rinsn) == TYPE_VFMOVFV; | |
197 | } | |
198 | ||
60bd33bc JZZ |
199 | /* Return true if the instruction is fault first load instruction. */ |
200 | static bool | |
201 | fault_first_load_p (rtx_insn *rinsn) | |
202 | { | |
6313b045 JZZ |
203 | return recog_memoized (rinsn) >= 0 |
204 | && (get_attr_type (rinsn) == TYPE_VLDFF | |
205 | || get_attr_type (rinsn) == TYPE_VLSEGDFF); | |
60bd33bc JZZ |
206 | } |
207 | ||
208 | /* Return true if the instruction is read vl instruction. */ | |
209 | static bool | |
210 | read_vl_insn_p (rtx_insn *rinsn) | |
211 | { | |
212 | return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_RDVL; | |
213 | } | |
214 | ||
9243c3d1 JZZ |
215 | /* Return true if it is a vsetvl instruction. */ |
216 | static bool | |
217 | vector_config_insn_p (rtx_insn *rinsn) | |
218 | { | |
219 | return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL; | |
220 | } | |
221 | ||
222 | /* Return true if it is vsetvldi or vsetvlsi. */ | |
223 | static bool | |
224 | vsetvl_insn_p (rtx_insn *rinsn) | |
225 | { | |
6b6b9c68 JZZ |
226 | if (!vector_config_insn_p (rinsn)) |
227 | return false; | |
85112fbb | 228 | return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi |
6b6b9c68 JZZ |
229 | || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi); |
230 | } | |
231 | ||
232 | /* Return true if it is vsetvl zero, rs1. */ | |
233 | static bool | |
234 | vsetvl_discard_result_insn_p (rtx_insn *rinsn) | |
235 | { | |
236 | if (!vector_config_insn_p (rinsn)) | |
237 | return false; | |
238 | return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi | |
239 | || INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi); | |
9243c3d1 JZZ |
240 | } |
241 | ||
9243c3d1 | 242 | static bool |
4f673c5e | 243 | real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb) |
9243c3d1 | 244 | { |
4f673c5e | 245 | return insn != nullptr && insn->is_real () && insn->bb () == bb; |
9243c3d1 JZZ |
246 | } |
247 | ||
9243c3d1 | 248 | static bool |
4f673c5e | 249 | before_p (const insn_info *insn1, const insn_info *insn2) |
9243c3d1 | 250 | { |
9b9a1ac1 | 251 | return insn1->compare_with (insn2) < 0; |
4f673c5e JZZ |
252 | } |
253 | ||
6b6b9c68 JZZ |
254 | static insn_info * |
255 | find_reg_killed_by (const bb_info *bb, rtx x) | |
4f673c5e | 256 | { |
6b6b9c68 JZZ |
257 | if (!x || vlmax_avl_p (x) || !REG_P (x)) |
258 | return nullptr; | |
259 | for (insn_info *insn : bb->reverse_real_nondebug_insns ()) | |
4f673c5e | 260 | if (find_access (insn->defs (), REGNO (x))) |
6b6b9c68 JZZ |
261 | return insn; |
262 | return nullptr; | |
263 | } | |
264 | ||
265 | /* Helper function to get VL operand. */ | |
266 | static rtx | |
267 | get_vl (rtx_insn *rinsn) | |
268 | { | |
269 | if (has_vl_op (rinsn)) | |
270 | { | |
271 | extract_insn_cached (rinsn); | |
272 | return recog_data.operand[get_attr_vl_op_idx (rinsn)]; | |
273 | } | |
274 | return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0)); | |
4f673c5e JZZ |
275 | } |
276 | ||
277 | static bool | |
278 | has_vsetvl_killed_avl_p (const bb_info *bb, const vector_insn_info &info) | |
279 | { | |
280 | if (info.dirty_with_killed_avl_p ()) | |
281 | { | |
282 | rtx avl = info.get_avl (); | |
6b6b9c68 | 283 | if (vlmax_avl_p (avl)) |
ec99ffab | 284 | return find_reg_killed_by (bb, info.get_avl_reg_rtx ()) != nullptr; |
4f673c5e JZZ |
285 | for (const insn_info *insn : bb->reverse_real_nondebug_insns ()) |
286 | { | |
287 | def_info *def = find_access (insn->defs (), REGNO (avl)); | |
288 | if (def) | |
289 | { | |
290 | set_info *set = safe_dyn_cast<set_info *> (def); | |
291 | if (!set) | |
292 | return false; | |
293 | ||
294 | rtx new_avl = gen_rtx_REG (GET_MODE (avl), REGNO (avl)); | |
295 | gcc_assert (new_avl != avl); | |
296 | if (!info.compatible_avl_p (avl_info (new_avl, set))) | |
297 | return false; | |
298 | ||
299 | return true; | |
300 | } | |
301 | } | |
302 | } | |
303 | return false; | |
304 | } | |
305 | ||
9243c3d1 JZZ |
306 | /* An "anticipatable occurrence" is one that is the first occurrence in the |
307 | basic block, the operands are not modified in the basic block prior | |
308 | to the occurrence and the output is not used between the start of | |
4f673c5e JZZ |
309 | the block and the occurrence. |
310 | ||
311 | For VSETVL instruction, we have these following formats: | |
312 | 1. vsetvl zero, rs1. | |
313 | 2. vsetvl zero, imm. | |
314 | 3. vsetvl rd, rs1. | |
315 | ||
316 | So base on these circumstances, a DEM is considered as a local anticipatable | |
317 | occurrence should satisfy these following conditions: | |
318 | ||
319 | 1). rs1 (avl) are not modified in the basic block prior to the VSETVL. | |
320 | 2). rd (vl) are not modified in the basic block prior to the VSETVL. | |
321 | 3). rd (vl) is not used between the start of the block and the occurrence. | |
322 | ||
323 | Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE | |
324 | is modified prior to the occurrence. This case is already considered as | |
325 | a non-local anticipatable occurrence. | |
326 | */ | |
9243c3d1 | 327 | static bool |
4f673c5e | 328 | anticipatable_occurrence_p (const bb_info *bb, const vector_insn_info dem) |
9243c3d1 | 329 | { |
4f673c5e | 330 | insn_info *insn = dem.get_insn (); |
9243c3d1 JZZ |
331 | /* The only possible operand we care of VSETVL is AVL. */ |
332 | if (dem.has_avl_reg ()) | |
333 | { | |
4f673c5e | 334 | /* rs1 (avl) are not modified in the basic block prior to the VSETVL. */ |
9243c3d1 JZZ |
335 | if (!vlmax_avl_p (dem.get_avl ())) |
336 | { | |
ec99ffab | 337 | set_info *set = dem.get_avl_source (); |
9243c3d1 JZZ |
338 | /* If it's undefined, it's not anticipatable conservatively. */ |
339 | if (!set) | |
340 | return false; | |
4f673c5e JZZ |
341 | if (real_insn_and_same_bb_p (set->insn (), bb) |
342 | && before_p (set->insn (), insn)) | |
9243c3d1 JZZ |
343 | return false; |
344 | } | |
345 | } | |
346 | ||
4f673c5e | 347 | /* rd (vl) is not used between the start of the block and the occurrence. */ |
9243c3d1 JZZ |
348 | if (vsetvl_insn_p (insn->rtl ())) |
349 | { | |
4f673c5e JZZ |
350 | rtx dest = get_vl (insn->rtl ()); |
351 | for (insn_info *i = insn->prev_nondebug_insn (); | |
352 | real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ()) | |
353 | { | |
354 | /* rd (vl) is not used between the start of the block and the | |
355 | * occurrence. */ | |
356 | if (find_access (i->uses (), REGNO (dest))) | |
357 | return false; | |
358 | /* rd (vl) are not modified in the basic block prior to the VSETVL. */ | |
359 | if (find_access (i->defs (), REGNO (dest))) | |
360 | return false; | |
361 | } | |
9243c3d1 JZZ |
362 | } |
363 | ||
364 | return true; | |
365 | } | |
366 | ||
367 | /* An "available occurrence" is one that is the last occurrence in the | |
368 | basic block and the operands are not modified by following statements in | |
4f673c5e JZZ |
369 | the basic block [including this insn]. |
370 | ||
371 | For VSETVL instruction, we have these following formats: | |
372 | 1. vsetvl zero, rs1. | |
373 | 2. vsetvl zero, imm. | |
374 | 3. vsetvl rd, rs1. | |
375 | ||
376 | So base on these circumstances, a DEM is considered as a local available | |
377 | occurrence should satisfy these following conditions: | |
378 | ||
379 | 1). rs1 (avl) are not modified by following statements in | |
380 | the basic block. | |
381 | 2). rd (vl) are not modified by following statements in | |
382 | the basic block. | |
383 | ||
384 | Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE | |
385 | is modified prior to the occurrence. This case is already considered as | |
386 | a non-local available occurrence. | |
387 | */ | |
9243c3d1 | 388 | static bool |
4f673c5e | 389 | available_occurrence_p (const bb_info *bb, const vector_insn_info dem) |
9243c3d1 | 390 | { |
4f673c5e | 391 | insn_info *insn = dem.get_insn (); |
9243c3d1 JZZ |
392 | /* The only possible operand we care of VSETVL is AVL. */ |
393 | if (dem.has_avl_reg ()) | |
394 | { | |
9243c3d1 JZZ |
395 | if (!vlmax_avl_p (dem.get_avl ())) |
396 | { | |
4f673c5e | 397 | rtx dest = NULL_RTX; |
20c85207 | 398 | insn_info *i = insn; |
4f673c5e | 399 | if (vsetvl_insn_p (insn->rtl ())) |
20c85207 JZ |
400 | { |
401 | dest = get_vl (insn->rtl ()); | |
402 | /* For user vsetvl a2, a2 instruction, we consider it as | |
403 | available even though it modifies "a2". */ | |
404 | i = i->next_nondebug_insn (); | |
405 | } | |
406 | for (; real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ()) | |
4f673c5e | 407 | { |
60bd33bc JZZ |
408 | if (read_vl_insn_p (i->rtl ())) |
409 | continue; | |
4f673c5e JZZ |
410 | /* rs1 (avl) are not modified by following statements in |
411 | the basic block. */ | |
412 | if (find_access (i->defs (), REGNO (dem.get_avl ()))) | |
413 | return false; | |
414 | /* rd (vl) are not modified by following statements in | |
415 | the basic block. */ | |
416 | if (dest && find_access (i->defs (), REGNO (dest))) | |
417 | return false; | |
418 | } | |
9243c3d1 JZZ |
419 | } |
420 | } | |
421 | return true; | |
422 | } | |
423 | ||
6b6b9c68 JZZ |
424 | static bool |
425 | insn_should_be_added_p (const insn_info *insn, unsigned int types) | |
9243c3d1 | 426 | { |
6b6b9c68 JZZ |
427 | if (insn->is_real () && (types & REAL_SET)) |
428 | return true; | |
429 | if (insn->is_phi () && (types & PHI_SET)) | |
430 | return true; | |
431 | if (insn->is_bb_head () && (types & BB_HEAD_SET)) | |
432 | return true; | |
433 | if (insn->is_bb_end () && (types & BB_END_SET)) | |
434 | return true; | |
435 | return false; | |
9243c3d1 JZZ |
436 | } |
437 | ||
6b6b9c68 JZZ |
438 | /* Recursively find all define instructions. The kind of instruction is |
439 | specified by the DEF_TYPE. */ | |
440 | static hash_set<set_info *> | |
441 | get_all_sets (phi_info *phi, unsigned int types) | |
9243c3d1 | 442 | { |
6b6b9c68 | 443 | hash_set<set_info *> insns; |
4f673c5e JZZ |
444 | auto_vec<phi_info *> work_list; |
445 | hash_set<phi_info *> visited_list; | |
446 | if (!phi) | |
6b6b9c68 | 447 | return hash_set<set_info *> (); |
4f673c5e | 448 | work_list.safe_push (phi); |
9243c3d1 | 449 | |
4f673c5e | 450 | while (!work_list.is_empty ()) |
9243c3d1 | 451 | { |
4f673c5e JZZ |
452 | phi_info *phi = work_list.pop (); |
453 | visited_list.add (phi); | |
454 | for (use_info *use : phi->inputs ()) | |
9243c3d1 | 455 | { |
4f673c5e | 456 | def_info *def = use->def (); |
6b6b9c68 JZZ |
457 | set_info *set = safe_dyn_cast<set_info *> (def); |
458 | if (!set) | |
459 | return hash_set<set_info *> (); | |
9243c3d1 | 460 | |
6b6b9c68 | 461 | gcc_assert (!set->insn ()->is_debug_insn ()); |
9243c3d1 | 462 | |
6b6b9c68 JZZ |
463 | if (insn_should_be_added_p (set->insn (), types)) |
464 | insns.add (set); | |
465 | if (set->insn ()->is_phi ()) | |
4f673c5e | 466 | { |
6b6b9c68 | 467 | phi_info *new_phi = as_a<phi_info *> (set); |
4f673c5e JZZ |
468 | if (!visited_list.contains (new_phi)) |
469 | work_list.safe_push (new_phi); | |
470 | } | |
471 | } | |
9243c3d1 | 472 | } |
4f673c5e JZZ |
473 | return insns; |
474 | } | |
9243c3d1 | 475 | |
6b6b9c68 JZZ |
476 | static hash_set<set_info *> |
477 | get_all_sets (set_info *set, bool /* get_real_inst */ real_p, | |
478 | bool /*get_phi*/ phi_p, bool /* get_function_parameter*/ param_p) | |
479 | { | |
480 | if (real_p && phi_p && param_p) | |
481 | return get_all_sets (safe_dyn_cast<phi_info *> (set), | |
482 | REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET); | |
483 | ||
484 | else if (real_p && param_p) | |
485 | return get_all_sets (safe_dyn_cast<phi_info *> (set), | |
486 | REAL_SET | BB_HEAD_SET | BB_END_SET); | |
487 | ||
488 | else if (real_p) | |
489 | return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET); | |
490 | return hash_set<set_info *> (); | |
491 | } | |
492 | ||
493 | /* Helper function to get AVL operand. */ | |
494 | static rtx | |
495 | get_avl (rtx_insn *rinsn) | |
496 | { | |
497 | if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn)) | |
498 | return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0); | |
499 | ||
500 | if (!has_vl_op (rinsn)) | |
501 | return NULL_RTX; | |
502 | if (get_attr_avl_type (rinsn) == VLMAX) | |
503 | return RVV_VLMAX; | |
504 | extract_insn_cached (rinsn); | |
505 | return recog_data.operand[get_attr_vl_op_idx (rinsn)]; | |
506 | } | |
507 | ||
508 | static set_info * | |
509 | get_same_bb_set (hash_set<set_info *> &sets, const basic_block cfg_bb) | |
510 | { | |
511 | for (set_info *set : sets) | |
512 | if (set->bb ()->cfg_bb () == cfg_bb) | |
513 | return set; | |
514 | return nullptr; | |
515 | } | |
516 | ||
4f673c5e JZZ |
517 | /* Recursively find all predecessor blocks for cfg_bb. */ |
518 | static hash_set<basic_block> | |
519 | get_all_predecessors (basic_block cfg_bb) | |
520 | { | |
521 | hash_set<basic_block> blocks; | |
522 | auto_vec<basic_block> work_list; | |
523 | hash_set<basic_block> visited_list; | |
524 | work_list.safe_push (cfg_bb); | |
9243c3d1 | 525 | |
4f673c5e | 526 | while (!work_list.is_empty ()) |
9243c3d1 | 527 | { |
4f673c5e JZZ |
528 | basic_block new_cfg_bb = work_list.pop (); |
529 | visited_list.add (new_cfg_bb); | |
530 | edge e; | |
531 | edge_iterator ei; | |
532 | FOR_EACH_EDGE (e, ei, new_cfg_bb->preds) | |
533 | { | |
534 | if (!visited_list.contains (e->src)) | |
535 | work_list.safe_push (e->src); | |
536 | blocks.add (e->src); | |
537 | } | |
9243c3d1 | 538 | } |
4f673c5e JZZ |
539 | return blocks; |
540 | } | |
9243c3d1 | 541 | |
4f673c5e JZZ |
542 | /* Return true if there is an INSN in insns staying in the block BB. */ |
543 | static bool | |
6b6b9c68 | 544 | any_set_in_bb_p (hash_set<set_info *> sets, const bb_info *bb) |
4f673c5e | 545 | { |
6b6b9c68 JZZ |
546 | for (const set_info *set : sets) |
547 | if (set->bb ()->index () == bb->index ()) | |
4f673c5e JZZ |
548 | return true; |
549 | return false; | |
9243c3d1 JZZ |
550 | } |
551 | ||
552 | /* Helper function to get SEW operand. We always have SEW value for | |
553 | all RVV instructions that have VTYPE OP. */ | |
554 | static uint8_t | |
555 | get_sew (rtx_insn *rinsn) | |
556 | { | |
557 | return get_attr_sew (rinsn); | |
558 | } | |
559 | ||
560 | /* Helper function to get VLMUL operand. We always have VLMUL value for | |
561 | all RVV instructions that have VTYPE OP. */ | |
562 | static enum vlmul_type | |
563 | get_vlmul (rtx_insn *rinsn) | |
564 | { | |
565 | return (enum vlmul_type) get_attr_vlmul (rinsn); | |
566 | } | |
567 | ||
568 | /* Get default tail policy. */ | |
569 | static bool | |
570 | get_default_ta () | |
571 | { | |
572 | /* For the instruction that doesn't require TA, we still need a default value | |
573 | to emit vsetvl. We pick up the default value according to prefer policy. */ | |
574 | return (bool) (get_prefer_tail_policy () & 0x1 | |
575 | || (get_prefer_tail_policy () >> 1 & 0x1)); | |
576 | } | |
577 | ||
578 | /* Get default mask policy. */ | |
579 | static bool | |
580 | get_default_ma () | |
581 | { | |
582 | /* For the instruction that doesn't require MA, we still need a default value | |
583 | to emit vsetvl. We pick up the default value according to prefer policy. */ | |
584 | return (bool) (get_prefer_mask_policy () & 0x1 | |
585 | || (get_prefer_mask_policy () >> 1 & 0x1)); | |
586 | } | |
587 | ||
588 | /* Helper function to get TA operand. */ | |
589 | static bool | |
590 | tail_agnostic_p (rtx_insn *rinsn) | |
591 | { | |
592 | /* If it doesn't have TA, we return agnostic by default. */ | |
593 | extract_insn_cached (rinsn); | |
594 | int ta = get_attr_ta (rinsn); | |
595 | return ta == INVALID_ATTRIBUTE ? get_default_ta () : IS_AGNOSTIC (ta); | |
596 | } | |
597 | ||
598 | /* Helper function to get MA operand. */ | |
599 | static bool | |
600 | mask_agnostic_p (rtx_insn *rinsn) | |
601 | { | |
602 | /* If it doesn't have MA, we return agnostic by default. */ | |
603 | extract_insn_cached (rinsn); | |
604 | int ma = get_attr_ma (rinsn); | |
605 | return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma); | |
606 | } | |
607 | ||
608 | /* Return true if FN has a vector instruction that use VL/VTYPE. */ | |
609 | static bool | |
610 | has_vector_insn (function *fn) | |
611 | { | |
612 | basic_block cfg_bb; | |
613 | rtx_insn *rinsn; | |
614 | FOR_ALL_BB_FN (cfg_bb, fn) | |
615 | FOR_BB_INSNS (cfg_bb, rinsn) | |
616 | if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn)) | |
617 | return true; | |
618 | return false; | |
619 | } | |
620 | ||
621 | /* Emit vsetvl instruction. */ | |
622 | static rtx | |
e577b91b | 623 | gen_vsetvl_pat (enum vsetvl_type insn_type, const vl_vtype_info &info, rtx vl) |
9243c3d1 JZZ |
624 | { |
625 | rtx avl = info.get_avl (); | |
04655110 LX |
626 | /* if optimization == 0 and the instruction is vmv.x.s/vfmv.f.s, |
627 | set the value of avl to (const_int 0) so that VSETVL PASS will | |
628 | insert vsetvl correctly.*/ | |
629 | if (info.has_avl_no_reg ()) | |
630 | avl = GEN_INT (0); | |
9243c3d1 JZZ |
631 | rtx sew = gen_int_mode (info.get_sew (), Pmode); |
632 | rtx vlmul = gen_int_mode (info.get_vlmul (), Pmode); | |
633 | rtx ta = gen_int_mode (info.get_ta (), Pmode); | |
634 | rtx ma = gen_int_mode (info.get_ma (), Pmode); | |
635 | ||
636 | if (insn_type == VSETVL_NORMAL) | |
637 | { | |
638 | gcc_assert (vl != NULL_RTX); | |
639 | return gen_vsetvl (Pmode, vl, avl, sew, vlmul, ta, ma); | |
640 | } | |
641 | else if (insn_type == VSETVL_VTYPE_CHANGE_ONLY) | |
642 | return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma); | |
643 | else | |
644 | return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma); | |
645 | } | |
646 | ||
647 | static rtx | |
e577b91b | 648 | gen_vsetvl_pat (rtx_insn *rinsn, const vector_insn_info &info) |
9243c3d1 JZZ |
649 | { |
650 | rtx new_pat; | |
60bd33bc JZZ |
651 | vl_vtype_info new_info = info; |
652 | if (info.get_insn () && info.get_insn ()->rtl () | |
653 | && fault_first_load_p (info.get_insn ()->rtl ())) | |
654 | new_info.set_avl_info ( | |
655 | avl_info (get_avl (info.get_insn ()->rtl ()), nullptr)); | |
9243c3d1 JZZ |
656 | if (vsetvl_insn_p (rinsn) || vlmax_avl_p (info.get_avl ())) |
657 | { | |
658 | rtx dest = get_vl (rinsn); | |
60bd33bc | 659 | new_pat = gen_vsetvl_pat (VSETVL_NORMAL, new_info, dest); |
9243c3d1 JZZ |
660 | } |
661 | else if (INSN_CODE (rinsn) == CODE_FOR_vsetvl_vtype_change_only) | |
60bd33bc | 662 | new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, new_info, NULL_RTX); |
9243c3d1 | 663 | else |
60bd33bc | 664 | new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX); |
9243c3d1 JZZ |
665 | return new_pat; |
666 | } | |
667 | ||
668 | static void | |
669 | emit_vsetvl_insn (enum vsetvl_type insn_type, enum emit_type emit_type, | |
e577b91b | 670 | const vl_vtype_info &info, rtx vl, rtx_insn *rinsn) |
9243c3d1 JZZ |
671 | { |
672 | rtx pat = gen_vsetvl_pat (insn_type, info, vl); | |
673 | if (dump_file) | |
674 | { | |
675 | fprintf (dump_file, "\nInsert vsetvl insn PATTERN:\n"); | |
676 | print_rtl_single (dump_file, pat); | |
677 | } | |
678 | ||
679 | if (emit_type == EMIT_DIRECT) | |
680 | emit_insn (pat); | |
681 | else if (emit_type == EMIT_BEFORE) | |
682 | emit_insn_before (pat, rinsn); | |
683 | else | |
684 | emit_insn_after (pat, rinsn); | |
685 | } | |
686 | ||
687 | static void | |
688 | eliminate_insn (rtx_insn *rinsn) | |
689 | { | |
690 | if (dump_file) | |
691 | { | |
692 | fprintf (dump_file, "\nEliminate insn %d:\n", INSN_UID (rinsn)); | |
693 | print_rtl_single (dump_file, rinsn); | |
694 | } | |
695 | if (in_sequence_p ()) | |
696 | remove_insn (rinsn); | |
697 | else | |
698 | delete_insn (rinsn); | |
699 | } | |
700 | ||
a481eed8 | 701 | static vsetvl_type |
9243c3d1 JZZ |
702 | insert_vsetvl (enum emit_type emit_type, rtx_insn *rinsn, |
703 | const vector_insn_info &info, const vector_insn_info &prev_info) | |
704 | { | |
705 | /* Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same | |
706 | VLMAX. */ | |
707 | if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p () | |
4f673c5e | 708 | && info.compatible_avl_p (prev_info) && info.same_vlmax_p (prev_info)) |
9243c3d1 JZZ |
709 | { |
710 | emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX, | |
711 | rinsn); | |
a481eed8 | 712 | return VSETVL_VTYPE_CHANGE_ONLY; |
9243c3d1 JZZ |
713 | } |
714 | ||
715 | if (info.has_avl_imm ()) | |
716 | { | |
717 | emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX, | |
718 | rinsn); | |
a481eed8 | 719 | return VSETVL_DISCARD_RESULT; |
9243c3d1 JZZ |
720 | } |
721 | ||
722 | if (info.has_avl_no_reg ()) | |
723 | { | |
724 | /* We can only use x0, x0 if there's no chance of the vtype change causing | |
725 | the previous vl to become invalid. */ | |
726 | if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p () | |
727 | && info.same_vlmax_p (prev_info)) | |
728 | { | |
729 | emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX, | |
730 | rinsn); | |
a481eed8 | 731 | return VSETVL_VTYPE_CHANGE_ONLY; |
9243c3d1 JZZ |
732 | } |
733 | /* Otherwise use an AVL of 0 to avoid depending on previous vl. */ | |
734 | vl_vtype_info new_info = info; | |
735 | new_info.set_avl_info (avl_info (const0_rtx, nullptr)); | |
736 | emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, new_info, NULL_RTX, | |
737 | rinsn); | |
a481eed8 | 738 | return VSETVL_DISCARD_RESULT; |
9243c3d1 JZZ |
739 | } |
740 | ||
741 | /* Use X0 as the DestReg unless AVLReg is X0. We also need to change the | |
742 | opcode if the AVLReg is X0 as they have different register classes for | |
743 | the AVL operand. */ | |
744 | if (vlmax_avl_p (info.get_avl ())) | |
745 | { | |
746 | gcc_assert (has_vtype_op (rinsn) || vsetvl_insn_p (rinsn)); | |
dd6e1cba JZ |
747 | /* For user vsetvli a5, zero, we should use get_vl to get the VL |
748 | operand "a5". */ | |
749 | rtx vl_op | |
750 | = vsetvl_insn_p (rinsn) ? get_vl (rinsn) : info.get_avl_reg_rtx (); | |
9243c3d1 JZZ |
751 | gcc_assert (!vlmax_avl_p (vl_op)); |
752 | emit_vsetvl_insn (VSETVL_NORMAL, emit_type, info, vl_op, rinsn); | |
a481eed8 | 753 | return VSETVL_NORMAL; |
9243c3d1 JZZ |
754 | } |
755 | ||
756 | emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX, rinsn); | |
757 | ||
758 | if (dump_file) | |
759 | { | |
760 | fprintf (dump_file, "Update VL/VTYPE info, previous info="); | |
761 | prev_info.dump (dump_file); | |
762 | } | |
a481eed8 | 763 | return VSETVL_DISCARD_RESULT; |
9243c3d1 JZZ |
764 | } |
765 | ||
766 | /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them | |
767 | to INSN. If such notes are added to an insn which references a | |
768 | CODE_LABEL, the LABEL_NUSES count is incremented. We have to add | |
769 | that note, because the following loop optimization pass requires | |
770 | them. */ | |
771 | ||
772 | /* ??? If there was a jump optimization pass after gcse and before loop, | |
773 | then we would not need to do this here, because jump would add the | |
774 | necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */ | |
775 | ||
776 | static void | |
27a2a4b6 | 777 | add_label_notes (rtx x, rtx_insn *rinsn) |
9243c3d1 JZZ |
778 | { |
779 | enum rtx_code code = GET_CODE (x); | |
780 | int i, j; | |
781 | const char *fmt; | |
782 | ||
783 | if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x)) | |
784 | { | |
785 | /* This code used to ignore labels that referred to dispatch tables to | |
786 | avoid flow generating (slightly) worse code. | |
787 | ||
788 | We no longer ignore such label references (see LABEL_REF handling in | |
789 | mark_jump_label for additional information). */ | |
790 | ||
791 | /* There's no reason for current users to emit jump-insns with | |
792 | such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET | |
793 | notes. */ | |
27a2a4b6 JZZ |
794 | gcc_assert (!JUMP_P (rinsn)); |
795 | add_reg_note (rinsn, REG_LABEL_OPERAND, label_ref_label (x)); | |
9243c3d1 JZZ |
796 | |
797 | if (LABEL_P (label_ref_label (x))) | |
798 | LABEL_NUSES (label_ref_label (x))++; | |
799 | ||
800 | return; | |
801 | } | |
802 | ||
803 | for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--) | |
804 | { | |
805 | if (fmt[i] == 'e') | |
27a2a4b6 | 806 | add_label_notes (XEXP (x, i), rinsn); |
9243c3d1 JZZ |
807 | else if (fmt[i] == 'E') |
808 | for (j = XVECLEN (x, i) - 1; j >= 0; j--) | |
27a2a4b6 | 809 | add_label_notes (XVECEXP (x, i, j), rinsn); |
9243c3d1 JZZ |
810 | } |
811 | } | |
812 | ||
813 | /* Add EXPR to the end of basic block BB. | |
814 | ||
815 | This is used by both the PRE and code hoisting. */ | |
816 | ||
817 | static void | |
818 | insert_insn_end_basic_block (rtx_insn *rinsn, basic_block cfg_bb) | |
819 | { | |
820 | rtx_insn *end_rinsn = BB_END (cfg_bb); | |
821 | rtx_insn *new_insn; | |
822 | rtx_insn *pat, *pat_end; | |
823 | ||
824 | pat = rinsn; | |
825 | gcc_assert (pat && INSN_P (pat)); | |
826 | ||
827 | pat_end = pat; | |
828 | while (NEXT_INSN (pat_end) != NULL_RTX) | |
829 | pat_end = NEXT_INSN (pat_end); | |
830 | ||
831 | /* If the last end_rinsn is a jump, insert EXPR in front. Similarly we need | |
832 | to take care of trapping instructions in presence of non-call exceptions. | |
833 | */ | |
834 | ||
835 | if (JUMP_P (end_rinsn) | |
836 | || (NONJUMP_INSN_P (end_rinsn) | |
837 | && (!single_succ_p (cfg_bb) | |
838 | || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL))) | |
839 | { | |
840 | /* FIXME: What if something in jump uses value set in new end_rinsn? */ | |
841 | new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb); | |
842 | } | |
843 | ||
844 | /* Likewise if the last end_rinsn is a call, as will happen in the presence | |
845 | of exception handling. */ | |
846 | else if (CALL_P (end_rinsn) | |
847 | && (!single_succ_p (cfg_bb) | |
848 | || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL)) | |
849 | { | |
850 | /* Keeping in mind targets with small register classes and parameters | |
851 | in registers, we search backward and place the instructions before | |
852 | the first parameter is loaded. Do this for everyone for consistency | |
853 | and a presumption that we'll get better code elsewhere as well. */ | |
854 | ||
855 | /* Since different machines initialize their parameter registers | |
856 | in different orders, assume nothing. Collect the set of all | |
857 | parameter registers. */ | |
858 | end_rinsn = find_first_parameter_load (end_rinsn, BB_HEAD (cfg_bb)); | |
859 | ||
860 | /* If we found all the parameter loads, then we want to insert | |
861 | before the first parameter load. | |
862 | ||
863 | If we did not find all the parameter loads, then we might have | |
864 | stopped on the head of the block, which could be a CODE_LABEL. | |
865 | If we inserted before the CODE_LABEL, then we would be putting | |
866 | the end_rinsn in the wrong basic block. In that case, put the | |
867 | end_rinsn after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. | |
868 | */ | |
869 | while (LABEL_P (end_rinsn) || NOTE_INSN_BASIC_BLOCK_P (end_rinsn)) | |
870 | end_rinsn = NEXT_INSN (end_rinsn); | |
871 | ||
872 | new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb); | |
873 | } | |
874 | else | |
875 | new_insn = emit_insn_after_noloc (pat, end_rinsn, cfg_bb); | |
876 | ||
877 | while (1) | |
878 | { | |
879 | if (INSN_P (pat)) | |
880 | add_label_notes (PATTERN (pat), new_insn); | |
881 | if (pat == pat_end) | |
882 | break; | |
883 | pat = NEXT_INSN (pat); | |
884 | } | |
885 | } | |
886 | ||
887 | /* Get VL/VTYPE information for INSN. */ | |
888 | static vl_vtype_info | |
889 | get_vl_vtype_info (const insn_info *insn) | |
890 | { | |
9243c3d1 JZZ |
891 | set_info *set = nullptr; |
892 | rtx avl = ::get_avl (insn->rtl ()); | |
ec99ffab JZZ |
893 | if (avl && REG_P (avl)) |
894 | { | |
895 | if (vlmax_avl_p (avl) && has_vl_op (insn->rtl ())) | |
896 | set | |
897 | = find_access (insn->uses (), REGNO (get_vl (insn->rtl ())))->def (); | |
898 | else if (!vlmax_avl_p (avl)) | |
899 | set = find_access (insn->uses (), REGNO (avl))->def (); | |
900 | else | |
901 | set = nullptr; | |
902 | } | |
9243c3d1 JZZ |
903 | |
904 | uint8_t sew = get_sew (insn->rtl ()); | |
905 | enum vlmul_type vlmul = get_vlmul (insn->rtl ()); | |
906 | uint8_t ratio = get_attr_ratio (insn->rtl ()); | |
907 | /* when get_attr_ratio is invalid, this kind of instructions | |
908 | doesn't care about ratio. However, we still need this value | |
909 | in demand info backward analysis. */ | |
910 | if (ratio == INVALID_ATTRIBUTE) | |
911 | ratio = calculate_ratio (sew, vlmul); | |
912 | bool ta = tail_agnostic_p (insn->rtl ()); | |
913 | bool ma = mask_agnostic_p (insn->rtl ()); | |
914 | ||
915 | /* If merge operand is undef value, we prefer agnostic. */ | |
916 | int merge_op_idx = get_attr_merge_op_idx (insn->rtl ()); | |
917 | if (merge_op_idx != INVALID_ATTRIBUTE | |
918 | && satisfies_constraint_vu (recog_data.operand[merge_op_idx])) | |
919 | { | |
920 | ta = true; | |
921 | ma = true; | |
922 | } | |
923 | ||
924 | vl_vtype_info info (avl_info (avl, set), sew, vlmul, ratio, ta, ma); | |
925 | return info; | |
926 | } | |
927 | ||
928 | static void | |
929 | change_insn (rtx_insn *rinsn, rtx new_pat) | |
930 | { | |
931 | /* We don't apply change on RTL_SSA here since it's possible a | |
932 | new INSN we add in the PASS before which doesn't have RTL_SSA | |
933 | info yet.*/ | |
934 | if (dump_file) | |
935 | { | |
936 | fprintf (dump_file, "\nChange PATTERN of insn %d from:\n", | |
937 | INSN_UID (rinsn)); | |
938 | print_rtl_single (dump_file, PATTERN (rinsn)); | |
939 | } | |
940 | ||
011ba384 | 941 | validate_change (rinsn, &PATTERN (rinsn), new_pat, false); |
9243c3d1 JZZ |
942 | |
943 | if (dump_file) | |
944 | { | |
945 | fprintf (dump_file, "\nto:\n"); | |
946 | print_rtl_single (dump_file, PATTERN (rinsn)); | |
947 | } | |
948 | } | |
949 | ||
60bd33bc JZZ |
950 | static const insn_info * |
951 | get_forward_read_vl_insn (const insn_info *insn) | |
952 | { | |
953 | const bb_info *bb = insn->bb (); | |
954 | for (const insn_info *i = insn->next_nondebug_insn (); | |
955 | real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ()) | |
956 | { | |
957 | if (find_access (i->defs (), VL_REGNUM)) | |
958 | return nullptr; | |
959 | if (read_vl_insn_p (i->rtl ())) | |
960 | return i; | |
961 | } | |
962 | return nullptr; | |
963 | } | |
964 | ||
965 | static const insn_info * | |
966 | get_backward_fault_first_load_insn (const insn_info *insn) | |
967 | { | |
968 | const bb_info *bb = insn->bb (); | |
969 | for (const insn_info *i = insn->prev_nondebug_insn (); | |
970 | real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ()) | |
971 | { | |
972 | if (fault_first_load_p (i->rtl ())) | |
973 | return i; | |
974 | if (find_access (i->defs (), VL_REGNUM)) | |
975 | return nullptr; | |
976 | } | |
977 | return nullptr; | |
978 | } | |
979 | ||
9243c3d1 JZZ |
980 | static bool |
981 | change_insn (function_info *ssa, insn_change change, insn_info *insn, | |
982 | rtx new_pat) | |
983 | { | |
984 | rtx_insn *rinsn = insn->rtl (); | |
985 | auto attempt = ssa->new_change_attempt (); | |
986 | if (!restrict_movement (change)) | |
987 | return false; | |
988 | ||
989 | if (dump_file) | |
990 | { | |
991 | fprintf (dump_file, "\nChange PATTERN of insn %d from:\n", | |
992 | INSN_UID (rinsn)); | |
993 | print_rtl_single (dump_file, PATTERN (rinsn)); | |
9243c3d1 JZZ |
994 | } |
995 | ||
996 | insn_change_watermark watermark; | |
997 | validate_change (rinsn, &PATTERN (rinsn), new_pat, true); | |
998 | ||
999 | /* These routines report failures themselves. */ | |
1000 | if (!recog (attempt, change) || !change_is_worthwhile (change, false)) | |
1001 | return false; | |
a1e42094 JZZ |
1002 | |
1003 | /* Fix bug: | |
1004 | (insn 12 34 13 2 (set (reg:VNx8DI 120 v24 [orig:134 _1 ] [134]) | |
1005 | (if_then_else:VNx8DI (unspec:VNx8BI [ | |
1006 | (const_vector:VNx8BI repeat [ | |
1007 | (const_int 1 [0x1]) | |
1008 | ]) | |
1009 | (const_int 0 [0]) | |
1010 | (const_int 2 [0x2]) repeated x2 | |
1011 | (const_int 0 [0]) | |
1012 | (reg:SI 66 vl) | |
1013 | (reg:SI 67 vtype) | |
1014 | ] UNSPEC_VPREDICATE) | |
1015 | (plus:VNx8DI (reg/v:VNx8DI 104 v8 [orig:137 op1 ] [137]) | |
1016 | (sign_extend:VNx8DI (vec_duplicate:VNx8SI (reg:SI 15 a5 | |
1017 | [140])))) (unspec:VNx8DI [ (const_int 0 [0]) ] UNSPEC_VUNDEF))) "rvv.c":8:12 | |
1018 | 2784 {pred_single_widen_addsvnx8di_scalar} (expr_list:REG_EQUIV | |
1019 | (mem/c:VNx8DI (reg:DI 10 a0 [142]) [1 <retval>+0 S[64, 64] A128]) | |
1020 | (expr_list:REG_EQUAL (if_then_else:VNx8DI (unspec:VNx8BI [ | |
1021 | (const_vector:VNx8BI repeat [ | |
1022 | (const_int 1 [0x1]) | |
1023 | ]) | |
1024 | (reg/v:DI 13 a3 [orig:139 vl ] [139]) | |
1025 | (const_int 2 [0x2]) repeated x2 | |
1026 | (const_int 0 [0]) | |
1027 | (reg:SI 66 vl) | |
1028 | (reg:SI 67 vtype) | |
1029 | ] UNSPEC_VPREDICATE) | |
1030 | (plus:VNx8DI (reg/v:VNx8DI 104 v8 [orig:137 op1 ] [137]) | |
1031 | (const_vector:VNx8DI repeat [ | |
1032 | (const_int 2730 [0xaaa]) | |
1033 | ])) | |
1034 | (unspec:VNx8DI [ | |
1035 | (const_int 0 [0]) | |
1036 | ] UNSPEC_VUNDEF)) | |
1037 | (nil)))) | |
1038 | Here we want to remove use "a3". However, the REG_EQUAL/REG_EQUIV note use | |
1039 | "a3" which made us fail in change_insn. We reference to the | |
1040 | 'aarch64-cc-fusion.cc' and add this method. */ | |
1041 | remove_reg_equal_equiv_notes (rinsn); | |
9243c3d1 JZZ |
1042 | confirm_change_group (); |
1043 | ssa->change_insn (change); | |
1044 | ||
1045 | if (dump_file) | |
1046 | { | |
1047 | fprintf (dump_file, "\nto:\n"); | |
1048 | print_rtl_single (dump_file, PATTERN (rinsn)); | |
9243c3d1 JZZ |
1049 | } |
1050 | return true; | |
1051 | } | |
1052 | ||
aef20243 JZZ |
1053 | static void |
1054 | change_vsetvl_insn (const insn_info *insn, const vector_insn_info &info) | |
1055 | { | |
1056 | rtx_insn *rinsn; | |
1057 | if (vector_config_insn_p (insn->rtl ())) | |
1058 | { | |
1059 | rinsn = insn->rtl (); | |
1060 | gcc_assert (vsetvl_insn_p (rinsn) && "Can't handle X0, rs1 vsetvli yet"); | |
1061 | } | |
1062 | else | |
1063 | { | |
1064 | gcc_assert (has_vtype_op (insn->rtl ())); | |
1065 | rinsn = PREV_INSN (insn->rtl ()); | |
1066 | gcc_assert (vector_config_insn_p (rinsn)); | |
1067 | } | |
1068 | rtx new_pat = gen_vsetvl_pat (rinsn, info); | |
1069 | change_insn (rinsn, new_pat); | |
1070 | } | |
1071 | ||
69f39144 JZ |
1072 | static bool |
1073 | avl_source_has_vsetvl_p (set_info *avl_source) | |
1074 | { | |
1075 | if (!avl_source) | |
1076 | return false; | |
1077 | if (!avl_source->insn ()) | |
1078 | return false; | |
1079 | if (avl_source->insn ()->is_real ()) | |
1080 | return vsetvl_insn_p (avl_source->insn ()->rtl ()); | |
1081 | hash_set<set_info *> sets = get_all_sets (avl_source, true, false, true); | |
1082 | for (const auto set : sets) | |
1083 | { | |
1084 | if (set->insn ()->is_real () && vsetvl_insn_p (set->insn ()->rtl ())) | |
1085 | return true; | |
1086 | } | |
1087 | return false; | |
1088 | } | |
1089 | ||
4f673c5e | 1090 | static bool |
6b6b9c68 | 1091 | source_equal_p (insn_info *insn1, insn_info *insn2) |
4f673c5e | 1092 | { |
6b6b9c68 JZZ |
1093 | if (!insn1 || !insn2) |
1094 | return false; | |
1095 | rtx_insn *rinsn1 = insn1->rtl (); | |
1096 | rtx_insn *rinsn2 = insn2->rtl (); | |
4f673c5e JZZ |
1097 | if (!rinsn1 || !rinsn2) |
1098 | return false; | |
1099 | rtx note1 = find_reg_equal_equiv_note (rinsn1); | |
1100 | rtx note2 = find_reg_equal_equiv_note (rinsn2); | |
1101 | rtx single_set1 = single_set (rinsn1); | |
1102 | rtx single_set2 = single_set (rinsn2); | |
60bd33bc JZZ |
1103 | if (read_vl_insn_p (rinsn1) && read_vl_insn_p (rinsn2)) |
1104 | { | |
1105 | const insn_info *load1 = get_backward_fault_first_load_insn (insn1); | |
1106 | const insn_info *load2 = get_backward_fault_first_load_insn (insn2); | |
1107 | return load1 && load2 && load1 == load2; | |
1108 | } | |
4f673c5e JZZ |
1109 | |
1110 | if (note1 && note2 && rtx_equal_p (note1, note2)) | |
1111 | return true; | |
6b6b9c68 JZZ |
1112 | |
1113 | /* Since vsetvl instruction is not single SET. | |
1114 | We handle this case specially here. */ | |
1115 | if (vsetvl_insn_p (insn1->rtl ()) && vsetvl_insn_p (insn2->rtl ())) | |
1116 | { | |
1117 | /* For example: | |
1118 | vsetvl1 a6,a5,e32m1 | |
1119 | RVV 1 (use a6 as AVL) | |
1120 | vsetvl2 a5,a5,e8mf4 | |
1121 | RVV 2 (use a5 as AVL) | |
1122 | We consider AVL of RVV 1 and RVV 2 are same so that we can | |
1123 | gain more optimization opportunities. | |
1124 | ||
1125 | Note: insn1_info.compatible_avl_p (insn2_info) | |
1126 | will make sure there is no instruction between vsetvl1 and vsetvl2 | |
1127 | modify a5 since their def will be different if there is instruction | |
1128 | modify a5 and compatible_avl_p will return false. */ | |
1129 | vector_insn_info insn1_info, insn2_info; | |
1130 | insn1_info.parse_insn (insn1); | |
1131 | insn2_info.parse_insn (insn2); | |
69f39144 JZ |
1132 | |
1133 | /* To avoid dead loop, we don't optimize a vsetvli def has vsetvli | |
1134 | instructions which will complicate the situation. */ | |
1135 | if (avl_source_has_vsetvl_p (insn1_info.get_avl_source ()) | |
1136 | || avl_source_has_vsetvl_p (insn2_info.get_avl_source ())) | |
1137 | return false; | |
1138 | ||
6b6b9c68 JZZ |
1139 | if (insn1_info.same_vlmax_p (insn2_info) |
1140 | && insn1_info.compatible_avl_p (insn2_info)) | |
1141 | return true; | |
1142 | } | |
1143 | ||
1144 | /* We only handle AVL is set by instructions with no side effects. */ | |
1145 | if (!single_set1 || !single_set2) | |
1146 | return false; | |
1147 | if (!rtx_equal_p (SET_SRC (single_set1), SET_SRC (single_set2))) | |
1148 | return false; | |
e9fd9efc JZ |
1149 | /* RTL_SSA uses include REG_NOTE. Consider this following case: |
1150 | ||
1151 | insn1 RTL: | |
1152 | (insn 41 39 42 4 (set (reg:DI 26 s10 [orig:159 loop_len_46 ] [159]) | |
1153 | (umin:DI (reg:DI 15 a5 [orig:201 _149 ] [201]) | |
1154 | (reg:DI 14 a4 [276]))) 408 {*umindi3} | |
1155 | (expr_list:REG_EQUAL (umin:DI (reg:DI 15 a5 [orig:201 _149 ] [201]) | |
1156 | (const_int 2 [0x2])) | |
1157 | (nil))) | |
1158 | The RTL_SSA uses of this instruction has 2 uses: | |
1159 | 1. (reg:DI 15 a5 [orig:201 _149 ] [201]) - twice. | |
1160 | 2. (reg:DI 14 a4 [276]) - once. | |
1161 | ||
1162 | insn2 RTL: | |
1163 | (insn 38 353 351 4 (set (reg:DI 27 s11 [orig:160 loop_len_47 ] [160]) | |
1164 | (umin:DI (reg:DI 15 a5 [orig:199 _146 ] [199]) | |
1165 | (reg:DI 14 a4 [276]))) 408 {*umindi3} | |
1166 | (expr_list:REG_EQUAL (umin:DI (reg:DI 28 t3 [orig:200 ivtmp_147 ] [200]) | |
1167 | (const_int 2 [0x2])) | |
1168 | (nil))) | |
1169 | The RTL_SSA uses of this instruction has 3 uses: | |
1170 | 1. (reg:DI 15 a5 [orig:199 _146 ] [199]) - once | |
1171 | 2. (reg:DI 14 a4 [276]) - once | |
1172 | 3. (reg:DI 28 t3 [orig:200 ivtmp_147 ] [200]) - once | |
1173 | ||
1174 | Return false when insn1->uses ().size () != insn2->uses ().size () | |
1175 | */ | |
1176 | if (insn1->uses ().size () != insn2->uses ().size ()) | |
1177 | return false; | |
6b6b9c68 JZZ |
1178 | for (size_t i = 0; i < insn1->uses ().size (); i++) |
1179 | if (insn1->uses ()[i] != insn2->uses ()[i]) | |
1180 | return false; | |
1181 | return true; | |
4f673c5e JZZ |
1182 | } |
1183 | ||
1184 | /* Helper function to get single same real RTL source. | |
1185 | return NULL if it is not a single real RTL source. */ | |
6b6b9c68 | 1186 | static insn_info * |
4f673c5e JZZ |
1187 | extract_single_source (set_info *set) |
1188 | { | |
1189 | if (!set) | |
1190 | return nullptr; | |
1191 | if (set->insn ()->is_real ()) | |
6b6b9c68 | 1192 | return set->insn (); |
4f673c5e JZZ |
1193 | if (!set->insn ()->is_phi ()) |
1194 | return nullptr; | |
6b6b9c68 | 1195 | hash_set<set_info *> sets = get_all_sets (set, true, false, true); |
4f673c5e | 1196 | |
6b6b9c68 | 1197 | insn_info *first_insn = (*sets.begin ())->insn (); |
4f673c5e JZZ |
1198 | if (first_insn->is_artificial ()) |
1199 | return nullptr; | |
6b6b9c68 | 1200 | for (const set_info *set : sets) |
4f673c5e JZZ |
1201 | { |
1202 | /* If there is a head or end insn, we conservative return | |
1203 | NULL so that VSETVL PASS will insert vsetvl directly. */ | |
6b6b9c68 | 1204 | if (set->insn ()->is_artificial ()) |
4f673c5e | 1205 | return nullptr; |
6b6b9c68 | 1206 | if (!source_equal_p (set->insn (), first_insn)) |
4f673c5e JZZ |
1207 | return nullptr; |
1208 | } | |
1209 | ||
6b6b9c68 | 1210 | return first_insn; |
4f673c5e JZZ |
1211 | } |
1212 | ||
ec99ffab JZZ |
1213 | static unsigned |
1214 | calculate_sew (vlmul_type vlmul, unsigned int ratio) | |
1215 | { | |
1216 | for (const unsigned sew : ALL_SEW) | |
1217 | if (calculate_ratio (sew, vlmul) == ratio) | |
1218 | return sew; | |
1219 | return 0; | |
1220 | } | |
1221 | ||
1222 | static vlmul_type | |
1223 | calculate_vlmul (unsigned int sew, unsigned int ratio) | |
1224 | { | |
1225 | for (const vlmul_type vlmul : ALL_LMUL) | |
1226 | if (calculate_ratio (sew, vlmul) == ratio) | |
1227 | return vlmul; | |
1228 | return LMUL_RESERVED; | |
1229 | } | |
1230 | ||
1231 | static bool | |
1232 | incompatible_avl_p (const vector_insn_info &info1, | |
1233 | const vector_insn_info &info2) | |
1234 | { | |
1235 | return !info1.compatible_avl_p (info2) && !info2.compatible_avl_p (info1); | |
1236 | } | |
1237 | ||
1238 | static bool | |
1239 | different_sew_p (const vector_insn_info &info1, const vector_insn_info &info2) | |
1240 | { | |
1241 | return info1.get_sew () != info2.get_sew (); | |
1242 | } | |
1243 | ||
1244 | static bool | |
1245 | different_lmul_p (const vector_insn_info &info1, const vector_insn_info &info2) | |
1246 | { | |
1247 | return info1.get_vlmul () != info2.get_vlmul (); | |
1248 | } | |
1249 | ||
1250 | static bool | |
1251 | different_ratio_p (const vector_insn_info &info1, const vector_insn_info &info2) | |
1252 | { | |
1253 | return info1.get_ratio () != info2.get_ratio (); | |
1254 | } | |
1255 | ||
1256 | static bool | |
1257 | different_tail_policy_p (const vector_insn_info &info1, | |
1258 | const vector_insn_info &info2) | |
1259 | { | |
1260 | return info1.get_ta () != info2.get_ta (); | |
1261 | } | |
1262 | ||
1263 | static bool | |
1264 | different_mask_policy_p (const vector_insn_info &info1, | |
1265 | const vector_insn_info &info2) | |
1266 | { | |
1267 | return info1.get_ma () != info2.get_ma (); | |
1268 | } | |
1269 | ||
1270 | static bool | |
1271 | possible_zero_avl_p (const vector_insn_info &info1, | |
1272 | const vector_insn_info &info2) | |
1273 | { | |
1274 | return !info1.has_non_zero_avl () || !info2.has_non_zero_avl (); | |
1275 | } | |
1276 | ||
ec99ffab JZZ |
1277 | static bool |
1278 | second_ratio_invalid_for_first_sew_p (const vector_insn_info &info1, | |
1279 | const vector_insn_info &info2) | |
1280 | { | |
1281 | return calculate_vlmul (info1.get_sew (), info2.get_ratio ()) | |
1282 | == LMUL_RESERVED; | |
1283 | } | |
1284 | ||
1285 | static bool | |
1286 | second_ratio_invalid_for_first_lmul_p (const vector_insn_info &info1, | |
1287 | const vector_insn_info &info2) | |
1288 | { | |
1289 | return calculate_sew (info1.get_vlmul (), info2.get_ratio ()) == 0; | |
1290 | } | |
1291 | ||
1292 | static bool | |
1293 | second_sew_less_than_first_sew_p (const vector_insn_info &info1, | |
1294 | const vector_insn_info &info2) | |
1295 | { | |
1296 | return info2.get_sew () < info1.get_sew (); | |
1297 | } | |
1298 | ||
1299 | static bool | |
1300 | first_sew_less_than_second_sew_p (const vector_insn_info &info1, | |
1301 | const vector_insn_info &info2) | |
1302 | { | |
1303 | return info1.get_sew () < info2.get_sew (); | |
1304 | } | |
1305 | ||
1306 | /* return 0 if LMUL1 == LMUL2. | |
1307 | return -1 if LMUL1 < LMUL2. | |
1308 | return 1 if LMUL1 > LMUL2. */ | |
1309 | static int | |
1310 | compare_lmul (vlmul_type vlmul1, vlmul_type vlmul2) | |
1311 | { | |
1312 | if (vlmul1 == vlmul2) | |
1313 | return 0; | |
1314 | ||
1315 | switch (vlmul1) | |
1316 | { | |
1317 | case LMUL_1: | |
1318 | if (vlmul2 == LMUL_2 || vlmul2 == LMUL_4 || vlmul2 == LMUL_8) | |
1319 | return 1; | |
1320 | else | |
1321 | return -1; | |
1322 | case LMUL_2: | |
1323 | if (vlmul2 == LMUL_4 || vlmul2 == LMUL_8) | |
1324 | return 1; | |
1325 | else | |
1326 | return -1; | |
1327 | case LMUL_4: | |
1328 | if (vlmul2 == LMUL_8) | |
1329 | return 1; | |
1330 | else | |
1331 | return -1; | |
1332 | case LMUL_8: | |
1333 | return -1; | |
1334 | case LMUL_F2: | |
1335 | if (vlmul2 == LMUL_1 || vlmul2 == LMUL_2 || vlmul2 == LMUL_4 | |
1336 | || vlmul2 == LMUL_8) | |
1337 | return 1; | |
1338 | else | |
1339 | return -1; | |
1340 | case LMUL_F4: | |
1341 | if (vlmul2 == LMUL_F2 || vlmul2 == LMUL_1 || vlmul2 == LMUL_2 | |
1342 | || vlmul2 == LMUL_4 || vlmul2 == LMUL_8) | |
1343 | return 1; | |
1344 | else | |
1345 | return -1; | |
1346 | case LMUL_F8: | |
1347 | return 0; | |
1348 | default: | |
1349 | gcc_unreachable (); | |
1350 | } | |
1351 | } | |
1352 | ||
1353 | static bool | |
1354 | second_lmul_less_than_first_lmul_p (const vector_insn_info &info1, | |
1355 | const vector_insn_info &info2) | |
1356 | { | |
1357 | return compare_lmul (info2.get_vlmul (), info1.get_vlmul ()) == -1; | |
1358 | } | |
1359 | ||
ec99ffab JZZ |
1360 | static bool |
1361 | second_ratio_less_than_first_ratio_p (const vector_insn_info &info1, | |
1362 | const vector_insn_info &info2) | |
1363 | { | |
1364 | return info2.get_ratio () < info1.get_ratio (); | |
1365 | } | |
1366 | ||
1367 | static CONSTEXPR const demands_cond incompatible_conds[] = { | |
1368 | #define DEF_INCOMPATIBLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, \ | |
1369 | GE_SEW1, TAIL_POLICTY1, MASK_POLICY1, AVL2, \ | |
1370 | SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, \ | |
1371 | TAIL_POLICTY2, MASK_POLICY2, COND) \ | |
1372 | {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1, \ | |
1373 | MASK_POLICY1}, \ | |
1374 | {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \ | |
1375 | MASK_POLICY2}}, \ | |
1376 | COND}, | |
1377 | #include "riscv-vsetvl.def" | |
1378 | }; | |
1379 | ||
1380 | static unsigned | |
1381 | greatest_sew (const vector_insn_info &info1, const vector_insn_info &info2) | |
1382 | { | |
1383 | return std::max (info1.get_sew (), info2.get_sew ()); | |
1384 | } | |
1385 | ||
1386 | static unsigned | |
1387 | first_sew (const vector_insn_info &info1, const vector_insn_info &) | |
1388 | { | |
1389 | return info1.get_sew (); | |
1390 | } | |
1391 | ||
1392 | static unsigned | |
1393 | second_sew (const vector_insn_info &, const vector_insn_info &info2) | |
1394 | { | |
1395 | return info2.get_sew (); | |
1396 | } | |
1397 | ||
1398 | static vlmul_type | |
1399 | first_vlmul (const vector_insn_info &info1, const vector_insn_info &) | |
1400 | { | |
1401 | return info1.get_vlmul (); | |
1402 | } | |
1403 | ||
1404 | static vlmul_type | |
1405 | second_vlmul (const vector_insn_info &, const vector_insn_info &info2) | |
1406 | { | |
1407 | return info2.get_vlmul (); | |
1408 | } | |
1409 | ||
1410 | static unsigned | |
1411 | first_ratio (const vector_insn_info &info1, const vector_insn_info &) | |
1412 | { | |
1413 | return info1.get_ratio (); | |
1414 | } | |
1415 | ||
1416 | static unsigned | |
1417 | second_ratio (const vector_insn_info &, const vector_insn_info &info2) | |
1418 | { | |
1419 | return info2.get_ratio (); | |
1420 | } | |
1421 | ||
1422 | static vlmul_type | |
1423 | vlmul_for_first_sew_second_ratio (const vector_insn_info &info1, | |
1424 | const vector_insn_info &info2) | |
1425 | { | |
1426 | return calculate_vlmul (info1.get_sew (), info2.get_ratio ()); | |
1427 | } | |
1428 | ||
1429 | static unsigned | |
1430 | ratio_for_second_sew_first_vlmul (const vector_insn_info &info1, | |
1431 | const vector_insn_info &info2) | |
1432 | { | |
1433 | return calculate_ratio (info2.get_sew (), info1.get_vlmul ()); | |
1434 | } | |
1435 | ||
1436 | static CONSTEXPR const demands_fuse_rule fuse_rules[] = { | |
1437 | #define DEF_SEW_LMUL_FUSE_RULE(DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1, \ | |
1438 | DEMAND_GE_SEW1, DEMAND_SEW2, DEMAND_LMUL2, \ | |
1439 | DEMAND_RATIO2, DEMAND_GE_SEW2, NEW_DEMAND_SEW, \ | |
1440 | NEW_DEMAND_LMUL, NEW_DEMAND_RATIO, \ | |
1441 | NEW_DEMAND_GE_SEW, NEW_SEW, NEW_VLMUL, \ | |
1442 | NEW_RATIO) \ | |
1443 | {{{DEMAND_ANY, DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1, DEMAND_ANY, \ | |
1444 | DEMAND_GE_SEW1, DEMAND_ANY, DEMAND_ANY}, \ | |
1445 | {DEMAND_ANY, DEMAND_SEW2, DEMAND_LMUL2, DEMAND_RATIO2, DEMAND_ANY, \ | |
1446 | DEMAND_GE_SEW2, DEMAND_ANY, DEMAND_ANY}}, \ | |
1447 | NEW_DEMAND_SEW, \ | |
1448 | NEW_DEMAND_LMUL, \ | |
1449 | NEW_DEMAND_RATIO, \ | |
1450 | NEW_DEMAND_GE_SEW, \ | |
1451 | NEW_SEW, \ | |
1452 | NEW_VLMUL, \ | |
1453 | NEW_RATIO}, | |
1454 | #include "riscv-vsetvl.def" | |
1455 | }; | |
1456 | ||
1457 | static bool | |
1458 | always_unavailable (const vector_insn_info &, const vector_insn_info &) | |
1459 | { | |
1460 | return true; | |
1461 | } | |
1462 | ||
1463 | static bool | |
1464 | avl_unavailable_p (const vector_insn_info &info1, const vector_insn_info &info2) | |
1465 | { | |
1466 | return !info2.compatible_avl_p (info1.get_avl_info ()); | |
1467 | } | |
1468 | ||
1469 | static bool | |
1470 | sew_unavailable_p (const vector_insn_info &info1, const vector_insn_info &info2) | |
1471 | { | |
1472 | if (!info2.demand_p (DEMAND_LMUL) && !info2.demand_p (DEMAND_RATIO)) | |
1473 | { | |
1474 | if (info2.demand_p (DEMAND_GE_SEW)) | |
1475 | return info1.get_sew () < info2.get_sew (); | |
1476 | return info1.get_sew () != info2.get_sew (); | |
1477 | } | |
1478 | return true; | |
1479 | } | |
1480 | ||
1481 | static bool | |
1482 | lmul_unavailable_p (const vector_insn_info &info1, | |
1483 | const vector_insn_info &info2) | |
1484 | { | |
1485 | if (info1.get_vlmul () == info2.get_vlmul () && !info2.demand_p (DEMAND_SEW) | |
1486 | && !info2.demand_p (DEMAND_RATIO)) | |
1487 | return false; | |
1488 | return true; | |
1489 | } | |
1490 | ||
1491 | static bool | |
1492 | ge_sew_unavailable_p (const vector_insn_info &info1, | |
1493 | const vector_insn_info &info2) | |
1494 | { | |
1495 | if (!info2.demand_p (DEMAND_LMUL) && !info2.demand_p (DEMAND_RATIO) | |
1496 | && info2.demand_p (DEMAND_GE_SEW)) | |
1497 | return info1.get_sew () < info2.get_sew (); | |
1498 | return true; | |
1499 | } | |
1500 | ||
1501 | static bool | |
1502 | ge_sew_lmul_unavailable_p (const vector_insn_info &info1, | |
1503 | const vector_insn_info &info2) | |
1504 | { | |
1505 | if (!info2.demand_p (DEMAND_RATIO) && info2.demand_p (DEMAND_GE_SEW)) | |
1506 | return info1.get_sew () < info2.get_sew (); | |
1507 | return true; | |
1508 | } | |
1509 | ||
1510 | static bool | |
1511 | ge_sew_ratio_unavailable_p (const vector_insn_info &info1, | |
1512 | const vector_insn_info &info2) | |
1513 | { | |
1514 | if (!info2.demand_p (DEMAND_LMUL) && info2.demand_p (DEMAND_GE_SEW)) | |
1515 | return info1.get_sew () < info2.get_sew (); | |
1516 | return true; | |
1517 | } | |
1518 | ||
1519 | static CONSTEXPR const demands_cond unavailable_conds[] = { | |
1520 | #define DEF_UNAVAILABLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, \ | |
1521 | TAIL_POLICTY1, MASK_POLICY1, AVL2, SEW2, LMUL2, \ | |
1522 | RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \ | |
1523 | MASK_POLICY2, COND) \ | |
1524 | {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1, \ | |
1525 | MASK_POLICY1}, \ | |
1526 | {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \ | |
1527 | MASK_POLICY2}}, \ | |
1528 | COND}, | |
1529 | #include "riscv-vsetvl.def" | |
1530 | }; | |
1531 | ||
1532 | static bool | |
1533 | same_sew_lmul_demand_p (const bool *dems1, const bool *dems2) | |
1534 | { | |
1535 | return dems1[DEMAND_SEW] == dems2[DEMAND_SEW] | |
1536 | && dems1[DEMAND_LMUL] == dems2[DEMAND_LMUL] | |
1537 | && dems1[DEMAND_RATIO] == dems2[DEMAND_RATIO] && !dems1[DEMAND_GE_SEW] | |
1538 | && !dems2[DEMAND_GE_SEW]; | |
1539 | } | |
1540 | ||
1541 | static bool | |
1542 | propagate_avl_across_demands_p (const vector_insn_info &info1, | |
1543 | const vector_insn_info &info2) | |
1544 | { | |
1545 | if (info2.demand_p (DEMAND_AVL)) | |
1546 | { | |
1547 | if (info2.demand_p (DEMAND_NONZERO_AVL)) | |
1548 | return info1.demand_p (DEMAND_AVL) | |
1549 | && !info1.demand_p (DEMAND_NONZERO_AVL) && info1.has_avl_reg (); | |
1550 | } | |
1551 | else | |
1552 | return info1.demand_p (DEMAND_AVL) && info1.has_avl_reg (); | |
1553 | return false; | |
1554 | } | |
1555 | ||
1556 | static bool | |
a481eed8 | 1557 | reg_available_p (const insn_info *insn, const vector_insn_info &info) |
ec99ffab | 1558 | { |
a481eed8 | 1559 | if (info.has_avl_reg () && !info.get_avl_source ()) |
44c918b5 | 1560 | return false; |
a481eed8 JZZ |
1561 | insn_info *def_insn = info.get_avl_source ()->insn (); |
1562 | if (def_insn->bb () == insn->bb ()) | |
1563 | return before_p (def_insn, insn); | |
ec99ffab | 1564 | else |
a481eed8 JZZ |
1565 | return dominated_by_p (CDI_DOMINATORS, insn->bb ()->cfg_bb (), |
1566 | def_insn->bb ()->cfg_bb ()); | |
ec99ffab JZZ |
1567 | } |
1568 | ||
60bd33bc JZZ |
1569 | /* Return true if the instruction support relaxed compatible check. */ |
1570 | static bool | |
1571 | support_relaxed_compatible_p (const vector_insn_info &info1, | |
1572 | const vector_insn_info &info2) | |
1573 | { | |
1574 | if (fault_first_load_p (info1.get_insn ()->rtl ()) | |
1575 | && info2.demand_p (DEMAND_AVL) && info2.has_avl_reg () | |
1576 | && info2.get_avl_source () && info2.get_avl_source ()->insn ()->is_phi ()) | |
1577 | { | |
1578 | hash_set<set_info *> sets | |
1579 | = get_all_sets (info2.get_avl_source (), true, false, false); | |
1580 | for (set_info *set : sets) | |
1581 | { | |
1582 | if (read_vl_insn_p (set->insn ()->rtl ())) | |
1583 | { | |
1584 | const insn_info *insn | |
1585 | = get_backward_fault_first_load_insn (set->insn ()); | |
1586 | if (insn == info1.get_insn ()) | |
1587 | return info2.compatible_vtype_p (info1); | |
1588 | } | |
1589 | } | |
1590 | } | |
1591 | return false; | |
1592 | } | |
1593 | ||
1594 | /* Return true if the block is worthwhile backward propagation. */ | |
1595 | static bool | |
1596 | backward_propagate_worthwhile_p (const basic_block cfg_bb, | |
1597 | const vector_block_info block_info) | |
1598 | { | |
1599 | if (loop_basic_block_p (cfg_bb)) | |
1600 | { | |
1601 | if (block_info.reaching_out.valid_or_dirty_p ()) | |
1602 | { | |
1603 | if (block_info.local_dem.compatible_p (block_info.reaching_out)) | |
1604 | { | |
1605 | /* Case 1 (Can backward propagate): | |
1606 | .... | |
1607 | bb0: | |
1608 | ... | |
1609 | for (int i = 0; i < n; i++) | |
1610 | { | |
1611 | vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7); | |
1612 | __riscv_vse16_v_i16mf4 (out + i + 5, v, 7); | |
1613 | } | |
1614 | The local_dem is compatible with reaching_out. Such case is | |
1615 | worthwhile backward propagation. */ | |
1616 | return true; | |
1617 | } | |
1618 | else | |
1619 | { | |
1620 | if (support_relaxed_compatible_p (block_info.reaching_out, | |
1621 | block_info.local_dem)) | |
1622 | return true; | |
1623 | /* Case 2 (Don't backward propagate): | |
1624 | .... | |
1625 | bb0: | |
1626 | ... | |
1627 | for (int i = 0; i < n; i++) | |
1628 | { | |
1629 | vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7); | |
1630 | __riscv_vse16_v_i16mf4 (out + i + 5, v, 7); | |
1631 | vint16mf2_t v2 = __riscv_vle16_v_i16mf2 (in + i + 6, 8); | |
1632 | __riscv_vse16_v_i16mf2 (out + i + 6, v, 8); | |
1633 | } | |
1634 | The local_dem is incompatible with reaching_out. | |
1635 | It makes no sense to backward propagate the local_dem since we | |
1636 | can't avoid VSETVL inside the loop. */ | |
1637 | return false; | |
1638 | } | |
1639 | } | |
1640 | else | |
1641 | { | |
1642 | gcc_assert (block_info.reaching_out.unknown_p ()); | |
1643 | /* Case 3 (Don't backward propagate): | |
1644 | .... | |
1645 | bb0: | |
1646 | ... | |
1647 | for (int i = 0; i < n; i++) | |
1648 | { | |
1649 | vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7); | |
1650 | __riscv_vse16_v_i16mf4 (out + i + 5, v, 7); | |
1651 | fn3 (); | |
1652 | } | |
1653 | The local_dem is VALID, but the reaching_out is UNKNOWN. | |
1654 | It makes no sense to backward propagate the local_dem since we | |
1655 | can't avoid VSETVL inside the loop. */ | |
1656 | return false; | |
1657 | } | |
1658 | } | |
1659 | ||
1660 | return true; | |
1661 | } | |
1662 | ||
a2d12abe JZZ |
1663 | /* Count the number of REGNO in RINSN. */ |
1664 | static int | |
1665 | count_regno_occurrences (rtx_insn *rinsn, unsigned int regno) | |
1666 | { | |
1667 | int count = 0; | |
1668 | extract_insn (rinsn); | |
1669 | for (int i = 0; i < recog_data.n_operands; i++) | |
1670 | if (refers_to_regno_p (regno, recog_data.operand[i])) | |
1671 | count++; | |
1672 | return count; | |
1673 | } | |
1674 | ||
12b23c71 JZZ |
1675 | avl_info::avl_info (const avl_info &other) |
1676 | { | |
1677 | m_value = other.get_value (); | |
1678 | m_source = other.get_source (); | |
1679 | } | |
1680 | ||
9243c3d1 JZZ |
1681 | avl_info::avl_info (rtx value_in, set_info *source_in) |
1682 | : m_value (value_in), m_source (source_in) | |
1683 | {} | |
1684 | ||
4f673c5e JZZ |
1685 | bool |
1686 | avl_info::single_source_equal_p (const avl_info &other) const | |
1687 | { | |
1688 | set_info *set1 = m_source; | |
1689 | set_info *set2 = other.get_source (); | |
6b6b9c68 JZZ |
1690 | insn_info *insn1 = extract_single_source (set1); |
1691 | insn_info *insn2 = extract_single_source (set2); | |
1692 | if (!insn1 || !insn2) | |
1693 | return false; | |
1694 | return source_equal_p (insn1, insn2); | |
1695 | } | |
1696 | ||
1697 | bool | |
1698 | avl_info::multiple_source_equal_p (const avl_info &other) const | |
1699 | { | |
d875d756 JZ |
1700 | /* When the def info is same in RTL_SSA namespace, it's safe |
1701 | to consider they are avl compatible. */ | |
1702 | if (m_source == other.get_source ()) | |
1703 | return true; | |
6b6b9c68 | 1704 | |
d875d756 JZ |
1705 | /* We only consider handle PHI node. */ |
1706 | if (!m_source->insn ()->is_phi () || !other.get_source ()->insn ()->is_phi ()) | |
1707 | return false; | |
6b6b9c68 | 1708 | |
d875d756 JZ |
1709 | phi_info *phi1 = as_a<phi_info *> (m_source); |
1710 | phi_info *phi2 = as_a<phi_info *> (other.get_source ()); | |
6b6b9c68 | 1711 | |
d875d756 JZ |
1712 | if (phi1->is_degenerate () && phi2->is_degenerate ()) |
1713 | { | |
1714 | /* Degenerate PHI means the PHI node only have one input. */ | |
1715 | ||
1716 | /* If both PHI nodes have the same single input in use list. | |
1717 | We consider they are AVL compatible. */ | |
1718 | if (phi1->input_value (0) == phi2->input_value (0)) | |
1719 | return true; | |
1720 | } | |
1721 | /* TODO: We can support more optimization cases in the future. */ | |
1722 | return false; | |
4f673c5e JZZ |
1723 | } |
1724 | ||
9243c3d1 JZZ |
1725 | avl_info & |
1726 | avl_info::operator= (const avl_info &other) | |
1727 | { | |
1728 | m_value = other.get_value (); | |
1729 | m_source = other.get_source (); | |
1730 | return *this; | |
1731 | } | |
1732 | ||
1733 | bool | |
1734 | avl_info::operator== (const avl_info &other) const | |
1735 | { | |
1736 | if (!m_value) | |
1737 | return !other.get_value (); | |
1738 | if (!other.get_value ()) | |
1739 | return false; | |
1740 | ||
9243c3d1 JZZ |
1741 | if (GET_CODE (m_value) != GET_CODE (other.get_value ())) |
1742 | return false; | |
1743 | ||
1744 | /* Handle CONST_INT AVL. */ | |
1745 | if (CONST_INT_P (m_value)) | |
1746 | return INTVAL (m_value) == INTVAL (other.get_value ()); | |
1747 | ||
1748 | /* Handle VLMAX AVL. */ | |
1749 | if (vlmax_avl_p (m_value)) | |
1750 | return vlmax_avl_p (other.get_value ()); | |
1751 | ||
4f673c5e JZZ |
1752 | /* If any source is undef value, we think they are not equal. */ |
1753 | if (!m_source || !other.get_source ()) | |
1754 | return false; | |
1755 | ||
1756 | /* If both sources are single source (defined by a single real RTL) | |
1757 | and their definitions are same. */ | |
1758 | if (single_source_equal_p (other)) | |
1759 | return true; | |
1760 | ||
6b6b9c68 | 1761 | return multiple_source_equal_p (other); |
9243c3d1 JZZ |
1762 | } |
1763 | ||
1764 | bool | |
1765 | avl_info::operator!= (const avl_info &other) const | |
1766 | { | |
1767 | return !(*this == other); | |
1768 | } | |
1769 | ||
ec99ffab JZZ |
1770 | bool |
1771 | avl_info::has_non_zero_avl () const | |
1772 | { | |
1773 | if (has_avl_imm ()) | |
1774 | return INTVAL (get_value ()) > 0; | |
1775 | if (has_avl_reg ()) | |
1776 | return vlmax_avl_p (get_value ()); | |
1777 | return false; | |
1778 | } | |
1779 | ||
9243c3d1 JZZ |
1780 | /* Initialize VL/VTYPE information. */ |
1781 | vl_vtype_info::vl_vtype_info (avl_info avl_in, uint8_t sew_in, | |
1782 | enum vlmul_type vlmul_in, uint8_t ratio_in, | |
1783 | bool ta_in, bool ma_in) | |
1784 | : m_avl (avl_in), m_sew (sew_in), m_vlmul (vlmul_in), m_ratio (ratio_in), | |
1785 | m_ta (ta_in), m_ma (ma_in) | |
1786 | { | |
1787 | gcc_assert (valid_sew_p (m_sew) && "Unexpected SEW"); | |
1788 | } | |
1789 | ||
1790 | bool | |
1791 | vl_vtype_info::operator== (const vl_vtype_info &other) const | |
1792 | { | |
6b6b9c68 | 1793 | return same_avl_p (other) && m_sew == other.get_sew () |
9243c3d1 JZZ |
1794 | && m_vlmul == other.get_vlmul () && m_ta == other.get_ta () |
1795 | && m_ma == other.get_ma () && m_ratio == other.get_ratio (); | |
1796 | } | |
1797 | ||
1798 | bool | |
1799 | vl_vtype_info::operator!= (const vl_vtype_info &other) const | |
1800 | { | |
1801 | return !(*this == other); | |
1802 | } | |
1803 | ||
9243c3d1 JZZ |
1804 | bool |
1805 | vl_vtype_info::same_avl_p (const vl_vtype_info &other) const | |
1806 | { | |
6b6b9c68 JZZ |
1807 | /* We need to compare both RTL and SET. If both AVL are CONST_INT. |
1808 | For example, const_int 3 and const_int 4, we need to compare | |
1809 | RTL. If both AVL are REG and their REGNO are same, we need to | |
1810 | compare SET. */ | |
1811 | return get_avl () == other.get_avl () | |
1812 | && get_avl_source () == other.get_avl_source (); | |
9243c3d1 JZZ |
1813 | } |
1814 | ||
1815 | bool | |
1816 | vl_vtype_info::same_vtype_p (const vl_vtype_info &other) const | |
1817 | { | |
1818 | return get_sew () == other.get_sew () && get_vlmul () == other.get_vlmul () | |
1819 | && get_ta () == other.get_ta () && get_ma () == other.get_ma (); | |
1820 | } | |
1821 | ||
1822 | bool | |
1823 | vl_vtype_info::same_vlmax_p (const vl_vtype_info &other) const | |
1824 | { | |
1825 | return get_ratio () == other.get_ratio (); | |
1826 | } | |
1827 | ||
1828 | /* Compare the compatibility between Dem1 and Dem2. | |
1829 | If Dem1 > Dem2, Dem1 has bigger compatibility then Dem2 | |
1830 | meaning Dem1 is easier be compatible with others than Dem2 | |
1831 | or Dem2 is stricter than Dem1. | |
1832 | For example, Dem1 (demand SEW + LMUL) > Dem2 (demand RATIO). */ | |
9243c3d1 JZZ |
1833 | bool |
1834 | vector_insn_info::operator>= (const vector_insn_info &other) const | |
1835 | { | |
60bd33bc JZZ |
1836 | if (support_relaxed_compatible_p (*this, other)) |
1837 | { | |
1838 | unsigned array_size = sizeof (unavailable_conds) / sizeof (demands_cond); | |
1839 | /* Bypass AVL unavailable cases. */ | |
1840 | for (unsigned i = 2; i < array_size; i++) | |
1841 | if (unavailable_conds[i].pair.match_cond_p (this->get_demands (), | |
1842 | other.get_demands ()) | |
1843 | && unavailable_conds[i].incompatible_p (*this, other)) | |
1844 | return false; | |
1845 | return true; | |
1846 | } | |
1847 | ||
1848 | if (!other.compatible_p (static_cast<const vl_vtype_info &> (*this))) | |
1849 | return false; | |
1850 | if (!this->compatible_p (static_cast<const vl_vtype_info &> (other))) | |
9243c3d1 JZZ |
1851 | return true; |
1852 | ||
1853 | if (*this == other) | |
1854 | return true; | |
1855 | ||
ec99ffab JZZ |
1856 | for (const auto &cond : unavailable_conds) |
1857 | if (cond.pair.match_cond_p (this->get_demands (), other.get_demands ()) | |
1858 | && cond.incompatible_p (*this, other)) | |
1859 | return false; | |
9243c3d1 JZZ |
1860 | |
1861 | return true; | |
1862 | } | |
1863 | ||
1864 | bool | |
1865 | vector_insn_info::operator== (const vector_insn_info &other) const | |
1866 | { | |
1867 | gcc_assert (!uninit_p () && !other.uninit_p () | |
1868 | && "Uninitialization should not happen"); | |
1869 | ||
1870 | /* Empty is only equal to another Empty. */ | |
1871 | if (empty_p ()) | |
1872 | return other.empty_p (); | |
1873 | if (other.empty_p ()) | |
1874 | return empty_p (); | |
1875 | ||
1876 | /* Unknown is only equal to another Unknown. */ | |
1877 | if (unknown_p ()) | |
1878 | return other.unknown_p (); | |
1879 | if (other.unknown_p ()) | |
1880 | return unknown_p (); | |
1881 | ||
1882 | for (size_t i = 0; i < NUM_DEMAND; i++) | |
1883 | if (m_demands[i] != other.demand_p ((enum demand_type) i)) | |
1884 | return false; | |
1885 | ||
7ae4d1df JZZ |
1886 | if (vector_config_insn_p (m_insn->rtl ()) |
1887 | || vector_config_insn_p (other.get_insn ()->rtl ())) | |
1888 | if (m_insn != other.get_insn ()) | |
1889 | return false; | |
9243c3d1 JZZ |
1890 | |
1891 | if (!same_avl_p (other)) | |
1892 | return false; | |
1893 | ||
1894 | /* If the full VTYPE is valid, check that it is the same. */ | |
1895 | return same_vtype_p (other); | |
1896 | } | |
1897 | ||
1898 | void | |
1899 | vector_insn_info::parse_insn (rtx_insn *rinsn) | |
1900 | { | |
1901 | *this = vector_insn_info (); | |
1902 | if (!NONDEBUG_INSN_P (rinsn)) | |
1903 | return; | |
20c85207 JZ |
1904 | if (optimize == 0 && !has_vtype_op (rinsn)) |
1905 | return; | |
1906 | if (optimize > 0 && !vsetvl_insn_p (rinsn)) | |
9243c3d1 JZZ |
1907 | return; |
1908 | m_state = VALID; | |
1909 | extract_insn_cached (rinsn); | |
20c85207 | 1910 | rtx avl = ::get_avl (rinsn); |
9243c3d1 JZZ |
1911 | m_avl = avl_info (avl, nullptr); |
1912 | m_sew = ::get_sew (rinsn); | |
1913 | m_vlmul = ::get_vlmul (rinsn); | |
1914 | m_ta = tail_agnostic_p (rinsn); | |
1915 | m_ma = mask_agnostic_p (rinsn); | |
1916 | } | |
1917 | ||
1918 | void | |
1919 | vector_insn_info::parse_insn (insn_info *insn) | |
1920 | { | |
1921 | *this = vector_insn_info (); | |
1922 | ||
1923 | /* Return if it is debug insn for the consistency with optimize == 0. */ | |
1924 | if (insn->is_debug_insn ()) | |
1925 | return; | |
1926 | ||
1927 | /* We set it as unknown since we don't what will happen in CALL or ASM. */ | |
1928 | if (insn->is_call () || insn->is_asm ()) | |
1929 | { | |
1930 | set_unknown (); | |
1931 | return; | |
1932 | } | |
1933 | ||
1934 | /* If this is something that updates VL/VTYPE that we don't know about, set | |
1935 | the state to unknown. */ | |
60bd33bc | 1936 | if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()) |
9243c3d1 JZZ |
1937 | && (find_access (insn->defs (), VL_REGNUM) |
1938 | || find_access (insn->defs (), VTYPE_REGNUM))) | |
1939 | { | |
1940 | set_unknown (); | |
1941 | return; | |
1942 | } | |
1943 | ||
1944 | if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())) | |
1945 | return; | |
1946 | ||
1947 | /* Warning: This function has to work on both the lowered (i.e. post | |
1948 | emit_local_forward_vsetvls) and pre-lowering forms. The main implication | |
1949 | of this is that it can't use the value of a SEW, VL, or Policy operand as | |
1950 | they might be stale after lowering. */ | |
1951 | vl_vtype_info::operator= (get_vl_vtype_info (insn)); | |
1952 | m_insn = insn; | |
1953 | m_state = VALID; | |
1954 | if (vector_config_insn_p (insn->rtl ())) | |
1955 | { | |
1956 | m_demands[DEMAND_AVL] = true; | |
1957 | m_demands[DEMAND_RATIO] = true; | |
1958 | return; | |
1959 | } | |
1960 | ||
1961 | if (has_vl_op (insn->rtl ())) | |
1962 | m_demands[DEMAND_AVL] = true; | |
1963 | ||
1964 | if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE) | |
1965 | m_demands[DEMAND_RATIO] = true; | |
1966 | else | |
1967 | { | |
1968 | /* TODO: By default, if it doesn't demand RATIO, we set it | |
1969 | demand SEW && LMUL both. Some instructions may demand SEW | |
1970 | only and ignore LMUL, will fix it later. */ | |
1971 | m_demands[DEMAND_SEW] = true; | |
ec99ffab JZZ |
1972 | if (!ignore_vlmul_insn_p (insn->rtl ())) |
1973 | m_demands[DEMAND_LMUL] = true; | |
9243c3d1 JZZ |
1974 | } |
1975 | ||
1976 | if (get_attr_ta (insn->rtl ()) != INVALID_ATTRIBUTE) | |
1977 | m_demands[DEMAND_TAIL_POLICY] = true; | |
1978 | if (get_attr_ma (insn->rtl ()) != INVALID_ATTRIBUTE) | |
1979 | m_demands[DEMAND_MASK_POLICY] = true; | |
6b6b9c68 JZZ |
1980 | |
1981 | if (vector_config_insn_p (insn->rtl ())) | |
1982 | return; | |
1983 | ||
ec99ffab JZZ |
1984 | if (scalar_move_insn_p (insn->rtl ())) |
1985 | { | |
1986 | if (m_avl.has_non_zero_avl ()) | |
1987 | m_demands[DEMAND_NONZERO_AVL] = true; | |
1988 | if (m_ta) | |
1989 | m_demands[DEMAND_GE_SEW] = true; | |
1990 | } | |
1991 | ||
1992 | if (!m_avl.has_avl_reg () || vlmax_avl_p (get_avl ()) || !m_avl.get_source ()) | |
1993 | return; | |
1994 | if (!m_avl.get_source ()->insn ()->is_real () | |
1995 | && !m_avl.get_source ()->insn ()->is_phi ()) | |
6b6b9c68 JZZ |
1996 | return; |
1997 | ||
1998 | insn_info *def_insn = extract_single_source (m_avl.get_source ()); | |
ec99ffab JZZ |
1999 | if (!def_insn || !vsetvl_insn_p (def_insn->rtl ())) |
2000 | return; | |
9243c3d1 | 2001 | |
ec99ffab JZZ |
2002 | vector_insn_info new_info; |
2003 | new_info.parse_insn (def_insn); | |
2004 | if (!same_vlmax_p (new_info) && !scalar_move_insn_p (insn->rtl ())) | |
2005 | return; | |
9326a49c JZ |
2006 | |
2007 | if (new_info.has_avl ()) | |
2008 | { | |
2009 | if (new_info.has_avl_imm ()) | |
2010 | set_avl_info (avl_info (new_info.get_avl (), nullptr)); | |
2011 | else | |
2012 | { | |
2013 | if (vlmax_avl_p (new_info.get_avl ())) | |
2014 | set_avl_info (avl_info (new_info.get_avl (), get_avl_source ())); | |
2015 | else | |
2016 | { | |
2017 | /* Conservatively propagate non-VLMAX AVL of user vsetvl: | |
2018 | 1. The user vsetvl should be same block with the rvv insn. | |
2019 | 2. The user vsetvl is the only def insn of rvv insn. | |
2020 | 3. The AVL is not modified between def-use chain. | |
2021 | 4. The VL is only used by insn within EBB. | |
2022 | */ | |
2023 | bool modified_p = false; | |
2024 | for (insn_info *i = def_insn->next_nondebug_insn (); | |
2025 | real_insn_and_same_bb_p (i, get_insn ()->bb ()); | |
2026 | i = i->next_nondebug_insn ()) | |
2027 | { | |
c26f2758 JZ |
2028 | /* Consider this following sequence: |
2029 | ||
2030 | insn 1: vsetvli a5,a3,e8,mf4,ta,mu | |
2031 | insn 2: vsetvli zero,a5,e32,m1,ta,ma | |
2032 | ... | |
2033 | vle32.v v1,0(a1) | |
2034 | vsetvli a2,zero,e32,m1,ta,ma | |
2035 | vadd.vv v1,v1,v1 | |
2036 | vsetvli zero,a5,e32,m1,ta,ma | |
2037 | vse32.v v1,0(a0) | |
2038 | ... | |
2039 | insn 3: sub a3,a3,a5 | |
2040 | ... | |
2041 | ||
2042 | We can local AVL propagate "a3" from insn 1 to insn 2 | |
2043 | if no insns between insn 1 and insn 2 modify "a3 even | |
2044 | though insn 3 modifies "a3". | |
2045 | Otherwise, we can't perform local AVL propagation. | |
2046 | ||
2047 | Early break if we reach the insn 2. */ | |
2048 | if (!before_p (i, insn)) | |
2049 | break; | |
9326a49c JZ |
2050 | if (find_access (i->defs (), REGNO (new_info.get_avl ()))) |
2051 | { | |
2052 | modified_p = true; | |
2053 | break; | |
2054 | } | |
2055 | } | |
2056 | ||
2057 | bool has_live_out_use = false; | |
2058 | for (use_info *use : m_avl.get_source ()->all_uses ()) | |
2059 | { | |
2060 | if (use->is_live_out_use ()) | |
2061 | { | |
2062 | has_live_out_use = true; | |
2063 | break; | |
2064 | } | |
2065 | } | |
2066 | if (!modified_p && !has_live_out_use | |
2067 | && def_insn == m_avl.get_source ()->insn () | |
2068 | && m_insn->bb () == def_insn->bb ()) | |
2069 | set_avl_info (new_info.get_avl_info ()); | |
2070 | } | |
2071 | } | |
2072 | } | |
ec99ffab JZZ |
2073 | |
2074 | if (scalar_move_insn_p (insn->rtl ()) && m_avl.has_non_zero_avl ()) | |
2075 | m_demands[DEMAND_NONZERO_AVL] = true; | |
9243c3d1 JZZ |
2076 | } |
2077 | ||
2078 | bool | |
2079 | vector_insn_info::compatible_p (const vector_insn_info &other) const | |
2080 | { | |
2081 | gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p () | |
2082 | && "Can't compare invalid demanded infos"); | |
2083 | ||
ec99ffab | 2084 | for (const auto &cond : incompatible_conds) |
60bd33bc | 2085 | if (cond.dual_incompatible_p (*this, other)) |
ec99ffab | 2086 | return false; |
9243c3d1 JZZ |
2087 | return true; |
2088 | } | |
2089 | ||
d51f2456 JZ |
2090 | bool |
2091 | vector_insn_info::skip_avl_compatible_p (const vector_insn_info &other) const | |
2092 | { | |
2093 | gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p () | |
2094 | && "Can't compare invalid demanded infos"); | |
2095 | unsigned array_size = sizeof (incompatible_conds) / sizeof (demands_cond); | |
2096 | /* Bypass AVL incompatible cases. */ | |
2097 | for (unsigned i = 1; i < array_size; i++) | |
2098 | if (incompatible_conds[i].dual_incompatible_p (*this, other)) | |
2099 | return false; | |
2100 | return true; | |
2101 | } | |
2102 | ||
9243c3d1 JZZ |
2103 | bool |
2104 | vector_insn_info::compatible_avl_p (const vl_vtype_info &other) const | |
2105 | { | |
2106 | gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info"); | |
2107 | gcc_assert (!unknown_p () && "Can't compare AVL in unknown state"); | |
2108 | if (!demand_p (DEMAND_AVL)) | |
2109 | return true; | |
ec99ffab JZZ |
2110 | if (demand_p (DEMAND_NONZERO_AVL) && other.has_non_zero_avl ()) |
2111 | return true; | |
9243c3d1 JZZ |
2112 | return get_avl_info () == other.get_avl_info (); |
2113 | } | |
2114 | ||
4f673c5e JZZ |
2115 | bool |
2116 | vector_insn_info::compatible_avl_p (const avl_info &other) const | |
2117 | { | |
2118 | gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info"); | |
2119 | gcc_assert (!unknown_p () && "Can't compare AVL in unknown state"); | |
2120 | gcc_assert (demand_p (DEMAND_AVL) && "Can't compare AVL undemand state"); | |
ec99ffab JZZ |
2121 | if (!demand_p (DEMAND_AVL)) |
2122 | return true; | |
2123 | if (demand_p (DEMAND_NONZERO_AVL) && other.has_non_zero_avl ()) | |
2124 | return true; | |
4f673c5e JZZ |
2125 | return get_avl_info () == other; |
2126 | } | |
2127 | ||
9243c3d1 JZZ |
2128 | bool |
2129 | vector_insn_info::compatible_vtype_p (const vl_vtype_info &other) const | |
2130 | { | |
2131 | gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info"); | |
2132 | gcc_assert (!unknown_p () && "Can't compare VTYPE in unknown state"); | |
ec99ffab JZZ |
2133 | if (demand_p (DEMAND_SEW)) |
2134 | { | |
2135 | if (!demand_p (DEMAND_GE_SEW) && m_sew != other.get_sew ()) | |
2136 | return false; | |
2137 | if (demand_p (DEMAND_GE_SEW) && m_sew > other.get_sew ()) | |
2138 | return false; | |
2139 | } | |
9243c3d1 JZZ |
2140 | if (demand_p (DEMAND_LMUL) && m_vlmul != other.get_vlmul ()) |
2141 | return false; | |
2142 | if (demand_p (DEMAND_RATIO) && m_ratio != other.get_ratio ()) | |
2143 | return false; | |
2144 | if (demand_p (DEMAND_TAIL_POLICY) && m_ta != other.get_ta ()) | |
2145 | return false; | |
2146 | if (demand_p (DEMAND_MASK_POLICY) && m_ma != other.get_ma ()) | |
2147 | return false; | |
2148 | return true; | |
2149 | } | |
2150 | ||
2151 | /* Determine whether the vector instructions requirements represented by | |
2152 | Require are compatible with the previous vsetvli instruction represented | |
2153 | by this. INSN is the instruction whose requirements we're considering. */ | |
2154 | bool | |
2155 | vector_insn_info::compatible_p (const vl_vtype_info &curr_info) const | |
2156 | { | |
2157 | gcc_assert (!uninit_p () && "Can't handle uninitialized info"); | |
2158 | if (empty_p ()) | |
2159 | return false; | |
2160 | ||
2161 | /* Nothing is compatible with Unknown. */ | |
2162 | if (unknown_p ()) | |
2163 | return false; | |
2164 | ||
2165 | /* If the instruction doesn't need an AVLReg and the SEW matches, consider | |
2166 | it compatible. */ | |
2167 | if (!demand_p (DEMAND_AVL)) | |
2168 | if (m_sew == curr_info.get_sew ()) | |
2169 | return true; | |
2170 | ||
2171 | return compatible_avl_p (curr_info) && compatible_vtype_p (curr_info); | |
2172 | } | |
2173 | ||
6b6b9c68 JZZ |
2174 | bool |
2175 | vector_insn_info::available_p (const vector_insn_info &other) const | |
2176 | { | |
ec99ffab JZZ |
2177 | return *this >= other; |
2178 | } | |
2179 | ||
2180 | void | |
2181 | vector_insn_info::fuse_avl (const vector_insn_info &info1, | |
2182 | const vector_insn_info &info2) | |
2183 | { | |
2184 | set_insn (info1.get_insn ()); | |
2185 | if (info1.demand_p (DEMAND_AVL)) | |
2186 | { | |
2187 | if (info1.demand_p (DEMAND_NONZERO_AVL)) | |
2188 | { | |
2189 | if (info2.demand_p (DEMAND_AVL) | |
2190 | && !info2.demand_p (DEMAND_NONZERO_AVL)) | |
2191 | { | |
2192 | set_avl_info (info2.get_avl_info ()); | |
2193 | set_demand (DEMAND_AVL, true); | |
2194 | set_demand (DEMAND_NONZERO_AVL, false); | |
2195 | return; | |
2196 | } | |
2197 | } | |
2198 | set_avl_info (info1.get_avl_info ()); | |
2199 | set_demand (DEMAND_NONZERO_AVL, info1.demand_p (DEMAND_NONZERO_AVL)); | |
2200 | } | |
2201 | else | |
2202 | { | |
2203 | set_avl_info (info2.get_avl_info ()); | |
2204 | set_demand (DEMAND_NONZERO_AVL, info2.demand_p (DEMAND_NONZERO_AVL)); | |
2205 | } | |
2206 | set_demand (DEMAND_AVL, | |
2207 | info1.demand_p (DEMAND_AVL) || info2.demand_p (DEMAND_AVL)); | |
2208 | } | |
2209 | ||
2210 | void | |
2211 | vector_insn_info::fuse_sew_lmul (const vector_insn_info &info1, | |
2212 | const vector_insn_info &info2) | |
2213 | { | |
2214 | /* We need to fuse sew && lmul according to demand info: | |
2215 | ||
2216 | 1. GE_SEW. | |
2217 | 2. SEW. | |
2218 | 3. LMUL. | |
2219 | 4. RATIO. */ | |
2220 | if (same_sew_lmul_demand_p (info1.get_demands (), info2.get_demands ())) | |
2221 | { | |
2222 | set_demand (DEMAND_SEW, info2.demand_p (DEMAND_SEW)); | |
2223 | set_demand (DEMAND_LMUL, info2.demand_p (DEMAND_LMUL)); | |
2224 | set_demand (DEMAND_RATIO, info2.demand_p (DEMAND_RATIO)); | |
2225 | set_demand (DEMAND_GE_SEW, info2.demand_p (DEMAND_GE_SEW)); | |
2226 | set_sew (info2.get_sew ()); | |
2227 | set_vlmul (info2.get_vlmul ()); | |
2228 | set_ratio (info2.get_ratio ()); | |
2229 | return; | |
2230 | } | |
2231 | for (const auto &rule : fuse_rules) | |
2232 | { | |
2233 | if (rule.pair.match_cond_p (info1.get_demands (), info2.get_demands ())) | |
2234 | { | |
2235 | set_demand (DEMAND_SEW, rule.demand_sew_p); | |
2236 | set_demand (DEMAND_LMUL, rule.demand_lmul_p); | |
2237 | set_demand (DEMAND_RATIO, rule.demand_ratio_p); | |
2238 | set_demand (DEMAND_GE_SEW, rule.demand_ge_sew_p); | |
2239 | set_sew (rule.new_sew (info1, info2)); | |
2240 | set_vlmul (rule.new_vlmul (info1, info2)); | |
2241 | set_ratio (rule.new_ratio (info1, info2)); | |
2242 | return; | |
2243 | } | |
2244 | if (rule.pair.match_cond_p (info2.get_demands (), info1.get_demands ())) | |
2245 | { | |
2246 | set_demand (DEMAND_SEW, rule.demand_sew_p); | |
2247 | set_demand (DEMAND_LMUL, rule.demand_lmul_p); | |
2248 | set_demand (DEMAND_RATIO, rule.demand_ratio_p); | |
2249 | set_demand (DEMAND_GE_SEW, rule.demand_ge_sew_p); | |
2250 | set_sew (rule.new_sew (info2, info1)); | |
2251 | set_vlmul (rule.new_vlmul (info2, info1)); | |
2252 | set_ratio (rule.new_ratio (info2, info1)); | |
2253 | return; | |
2254 | } | |
2255 | } | |
2256 | gcc_unreachable (); | |
2257 | } | |
2258 | ||
2259 | void | |
2260 | vector_insn_info::fuse_tail_policy (const vector_insn_info &info1, | |
2261 | const vector_insn_info &info2) | |
2262 | { | |
2263 | if (info1.demand_p (DEMAND_TAIL_POLICY)) | |
2264 | { | |
2265 | set_ta (info1.get_ta ()); | |
2266 | demand (DEMAND_TAIL_POLICY); | |
2267 | } | |
2268 | else if (info2.demand_p (DEMAND_TAIL_POLICY)) | |
2269 | { | |
2270 | set_ta (info2.get_ta ()); | |
2271 | demand (DEMAND_TAIL_POLICY); | |
2272 | } | |
2273 | else | |
2274 | set_ta (get_default_ta ()); | |
2275 | } | |
2276 | ||
2277 | void | |
2278 | vector_insn_info::fuse_mask_policy (const vector_insn_info &info1, | |
2279 | const vector_insn_info &info2) | |
2280 | { | |
2281 | if (info1.demand_p (DEMAND_MASK_POLICY)) | |
2282 | { | |
2283 | set_ma (info1.get_ma ()); | |
2284 | demand (DEMAND_MASK_POLICY); | |
2285 | } | |
2286 | else if (info2.demand_p (DEMAND_MASK_POLICY)) | |
2287 | { | |
2288 | set_ma (info2.get_ma ()); | |
2289 | demand (DEMAND_MASK_POLICY); | |
2290 | } | |
2291 | else | |
2292 | set_ma (get_default_ma ()); | |
6b6b9c68 JZZ |
2293 | } |
2294 | ||
9243c3d1 JZZ |
2295 | vector_insn_info |
2296 | vector_insn_info::merge (const vector_insn_info &merge_info, | |
d51f2456 | 2297 | enum merge_type type) const |
9243c3d1 | 2298 | { |
6b6b9c68 JZZ |
2299 | if (!vsetvl_insn_p (get_insn ()->rtl ())) |
2300 | gcc_assert (this->compatible_p (merge_info) | |
2301 | && "Can't merge incompatible demanded infos"); | |
9243c3d1 JZZ |
2302 | |
2303 | vector_insn_info new_info; | |
ec99ffab | 2304 | new_info.set_valid (); |
4f673c5e | 2305 | if (type == LOCAL_MERGE) |
9243c3d1 | 2306 | { |
4f673c5e | 2307 | /* For local backward data flow, we always update INSN && AVL as the |
ec99ffab JZZ |
2308 | latest INSN and AVL so that we can keep track status of each INSN. */ |
2309 | new_info.fuse_avl (merge_info, *this); | |
9243c3d1 JZZ |
2310 | } |
2311 | else | |
2312 | { | |
4f673c5e | 2313 | /* For global data flow, we should keep original INSN and AVL if they |
ec99ffab | 2314 | valid since we should keep the life information of each block. |
9243c3d1 | 2315 | |
ec99ffab JZZ |
2316 | For example: |
2317 | bb 0 -> bb 1. | |
2318 | We should keep INSN && AVL of bb 1 since we will eventually emit | |
2319 | vsetvl instruction according to INSN and AVL of bb 1. */ | |
2320 | new_info.fuse_avl (*this, merge_info); | |
2321 | } | |
9243c3d1 | 2322 | |
ec99ffab JZZ |
2323 | new_info.fuse_sew_lmul (*this, merge_info); |
2324 | new_info.fuse_tail_policy (*this, merge_info); | |
2325 | new_info.fuse_mask_policy (*this, merge_info); | |
9243c3d1 JZZ |
2326 | return new_info; |
2327 | } | |
2328 | ||
60bd33bc JZZ |
2329 | bool |
2330 | vector_insn_info::update_fault_first_load_avl (insn_info *insn) | |
2331 | { | |
2332 | // Update AVL to vl-output of the fault first load. | |
2333 | const insn_info *read_vl = get_forward_read_vl_insn (insn); | |
2334 | if (read_vl) | |
2335 | { | |
2336 | rtx vl = SET_DEST (PATTERN (read_vl->rtl ())); | |
2337 | def_info *def = find_access (read_vl->defs (), REGNO (vl)); | |
2338 | set_info *set = safe_dyn_cast<set_info *> (def); | |
2339 | set_avl_info (avl_info (vl, set)); | |
2340 | set_insn (insn); | |
2341 | return true; | |
2342 | } | |
2343 | return false; | |
2344 | } | |
2345 | ||
ea7a9f36 KC |
2346 | static const char * |
2347 | vlmul_to_str (vlmul_type vlmul) | |
2348 | { | |
2349 | switch (vlmul) | |
2350 | { | |
2351 | case LMUL_1: | |
2352 | return "m1"; | |
2353 | case LMUL_2: | |
2354 | return "m2"; | |
2355 | case LMUL_4: | |
2356 | return "m4"; | |
2357 | case LMUL_8: | |
2358 | return "m8"; | |
2359 | case LMUL_RESERVED: | |
2360 | return "INVALID LMUL"; | |
2361 | case LMUL_F8: | |
2362 | return "mf8"; | |
2363 | case LMUL_F4: | |
2364 | return "mf4"; | |
2365 | case LMUL_F2: | |
2366 | return "mf2"; | |
2367 | ||
2368 | default: | |
2369 | gcc_unreachable (); | |
2370 | } | |
2371 | } | |
2372 | ||
/* Return the printable name of a tail/mask policy: "agnostic" when
   AGNOSTIC_P is true, "undisturbed" otherwise.  */
static const char *
policy_to_str (bool agnostic_p)
{
  if (agnostic_p)
    return "agnostic";
  return "undisturbed";
}
2378 | ||
9243c3d1 JZZ |
2379 | void |
2380 | vector_insn_info::dump (FILE *file) const | |
2381 | { | |
2382 | fprintf (file, "["); | |
2383 | if (uninit_p ()) | |
2384 | fprintf (file, "UNINITIALIZED,"); | |
2385 | else if (valid_p ()) | |
2386 | fprintf (file, "VALID,"); | |
2387 | else if (unknown_p ()) | |
2388 | fprintf (file, "UNKNOWN,"); | |
2389 | else if (empty_p ()) | |
2390 | fprintf (file, "EMPTY,"); | |
6b6b9c68 JZZ |
2391 | else if (hard_empty_p ()) |
2392 | fprintf (file, "HARD_EMPTY,"); | |
4f673c5e JZZ |
2393 | else if (dirty_with_killed_avl_p ()) |
2394 | fprintf (file, "DIRTY_WITH_KILLED_AVL,"); | |
9243c3d1 JZZ |
2395 | else |
2396 | fprintf (file, "DIRTY,"); | |
2397 | ||
2398 | fprintf (file, "Demand field={%d(VL),", demand_p (DEMAND_AVL)); | |
ec99ffab | 2399 | fprintf (file, "%d(DEMAND_NONZERO_AVL),", demand_p (DEMAND_NONZERO_AVL)); |
9243c3d1 | 2400 | fprintf (file, "%d(SEW),", demand_p (DEMAND_SEW)); |
ec99ffab | 2401 | fprintf (file, "%d(DEMAND_GE_SEW),", demand_p (DEMAND_GE_SEW)); |
9243c3d1 JZZ |
2402 | fprintf (file, "%d(LMUL),", demand_p (DEMAND_LMUL)); |
2403 | fprintf (file, "%d(RATIO),", demand_p (DEMAND_RATIO)); | |
2404 | fprintf (file, "%d(TAIL_POLICY),", demand_p (DEMAND_TAIL_POLICY)); | |
2405 | fprintf (file, "%d(MASK_POLICY)}\n", demand_p (DEMAND_MASK_POLICY)); | |
2406 | ||
2407 | fprintf (file, "AVL="); | |
2408 | print_rtl_single (file, get_avl ()); | |
2409 | fprintf (file, "SEW=%d,", get_sew ()); | |
ea7a9f36 | 2410 | fprintf (file, "VLMUL=%s,", vlmul_to_str (get_vlmul ())); |
9243c3d1 | 2411 | fprintf (file, "RATIO=%d,", get_ratio ()); |
ea7a9f36 KC |
2412 | fprintf (file, "TAIL_POLICY=%s,", policy_to_str (get_ta ())); |
2413 | fprintf (file, "MASK_POLICY=%s", policy_to_str (get_ma ())); | |
9243c3d1 JZZ |
2414 | fprintf (file, "]\n"); |
2415 | ||
2416 | if (valid_p ()) | |
2417 | { | |
2418 | if (get_insn ()) | |
2419 | { | |
12b23c71 JZZ |
2420 | fprintf (file, "The real INSN="); |
2421 | print_rtl_single (file, get_insn ()->rtl ()); | |
9243c3d1 | 2422 | } |
9243c3d1 JZZ |
2423 | } |
2424 | } | |
2425 | ||
2426 | vector_infos_manager::vector_infos_manager () | |
2427 | { | |
2428 | vector_edge_list = nullptr; | |
2429 | vector_kill = nullptr; | |
2430 | vector_del = nullptr; | |
2431 | vector_insert = nullptr; | |
2432 | vector_antic = nullptr; | |
2433 | vector_transp = nullptr; | |
2434 | vector_comp = nullptr; | |
2435 | vector_avin = nullptr; | |
2436 | vector_avout = nullptr; | |
2437 | vector_insn_infos.safe_grow (get_max_uid ()); | |
2438 | vector_block_infos.safe_grow (last_basic_block_for_fn (cfun)); | |
2439 | if (!optimize) | |
2440 | { | |
2441 | basic_block cfg_bb; | |
2442 | rtx_insn *rinsn; | |
2443 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
2444 | { | |
2445 | vector_block_infos[cfg_bb->index].local_dem = vector_insn_info (); | |
2446 | vector_block_infos[cfg_bb->index].reaching_out = vector_insn_info (); | |
2447 | FOR_BB_INSNS (cfg_bb, rinsn) | |
2448 | vector_insn_infos[INSN_UID (rinsn)].parse_insn (rinsn); | |
2449 | } | |
2450 | } | |
2451 | else | |
2452 | { | |
2453 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2454 | { | |
2455 | vector_block_infos[bb->index ()].local_dem = vector_insn_info (); | |
2456 | vector_block_infos[bb->index ()].reaching_out = vector_insn_info (); | |
2457 | for (insn_info *insn : bb->real_insns ()) | |
2458 | vector_insn_infos[insn->uid ()].parse_insn (insn); | |
acc10c79 | 2459 | vector_block_infos[bb->index ()].probability = profile_probability (); |
9243c3d1 JZZ |
2460 | } |
2461 | } | |
2462 | } | |
2463 | ||
2464 | void | |
2465 | vector_infos_manager::create_expr (vector_insn_info &info) | |
2466 | { | |
2467 | for (size_t i = 0; i < vector_exprs.length (); i++) | |
2468 | if (*vector_exprs[i] == info) | |
2469 | return; | |
2470 | vector_exprs.safe_push (&info); | |
2471 | } | |
2472 | ||
2473 | size_t | |
2474 | vector_infos_manager::get_expr_id (const vector_insn_info &info) const | |
2475 | { | |
2476 | for (size_t i = 0; i < vector_exprs.length (); i++) | |
2477 | if (*vector_exprs[i] == info) | |
2478 | return i; | |
2479 | gcc_unreachable (); | |
2480 | } | |
2481 | ||
2482 | auto_vec<size_t> | |
2483 | vector_infos_manager::get_all_available_exprs ( | |
2484 | const vector_insn_info &info) const | |
2485 | { | |
2486 | auto_vec<size_t> available_list; | |
2487 | for (size_t i = 0; i < vector_exprs.length (); i++) | |
6b6b9c68 | 2488 | if (info.available_p (*vector_exprs[i])) |
9243c3d1 JZZ |
2489 | available_list.safe_push (i); |
2490 | return available_list; | |
2491 | } | |
2492 | ||
d06e9264 JZ |
2493 | bool |
2494 | vector_infos_manager::all_empty_predecessor_p (const basic_block cfg_bb) const | |
2495 | { | |
2496 | hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb); | |
2497 | for (const basic_block pred_cfg_bb : pred_cfg_bbs) | |
2498 | { | |
2499 | const auto &pred_block_info = vector_block_infos[pred_cfg_bb->index]; | |
2500 | if (!pred_block_info.local_dem.valid_or_dirty_p () | |
2501 | && !pred_block_info.reaching_out.valid_or_dirty_p ()) | |
2502 | continue; | |
2503 | return false; | |
2504 | } | |
2505 | return true; | |
2506 | } | |
2507 | ||
9243c3d1 JZZ |
2508 | bool |
2509 | vector_infos_manager::all_same_ratio_p (sbitmap bitdata) const | |
2510 | { | |
2511 | if (bitmap_empty_p (bitdata)) | |
2512 | return false; | |
2513 | ||
2514 | int ratio = -1; | |
2515 | unsigned int bb_index; | |
2516 | sbitmap_iterator sbi; | |
2517 | ||
2518 | EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi) | |
2519 | { | |
2520 | if (ratio == -1) | |
2521 | ratio = vector_exprs[bb_index]->get_ratio (); | |
2522 | else if (vector_exprs[bb_index]->get_ratio () != ratio) | |
2523 | return false; | |
2524 | } | |
2525 | return true; | |
2526 | } | |
2527 | ||
ff8f9544 JZ |
2528 | /* Return TRUE if the incoming vector configuration state |
2529 | to CFG_BB is compatible with the vector configuration | |
2530 | state in CFG_BB, FALSE otherwise. */ | |
2531 | bool | |
2532 | vector_infos_manager::all_avail_in_compatible_p (const basic_block cfg_bb) const | |
2533 | { | |
2534 | const auto &info = vector_block_infos[cfg_bb->index].local_dem; | |
2535 | sbitmap avin = vector_avin[cfg_bb->index]; | |
2536 | unsigned int bb_index; | |
2537 | sbitmap_iterator sbi; | |
2538 | EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi) | |
2539 | { | |
2540 | const auto &avin_info | |
2541 | = static_cast<const vl_vtype_info &> (*vector_exprs[bb_index]); | |
2542 | if (!info.compatible_p (avin_info)) | |
2543 | return false; | |
2544 | } | |
2545 | return true; | |
2546 | } | |
2547 | ||
005fad9d JZZ |
2548 | bool |
2549 | vector_infos_manager::all_same_avl_p (const basic_block cfg_bb, | |
2550 | sbitmap bitdata) const | |
2551 | { | |
2552 | if (bitmap_empty_p (bitdata)) | |
2553 | return false; | |
2554 | ||
2555 | const auto &block_info = vector_block_infos[cfg_bb->index]; | |
2556 | if (!block_info.local_dem.demand_p (DEMAND_AVL)) | |
2557 | return true; | |
2558 | ||
2559 | avl_info avl = block_info.local_dem.get_avl_info (); | |
2560 | unsigned int bb_index; | |
2561 | sbitmap_iterator sbi; | |
2562 | ||
2563 | EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi) | |
2564 | { | |
2565 | if (vector_exprs[bb_index]->get_avl_info () != avl) | |
2566 | return false; | |
2567 | } | |
2568 | return true; | |
2569 | } | |
2570 | ||
9243c3d1 JZZ |
2571 | size_t |
2572 | vector_infos_manager::expr_set_num (sbitmap bitdata) const | |
2573 | { | |
2574 | size_t count = 0; | |
2575 | for (size_t i = 0; i < vector_exprs.length (); i++) | |
2576 | if (bitmap_bit_p (bitdata, i)) | |
2577 | count++; | |
2578 | return count; | |
2579 | } | |
2580 | ||
2581 | void | |
2582 | vector_infos_manager::release (void) | |
2583 | { | |
2584 | if (!vector_insn_infos.is_empty ()) | |
2585 | vector_insn_infos.release (); | |
2586 | if (!vector_block_infos.is_empty ()) | |
2587 | vector_block_infos.release (); | |
2588 | if (!vector_exprs.is_empty ()) | |
2589 | vector_exprs.release (); | |
2590 | ||
ec99ffab JZZ |
2591 | gcc_assert (to_refine_vsetvls.is_empty ()); |
2592 | gcc_assert (to_delete_vsetvls.is_empty ()); | |
9243c3d1 | 2593 | if (optimize > 0) |
cfe3fbc6 JZZ |
2594 | free_bitmap_vectors (); |
2595 | } | |
2596 | ||
2597 | void | |
2598 | vector_infos_manager::create_bitmap_vectors (void) | |
2599 | { | |
2600 | /* Create the bitmap vectors. */ | |
2601 | vector_antic = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2602 | vector_exprs.length ()); | |
2603 | vector_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2604 | vector_exprs.length ()); | |
2605 | vector_comp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2606 | vector_exprs.length ()); | |
2607 | vector_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2608 | vector_exprs.length ()); | |
2609 | vector_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2610 | vector_exprs.length ()); | |
2611 | vector_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2612 | vector_exprs.length ()); | |
2613 | ||
2614 | bitmap_vector_ones (vector_transp, last_basic_block_for_fn (cfun)); | |
2615 | bitmap_vector_clear (vector_antic, last_basic_block_for_fn (cfun)); | |
2616 | bitmap_vector_clear (vector_comp, last_basic_block_for_fn (cfun)); | |
2617 | } | |
2618 | ||
2619 | void | |
2620 | vector_infos_manager::free_bitmap_vectors (void) | |
2621 | { | |
2622 | /* Finished. Free up all the things we've allocated. */ | |
2623 | free_edge_list (vector_edge_list); | |
2624 | if (vector_del) | |
2625 | sbitmap_vector_free (vector_del); | |
2626 | if (vector_insert) | |
2627 | sbitmap_vector_free (vector_insert); | |
2628 | if (vector_kill) | |
2629 | sbitmap_vector_free (vector_kill); | |
2630 | if (vector_antic) | |
2631 | sbitmap_vector_free (vector_antic); | |
2632 | if (vector_transp) | |
2633 | sbitmap_vector_free (vector_transp); | |
2634 | if (vector_comp) | |
2635 | sbitmap_vector_free (vector_comp); | |
2636 | if (vector_avin) | |
2637 | sbitmap_vector_free (vector_avin); | |
2638 | if (vector_avout) | |
2639 | sbitmap_vector_free (vector_avout); | |
2640 | ||
2641 | vector_edge_list = nullptr; | |
2642 | vector_kill = nullptr; | |
2643 | vector_del = nullptr; | |
2644 | vector_insert = nullptr; | |
2645 | vector_antic = nullptr; | |
2646 | vector_transp = nullptr; | |
2647 | vector_comp = nullptr; | |
2648 | vector_avin = nullptr; | |
2649 | vector_avout = nullptr; | |
9243c3d1 JZZ |
2650 | } |
2651 | ||
2652 | void | |
2653 | vector_infos_manager::dump (FILE *file) const | |
2654 | { | |
2655 | basic_block cfg_bb; | |
2656 | rtx_insn *rinsn; | |
2657 | ||
2658 | fprintf (file, "\n"); | |
2659 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
2660 | { | |
2661 | fprintf (file, "Local vector info of <bb %d>:\n", cfg_bb->index); | |
2662 | fprintf (file, "<HEADER>="); | |
2663 | vector_block_infos[cfg_bb->index].local_dem.dump (file); | |
2664 | FOR_BB_INSNS (cfg_bb, rinsn) | |
2665 | { | |
2666 | if (!NONDEBUG_INSN_P (rinsn) || !has_vtype_op (rinsn)) | |
2667 | continue; | |
2668 | fprintf (file, "<insn %d>=", INSN_UID (rinsn)); | |
2669 | const auto &info = vector_insn_infos[INSN_UID (rinsn)]; | |
2670 | info.dump (file); | |
2671 | } | |
2672 | fprintf (file, "<FOOTER>="); | |
2673 | vector_block_infos[cfg_bb->index].reaching_out.dump (file); | |
acc10c79 JZZ |
2674 | fprintf (file, "<Probability>="); |
2675 | vector_block_infos[cfg_bb->index].probability.dump (file); | |
9243c3d1 JZZ |
2676 | fprintf (file, "\n\n"); |
2677 | } | |
2678 | ||
2679 | fprintf (file, "\n"); | |
2680 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
2681 | { | |
2682 | fprintf (file, "Local properties of <bb %d>:\n", cfg_bb->index); | |
2683 | ||
2684 | fprintf (file, "<ANTLOC>="); | |
2685 | if (vector_antic == nullptr) | |
2686 | fprintf (file, "(nil)\n"); | |
2687 | else | |
2688 | dump_bitmap_file (file, vector_antic[cfg_bb->index]); | |
2689 | ||
2690 | fprintf (file, "<AVLOC>="); | |
2691 | if (vector_comp == nullptr) | |
2692 | fprintf (file, "(nil)\n"); | |
2693 | else | |
2694 | dump_bitmap_file (file, vector_comp[cfg_bb->index]); | |
2695 | ||
2696 | fprintf (file, "<TRANSP>="); | |
2697 | if (vector_transp == nullptr) | |
2698 | fprintf (file, "(nil)\n"); | |
2699 | else | |
2700 | dump_bitmap_file (file, vector_transp[cfg_bb->index]); | |
2701 | ||
2702 | fprintf (file, "<KILL>="); | |
2703 | if (vector_kill == nullptr) | |
2704 | fprintf (file, "(nil)\n"); | |
2705 | else | |
2706 | dump_bitmap_file (file, vector_kill[cfg_bb->index]); | |
2707 | } | |
2708 | ||
2709 | fprintf (file, "\n"); | |
2710 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
2711 | { | |
2712 | fprintf (file, "Global LCM (Lazy code motion) result of <bb %d>:\n", | |
2713 | cfg_bb->index); | |
2714 | ||
2715 | fprintf (file, "<AVIN>="); | |
2716 | if (vector_avin == nullptr) | |
2717 | fprintf (file, "(nil)\n"); | |
2718 | else | |
2719 | dump_bitmap_file (file, vector_avin[cfg_bb->index]); | |
2720 | ||
2721 | fprintf (file, "<AVOUT>="); | |
2722 | if (vector_avout == nullptr) | |
2723 | fprintf (file, "(nil)\n"); | |
2724 | else | |
2725 | dump_bitmap_file (file, vector_avout[cfg_bb->index]); | |
2726 | ||
2727 | fprintf (file, "<DELETE>="); | |
2728 | if (vector_del == nullptr) | |
2729 | fprintf (file, "(nil)\n"); | |
2730 | else | |
2731 | dump_bitmap_file (file, vector_del[cfg_bb->index]); | |
2732 | } | |
2733 | ||
2734 | fprintf (file, "\nGlobal LCM (Lazy code motion) INSERT info:\n"); | |
2735 | for (size_t i = 0; i < vector_exprs.length (); i++) | |
2736 | { | |
2737 | for (int ed = 0; ed < NUM_EDGES (vector_edge_list); ed++) | |
2738 | { | |
2739 | edge eg = INDEX_EDGE (vector_edge_list, ed); | |
2740 | if (bitmap_bit_p (vector_insert[ed], i)) | |
2741 | fprintf (dump_file, | |
2742 | "INSERT edge %d from bb %d to bb %d for VSETVL " | |
2743 | "expr[%ld]\n", | |
2744 | ed, eg->src->index, eg->dest->index, i); | |
2745 | } | |
2746 | } | |
2747 | } | |
2748 | ||
2749 | const pass_data pass_data_vsetvl = { | |
2750 | RTL_PASS, /* type */ | |
2751 | "vsetvl", /* name */ | |
2752 | OPTGROUP_NONE, /* optinfo_flags */ | |
2753 | TV_NONE, /* tv_id */ | |
2754 | 0, /* properties_required */ | |
2755 | 0, /* properties_provided */ | |
2756 | 0, /* properties_destroyed */ | |
2757 | 0, /* todo_flags_start */ | |
2758 | 0, /* todo_flags_finish */ | |
2759 | }; | |
2760 | ||
2761 | class pass_vsetvl : public rtl_opt_pass | |
2762 | { | |
2763 | private: | |
c139f5e1 KC |
2764 | vector_infos_manager *m_vector_manager; |
2765 | ||
2766 | const vector_insn_info &get_vector_info (const rtx_insn *) const; | |
2767 | const vector_insn_info &get_vector_info (const insn_info *) const; | |
2768 | const vector_block_info &get_block_info (const basic_block) const; | |
2769 | const vector_block_info &get_block_info (const bb_info *) const; | |
2770 | vector_block_info &get_block_info (const basic_block); | |
2771 | vector_block_info &get_block_info (const bb_info *); | |
2772 | void update_vector_info (const insn_info *, const vector_insn_info &); | |
9243c3d1 JZZ |
2773 | |
2774 | void simple_vsetvl (void) const; | |
2775 | void lazy_vsetvl (void); | |
2776 | ||
2777 | /* Phase 1. */ | |
2778 | void compute_local_backward_infos (const bb_info *); | |
2779 | ||
2780 | /* Phase 2. */ | |
2781 | bool need_vsetvl (const vector_insn_info &, const vector_insn_info &) const; | |
2782 | void transfer_before (vector_insn_info &, insn_info *) const; | |
2783 | void transfer_after (vector_insn_info &, insn_info *) const; | |
2784 | void emit_local_forward_vsetvls (const bb_info *); | |
2785 | ||
2786 | /* Phase 3. */ | |
4f673c5e JZZ |
2787 | enum fusion_type get_backward_fusion_type (const bb_info *, |
2788 | const vector_insn_info &); | |
6b6b9c68 | 2789 | bool hard_empty_block_p (const bb_info *, const vector_insn_info &) const; |
387cd9d3 JZZ |
2790 | bool backward_demand_fusion (void); |
2791 | bool forward_demand_fusion (void); | |
6b6b9c68 | 2792 | bool cleanup_illegal_dirty_blocks (void); |
387cd9d3 | 2793 | void demand_fusion (void); |
9243c3d1 JZZ |
2794 | |
2795 | /* Phase 4. */ | |
2796 | void prune_expressions (void); | |
2797 | void compute_local_properties (void); | |
6b6b9c68 | 2798 | bool can_refine_vsetvl_p (const basic_block, const vector_insn_info &) const; |
9243c3d1 JZZ |
2799 | void refine_vsetvls (void) const; |
2800 | void cleanup_vsetvls (void); | |
2801 | bool commit_vsetvls (void); | |
2802 | void pre_vsetvl (void); | |
2803 | ||
2804 | /* Phase 5. */ | |
c919d059 KC |
2805 | rtx_insn *get_vsetvl_at_end (const bb_info *, vector_insn_info *) const; |
2806 | void local_eliminate_vsetvl_insn (const bb_info *) const; | |
20c85207 JZ |
2807 | bool global_eliminate_vsetvl_insn (const bb_info *) const; |
2808 | void ssa_post_optimization (void) const; | |
9243c3d1 | 2809 | |
6b6b9c68 | 2810 | /* Phase 6. */ |
20c85207 | 2811 | void df_post_optimization (void) const; |
6b6b9c68 | 2812 | |
9243c3d1 JZZ |
2813 | void init (void); |
2814 | void done (void); | |
acc10c79 | 2815 | void compute_probabilities (void); |
9243c3d1 JZZ |
2816 | |
2817 | public: | |
2818 | pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {} | |
2819 | ||
2820 | /* opt_pass methods: */ | |
2821 | virtual bool gate (function *) final override { return TARGET_VECTOR; } | |
2822 | virtual unsigned int execute (function *) final override; | |
2823 | }; // class pass_vsetvl | |
2824 | ||
c139f5e1 KC |
2825 | const vector_insn_info & |
2826 | pass_vsetvl::get_vector_info (const rtx_insn *i) const | |
2827 | { | |
2828 | return m_vector_manager->vector_insn_infos[INSN_UID (i)]; | |
2829 | } | |
2830 | ||
2831 | const vector_insn_info & | |
2832 | pass_vsetvl::get_vector_info (const insn_info *i) const | |
2833 | { | |
2834 | return m_vector_manager->vector_insn_infos[i->uid ()]; | |
2835 | } | |
2836 | ||
2837 | const vector_block_info & | |
2838 | pass_vsetvl::get_block_info (const basic_block bb) const | |
2839 | { | |
2840 | return m_vector_manager->vector_block_infos[bb->index]; | |
2841 | } | |
2842 | ||
2843 | const vector_block_info & | |
2844 | pass_vsetvl::get_block_info (const bb_info *bb) const | |
2845 | { | |
2846 | return m_vector_manager->vector_block_infos[bb->index ()]; | |
2847 | } | |
2848 | ||
2849 | vector_block_info & | |
2850 | pass_vsetvl::get_block_info (const basic_block bb) | |
2851 | { | |
2852 | return m_vector_manager->vector_block_infos[bb->index]; | |
2853 | } | |
2854 | ||
2855 | vector_block_info & | |
2856 | pass_vsetvl::get_block_info (const bb_info *bb) | |
2857 | { | |
2858 | return m_vector_manager->vector_block_infos[bb->index ()]; | |
2859 | } | |
2860 | ||
2861 | void | |
2862 | pass_vsetvl::update_vector_info (const insn_info *i, | |
2863 | const vector_insn_info &new_info) | |
2864 | { | |
2865 | m_vector_manager->vector_insn_infos[i->uid ()] = new_info; | |
2866 | } | |
2867 | ||
9243c3d1 JZZ |
2868 | /* Simple m_vsetvl_insert vsetvl for optimize == 0. */ |
2869 | void | |
2870 | pass_vsetvl::simple_vsetvl (void) const | |
2871 | { | |
2872 | if (dump_file) | |
2873 | fprintf (dump_file, | |
2874 | "\nEntering Simple VSETVL PASS and Handling %d basic blocks for " | |
2875 | "function:%s\n", | |
2876 | n_basic_blocks_for_fn (cfun), function_name (cfun)); | |
2877 | ||
2878 | basic_block cfg_bb; | |
2879 | rtx_insn *rinsn; | |
2880 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
2881 | { | |
2882 | FOR_BB_INSNS (cfg_bb, rinsn) | |
2883 | { | |
2884 | if (!NONDEBUG_INSN_P (rinsn)) | |
2885 | continue; | |
2886 | if (has_vtype_op (rinsn)) | |
2887 | { | |
c139f5e1 | 2888 | const auto info = get_vector_info (rinsn); |
9243c3d1 JZZ |
2889 | emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_BEFORE, info, |
2890 | NULL_RTX, rinsn); | |
2891 | } | |
2892 | } | |
2893 | } | |
2894 | } | |
2895 | ||
2896 | /* Compute demanded information by backward data-flow analysis. */ | |
2897 | void | |
2898 | pass_vsetvl::compute_local_backward_infos (const bb_info *bb) | |
2899 | { | |
2900 | vector_insn_info change; | |
2901 | change.set_empty (); | |
2902 | ||
2903 | auto &block_info = m_vector_manager->vector_block_infos[bb->index ()]; | |
2904 | block_info.reaching_out = change; | |
2905 | ||
2906 | for (insn_info *insn : bb->reverse_real_nondebug_insns ()) | |
2907 | { | |
c139f5e1 | 2908 | auto &info = get_vector_info (insn); |
9243c3d1 JZZ |
2909 | |
2910 | if (info.uninit_p ()) | |
2911 | /* If it is uninitialized, propagate it directly. */ | |
c139f5e1 | 2912 | update_vector_info (insn, change); |
9243c3d1 JZZ |
2913 | else if (info.unknown_p ()) |
2914 | change = info; | |
2915 | else | |
2916 | { | |
2917 | gcc_assert (info.valid_p () && "Unexpected Invalid demanded info"); | |
ec99ffab JZZ |
2918 | if (change.valid_p ()) |
2919 | { | |
a481eed8 JZZ |
2920 | if (!(propagate_avl_across_demands_p (change, info) |
2921 | && !reg_available_p (insn, change)) | |
ec99ffab | 2922 | && change.compatible_p (info)) |
fdc5abbd | 2923 | { |
c139f5e1 | 2924 | update_vector_info (insn, change.merge (info, LOCAL_MERGE)); |
fdc5abbd JZ |
2925 | /* Fix PR109399, we should update user vsetvl instruction |
2926 | if there is a change in demand fusion. */ | |
2927 | if (vsetvl_insn_p (insn->rtl ())) | |
2928 | change_vsetvl_insn (insn, info); | |
2929 | } | |
ec99ffab | 2930 | } |
9243c3d1 JZZ |
2931 | change = info; |
2932 | } | |
2933 | } | |
2934 | ||
2935 | block_info.local_dem = change; | |
2936 | if (block_info.local_dem.empty_p ()) | |
2937 | block_info.reaching_out = block_info.local_dem; | |
2938 | } | |
2939 | ||
2940 | /* Return true if a dem_info is required to transition from curr_info to | |
2941 | require before INSN. */ | |
2942 | bool | |
2943 | pass_vsetvl::need_vsetvl (const vector_insn_info &require, | |
2944 | const vector_insn_info &curr_info) const | |
2945 | { | |
2946 | if (!curr_info.valid_p () || curr_info.unknown_p () || curr_info.uninit_p ()) | |
2947 | return true; | |
2948 | ||
a481eed8 | 2949 | if (require.compatible_p (static_cast<const vl_vtype_info &> (curr_info))) |
9243c3d1 JZZ |
2950 | return false; |
2951 | ||
2952 | return true; | |
2953 | } | |
2954 | ||
2955 | /* Given an incoming state reaching INSN, modifies that state so that it is | |
2956 | minimally compatible with INSN. The resulting state is guaranteed to be | |
2957 | semantically legal for INSN, but may not be the state requested by INSN. */ | |
2958 | void | |
2959 | pass_vsetvl::transfer_before (vector_insn_info &info, insn_info *insn) const | |
2960 | { | |
2961 | if (!has_vtype_op (insn->rtl ())) | |
2962 | return; | |
2963 | ||
c139f5e1 | 2964 | const vector_insn_info require = get_vector_info (insn); |
9243c3d1 JZZ |
2965 | if (info.valid_p () && !need_vsetvl (require, info)) |
2966 | return; | |
2967 | info = require; | |
2968 | } | |
2969 | ||
2970 | /* Given a state with which we evaluated insn (see transfer_before above for why | |
2971 | this might be different that the state insn requested), modify the state to | |
2972 | reflect the changes insn might make. */ | |
2973 | void | |
2974 | pass_vsetvl::transfer_after (vector_insn_info &info, insn_info *insn) const | |
2975 | { | |
2976 | if (vector_config_insn_p (insn->rtl ())) | |
2977 | { | |
c139f5e1 | 2978 | info = get_vector_info (insn); |
9243c3d1 JZZ |
2979 | return; |
2980 | } | |
2981 | ||
60bd33bc JZZ |
2982 | if (fault_first_load_p (insn->rtl ()) |
2983 | && info.update_fault_first_load_avl (insn)) | |
2984 | return; | |
9243c3d1 JZZ |
2985 | |
2986 | /* If this is something that updates VL/VTYPE that we don't know about, set | |
2987 | the state to unknown. */ | |
2988 | if (insn->is_call () || insn->is_asm () | |
2989 | || find_access (insn->defs (), VL_REGNUM) | |
2990 | || find_access (insn->defs (), VTYPE_REGNUM)) | |
2991 | info = vector_insn_info::get_unknown (); | |
2992 | } | |
2993 | ||
2994 | /* Emit vsetvl within each block by forward data-flow analysis. */ | |
2995 | void | |
2996 | pass_vsetvl::emit_local_forward_vsetvls (const bb_info *bb) | |
2997 | { | |
2998 | auto &block_info = m_vector_manager->vector_block_infos[bb->index ()]; | |
2999 | if (block_info.local_dem.empty_p ()) | |
3000 | return; | |
3001 | ||
3002 | vector_insn_info curr_info; | |
3003 | for (insn_info *insn : bb->real_nondebug_insns ()) | |
3004 | { | |
3005 | const vector_insn_info prev_info = curr_info; | |
a481eed8 | 3006 | enum vsetvl_type type = NUM_VSETVL_TYPE; |
9243c3d1 JZZ |
3007 | transfer_before (curr_info, insn); |
3008 | ||
3009 | if (has_vtype_op (insn->rtl ())) | |
3010 | { | |
3011 | if (static_cast<const vl_vtype_info &> (prev_info) | |
3012 | != static_cast<const vl_vtype_info &> (curr_info)) | |
3013 | { | |
c139f5e1 | 3014 | const auto require = get_vector_info (insn); |
9243c3d1 JZZ |
3015 | if (!require.compatible_p ( |
3016 | static_cast<const vl_vtype_info &> (prev_info))) | |
a481eed8 JZZ |
3017 | type = insert_vsetvl (EMIT_BEFORE, insn->rtl (), require, |
3018 | prev_info); | |
9243c3d1 JZZ |
3019 | } |
3020 | } | |
3021 | ||
a481eed8 JZZ |
3022 | /* Fix the issue of following sequence: |
3023 | vsetivli zero, 5 | |
3024 | .... | |
3025 | vsetvli zero, zero | |
3026 | vmv.x.s (demand AVL = 8). | |
3027 | .... | |
3028 | incorrect: vsetvli zero, zero ===> Since the curr_info is AVL = 8. | |
3029 | correct: vsetivli zero, 8 | |
3030 | vadd (demand AVL = 8). */ | |
3031 | if (type == VSETVL_VTYPE_CHANGE_ONLY) | |
3032 | { | |
3033 | /* Update the curr_info to be real correct AVL. */ | |
3034 | curr_info.set_avl_info (prev_info.get_avl_info ()); | |
3035 | } | |
9243c3d1 JZZ |
3036 | transfer_after (curr_info, insn); |
3037 | } | |
3038 | ||
3039 | block_info.reaching_out = curr_info; | |
3040 | } | |
3041 | ||
4f673c5e JZZ |
3042 | enum fusion_type |
3043 | pass_vsetvl::get_backward_fusion_type (const bb_info *bb, | |
3044 | const vector_insn_info &prop) | |
9243c3d1 | 3045 | { |
4f673c5e | 3046 | insn_info *insn = prop.get_insn (); |
9243c3d1 | 3047 | |
4f673c5e JZZ |
3048 | /* TODO: We don't backward propagate the explict VSETVL here |
3049 | since we will change vsetvl and vsetvlmax intrinsics into | |
3050 | no side effects which can be optimized into optimal location | |
3051 | by GCC internal passes. We only need to support these backward | |
3052 | propagation if vsetvl intrinsics have side effects. */ | |
3053 | if (vsetvl_insn_p (insn->rtl ())) | |
3054 | return INVALID_FUSION; | |
3055 | ||
3056 | gcc_assert (has_vtype_op (insn->rtl ())); | |
3057 | rtx reg = NULL_RTX; | |
3058 | ||
3059 | /* Case 1: Don't need VL. Just let it backward propagate. */ | |
ec99ffab | 3060 | if (!prop.demand_p (DEMAND_AVL)) |
4f673c5e JZZ |
3061 | return VALID_AVL_FUSION; |
3062 | else | |
9243c3d1 | 3063 | { |
4f673c5e JZZ |
3064 | /* Case 2: CONST_INT AVL, we don't need to check def. */ |
3065 | if (prop.has_avl_imm ()) | |
3066 | return VALID_AVL_FUSION; | |
3067 | else | |
3068 | { | |
3069 | /* Case 3: REG AVL, we need to check the distance of def to make | |
3070 | sure we won't backward propagate over the def. */ | |
3071 | gcc_assert (prop.has_avl_reg ()); | |
3072 | if (vlmax_avl_p (prop.get_avl ())) | |
3073 | /* Check VL operand for vsetvl vl,zero. */ | |
ec99ffab | 3074 | reg = prop.get_avl_reg_rtx (); |
4f673c5e JZZ |
3075 | else |
3076 | /* Check AVL operand for vsetvl zero,avl. */ | |
ec99ffab | 3077 | reg = prop.get_avl (); |
4f673c5e JZZ |
3078 | } |
3079 | } | |
9243c3d1 | 3080 | |
4f673c5e | 3081 | gcc_assert (reg); |
ec99ffab JZZ |
3082 | if (!prop.get_avl_source ()->insn ()->is_phi () |
3083 | && prop.get_avl_source ()->insn ()->bb () == insn->bb ()) | |
6b6b9c68 JZZ |
3084 | return INVALID_FUSION; |
3085 | hash_set<set_info *> sets | |
3086 | = get_all_sets (prop.get_avl_source (), true, true, true); | |
3087 | if (any_set_in_bb_p (sets, insn->bb ())) | |
3088 | return INVALID_FUSION; | |
3089 | ||
3090 | if (vlmax_avl_p (prop.get_avl ())) | |
4f673c5e | 3091 | { |
6b6b9c68 | 3092 | if (find_reg_killed_by (bb, reg)) |
4f673c5e | 3093 | return INVALID_FUSION; |
6b6b9c68 JZZ |
3094 | else |
3095 | return VALID_AVL_FUSION; | |
4f673c5e | 3096 | } |
6b6b9c68 JZZ |
3097 | |
3098 | /* By default, we always enable backward fusion so that we can | |
3099 | gain more optimizations. */ | |
3100 | if (!find_reg_killed_by (bb, reg)) | |
3101 | return VALID_AVL_FUSION; | |
3102 | return KILLED_AVL_FUSION; | |
3103 | } | |
3104 | ||
3105 | /* We almost enable all cases in get_backward_fusion_type, this function | |
3106 | disable the backward fusion by changing dirty blocks into hard empty | |
3107 | blocks in forward dataflow. We can have more accurate optimization by | |
3108 | this method. */ | |
3109 | bool | |
3110 | pass_vsetvl::hard_empty_block_p (const bb_info *bb, | |
3111 | const vector_insn_info &info) const | |
3112 | { | |
3113 | if (!info.dirty_p () || !info.has_avl_reg ()) | |
3114 | return false; | |
3115 | ||
3116 | basic_block cfg_bb = bb->cfg_bb (); | |
3117 | sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index]; | |
ec99ffab JZZ |
3118 | set_info *set = info.get_avl_source (); |
3119 | rtx avl = gen_rtx_REG (Pmode, set->regno ()); | |
6b6b9c68 JZZ |
3120 | hash_set<set_info *> sets = get_all_sets (set, true, false, false); |
3121 | hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb); | |
3122 | ||
3123 | if (find_reg_killed_by (bb, avl)) | |
3124 | { | |
3125 | /* Condition 1: | |
3126 | Dirty block with killed AVL means that the empty block (no RVV | |
3127 | instructions) are polluted as Dirty blocks with the value of current | |
3128 | AVL is killed. For example: | |
3129 | bb 0: | |
3130 | ... | |
3131 | bb 1: | |
3132 | def a5 | |
3133 | bb 2: | |
3134 | RVV (use a5) | |
3135 | In backward dataflow, we will polluted BB0 and BB1 as Dirt with AVL | |
3136 | killed. since a5 is killed in BB1. | |
3137 | In this case, let's take a look at this example: | |
3138 | ||
3139 | bb 3: bb 4: | |
3140 | def3 a5 def4 a5 | |
3141 | bb 5: bb 6: | |
3142 | def1 a5 def2 a5 | |
3143 | \ / | |
3144 | \ / | |
3145 | \ / | |
3146 | \ / | |
3147 | bb 7: | |
3148 | RVV (use a5) | |
3149 | In thi case, we can polluted BB5 and BB6 as dirty if get-def | |
3150 | of a5 from RVV instruction in BB7 is the def1 in BB5 and | |
3151 | def2 BB6 so we can return false early here for HARD_EMPTY_BLOCK_P. | |
3152 | However, we are not sure whether BB3 and BB4 can be | |
3153 | polluted as Dirty with AVL killed so we can't return false | |
3154 | for HARD_EMPTY_BLOCK_P here since it's too early which will | |
3155 | potentially produce issues. */ | |
3156 | gcc_assert (info.dirty_with_killed_avl_p ()); | |
3157 | if (info.get_avl_source () | |
3158 | && get_same_bb_set (sets, bb->cfg_bb ()) == info.get_avl_source ()) | |
3159 | return false; | |
4f673c5e | 3160 | } |
9243c3d1 | 3161 | |
6b6b9c68 JZZ |
3162 | /* Condition 2: |
3163 | Suppress the VL/VTYPE info backward propagation too early: | |
3164 | ________ | |
3165 | | BB0 | | |
3166 | |________| | |
3167 | | | |
3168 | ____|____ | |
3169 | | BB1 | | |
3170 | |________| | |
3171 | In this case, suppose BB 1 has multiple predecessors, BB 0 is one | |
3172 | of them. BB1 has VL/VTYPE info (may be VALID or DIRTY) to backward | |
3173 | propagate. | |
3174 | The AVIN (available in) which is calculated by LCM is empty only | |
3175 | in these 2 circumstances: | |
3176 | 1. all predecessors of BB1 are empty (not VALID | |
3177 | and can not be polluted in backward fusion flow) | |
3178 | 2. VL/VTYPE info of BB1 predecessors are conflict. | |
3179 | ||
3180 | We keep it as dirty in 2nd circumstance and set it as HARD_EMPTY | |
3181 | (can not be polluted as DIRTY any more) in 1st circumstance. | |
3182 | We don't backward propagate in 1st circumstance since there is | |
3183 | no VALID RVV instruction and no polluted blocks (dirty blocks) | |
3184 | by backward propagation from other following blocks. | |
3185 | It's meaningless to keep it as Dirty anymore. | |
3186 | ||
3187 | However, since we keep it as dirty in 2nd since there are VALID or | |
3188 | Dirty blocks in predecessors, we can still gain the benefits and | |
3189 | optimization opportunities. For example, in this case: | |
3190 | for (size_t i = 0; i < n; i++) | |
3191 | { | |
3192 | if (i != cond) { | |
3193 | vint8mf8_t v = *(vint8mf8_t*)(in + i + 100); | |
3194 | *(vint8mf8_t*)(out + i + 100) = v; | |
3195 | } else { | |
3196 | vbool1_t v = *(vbool1_t*)(in + i + 400); | |
3197 | *(vbool1_t*)(out + i + 400) = v; | |
3198 | } | |
3199 | } | |
3200 | VL/VTYPE in if-else are conflict which will produce empty AVIN LCM result | |
3201 | but we can still keep dirty blocks if *(i != cond)* is very unlikely then | |
3202 | we can preset vsetvl (VL/VTYPE) info from else (static propability model). | |
3203 | ||
3204 | We don't want to backward propagate VL/VTYPE information too early | |
3205 | which is not the optimal and may potentially produce issues. */ | |
3206 | if (bitmap_empty_p (avin)) | |
4f673c5e | 3207 | { |
6b6b9c68 JZZ |
3208 | bool hard_empty_p = true; |
3209 | for (const basic_block pred_cfg_bb : pred_cfg_bbs) | |
3210 | { | |
3211 | if (pred_cfg_bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)) | |
3212 | continue; | |
3213 | sbitmap avout = m_vector_manager->vector_avout[pred_cfg_bb->index]; | |
3214 | if (!bitmap_empty_p (avout)) | |
3215 | { | |
3216 | hard_empty_p = false; | |
3217 | break; | |
3218 | } | |
3219 | } | |
3220 | if (hard_empty_p) | |
3221 | return true; | |
4f673c5e | 3222 | } |
9243c3d1 | 3223 | |
6b6b9c68 JZZ |
3224 | edge e; |
3225 | edge_iterator ei; | |
3226 | bool has_avl_killed_insn_p = false; | |
3227 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
4f673c5e | 3228 | { |
6b6b9c68 JZZ |
3229 | const auto block_info |
3230 | = m_vector_manager->vector_block_infos[e->dest->index]; | |
3231 | if (block_info.local_dem.dirty_with_killed_avl_p ()) | |
9243c3d1 | 3232 | { |
6b6b9c68 JZZ |
3233 | has_avl_killed_insn_p = true; |
3234 | break; | |
3235 | } | |
3236 | } | |
3237 | if (!has_avl_killed_insn_p) | |
3238 | return false; | |
4f673c5e | 3239 | |
6b6b9c68 JZZ |
3240 | bool any_set_in_bbs_p = false; |
3241 | for (const basic_block pred_cfg_bb : pred_cfg_bbs) | |
3242 | { | |
3243 | insn_info *def_insn = extract_single_source (set); | |
3244 | if (def_insn) | |
3245 | { | |
3246 | /* Condition 3: | |
3247 | ||
3248 | Case 1: Case 2: | |
3249 | bb 0: bb 0: | |
3250 | def a5 101 ... | |
3251 | bb 1: bb 1: | |
3252 | ... ... | |
3253 | bb 2: bb 2: | |
3254 | RVV 1 (use a5 with TAIL ANY) ... | |
3255 | bb 3: bb 3: | |
3256 | def a5 101 def a5 101 | |
3257 | bb 4: bb 4: | |
3258 | ... ... | |
3259 | bb 5: bb 5: | |
3260 | RVV 2 (use a5 with TU) RVV 1 (use a5) | |
3261 | ||
3262 | Case 1: We can pollute BB3,BB2,BB1,BB0 are all Dirt blocks | |
3263 | with killed AVL so that we can merge TU demand info from RVV 2 | |
3264 | into RVV 1 and elide the vsevl instruction in BB5. | |
3265 | ||
3266 | TODO: We only optimize for single source def since multiple source | |
3267 | def is quite complicated. | |
3268 | ||
3269 | Case 2: We only can pollute bb 3 as dirty and it has been accepted | |
3270 | in Condition 2 and we can't pollute BB3,BB2,BB1,BB0 like case 1. */ | |
3271 | insn_info *last_killed_insn | |
3272 | = find_reg_killed_by (crtl->ssa->bb (pred_cfg_bb), avl); | |
3273 | if (!last_killed_insn || pred_cfg_bb == def_insn->bb ()->cfg_bb ()) | |
3274 | continue; | |
3275 | if (source_equal_p (last_killed_insn, def_insn)) | |
4f673c5e | 3276 | { |
6b6b9c68 JZZ |
3277 | any_set_in_bbs_p = true; |
3278 | break; | |
4f673c5e | 3279 | } |
9243c3d1 JZZ |
3280 | } |
3281 | else | |
4f673c5e | 3282 | { |
6b6b9c68 JZZ |
3283 | /* Condition 4: |
3284 | ||
3285 | bb 0: bb 1: bb 3: | |
3286 | def1 a5 def2 a5 ... | |
3287 | \ / / | |
3288 | \ / / | |
3289 | \ / / | |
3290 | \ / / | |
3291 | bb 4: / | |
3292 | | / | |
3293 | | / | |
3294 | bb 5: / | |
3295 | | / | |
3296 | | / | |
3297 | bb 6: / | |
3298 | | / | |
3299 | | / | |
3300 | bb 8: | |
3301 | RVV 1 (use a5) | |
3302 | If we get-def (REAL) of a5 from RVV 1 instruction, we will get | |
3303 | def1 from BB0 and def2 from BB1. So we will pollute BB6,BB5,BB4, | |
3304 | BB0,BB1 with DIRTY and set BB3 as HARD_EMPTY so that we won't | |
3305 | propagate AVL to BB3. */ | |
3306 | if (any_set_in_bb_p (sets, crtl->ssa->bb (pred_cfg_bb))) | |
3307 | { | |
3308 | any_set_in_bbs_p = true; | |
3309 | break; | |
3310 | } | |
4f673c5e | 3311 | } |
9243c3d1 | 3312 | } |
6b6b9c68 JZZ |
3313 | if (!any_set_in_bbs_p) |
3314 | return true; | |
3315 | return false; | |
9243c3d1 JZZ |
3316 | } |
3317 | ||
3318 | /* Compute global backward demanded info. */ | |
387cd9d3 JZZ |
3319 | bool |
3320 | pass_vsetvl::backward_demand_fusion (void) | |
9243c3d1 JZZ |
3321 | { |
3322 | /* We compute global infos by backward propagation. | |
3323 | We want to have better performance in these following cases: | |
3324 | ||
3325 | 1. for (size_t i = 0; i < n; i++) { | |
3326 | if (i != cond) { | |
3327 | vint8mf8_t v = *(vint8mf8_t*)(in + i + 100); | |
3328 | *(vint8mf8_t*)(out + i + 100) = v; | |
3329 | } else { | |
3330 | vbool1_t v = *(vbool1_t*)(in + i + 400); | |
3331 | *(vbool1_t*)(out + i + 400) = v; | |
3332 | } | |
3333 | } | |
3334 | ||
3335 | Since we don't have any RVV instruction in the BEFORE blocks, | |
3336 | LCM fails to optimize such case. We want to backward propagate | |
3337 | them into empty blocks so that we could have better performance | |
3338 | in LCM. | |
3339 | ||
3340 | 2. bb 0: | |
3341 | vsetvl e8,mf8 (demand RATIO) | |
3342 | bb 1: | |
3343 | vsetvl e32,mf2 (demand SEW and LMUL) | |
3344 | We backward propagate the first VSETVL into e32,mf2 so that we | |
3345 | could be able to eliminate the second VSETVL in LCM. */ | |
3346 | ||
387cd9d3 | 3347 | bool changed_p = false; |
9243c3d1 JZZ |
3348 | for (const bb_info *bb : crtl->ssa->reverse_bbs ()) |
3349 | { | |
3350 | basic_block cfg_bb = bb->cfg_bb (); | |
b9b251b7 JZZ |
3351 | const auto &curr_block_info |
3352 | = m_vector_manager->vector_block_infos[cfg_bb->index]; | |
3353 | const auto &prop = curr_block_info.local_dem; | |
9243c3d1 JZZ |
3354 | |
3355 | /* If there is nothing to propagate, just skip it. */ | |
3356 | if (!prop.valid_or_dirty_p ()) | |
3357 | continue; | |
3358 | ||
b9b251b7 | 3359 | if (!backward_propagate_worthwhile_p (cfg_bb, curr_block_info)) |
9243c3d1 JZZ |
3360 | continue; |
3361 | ||
d06e9264 JZ |
3362 | /* Fix PR108270: |
3363 | ||
3364 | bb 0 -> bb 1 | |
3365 | We don't need to backward fuse VL/VTYPE info from bb 1 to bb 0 | |
3366 | if bb 1 is not inside a loop and all predecessors of bb 0 are empty. */ | |
3367 | if (m_vector_manager->all_empty_predecessor_p (cfg_bb)) | |
3368 | continue; | |
3369 | ||
9243c3d1 JZZ |
3370 | edge e; |
3371 | edge_iterator ei; | |
3372 | /* Backward propagate to each predecessor. */ | |
3373 | FOR_EACH_EDGE (e, ei, cfg_bb->preds) | |
3374 | { | |
9243c3d1 JZZ |
3375 | auto &block_info |
3376 | = m_vector_manager->vector_block_infos[e->src->index]; | |
3377 | ||
3378 | /* We don't propagate through complex edges. */ | |
3379 | if (e->flags & EDGE_COMPLEX) | |
3380 | continue; | |
3381 | if (e->src->index == ENTRY_BLOCK_PTR_FOR_FN (cfun)->index) | |
3382 | continue; | |
44c918b5 JZZ |
3383 | /* If prop is demand of vsetvl instruction and reaching doesn't demand |
3384 | AVL. We don't backward propagate since vsetvl instruction has no | |
3385 | side effects. */ | |
3386 | if (vsetvl_insn_p (prop.get_insn ()->rtl ()) | |
3387 | && propagate_avl_across_demands_p (prop, block_info.reaching_out)) | |
3388 | continue; | |
9243c3d1 JZZ |
3389 | |
3390 | if (block_info.reaching_out.unknown_p ()) | |
3391 | continue; | |
6b6b9c68 JZZ |
3392 | else if (block_info.reaching_out.hard_empty_p ()) |
3393 | continue; | |
9243c3d1 JZZ |
3394 | else if (block_info.reaching_out.empty_p ()) |
3395 | { | |
4f673c5e JZZ |
3396 | enum fusion_type type |
3397 | = get_backward_fusion_type (crtl->ssa->bb (e->src), prop); | |
3398 | if (type == INVALID_FUSION) | |
9243c3d1 JZZ |
3399 | continue; |
3400 | ||
4f673c5e JZZ |
3401 | block_info.reaching_out = prop; |
3402 | block_info.reaching_out.set_dirty (type); | |
6b6b9c68 JZZ |
3403 | |
3404 | if (prop.has_avl_reg () && !vlmax_avl_p (prop.get_avl ())) | |
3405 | { | |
3406 | hash_set<set_info *> sets | |
3407 | = get_all_sets (prop.get_avl_source (), true, true, true); | |
3408 | set_info *set = get_same_bb_set (sets, e->src); | |
3409 | if (set) | |
3410 | block_info.reaching_out.set_avl_info ( | |
3411 | avl_info (prop.get_avl (), set)); | |
3412 | } | |
3413 | ||
4f673c5e JZZ |
3414 | block_info.local_dem = block_info.reaching_out; |
3415 | block_info.probability = curr_block_info.probability; | |
3416 | changed_p = true; | |
9243c3d1 JZZ |
3417 | } |
3418 | else if (block_info.reaching_out.dirty_p ()) | |
3419 | { | |
3420 | /* DIRTY -> DIRTY or VALID -> DIRTY. */ | |
3421 | vector_insn_info new_info; | |
3422 | ||
3423 | if (block_info.reaching_out.compatible_p (prop)) | |
3424 | { | |
ec99ffab | 3425 | if (block_info.reaching_out.available_p (prop)) |
9243c3d1 | 3426 | continue; |
4f673c5e | 3427 | new_info = block_info.reaching_out.merge (prop, GLOBAL_MERGE); |
6b6b9c68 JZZ |
3428 | new_info.set_dirty ( |
3429 | block_info.reaching_out.dirty_with_killed_avl_p ()); | |
3430 | block_info.probability += curr_block_info.probability; | |
9243c3d1 JZZ |
3431 | } |
3432 | else | |
3433 | { | |
4f673c5e JZZ |
3434 | if (curr_block_info.probability > block_info.probability) |
3435 | { | |
6b6b9c68 JZZ |
3436 | enum fusion_type type |
3437 | = get_backward_fusion_type (crtl->ssa->bb (e->src), | |
3438 | prop); | |
3439 | if (type == INVALID_FUSION) | |
3440 | continue; | |
4f673c5e | 3441 | new_info = prop; |
6b6b9c68 | 3442 | new_info.set_dirty (type); |
4f673c5e JZZ |
3443 | block_info.probability = curr_block_info.probability; |
3444 | } | |
9243c3d1 JZZ |
3445 | else |
3446 | continue; | |
3447 | } | |
3448 | ||
ec99ffab JZZ |
3449 | if (propagate_avl_across_demands_p (prop, |
3450 | block_info.reaching_out)) | |
3451 | { | |
3452 | rtx reg = new_info.get_avl_reg_rtx (); | |
3453 | if (find_reg_killed_by (crtl->ssa->bb (e->src), reg)) | |
3454 | new_info.set_dirty (true); | |
3455 | } | |
3456 | ||
9243c3d1 JZZ |
3457 | block_info.local_dem = new_info; |
3458 | block_info.reaching_out = new_info; | |
387cd9d3 | 3459 | changed_p = true; |
9243c3d1 JZZ |
3460 | } |
3461 | else | |
3462 | { | |
3463 | /* We not only change the info during backward propagation, | |
3464 | but also change the VSETVL instruction. */ | |
3465 | gcc_assert (block_info.reaching_out.valid_p ()); | |
6b6b9c68 JZZ |
3466 | hash_set<set_info *> sets |
3467 | = get_all_sets (prop.get_avl_source (), true, false, false); | |
3468 | set_info *set = get_same_bb_set (sets, e->src); | |
3469 | if (vsetvl_insn_p (block_info.reaching_out.get_insn ()->rtl ()) | |
3470 | && prop.has_avl_reg () && !vlmax_avl_p (prop.get_avl ())) | |
3471 | { | |
3472 | if (!block_info.reaching_out.same_vlmax_p (prop)) | |
3473 | continue; | |
3474 | if (block_info.reaching_out.same_vtype_p (prop)) | |
3475 | continue; | |
3476 | if (!set) | |
3477 | continue; | |
3478 | if (set->insn () != block_info.reaching_out.get_insn ()) | |
3479 | continue; | |
3480 | } | |
ec99ffab JZZ |
3481 | |
3482 | if (!block_info.reaching_out.compatible_p (prop)) | |
3483 | continue; | |
3484 | if (block_info.reaching_out.available_p (prop)) | |
3485 | continue; | |
9243c3d1 JZZ |
3486 | |
3487 | vector_insn_info be_merged = block_info.reaching_out; | |
3488 | if (block_info.local_dem == block_info.reaching_out) | |
3489 | be_merged = block_info.local_dem; | |
4f673c5e JZZ |
3490 | vector_insn_info new_info = be_merged.merge (prop, GLOBAL_MERGE); |
3491 | ||
3492 | if (curr_block_info.probability > block_info.probability) | |
3493 | block_info.probability = curr_block_info.probability; | |
9243c3d1 | 3494 | |
ec99ffab | 3495 | if (propagate_avl_across_demands_p (prop, block_info.reaching_out) |
a481eed8 JZZ |
3496 | && !reg_available_p (crtl->ssa->bb (e->src)->end_insn (), |
3497 | new_info)) | |
ec99ffab JZZ |
3498 | continue; |
3499 | ||
aef20243 | 3500 | change_vsetvl_insn (new_info.get_insn (), new_info); |
9243c3d1 JZZ |
3501 | if (block_info.local_dem == block_info.reaching_out) |
3502 | block_info.local_dem = new_info; | |
3503 | block_info.reaching_out = new_info; | |
387cd9d3 | 3504 | changed_p = true; |
9243c3d1 JZZ |
3505 | } |
3506 | } | |
3507 | } | |
387cd9d3 JZZ |
3508 | return changed_p; |
3509 | } | |
3510 | ||
3511 | /* Compute global forward demanded info. */ | |
3512 | bool | |
3513 | pass_vsetvl::forward_demand_fusion (void) | |
3514 | { | |
3515 | /* Enhance the global information propagation, especially where | |
3516 | backward propagation misses the propagation. | |
3517 | Consider such case: | |
3518 | ||
3519 | bb0 | |
3520 | (TU) | |
3521 | / \ | |
3522 | bb1 bb2 | |
3523 | (TU) (ANY) | |
3524 | existing edge -----> \ / (TU) <----- LCM create this edge. | |
3525 | bb3 | |
3526 | (TU) | |
3527 | ||
3528 | Based on this situation, LCM fails to eliminate the VSETVL instruction and | |
3529 | insert an edge from bb2 to bb3 since we can't backward propagate bb3 into | |
3530 | bb2. To avoid this confusing LCM result and non-optimal codegen, we should | |
3531 | forward propagate information from bb0 to bb2 which is friendly to LCM. */ | |
3532 | bool changed_p = false; | |
3533 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
3534 | { | |
3535 | basic_block cfg_bb = bb->cfg_bb (); | |
3536 | const auto &prop | |
3537 | = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out; | |
3538 | ||
3539 | /* If there is nothing to propagate, just skip it. */ | |
3540 | if (!prop.valid_or_dirty_p ()) | |
3541 | continue; | |
3542 | ||
00fb7698 JZZ |
3543 | if (cfg_bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
3544 | continue; | |
3545 | ||
ec99ffab JZZ |
3546 | if (vsetvl_insn_p (prop.get_insn ()->rtl ())) |
3547 | continue; | |
3548 | ||
387cd9d3 JZZ |
3549 | edge e; |
3550 | edge_iterator ei; | |
3551 | /* Forward propagate to each successor. */ | |
3552 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
3553 | { | |
3554 | auto &local_dem | |
3555 | = m_vector_manager->vector_block_infos[e->dest->index].local_dem; | |
3556 | auto &reaching_out | |
3557 | = m_vector_manager->vector_block_infos[e->dest->index].reaching_out; | |
3558 | ||
3559 | /* It's quite obvious, we don't need to propagate itself. */ | |
3560 | if (e->dest->index == cfg_bb->index) | |
3561 | continue; | |
00fb7698 JZZ |
3562 | /* We don't propagate through complex edges. */ | |
3563 | if (e->flags & EDGE_COMPLEX) | |
3564 | continue; | |
3565 | if (e->dest->index == EXIT_BLOCK_PTR_FOR_FN (cfun)->index) | |
3566 | continue; | |
387cd9d3 JZZ |
3567 | |
3568 | /* If there is nothing to propagate, just skip it. */ | |
3569 | if (!local_dem.valid_or_dirty_p ()) | |
3570 | continue; | |
ec99ffab | 3571 | if (local_dem.available_p (prop)) |
4f673c5e JZZ |
3572 | continue; |
3573 | if (!local_dem.compatible_p (prop)) | |
3574 | continue; | |
ec99ffab JZZ |
3575 | if (propagate_avl_across_demands_p (prop, local_dem)) |
3576 | continue; | |
387cd9d3 | 3577 | |
4f673c5e JZZ |
3578 | vector_insn_info new_info = local_dem.merge (prop, GLOBAL_MERGE); |
3579 | new_info.set_insn (local_dem.get_insn ()); | |
3580 | if (local_dem.dirty_p ()) | |
387cd9d3 | 3581 | { |
4f673c5e | 3582 | gcc_assert (local_dem == reaching_out); |
6b6b9c68 | 3583 | new_info.set_dirty (local_dem.dirty_with_killed_avl_p ()); |
4f673c5e | 3584 | local_dem = new_info; |
4f673c5e JZZ |
3585 | reaching_out = local_dem; |
3586 | } | |
3587 | else | |
3588 | { | |
3589 | if (reaching_out == local_dem) | |
3590 | reaching_out = new_info; | |
3591 | local_dem = new_info; | |
3592 | change_vsetvl_insn (local_dem.get_insn (), new_info); | |
387cd9d3 | 3593 | } |
4f673c5e JZZ |
3594 | auto &prob |
3595 | = m_vector_manager->vector_block_infos[e->dest->index].probability; | |
3596 | auto &curr_prob | |
3597 | = m_vector_manager->vector_block_infos[cfg_bb->index].probability; | |
3598 | prob = curr_prob * e->probability; | |
3599 | changed_p = true; | |
387cd9d3 JZZ |
3600 | } |
3601 | } | |
3602 | return changed_p; | |
3603 | } | |
3604 | ||
3605 | void | |
3606 | pass_vsetvl::demand_fusion (void) | |
3607 | { | |
3608 | bool changed_p = true; | |
3609 | while (changed_p) | |
3610 | { | |
3611 | changed_p = false; | |
4f673c5e JZZ |
3612 | /* To optimize the case like this: |
3613 | void f2 (int8_t * restrict in, int8_t * restrict out, int n, int cond) | |
3614 | { | |
3615 | size_t vl = 101; | |
3616 | ||
3617 | for (size_t i = 0; i < n; i++) | |
3618 | { | |
3619 | vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl); | |
3620 | __riscv_vse8_v_i8mf8 (out + i + 300, v, vl); | |
3621 | } | |
3622 | ||
3623 | for (size_t i = 0; i < n; i++) | |
3624 | { | |
3625 | vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl); | |
3626 | __riscv_vse8_v_i8mf8 (out + i, v, vl); | |
3627 | ||
3628 | vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl); | |
3629 | __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl); | |
3630 | } | |
3631 | } | |
3632 | ||
3633 | bb 0: li a5, 101 (killed avl) | |
3634 | ... | |
3635 | bb 1: vsetvli zero, a5, ta | |
3636 | ... | |
3637 | bb 2: li a5, 101 (killed avl) | |
3638 | ... | |
3639 | bb 3: vsetvli zero, a3, tu | |
3640 | ||
3641 | We want to fuse VSETVLI instructions on bb 1 and bb 3. However, there is | |
3642 | an AVL kill instruction in bb 2 that we can't backward fuse bb 3 or | |
3643 | forward bb 1 arbitrarily. We need available information of each block to | |
3644 | help for such cases. */ | |
6b6b9c68 JZZ |
3645 | changed_p |= backward_demand_fusion (); |
3646 | changed_p |= forward_demand_fusion (); | |
3647 | } | |
3648 | ||
3649 | changed_p = true; | |
3650 | while (changed_p) | |
3651 | { | |
3652 | changed_p = false; | |
3653 | prune_expressions (); | |
3654 | m_vector_manager->create_bitmap_vectors (); | |
3655 | compute_local_properties (); | |
4f673c5e JZZ |
3656 | compute_available (m_vector_manager->vector_comp, |
3657 | m_vector_manager->vector_kill, | |
3658 | m_vector_manager->vector_avout, | |
3659 | m_vector_manager->vector_avin); | |
6b6b9c68 | 3660 | changed_p |= cleanup_illegal_dirty_blocks (); |
4f673c5e JZZ |
3661 | m_vector_manager->free_bitmap_vectors (); |
3662 | if (!m_vector_manager->vector_exprs.is_empty ()) | |
3663 | m_vector_manager->vector_exprs.release (); | |
387cd9d3 | 3664 | } |
9243c3d1 JZZ |
3665 | |
3666 | if (dump_file) | |
3667 | { | |
3668 | fprintf (dump_file, "\n\nDirty blocks list: "); | |
681a5632 JZZ |
3669 | for (const bb_info *bb : crtl->ssa->bbs ()) |
3670 | if (m_vector_manager->vector_block_infos[bb->index ()] | |
3671 | .reaching_out.dirty_p ()) | |
3672 | fprintf (dump_file, "%d ", bb->index ()); | |
9243c3d1 JZZ |
3673 | fprintf (dump_file, "\n\n"); |
3674 | } | |
3675 | } | |
3676 | ||
6b6b9c68 JZZ |
3677 | /* Cleanup illegal dirty blocks. */ |
3678 | bool | |
3679 | pass_vsetvl::cleanup_illegal_dirty_blocks (void) | |
3680 | { | |
3681 | bool changed_p = false; | |
3682 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
3683 | { | |
3684 | basic_block cfg_bb = bb->cfg_bb (); | |
3685 | const auto &prop | |
3686 | = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out; | |
3687 | ||
3688 | /* If there is nothing to cleanup, just skip it. */ | |
3689 | if (!prop.valid_or_dirty_p ()) | |
3690 | continue; | |
3691 | ||
3692 | if (hard_empty_block_p (bb, prop)) | |
3693 | { | |
3694 | m_vector_manager->vector_block_infos[cfg_bb->index].local_dem | |
3695 | = vector_insn_info::get_hard_empty (); | |
3696 | m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out | |
3697 | = vector_insn_info::get_hard_empty (); | |
3698 | changed_p = true; | |
3699 | continue; | |
3700 | } | |
3701 | } | |
3702 | return changed_p; | |
3703 | } | |
3704 | ||
9243c3d1 JZZ |
3705 | /* Assemble the candidates expressions for LCM. */ |
3706 | void | |
3707 | pass_vsetvl::prune_expressions (void) | |
3708 | { | |
681a5632 | 3709 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 3710 | { |
681a5632 JZZ |
3711 | if (m_vector_manager->vector_block_infos[bb->index ()] |
3712 | .local_dem.valid_or_dirty_p ()) | |
9243c3d1 | 3713 | m_vector_manager->create_expr ( |
681a5632 JZZ |
3714 | m_vector_manager->vector_block_infos[bb->index ()].local_dem); |
3715 | if (m_vector_manager->vector_block_infos[bb->index ()] | |
9243c3d1 JZZ |
3716 | .reaching_out.valid_or_dirty_p ()) |
3717 | m_vector_manager->create_expr ( | |
681a5632 | 3718 | m_vector_manager->vector_block_infos[bb->index ()].reaching_out); |
9243c3d1 JZZ |
3719 | } |
3720 | ||
3721 | if (dump_file) | |
3722 | { | |
3723 | fprintf (dump_file, "\nThe total VSETVL expression num = %d\n", | |
3724 | m_vector_manager->vector_exprs.length ()); | |
3725 | fprintf (dump_file, "Expression List:\n"); | |
3726 | for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) | |
3727 | { | |
3728 | fprintf (dump_file, "Expr[%ld]:\n", i); | |
3729 | m_vector_manager->vector_exprs[i]->dump (dump_file); | |
3730 | fprintf (dump_file, "\n"); | |
3731 | } | |
3732 | } | |
3733 | } | |
3734 | ||
4f673c5e JZZ |
3735 | /* Compute the local properties of each recorded expression. |
3736 | ||
3737 | Local properties are those that are defined by the block, irrespective of | |
3738 | other blocks. | |
3739 | ||
3740 | An expression is transparent in a block if its operands are not modified | |
3741 | in the block. | |
3742 | ||
3743 | An expression is computed (locally available) in a block if it is computed | |
3744 | at least once and expression would contain the same value if the | |
3745 | computation was moved to the end of the block. | |
3746 | ||
3747 | An expression is locally anticipatable in a block if it is computed at | |
3748 | least once and expression would contain the same value if the computation | |
3749 | was moved to the beginning of the block. */ | |
9243c3d1 JZZ |
3750 | void |
3751 | pass_vsetvl::compute_local_properties (void) | |
3752 | { | |
3753 | /* - If T is locally available at the end of a block, then T' must be | |
3754 | available at the end of the same block. Since some optimization has | |
3755 | occurred earlier, T' might not be locally available, however, it must | |
3756 | have been previously computed on all paths. As a formula, T at AVLOC(B) | |
3757 | implies that T' at AVOUT(B). | |
3758 | An "available occurrence" is one that is the last occurrence in the | |
3759 | basic block and the operands are not modified by following statements in | |
3760 | the basic block [including this insn]. | |
3761 | ||
3762 | - If T is locally anticipated at the beginning of a block, then either | |
3763 | T', is locally anticipated or it is already available from previous | |
3764 | blocks. As a formula, this means that T at ANTLOC(B) implies that T' at | |
3765 | ANTLOC(B) at AVIN(B). | |
3766 | An "anticipatable occurrence" is one that is the first occurrence in the | |
3767 | basic block, the operands are not modified in the basic block prior | |
3768 | to the occurrence and the output is not used between the start of | |
3769 | the block and the occurrence. */ | |
3770 | ||
3771 | basic_block cfg_bb; | |
4f673c5e | 3772 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 3773 | { |
4f673c5e | 3774 | unsigned int curr_bb_idx = bb->index (); |
9243c3d1 JZZ |
3775 | const auto local_dem |
3776 | = m_vector_manager->vector_block_infos[curr_bb_idx].local_dem; | |
3777 | const auto reaching_out | |
3778 | = m_vector_manager->vector_block_infos[curr_bb_idx].reaching_out; | |
3779 | ||
4f673c5e JZZ |
3780 | /* Compute transparent. */ |
3781 | for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) | |
9243c3d1 | 3782 | { |
4f673c5e JZZ |
3783 | const vector_insn_info *expr = m_vector_manager->vector_exprs[i]; |
3784 | if (local_dem.real_dirty_p () || local_dem.valid_p () | |
3785 | || local_dem.unknown_p () | |
3786 | || has_vsetvl_killed_avl_p (bb, local_dem)) | |
9243c3d1 | 3787 | bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx], i); |
4f673c5e JZZ |
3788 | /* FIXME: Here we set the block as non-transparent (killed) if there |
3789 | is an instruction killed the value of AVL according to the | |
3790 | definition of Local transparent. This is true for such following | |
3791 | case: | |
3792 | ||
3793 | bb 0 (Loop label): | |
3794 | vsetvl zero, a5, e8, mf8 | |
3795 | bb 1: | |
3796 | def a5 | |
3797 | bb 2: | |
3798 | branch bb 0 (Loop label). | |
3799 | ||
3800 | In this case, we know there is a loop bb 0->bb 1->bb 2. According | |
3801 | to LCM definition, it is correct when we set vsetvl zero, a5, e8, | |
3802 | mf8 as non-transparent (killed) so that LCM will not hoist outside | |
3803 | the bb 0. | |
3804 | ||
3805 | However, such conservative configuration will forbid optimization | |
3806 | on some unlucky case. For example: | |
3807 | ||
3808 | bb 0: | |
3809 | li a5, 101 | |
3810 | bb 1: | |
3811 | vsetvl zero, a5, e8, mf8 | |
3812 | bb 2: | |
3813 | li a5, 101 | |
3814 | bb 3: | |
3815 | vsetvl zero, a5, e8, mf8. | |
3816 | So we also relax def a5 as transparent to gain more optimizations | |
3817 | as long as the all real def insn of avl do not come from this | |
3818 | block. This configuration may be still missing some optimization | |
3819 | opportunities. */ | |
6b6b9c68 | 3820 | if (find_reg_killed_by (bb, expr->get_avl ())) |
4f673c5e | 3821 | { |
6b6b9c68 JZZ |
3822 | hash_set<set_info *> sets |
3823 | = get_all_sets (expr->get_avl_source (), true, false, false); | |
3824 | if (any_set_in_bb_p (sets, bb)) | |
4f673c5e JZZ |
3825 | bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx], |
3826 | i); | |
3827 | } | |
9243c3d1 JZZ |
3828 | } |
3829 | ||
4f673c5e | 3830 | /* Compute anticipatable occurrences. */ |
6b6b9c68 JZZ |
3831 | if (local_dem.valid_p () || local_dem.real_dirty_p () |
3832 | || (has_vsetvl_killed_avl_p (bb, local_dem) | |
3833 | && vlmax_avl_p (local_dem.get_avl ()))) | |
4f673c5e JZZ |
3834 | if (anticipatable_occurrence_p (bb, local_dem)) |
3835 | bitmap_set_bit (m_vector_manager->vector_antic[curr_bb_idx], | |
3836 | m_vector_manager->get_expr_id (local_dem)); | |
9243c3d1 | 3837 | |
4f673c5e | 3838 | /* Compute available occurrences. */ |
9243c3d1 JZZ |
3839 | if (reaching_out.valid_or_dirty_p ()) |
3840 | { | |
9243c3d1 JZZ |
3841 | auto_vec<size_t> available_list |
3842 | = m_vector_manager->get_all_available_exprs (reaching_out); | |
3843 | for (size_t i = 0; i < available_list.length (); i++) | |
4f673c5e JZZ |
3844 | { |
3845 | const vector_insn_info *expr | |
3846 | = m_vector_manager->vector_exprs[available_list[i]]; | |
3847 | if (reaching_out.real_dirty_p () | |
3848 | || has_vsetvl_killed_avl_p (bb, reaching_out) | |
3849 | || available_occurrence_p (bb, *expr)) | |
3850 | bitmap_set_bit (m_vector_manager->vector_comp[curr_bb_idx], | |
3851 | available_list[i]); | |
3852 | } | |
9243c3d1 JZZ |
3853 | } |
3854 | } | |
3855 | ||
3856 | /* Compute kill for each basic block using: | |
3857 | ||
3858 | ~(TRANSP | COMP) | |
3859 | */ | |
3860 | ||
3861 | FOR_EACH_BB_FN (cfg_bb, cfun) | |
3862 | { | |
3863 | bitmap_ior (m_vector_manager->vector_kill[cfg_bb->index], | |
3864 | m_vector_manager->vector_transp[cfg_bb->index], | |
3865 | m_vector_manager->vector_comp[cfg_bb->index]); | |
3866 | bitmap_not (m_vector_manager->vector_kill[cfg_bb->index], | |
3867 | m_vector_manager->vector_kill[cfg_bb->index]); | |
3868 | } | |
3869 | ||
3870 | FOR_EACH_BB_FN (cfg_bb, cfun) | |
3871 | { | |
3872 | edge e; | |
3873 | edge_iterator ei; | |
3874 | ||
3875 | /* If the current block is the destination of an abnormal edge, we | |
3876 | kill all trapping (for PRE) and memory (for hoist) expressions | |
3877 | because we won't be able to properly place the instruction on | |
3878 | the edge. So make them neither anticipatable nor transparent. | |
3879 | This is fairly conservative. | |
3880 | ||
3881 | ??? For hoisting it may be necessary to check for set-and-jump | |
3882 | instructions here, not just for abnormal edges. The general problem | |
3883 | is that when an expression cannot be placed right at the end of | |
3884 | a basic block we should account for any side-effects of a subsequent | |
3885 | jump instructions that could clobber the expression. It would | |
3886 | be best to implement this check along the lines of | |
3887 | should_hoist_expr_to_dom where the target block is already known | |
3888 | and, hence, there's no need to conservatively prune expressions on | |
3889 | "intermediate" set-and-jump instructions. */ | |
3890 | FOR_EACH_EDGE (e, ei, cfg_bb->preds) | |
3891 | if (e->flags & EDGE_COMPLEX) | |
3892 | { | |
3893 | bitmap_clear (m_vector_manager->vector_antic[cfg_bb->index]); | |
3894 | bitmap_clear (m_vector_manager->vector_transp[cfg_bb->index]); | |
3895 | } | |
3896 | } | |
3897 | } | |
3898 | ||
3899 | /* Return true if VSETVL in the block can be refined as vsetvl zero,zero. */ | |
3900 | bool | |
6b6b9c68 JZZ |
3901 | pass_vsetvl::can_refine_vsetvl_p (const basic_block cfg_bb, |
3902 | const vector_insn_info &info) const | |
9243c3d1 JZZ |
3903 | { |
3904 | if (!m_vector_manager->all_same_ratio_p ( | |
3905 | m_vector_manager->vector_avin[cfg_bb->index])) | |
3906 | return false; | |
3907 | ||
005fad9d JZZ |
3908 | if (!m_vector_manager->all_same_avl_p ( |
3909 | cfg_bb, m_vector_manager->vector_avin[cfg_bb->index])) | |
3910 | return false; | |
3911 | ||
9243c3d1 JZZ |
3912 | size_t expr_id |
3913 | = bitmap_first_set_bit (m_vector_manager->vector_avin[cfg_bb->index]); | |
6b6b9c68 JZZ |
3914 | if (!m_vector_manager->vector_exprs[expr_id]->same_vlmax_p (info)) |
3915 | return false; | |
3916 | if (!m_vector_manager->vector_exprs[expr_id]->compatible_avl_p (info)) | |
9243c3d1 JZZ |
3917 | return false; |
3918 | ||
3919 | edge e; | |
3920 | edge_iterator ei; | |
3921 | bool all_valid_p = true; | |
3922 | FOR_EACH_EDGE (e, ei, cfg_bb->preds) | |
3923 | { | |
3924 | if (bitmap_empty_p (m_vector_manager->vector_avout[e->src->index])) | |
3925 | { | |
3926 | all_valid_p = false; | |
3927 | break; | |
3928 | } | |
3929 | } | |
3930 | ||
3931 | if (!all_valid_p) | |
3932 | return false; | |
3933 | return true; | |
3934 | } | |
3935 | ||
3936 | /* Optimize the case like this: | |
3937 | ||
3938 | bb 0: | |
3939 | vsetvl 0 a5,zero,e8,mf8 | |
3940 | insn 0 (demand SEW + LMUL) | |
3941 | bb 1: | |
3942 | vsetvl 1 a5,zero,e16,mf4 | |
3943 | insn 1 (demand SEW + LMUL) | |
3944 | ||
3945 | In this case, we should be able to refine | |
3946 | vsetvl 1 into vsetvl zero, zero according AVIN. */ | |
3947 | void | |
3948 | pass_vsetvl::refine_vsetvls (void) const | |
3949 | { | |
3950 | basic_block cfg_bb; | |
3951 | FOR_EACH_BB_FN (cfg_bb, cfun) | |
3952 | { | |
c139f5e1 | 3953 | auto info = get_block_info(cfg_bb).local_dem; |
9243c3d1 JZZ |
3954 | insn_info *insn = info.get_insn (); |
3955 | if (!info.valid_p ()) | |
3956 | continue; | |
3957 | ||
3958 | rtx_insn *rinsn = insn->rtl (); | |
6b6b9c68 | 3959 | if (!can_refine_vsetvl_p (cfg_bb, info)) |
9243c3d1 JZZ |
3960 | continue; |
3961 | ||
ec99ffab JZZ |
3962 | /* We can't refine user vsetvl into vsetvl zero,zero since the dest |
3963 | will be used by the following instructions. */ | |
3964 | if (vector_config_insn_p (rinsn)) | |
3965 | { | |
3966 | m_vector_manager->to_refine_vsetvls.add (rinsn); | |
3967 | continue; | |
3968 | } | |
ff8f9544 JZ |
3969 | |
3970 | /* If all incoming edges to a block have a vector state that is compatible | |
3971 | with the block, then we need not emit a vsetvl in the current | |
3972 | block. */ | |
3973 | ||
3974 | gcc_assert (has_vtype_op (insn->rtl ())); | |
3975 | rinsn = PREV_INSN (insn->rtl ()); | |
3976 | gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ()))); | |
3977 | if (m_vector_manager->all_avail_in_compatible_p (cfg_bb)) | |
3978 | { | |
3979 | size_t id = m_vector_manager->get_expr_id (info); | |
3980 | if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], id)) | |
3981 | continue; | |
3982 | eliminate_insn (rinsn); | |
3983 | } | |
3984 | else | |
3985 | { | |
3986 | rtx new_pat | |
3987 | = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX); | |
3988 | change_insn (rinsn, new_pat); | |
3989 | } | |
9243c3d1 JZZ |
3990 | } |
3991 | } | |
3992 | ||
3993 | void | |
3994 | pass_vsetvl::cleanup_vsetvls () | |
3995 | { | |
3996 | basic_block cfg_bb; | |
3997 | FOR_EACH_BB_FN (cfg_bb, cfun) | |
3998 | { | |
3999 | auto &info | |
c139f5e1 | 4000 | = get_block_info(cfg_bb).reaching_out; |
9243c3d1 JZZ |
4001 | gcc_assert (m_vector_manager->expr_set_num ( |
4002 | m_vector_manager->vector_del[cfg_bb->index]) | |
4003 | <= 1); | |
4004 | for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) | |
4005 | { | |
4006 | if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], i)) | |
4007 | { | |
4008 | if (info.dirty_p ()) | |
4009 | info.set_unknown (); | |
4010 | else | |
4011 | { | |
4f673c5e | 4012 | const auto dem |
c139f5e1 | 4013 | = get_block_info(cfg_bb) |
4f673c5e JZZ |
4014 | .local_dem; |
4015 | gcc_assert (dem == *m_vector_manager->vector_exprs[i]); | |
4016 | insn_info *insn = dem.get_insn (); | |
9243c3d1 JZZ |
4017 | gcc_assert (insn && insn->rtl ()); |
4018 | rtx_insn *rinsn; | |
ec99ffab JZZ |
4019 | /* We can't eliminate user vsetvl since the dest will be used |
4020 | * by the following instructions. */ | |
9243c3d1 | 4021 | if (vector_config_insn_p (insn->rtl ())) |
9243c3d1 | 4022 | { |
ec99ffab JZZ |
4023 | m_vector_manager->to_delete_vsetvls.add (insn->rtl ()); |
4024 | continue; | |
9243c3d1 | 4025 | } |
ec99ffab JZZ |
4026 | |
4027 | gcc_assert (has_vtype_op (insn->rtl ())); | |
4028 | rinsn = PREV_INSN (insn->rtl ()); | |
4029 | gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ()))); | |
9243c3d1 JZZ |
4030 | eliminate_insn (rinsn); |
4031 | } | |
4032 | } | |
4033 | } | |
4034 | } | |
4035 | } | |
4036 | ||
4037 | bool | |
4038 | pass_vsetvl::commit_vsetvls (void) | |
4039 | { | |
4040 | bool need_commit = false; | |
4041 | ||
4042 | for (int ed = 0; ed < NUM_EDGES (m_vector_manager->vector_edge_list); ed++) | |
4043 | { | |
4044 | for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) | |
4045 | { | |
4046 | edge eg = INDEX_EDGE (m_vector_manager->vector_edge_list, ed); | |
4047 | if (bitmap_bit_p (m_vector_manager->vector_insert[ed], i)) | |
4048 | { | |
4049 | const vector_insn_info *require | |
4050 | = m_vector_manager->vector_exprs[i]; | |
4051 | gcc_assert (require->valid_or_dirty_p ()); | |
4052 | rtl_profile_for_edge (eg); | |
4053 | start_sequence (); | |
4054 | ||
4055 | insn_info *insn = require->get_insn (); | |
4056 | vector_insn_info prev_info = vector_insn_info (); | |
005fad9d JZZ |
4057 | sbitmap bitdata = m_vector_manager->vector_avout[eg->src->index]; |
4058 | if (m_vector_manager->all_same_ratio_p (bitdata) | |
4059 | && m_vector_manager->all_same_avl_p (eg->dest, bitdata)) | |
9243c3d1 | 4060 | { |
005fad9d | 4061 | size_t first = bitmap_first_set_bit (bitdata); |
9243c3d1 JZZ |
4062 | prev_info = *m_vector_manager->vector_exprs[first]; |
4063 | } | |
4064 | ||
4065 | insert_vsetvl (EMIT_DIRECT, insn->rtl (), *require, prev_info); | |
4066 | rtx_insn *rinsn = get_insns (); | |
4067 | end_sequence (); | |
4068 | default_rtl_profile (); | |
4069 | ||
4070 | /* We should not get an abnormal edge here. */ | |
4071 | gcc_assert (!(eg->flags & EDGE_ABNORMAL)); | |
4072 | need_commit = true; | |
4073 | insert_insn_on_edge (rinsn, eg); | |
4074 | } | |
4075 | } | |
4076 | } | |
4077 | ||
4f673c5e | 4078 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 4079 | { |
4f673c5e | 4080 | basic_block cfg_bb = bb->cfg_bb (); |
9243c3d1 | 4081 | const auto reaching_out |
c139f5e1 | 4082 | = get_block_info(cfg_bb).reaching_out; |
9243c3d1 JZZ |
4083 | if (!reaching_out.dirty_p ()) |
4084 | continue; | |
4085 | ||
4f673c5e JZZ |
4086 | if (reaching_out.dirty_with_killed_avl_p ()) |
4087 | { | |
4088 | if (!has_vsetvl_killed_avl_p (bb, reaching_out)) | |
4089 | continue; | |
4090 | ||
4091 | unsigned int bb_index; | |
4092 | sbitmap_iterator sbi; | |
4093 | sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index]; | |
4094 | bool available_p = false; | |
4095 | EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi) | |
4096 | { | |
ec99ffab JZZ |
4097 | if (m_vector_manager->vector_exprs[bb_index]->available_p ( |
4098 | reaching_out)) | |
4f673c5e JZZ |
4099 | { |
4100 | available_p = true; | |
4101 | break; | |
4102 | } | |
4103 | } | |
4104 | if (available_p) | |
4105 | continue; | |
4106 | } | |
7ae4d1df JZZ |
4107 | |
4108 | rtx new_pat; | |
ec99ffab JZZ |
4109 | if (!reaching_out.demand_p (DEMAND_AVL)) |
4110 | { | |
4111 | vl_vtype_info new_info = reaching_out; | |
4112 | new_info.set_avl_info (avl_info (const0_rtx, nullptr)); | |
4113 | new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX); | |
4114 | } | |
4115 | else if (can_refine_vsetvl_p (cfg_bb, reaching_out)) | |
9243c3d1 JZZ |
4116 | new_pat |
4117 | = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, reaching_out, NULL_RTX); | |
7ae4d1df JZZ |
4118 | else if (vlmax_avl_p (reaching_out.get_avl ())) |
4119 | new_pat = gen_vsetvl_pat (VSETVL_NORMAL, reaching_out, | |
ec99ffab | 4120 | reaching_out.get_avl_reg_rtx ()); |
7ae4d1df JZZ |
4121 | else |
4122 | new_pat | |
4123 | = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, reaching_out, NULL_RTX); | |
9243c3d1 JZZ |
4124 | |
4125 | start_sequence (); | |
4126 | emit_insn (new_pat); | |
4127 | rtx_insn *rinsn = get_insns (); | |
4128 | end_sequence (); | |
4129 | insert_insn_end_basic_block (rinsn, cfg_bb); | |
4130 | if (dump_file) | |
4131 | { | |
4132 | fprintf (dump_file, | |
4133 | "\nInsert vsetvl insn %d at the end of <bb %d>:\n", | |
4134 | INSN_UID (rinsn), cfg_bb->index); | |
4135 | print_rtl_single (dump_file, rinsn); | |
4136 | } | |
4137 | } | |
4138 | ||
4139 | return need_commit; | |
4140 | } | |
4141 | ||
4142 | void | |
4143 | pass_vsetvl::pre_vsetvl (void) | |
4144 | { | |
4145 | /* Compute entity list. */ | |
4146 | prune_expressions (); | |
4147 | ||
cfe3fbc6 | 4148 | m_vector_manager->create_bitmap_vectors (); |
9243c3d1 JZZ |
4149 | compute_local_properties (); |
4150 | m_vector_manager->vector_edge_list = pre_edge_lcm_avs ( | |
4151 | m_vector_manager->vector_exprs.length (), m_vector_manager->vector_transp, | |
4152 | m_vector_manager->vector_comp, m_vector_manager->vector_antic, | |
4153 | m_vector_manager->vector_kill, m_vector_manager->vector_avin, | |
4154 | m_vector_manager->vector_avout, &m_vector_manager->vector_insert, | |
4155 | &m_vector_manager->vector_del); | |
4156 | ||
4157 | /* We should dump the information before CFG is changed. Otherwise it will | |
4158 | produce ICE (internal compiler error). */ | |
4159 | if (dump_file) | |
4160 | m_vector_manager->dump (dump_file); | |
4161 | ||
4162 | refine_vsetvls (); | |
4163 | cleanup_vsetvls (); | |
4164 | bool need_commit = commit_vsetvls (); | |
4165 | if (need_commit) | |
4166 | commit_edge_insertions (); | |
4167 | } | |
4168 | ||
c919d059 KC |
4169 | /* Some instruction can not be accessed in RTL_SSA when we don't re-init |
4170 | the new RTL_SSA framework but it is definetely at the END of the block. | |
4171 | ||
4172 | Here we optimize the VSETVL is hoisted by LCM: | |
4173 | ||
4174 | Before LCM: | |
4175 | bb 1: | |
4176 | vsetvli a5,a2,e32,m1,ta,mu | |
4177 | bb 2: | |
4178 | vsetvli zero,a5,e32,m1,ta,mu | |
4179 | ... | |
4180 | ||
4181 | After LCM: | |
4182 | bb 1: | |
4183 | vsetvli a5,a2,e32,m1,ta,mu | |
4184 | LCM INSERTED: vsetvli zero,a5,e32,m1,ta,mu --> eliminate | |
4185 | bb 2: | |
4186 | ... | |
4187 | */ | |
4188 | rtx_insn * | |
4189 | pass_vsetvl::get_vsetvl_at_end (const bb_info *bb, vector_insn_info *dem) const | |
4190 | { | |
4191 | rtx_insn *end_vsetvl = BB_END (bb->cfg_bb ()); | |
4192 | if (end_vsetvl && NONDEBUG_INSN_P (end_vsetvl)) | |
4193 | { | |
4194 | if (JUMP_P (end_vsetvl)) | |
4195 | end_vsetvl = PREV_INSN (end_vsetvl); | |
4196 | ||
4197 | if (NONDEBUG_INSN_P (end_vsetvl) | |
4198 | && vsetvl_discard_result_insn_p (end_vsetvl)) | |
4199 | { | |
4200 | /* Only handle single succ. here, multiple succ. is much | |
4201 | more complicated. */ | |
4202 | if (single_succ_p (bb->cfg_bb ())) | |
4203 | { | |
4204 | edge e = single_succ_edge (bb->cfg_bb ()); | |
4205 | *dem = get_block_info (e->dest).local_dem; | |
4206 | return end_vsetvl; | |
4207 | } | |
4208 | } | |
4209 | } | |
4210 | return nullptr; | |
4211 | } | |
4212 | ||
4213 | /* This predicator should only used within same basic block. */ | |
4214 | static bool | |
4215 | local_avl_compatible_p (rtx avl1, rtx avl2) | |
4216 | { | |
4217 | if (!REG_P (avl1) || !REG_P (avl2)) | |
4218 | return false; | |
4219 | ||
4220 | return REGNO (avl1) == REGNO (avl2); | |
4221 | } | |
4222 | ||
8421f279 JZ |
4223 | /* Local user vsetvl optimizaiton: |
4224 | ||
4225 | Case 1: | |
4226 | vsetvl a5,a4,e8,mf8 | |
4227 | ... | |
4228 | vsetvl zero,a5,e8,mf8 --> Eliminate directly. | |
4229 | ||
4230 | Case 2: | |
4231 | vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2 | |
4232 | ... | |
4233 | vsetvl zero,a5,e32,mf2 --> Eliminate directly. */ | |
4234 | void | |
c919d059 KC |
4235 | pass_vsetvl::local_eliminate_vsetvl_insn (const bb_info *bb) const |
4236 | { | |
4237 | rtx_insn *prev_vsetvl = nullptr; | |
4238 | rtx_insn *curr_vsetvl = nullptr; | |
4239 | rtx vl_placeholder = RVV_VLMAX; | |
4240 | rtx prev_avl = vl_placeholder; | |
4241 | rtx curr_avl = vl_placeholder; | |
4242 | vector_insn_info prev_dem; | |
4243 | ||
4244 | /* Instruction inserted by LCM is not appeared in RTL-SSA yet, try to | |
4245 | found those instruciton. */ | |
4246 | if (rtx_insn *end_vsetvl = get_vsetvl_at_end (bb, &prev_dem)) | |
8421f279 | 4247 | { |
c919d059 KC |
4248 | prev_avl = get_avl (end_vsetvl); |
4249 | prev_vsetvl = end_vsetvl; | |
4250 | } | |
4251 | ||
4252 | bool skip_one = false; | |
4253 | /* Backward propgate vsetvl info, drop the later one (prev_vsetvl) if it's | |
4254 | compatible with current vsetvl (curr_avl), and merge the vtype and avl | |
4255 | info. into current vsetvl. */ | |
4256 | for (insn_info *insn : bb->reverse_real_nondebug_insns ()) | |
4257 | { | |
4258 | rtx_insn *rinsn = insn->rtl (); | |
4259 | const auto &curr_dem = get_vector_info (insn); | |
4260 | bool need_invalidate = false; | |
4261 | ||
4262 | /* Skip if this insn already handled in last iteration. */ | |
4263 | if (skip_one) | |
4264 | { | |
4265 | skip_one = false; | |
4266 | continue; | |
4267 | } | |
4268 | ||
4269 | if (vsetvl_insn_p (rinsn)) | |
4270 | { | |
4271 | curr_vsetvl = rinsn; | |
4272 | /* vsetvl are using vl rather than avl since it will try to merge | |
4273 | with other vsetvl_discard_result. | |
4274 | ||
4275 | v--- avl | |
4276 | vsetvl a5,a4,e8,mf8 # vsetvl | |
4277 | ... ^--- vl | |
4278 | vsetvl zero,a5,e8,mf8 # vsetvl_discard_result | |
4279 | ^--- avl | |
4280 | */ | |
4281 | curr_avl = get_vl (rinsn); | |
4282 | /* vsetvl is a cut point of local backward vsetvl elimination. */ | |
4283 | need_invalidate = true; | |
4284 | } | |
4285 | else if (has_vtype_op (rinsn) && NONDEBUG_INSN_P (PREV_INSN (rinsn)) | |
4286 | && (vsetvl_discard_result_insn_p (PREV_INSN (rinsn)) | |
4287 | || vsetvl_insn_p (PREV_INSN (rinsn)))) | |
8421f279 | 4288 | { |
c919d059 | 4289 | curr_vsetvl = PREV_INSN (rinsn); |
8421f279 | 4290 | |
c919d059 | 4291 | if (vsetvl_insn_p (PREV_INSN (rinsn))) |
8421f279 | 4292 | { |
c919d059 KC |
4293 | /* Need invalidate and skip if it's vsetvl. */ |
4294 | need_invalidate = true; | |
4295 | /* vsetvl_discard_result_insn_p won't appeared in RTL-SSA, | |
4296 | * so only need to skip for vsetvl. */ | |
4297 | skip_one = true; | |
8421f279 | 4298 | } |
c919d059 KC |
4299 | |
4300 | curr_avl = get_avl (rinsn); | |
4301 | ||
4302 | /* Some instrucion like pred_extract_first<mode> don't reqruie avl, so | |
4303 | the avl is null, use vl_placeholder for unify the handling | |
4304 | logic. */ | |
4305 | if (!curr_avl) | |
4306 | curr_avl = vl_placeholder; | |
4307 | } | |
4308 | else if (insn->is_call () || insn->is_asm () | |
4309 | || find_access (insn->defs (), VL_REGNUM) | |
4310 | || find_access (insn->defs (), VTYPE_REGNUM) | |
4311 | || (REG_P (prev_avl) | |
4312 | && find_access (insn->defs (), REGNO (prev_avl)))) | |
4313 | { | |
4314 | /* Invalidate if this insn can't propagate vl, vtype or avl. */ | |
4315 | need_invalidate = true; | |
4316 | prev_dem = vector_insn_info (); | |
4317 | } | |
4318 | else | |
4319 | /* Not interested instruction. */ | |
4320 | continue; | |
4321 | ||
4322 | /* Local AVL compatibility checking is simpler than global, we only | |
4323 | need to check the REGNO is same. */ | |
20c85207 | 4324 | if (prev_dem.valid_or_dirty_p () && prev_dem.skip_avl_compatible_p (curr_dem) |
c919d059 KC |
4325 | && local_avl_compatible_p (prev_avl, curr_avl)) |
4326 | { | |
4327 | /* curr_dem and prev_dem is compatible! */ | |
4328 | /* Update avl info since we need to make sure they are fully | |
4329 | compatible before merge. */ | |
4330 | prev_dem.set_avl_info (curr_dem.get_avl_info ()); | |
4331 | /* Merge both and update into curr_vsetvl. */ | |
4332 | prev_dem = curr_dem.merge (prev_dem, LOCAL_MERGE); | |
4333 | change_vsetvl_insn (curr_dem.get_insn (), prev_dem); | |
4334 | /* Then we can drop prev_vsetvl. */ | |
4335 | eliminate_insn (prev_vsetvl); | |
4336 | } | |
4337 | ||
4338 | if (need_invalidate) | |
4339 | { | |
4340 | prev_vsetvl = nullptr; | |
4341 | curr_vsetvl = nullptr; | |
4342 | prev_avl = vl_placeholder; | |
4343 | curr_avl = vl_placeholder; | |
4344 | prev_dem = vector_insn_info (); | |
4345 | } | |
4346 | else | |
4347 | { | |
4348 | prev_vsetvl = curr_vsetvl; | |
4349 | prev_avl = curr_avl; | |
4350 | prev_dem = curr_dem; | |
8421f279 JZ |
4351 | } |
4352 | } | |
4353 | } | |
4354 | ||
20c85207 JZ |
4355 | /* Return the first vsetvl instruction in CFG_BB or NULL if |
4356 | none exists or if a user RVV instruction is enountered | |
4357 | prior to any vsetvl. */ | |
4358 | static rtx_insn * | |
4359 | get_first_vsetvl_before_rvv_insns (basic_block cfg_bb) | |
4360 | { | |
4361 | rtx_insn *rinsn; | |
4362 | FOR_BB_INSNS (cfg_bb, rinsn) | |
4363 | { | |
4364 | if (!NONDEBUG_INSN_P (rinsn)) | |
4365 | continue; | |
4366 | /* If we don't find any inserted vsetvli before user RVV instructions, | |
4367 | we don't need to optimize the vsetvls in this block. */ | |
4368 | if (has_vtype_op (rinsn) || vsetvl_insn_p (rinsn)) | |
4369 | return nullptr; | |
4370 | ||
4371 | if (vsetvl_discard_result_insn_p (rinsn)) | |
4372 | return rinsn; | |
4373 | } | |
4374 | return nullptr; | |
4375 | } | |
4376 | ||
4377 | /* Global user vsetvl optimizaiton: | |
4378 | ||
4379 | Case 1: | |
4380 | bb 1: | |
4381 | vsetvl a5,a4,e8,mf8 | |
4382 | ... | |
4383 | bb 2: | |
4384 | ... | |
4385 | vsetvl zero,a5,e8,mf8 --> Eliminate directly. | |
4386 | ||
4387 | Case 2: | |
4388 | bb 1: | |
4389 | vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2 | |
4390 | ... | |
4391 | bb 2: | |
4392 | ... | |
4393 | vsetvl zero,a5,e32,mf2 --> Eliminate directly. | |
4394 | ||
4395 | Case 3: | |
4396 | bb 1: | |
4397 | vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2 | |
4398 | ... | |
4399 | bb 2: | |
4400 | ... | |
4401 | vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2 | |
4402 | goto bb 3 | |
4403 | bb 3: | |
4404 | ... | |
4405 | vsetvl zero,a5,e32,mf2 --> Eliminate directly. | |
4406 | */ | |
4407 | bool | |
4408 | pass_vsetvl::global_eliminate_vsetvl_insn (const bb_info *bb) const | |
4409 | { | |
27612ce3 | 4410 | rtx_insn *vsetvl_rinsn = NULL; |
20c85207 JZ |
4411 | vector_insn_info dem = vector_insn_info (); |
4412 | const auto &block_info = get_block_info (bb); | |
4413 | basic_block cfg_bb = bb->cfg_bb (); | |
4414 | ||
4415 | if (block_info.local_dem.valid_or_dirty_p ()) | |
4416 | { | |
4417 | /* Optimize the local vsetvl. */ | |
4418 | dem = block_info.local_dem; | |
4419 | vsetvl_rinsn = get_first_vsetvl_before_rvv_insns (cfg_bb); | |
4420 | } | |
4421 | if (!vsetvl_rinsn) | |
4422 | /* Optimize the global vsetvl inserted by LCM. */ | |
4423 | vsetvl_rinsn = get_vsetvl_at_end (bb, &dem); | |
4424 | ||
4425 | /* No need to optimize if block doesn't have vsetvl instructions. */ | |
4426 | if (!dem.valid_or_dirty_p () | |
4427 | || !vsetvl_rinsn | |
4428 | || !dem.get_avl_source () | |
4429 | || !dem.has_avl_reg ()) | |
4430 | return false; | |
4431 | ||
4432 | /* Condition 1: Check it has preds. */ | |
4433 | if (EDGE_COUNT (cfg_bb->preds) == 0) | |
4434 | return false; | |
4435 | ||
4436 | /* If all preds has VL/VTYPE status setted by user vsetvls, and these | |
4437 | user vsetvls are all skip_avl_compatible_p with the vsetvl in this | |
4438 | block, we can eliminate this vsetvl instruction. */ | |
4439 | sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index]; | |
4440 | ||
4441 | unsigned int bb_index; | |
4442 | sbitmap_iterator sbi; | |
4443 | rtx avl = get_avl (dem.get_insn ()->rtl ()); | |
4444 | hash_set<set_info *> sets | |
4445 | = get_all_sets (dem.get_avl_source (), true, false, false); | |
4446 | /* Condition 2: All VL/VTYPE available in are all compatible. */ | |
4447 | EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi) | |
4448 | { | |
4449 | const auto &expr = m_vector_manager->vector_exprs[bb_index]; | |
4450 | const auto &insn = expr->get_insn (); | |
4451 | def_info *def = find_access (insn->defs (), REGNO (avl)); | |
4452 | set_info *set = safe_dyn_cast<set_info *> (def); | |
4453 | if (!vsetvl_insn_p (insn->rtl ()) || insn->bb () == bb | |
4454 | || !sets.contains (set)) | |
4455 | return false; | |
4456 | } | |
4457 | ||
4458 | /* Condition 3: We don't do the global optimization for the block | |
4459 | has a pred is entry block or exit block. */ | |
4460 | /* Condition 4: All preds have available VL/VTYPE out. */ | |
4461 | edge e; | |
4462 | edge_iterator ei; | |
4463 | FOR_EACH_EDGE (e, ei, cfg_bb->preds) | |
4464 | { | |
4465 | sbitmap avout = m_vector_manager->vector_avout[e->src->index]; | |
4466 | if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun) | |
4467 | || e->src == EXIT_BLOCK_PTR_FOR_FN (cfun) || bitmap_empty_p (avout)) | |
4468 | return false; | |
4469 | ||
4470 | EXECUTE_IF_SET_IN_BITMAP (avout, 0, bb_index, sbi) | |
4471 | { | |
4472 | const auto &expr = m_vector_manager->vector_exprs[bb_index]; | |
4473 | const auto &insn = expr->get_insn (); | |
4474 | def_info *def = find_access (insn->defs (), REGNO (avl)); | |
4475 | set_info *set = safe_dyn_cast<set_info *> (def); | |
4476 | if (!vsetvl_insn_p (insn->rtl ()) || insn->bb () == bb | |
4477 | || !sets.contains (set) || !expr->skip_avl_compatible_p (dem)) | |
4478 | return false; | |
4479 | } | |
4480 | } | |
4481 | ||
4482 | /* Step1: Reshape the VL/VTYPE status to make sure everything compatible. */ | |
4483 | hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb); | |
4484 | FOR_EACH_EDGE (e, ei, cfg_bb->preds) | |
4485 | { | |
4486 | sbitmap avout = m_vector_manager->vector_avout[e->src->index]; | |
4487 | EXECUTE_IF_SET_IN_BITMAP (avout, 0, bb_index, sbi) | |
4488 | { | |
4489 | vector_insn_info prev_dem = *m_vector_manager->vector_exprs[bb_index]; | |
4490 | vector_insn_info curr_dem = dem; | |
4491 | insn_info *insn = prev_dem.get_insn (); | |
4492 | if (!pred_cfg_bbs.contains (insn->bb ()->cfg_bb ())) | |
4493 | continue; | |
4494 | /* Update avl info since we need to make sure they are fully | |
4495 | compatible before merge. */ | |
4496 | curr_dem.set_avl_info (prev_dem.get_avl_info ()); | |
4497 | /* Merge both and update into curr_vsetvl. */ | |
4498 | prev_dem = curr_dem.merge (prev_dem, LOCAL_MERGE); | |
4499 | change_vsetvl_insn (insn, prev_dem); | |
4500 | } | |
4501 | } | |
4502 | ||
4503 | /* Step2: eliminate the vsetvl instruction. */ | |
4504 | eliminate_insn (vsetvl_rinsn); | |
4505 | return true; | |
4506 | } | |
4507 | ||
4508 | /* This function does the following post optimization base on RTL_SSA: | |
4509 | ||
4510 | 1. Local user vsetvl optimizations. | |
4511 | 2. Global user vsetvl optimizations. | |
4512 | 3. AVL dependencies removal: | |
4513 | Before VSETVL PASS, RVV instructions pattern is depending on AVL operand | |
4514 | implicitly. Since we will emit VSETVL instruction and make RVV | |
4515 | instructions depending on VL/VTYPE global status registers, we remove the | |
4516 | such AVL operand in the RVV instructions pattern here in order to remove | |
4517 | AVL dependencies when AVL operand is a register operand. | |
4518 | ||
4519 | Before the VSETVL PASS: | |
4520 | li a5,32 | |
4521 | ... | |
4522 | vadd.vv (..., a5) | |
4523 | After the VSETVL PASS: | |
4524 | li a5,32 | |
4525 | vsetvli zero, a5, ... | |
4526 | ... | |
4527 | vadd.vv (..., const_int 0). */ | |
9243c3d1 | 4528 | void |
20c85207 | 4529 | pass_vsetvl::ssa_post_optimization (void) const |
9243c3d1 JZZ |
4530 | { |
4531 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
4532 | { | |
c919d059 | 4533 | local_eliminate_vsetvl_insn (bb); |
20c85207 JZ |
4534 | bool changed_p = true; |
4535 | while (changed_p) | |
4536 | { | |
4537 | changed_p = false; | |
4538 | changed_p |= global_eliminate_vsetvl_insn (bb); | |
4539 | } | |
9243c3d1 JZZ |
4540 | for (insn_info *insn : bb->real_nondebug_insns ()) |
4541 | { | |
4542 | rtx_insn *rinsn = insn->rtl (); | |
9243c3d1 JZZ |
4543 | if (vlmax_avl_insn_p (rinsn)) |
4544 | { | |
4545 | eliminate_insn (rinsn); | |
4546 | continue; | |
4547 | } | |
4548 | ||
4549 | /* Erase the AVL operand from the instruction. */ | |
4550 | if (!has_vl_op (rinsn) || !REG_P (get_vl (rinsn))) | |
4551 | continue; | |
4552 | rtx avl = get_vl (rinsn); | |
a2d12abe | 4553 | if (count_regno_occurrences (rinsn, REGNO (avl)) == 1) |
9243c3d1 JZZ |
4554 | { |
4555 | /* Get the list of uses for the new instruction. */ | |
4556 | auto attempt = crtl->ssa->new_change_attempt (); | |
4557 | insn_change change (insn); | |
4558 | /* Remove the use of the substituted value. */ | |
4559 | access_array_builder uses_builder (attempt); | |
4560 | uses_builder.reserve (insn->num_uses () - 1); | |
4561 | for (use_info *use : insn->uses ()) | |
4562 | if (use != find_access (insn->uses (), REGNO (avl))) | |
4563 | uses_builder.quick_push (use); | |
4564 | use_array new_uses = use_array (uses_builder.finish ()); | |
4565 | change.new_uses = new_uses; | |
4566 | change.move_range = insn->ebb ()->insn_range (); | |
60bd33bc JZZ |
4567 | rtx pat; |
4568 | if (fault_first_load_p (rinsn)) | |
4569 | pat = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx); | |
4570 | else | |
4571 | { | |
4572 | rtx set = single_set (rinsn); | |
4573 | rtx src | |
4574 | = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx); | |
4575 | pat = gen_rtx_SET (SET_DEST (set), src); | |
4576 | } | |
1d339ce8 KC |
4577 | bool ok = change_insn (crtl->ssa, change, insn, pat); |
4578 | gcc_assert (ok); | |
9243c3d1 JZZ |
4579 | } |
4580 | } | |
4581 | } | |
4582 | } | |
4583 | ||
20c85207 JZ |
4584 | /* Return true if the SET result is not used by any instructions. */ |
4585 | static bool | |
4586 | has_no_uses (basic_block cfg_bb, rtx_insn *rinsn, int regno) | |
4587 | { | |
4588 | /* Handle the following case that can not be detected in RTL_SSA. */ | |
4589 | /* E.g. | |
4590 | li a5, 100 | |
4591 | vsetvli a6, a5... | |
4592 | ... | |
4593 | vadd (use a6) | |
4594 | ||
4595 | The use of "a6" is removed from "vadd" but the information is | |
4596 | not updated in RTL_SSA framework. We don't want to re-new | |
4597 | a new RTL_SSA which is expensive, instead, we use data-flow | |
4598 | analysis to check whether "a6" has no uses. */ | |
4599 | if (bitmap_bit_p (df_get_live_out (cfg_bb), regno)) | |
4600 | return false; | |
4601 | ||
4602 | rtx_insn *iter; | |
4603 | for (iter = NEXT_INSN (rinsn); iter && iter != NEXT_INSN (BB_END (cfg_bb)); | |
4604 | iter = NEXT_INSN (iter)) | |
4605 | if (df_find_use (iter, regno_reg_rtx[regno])) | |
4606 | return false; | |
4607 | ||
4608 | return true; | |
4609 | } | |
4610 | ||
4611 | /* This function does the following post optimization base on dataflow | |
4612 | analysis: | |
4613 | ||
4614 | 1. Change vsetvl rd, rs1 --> vsevl zero, rs1, if rd is not used by any | |
4615 | nondebug instructions. Even though this PASS runs after RA and it doesn't | |
4616 | help for reduce register pressure, it can help instructions scheduling since | |
4617 | we remove the dependencies. | |
4618 | ||
4619 | 2. Remove redundant user vsetvls base on outcome of Phase 4 (LCM) && Phase 5 | |
4620 | (AVL dependencies removal). */ | |
6b6b9c68 | 4621 | void |
20c85207 JZ |
4622 | pass_vsetvl::df_post_optimization (void) const |
4623 | { | |
6b6b9c68 | 4624 | df_analyze (); |
6b6b9c68 | 4625 | hash_set<rtx_insn *> to_delete; |
20c85207 JZ |
4626 | basic_block cfg_bb; |
4627 | rtx_insn *rinsn; | |
4628 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
6b6b9c68 | 4629 | { |
20c85207 | 4630 | FOR_BB_INSNS (cfg_bb, rinsn) |
6b6b9c68 | 4631 | { |
20c85207 | 4632 | if (NONDEBUG_INSN_P (rinsn) && vsetvl_insn_p (rinsn)) |
6b6b9c68 | 4633 | { |
20c85207 | 4634 | rtx vl = get_vl (rinsn); |
ec99ffab | 4635 | vector_insn_info info; |
20c85207 JZ |
4636 | info.parse_insn (rinsn); |
4637 | bool to_delete_p = m_vector_manager->to_delete_p (rinsn); | |
4638 | bool to_refine_p = m_vector_manager->to_refine_p (rinsn); | |
4639 | if (has_no_uses (cfg_bb, rinsn, REGNO (vl))) | |
ec99ffab | 4640 | { |
20c85207 JZ |
4641 | if (to_delete_p) |
4642 | to_delete.add (rinsn); | |
4643 | else if (to_refine_p) | |
ec99ffab JZZ |
4644 | { |
4645 | rtx new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, | |
4646 | info, NULL_RTX); | |
20c85207 JZ |
4647 | validate_change (rinsn, &PATTERN (rinsn), new_pat, false); |
4648 | } | |
4649 | else if (!vlmax_avl_p (info.get_avl ())) | |
4650 | { | |
4651 | rtx new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, info, | |
4652 | NULL_RTX); | |
4653 | validate_change (rinsn, &PATTERN (rinsn), new_pat, false); | |
ec99ffab | 4654 | } |
6b6b9c68 JZZ |
4655 | } |
4656 | } | |
4657 | } | |
4658 | } | |
6b6b9c68 JZZ |
4659 | for (rtx_insn *rinsn : to_delete) |
4660 | eliminate_insn (rinsn); | |
4661 | } | |
4662 | ||
9243c3d1 JZZ |
4663 | void |
4664 | pass_vsetvl::init (void) | |
4665 | { | |
4666 | if (optimize > 0) | |
4667 | { | |
4668 | /* Initialization of RTL_SSA. */ | |
4669 | calculate_dominance_info (CDI_DOMINATORS); | |
4670 | df_analyze (); | |
4671 | crtl->ssa = new function_info (cfun); | |
4672 | } | |
4673 | ||
4674 | m_vector_manager = new vector_infos_manager (); | |
4f673c5e | 4675 | compute_probabilities (); |
9243c3d1 JZZ |
4676 | |
4677 | if (dump_file) | |
4678 | { | |
4679 | fprintf (dump_file, "\nPrologue: Initialize vector infos\n"); | |
4680 | m_vector_manager->dump (dump_file); | |
4681 | } | |
4682 | } | |
4683 | ||
4684 | void | |
4685 | pass_vsetvl::done (void) | |
4686 | { | |
4687 | if (optimize > 0) | |
4688 | { | |
4689 | /* Finalization of RTL_SSA. */ | |
4690 | free_dominance_info (CDI_DOMINATORS); | |
4691 | if (crtl->ssa->perform_pending_updates ()) | |
4692 | cleanup_cfg (0); | |
4693 | delete crtl->ssa; | |
4694 | crtl->ssa = nullptr; | |
4695 | } | |
4696 | m_vector_manager->release (); | |
4697 | delete m_vector_manager; | |
4698 | m_vector_manager = nullptr; | |
4699 | } | |
4700 | ||
acc10c79 JZZ |
4701 | /* Compute probability for each block. */ |
4702 | void | |
4703 | pass_vsetvl::compute_probabilities (void) | |
4704 | { | |
4705 | /* Don't compute it in -O0 since we don't need it. */ | |
4706 | if (!optimize) | |
4707 | return; | |
4708 | edge e; | |
4709 | edge_iterator ei; | |
4710 | ||
4711 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
4712 | { | |
4713 | basic_block cfg_bb = bb->cfg_bb (); | |
4714 | auto &curr_prob | |
c139f5e1 | 4715 | = get_block_info(cfg_bb).probability; |
c129d22d JZZ |
4716 | |
4717 | /* GCC assume entry block (bb 0) are always so | |
4718 | executed so set its probability as "always". */ | |
acc10c79 JZZ |
4719 | if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb) |
4720 | curr_prob = profile_probability::always (); | |
c129d22d JZZ |
4721 | /* Exit block (bb 1) is the block we don't need to process. */ |
4722 | if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb) | |
4723 | continue; | |
4724 | ||
acc10c79 JZZ |
4725 | gcc_assert (curr_prob.initialized_p ()); |
4726 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
4727 | { | |
4728 | auto &new_prob | |
c139f5e1 | 4729 | = get_block_info(e->dest).probability; |
acc10c79 JZZ |
4730 | if (!new_prob.initialized_p ()) |
4731 | new_prob = curr_prob * e->probability; | |
4732 | else if (new_prob == profile_probability::always ()) | |
4733 | continue; | |
4734 | else | |
4735 | new_prob += curr_prob * e->probability; | |
4736 | } | |
4737 | } | |
acc10c79 JZZ |
4738 | } |
4739 | ||
9243c3d1 JZZ |
4740 | /* Lazy vsetvl insertion for optimize > 0. */ |
4741 | void | |
4742 | pass_vsetvl::lazy_vsetvl (void) | |
4743 | { | |
4744 | if (dump_file) | |
4745 | fprintf (dump_file, | |
4746 | "\nEntering Lazy VSETVL PASS and Handling %d basic blocks for " | |
4747 | "function:%s\n", | |
4748 | n_basic_blocks_for_fn (cfun), function_name (cfun)); | |
4749 | ||
4750 | /* Phase 1 - Compute the local dems within each block. | |
4751 | The data-flow analysis within each block is backward analysis. */ | |
4752 | if (dump_file) | |
4753 | fprintf (dump_file, "\nPhase 1: Compute local backward vector infos\n"); | |
4754 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
4755 | compute_local_backward_infos (bb); | |
4756 | if (dump_file) | |
4757 | m_vector_manager->dump (dump_file); | |
4758 | ||
4759 | /* Phase 2 - Emit vsetvl instructions within each basic block according to | |
4760 | demand, compute and save ANTLOC && AVLOC of each block. */ | |
4761 | if (dump_file) | |
4762 | fprintf (dump_file, | |
4763 | "\nPhase 2: Emit vsetvl instruction within each block\n"); | |
4764 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
4765 | emit_local_forward_vsetvls (bb); | |
4766 | if (dump_file) | |
4767 | m_vector_manager->dump (dump_file); | |
4768 | ||
4769 | /* Phase 3 - Propagate demanded info across blocks. */ | |
4770 | if (dump_file) | |
4771 | fprintf (dump_file, "\nPhase 3: Demands propagation across blocks\n"); | |
387cd9d3 | 4772 | demand_fusion (); |
9243c3d1 JZZ |
4773 | if (dump_file) |
4774 | m_vector_manager->dump (dump_file); | |
4775 | ||
4776 | /* Phase 4 - Lazy code motion. */ | |
4777 | if (dump_file) | |
4778 | fprintf (dump_file, "\nPhase 4: PRE vsetvl by Lazy code motion (LCM)\n"); | |
4779 | pre_vsetvl (); | |
4780 | ||
20c85207 | 4781 | /* Phase 5 - Post optimization base on RTL_SSA. */ |
9243c3d1 | 4782 | if (dump_file) |
20c85207 JZ |
4783 | fprintf (dump_file, "\nPhase 5: Post optimization base on RTL_SSA\n"); |
4784 | ssa_post_optimization (); | |
6b6b9c68 | 4785 | |
20c85207 | 4786 | /* Phase 6 - Post optimization base on data-flow analysis. */ |
6b6b9c68 JZZ |
4787 | if (dump_file) |
4788 | fprintf (dump_file, | |
20c85207 JZ |
4789 | "\nPhase 6: Post optimization base on data-flow analysis\n"); |
4790 | df_post_optimization (); | |
9243c3d1 JZZ |
4791 | } |
4792 | ||
4793 | /* Main entry point for this pass. */ | |
4794 | unsigned int | |
4795 | pass_vsetvl::execute (function *) | |
4796 | { | |
4797 | if (n_basic_blocks_for_fn (cfun) <= 0) | |
4798 | return 0; | |
4799 | ||
ca8fb009 JZZ |
4800 | /* The RVV instruction may change after split which is not a stable |
4801 | instruction. We need to split it here to avoid potential issue | |
4802 | since the VSETVL PASS is insert before split PASS. */ | |
4803 | split_all_insns (); | |
9243c3d1 JZZ |
4804 | |
4805 | /* Early return for there is no vector instructions. */ | |
4806 | if (!has_vector_insn (cfun)) | |
4807 | return 0; | |
4808 | ||
4809 | init (); | |
4810 | ||
4811 | if (!optimize) | |
4812 | simple_vsetvl (); | |
4813 | else | |
4814 | lazy_vsetvl (); | |
4815 | ||
4816 | done (); | |
4817 | return 0; | |
4818 | } | |
4819 | ||
4820 | rtl_opt_pass * | |
4821 | make_pass_vsetvl (gcc::context *ctxt) | |
4822 | { | |
4823 | return new pass_vsetvl (ctxt); | |
4824 | } |