]>
Commit | Line | Data |
---|---|---|
9243c3d1 | 1 | /* VSETVL pass for RISC-V 'V' Extension for GNU compiler. |
c841bde5 | 2 | Copyright (C) 2022-2023 Free Software Foundation, Inc. |
9243c3d1 JZZ |
3 | Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd. |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or(at your option) | |
10 | any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
21 | /* This pass is to Set VL/VTYPE global status for RVV instructions | |
22 | that depend on VL and VTYPE registers by Lazy code motion (LCM). | |
23 | ||
24 | Strategy: | |
25 | ||
26 | - Backward demanded info fusion within block. | |
27 | ||
28 | - Lazy code motion (LCM) based demanded info backward propagation. | |
29 | ||
30 | - RTL_SSA framework for def-use, PHI analysis. | |
31 | ||
32 | - Lazy code motion (LCM) for global VL/VTYPE optimization. | |
33 | ||
34 | Assumption: | |
35 | ||
36 | - Each avl operand is either an immediate (must be in range 0 ~ 31) or reg. | |
37 | ||
38 | This pass consists of 6 phases: | |
39 | ||
40 | - Phase 1 - compute VL/VTYPE demanded information within each block | |
41 | by backward data-flow analysis. | |
42 | ||
43 | - Phase 2 - Emit vsetvl instructions within each basic block according to | |
44 | demand, compute and save ANTLOC && AVLOC of each block. | |
45 | ||
387cd9d3 JZZ |
46 | - Phase 3 - Backward && forward demanded info propagation and fusion across |
47 | blocks. | |
9243c3d1 JZZ |
48 | |
49 | - Phase 4 - Lazy code motion including: compute local properties, | |
50 | pre_edge_lcm and vsetvl insertion && delete edges for LCM results. | |
51 | ||
52 | - Phase 5 - Cleanup AVL operand of RVV instruction since it will not be | |
53 | used any more and VL operand of VSETVL instruction if it is not used by | |
54 | any non-debug instructions. | |
55 | ||
6b6b9c68 JZZ |
56 | - Phase 6 - Propagate AVL between vsetvl instructions. |
57 | ||
9243c3d1 JZZ |
58 | Implementation: |
59 | ||
60 | - The subroutine of optimize == 0 is simple_vsetvl. | |
61 | This function simply inserts a vsetvl for each RVV | |
62 | instruction. No optimization. | |
63 | ||
64 | - The subroutine of optimize > 0 is lazy_vsetvl. | |
65 | This function optimizes the vsetvl insertion process by | |
ec99ffab JZZ |
66 | lazy code motion (LCM) layering on RTL_SSA. |
67 | ||
68 | - get_avl (), get_insn (), get_avl_source (): | |
69 | ||
70 | 1. get_insn () is the current instruction, find_access (get_insn | |
71 | ())->def is the same as get_avl_source () if get_insn () demand VL. | |
72 | 2. If get_avl () is non-VLMAX REG, get_avl () == get_avl_source | |
73 | ()->regno (). | |
74 | 3. get_avl_source ()->regno () is the REGNO that we backward propagate. | |
75 | */ | |
9243c3d1 JZZ |
76 | |
77 | #define IN_TARGET_CODE 1 | |
78 | #define INCLUDE_ALGORITHM | |
79 | #define INCLUDE_FUNCTIONAL | |
80 | ||
81 | #include "config.h" | |
82 | #include "system.h" | |
83 | #include "coretypes.h" | |
84 | #include "tm.h" | |
85 | #include "backend.h" | |
86 | #include "rtl.h" | |
87 | #include "target.h" | |
88 | #include "tree-pass.h" | |
89 | #include "df.h" | |
90 | #include "rtl-ssa.h" | |
91 | #include "cfgcleanup.h" | |
92 | #include "insn-config.h" | |
93 | #include "insn-attr.h" | |
94 | #include "insn-opinit.h" | |
95 | #include "tm-constrs.h" | |
96 | #include "cfgrtl.h" | |
97 | #include "cfganal.h" | |
98 | #include "lcm.h" | |
99 | #include "predict.h" | |
100 | #include "profile-count.h" | |
101 | #include "riscv-vsetvl.h" | |
102 | ||
103 | using namespace rtl_ssa; | |
104 | using namespace riscv_vector; | |
105 | ||
ec99ffab JZZ |
106 | static CONSTEXPR const unsigned ALL_SEW[] = {8, 16, 32, 64}; |
107 | static CONSTEXPR const vlmul_type ALL_LMUL[] | |
108 | = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2}; | |
ec99ffab | 109 | |
9243c3d1 JZZ |
110 | DEBUG_FUNCTION void |
111 | debug (const vector_insn_info *info) | |
112 | { | |
113 | info->dump (stderr); | |
114 | } | |
115 | ||
116 | DEBUG_FUNCTION void | |
117 | debug (const vector_infos_manager *info) | |
118 | { | |
119 | info->dump (stderr); | |
120 | } | |
121 | ||
122 | static bool | |
123 | vlmax_avl_p (rtx x) | |
124 | { | |
125 | return x && rtx_equal_p (x, RVV_VLMAX); | |
126 | } | |
127 | ||
128 | static bool | |
129 | vlmax_avl_insn_p (rtx_insn *rinsn) | |
130 | { | |
85112fbb JZZ |
131 | return (INSN_CODE (rinsn) == CODE_FOR_vlmax_avlsi |
132 | || INSN_CODE (rinsn) == CODE_FOR_vlmax_avldi); | |
9243c3d1 JZZ |
133 | } |
134 | ||
8d8cc482 JZZ |
135 | /* Return true if the block is a loop itself: |
136 | local_dem | |
137 | __________ | |
138 | ____|____ | | |
139 | | | | | |
140 | |________| | | |
141 | |_________| | |
142 | reaching_out | |
143 | */ | |
9243c3d1 JZZ |
144 | static bool |
145 | loop_basic_block_p (const basic_block cfg_bb) | |
146 | { | |
8d8cc482 JZZ |
147 | if (JUMP_P (BB_END (cfg_bb)) && any_condjump_p (BB_END (cfg_bb))) |
148 | { | |
149 | edge e; | |
150 | edge_iterator ei; | |
151 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
152 | if (e->dest->index == cfg_bb->index) | |
153 | return true; | |
154 | } | |
155 | return false; | |
9243c3d1 JZZ |
156 | } |
157 | ||
158 | /* Return true if it is an RVV instruction depends on VTYPE global | |
159 | status register. */ | |
160 | static bool | |
161 | has_vtype_op (rtx_insn *rinsn) | |
162 | { | |
163 | return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn); | |
164 | } | |
165 | ||
166 | /* Return true if it is an RVV instruction depends on VL global | |
167 | status register. */ | |
168 | static bool | |
169 | has_vl_op (rtx_insn *rinsn) | |
170 | { | |
171 | return recog_memoized (rinsn) >= 0 && get_attr_has_vl_op (rinsn); | |
172 | } | |
173 | ||
/* Is this a SEW value that can be encoded into the VTYPE format, i.e. a
   power of two in the range [8, 64].

   The previous implementation used the result of exact_log2 directly as a
   boolean.  exact_log2 yields -1 (which is "true") for values that are not
   a power of two, so e.g. 24 or 48 were wrongly accepted.  Test the
   power-of-two property explicitly instead.  */
static bool
valid_sew_p (size_t sew)
{
  if (sew < 8 || sew > 64)
    return false;
  /* SEW is non-zero here, so clearing the lowest set bit leaves zero
     exactly when SEW is a power of two.  */
  return (sew & (sew - 1)) == 0;
}
180 | ||
ec99ffab JZZ |
181 | /* Return true if the instruction ignores VLMUL field of VTYPE. */ |
182 | static bool | |
183 | ignore_vlmul_insn_p (rtx_insn *rinsn) | |
184 | { | |
185 | return get_attr_type (rinsn) == TYPE_VIMOVVX | |
186 | || get_attr_type (rinsn) == TYPE_VFMOVVF | |
187 | || get_attr_type (rinsn) == TYPE_VIMOVXV | |
188 | || get_attr_type (rinsn) == TYPE_VFMOVFV; | |
189 | } | |
190 | ||
191 | /* Return true if the instruction is scalar move instruction. */ | |
192 | static bool | |
193 | scalar_move_insn_p (rtx_insn *rinsn) | |
194 | { | |
195 | return get_attr_type (rinsn) == TYPE_VIMOVXV | |
196 | || get_attr_type (rinsn) == TYPE_VFMOVFV; | |
197 | } | |
198 | ||
60bd33bc JZZ |
199 | /* Return true if the instruction is fault first load instruction. */ |
200 | static bool | |
201 | fault_first_load_p (rtx_insn *rinsn) | |
202 | { | |
6313b045 JZZ |
203 | return recog_memoized (rinsn) >= 0 |
204 | && (get_attr_type (rinsn) == TYPE_VLDFF | |
205 | || get_attr_type (rinsn) == TYPE_VLSEGDFF); | |
60bd33bc JZZ |
206 | } |
207 | ||
208 | /* Return true if the instruction is read vl instruction. */ | |
209 | static bool | |
210 | read_vl_insn_p (rtx_insn *rinsn) | |
211 | { | |
212 | return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_RDVL; | |
213 | } | |
214 | ||
9243c3d1 JZZ |
215 | /* Return true if it is a vsetvl instruction. */ |
216 | static bool | |
217 | vector_config_insn_p (rtx_insn *rinsn) | |
218 | { | |
219 | return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL; | |
220 | } | |
221 | ||
222 | /* Return true if it is vsetvldi or vsetvlsi. */ | |
223 | static bool | |
224 | vsetvl_insn_p (rtx_insn *rinsn) | |
225 | { | |
6b6b9c68 JZZ |
226 | if (!vector_config_insn_p (rinsn)) |
227 | return false; | |
85112fbb | 228 | return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi |
6b6b9c68 JZZ |
229 | || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi); |
230 | } | |
231 | ||
232 | /* Return true if it is vsetvl zero, rs1. */ | |
233 | static bool | |
234 | vsetvl_discard_result_insn_p (rtx_insn *rinsn) | |
235 | { | |
236 | if (!vector_config_insn_p (rinsn)) | |
237 | return false; | |
238 | return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi | |
239 | || INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi); | |
9243c3d1 JZZ |
240 | } |
241 | ||
9243c3d1 | 242 | static bool |
4f673c5e | 243 | real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb) |
9243c3d1 | 244 | { |
4f673c5e | 245 | return insn != nullptr && insn->is_real () && insn->bb () == bb; |
9243c3d1 JZZ |
246 | } |
247 | ||
9243c3d1 | 248 | static bool |
4f673c5e | 249 | before_p (const insn_info *insn1, const insn_info *insn2) |
9243c3d1 | 250 | { |
9b9a1ac1 | 251 | return insn1->compare_with (insn2) < 0; |
4f673c5e JZZ |
252 | } |
253 | ||
6b6b9c68 JZZ |
254 | static insn_info * |
255 | find_reg_killed_by (const bb_info *bb, rtx x) | |
4f673c5e | 256 | { |
6b6b9c68 JZZ |
257 | if (!x || vlmax_avl_p (x) || !REG_P (x)) |
258 | return nullptr; | |
259 | for (insn_info *insn : bb->reverse_real_nondebug_insns ()) | |
4f673c5e | 260 | if (find_access (insn->defs (), REGNO (x))) |
6b6b9c68 JZZ |
261 | return insn; |
262 | return nullptr; | |
263 | } | |
264 | ||
265 | /* Helper function to get VL operand. */ | |
266 | static rtx | |
267 | get_vl (rtx_insn *rinsn) | |
268 | { | |
269 | if (has_vl_op (rinsn)) | |
270 | { | |
271 | extract_insn_cached (rinsn); | |
272 | return recog_data.operand[get_attr_vl_op_idx (rinsn)]; | |
273 | } | |
274 | return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0)); | |
4f673c5e JZZ |
275 | } |
276 | ||
277 | static bool | |
278 | has_vsetvl_killed_avl_p (const bb_info *bb, const vector_insn_info &info) | |
279 | { | |
280 | if (info.dirty_with_killed_avl_p ()) | |
281 | { | |
282 | rtx avl = info.get_avl (); | |
6b6b9c68 | 283 | if (vlmax_avl_p (avl)) |
ec99ffab | 284 | return find_reg_killed_by (bb, info.get_avl_reg_rtx ()) != nullptr; |
4f673c5e JZZ |
285 | for (const insn_info *insn : bb->reverse_real_nondebug_insns ()) |
286 | { | |
287 | def_info *def = find_access (insn->defs (), REGNO (avl)); | |
288 | if (def) | |
289 | { | |
290 | set_info *set = safe_dyn_cast<set_info *> (def); | |
291 | if (!set) | |
292 | return false; | |
293 | ||
294 | rtx new_avl = gen_rtx_REG (GET_MODE (avl), REGNO (avl)); | |
295 | gcc_assert (new_avl != avl); | |
296 | if (!info.compatible_avl_p (avl_info (new_avl, set))) | |
297 | return false; | |
298 | ||
299 | return true; | |
300 | } | |
301 | } | |
302 | } | |
303 | return false; | |
304 | } | |
305 | ||
9243c3d1 JZZ |
306 | /* An "anticipatable occurrence" is one that is the first occurrence in the |
307 | basic block, the operands are not modified in the basic block prior | |
308 | to the occurrence and the output is not used between the start of | |
4f673c5e JZZ |
309 | the block and the occurrence. |
310 | ||
311 | For VSETVL instruction, we have these following formats: | |
312 | 1. vsetvl zero, rs1. | |
313 | 2. vsetvl zero, imm. | |
314 | 3. vsetvl rd, rs1. | |
315 | ||
316 | So base on these circumstances, a DEM is considered as a local anticipatable | |
317 | occurrence should satisfy these following conditions: | |
318 | ||
319 | 1). rs1 (avl) are not modified in the basic block prior to the VSETVL. | |
320 | 2). rd (vl) are not modified in the basic block prior to the VSETVL. | |
321 | 3). rd (vl) is not used between the start of the block and the occurrence. | |
322 | ||
323 | Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE | |
324 | is modified prior to the occurrence. This case is already considered as | |
325 | a non-local anticipatable occurrence. | |
326 | */ | |
9243c3d1 | 327 | static bool |
4f673c5e | 328 | anticipatable_occurrence_p (const bb_info *bb, const vector_insn_info dem) |
9243c3d1 | 329 | { |
4f673c5e | 330 | insn_info *insn = dem.get_insn (); |
9243c3d1 JZZ |
331 | /* The only possible operand we care of VSETVL is AVL. */ |
332 | if (dem.has_avl_reg ()) | |
333 | { | |
4f673c5e | 334 | /* rs1 (avl) are not modified in the basic block prior to the VSETVL. */ |
9243c3d1 JZZ |
335 | if (!vlmax_avl_p (dem.get_avl ())) |
336 | { | |
ec99ffab | 337 | set_info *set = dem.get_avl_source (); |
9243c3d1 JZZ |
338 | /* If it's undefined, it's not anticipatable conservatively. */ |
339 | if (!set) | |
340 | return false; | |
4f673c5e JZZ |
341 | if (real_insn_and_same_bb_p (set->insn (), bb) |
342 | && before_p (set->insn (), insn)) | |
9243c3d1 JZZ |
343 | return false; |
344 | } | |
345 | } | |
346 | ||
4f673c5e | 347 | /* rd (vl) is not used between the start of the block and the occurrence. */ |
9243c3d1 JZZ |
348 | if (vsetvl_insn_p (insn->rtl ())) |
349 | { | |
4f673c5e JZZ |
350 | rtx dest = get_vl (insn->rtl ()); |
351 | for (insn_info *i = insn->prev_nondebug_insn (); | |
352 | real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ()) | |
353 | { | |
354 | /* rd (vl) is not used between the start of the block and the | |
355 | * occurrence. */ | |
356 | if (find_access (i->uses (), REGNO (dest))) | |
357 | return false; | |
358 | /* rd (vl) are not modified in the basic block prior to the VSETVL. */ | |
359 | if (find_access (i->defs (), REGNO (dest))) | |
360 | return false; | |
361 | } | |
9243c3d1 JZZ |
362 | } |
363 | ||
364 | return true; | |
365 | } | |
366 | ||
367 | /* An "available occurrence" is one that is the last occurrence in the | |
368 | basic block and the operands are not modified by following statements in | |
4f673c5e JZZ |
369 | the basic block [including this insn]. |
370 | ||
371 | For VSETVL instruction, we have these following formats: | |
372 | 1. vsetvl zero, rs1. | |
373 | 2. vsetvl zero, imm. | |
374 | 3. vsetvl rd, rs1. | |
375 | ||
376 | So base on these circumstances, a DEM is considered as a local available | |
377 | occurrence should satisfy these following conditions: | |
378 | ||
379 | 1). rs1 (avl) are not modified by following statements in | |
380 | the basic block. | |
381 | 2). rd (vl) are not modified by following statements in | |
382 | the basic block. | |
383 | ||
384 | Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE | |
385 | is modified prior to the occurrence. This case is already considered as | |
386 | a non-local available occurrence. | |
387 | */ | |
9243c3d1 | 388 | static bool |
4f673c5e | 389 | available_occurrence_p (const bb_info *bb, const vector_insn_info dem) |
9243c3d1 | 390 | { |
4f673c5e | 391 | insn_info *insn = dem.get_insn (); |
9243c3d1 JZZ |
392 | /* The only possible operand we care of VSETVL is AVL. */ |
393 | if (dem.has_avl_reg ()) | |
394 | { | |
9243c3d1 JZZ |
395 | if (!vlmax_avl_p (dem.get_avl ())) |
396 | { | |
4f673c5e JZZ |
397 | rtx dest = NULL_RTX; |
398 | if (vsetvl_insn_p (insn->rtl ())) | |
399 | dest = get_vl (insn->rtl ()); | |
400 | for (const insn_info *i = insn; real_insn_and_same_bb_p (i, bb); | |
401 | i = i->next_nondebug_insn ()) | |
402 | { | |
60bd33bc JZZ |
403 | if (read_vl_insn_p (i->rtl ())) |
404 | continue; | |
4f673c5e JZZ |
405 | /* rs1 (avl) are not modified by following statements in |
406 | the basic block. */ | |
407 | if (find_access (i->defs (), REGNO (dem.get_avl ()))) | |
408 | return false; | |
409 | /* rd (vl) are not modified by following statements in | |
410 | the basic block. */ | |
411 | if (dest && find_access (i->defs (), REGNO (dest))) | |
412 | return false; | |
413 | } | |
9243c3d1 JZZ |
414 | } |
415 | } | |
416 | return true; | |
417 | } | |
418 | ||
6b6b9c68 JZZ |
419 | static bool |
420 | insn_should_be_added_p (const insn_info *insn, unsigned int types) | |
9243c3d1 | 421 | { |
6b6b9c68 JZZ |
422 | if (insn->is_real () && (types & REAL_SET)) |
423 | return true; | |
424 | if (insn->is_phi () && (types & PHI_SET)) | |
425 | return true; | |
426 | if (insn->is_bb_head () && (types & BB_HEAD_SET)) | |
427 | return true; | |
428 | if (insn->is_bb_end () && (types & BB_END_SET)) | |
429 | return true; | |
430 | return false; | |
9243c3d1 JZZ |
431 | } |
432 | ||
6b6b9c68 JZZ |
433 | /* Recursively find all define instructions. The kind of instruction is |
434 | specified by the DEF_TYPE. */ | |
435 | static hash_set<set_info *> | |
436 | get_all_sets (phi_info *phi, unsigned int types) | |
9243c3d1 | 437 | { |
6b6b9c68 | 438 | hash_set<set_info *> insns; |
4f673c5e JZZ |
439 | auto_vec<phi_info *> work_list; |
440 | hash_set<phi_info *> visited_list; | |
441 | if (!phi) | |
6b6b9c68 | 442 | return hash_set<set_info *> (); |
4f673c5e | 443 | work_list.safe_push (phi); |
9243c3d1 | 444 | |
4f673c5e | 445 | while (!work_list.is_empty ()) |
9243c3d1 | 446 | { |
4f673c5e JZZ |
447 | phi_info *phi = work_list.pop (); |
448 | visited_list.add (phi); | |
449 | for (use_info *use : phi->inputs ()) | |
9243c3d1 | 450 | { |
4f673c5e | 451 | def_info *def = use->def (); |
6b6b9c68 JZZ |
452 | set_info *set = safe_dyn_cast<set_info *> (def); |
453 | if (!set) | |
454 | return hash_set<set_info *> (); | |
9243c3d1 | 455 | |
6b6b9c68 | 456 | gcc_assert (!set->insn ()->is_debug_insn ()); |
9243c3d1 | 457 | |
6b6b9c68 JZZ |
458 | if (insn_should_be_added_p (set->insn (), types)) |
459 | insns.add (set); | |
460 | if (set->insn ()->is_phi ()) | |
4f673c5e | 461 | { |
6b6b9c68 | 462 | phi_info *new_phi = as_a<phi_info *> (set); |
4f673c5e JZZ |
463 | if (!visited_list.contains (new_phi)) |
464 | work_list.safe_push (new_phi); | |
465 | } | |
466 | } | |
9243c3d1 | 467 | } |
4f673c5e JZZ |
468 | return insns; |
469 | } | |
9243c3d1 | 470 | |
6b6b9c68 JZZ |
471 | static hash_set<set_info *> |
472 | get_all_sets (set_info *set, bool /* get_real_inst */ real_p, | |
473 | bool /*get_phi*/ phi_p, bool /* get_function_parameter*/ param_p) | |
474 | { | |
475 | if (real_p && phi_p && param_p) | |
476 | return get_all_sets (safe_dyn_cast<phi_info *> (set), | |
477 | REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET); | |
478 | ||
479 | else if (real_p && param_p) | |
480 | return get_all_sets (safe_dyn_cast<phi_info *> (set), | |
481 | REAL_SET | BB_HEAD_SET | BB_END_SET); | |
482 | ||
483 | else if (real_p) | |
484 | return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET); | |
485 | return hash_set<set_info *> (); | |
486 | } | |
487 | ||
488 | /* Helper function to get AVL operand. */ | |
489 | static rtx | |
490 | get_avl (rtx_insn *rinsn) | |
491 | { | |
492 | if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn)) | |
493 | return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0); | |
494 | ||
495 | if (!has_vl_op (rinsn)) | |
496 | return NULL_RTX; | |
497 | if (get_attr_avl_type (rinsn) == VLMAX) | |
498 | return RVV_VLMAX; | |
499 | extract_insn_cached (rinsn); | |
500 | return recog_data.operand[get_attr_vl_op_idx (rinsn)]; | |
501 | } | |
502 | ||
503 | static set_info * | |
504 | get_same_bb_set (hash_set<set_info *> &sets, const basic_block cfg_bb) | |
505 | { | |
506 | for (set_info *set : sets) | |
507 | if (set->bb ()->cfg_bb () == cfg_bb) | |
508 | return set; | |
509 | return nullptr; | |
510 | } | |
511 | ||
4f673c5e JZZ |
512 | /* Recursively find all predecessor blocks for cfg_bb. */ |
513 | static hash_set<basic_block> | |
514 | get_all_predecessors (basic_block cfg_bb) | |
515 | { | |
516 | hash_set<basic_block> blocks; | |
517 | auto_vec<basic_block> work_list; | |
518 | hash_set<basic_block> visited_list; | |
519 | work_list.safe_push (cfg_bb); | |
9243c3d1 | 520 | |
4f673c5e | 521 | while (!work_list.is_empty ()) |
9243c3d1 | 522 | { |
4f673c5e JZZ |
523 | basic_block new_cfg_bb = work_list.pop (); |
524 | visited_list.add (new_cfg_bb); | |
525 | edge e; | |
526 | edge_iterator ei; | |
527 | FOR_EACH_EDGE (e, ei, new_cfg_bb->preds) | |
528 | { | |
529 | if (!visited_list.contains (e->src)) | |
530 | work_list.safe_push (e->src); | |
531 | blocks.add (e->src); | |
532 | } | |
9243c3d1 | 533 | } |
4f673c5e JZZ |
534 | return blocks; |
535 | } | |
9243c3d1 | 536 | |
4f673c5e JZZ |
537 | /* Return true if there is an INSN in insns staying in the block BB. */ |
538 | static bool | |
6b6b9c68 | 539 | any_set_in_bb_p (hash_set<set_info *> sets, const bb_info *bb) |
4f673c5e | 540 | { |
6b6b9c68 JZZ |
541 | for (const set_info *set : sets) |
542 | if (set->bb ()->index () == bb->index ()) | |
4f673c5e JZZ |
543 | return true; |
544 | return false; | |
9243c3d1 JZZ |
545 | } |
546 | ||
547 | /* Helper function to get SEW operand. We always have SEW value for | |
548 | all RVV instructions that have VTYPE OP. */ | |
549 | static uint8_t | |
550 | get_sew (rtx_insn *rinsn) | |
551 | { | |
552 | return get_attr_sew (rinsn); | |
553 | } | |
554 | ||
555 | /* Helper function to get VLMUL operand. We always have VLMUL value for | |
556 | all RVV instructions that have VTYPE OP. */ | |
557 | static enum vlmul_type | |
558 | get_vlmul (rtx_insn *rinsn) | |
559 | { | |
560 | return (enum vlmul_type) get_attr_vlmul (rinsn); | |
561 | } | |
562 | ||
563 | /* Get default tail policy. */ | |
564 | static bool | |
565 | get_default_ta () | |
566 | { | |
567 | /* For the instruction that doesn't require TA, we still need a default value | |
568 | to emit vsetvl. We pick up the default value according to prefer policy. */ | |
569 | return (bool) (get_prefer_tail_policy () & 0x1 | |
570 | || (get_prefer_tail_policy () >> 1 & 0x1)); | |
571 | } | |
572 | ||
573 | /* Get default mask policy. */ | |
574 | static bool | |
575 | get_default_ma () | |
576 | { | |
577 | /* For the instruction that doesn't require MA, we still need a default value | |
578 | to emit vsetvl. We pick up the default value according to prefer policy. */ | |
579 | return (bool) (get_prefer_mask_policy () & 0x1 | |
580 | || (get_prefer_mask_policy () >> 1 & 0x1)); | |
581 | } | |
582 | ||
583 | /* Helper function to get TA operand. */ | |
584 | static bool | |
585 | tail_agnostic_p (rtx_insn *rinsn) | |
586 | { | |
587 | /* If it doesn't have TA, we return agnostic by default. */ | |
588 | extract_insn_cached (rinsn); | |
589 | int ta = get_attr_ta (rinsn); | |
590 | return ta == INVALID_ATTRIBUTE ? get_default_ta () : IS_AGNOSTIC (ta); | |
591 | } | |
592 | ||
593 | /* Helper function to get MA operand. */ | |
594 | static bool | |
595 | mask_agnostic_p (rtx_insn *rinsn) | |
596 | { | |
597 | /* If it doesn't have MA, we return agnostic by default. */ | |
598 | extract_insn_cached (rinsn); | |
599 | int ma = get_attr_ma (rinsn); | |
600 | return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma); | |
601 | } | |
602 | ||
603 | /* Return true if FN has a vector instruction that use VL/VTYPE. */ | |
604 | static bool | |
605 | has_vector_insn (function *fn) | |
606 | { | |
607 | basic_block cfg_bb; | |
608 | rtx_insn *rinsn; | |
609 | FOR_ALL_BB_FN (cfg_bb, fn) | |
610 | FOR_BB_INSNS (cfg_bb, rinsn) | |
611 | if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn)) | |
612 | return true; | |
613 | return false; | |
614 | } | |
615 | ||
616 | /* Emit vsetvl instruction. */ | |
617 | static rtx | |
e577b91b | 618 | gen_vsetvl_pat (enum vsetvl_type insn_type, const vl_vtype_info &info, rtx vl) |
9243c3d1 JZZ |
619 | { |
620 | rtx avl = info.get_avl (); | |
621 | rtx sew = gen_int_mode (info.get_sew (), Pmode); | |
622 | rtx vlmul = gen_int_mode (info.get_vlmul (), Pmode); | |
623 | rtx ta = gen_int_mode (info.get_ta (), Pmode); | |
624 | rtx ma = gen_int_mode (info.get_ma (), Pmode); | |
625 | ||
626 | if (insn_type == VSETVL_NORMAL) | |
627 | { | |
628 | gcc_assert (vl != NULL_RTX); | |
629 | return gen_vsetvl (Pmode, vl, avl, sew, vlmul, ta, ma); | |
630 | } | |
631 | else if (insn_type == VSETVL_VTYPE_CHANGE_ONLY) | |
632 | return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma); | |
633 | else | |
634 | return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma); | |
635 | } | |
636 | ||
637 | static rtx | |
e577b91b | 638 | gen_vsetvl_pat (rtx_insn *rinsn, const vector_insn_info &info) |
9243c3d1 JZZ |
639 | { |
640 | rtx new_pat; | |
60bd33bc JZZ |
641 | vl_vtype_info new_info = info; |
642 | if (info.get_insn () && info.get_insn ()->rtl () | |
643 | && fault_first_load_p (info.get_insn ()->rtl ())) | |
644 | new_info.set_avl_info ( | |
645 | avl_info (get_avl (info.get_insn ()->rtl ()), nullptr)); | |
9243c3d1 JZZ |
646 | if (vsetvl_insn_p (rinsn) || vlmax_avl_p (info.get_avl ())) |
647 | { | |
648 | rtx dest = get_vl (rinsn); | |
60bd33bc | 649 | new_pat = gen_vsetvl_pat (VSETVL_NORMAL, new_info, dest); |
9243c3d1 JZZ |
650 | } |
651 | else if (INSN_CODE (rinsn) == CODE_FOR_vsetvl_vtype_change_only) | |
60bd33bc | 652 | new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, new_info, NULL_RTX); |
9243c3d1 | 653 | else |
60bd33bc | 654 | new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX); |
9243c3d1 JZZ |
655 | return new_pat; |
656 | } | |
657 | ||
658 | static void | |
659 | emit_vsetvl_insn (enum vsetvl_type insn_type, enum emit_type emit_type, | |
e577b91b | 660 | const vl_vtype_info &info, rtx vl, rtx_insn *rinsn) |
9243c3d1 JZZ |
661 | { |
662 | rtx pat = gen_vsetvl_pat (insn_type, info, vl); | |
663 | if (dump_file) | |
664 | { | |
665 | fprintf (dump_file, "\nInsert vsetvl insn PATTERN:\n"); | |
666 | print_rtl_single (dump_file, pat); | |
667 | } | |
668 | ||
669 | if (emit_type == EMIT_DIRECT) | |
670 | emit_insn (pat); | |
671 | else if (emit_type == EMIT_BEFORE) | |
672 | emit_insn_before (pat, rinsn); | |
673 | else | |
674 | emit_insn_after (pat, rinsn); | |
675 | } | |
676 | ||
677 | static void | |
678 | eliminate_insn (rtx_insn *rinsn) | |
679 | { | |
680 | if (dump_file) | |
681 | { | |
682 | fprintf (dump_file, "\nEliminate insn %d:\n", INSN_UID (rinsn)); | |
683 | print_rtl_single (dump_file, rinsn); | |
684 | } | |
685 | if (in_sequence_p ()) | |
686 | remove_insn (rinsn); | |
687 | else | |
688 | delete_insn (rinsn); | |
689 | } | |
690 | ||
a481eed8 | 691 | static vsetvl_type |
9243c3d1 JZZ |
692 | insert_vsetvl (enum emit_type emit_type, rtx_insn *rinsn, |
693 | const vector_insn_info &info, const vector_insn_info &prev_info) | |
694 | { | |
695 | /* Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same | |
696 | VLMAX. */ | |
697 | if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p () | |
4f673c5e | 698 | && info.compatible_avl_p (prev_info) && info.same_vlmax_p (prev_info)) |
9243c3d1 JZZ |
699 | { |
700 | emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX, | |
701 | rinsn); | |
a481eed8 | 702 | return VSETVL_VTYPE_CHANGE_ONLY; |
9243c3d1 JZZ |
703 | } |
704 | ||
705 | if (info.has_avl_imm ()) | |
706 | { | |
707 | emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX, | |
708 | rinsn); | |
a481eed8 | 709 | return VSETVL_DISCARD_RESULT; |
9243c3d1 JZZ |
710 | } |
711 | ||
712 | if (info.has_avl_no_reg ()) | |
713 | { | |
714 | /* We can only use x0, x0 if there's no chance of the vtype change causing | |
715 | the previous vl to become invalid. */ | |
716 | if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p () | |
717 | && info.same_vlmax_p (prev_info)) | |
718 | { | |
719 | emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX, | |
720 | rinsn); | |
a481eed8 | 721 | return VSETVL_VTYPE_CHANGE_ONLY; |
9243c3d1 JZZ |
722 | } |
723 | /* Otherwise use an AVL of 0 to avoid depending on previous vl. */ | |
724 | vl_vtype_info new_info = info; | |
725 | new_info.set_avl_info (avl_info (const0_rtx, nullptr)); | |
726 | emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, new_info, NULL_RTX, | |
727 | rinsn); | |
a481eed8 | 728 | return VSETVL_DISCARD_RESULT; |
9243c3d1 JZZ |
729 | } |
730 | ||
731 | /* Use X0 as the DestReg unless AVLReg is X0. We also need to change the | |
732 | opcode if the AVLReg is X0 as they have different register classes for | |
733 | the AVL operand. */ | |
734 | if (vlmax_avl_p (info.get_avl ())) | |
735 | { | |
736 | gcc_assert (has_vtype_op (rinsn) || vsetvl_insn_p (rinsn)); | |
ec99ffab | 737 | rtx vl_op = info.get_avl_reg_rtx (); |
9243c3d1 JZZ |
738 | gcc_assert (!vlmax_avl_p (vl_op)); |
739 | emit_vsetvl_insn (VSETVL_NORMAL, emit_type, info, vl_op, rinsn); | |
a481eed8 | 740 | return VSETVL_NORMAL; |
9243c3d1 JZZ |
741 | } |
742 | ||
743 | emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX, rinsn); | |
744 | ||
745 | if (dump_file) | |
746 | { | |
747 | fprintf (dump_file, "Update VL/VTYPE info, previous info="); | |
748 | prev_info.dump (dump_file); | |
749 | } | |
a481eed8 | 750 | return VSETVL_DISCARD_RESULT; |
9243c3d1 JZZ |
751 | } |
752 | ||
753 | /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them | |
754 | to INSN. If such notes are added to an insn which references a | |
755 | CODE_LABEL, the LABEL_NUSES count is incremented. We have to add | |
756 | that note, because the following loop optimization pass requires | |
757 | them. */ | |
758 | ||
759 | /* ??? If there was a jump optimization pass after gcse and before loop, | |
760 | then we would not need to do this here, because jump would add the | |
761 | necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */ | |
762 | ||
763 | static void | |
27a2a4b6 | 764 | add_label_notes (rtx x, rtx_insn *rinsn) |
9243c3d1 JZZ |
765 | { |
766 | enum rtx_code code = GET_CODE (x); | |
767 | int i, j; | |
768 | const char *fmt; | |
769 | ||
770 | if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x)) | |
771 | { | |
772 | /* This code used to ignore labels that referred to dispatch tables to | |
773 | avoid flow generating (slightly) worse code. | |
774 | ||
775 | We no longer ignore such label references (see LABEL_REF handling in | |
776 | mark_jump_label for additional information). */ | |
777 | ||
778 | /* There's no reason for current users to emit jump-insns with | |
779 | such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET | |
780 | notes. */ | |
27a2a4b6 JZZ |
781 | gcc_assert (!JUMP_P (rinsn)); |
782 | add_reg_note (rinsn, REG_LABEL_OPERAND, label_ref_label (x)); | |
9243c3d1 JZZ |
783 | |
784 | if (LABEL_P (label_ref_label (x))) | |
785 | LABEL_NUSES (label_ref_label (x))++; | |
786 | ||
787 | return; | |
788 | } | |
789 | ||
790 | for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--) | |
791 | { | |
792 | if (fmt[i] == 'e') | |
27a2a4b6 | 793 | add_label_notes (XEXP (x, i), rinsn); |
9243c3d1 JZZ |
794 | else if (fmt[i] == 'E') |
795 | for (j = XVECLEN (x, i) - 1; j >= 0; j--) | |
27a2a4b6 | 796 | add_label_notes (XVECEXP (x, i, j), rinsn); |
9243c3d1 JZZ |
797 | } |
798 | } | |
799 | ||
800 | /* Add EXPR to the end of basic block BB. | |
801 | ||
802 | This is used by both the PRE and code hoisting. */ | |
803 | ||
804 | static void | |
805 | insert_insn_end_basic_block (rtx_insn *rinsn, basic_block cfg_bb) | |
806 | { | |
807 | rtx_insn *end_rinsn = BB_END (cfg_bb); | |
808 | rtx_insn *new_insn; | |
809 | rtx_insn *pat, *pat_end; | |
810 | ||
811 | pat = rinsn; | |
812 | gcc_assert (pat && INSN_P (pat)); | |
813 | ||
814 | pat_end = pat; | |
815 | while (NEXT_INSN (pat_end) != NULL_RTX) | |
816 | pat_end = NEXT_INSN (pat_end); | |
817 | ||
818 | /* If the last end_rinsn is a jump, insert EXPR in front. Similarly we need | |
819 | to take care of trapping instructions in presence of non-call exceptions. | |
820 | */ | |
821 | ||
822 | if (JUMP_P (end_rinsn) | |
823 | || (NONJUMP_INSN_P (end_rinsn) | |
824 | && (!single_succ_p (cfg_bb) | |
825 | || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL))) | |
826 | { | |
827 | /* FIXME: What if something in jump uses value set in new end_rinsn? */ | |
828 | new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb); | |
829 | } | |
830 | ||
831 | /* Likewise if the last end_rinsn is a call, as will happen in the presence | |
832 | of exception handling. */ | |
833 | else if (CALL_P (end_rinsn) | |
834 | && (!single_succ_p (cfg_bb) | |
835 | || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL)) | |
836 | { | |
837 | /* Keeping in mind targets with small register classes and parameters | |
838 | in registers, we search backward and place the instructions before | |
839 | the first parameter is loaded. Do this for everyone for consistency | |
840 | and a presumption that we'll get better code elsewhere as well. */ | |
841 | ||
842 | /* Since different machines initialize their parameter registers | |
843 | in different orders, assume nothing. Collect the set of all | |
844 | parameter registers. */ | |
845 | end_rinsn = find_first_parameter_load (end_rinsn, BB_HEAD (cfg_bb)); | |
846 | ||
847 | /* If we found all the parameter loads, then we want to insert | |
848 | before the first parameter load. | |
849 | ||
850 | If we did not find all the parameter loads, then we might have | |
851 | stopped on the head of the block, which could be a CODE_LABEL. | |
852 | If we inserted before the CODE_LABEL, then we would be putting | |
853 | the end_rinsn in the wrong basic block. In that case, put the | |
854 | end_rinsn after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. | |
855 | */ | |
856 | while (LABEL_P (end_rinsn) || NOTE_INSN_BASIC_BLOCK_P (end_rinsn)) | |
857 | end_rinsn = NEXT_INSN (end_rinsn); | |
858 | ||
859 | new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb); | |
860 | } | |
861 | else | |
862 | new_insn = emit_insn_after_noloc (pat, end_rinsn, cfg_bb); | |
863 | ||
864 | while (1) | |
865 | { | |
866 | if (INSN_P (pat)) | |
867 | add_label_notes (PATTERN (pat), new_insn); | |
868 | if (pat == pat_end) | |
869 | break; | |
870 | pat = NEXT_INSN (pat); | |
871 | } | |
872 | } | |
873 | ||
874 | /* Get VL/VTYPE information for INSN. */ | |
875 | static vl_vtype_info | |
876 | get_vl_vtype_info (const insn_info *insn) | |
877 | { | |
9243c3d1 JZZ |
878 | set_info *set = nullptr; |
879 | rtx avl = ::get_avl (insn->rtl ()); | |
ec99ffab JZZ |
880 | if (avl && REG_P (avl)) |
881 | { | |
882 | if (vlmax_avl_p (avl) && has_vl_op (insn->rtl ())) | |
883 | set | |
884 | = find_access (insn->uses (), REGNO (get_vl (insn->rtl ())))->def (); | |
885 | else if (!vlmax_avl_p (avl)) | |
886 | set = find_access (insn->uses (), REGNO (avl))->def (); | |
887 | else | |
888 | set = nullptr; | |
889 | } | |
9243c3d1 JZZ |
890 | |
891 | uint8_t sew = get_sew (insn->rtl ()); | |
892 | enum vlmul_type vlmul = get_vlmul (insn->rtl ()); | |
893 | uint8_t ratio = get_attr_ratio (insn->rtl ()); | |
894 | /* when get_attr_ratio is invalid, this kind of instructions | |
895 | doesn't care about ratio. However, we still need this value | |
896 | in demand info backward analysis. */ | |
897 | if (ratio == INVALID_ATTRIBUTE) | |
898 | ratio = calculate_ratio (sew, vlmul); | |
899 | bool ta = tail_agnostic_p (insn->rtl ()); | |
900 | bool ma = mask_agnostic_p (insn->rtl ()); | |
901 | ||
902 | /* If merge operand is undef value, we prefer agnostic. */ | |
903 | int merge_op_idx = get_attr_merge_op_idx (insn->rtl ()); | |
904 | if (merge_op_idx != INVALID_ATTRIBUTE | |
905 | && satisfies_constraint_vu (recog_data.operand[merge_op_idx])) | |
906 | { | |
907 | ta = true; | |
908 | ma = true; | |
909 | } | |
910 | ||
911 | vl_vtype_info info (avl_info (avl, set), sew, vlmul, ratio, ta, ma); | |
912 | return info; | |
913 | } | |
914 | ||
915 | static void | |
916 | change_insn (rtx_insn *rinsn, rtx new_pat) | |
917 | { | |
918 | /* We don't apply change on RTL_SSA here since it's possible a | |
919 | new INSN we add in the PASS before which doesn't have RTL_SSA | |
920 | info yet.*/ | |
921 | if (dump_file) | |
922 | { | |
923 | fprintf (dump_file, "\nChange PATTERN of insn %d from:\n", | |
924 | INSN_UID (rinsn)); | |
925 | print_rtl_single (dump_file, PATTERN (rinsn)); | |
926 | } | |
927 | ||
011ba384 | 928 | validate_change (rinsn, &PATTERN (rinsn), new_pat, false); |
9243c3d1 JZZ |
929 | |
930 | if (dump_file) | |
931 | { | |
932 | fprintf (dump_file, "\nto:\n"); | |
933 | print_rtl_single (dump_file, PATTERN (rinsn)); | |
934 | } | |
935 | } | |
936 | ||
60bd33bc JZZ |
937 | static const insn_info * |
938 | get_forward_read_vl_insn (const insn_info *insn) | |
939 | { | |
940 | const bb_info *bb = insn->bb (); | |
941 | for (const insn_info *i = insn->next_nondebug_insn (); | |
942 | real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ()) | |
943 | { | |
944 | if (find_access (i->defs (), VL_REGNUM)) | |
945 | return nullptr; | |
946 | if (read_vl_insn_p (i->rtl ())) | |
947 | return i; | |
948 | } | |
949 | return nullptr; | |
950 | } | |
951 | ||
952 | static const insn_info * | |
953 | get_backward_fault_first_load_insn (const insn_info *insn) | |
954 | { | |
955 | const bb_info *bb = insn->bb (); | |
956 | for (const insn_info *i = insn->prev_nondebug_insn (); | |
957 | real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ()) | |
958 | { | |
959 | if (fault_first_load_p (i->rtl ())) | |
960 | return i; | |
961 | if (find_access (i->defs (), VL_REGNUM)) | |
962 | return nullptr; | |
963 | } | |
964 | return nullptr; | |
965 | } | |
966 | ||
9243c3d1 JZZ |
967 | static bool |
968 | change_insn (function_info *ssa, insn_change change, insn_info *insn, | |
969 | rtx new_pat) | |
970 | { | |
971 | rtx_insn *rinsn = insn->rtl (); | |
972 | auto attempt = ssa->new_change_attempt (); | |
973 | if (!restrict_movement (change)) | |
974 | return false; | |
975 | ||
976 | if (dump_file) | |
977 | { | |
978 | fprintf (dump_file, "\nChange PATTERN of insn %d from:\n", | |
979 | INSN_UID (rinsn)); | |
980 | print_rtl_single (dump_file, PATTERN (rinsn)); | |
9243c3d1 JZZ |
981 | } |
982 | ||
983 | insn_change_watermark watermark; | |
984 | validate_change (rinsn, &PATTERN (rinsn), new_pat, true); | |
985 | ||
986 | /* These routines report failures themselves. */ | |
987 | if (!recog (attempt, change) || !change_is_worthwhile (change, false)) | |
988 | return false; | |
a1e42094 JZZ |
989 | |
990 | /* Fix bug: | |
991 | (insn 12 34 13 2 (set (reg:VNx8DI 120 v24 [orig:134 _1 ] [134]) | |
992 | (if_then_else:VNx8DI (unspec:VNx8BI [ | |
993 | (const_vector:VNx8BI repeat [ | |
994 | (const_int 1 [0x1]) | |
995 | ]) | |
996 | (const_int 0 [0]) | |
997 | (const_int 2 [0x2]) repeated x2 | |
998 | (const_int 0 [0]) | |
999 | (reg:SI 66 vl) | |
1000 | (reg:SI 67 vtype) | |
1001 | ] UNSPEC_VPREDICATE) | |
1002 | (plus:VNx8DI (reg/v:VNx8DI 104 v8 [orig:137 op1 ] [137]) | |
1003 | (sign_extend:VNx8DI (vec_duplicate:VNx8SI (reg:SI 15 a5 | |
1004 | [140])))) (unspec:VNx8DI [ (const_int 0 [0]) ] UNSPEC_VUNDEF))) "rvv.c":8:12 | |
1005 | 2784 {pred_single_widen_addsvnx8di_scalar} (expr_list:REG_EQUIV | |
1006 | (mem/c:VNx8DI (reg:DI 10 a0 [142]) [1 <retval>+0 S[64, 64] A128]) | |
1007 | (expr_list:REG_EQUAL (if_then_else:VNx8DI (unspec:VNx8BI [ | |
1008 | (const_vector:VNx8BI repeat [ | |
1009 | (const_int 1 [0x1]) | |
1010 | ]) | |
1011 | (reg/v:DI 13 a3 [orig:139 vl ] [139]) | |
1012 | (const_int 2 [0x2]) repeated x2 | |
1013 | (const_int 0 [0]) | |
1014 | (reg:SI 66 vl) | |
1015 | (reg:SI 67 vtype) | |
1016 | ] UNSPEC_VPREDICATE) | |
1017 | (plus:VNx8DI (reg/v:VNx8DI 104 v8 [orig:137 op1 ] [137]) | |
1018 | (const_vector:VNx8DI repeat [ | |
1019 | (const_int 2730 [0xaaa]) | |
1020 | ])) | |
1021 | (unspec:VNx8DI [ | |
1022 | (const_int 0 [0]) | |
1023 | ] UNSPEC_VUNDEF)) | |
1024 | (nil)))) | |
1025 | Here we want to remove use "a3". However, the REG_EQUAL/REG_EQUIV note use | |
1026 | "a3" which made us fail in change_insn. We reference to the | |
1027 | 'aarch64-cc-fusion.cc' and add this method. */ | |
1028 | remove_reg_equal_equiv_notes (rinsn); | |
9243c3d1 JZZ |
1029 | confirm_change_group (); |
1030 | ssa->change_insn (change); | |
1031 | ||
1032 | if (dump_file) | |
1033 | { | |
1034 | fprintf (dump_file, "\nto:\n"); | |
1035 | print_rtl_single (dump_file, PATTERN (rinsn)); | |
9243c3d1 JZZ |
1036 | } |
1037 | return true; | |
1038 | } | |
1039 | ||
aef20243 JZZ |
1040 | static void |
1041 | change_vsetvl_insn (const insn_info *insn, const vector_insn_info &info) | |
1042 | { | |
1043 | rtx_insn *rinsn; | |
1044 | if (vector_config_insn_p (insn->rtl ())) | |
1045 | { | |
1046 | rinsn = insn->rtl (); | |
1047 | gcc_assert (vsetvl_insn_p (rinsn) && "Can't handle X0, rs1 vsetvli yet"); | |
1048 | } | |
1049 | else | |
1050 | { | |
1051 | gcc_assert (has_vtype_op (insn->rtl ())); | |
1052 | rinsn = PREV_INSN (insn->rtl ()); | |
1053 | gcc_assert (vector_config_insn_p (rinsn)); | |
1054 | } | |
1055 | rtx new_pat = gen_vsetvl_pat (rinsn, info); | |
1056 | change_insn (rinsn, new_pat); | |
1057 | } | |
1058 | ||
d51f2456 JZ |
1059 | static void |
1060 | local_eliminate_vsetvl_insn (const vector_insn_info &dem) | |
1061 | { | |
1062 | const insn_info *insn = dem.get_insn (); | |
1063 | if (!insn || insn->is_artificial ()) | |
1064 | return; | |
1065 | rtx_insn *rinsn = insn->rtl (); | |
1066 | const bb_info *bb = insn->bb (); | |
1067 | if (vsetvl_insn_p (rinsn)) | |
1068 | { | |
1069 | rtx vl = get_vl (rinsn); | |
1070 | for (insn_info *i = insn->next_nondebug_insn (); | |
1071 | real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ()) | |
1072 | { | |
1073 | if (i->is_call () || i->is_asm () | |
1074 | || find_access (i->defs (), VL_REGNUM) | |
1075 | || find_access (i->defs (), VTYPE_REGNUM)) | |
1076 | return; | |
1077 | ||
1078 | if (has_vtype_op (i->rtl ())) | |
1079 | { | |
1080 | if (!vsetvl_discard_result_insn_p (PREV_INSN (i->rtl ()))) | |
1081 | return; | |
1082 | rtx avl = get_avl (i->rtl ()); | |
1083 | if (avl != vl) | |
1084 | return; | |
1085 | set_info *def = find_access (i->uses (), REGNO (avl))->def (); | |
1086 | if (def->insn () != insn) | |
1087 | return; | |
1088 | ||
1089 | vector_insn_info new_info; | |
1090 | new_info.parse_insn (i); | |
1091 | if (!new_info.skip_avl_compatible_p (dem)) | |
1092 | return; | |
1093 | ||
1094 | new_info.set_avl_info (dem.get_avl_info ()); | |
1095 | new_info = dem.merge (new_info, LOCAL_MERGE); | |
1096 | change_vsetvl_insn (insn, new_info); | |
1097 | eliminate_insn (PREV_INSN (i->rtl ())); | |
1098 | return; | |
1099 | } | |
1100 | } | |
1101 | } | |
1102 | } | |
1103 | ||
4f673c5e | 1104 | static bool |
6b6b9c68 | 1105 | source_equal_p (insn_info *insn1, insn_info *insn2) |
4f673c5e | 1106 | { |
6b6b9c68 JZZ |
1107 | if (!insn1 || !insn2) |
1108 | return false; | |
1109 | rtx_insn *rinsn1 = insn1->rtl (); | |
1110 | rtx_insn *rinsn2 = insn2->rtl (); | |
4f673c5e JZZ |
1111 | if (!rinsn1 || !rinsn2) |
1112 | return false; | |
1113 | rtx note1 = find_reg_equal_equiv_note (rinsn1); | |
1114 | rtx note2 = find_reg_equal_equiv_note (rinsn2); | |
1115 | rtx single_set1 = single_set (rinsn1); | |
1116 | rtx single_set2 = single_set (rinsn2); | |
60bd33bc JZZ |
1117 | if (read_vl_insn_p (rinsn1) && read_vl_insn_p (rinsn2)) |
1118 | { | |
1119 | const insn_info *load1 = get_backward_fault_first_load_insn (insn1); | |
1120 | const insn_info *load2 = get_backward_fault_first_load_insn (insn2); | |
1121 | return load1 && load2 && load1 == load2; | |
1122 | } | |
4f673c5e JZZ |
1123 | |
1124 | if (note1 && note2 && rtx_equal_p (note1, note2)) | |
1125 | return true; | |
6b6b9c68 JZZ |
1126 | |
1127 | /* Since vsetvl instruction is not single SET. | |
1128 | We handle this case specially here. */ | |
1129 | if (vsetvl_insn_p (insn1->rtl ()) && vsetvl_insn_p (insn2->rtl ())) | |
1130 | { | |
1131 | /* For example: | |
1132 | vsetvl1 a6,a5,e32m1 | |
1133 | RVV 1 (use a6 as AVL) | |
1134 | vsetvl2 a5,a5,e8mf4 | |
1135 | RVV 2 (use a5 as AVL) | |
1136 | We consider AVL of RVV 1 and RVV 2 are same so that we can | |
1137 | gain more optimization opportunities. | |
1138 | ||
1139 | Note: insn1_info.compatible_avl_p (insn2_info) | |
1140 | will make sure there is no instruction between vsetvl1 and vsetvl2 | |
1141 | modify a5 since their def will be different if there is instruction | |
1142 | modify a5 and compatible_avl_p will return false. */ | |
1143 | vector_insn_info insn1_info, insn2_info; | |
1144 | insn1_info.parse_insn (insn1); | |
1145 | insn2_info.parse_insn (insn2); | |
1146 | if (insn1_info.same_vlmax_p (insn2_info) | |
1147 | && insn1_info.compatible_avl_p (insn2_info)) | |
1148 | return true; | |
1149 | } | |
1150 | ||
1151 | /* We only handle AVL is set by instructions with no side effects. */ | |
1152 | if (!single_set1 || !single_set2) | |
1153 | return false; | |
1154 | if (!rtx_equal_p (SET_SRC (single_set1), SET_SRC (single_set2))) | |
1155 | return false; | |
1156 | gcc_assert (insn1->uses ().size () == insn2->uses ().size ()); | |
1157 | for (size_t i = 0; i < insn1->uses ().size (); i++) | |
1158 | if (insn1->uses ()[i] != insn2->uses ()[i]) | |
1159 | return false; | |
1160 | return true; | |
4f673c5e JZZ |
1161 | } |
1162 | ||
1163 | /* Helper function to get single same real RTL source. | |
1164 | return NULL if it is not a single real RTL source. */ | |
6b6b9c68 | 1165 | static insn_info * |
4f673c5e JZZ |
1166 | extract_single_source (set_info *set) |
1167 | { | |
1168 | if (!set) | |
1169 | return nullptr; | |
1170 | if (set->insn ()->is_real ()) | |
6b6b9c68 | 1171 | return set->insn (); |
4f673c5e JZZ |
1172 | if (!set->insn ()->is_phi ()) |
1173 | return nullptr; | |
6b6b9c68 | 1174 | hash_set<set_info *> sets = get_all_sets (set, true, false, true); |
4f673c5e | 1175 | |
6b6b9c68 | 1176 | insn_info *first_insn = (*sets.begin ())->insn (); |
4f673c5e JZZ |
1177 | if (first_insn->is_artificial ()) |
1178 | return nullptr; | |
6b6b9c68 | 1179 | for (const set_info *set : sets) |
4f673c5e JZZ |
1180 | { |
1181 | /* If there is a head or end insn, we conservative return | |
1182 | NULL so that VSETVL PASS will insert vsetvl directly. */ | |
6b6b9c68 | 1183 | if (set->insn ()->is_artificial ()) |
4f673c5e | 1184 | return nullptr; |
6b6b9c68 | 1185 | if (!source_equal_p (set->insn (), first_insn)) |
4f673c5e JZZ |
1186 | return nullptr; |
1187 | } | |
1188 | ||
6b6b9c68 | 1189 | return first_insn; |
4f673c5e JZZ |
1190 | } |
1191 | ||
ec99ffab JZZ |
1192 | static unsigned |
1193 | calculate_sew (vlmul_type vlmul, unsigned int ratio) | |
1194 | { | |
1195 | for (const unsigned sew : ALL_SEW) | |
1196 | if (calculate_ratio (sew, vlmul) == ratio) | |
1197 | return sew; | |
1198 | return 0; | |
1199 | } | |
1200 | ||
1201 | static vlmul_type | |
1202 | calculate_vlmul (unsigned int sew, unsigned int ratio) | |
1203 | { | |
1204 | for (const vlmul_type vlmul : ALL_LMUL) | |
1205 | if (calculate_ratio (sew, vlmul) == ratio) | |
1206 | return vlmul; | |
1207 | return LMUL_RESERVED; | |
1208 | } | |
1209 | ||
1210 | static bool | |
1211 | incompatible_avl_p (const vector_insn_info &info1, | |
1212 | const vector_insn_info &info2) | |
1213 | { | |
1214 | return !info1.compatible_avl_p (info2) && !info2.compatible_avl_p (info1); | |
1215 | } | |
1216 | ||
1217 | static bool | |
1218 | different_sew_p (const vector_insn_info &info1, const vector_insn_info &info2) | |
1219 | { | |
1220 | return info1.get_sew () != info2.get_sew (); | |
1221 | } | |
1222 | ||
1223 | static bool | |
1224 | different_lmul_p (const vector_insn_info &info1, const vector_insn_info &info2) | |
1225 | { | |
1226 | return info1.get_vlmul () != info2.get_vlmul (); | |
1227 | } | |
1228 | ||
1229 | static bool | |
1230 | different_ratio_p (const vector_insn_info &info1, const vector_insn_info &info2) | |
1231 | { | |
1232 | return info1.get_ratio () != info2.get_ratio (); | |
1233 | } | |
1234 | ||
1235 | static bool | |
1236 | different_tail_policy_p (const vector_insn_info &info1, | |
1237 | const vector_insn_info &info2) | |
1238 | { | |
1239 | return info1.get_ta () != info2.get_ta (); | |
1240 | } | |
1241 | ||
1242 | static bool | |
1243 | different_mask_policy_p (const vector_insn_info &info1, | |
1244 | const vector_insn_info &info2) | |
1245 | { | |
1246 | return info1.get_ma () != info2.get_ma (); | |
1247 | } | |
1248 | ||
1249 | static bool | |
1250 | possible_zero_avl_p (const vector_insn_info &info1, | |
1251 | const vector_insn_info &info2) | |
1252 | { | |
1253 | return !info1.has_non_zero_avl () || !info2.has_non_zero_avl (); | |
1254 | } | |
1255 | ||
ec99ffab JZZ |
1256 | static bool |
1257 | second_ratio_invalid_for_first_sew_p (const vector_insn_info &info1, | |
1258 | const vector_insn_info &info2) | |
1259 | { | |
1260 | return calculate_vlmul (info1.get_sew (), info2.get_ratio ()) | |
1261 | == LMUL_RESERVED; | |
1262 | } | |
1263 | ||
1264 | static bool | |
1265 | second_ratio_invalid_for_first_lmul_p (const vector_insn_info &info1, | |
1266 | const vector_insn_info &info2) | |
1267 | { | |
1268 | return calculate_sew (info1.get_vlmul (), info2.get_ratio ()) == 0; | |
1269 | } | |
1270 | ||
1271 | static bool | |
1272 | second_sew_less_than_first_sew_p (const vector_insn_info &info1, | |
1273 | const vector_insn_info &info2) | |
1274 | { | |
1275 | return info2.get_sew () < info1.get_sew (); | |
1276 | } | |
1277 | ||
1278 | static bool | |
1279 | first_sew_less_than_second_sew_p (const vector_insn_info &info1, | |
1280 | const vector_insn_info &info2) | |
1281 | { | |
1282 | return info1.get_sew () < info2.get_sew (); | |
1283 | } | |
1284 | ||
1285 | /* return 0 if LMUL1 == LMUL2. | |
1286 | return -1 if LMUL1 < LMUL2. | |
1287 | return 1 if LMUL1 > LMUL2. */ | |
1288 | static int | |
1289 | compare_lmul (vlmul_type vlmul1, vlmul_type vlmul2) | |
1290 | { | |
1291 | if (vlmul1 == vlmul2) | |
1292 | return 0; | |
1293 | ||
1294 | switch (vlmul1) | |
1295 | { | |
1296 | case LMUL_1: | |
1297 | if (vlmul2 == LMUL_2 || vlmul2 == LMUL_4 || vlmul2 == LMUL_8) | |
1298 | return 1; | |
1299 | else | |
1300 | return -1; | |
1301 | case LMUL_2: | |
1302 | if (vlmul2 == LMUL_4 || vlmul2 == LMUL_8) | |
1303 | return 1; | |
1304 | else | |
1305 | return -1; | |
1306 | case LMUL_4: | |
1307 | if (vlmul2 == LMUL_8) | |
1308 | return 1; | |
1309 | else | |
1310 | return -1; | |
1311 | case LMUL_8: | |
1312 | return -1; | |
1313 | case LMUL_F2: | |
1314 | if (vlmul2 == LMUL_1 || vlmul2 == LMUL_2 || vlmul2 == LMUL_4 | |
1315 | || vlmul2 == LMUL_8) | |
1316 | return 1; | |
1317 | else | |
1318 | return -1; | |
1319 | case LMUL_F4: | |
1320 | if (vlmul2 == LMUL_F2 || vlmul2 == LMUL_1 || vlmul2 == LMUL_2 | |
1321 | || vlmul2 == LMUL_4 || vlmul2 == LMUL_8) | |
1322 | return 1; | |
1323 | else | |
1324 | return -1; | |
1325 | case LMUL_F8: | |
1326 | return 0; | |
1327 | default: | |
1328 | gcc_unreachable (); | |
1329 | } | |
1330 | } | |
1331 | ||
1332 | static bool | |
1333 | second_lmul_less_than_first_lmul_p (const vector_insn_info &info1, | |
1334 | const vector_insn_info &info2) | |
1335 | { | |
1336 | return compare_lmul (info2.get_vlmul (), info1.get_vlmul ()) == -1; | |
1337 | } | |
1338 | ||
ec99ffab JZZ |
1339 | static bool |
1340 | second_ratio_less_than_first_ratio_p (const vector_insn_info &info1, | |
1341 | const vector_insn_info &info2) | |
1342 | { | |
1343 | return info2.get_ratio () < info1.get_ratio (); | |
1344 | } | |
1345 | ||
1346 | static CONSTEXPR const demands_cond incompatible_conds[] = { | |
1347 | #define DEF_INCOMPATIBLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, \ | |
1348 | GE_SEW1, TAIL_POLICTY1, MASK_POLICY1, AVL2, \ | |
1349 | SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, \ | |
1350 | TAIL_POLICTY2, MASK_POLICY2, COND) \ | |
1351 | {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1, \ | |
1352 | MASK_POLICY1}, \ | |
1353 | {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \ | |
1354 | MASK_POLICY2}}, \ | |
1355 | COND}, | |
1356 | #include "riscv-vsetvl.def" | |
1357 | }; | |
1358 | ||
1359 | static unsigned | |
1360 | greatest_sew (const vector_insn_info &info1, const vector_insn_info &info2) | |
1361 | { | |
1362 | return std::max (info1.get_sew (), info2.get_sew ()); | |
1363 | } | |
1364 | ||
1365 | static unsigned | |
1366 | first_sew (const vector_insn_info &info1, const vector_insn_info &) | |
1367 | { | |
1368 | return info1.get_sew (); | |
1369 | } | |
1370 | ||
1371 | static unsigned | |
1372 | second_sew (const vector_insn_info &, const vector_insn_info &info2) | |
1373 | { | |
1374 | return info2.get_sew (); | |
1375 | } | |
1376 | ||
1377 | static vlmul_type | |
1378 | first_vlmul (const vector_insn_info &info1, const vector_insn_info &) | |
1379 | { | |
1380 | return info1.get_vlmul (); | |
1381 | } | |
1382 | ||
1383 | static vlmul_type | |
1384 | second_vlmul (const vector_insn_info &, const vector_insn_info &info2) | |
1385 | { | |
1386 | return info2.get_vlmul (); | |
1387 | } | |
1388 | ||
1389 | static unsigned | |
1390 | first_ratio (const vector_insn_info &info1, const vector_insn_info &) | |
1391 | { | |
1392 | return info1.get_ratio (); | |
1393 | } | |
1394 | ||
1395 | static unsigned | |
1396 | second_ratio (const vector_insn_info &, const vector_insn_info &info2) | |
1397 | { | |
1398 | return info2.get_ratio (); | |
1399 | } | |
1400 | ||
1401 | static vlmul_type | |
1402 | vlmul_for_first_sew_second_ratio (const vector_insn_info &info1, | |
1403 | const vector_insn_info &info2) | |
1404 | { | |
1405 | return calculate_vlmul (info1.get_sew (), info2.get_ratio ()); | |
1406 | } | |
1407 | ||
1408 | static unsigned | |
1409 | ratio_for_second_sew_first_vlmul (const vector_insn_info &info1, | |
1410 | const vector_insn_info &info2) | |
1411 | { | |
1412 | return calculate_ratio (info2.get_sew (), info1.get_vlmul ()); | |
1413 | } | |
1414 | ||
1415 | static CONSTEXPR const demands_fuse_rule fuse_rules[] = { | |
1416 | #define DEF_SEW_LMUL_FUSE_RULE(DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1, \ | |
1417 | DEMAND_GE_SEW1, DEMAND_SEW2, DEMAND_LMUL2, \ | |
1418 | DEMAND_RATIO2, DEMAND_GE_SEW2, NEW_DEMAND_SEW, \ | |
1419 | NEW_DEMAND_LMUL, NEW_DEMAND_RATIO, \ | |
1420 | NEW_DEMAND_GE_SEW, NEW_SEW, NEW_VLMUL, \ | |
1421 | NEW_RATIO) \ | |
1422 | {{{DEMAND_ANY, DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1, DEMAND_ANY, \ | |
1423 | DEMAND_GE_SEW1, DEMAND_ANY, DEMAND_ANY}, \ | |
1424 | {DEMAND_ANY, DEMAND_SEW2, DEMAND_LMUL2, DEMAND_RATIO2, DEMAND_ANY, \ | |
1425 | DEMAND_GE_SEW2, DEMAND_ANY, DEMAND_ANY}}, \ | |
1426 | NEW_DEMAND_SEW, \ | |
1427 | NEW_DEMAND_LMUL, \ | |
1428 | NEW_DEMAND_RATIO, \ | |
1429 | NEW_DEMAND_GE_SEW, \ | |
1430 | NEW_SEW, \ | |
1431 | NEW_VLMUL, \ | |
1432 | NEW_RATIO}, | |
1433 | #include "riscv-vsetvl.def" | |
1434 | }; | |
1435 | ||
1436 | static bool | |
1437 | always_unavailable (const vector_insn_info &, const vector_insn_info &) | |
1438 | { | |
1439 | return true; | |
1440 | } | |
1441 | ||
1442 | static bool | |
1443 | avl_unavailable_p (const vector_insn_info &info1, const vector_insn_info &info2) | |
1444 | { | |
1445 | return !info2.compatible_avl_p (info1.get_avl_info ()); | |
1446 | } | |
1447 | ||
1448 | static bool | |
1449 | sew_unavailable_p (const vector_insn_info &info1, const vector_insn_info &info2) | |
1450 | { | |
1451 | if (!info2.demand_p (DEMAND_LMUL) && !info2.demand_p (DEMAND_RATIO)) | |
1452 | { | |
1453 | if (info2.demand_p (DEMAND_GE_SEW)) | |
1454 | return info1.get_sew () < info2.get_sew (); | |
1455 | return info1.get_sew () != info2.get_sew (); | |
1456 | } | |
1457 | return true; | |
1458 | } | |
1459 | ||
1460 | static bool | |
1461 | lmul_unavailable_p (const vector_insn_info &info1, | |
1462 | const vector_insn_info &info2) | |
1463 | { | |
1464 | if (info1.get_vlmul () == info2.get_vlmul () && !info2.demand_p (DEMAND_SEW) | |
1465 | && !info2.demand_p (DEMAND_RATIO)) | |
1466 | return false; | |
1467 | return true; | |
1468 | } | |
1469 | ||
1470 | static bool | |
1471 | ge_sew_unavailable_p (const vector_insn_info &info1, | |
1472 | const vector_insn_info &info2) | |
1473 | { | |
1474 | if (!info2.demand_p (DEMAND_LMUL) && !info2.demand_p (DEMAND_RATIO) | |
1475 | && info2.demand_p (DEMAND_GE_SEW)) | |
1476 | return info1.get_sew () < info2.get_sew (); | |
1477 | return true; | |
1478 | } | |
1479 | ||
1480 | static bool | |
1481 | ge_sew_lmul_unavailable_p (const vector_insn_info &info1, | |
1482 | const vector_insn_info &info2) | |
1483 | { | |
1484 | if (!info2.demand_p (DEMAND_RATIO) && info2.demand_p (DEMAND_GE_SEW)) | |
1485 | return info1.get_sew () < info2.get_sew (); | |
1486 | return true; | |
1487 | } | |
1488 | ||
1489 | static bool | |
1490 | ge_sew_ratio_unavailable_p (const vector_insn_info &info1, | |
1491 | const vector_insn_info &info2) | |
1492 | { | |
1493 | if (!info2.demand_p (DEMAND_LMUL) && info2.demand_p (DEMAND_GE_SEW)) | |
1494 | return info1.get_sew () < info2.get_sew (); | |
1495 | return true; | |
1496 | } | |
1497 | ||
1498 | static CONSTEXPR const demands_cond unavailable_conds[] = { | |
1499 | #define DEF_UNAVAILABLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, \ | |
1500 | TAIL_POLICTY1, MASK_POLICY1, AVL2, SEW2, LMUL2, \ | |
1501 | RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \ | |
1502 | MASK_POLICY2, COND) \ | |
1503 | {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1, \ | |
1504 | MASK_POLICY1}, \ | |
1505 | {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \ | |
1506 | MASK_POLICY2}}, \ | |
1507 | COND}, | |
1508 | #include "riscv-vsetvl.def" | |
1509 | }; | |
1510 | ||
1511 | static bool | |
1512 | same_sew_lmul_demand_p (const bool *dems1, const bool *dems2) | |
1513 | { | |
1514 | return dems1[DEMAND_SEW] == dems2[DEMAND_SEW] | |
1515 | && dems1[DEMAND_LMUL] == dems2[DEMAND_LMUL] | |
1516 | && dems1[DEMAND_RATIO] == dems2[DEMAND_RATIO] && !dems1[DEMAND_GE_SEW] | |
1517 | && !dems2[DEMAND_GE_SEW]; | |
1518 | } | |
1519 | ||
1520 | static bool | |
1521 | propagate_avl_across_demands_p (const vector_insn_info &info1, | |
1522 | const vector_insn_info &info2) | |
1523 | { | |
1524 | if (info2.demand_p (DEMAND_AVL)) | |
1525 | { | |
1526 | if (info2.demand_p (DEMAND_NONZERO_AVL)) | |
1527 | return info1.demand_p (DEMAND_AVL) | |
1528 | && !info1.demand_p (DEMAND_NONZERO_AVL) && info1.has_avl_reg (); | |
1529 | } | |
1530 | else | |
1531 | return info1.demand_p (DEMAND_AVL) && info1.has_avl_reg (); | |
1532 | return false; | |
1533 | } | |
1534 | ||
1535 | static bool | |
a481eed8 | 1536 | reg_available_p (const insn_info *insn, const vector_insn_info &info) |
ec99ffab | 1537 | { |
a481eed8 | 1538 | if (info.has_avl_reg () && !info.get_avl_source ()) |
44c918b5 | 1539 | return false; |
a481eed8 JZZ |
1540 | insn_info *def_insn = info.get_avl_source ()->insn (); |
1541 | if (def_insn->bb () == insn->bb ()) | |
1542 | return before_p (def_insn, insn); | |
ec99ffab | 1543 | else |
a481eed8 JZZ |
1544 | return dominated_by_p (CDI_DOMINATORS, insn->bb ()->cfg_bb (), |
1545 | def_insn->bb ()->cfg_bb ()); | |
ec99ffab JZZ |
1546 | } |
1547 | ||
60bd33bc JZZ |
1548 | /* Return true if the instruction support relaxed compatible check. */ |
1549 | static bool | |
1550 | support_relaxed_compatible_p (const vector_insn_info &info1, | |
1551 | const vector_insn_info &info2) | |
1552 | { | |
1553 | if (fault_first_load_p (info1.get_insn ()->rtl ()) | |
1554 | && info2.demand_p (DEMAND_AVL) && info2.has_avl_reg () | |
1555 | && info2.get_avl_source () && info2.get_avl_source ()->insn ()->is_phi ()) | |
1556 | { | |
1557 | hash_set<set_info *> sets | |
1558 | = get_all_sets (info2.get_avl_source (), true, false, false); | |
1559 | for (set_info *set : sets) | |
1560 | { | |
1561 | if (read_vl_insn_p (set->insn ()->rtl ())) | |
1562 | { | |
1563 | const insn_info *insn | |
1564 | = get_backward_fault_first_load_insn (set->insn ()); | |
1565 | if (insn == info1.get_insn ()) | |
1566 | return info2.compatible_vtype_p (info1); | |
1567 | } | |
1568 | } | |
1569 | } | |
1570 | return false; | |
1571 | } | |
1572 | ||
1573 | /* Return true if the block is worthwhile backward propagation. */ | |
1574 | static bool | |
1575 | backward_propagate_worthwhile_p (const basic_block cfg_bb, | |
1576 | const vector_block_info block_info) | |
1577 | { | |
1578 | if (loop_basic_block_p (cfg_bb)) | |
1579 | { | |
1580 | if (block_info.reaching_out.valid_or_dirty_p ()) | |
1581 | { | |
1582 | if (block_info.local_dem.compatible_p (block_info.reaching_out)) | |
1583 | { | |
1584 | /* Case 1 (Can backward propagate): | |
1585 | .... | |
1586 | bb0: | |
1587 | ... | |
1588 | for (int i = 0; i < n; i++) | |
1589 | { | |
1590 | vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7); | |
1591 | __riscv_vse16_v_i16mf4 (out + i + 5, v, 7); | |
1592 | } | |
1593 | The local_dem is compatible with reaching_out. Such case is | |
1594 | worthwhile backward propagation. */ | |
1595 | return true; | |
1596 | } | |
1597 | else | |
1598 | { | |
1599 | if (support_relaxed_compatible_p (block_info.reaching_out, | |
1600 | block_info.local_dem)) | |
1601 | return true; | |
1602 | /* Case 2 (Don't backward propagate): | |
1603 | .... | |
1604 | bb0: | |
1605 | ... | |
1606 | for (int i = 0; i < n; i++) | |
1607 | { | |
1608 | vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7); | |
1609 | __riscv_vse16_v_i16mf4 (out + i + 5, v, 7); | |
1610 | vint16mf2_t v2 = __riscv_vle16_v_i16mf2 (in + i + 6, 8); | |
1611 | __riscv_vse16_v_i16mf2 (out + i + 6, v, 8); | |
1612 | } | |
1613 | The local_dem is incompatible with reaching_out. | |
1614 | It makes no sense to backward propagate the local_dem since we | |
1615 | can't avoid VSETVL inside the loop. */ | |
1616 | return false; | |
1617 | } | |
1618 | } | |
1619 | else | |
1620 | { | |
1621 | gcc_assert (block_info.reaching_out.unknown_p ()); | |
1622 | /* Case 3 (Don't backward propagate): | |
1623 | .... | |
1624 | bb0: | |
1625 | ... | |
1626 | for (int i = 0; i < n; i++) | |
1627 | { | |
1628 | vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7); | |
1629 | __riscv_vse16_v_i16mf4 (out + i + 5, v, 7); | |
1630 | fn3 (); | |
1631 | } | |
1632 | The local_dem is VALID, but the reaching_out is UNKNOWN. | |
1633 | It makes no sense to backward propagate the local_dem since we | |
1634 | can't avoid VSETVL inside the loop. */ | |
1635 | return false; | |
1636 | } | |
1637 | } | |
1638 | ||
1639 | return true; | |
1640 | } | |
1641 | ||
a2d12abe JZZ |
1642 | /* Count the number of REGNO in RINSN. */ |
1643 | static int | |
1644 | count_regno_occurrences (rtx_insn *rinsn, unsigned int regno) | |
1645 | { | |
1646 | int count = 0; | |
1647 | extract_insn (rinsn); | |
1648 | for (int i = 0; i < recog_data.n_operands; i++) | |
1649 | if (refers_to_regno_p (regno, recog_data.operand[i])) | |
1650 | count++; | |
1651 | return count; | |
1652 | } | |
1653 | ||
12b23c71 JZZ |
1654 | avl_info::avl_info (const avl_info &other) |
1655 | { | |
1656 | m_value = other.get_value (); | |
1657 | m_source = other.get_source (); | |
1658 | } | |
1659 | ||
9243c3d1 JZZ |
1660 | avl_info::avl_info (rtx value_in, set_info *source_in) |
1661 | : m_value (value_in), m_source (source_in) | |
1662 | {} | |
1663 | ||
4f673c5e JZZ |
1664 | bool |
1665 | avl_info::single_source_equal_p (const avl_info &other) const | |
1666 | { | |
1667 | set_info *set1 = m_source; | |
1668 | set_info *set2 = other.get_source (); | |
6b6b9c68 JZZ |
1669 | insn_info *insn1 = extract_single_source (set1); |
1670 | insn_info *insn2 = extract_single_source (set2); | |
1671 | if (!insn1 || !insn2) | |
1672 | return false; | |
1673 | return source_equal_p (insn1, insn2); | |
1674 | } | |
1675 | ||
1676 | bool | |
1677 | avl_info::multiple_source_equal_p (const avl_info &other) const | |
1678 | { | |
d875d756 JZ |
1679 | /* When the def info is same in RTL_SSA namespace, it's safe |
1680 | to consider they are avl compatible. */ | |
1681 | if (m_source == other.get_source ()) | |
1682 | return true; | |
6b6b9c68 | 1683 | |
d875d756 JZ |
1684 | /* We only consider handle PHI node. */ |
1685 | if (!m_source->insn ()->is_phi () || !other.get_source ()->insn ()->is_phi ()) | |
1686 | return false; | |
6b6b9c68 | 1687 | |
d875d756 JZ |
1688 | phi_info *phi1 = as_a<phi_info *> (m_source); |
1689 | phi_info *phi2 = as_a<phi_info *> (other.get_source ()); | |
6b6b9c68 | 1690 | |
d875d756 JZ |
1691 | if (phi1->is_degenerate () && phi2->is_degenerate ()) |
1692 | { | |
1693 | /* Degenerate PHI means the PHI node only have one input. */ | |
1694 | ||
1695 | /* If both PHI nodes have the same single input in use list. | |
1696 | We consider they are AVL compatible. */ | |
1697 | if (phi1->input_value (0) == phi2->input_value (0)) | |
1698 | return true; | |
1699 | } | |
1700 | /* TODO: We can support more optimization cases in the future. */ | |
1701 | return false; | |
4f673c5e JZZ |
1702 | } |
1703 | ||
9243c3d1 JZZ |
1704 | avl_info & |
1705 | avl_info::operator= (const avl_info &other) | |
1706 | { | |
1707 | m_value = other.get_value (); | |
1708 | m_source = other.get_source (); | |
1709 | return *this; | |
1710 | } | |
1711 | ||
1712 | bool | |
1713 | avl_info::operator== (const avl_info &other) const | |
1714 | { | |
1715 | if (!m_value) | |
1716 | return !other.get_value (); | |
1717 | if (!other.get_value ()) | |
1718 | return false; | |
1719 | ||
9243c3d1 JZZ |
1720 | if (GET_CODE (m_value) != GET_CODE (other.get_value ())) |
1721 | return false; | |
1722 | ||
1723 | /* Handle CONST_INT AVL. */ | |
1724 | if (CONST_INT_P (m_value)) | |
1725 | return INTVAL (m_value) == INTVAL (other.get_value ()); | |
1726 | ||
1727 | /* Handle VLMAX AVL. */ | |
1728 | if (vlmax_avl_p (m_value)) | |
1729 | return vlmax_avl_p (other.get_value ()); | |
1730 | ||
4f673c5e JZZ |
1731 | /* If any source is undef value, we think they are not equal. */ |
1732 | if (!m_source || !other.get_source ()) | |
1733 | return false; | |
1734 | ||
1735 | /* If both sources are single source (defined by a single real RTL) | |
1736 | and their definitions are same. */ | |
1737 | if (single_source_equal_p (other)) | |
1738 | return true; | |
1739 | ||
6b6b9c68 | 1740 | return multiple_source_equal_p (other); |
9243c3d1 JZZ |
1741 | } |
1742 | ||
1743 | bool | |
1744 | avl_info::operator!= (const avl_info &other) const | |
1745 | { | |
1746 | return !(*this == other); | |
1747 | } | |
1748 | ||
ec99ffab JZZ |
1749 | bool |
1750 | avl_info::has_non_zero_avl () const | |
1751 | { | |
1752 | if (has_avl_imm ()) | |
1753 | return INTVAL (get_value ()) > 0; | |
1754 | if (has_avl_reg ()) | |
1755 | return vlmax_avl_p (get_value ()); | |
1756 | return false; | |
1757 | } | |
1758 | ||
9243c3d1 JZZ |
1759 | /* Initialize VL/VTYPE information. */ |
1760 | vl_vtype_info::vl_vtype_info (avl_info avl_in, uint8_t sew_in, | |
1761 | enum vlmul_type vlmul_in, uint8_t ratio_in, | |
1762 | bool ta_in, bool ma_in) | |
1763 | : m_avl (avl_in), m_sew (sew_in), m_vlmul (vlmul_in), m_ratio (ratio_in), | |
1764 | m_ta (ta_in), m_ma (ma_in) | |
1765 | { | |
1766 | gcc_assert (valid_sew_p (m_sew) && "Unexpected SEW"); | |
1767 | } | |
1768 | ||
1769 | bool | |
1770 | vl_vtype_info::operator== (const vl_vtype_info &other) const | |
1771 | { | |
6b6b9c68 | 1772 | return same_avl_p (other) && m_sew == other.get_sew () |
9243c3d1 JZZ |
1773 | && m_vlmul == other.get_vlmul () && m_ta == other.get_ta () |
1774 | && m_ma == other.get_ma () && m_ratio == other.get_ratio (); | |
1775 | } | |
1776 | ||
1777 | bool | |
1778 | vl_vtype_info::operator!= (const vl_vtype_info &other) const | |
1779 | { | |
1780 | return !(*this == other); | |
1781 | } | |
1782 | ||
9243c3d1 JZZ |
1783 | bool |
1784 | vl_vtype_info::same_avl_p (const vl_vtype_info &other) const | |
1785 | { | |
6b6b9c68 JZZ |
1786 | /* We need to compare both RTL and SET. If both AVL are CONST_INT. |
1787 | For example, const_int 3 and const_int 4, we need to compare | |
1788 | RTL. If both AVL are REG and their REGNO are same, we need to | |
1789 | compare SET. */ | |
1790 | return get_avl () == other.get_avl () | |
1791 | && get_avl_source () == other.get_avl_source (); | |
9243c3d1 JZZ |
1792 | } |
1793 | ||
1794 | bool | |
1795 | vl_vtype_info::same_vtype_p (const vl_vtype_info &other) const | |
1796 | { | |
1797 | return get_sew () == other.get_sew () && get_vlmul () == other.get_vlmul () | |
1798 | && get_ta () == other.get_ta () && get_ma () == other.get_ma (); | |
1799 | } | |
1800 | ||
1801 | bool | |
1802 | vl_vtype_info::same_vlmax_p (const vl_vtype_info &other) const | |
1803 | { | |
1804 | return get_ratio () == other.get_ratio (); | |
1805 | } | |
1806 | ||
1807 | /* Compare the compatibility between Dem1 and Dem2. | |
1808 | If Dem1 > Dem2, Dem1 has bigger compatibility then Dem2 | |
1809 | meaning Dem1 is easier be compatible with others than Dem2 | |
1810 | or Dem2 is stricter than Dem1. | |
1811 | For example, Dem1 (demand SEW + LMUL) > Dem2 (demand RATIO). */ | |
9243c3d1 JZZ |
1812 | bool |
1813 | vector_insn_info::operator>= (const vector_insn_info &other) const | |
1814 | { | |
60bd33bc JZZ |
1815 | if (support_relaxed_compatible_p (*this, other)) |
1816 | { | |
1817 | unsigned array_size = sizeof (unavailable_conds) / sizeof (demands_cond); | |
1818 | /* Bypass AVL unavailable cases. */ | |
1819 | for (unsigned i = 2; i < array_size; i++) | |
1820 | if (unavailable_conds[i].pair.match_cond_p (this->get_demands (), | |
1821 | other.get_demands ()) | |
1822 | && unavailable_conds[i].incompatible_p (*this, other)) | |
1823 | return false; | |
1824 | return true; | |
1825 | } | |
1826 | ||
1827 | if (!other.compatible_p (static_cast<const vl_vtype_info &> (*this))) | |
1828 | return false; | |
1829 | if (!this->compatible_p (static_cast<const vl_vtype_info &> (other))) | |
9243c3d1 JZZ |
1830 | return true; |
1831 | ||
1832 | if (*this == other) | |
1833 | return true; | |
1834 | ||
ec99ffab JZZ |
1835 | for (const auto &cond : unavailable_conds) |
1836 | if (cond.pair.match_cond_p (this->get_demands (), other.get_demands ()) | |
1837 | && cond.incompatible_p (*this, other)) | |
1838 | return false; | |
9243c3d1 JZZ |
1839 | |
1840 | return true; | |
1841 | } | |
1842 | ||
1843 | bool | |
1844 | vector_insn_info::operator== (const vector_insn_info &other) const | |
1845 | { | |
1846 | gcc_assert (!uninit_p () && !other.uninit_p () | |
1847 | && "Uninitialization should not happen"); | |
1848 | ||
1849 | /* Empty is only equal to another Empty. */ | |
1850 | if (empty_p ()) | |
1851 | return other.empty_p (); | |
1852 | if (other.empty_p ()) | |
1853 | return empty_p (); | |
1854 | ||
1855 | /* Unknown is only equal to another Unknown. */ | |
1856 | if (unknown_p ()) | |
1857 | return other.unknown_p (); | |
1858 | if (other.unknown_p ()) | |
1859 | return unknown_p (); | |
1860 | ||
1861 | for (size_t i = 0; i < NUM_DEMAND; i++) | |
1862 | if (m_demands[i] != other.demand_p ((enum demand_type) i)) | |
1863 | return false; | |
1864 | ||
7ae4d1df JZZ |
1865 | if (vector_config_insn_p (m_insn->rtl ()) |
1866 | || vector_config_insn_p (other.get_insn ()->rtl ())) | |
1867 | if (m_insn != other.get_insn ()) | |
1868 | return false; | |
9243c3d1 JZZ |
1869 | |
1870 | if (!same_avl_p (other)) | |
1871 | return false; | |
1872 | ||
1873 | /* If the full VTYPE is valid, check that it is the same. */ | |
1874 | return same_vtype_p (other); | |
1875 | } | |
1876 | ||
1877 | void | |
1878 | vector_insn_info::parse_insn (rtx_insn *rinsn) | |
1879 | { | |
1880 | *this = vector_insn_info (); | |
1881 | if (!NONDEBUG_INSN_P (rinsn)) | |
1882 | return; | |
1883 | if (!has_vtype_op (rinsn)) | |
1884 | return; | |
1885 | m_state = VALID; | |
1886 | extract_insn_cached (rinsn); | |
1887 | const rtx avl = recog_data.operand[get_attr_vl_op_idx (rinsn)]; | |
1888 | m_avl = avl_info (avl, nullptr); | |
1889 | m_sew = ::get_sew (rinsn); | |
1890 | m_vlmul = ::get_vlmul (rinsn); | |
1891 | m_ta = tail_agnostic_p (rinsn); | |
1892 | m_ma = mask_agnostic_p (rinsn); | |
1893 | } | |
1894 | ||
1895 | void | |
1896 | vector_insn_info::parse_insn (insn_info *insn) | |
1897 | { | |
1898 | *this = vector_insn_info (); | |
1899 | ||
1900 | /* Return if it is debug insn for the consistency with optimize == 0. */ | |
1901 | if (insn->is_debug_insn ()) | |
1902 | return; | |
1903 | ||
1904 | /* We set it as unknown since we don't what will happen in CALL or ASM. */ | |
1905 | if (insn->is_call () || insn->is_asm ()) | |
1906 | { | |
1907 | set_unknown (); | |
1908 | return; | |
1909 | } | |
1910 | ||
1911 | /* If this is something that updates VL/VTYPE that we don't know about, set | |
1912 | the state to unknown. */ | |
60bd33bc | 1913 | if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()) |
9243c3d1 JZZ |
1914 | && (find_access (insn->defs (), VL_REGNUM) |
1915 | || find_access (insn->defs (), VTYPE_REGNUM))) | |
1916 | { | |
1917 | set_unknown (); | |
1918 | return; | |
1919 | } | |
1920 | ||
1921 | if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())) | |
1922 | return; | |
1923 | ||
1924 | /* Warning: This function has to work on both the lowered (i.e. post | |
1925 | emit_local_forward_vsetvls) and pre-lowering forms. The main implication | |
1926 | of this is that it can't use the value of a SEW, VL, or Policy operand as | |
1927 | they might be stale after lowering. */ | |
1928 | vl_vtype_info::operator= (get_vl_vtype_info (insn)); | |
1929 | m_insn = insn; | |
1930 | m_state = VALID; | |
1931 | if (vector_config_insn_p (insn->rtl ())) | |
1932 | { | |
1933 | m_demands[DEMAND_AVL] = true; | |
1934 | m_demands[DEMAND_RATIO] = true; | |
1935 | return; | |
1936 | } | |
1937 | ||
1938 | if (has_vl_op (insn->rtl ())) | |
1939 | m_demands[DEMAND_AVL] = true; | |
1940 | ||
1941 | if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE) | |
1942 | m_demands[DEMAND_RATIO] = true; | |
1943 | else | |
1944 | { | |
1945 | /* TODO: By default, if it doesn't demand RATIO, we set it | |
1946 | demand SEW && LMUL both. Some instructions may demand SEW | |
1947 | only and ignore LMUL, will fix it later. */ | |
1948 | m_demands[DEMAND_SEW] = true; | |
ec99ffab JZZ |
1949 | if (!ignore_vlmul_insn_p (insn->rtl ())) |
1950 | m_demands[DEMAND_LMUL] = true; | |
9243c3d1 JZZ |
1951 | } |
1952 | ||
1953 | if (get_attr_ta (insn->rtl ()) != INVALID_ATTRIBUTE) | |
1954 | m_demands[DEMAND_TAIL_POLICY] = true; | |
1955 | if (get_attr_ma (insn->rtl ()) != INVALID_ATTRIBUTE) | |
1956 | m_demands[DEMAND_MASK_POLICY] = true; | |
6b6b9c68 JZZ |
1957 | |
1958 | if (vector_config_insn_p (insn->rtl ())) | |
1959 | return; | |
1960 | ||
ec99ffab JZZ |
1961 | if (scalar_move_insn_p (insn->rtl ())) |
1962 | { | |
1963 | if (m_avl.has_non_zero_avl ()) | |
1964 | m_demands[DEMAND_NONZERO_AVL] = true; | |
1965 | if (m_ta) | |
1966 | m_demands[DEMAND_GE_SEW] = true; | |
1967 | } | |
1968 | ||
1969 | if (!m_avl.has_avl_reg () || vlmax_avl_p (get_avl ()) || !m_avl.get_source ()) | |
1970 | return; | |
1971 | if (!m_avl.get_source ()->insn ()->is_real () | |
1972 | && !m_avl.get_source ()->insn ()->is_phi ()) | |
6b6b9c68 JZZ |
1973 | return; |
1974 | ||
1975 | insn_info *def_insn = extract_single_source (m_avl.get_source ()); | |
ec99ffab JZZ |
1976 | if (!def_insn || !vsetvl_insn_p (def_insn->rtl ())) |
1977 | return; | |
9243c3d1 | 1978 | |
ec99ffab JZZ |
1979 | vector_insn_info new_info; |
1980 | new_info.parse_insn (def_insn); | |
1981 | if (!same_vlmax_p (new_info) && !scalar_move_insn_p (insn->rtl ())) | |
1982 | return; | |
1983 | /* TODO: Currently, we don't forward AVL for non-VLMAX vsetvl. */ | |
1984 | if (vlmax_avl_p (new_info.get_avl ())) | |
1985 | set_avl_info (avl_info (new_info.get_avl (), get_avl_source ())); | |
1986 | ||
1987 | if (scalar_move_insn_p (insn->rtl ()) && m_avl.has_non_zero_avl ()) | |
1988 | m_demands[DEMAND_NONZERO_AVL] = true; | |
9243c3d1 JZZ |
1989 | } |
1990 | ||
1991 | bool | |
1992 | vector_insn_info::compatible_p (const vector_insn_info &other) const | |
1993 | { | |
1994 | gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p () | |
1995 | && "Can't compare invalid demanded infos"); | |
1996 | ||
ec99ffab | 1997 | for (const auto &cond : incompatible_conds) |
60bd33bc | 1998 | if (cond.dual_incompatible_p (*this, other)) |
ec99ffab | 1999 | return false; |
9243c3d1 JZZ |
2000 | return true; |
2001 | } | |
2002 | ||
d51f2456 JZ |
2003 | bool |
2004 | vector_insn_info::skip_avl_compatible_p (const vector_insn_info &other) const | |
2005 | { | |
2006 | gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p () | |
2007 | && "Can't compare invalid demanded infos"); | |
2008 | unsigned array_size = sizeof (incompatible_conds) / sizeof (demands_cond); | |
2009 | /* Bypass AVL incompatible cases. */ | |
2010 | for (unsigned i = 1; i < array_size; i++) | |
2011 | if (incompatible_conds[i].dual_incompatible_p (*this, other)) | |
2012 | return false; | |
2013 | return true; | |
2014 | } | |
2015 | ||
9243c3d1 JZZ |
2016 | bool |
2017 | vector_insn_info::compatible_avl_p (const vl_vtype_info &other) const | |
2018 | { | |
2019 | gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info"); | |
2020 | gcc_assert (!unknown_p () && "Can't compare AVL in unknown state"); | |
2021 | if (!demand_p (DEMAND_AVL)) | |
2022 | return true; | |
ec99ffab JZZ |
2023 | if (demand_p (DEMAND_NONZERO_AVL) && other.has_non_zero_avl ()) |
2024 | return true; | |
9243c3d1 JZZ |
2025 | return get_avl_info () == other.get_avl_info (); |
2026 | } | |
2027 | ||
4f673c5e JZZ |
2028 | bool |
2029 | vector_insn_info::compatible_avl_p (const avl_info &other) const | |
2030 | { | |
2031 | gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info"); | |
2032 | gcc_assert (!unknown_p () && "Can't compare AVL in unknown state"); | |
2033 | gcc_assert (demand_p (DEMAND_AVL) && "Can't compare AVL undemand state"); | |
ec99ffab JZZ |
2034 | if (!demand_p (DEMAND_AVL)) |
2035 | return true; | |
2036 | if (demand_p (DEMAND_NONZERO_AVL) && other.has_non_zero_avl ()) | |
2037 | return true; | |
4f673c5e JZZ |
2038 | return get_avl_info () == other; |
2039 | } | |
2040 | ||
9243c3d1 JZZ |
2041 | bool |
2042 | vector_insn_info::compatible_vtype_p (const vl_vtype_info &other) const | |
2043 | { | |
2044 | gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info"); | |
2045 | gcc_assert (!unknown_p () && "Can't compare VTYPE in unknown state"); | |
ec99ffab JZZ |
2046 | if (demand_p (DEMAND_SEW)) |
2047 | { | |
2048 | if (!demand_p (DEMAND_GE_SEW) && m_sew != other.get_sew ()) | |
2049 | return false; | |
2050 | if (demand_p (DEMAND_GE_SEW) && m_sew > other.get_sew ()) | |
2051 | return false; | |
2052 | } | |
9243c3d1 JZZ |
2053 | if (demand_p (DEMAND_LMUL) && m_vlmul != other.get_vlmul ()) |
2054 | return false; | |
2055 | if (demand_p (DEMAND_RATIO) && m_ratio != other.get_ratio ()) | |
2056 | return false; | |
2057 | if (demand_p (DEMAND_TAIL_POLICY) && m_ta != other.get_ta ()) | |
2058 | return false; | |
2059 | if (demand_p (DEMAND_MASK_POLICY) && m_ma != other.get_ma ()) | |
2060 | return false; | |
2061 | return true; | |
2062 | } | |
2063 | ||
2064 | /* Determine whether the vector instructions requirements represented by | |
2065 | Require are compatible with the previous vsetvli instruction represented | |
2066 | by this. INSN is the instruction whose requirements we're considering. */ | |
2067 | bool | |
2068 | vector_insn_info::compatible_p (const vl_vtype_info &curr_info) const | |
2069 | { | |
2070 | gcc_assert (!uninit_p () && "Can't handle uninitialized info"); | |
2071 | if (empty_p ()) | |
2072 | return false; | |
2073 | ||
2074 | /* Nothing is compatible with Unknown. */ | |
2075 | if (unknown_p ()) | |
2076 | return false; | |
2077 | ||
2078 | /* If the instruction doesn't need an AVLReg and the SEW matches, consider | |
2079 | it compatible. */ | |
2080 | if (!demand_p (DEMAND_AVL)) | |
2081 | if (m_sew == curr_info.get_sew ()) | |
2082 | return true; | |
2083 | ||
2084 | return compatible_avl_p (curr_info) && compatible_vtype_p (curr_info); | |
2085 | } | |
2086 | ||
6b6b9c68 JZZ |
2087 | bool |
2088 | vector_insn_info::available_p (const vector_insn_info &other) const | |
2089 | { | |
ec99ffab JZZ |
2090 | return *this >= other; |
2091 | } | |
2092 | ||
2093 | void | |
2094 | vector_insn_info::fuse_avl (const vector_insn_info &info1, | |
2095 | const vector_insn_info &info2) | |
2096 | { | |
2097 | set_insn (info1.get_insn ()); | |
2098 | if (info1.demand_p (DEMAND_AVL)) | |
2099 | { | |
2100 | if (info1.demand_p (DEMAND_NONZERO_AVL)) | |
2101 | { | |
2102 | if (info2.demand_p (DEMAND_AVL) | |
2103 | && !info2.demand_p (DEMAND_NONZERO_AVL)) | |
2104 | { | |
2105 | set_avl_info (info2.get_avl_info ()); | |
2106 | set_demand (DEMAND_AVL, true); | |
2107 | set_demand (DEMAND_NONZERO_AVL, false); | |
2108 | return; | |
2109 | } | |
2110 | } | |
2111 | set_avl_info (info1.get_avl_info ()); | |
2112 | set_demand (DEMAND_NONZERO_AVL, info1.demand_p (DEMAND_NONZERO_AVL)); | |
2113 | } | |
2114 | else | |
2115 | { | |
2116 | set_avl_info (info2.get_avl_info ()); | |
2117 | set_demand (DEMAND_NONZERO_AVL, info2.demand_p (DEMAND_NONZERO_AVL)); | |
2118 | } | |
2119 | set_demand (DEMAND_AVL, | |
2120 | info1.demand_p (DEMAND_AVL) || info2.demand_p (DEMAND_AVL)); | |
2121 | } | |
2122 | ||
2123 | void | |
2124 | vector_insn_info::fuse_sew_lmul (const vector_insn_info &info1, | |
2125 | const vector_insn_info &info2) | |
2126 | { | |
2127 | /* We need to fuse sew && lmul according to demand info: | |
2128 | ||
2129 | 1. GE_SEW. | |
2130 | 2. SEW. | |
2131 | 3. LMUL. | |
2132 | 4. RATIO. */ | |
2133 | if (same_sew_lmul_demand_p (info1.get_demands (), info2.get_demands ())) | |
2134 | { | |
2135 | set_demand (DEMAND_SEW, info2.demand_p (DEMAND_SEW)); | |
2136 | set_demand (DEMAND_LMUL, info2.demand_p (DEMAND_LMUL)); | |
2137 | set_demand (DEMAND_RATIO, info2.demand_p (DEMAND_RATIO)); | |
2138 | set_demand (DEMAND_GE_SEW, info2.demand_p (DEMAND_GE_SEW)); | |
2139 | set_sew (info2.get_sew ()); | |
2140 | set_vlmul (info2.get_vlmul ()); | |
2141 | set_ratio (info2.get_ratio ()); | |
2142 | return; | |
2143 | } | |
2144 | for (const auto &rule : fuse_rules) | |
2145 | { | |
2146 | if (rule.pair.match_cond_p (info1.get_demands (), info2.get_demands ())) | |
2147 | { | |
2148 | set_demand (DEMAND_SEW, rule.demand_sew_p); | |
2149 | set_demand (DEMAND_LMUL, rule.demand_lmul_p); | |
2150 | set_demand (DEMAND_RATIO, rule.demand_ratio_p); | |
2151 | set_demand (DEMAND_GE_SEW, rule.demand_ge_sew_p); | |
2152 | set_sew (rule.new_sew (info1, info2)); | |
2153 | set_vlmul (rule.new_vlmul (info1, info2)); | |
2154 | set_ratio (rule.new_ratio (info1, info2)); | |
2155 | return; | |
2156 | } | |
2157 | if (rule.pair.match_cond_p (info2.get_demands (), info1.get_demands ())) | |
2158 | { | |
2159 | set_demand (DEMAND_SEW, rule.demand_sew_p); | |
2160 | set_demand (DEMAND_LMUL, rule.demand_lmul_p); | |
2161 | set_demand (DEMAND_RATIO, rule.demand_ratio_p); | |
2162 | set_demand (DEMAND_GE_SEW, rule.demand_ge_sew_p); | |
2163 | set_sew (rule.new_sew (info2, info1)); | |
2164 | set_vlmul (rule.new_vlmul (info2, info1)); | |
2165 | set_ratio (rule.new_ratio (info2, info1)); | |
2166 | return; | |
2167 | } | |
2168 | } | |
2169 | gcc_unreachable (); | |
2170 | } | |
2171 | ||
2172 | void | |
2173 | vector_insn_info::fuse_tail_policy (const vector_insn_info &info1, | |
2174 | const vector_insn_info &info2) | |
2175 | { | |
2176 | if (info1.demand_p (DEMAND_TAIL_POLICY)) | |
2177 | { | |
2178 | set_ta (info1.get_ta ()); | |
2179 | demand (DEMAND_TAIL_POLICY); | |
2180 | } | |
2181 | else if (info2.demand_p (DEMAND_TAIL_POLICY)) | |
2182 | { | |
2183 | set_ta (info2.get_ta ()); | |
2184 | demand (DEMAND_TAIL_POLICY); | |
2185 | } | |
2186 | else | |
2187 | set_ta (get_default_ta ()); | |
2188 | } | |
2189 | ||
2190 | void | |
2191 | vector_insn_info::fuse_mask_policy (const vector_insn_info &info1, | |
2192 | const vector_insn_info &info2) | |
2193 | { | |
2194 | if (info1.demand_p (DEMAND_MASK_POLICY)) | |
2195 | { | |
2196 | set_ma (info1.get_ma ()); | |
2197 | demand (DEMAND_MASK_POLICY); | |
2198 | } | |
2199 | else if (info2.demand_p (DEMAND_MASK_POLICY)) | |
2200 | { | |
2201 | set_ma (info2.get_ma ()); | |
2202 | demand (DEMAND_MASK_POLICY); | |
2203 | } | |
2204 | else | |
2205 | set_ma (get_default_ma ()); | |
6b6b9c68 JZZ |
2206 | } |
2207 | ||
9243c3d1 JZZ |
2208 | vector_insn_info |
2209 | vector_insn_info::merge (const vector_insn_info &merge_info, | |
d51f2456 | 2210 | enum merge_type type) const |
9243c3d1 | 2211 | { |
6b6b9c68 JZZ |
2212 | if (!vsetvl_insn_p (get_insn ()->rtl ())) |
2213 | gcc_assert (this->compatible_p (merge_info) | |
2214 | && "Can't merge incompatible demanded infos"); | |
9243c3d1 JZZ |
2215 | |
2216 | vector_insn_info new_info; | |
ec99ffab | 2217 | new_info.set_valid (); |
4f673c5e | 2218 | if (type == LOCAL_MERGE) |
9243c3d1 | 2219 | { |
4f673c5e | 2220 | /* For local backward data flow, we always update INSN && AVL as the |
ec99ffab JZZ |
2221 | latest INSN and AVL so that we can keep track status of each INSN. */ |
2222 | new_info.fuse_avl (merge_info, *this); | |
9243c3d1 JZZ |
2223 | } |
2224 | else | |
2225 | { | |
4f673c5e | 2226 | /* For global data flow, we should keep original INSN and AVL if they |
ec99ffab | 2227 | valid since we should keep the life information of each block. |
9243c3d1 | 2228 | |
ec99ffab JZZ |
2229 | For example: |
2230 | bb 0 -> bb 1. | |
2231 | We should keep INSN && AVL of bb 1 since we will eventually emit | |
2232 | vsetvl instruction according to INSN and AVL of bb 1. */ | |
2233 | new_info.fuse_avl (*this, merge_info); | |
2234 | } | |
9243c3d1 | 2235 | |
ec99ffab JZZ |
2236 | new_info.fuse_sew_lmul (*this, merge_info); |
2237 | new_info.fuse_tail_policy (*this, merge_info); | |
2238 | new_info.fuse_mask_policy (*this, merge_info); | |
9243c3d1 JZZ |
2239 | return new_info; |
2240 | } | |
2241 | ||
60bd33bc JZZ |
2242 | bool |
2243 | vector_insn_info::update_fault_first_load_avl (insn_info *insn) | |
2244 | { | |
2245 | // Update AVL to vl-output of the fault first load. | |
2246 | const insn_info *read_vl = get_forward_read_vl_insn (insn); | |
2247 | if (read_vl) | |
2248 | { | |
2249 | rtx vl = SET_DEST (PATTERN (read_vl->rtl ())); | |
2250 | def_info *def = find_access (read_vl->defs (), REGNO (vl)); | |
2251 | set_info *set = safe_dyn_cast<set_info *> (def); | |
2252 | set_avl_info (avl_info (vl, set)); | |
2253 | set_insn (insn); | |
2254 | return true; | |
2255 | } | |
2256 | return false; | |
2257 | } | |
2258 | ||
9243c3d1 JZZ |
2259 | void |
2260 | vector_insn_info::dump (FILE *file) const | |
2261 | { | |
2262 | fprintf (file, "["); | |
2263 | if (uninit_p ()) | |
2264 | fprintf (file, "UNINITIALIZED,"); | |
2265 | else if (valid_p ()) | |
2266 | fprintf (file, "VALID,"); | |
2267 | else if (unknown_p ()) | |
2268 | fprintf (file, "UNKNOWN,"); | |
2269 | else if (empty_p ()) | |
2270 | fprintf (file, "EMPTY,"); | |
6b6b9c68 JZZ |
2271 | else if (hard_empty_p ()) |
2272 | fprintf (file, "HARD_EMPTY,"); | |
4f673c5e JZZ |
2273 | else if (dirty_with_killed_avl_p ()) |
2274 | fprintf (file, "DIRTY_WITH_KILLED_AVL,"); | |
9243c3d1 JZZ |
2275 | else |
2276 | fprintf (file, "DIRTY,"); | |
2277 | ||
2278 | fprintf (file, "Demand field={%d(VL),", demand_p (DEMAND_AVL)); | |
ec99ffab | 2279 | fprintf (file, "%d(DEMAND_NONZERO_AVL),", demand_p (DEMAND_NONZERO_AVL)); |
9243c3d1 | 2280 | fprintf (file, "%d(SEW),", demand_p (DEMAND_SEW)); |
ec99ffab | 2281 | fprintf (file, "%d(DEMAND_GE_SEW),", demand_p (DEMAND_GE_SEW)); |
9243c3d1 JZZ |
2282 | fprintf (file, "%d(LMUL),", demand_p (DEMAND_LMUL)); |
2283 | fprintf (file, "%d(RATIO),", demand_p (DEMAND_RATIO)); | |
2284 | fprintf (file, "%d(TAIL_POLICY),", demand_p (DEMAND_TAIL_POLICY)); | |
2285 | fprintf (file, "%d(MASK_POLICY)}\n", demand_p (DEMAND_MASK_POLICY)); | |
2286 | ||
2287 | fprintf (file, "AVL="); | |
2288 | print_rtl_single (file, get_avl ()); | |
2289 | fprintf (file, "SEW=%d,", get_sew ()); | |
2290 | fprintf (file, "VLMUL=%d,", get_vlmul ()); | |
2291 | fprintf (file, "RATIO=%d,", get_ratio ()); | |
2292 | fprintf (file, "TAIL_POLICY=%d,", get_ta ()); | |
2293 | fprintf (file, "MASK_POLICY=%d", get_ma ()); | |
2294 | fprintf (file, "]\n"); | |
2295 | ||
2296 | if (valid_p ()) | |
2297 | { | |
2298 | if (get_insn ()) | |
2299 | { | |
12b23c71 JZZ |
2300 | fprintf (file, "The real INSN="); |
2301 | print_rtl_single (file, get_insn ()->rtl ()); | |
9243c3d1 | 2302 | } |
9243c3d1 JZZ |
2303 | } |
2304 | } | |
2305 | ||
2306 | vector_infos_manager::vector_infos_manager () | |
2307 | { | |
2308 | vector_edge_list = nullptr; | |
2309 | vector_kill = nullptr; | |
2310 | vector_del = nullptr; | |
2311 | vector_insert = nullptr; | |
2312 | vector_antic = nullptr; | |
2313 | vector_transp = nullptr; | |
2314 | vector_comp = nullptr; | |
2315 | vector_avin = nullptr; | |
2316 | vector_avout = nullptr; | |
2317 | vector_insn_infos.safe_grow (get_max_uid ()); | |
2318 | vector_block_infos.safe_grow (last_basic_block_for_fn (cfun)); | |
2319 | if (!optimize) | |
2320 | { | |
2321 | basic_block cfg_bb; | |
2322 | rtx_insn *rinsn; | |
2323 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
2324 | { | |
2325 | vector_block_infos[cfg_bb->index].local_dem = vector_insn_info (); | |
2326 | vector_block_infos[cfg_bb->index].reaching_out = vector_insn_info (); | |
2327 | FOR_BB_INSNS (cfg_bb, rinsn) | |
2328 | vector_insn_infos[INSN_UID (rinsn)].parse_insn (rinsn); | |
2329 | } | |
2330 | } | |
2331 | else | |
2332 | { | |
2333 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2334 | { | |
2335 | vector_block_infos[bb->index ()].local_dem = vector_insn_info (); | |
2336 | vector_block_infos[bb->index ()].reaching_out = vector_insn_info (); | |
2337 | for (insn_info *insn : bb->real_insns ()) | |
2338 | vector_insn_infos[insn->uid ()].parse_insn (insn); | |
acc10c79 | 2339 | vector_block_infos[bb->index ()].probability = profile_probability (); |
9243c3d1 JZZ |
2340 | } |
2341 | } | |
2342 | } | |
2343 | ||
2344 | void | |
2345 | vector_infos_manager::create_expr (vector_insn_info &info) | |
2346 | { | |
2347 | for (size_t i = 0; i < vector_exprs.length (); i++) | |
2348 | if (*vector_exprs[i] == info) | |
2349 | return; | |
2350 | vector_exprs.safe_push (&info); | |
2351 | } | |
2352 | ||
2353 | size_t | |
2354 | vector_infos_manager::get_expr_id (const vector_insn_info &info) const | |
2355 | { | |
2356 | for (size_t i = 0; i < vector_exprs.length (); i++) | |
2357 | if (*vector_exprs[i] == info) | |
2358 | return i; | |
2359 | gcc_unreachable (); | |
2360 | } | |
2361 | ||
2362 | auto_vec<size_t> | |
2363 | vector_infos_manager::get_all_available_exprs ( | |
2364 | const vector_insn_info &info) const | |
2365 | { | |
2366 | auto_vec<size_t> available_list; | |
2367 | for (size_t i = 0; i < vector_exprs.length (); i++) | |
6b6b9c68 | 2368 | if (info.available_p (*vector_exprs[i])) |
9243c3d1 JZZ |
2369 | available_list.safe_push (i); |
2370 | return available_list; | |
2371 | } | |
2372 | ||
d06e9264 JZ |
2373 | bool |
2374 | vector_infos_manager::all_empty_predecessor_p (const basic_block cfg_bb) const | |
2375 | { | |
2376 | hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb); | |
2377 | for (const basic_block pred_cfg_bb : pred_cfg_bbs) | |
2378 | { | |
2379 | const auto &pred_block_info = vector_block_infos[pred_cfg_bb->index]; | |
2380 | if (!pred_block_info.local_dem.valid_or_dirty_p () | |
2381 | && !pred_block_info.reaching_out.valid_or_dirty_p ()) | |
2382 | continue; | |
2383 | return false; | |
2384 | } | |
2385 | return true; | |
2386 | } | |
2387 | ||
9243c3d1 JZZ |
2388 | bool |
2389 | vector_infos_manager::all_same_ratio_p (sbitmap bitdata) const | |
2390 | { | |
2391 | if (bitmap_empty_p (bitdata)) | |
2392 | return false; | |
2393 | ||
2394 | int ratio = -1; | |
2395 | unsigned int bb_index; | |
2396 | sbitmap_iterator sbi; | |
2397 | ||
2398 | EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi) | |
2399 | { | |
2400 | if (ratio == -1) | |
2401 | ratio = vector_exprs[bb_index]->get_ratio (); | |
2402 | else if (vector_exprs[bb_index]->get_ratio () != ratio) | |
2403 | return false; | |
2404 | } | |
2405 | return true; | |
2406 | } | |
2407 | ||
ff8f9544 JZ |
2408 | /* Return TRUE if the incoming vector configuration state |
2409 | to CFG_BB is compatible with the vector configuration | |
2410 | state in CFG_BB, FALSE otherwise. */ | |
2411 | bool | |
2412 | vector_infos_manager::all_avail_in_compatible_p (const basic_block cfg_bb) const | |
2413 | { | |
2414 | const auto &info = vector_block_infos[cfg_bb->index].local_dem; | |
2415 | sbitmap avin = vector_avin[cfg_bb->index]; | |
2416 | unsigned int bb_index; | |
2417 | sbitmap_iterator sbi; | |
2418 | EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi) | |
2419 | { | |
2420 | const auto &avin_info | |
2421 | = static_cast<const vl_vtype_info &> (*vector_exprs[bb_index]); | |
2422 | if (!info.compatible_p (avin_info)) | |
2423 | return false; | |
2424 | } | |
2425 | return true; | |
2426 | } | |
2427 | ||
005fad9d JZZ |
2428 | bool |
2429 | vector_infos_manager::all_same_avl_p (const basic_block cfg_bb, | |
2430 | sbitmap bitdata) const | |
2431 | { | |
2432 | if (bitmap_empty_p (bitdata)) | |
2433 | return false; | |
2434 | ||
2435 | const auto &block_info = vector_block_infos[cfg_bb->index]; | |
2436 | if (!block_info.local_dem.demand_p (DEMAND_AVL)) | |
2437 | return true; | |
2438 | ||
2439 | avl_info avl = block_info.local_dem.get_avl_info (); | |
2440 | unsigned int bb_index; | |
2441 | sbitmap_iterator sbi; | |
2442 | ||
2443 | EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi) | |
2444 | { | |
2445 | if (vector_exprs[bb_index]->get_avl_info () != avl) | |
2446 | return false; | |
2447 | } | |
2448 | return true; | |
2449 | } | |
2450 | ||
9243c3d1 JZZ |
2451 | size_t |
2452 | vector_infos_manager::expr_set_num (sbitmap bitdata) const | |
2453 | { | |
2454 | size_t count = 0; | |
2455 | for (size_t i = 0; i < vector_exprs.length (); i++) | |
2456 | if (bitmap_bit_p (bitdata, i)) | |
2457 | count++; | |
2458 | return count; | |
2459 | } | |
2460 | ||
2461 | void | |
2462 | vector_infos_manager::release (void) | |
2463 | { | |
2464 | if (!vector_insn_infos.is_empty ()) | |
2465 | vector_insn_infos.release (); | |
2466 | if (!vector_block_infos.is_empty ()) | |
2467 | vector_block_infos.release (); | |
2468 | if (!vector_exprs.is_empty ()) | |
2469 | vector_exprs.release (); | |
2470 | ||
ec99ffab JZZ |
2471 | gcc_assert (to_refine_vsetvls.is_empty ()); |
2472 | gcc_assert (to_delete_vsetvls.is_empty ()); | |
9243c3d1 | 2473 | if (optimize > 0) |
cfe3fbc6 JZZ |
2474 | free_bitmap_vectors (); |
2475 | } | |
2476 | ||
2477 | void | |
2478 | vector_infos_manager::create_bitmap_vectors (void) | |
2479 | { | |
2480 | /* Create the bitmap vectors. */ | |
2481 | vector_antic = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2482 | vector_exprs.length ()); | |
2483 | vector_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2484 | vector_exprs.length ()); | |
2485 | vector_comp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2486 | vector_exprs.length ()); | |
2487 | vector_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2488 | vector_exprs.length ()); | |
2489 | vector_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2490 | vector_exprs.length ()); | |
2491 | vector_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2492 | vector_exprs.length ()); | |
2493 | ||
2494 | bitmap_vector_ones (vector_transp, last_basic_block_for_fn (cfun)); | |
2495 | bitmap_vector_clear (vector_antic, last_basic_block_for_fn (cfun)); | |
2496 | bitmap_vector_clear (vector_comp, last_basic_block_for_fn (cfun)); | |
2497 | } | |
2498 | ||
2499 | void | |
2500 | vector_infos_manager::free_bitmap_vectors (void) | |
2501 | { | |
2502 | /* Finished. Free up all the things we've allocated. */ | |
2503 | free_edge_list (vector_edge_list); | |
2504 | if (vector_del) | |
2505 | sbitmap_vector_free (vector_del); | |
2506 | if (vector_insert) | |
2507 | sbitmap_vector_free (vector_insert); | |
2508 | if (vector_kill) | |
2509 | sbitmap_vector_free (vector_kill); | |
2510 | if (vector_antic) | |
2511 | sbitmap_vector_free (vector_antic); | |
2512 | if (vector_transp) | |
2513 | sbitmap_vector_free (vector_transp); | |
2514 | if (vector_comp) | |
2515 | sbitmap_vector_free (vector_comp); | |
2516 | if (vector_avin) | |
2517 | sbitmap_vector_free (vector_avin); | |
2518 | if (vector_avout) | |
2519 | sbitmap_vector_free (vector_avout); | |
2520 | ||
2521 | vector_edge_list = nullptr; | |
2522 | vector_kill = nullptr; | |
2523 | vector_del = nullptr; | |
2524 | vector_insert = nullptr; | |
2525 | vector_antic = nullptr; | |
2526 | vector_transp = nullptr; | |
2527 | vector_comp = nullptr; | |
2528 | vector_avin = nullptr; | |
2529 | vector_avout = nullptr; | |
9243c3d1 JZZ |
2530 | } |
2531 | ||
2532 | void | |
2533 | vector_infos_manager::dump (FILE *file) const | |
2534 | { | |
2535 | basic_block cfg_bb; | |
2536 | rtx_insn *rinsn; | |
2537 | ||
2538 | fprintf (file, "\n"); | |
2539 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
2540 | { | |
2541 | fprintf (file, "Local vector info of <bb %d>:\n", cfg_bb->index); | |
2542 | fprintf (file, "<HEADER>="); | |
2543 | vector_block_infos[cfg_bb->index].local_dem.dump (file); | |
2544 | FOR_BB_INSNS (cfg_bb, rinsn) | |
2545 | { | |
2546 | if (!NONDEBUG_INSN_P (rinsn) || !has_vtype_op (rinsn)) | |
2547 | continue; | |
2548 | fprintf (file, "<insn %d>=", INSN_UID (rinsn)); | |
2549 | const auto &info = vector_insn_infos[INSN_UID (rinsn)]; | |
2550 | info.dump (file); | |
2551 | } | |
2552 | fprintf (file, "<FOOTER>="); | |
2553 | vector_block_infos[cfg_bb->index].reaching_out.dump (file); | |
acc10c79 JZZ |
2554 | fprintf (file, "<Probability>="); |
2555 | vector_block_infos[cfg_bb->index].probability.dump (file); | |
9243c3d1 JZZ |
2556 | fprintf (file, "\n\n"); |
2557 | } | |
2558 | ||
2559 | fprintf (file, "\n"); | |
2560 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
2561 | { | |
2562 | fprintf (file, "Local properties of <bb %d>:\n", cfg_bb->index); | |
2563 | ||
2564 | fprintf (file, "<ANTLOC>="); | |
2565 | if (vector_antic == nullptr) | |
2566 | fprintf (file, "(nil)\n"); | |
2567 | else | |
2568 | dump_bitmap_file (file, vector_antic[cfg_bb->index]); | |
2569 | ||
2570 | fprintf (file, "<AVLOC>="); | |
2571 | if (vector_comp == nullptr) | |
2572 | fprintf (file, "(nil)\n"); | |
2573 | else | |
2574 | dump_bitmap_file (file, vector_comp[cfg_bb->index]); | |
2575 | ||
2576 | fprintf (file, "<TRANSP>="); | |
2577 | if (vector_transp == nullptr) | |
2578 | fprintf (file, "(nil)\n"); | |
2579 | else | |
2580 | dump_bitmap_file (file, vector_transp[cfg_bb->index]); | |
2581 | ||
2582 | fprintf (file, "<KILL>="); | |
2583 | if (vector_kill == nullptr) | |
2584 | fprintf (file, "(nil)\n"); | |
2585 | else | |
2586 | dump_bitmap_file (file, vector_kill[cfg_bb->index]); | |
2587 | } | |
2588 | ||
2589 | fprintf (file, "\n"); | |
2590 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
2591 | { | |
2592 | fprintf (file, "Global LCM (Lazy code motion) result of <bb %d>:\n", | |
2593 | cfg_bb->index); | |
2594 | ||
2595 | fprintf (file, "<AVIN>="); | |
2596 | if (vector_avin == nullptr) | |
2597 | fprintf (file, "(nil)\n"); | |
2598 | else | |
2599 | dump_bitmap_file (file, vector_avin[cfg_bb->index]); | |
2600 | ||
2601 | fprintf (file, "<AVOUT>="); | |
2602 | if (vector_avout == nullptr) | |
2603 | fprintf (file, "(nil)\n"); | |
2604 | else | |
2605 | dump_bitmap_file (file, vector_avout[cfg_bb->index]); | |
2606 | ||
2607 | fprintf (file, "<DELETE>="); | |
2608 | if (vector_del == nullptr) | |
2609 | fprintf (file, "(nil)\n"); | |
2610 | else | |
2611 | dump_bitmap_file (file, vector_del[cfg_bb->index]); | |
2612 | } | |
2613 | ||
2614 | fprintf (file, "\nGlobal LCM (Lazy code motion) INSERT info:\n"); | |
2615 | for (size_t i = 0; i < vector_exprs.length (); i++) | |
2616 | { | |
2617 | for (int ed = 0; ed < NUM_EDGES (vector_edge_list); ed++) | |
2618 | { | |
2619 | edge eg = INDEX_EDGE (vector_edge_list, ed); | |
2620 | if (bitmap_bit_p (vector_insert[ed], i)) | |
2621 | fprintf (dump_file, | |
2622 | "INSERT edge %d from bb %d to bb %d for VSETVL " | |
2623 | "expr[%ld]\n", | |
2624 | ed, eg->src->index, eg->dest->index, i); | |
2625 | } | |
2626 | } | |
2627 | } | |
2628 | ||
/* Pass descriptor for the "vsetvl" RTL pass.  The pass belongs to no optinfo
   group, has no timevar (TV_NONE), neither requires, provides nor destroys
   any properties, and sets no TODO flags at start or finish.  */
2629 | const pass_data pass_data_vsetvl = { | |
2630 | RTL_PASS, /* type */ | |
2631 | "vsetvl", /* name */ | |
2632 | OPTGROUP_NONE, /* optinfo_flags */ | |
2633 | TV_NONE, /* tv_id */ | |
2634 | 0, /* properties_required */ | |
2635 | 0, /* properties_provided */ | |
2636 | 0, /* properties_destroyed */ | |
2637 | 0, /* todo_flags_start */ | |
2638 | 0, /* todo_flags_finish */ | |
2639 | }; | |
2640 | ||
/* The VSETVL RTL pass.  The pass is gated on TARGET_VECTOR.  The private
   member functions are grouped by the phase of the pass they implement;
   all per-instruction and per-block state lives in the vector_infos_manager
   pointed to by m_vector_manager.
   NOTE(review): the file header describes 5 phases while the declarations
   below are labelled up to "Phase 6" -- confirm which is current.  */
2641 | class pass_vsetvl : public rtl_opt_pass | |
2642 | { | |
2643 | private: | |
/* Holder of the per-insn/per-block info tables and the LCM bitmaps.  */
2644 | class vector_infos_manager *m_vector_manager; | |
2645 | ||
/* simple_vsetvl is the insertion strategy for optimize == 0.
   NOTE(review): lazy_vsetvl is presumably the optimizing strategy -- its
   definition is not in view, confirm.  */
2646 | void simple_vsetvl (void) const; | |
2647 | void lazy_vsetvl (void); | |
2648 | ||
2649 | /* Phase 1. */ | |
2650 | void compute_local_backward_infos (const bb_info *); | |
2651 | ||
2652 | /* Phase 2. */ | |
2653 | bool need_vsetvl (const vector_insn_info &, const vector_insn_info &) const; | |
2654 | void transfer_before (vector_insn_info &, insn_info *) const; | |
2655 | void transfer_after (vector_insn_info &, insn_info *) const; | |
2656 | void emit_local_forward_vsetvls (const bb_info *); | |
2657 | ||
2658 | /* Phase 3. */ | |
4f673c5e JZZ |
2659 | enum fusion_type get_backward_fusion_type (const bb_info *, |
2660 | const vector_insn_info &); | |
6b6b9c68 | 2661 | bool hard_empty_block_p (const bb_info *, const vector_insn_info &) const; |
387cd9d3 JZZ |
2662 | bool backward_demand_fusion (void); |
2663 | bool forward_demand_fusion (void); | |
6b6b9c68 | 2664 | bool cleanup_illegal_dirty_blocks (void); |
387cd9d3 | 2665 | void demand_fusion (void); |
9243c3d1 JZZ |
2666 | |
2667 | /* Phase 4. */ | |
2668 | void prune_expressions (void); | |
2669 | void compute_local_properties (void); | |
6b6b9c68 | 2670 | bool can_refine_vsetvl_p (const basic_block, const vector_insn_info &) const; |
9243c3d1 JZZ |
2671 | void refine_vsetvls (void) const; |
2672 | void cleanup_vsetvls (void); | |
2673 | bool commit_vsetvls (void); | |
2674 | void pre_vsetvl (void); | |
2675 | ||
2676 | /* Phase 5. */ | |
2677 | void cleanup_insns (void) const; | |
2678 | ||
6b6b9c68 JZZ |
2679 | /* Phase 6. */ |
2680 | void propagate_avl (void) const; | |
2681 | ||
/* Set-up, tear-down and block probability computation for one run.  */
9243c3d1 JZZ |
2682 | void init (void); |
2683 | void done (void); | |
acc10c79 | 2684 | void compute_probabilities (void); |
9243c3d1 JZZ |
2685 | |
2686 | public: | |
2687 | pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {} | |
2688 | ||
2689 | /* opt_pass methods: */ | |
/* The pass runs only when TARGET_VECTOR is set.  */
2690 | virtual bool gate (function *) final override { return TARGET_VECTOR; } | |
2691 | virtual unsigned int execute (function *) final override; | |
2692 | }; // class pass_vsetvl | |
2693 | ||
2694 | /* Simple m_vsetvl_insert vsetvl for optimize == 0. */ | |
2695 | void | |
2696 | pass_vsetvl::simple_vsetvl (void) const | |
2697 | { | |
2698 | if (dump_file) | |
2699 | fprintf (dump_file, | |
2700 | "\nEntering Simple VSETVL PASS and Handling %d basic blocks for " | |
2701 | "function:%s\n", | |
2702 | n_basic_blocks_for_fn (cfun), function_name (cfun)); | |
2703 | ||
2704 | basic_block cfg_bb; | |
2705 | rtx_insn *rinsn; | |
2706 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
2707 | { | |
2708 | FOR_BB_INSNS (cfg_bb, rinsn) | |
2709 | { | |
2710 | if (!NONDEBUG_INSN_P (rinsn)) | |
2711 | continue; | |
2712 | if (has_vtype_op (rinsn)) | |
2713 | { | |
2714 | const auto info | |
2715 | = m_vector_manager->vector_insn_infos[INSN_UID (rinsn)]; | |
2716 | emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_BEFORE, info, | |
2717 | NULL_RTX, rinsn); | |
2718 | } | |
2719 | } | |
2720 | } | |
2721 | } | |
2722 | ||
2723 | /* Compute demanded information by backward data-flow analysis. */ | |
2724 | void | |
2725 | pass_vsetvl::compute_local_backward_infos (const bb_info *bb) | |
2726 | { | |
2727 | vector_insn_info change; | |
2728 | change.set_empty (); | |
2729 | ||
2730 | auto &block_info = m_vector_manager->vector_block_infos[bb->index ()]; | |
2731 | block_info.reaching_out = change; | |
2732 | ||
2733 | for (insn_info *insn : bb->reverse_real_nondebug_insns ()) | |
2734 | { | |
2735 | auto &info = m_vector_manager->vector_insn_infos[insn->uid ()]; | |
2736 | ||
2737 | if (info.uninit_p ()) | |
2738 | /* If it is uninitialized, propagate it directly. */ | |
2739 | info = change; | |
2740 | else if (info.unknown_p ()) | |
2741 | change = info; | |
2742 | else | |
2743 | { | |
2744 | gcc_assert (info.valid_p () && "Unexpected Invalid demanded info"); | |
ec99ffab JZZ |
2745 | if (change.valid_p ()) |
2746 | { | |
a481eed8 JZZ |
2747 | if (!(propagate_avl_across_demands_p (change, info) |
2748 | && !reg_available_p (insn, change)) | |
ec99ffab | 2749 | && change.compatible_p (info)) |
fdc5abbd | 2750 | { |
d51f2456 | 2751 | info = change.merge (info, LOCAL_MERGE); |
fdc5abbd JZ |
2752 | /* Fix PR109399, we should update user vsetvl instruction |
2753 | if there is a change in demand fusion. */ | |
2754 | if (vsetvl_insn_p (insn->rtl ())) | |
2755 | change_vsetvl_insn (insn, info); | |
2756 | } | |
ec99ffab | 2757 | } |
9243c3d1 JZZ |
2758 | change = info; |
2759 | } | |
2760 | } | |
2761 | ||
2762 | block_info.local_dem = change; | |
2763 | if (block_info.local_dem.empty_p ()) | |
2764 | block_info.reaching_out = block_info.local_dem; | |
2765 | } | |
2766 | ||
2767 | /* Return true if a dem_info is required to transition from curr_info to | |
2768 | require before INSN. */ | |
2769 | bool | |
2770 | pass_vsetvl::need_vsetvl (const vector_insn_info &require, | |
2771 | const vector_insn_info &curr_info) const | |
2772 | { | |
2773 | if (!curr_info.valid_p () || curr_info.unknown_p () || curr_info.uninit_p ()) | |
2774 | return true; | |
2775 | ||
a481eed8 | 2776 | if (require.compatible_p (static_cast<const vl_vtype_info &> (curr_info))) |
9243c3d1 JZZ |
2777 | return false; |
2778 | ||
2779 | return true; | |
2780 | } | |
2781 | ||
2782 | /* Given an incoming state reaching INSN, modifies that state so that it is | |
2783 | minimally compatible with INSN. The resulting state is guaranteed to be | |
2784 | semantically legal for INSN, but may not be the state requested by INSN. */ | |
2785 | void | |
2786 | pass_vsetvl::transfer_before (vector_insn_info &info, insn_info *insn) const | |
2787 | { | |
2788 | if (!has_vtype_op (insn->rtl ())) | |
2789 | return; | |
2790 | ||
2791 | const vector_insn_info require | |
2792 | = m_vector_manager->vector_insn_infos[insn->uid ()]; | |
2793 | if (info.valid_p () && !need_vsetvl (require, info)) | |
2794 | return; | |
2795 | info = require; | |
2796 | } | |
2797 | ||
2798 | /* Given a state with which we evaluated insn (see transfer_before above for why | |
2799 | this might be different that the state insn requested), modify the state to | |
2800 | reflect the changes insn might make. */ | |
2801 | void | |
2802 | pass_vsetvl::transfer_after (vector_insn_info &info, insn_info *insn) const | |
2803 | { | |
2804 | if (vector_config_insn_p (insn->rtl ())) | |
2805 | { | |
2806 | info = m_vector_manager->vector_insn_infos[insn->uid ()]; | |
2807 | return; | |
2808 | } | |
2809 | ||
60bd33bc JZZ |
2810 | if (fault_first_load_p (insn->rtl ()) |
2811 | && info.update_fault_first_load_avl (insn)) | |
2812 | return; | |
9243c3d1 JZZ |
2813 | |
2814 | /* If this is something that updates VL/VTYPE that we don't know about, set | |
2815 | the state to unknown. */ | |
2816 | if (insn->is_call () || insn->is_asm () | |
2817 | || find_access (insn->defs (), VL_REGNUM) | |
2818 | || find_access (insn->defs (), VTYPE_REGNUM)) | |
2819 | info = vector_insn_info::get_unknown (); | |
2820 | } | |
2821 | ||
2822 | /* Emit vsetvl within each block by forward data-flow analysis. */ | |
2823 | void | |
2824 | pass_vsetvl::emit_local_forward_vsetvls (const bb_info *bb) | |
2825 | { | |
2826 | auto &block_info = m_vector_manager->vector_block_infos[bb->index ()]; | |
2827 | if (block_info.local_dem.empty_p ()) | |
2828 | return; | |
2829 | ||
2830 | vector_insn_info curr_info; | |
2831 | for (insn_info *insn : bb->real_nondebug_insns ()) | |
2832 | { | |
2833 | const vector_insn_info prev_info = curr_info; | |
a481eed8 | 2834 | enum vsetvl_type type = NUM_VSETVL_TYPE; |
9243c3d1 JZZ |
2835 | transfer_before (curr_info, insn); |
2836 | ||
2837 | if (has_vtype_op (insn->rtl ())) | |
2838 | { | |
2839 | if (static_cast<const vl_vtype_info &> (prev_info) | |
2840 | != static_cast<const vl_vtype_info &> (curr_info)) | |
2841 | { | |
2842 | const auto require | |
2843 | = m_vector_manager->vector_insn_infos[insn->uid ()]; | |
2844 | if (!require.compatible_p ( | |
2845 | static_cast<const vl_vtype_info &> (prev_info))) | |
a481eed8 JZZ |
2846 | type = insert_vsetvl (EMIT_BEFORE, insn->rtl (), require, |
2847 | prev_info); | |
9243c3d1 JZZ |
2848 | } |
2849 | } | |
2850 | ||
a481eed8 JZZ |
2851 | /* Fix the issue of following sequence: |
2852 | vsetivli zero, 5 | |
2853 | .... | |
2854 | vsetvli zero, zero | |
2855 | vmv.x.s (demand AVL = 8). | |
2856 | .... | |
2857 | incorrect: vsetvli zero, zero ===> Since the curr_info is AVL = 8. | |
2858 | correct: vsetivli zero, 8 | |
2859 | vadd (demand AVL = 8). */ | |
2860 | if (type == VSETVL_VTYPE_CHANGE_ONLY) | |
2861 | { | |
2862 | /* Update the curr_info to be real correct AVL. */ | |
2863 | curr_info.set_avl_info (prev_info.get_avl_info ()); | |
2864 | } | |
9243c3d1 JZZ |
2865 | transfer_after (curr_info, insn); |
2866 | } | |
2867 | ||
2868 | block_info.reaching_out = curr_info; | |
2869 | } | |
2870 | ||
4f673c5e JZZ |
2871 | enum fusion_type |
2872 | pass_vsetvl::get_backward_fusion_type (const bb_info *bb, | |
2873 | const vector_insn_info &prop) | |
9243c3d1 | 2874 | { |
4f673c5e | 2875 | insn_info *insn = prop.get_insn (); |
9243c3d1 | 2876 | |
4f673c5e JZZ |
2877 | /* TODO: We don't backward propagate the explict VSETVL here |
2878 | since we will change vsetvl and vsetvlmax intrinsics into | |
2879 | no side effects which can be optimized into optimal location | |
2880 | by GCC internal passes. We only need to support these backward | |
2881 | propagation if vsetvl intrinsics have side effects. */ | |
2882 | if (vsetvl_insn_p (insn->rtl ())) | |
2883 | return INVALID_FUSION; | |
2884 | ||
2885 | gcc_assert (has_vtype_op (insn->rtl ())); | |
2886 | rtx reg = NULL_RTX; | |
2887 | ||
2888 | /* Case 1: Don't need VL. Just let it backward propagate. */ | |
ec99ffab | 2889 | if (!prop.demand_p (DEMAND_AVL)) |
4f673c5e JZZ |
2890 | return VALID_AVL_FUSION; |
2891 | else | |
9243c3d1 | 2892 | { |
4f673c5e JZZ |
2893 | /* Case 2: CONST_INT AVL, we don't need to check def. */ |
2894 | if (prop.has_avl_imm ()) | |
2895 | return VALID_AVL_FUSION; | |
2896 | else | |
2897 | { | |
2898 | /* Case 3: REG AVL, we need to check the distance of def to make | |
2899 | sure we won't backward propagate over the def. */ | |
2900 | gcc_assert (prop.has_avl_reg ()); | |
2901 | if (vlmax_avl_p (prop.get_avl ())) | |
2902 | /* Check VL operand for vsetvl vl,zero. */ | |
ec99ffab | 2903 | reg = prop.get_avl_reg_rtx (); |
4f673c5e JZZ |
2904 | else |
2905 | /* Check AVL operand for vsetvl zero,avl. */ | |
ec99ffab | 2906 | reg = prop.get_avl (); |
4f673c5e JZZ |
2907 | } |
2908 | } | |
9243c3d1 | 2909 | |
4f673c5e | 2910 | gcc_assert (reg); |
ec99ffab JZZ |
2911 | if (!prop.get_avl_source ()->insn ()->is_phi () |
2912 | && prop.get_avl_source ()->insn ()->bb () == insn->bb ()) | |
6b6b9c68 JZZ |
2913 | return INVALID_FUSION; |
2914 | hash_set<set_info *> sets | |
2915 | = get_all_sets (prop.get_avl_source (), true, true, true); | |
2916 | if (any_set_in_bb_p (sets, insn->bb ())) | |
2917 | return INVALID_FUSION; | |
2918 | ||
2919 | if (vlmax_avl_p (prop.get_avl ())) | |
4f673c5e | 2920 | { |
6b6b9c68 | 2921 | if (find_reg_killed_by (bb, reg)) |
4f673c5e | 2922 | return INVALID_FUSION; |
6b6b9c68 JZZ |
2923 | else |
2924 | return VALID_AVL_FUSION; | |
4f673c5e | 2925 | } |
6b6b9c68 JZZ |
2926 | |
2927 | /* By default, we always enable backward fusion so that we can | |
2928 | gain more optimizations. */ | |
2929 | if (!find_reg_killed_by (bb, reg)) | |
2930 | return VALID_AVL_FUSION; | |
2931 | return KILLED_AVL_FUSION; | |
2932 | } | |
2933 | ||
2934 | /* We enable almost all cases in get_backward_fusion_type; this function | |
2935 | disables the backward fusion by changing dirty blocks into hard empty | |
2936 | blocks in forward dataflow. We can have more accurate optimization by | |
2937 | this method. | |
 Return true if block BB, whose current info is INFO, should be turned into
 a hard empty block. Only dirty INFO with a register AVL can qualify; the
 decision uses the AVIN/AVOUT bitmaps of the manager, the successors' local
 demands and the definitions of the AVL register in the predecessors. */
2938 | bool | |
2939 | pass_vsetvl::hard_empty_block_p (const bb_info *bb, | |
2940 | const vector_insn_info &info) const | |
2941 | { | |
/* Blocks that are not dirty, or whose AVL is not a register, are never
   hard empty. */
2942 | if (!info.dirty_p () || !info.has_avl_reg ()) | |
2943 | return false; | |
2944 | ||
2945 | basic_block cfg_bb = bb->cfg_bb (); | |
2946 | sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index]; | |
ec99ffab JZZ |
2947 | set_info *set = info.get_avl_source (); |
2948 | rtx avl = gen_rtx_REG (Pmode, set->regno ()); | |
6b6b9c68 JZZ |
2949 | hash_set<set_info *> sets = get_all_sets (set, true, false, false); |
2950 | hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb); | |
2951 | ||
2952 | if (find_reg_killed_by (bb, avl)) | |
2953 | { | |
2954 | /* Condition 1: | |
2955 | Dirty block with killed AVL means that the empty block (no RVV | |
2956 | instructions) are polluted as Dirty blocks with the value of current | |
2957 | AVL is killed. For example: | |
2958 | bb 0: | |
2959 | ... | |
2960 | bb 1: | |
2961 | def a5 | |
2962 | bb 2: | |
2963 | RVV (use a5) | |
2964 | In backward dataflow, we will pollute BB0 and BB1 as Dirty with AVL | |
2965 | killed, since a5 is killed in BB1. | |
2966 | In this case, let's take a look at this example: | |
2967 | ||
2968 | bb 3: bb 4: | |
2969 | def3 a5 def4 a5 | |
2970 | bb 5: bb 6: | |
2971 | def1 a5 def2 a5 | |
2972 | \ / | |
2973 | \ / | |
2974 | \ / | |
2975 | \ / | |
2976 | bb 7: | |
2977 | RVV (use a5) | |
2978 | In this case, we can pollute BB5 and BB6 as dirty if get-def | |
2979 | of a5 from RVV instruction in BB7 is the def1 in BB5 and | |
2980 | def2 BB6 so we can return false early here for HARD_EMPTY_BLOCK_P. | |
2981 | However, we are not sure whether BB3 and BB4 can be | |
2982 | polluted as Dirty with AVL killed so we can't return false | |
2983 | for HARD_EMPTY_BLOCK_P here since it's too early which will | |
2984 | potentially produce issues. */ | |
2985 | gcc_assert (info.dirty_with_killed_avl_p ()); | |
2986 | if (info.get_avl_source () | |
2987 | && get_same_bb_set (sets, bb->cfg_bb ()) == info.get_avl_source ()) | |
2988 | return false; | |
4f673c5e | 2989 | } |
9243c3d1 | 2990 | |
6b6b9c68 JZZ |
2991 | /* Condition 2: |
2992 | Suppress the VL/VTYPE info backward propagation too early: | |
2993 | ________ | |
2994 | | BB0 | | |
2995 | |________| | |
2996 | | | |
2997 | ____|____ | |
2998 | | BB1 | | |
2999 | |________| | |
3000 | In this case, suppose BB 1 has multiple predecessors, BB 0 is one | |
3001 | of them. BB1 has VL/VTYPE info (may be VALID or DIRTY) to backward | |
3002 | propagate. | |
3003 | The AVIN (available in) which is calculated by LCM is empty only | |
3004 | in these 2 circumstances: | |
3005 | 1. all predecessors of BB1 are empty (not VALID | |
3006 | and can not be polluted in backward fusion flow) | |
3007 | 2. VL/VTYPE info of BB1 predecessors are conflict. | |
3008 | ||
3009 | We keep it as dirty in 2nd circumstance and set it as HARD_EMPTY | |
3010 | (can not be polluted as DIRTY any more) in 1st circumstance. | |
3011 | We don't backward propagate in 1st circumstance since there is | |
3012 | no VALID RVV instruction and no polluted blocks (dirty blocks) | |
3013 | by backward propagation from other following blocks. | |
3014 | It's meaningless to keep it as Dirty anymore. | |
3015 | ||
3016 | However, since we keep it as dirty in 2nd since there are VALID or | |
3017 | Dirty blocks in predecessors, we can still gain the benefits and | |
3018 | optimization opportunities. For example, in this case: | |
3019 | for (size_t i = 0; i < n; i++) | |
3020 | { | |
3021 | if (i != cond) { | |
3022 | vint8mf8_t v = *(vint8mf8_t*)(in + i + 100); | |
3023 | *(vint8mf8_t*)(out + i + 100) = v; | |
3024 | } else { | |
3025 | vbool1_t v = *(vbool1_t*)(in + i + 400); | |
3026 | *(vbool1_t*)(out + i + 400) = v; | |
3027 | } | |
3028 | } | |
3029 | VL/VTYPE in if-else are conflict which will produce empty AVIN LCM result | |
3030 | but we can still keep dirty blocks if *(i != cond)* is very unlikely then | |
3031 | we can preset vsetvl (VL/VTYPE) info from else (static probability model). | |
3032 | ||
3033 | We don't want to backward propagate VL/VTYPE information too early | |
3034 | which is not the optimal and may potentially produce issues. */ | |
3035 | if (bitmap_empty_p (avin)) | |
4f673c5e | 3036 | { |
6b6b9c68 JZZ |
3037 | bool hard_empty_p = true; |
3038 | for (const basic_block pred_cfg_bb : pred_cfg_bbs) | |
3039 | { | |
3040 | if (pred_cfg_bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)) | |
3041 | continue; | |
3042 | sbitmap avout = m_vector_manager->vector_avout[pred_cfg_bb->index]; | |
3043 | if (!bitmap_empty_p (avout)) | |
3044 | { | |
3045 | hard_empty_p = false; | |
3046 | break; | |
3047 | } | |
3048 | } | |
3049 | if (hard_empty_p) | |
3050 | return true; | |
4f673c5e | 3051 | } |
9243c3d1 | 3052 | |
/* The remaining conditions only matter when at least one successor's local
   demand is dirty with killed AVL; otherwise the block is not hard empty. */
6b6b9c68 JZZ |
3053 | edge e; |
3054 | edge_iterator ei; | |
3055 | bool has_avl_killed_insn_p = false; | |
3056 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
4f673c5e | 3057 | { |
6b6b9c68 JZZ |
3058 | const auto block_info |
3059 | = m_vector_manager->vector_block_infos[e->dest->index]; | |
3060 | if (block_info.local_dem.dirty_with_killed_avl_p ()) | |
9243c3d1 | 3061 | { |
6b6b9c68 JZZ |
3062 | has_avl_killed_insn_p = true; |
3063 | break; | |
3064 | } | |
3065 | } | |
3066 | if (!has_avl_killed_insn_p) | |
3067 | return false; | |
4f673c5e | 3068 | |
/* Look for a predecessor that contains a relevant definition of the AVL.
   If none is found the block is hard empty. */
6b6b9c68 JZZ |
3069 | bool any_set_in_bbs_p = false; |
3070 | for (const basic_block pred_cfg_bb : pred_cfg_bbs) | |
3071 | { | |
3072 | insn_info *def_insn = extract_single_source (set); | |
3073 | if (def_insn) | |
3074 | { | |
3075 | /* Condition 3: | |
3076 | ||
3077 | Case 1: Case 2: | |
3078 | bb 0: bb 0: | |
3079 | def a5 101 ... | |
3080 | bb 1: bb 1: | |
3081 | ... ... | |
3082 | bb 2: bb 2: | |
3083 | RVV 1 (use a5 with TAIL ANY) ... | |
3084 | bb 3: bb 3: | |
3085 | def a5 101 def a5 101 | |
3086 | bb 4: bb 4: | |
3087 | ... ... | |
3088 | bb 5: bb 5: | |
3089 | RVV 2 (use a5 with TU) RVV 1 (use a5) | |
3090 | ||
3091 | Case 1: We can pollute BB3,BB2,BB1,BB0 so they are all Dirty blocks | |
3092 | with killed AVL so that we can merge TU demand info from RVV 2 | |
3093 | into RVV 1 and elide the vsetvl instruction in BB5. | |
3094 | ||
3095 | TODO: We only optimize for single source def since multiple source | |
3096 | def is quite complicated. | |
3097 | ||
3098 | Case 2: We only can pollute bb 3 as dirty and it has been accepted | |
3099 | in Condition 2 and we can't pollute BB3,BB2,BB1,BB0 like case 1. */ | |
3100 | insn_info *last_killed_insn | |
3101 | = find_reg_killed_by (crtl->ssa->bb (pred_cfg_bb), avl); | |
3102 | if (!last_killed_insn || pred_cfg_bb == def_insn->bb ()->cfg_bb ()) | |
3103 | continue; | |
3104 | if (source_equal_p (last_killed_insn, def_insn)) | |
4f673c5e | 3105 | { |
6b6b9c68 JZZ |
3106 | any_set_in_bbs_p = true; |
3107 | break; | |
4f673c5e | 3108 | } |
9243c3d1 JZZ |
3109 | } |
3110 | else | |
4f673c5e | 3111 | { |
6b6b9c68 JZZ |
3112 | /* Condition 4: |
3113 | ||
3114 | bb 0: bb 1: bb 3: | |
3115 | def1 a5 def2 a5 ... | |
3116 | \ / / | |
3117 | \ / / | |
3118 | \ / / | |
3119 | \ / / | |
3120 | bb 4: / | |
3121 | | / | |
3122 | | / | |
3123 | bb 5: / | |
3124 | | / | |
3125 | | / | |
3126 | bb 6: / | |
3127 | | / | |
3128 | | / | |
3129 | bb 8: | |
3130 | RVV 1 (use a5) | |
3131 | If we get-def (REAL) of a5 from RVV 1 instruction, we will get | |
3132 | def1 from BB0 and def2 from BB1. So we will pollute BB6,BB5,BB4, | |
3133 | BB0,BB1 with DIRTY and set BB3 as HARD_EMPTY so that we won't | |
3134 | propagate AVL to BB3. */ | |
3135 | if (any_set_in_bb_p (sets, crtl->ssa->bb (pred_cfg_bb))) | |
3136 | { | |
3137 | any_set_in_bbs_p = true; | |
3138 | break; | |
3139 | } | |
4f673c5e | 3140 | } |
9243c3d1 | 3141 | } |
6b6b9c68 JZZ |
3142 | if (!any_set_in_bbs_p) |
3143 | return true; | |
3144 | return false; | |
9243c3d1 JZZ |
3145 | } |
3146 | ||
3147 | /* Compute global backward demanded info. | |
 Walk the blocks in reverse order and try to push each block's local demand
 into the reaching-out info of its predecessors. Return true if the info of
 any block was changed. */
387cd9d3 JZZ |
3148 | bool |
3149 | pass_vsetvl::backward_demand_fusion (void) | |
9243c3d1 JZZ |
3150 | { |
3151 | /* We compute global infos by backward propagation. | |
3152 | We want to have better performance in these following cases: | |
3153 | ||
3154 | 1. for (size_t i = 0; i < n; i++) { | |
3155 | if (i != cond) { | |
3156 | vint8mf8_t v = *(vint8mf8_t*)(in + i + 100); | |
3157 | *(vint8mf8_t*)(out + i + 100) = v; | |
3158 | } else { | |
3159 | vbool1_t v = *(vbool1_t*)(in + i + 400); | |
3160 | *(vbool1_t*)(out + i + 400) = v; | |
3161 | } | |
3162 | } | |
3163 | ||
3164 | Since we don't have any RVV instruction in the BEFORE blocks, | |
3165 | LCM fails to optimize such case. We want to backward propagate | |
3166 | them into empty blocks so that we could have better performance | |
3167 | in LCM. | |
3168 | ||
3169 | 2. bb 0: | |
3170 | vsetvl e8,mf8 (demand RATIO) | |
3171 | bb 1: | |
3172 | vsetvl e32,mf2 (demand SEW and LMUL) | |
3173 | We backward propagate the first VSETVL into e32,mf2 so that we | |
3174 | could be able to eliminate the second VSETVL in LCM. */ | |
3175 | ||
387cd9d3 | 3176 | bool changed_p = false; |
9243c3d1 JZZ |
3177 | for (const bb_info *bb : crtl->ssa->reverse_bbs ()) |
3178 | { | |
3179 | basic_block cfg_bb = bb->cfg_bb (); | |
b9b251b7 JZZ |
3180 | const auto &curr_block_info |
3181 | = m_vector_manager->vector_block_infos[cfg_bb->index]; | |
/* PROP is the demand at the head of this block; it is what gets pushed
   into the predecessors. */
3182 | const auto &prop = curr_block_info.local_dem; | |
9243c3d1 JZZ |
3183 | |
3184 | /* If there is nothing to propagate, just skip it. */ | |
3185 | if (!prop.valid_or_dirty_p ()) | |
3186 | continue; | |
3187 | ||
b9b251b7 | 3188 | if (!backward_propagate_worthwhile_p (cfg_bb, curr_block_info)) |
9243c3d1 JZZ |
3189 | continue; |
3190 | ||
d06e9264 JZ |
3191 | /* Fix PR108270: |
3192 | ||
3193 | bb 0 -> bb 1 | |
3194 | We don't need to backward fuse VL/VTYPE info from bb 1 to bb 0 | |
3195 | if bb 1 is not inside a loop and all predecessors of bb 0 are empty. */ | |
3196 | if (m_vector_manager->all_empty_predecessor_p (cfg_bb)) | |
3197 | continue; | |
3198 | ||
9243c3d1 JZZ |
3199 | edge e; |
3200 | edge_iterator ei; | |
3201 | /* Backward propagate to each predecessor. */ | |
3202 | FOR_EACH_EDGE (e, ei, cfg_bb->preds) | |
3203 | { | |
9243c3d1 JZZ |
3204 | auto &block_info |
3205 | = m_vector_manager->vector_block_infos[e->src->index]; | |
3206 | ||
3207 | /* We don't propagate through complex edges (EDGE_COMPLEX). */ | |
3208 | if (e->flags & EDGE_COMPLEX) | |
3209 | continue; | |
/* Nothing is propagated into the function's entry block. */
3210 | if (e->src->index == ENTRY_BLOCK_PTR_FOR_FN (cfun)->index) | |
3211 | continue; | |
44c918b5 JZZ |
3212 | /* If prop is demand of vsetvl instruction and reaching doesn't demand |
3213 | AVL. We don't backward propagate since vsetvl instruction has no | |
3214 | side effects. */ | |
3215 | if (vsetvl_insn_p (prop.get_insn ()->rtl ()) | |
3216 | && propagate_avl_across_demands_p (prop, block_info.reaching_out)) | |
3217 | continue; | |
9243c3d1 JZZ |
3218 | |
/* Dispatch on the predecessor's reaching-out state: unknown and hard-empty
   predecessors are skipped; empty, dirty and valid ones are handled by the
   three branches below. */
3219 | if (block_info.reaching_out.unknown_p ()) | |
3220 | continue; | |
6b6b9c68 JZZ |
3221 | else if (block_info.reaching_out.hard_empty_p ()) |
3222 | continue; | |
9243c3d1 JZZ |
3223 | else if (block_info.reaching_out.empty_p ()) |
3224 | { | |
/* EMPTY -> DIRTY: the predecessor takes over PROP, marked dirty according
   to the fusion type, together with this block's probability. */
4f673c5e JZZ |
3225 | enum fusion_type type |
3226 | = get_backward_fusion_type (crtl->ssa->bb (e->src), prop); | |
3227 | if (type == INVALID_FUSION) | |
9243c3d1 JZZ |
3228 | continue; |
3229 | ||
4f673c5e JZZ |
3230 | block_info.reaching_out = prop; |
3231 | block_info.reaching_out.set_dirty (type); | |
6b6b9c68 JZZ |
3232 | |
3233 | if (prop.has_avl_reg () && !vlmax_avl_p (prop.get_avl ())) | |
3234 | { | |
3235 | hash_set<set_info *> sets | |
3236 | = get_all_sets (prop.get_avl_source (), true, true, true); | |
3237 | set_info *set = get_same_bb_set (sets, e->src); | |
3238 | if (set) | |
3239 | block_info.reaching_out.set_avl_info ( | |
3240 | avl_info (prop.get_avl (), set)); | |
3241 | } | |
3242 | ||
4f673c5e JZZ |
3243 | block_info.local_dem = block_info.reaching_out; |
3244 | block_info.probability = curr_block_info.probability; | |
3245 | changed_p = true; | |
9243c3d1 JZZ |
3246 | } |
3247 | else if (block_info.reaching_out.dirty_p ()) | |
3248 | { | |
3249 | /* DIRTY -> DIRTY or VALID -> DIRTY. */ | |
3250 | vector_insn_info new_info; | |
3251 | ||
3252 | if (block_info.reaching_out.compatible_p (prop)) | |
3253 | { | |
ec99ffab | 3254 | if (block_info.reaching_out.available_p (prop)) |
9243c3d1 | 3255 | continue; |
4f673c5e | 3256 | new_info = block_info.reaching_out.merge (prop, GLOBAL_MERGE); |
6b6b9c68 JZZ |
3257 | new_info.set_dirty ( |
3258 | block_info.reaching_out.dirty_with_killed_avl_p ()); | |
3259 | block_info.probability += curr_block_info.probability; | |
9243c3d1 JZZ |
3260 | } |
3261 | else | |
3262 | { | |
/* Conflicting demands: PROP replaces the existing dirty info only when this
   block's probability is strictly higher. */
4f673c5e JZZ |
3263 | if (curr_block_info.probability > block_info.probability) |
3264 | { | |
6b6b9c68 JZZ |
3265 | enum fusion_type type |
3266 | = get_backward_fusion_type (crtl->ssa->bb (e->src), | |
3267 | prop); | |
3268 | if (type == INVALID_FUSION) | |
3269 | continue; | |
4f673c5e | 3270 | new_info = prop; |
6b6b9c68 | 3271 | new_info.set_dirty (type); |
4f673c5e JZZ |
3272 | block_info.probability = curr_block_info.probability; |
3273 | } | |
9243c3d1 JZZ |
3274 | else |
3275 | continue; | |
3276 | } | |
3277 | ||
ec99ffab JZZ |
3278 | if (propagate_avl_across_demands_p (prop, |
3279 | block_info.reaching_out)) | |
3280 | { | |
3281 | rtx reg = new_info.get_avl_reg_rtx (); | |
3282 | if (find_reg_killed_by (crtl->ssa->bb (e->src), reg)) | |
3283 | new_info.set_dirty (true); | |
3284 | } | |
3285 | ||
9243c3d1 JZZ |
3286 | block_info.local_dem = new_info; |
3287 | block_info.reaching_out = new_info; | |
387cd9d3 | 3288 | changed_p = true; |
9243c3d1 JZZ |
3289 | } |
3290 | else | |
3291 | { | |
3292 | /* We not only change the info during backward propagation, | |
3293 | but also change the VSETVL instruction. */ | |
3294 | gcc_assert (block_info.reaching_out.valid_p ()); | |
6b6b9c68 JZZ |
3295 | hash_set<set_info *> sets |
3296 | = get_all_sets (prop.get_avl_source (), true, false, false); | |
3297 | set_info *set = get_same_bb_set (sets, e->src); | |
3298 | if (vsetvl_insn_p (block_info.reaching_out.get_insn ()->rtl ()) | |
3299 | && prop.has_avl_reg () && !vlmax_avl_p (prop.get_avl ())) | |
3300 | { | |
3301 | if (!block_info.reaching_out.same_vlmax_p (prop)) | |
3302 | continue; | |
3303 | if (block_info.reaching_out.same_vtype_p (prop)) | |
3304 | continue; | |
3305 | if (!set) | |
3306 | continue; | |
3307 | if (set->insn () != block_info.reaching_out.get_insn ()) | |
3308 | continue; | |
3309 | } | |
ec99ffab JZZ |
3310 | |
3311 | if (!block_info.reaching_out.compatible_p (prop)) | |
3312 | continue; | |
3313 | if (block_info.reaching_out.available_p (prop)) | |
3314 | continue; | |
9243c3d1 JZZ |
3315 | |
3316 | vector_insn_info be_merged = block_info.reaching_out; | |
3317 | if (block_info.local_dem == block_info.reaching_out) | |
3318 | be_merged = block_info.local_dem; | |
4f673c5e JZZ |
3319 | vector_insn_info new_info = be_merged.merge (prop, GLOBAL_MERGE); |
3320 | ||
3321 | if (curr_block_info.probability > block_info.probability) | |
3322 | block_info.probability = curr_block_info.probability; | |
9243c3d1 | 3323 | |
ec99ffab | 3324 | if (propagate_avl_across_demands_p (prop, block_info.reaching_out) |
a481eed8 JZZ |
3325 | && !reg_available_p (crtl->ssa->bb (e->src)->end_insn (), |
3326 | new_info)) | |
ec99ffab JZZ |
3327 | continue; |
3328 | ||
/* Rewrite the instruction of the merged info, then record the merged info;
   local_dem is updated too when it was equal to reaching_out. */
aef20243 | 3329 | change_vsetvl_insn (new_info.get_insn (), new_info); |
9243c3d1 JZZ |
3330 | if (block_info.local_dem == block_info.reaching_out) |
3331 | block_info.local_dem = new_info; | |
3332 | block_info.reaching_out = new_info; | |
387cd9d3 | 3333 | changed_p = true; |
9243c3d1 JZZ |
3334 | } |
3335 | } | |
3336 | } | |
387cd9d3 JZZ |
3337 | return changed_p; |
3338 | } | |
3339 | ||
3340 | /* Compute global forward demanded info. */ | |
3341 | bool | |
3342 | pass_vsetvl::forward_demand_fusion (void) | |
3343 | { | |
3344 | /* Enhance the global information propagation, especially when | |
3345 | backward propagation misses the propagation. | |
3346 | Consider such case: | |
3347 | ||
3348 | bb0 | |
3349 | (TU) | |
3350 | / \ | |
3351 | bb1 bb2 | |
3352 | (TU) (ANY) | |
3353 | existing edge -----> \ / (TU) <----- LCM create this edge. | |
3354 | bb3 | |
3355 | (TU) | |
3356 | ||
3357 | Based on this situation, LCM fails to eliminate the VSETVL instruction and | |
3358 | inserts an edge from bb2 to bb3 since we can't backward propagate bb3 into | |
3359 | bb2. To avoid this confusing LCM result and non-optimal codegen, we should | |
3360 | forward propagate information from bb0 to bb2 which is friendly to LCM. */ | |
3361 | bool changed_p = false; | |
3362 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
3363 | { | |
3364 | basic_block cfg_bb = bb->cfg_bb (); | |
3365 | const auto &prop | |
3366 | = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out; | |
3367 | ||
3368 | /* If there is nothing to propagate, just skip it. */ | |
3369 | if (!prop.valid_or_dirty_p ()) | |
3370 | continue; | |
3371 | ||
00fb7698 JZZ |
3372 | if (cfg_bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
3373 | continue; | |
3374 | ||
ec99ffab JZZ |
3375 | if (vsetvl_insn_p (prop.get_insn ()->rtl ())) |
3376 | continue; | |
3377 | ||
387cd9d3 JZZ |
3378 | edge e; |
3379 | edge_iterator ei; | |
3380 | /* Forward propagate to each successor. */ | |
3381 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
3382 | { | |
3383 | auto &local_dem | |
3384 | = m_vector_manager->vector_block_infos[e->dest->index].local_dem; | |
3385 | auto &reaching_out | |
3386 | = m_vector_manager->vector_block_infos[e->dest->index].reaching_out; | |
3387 | ||
3388 | /* It's quite obvious, we don't need to propagate itself. */ | |
3389 | if (e->dest->index == cfg_bb->index) | |
3390 | continue; | |
00fb7698 JZZ |
3391 | /* We don't propagate through complex edges. */ |
3392 | if (e->flags & EDGE_COMPLEX) | |
3393 | continue; | |
3394 | if (e->dest->index == EXIT_BLOCK_PTR_FOR_FN (cfun)->index) | |
3395 | continue; | |
387cd9d3 JZZ |
3396 | |
3397 | /* If there is nothing to propagate, just skip it. */ | |
3398 | if (!local_dem.valid_or_dirty_p ()) | |
3399 | continue; | |
ec99ffab | 3400 | if (local_dem.available_p (prop)) |
4f673c5e JZZ |
3401 | continue; |
3402 | if (!local_dem.compatible_p (prop)) | |
3403 | continue; | |
ec99ffab JZZ |
3404 | if (propagate_avl_across_demands_p (prop, local_dem)) |
3405 | continue; | |
387cd9d3 | 3406 | |
4f673c5e JZZ |
3407 | vector_insn_info new_info = local_dem.merge (prop, GLOBAL_MERGE); |
3408 | new_info.set_insn (local_dem.get_insn ()); | |
3409 | if (local_dem.dirty_p ()) | |
387cd9d3 | 3410 | { |
4f673c5e | 3411 | gcc_assert (local_dem == reaching_out); |
6b6b9c68 | 3412 | new_info.set_dirty (local_dem.dirty_with_killed_avl_p ()); |
4f673c5e | 3413 | local_dem = new_info; |
4f673c5e JZZ |
3414 | reaching_out = local_dem; |
3415 | } | |
3416 | else | |
3417 | { | |
3418 | if (reaching_out == local_dem) | |
3419 | reaching_out = new_info; | |
3420 | local_dem = new_info; | |
3421 | change_vsetvl_insn (local_dem.get_insn (), new_info); | |
387cd9d3 | 3422 | } |
4f673c5e JZZ |
3423 | auto &prob |
3424 | = m_vector_manager->vector_block_infos[e->dest->index].probability; | |
3425 | auto &curr_prob | |
3426 | = m_vector_manager->vector_block_infos[cfg_bb->index].probability; | |
3427 | prob = curr_prob * e->probability; | |
3428 | changed_p = true; | |
387cd9d3 JZZ |
3429 | } |
3430 | } | |
3431 | return changed_p; | |
3432 | } | |
3433 | ||
3434 | void | |
3435 | pass_vsetvl::demand_fusion (void) | |
3436 | { | |
3437 | bool changed_p = true; | |
3438 | while (changed_p) | |
3439 | { | |
3440 | changed_p = false; | |
4f673c5e JZZ |
3441 | /* To optimize the case like this: |
3442 | void f2 (int8_t * restrict in, int8_t * restrict out, int n, int cond) | |
3443 | { | |
3444 | size_t vl = 101; | |
3445 | ||
3446 | for (size_t i = 0; i < n; i++) | |
3447 | { | |
3448 | vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl); | |
3449 | __riscv_vse8_v_i8mf8 (out + i + 300, v, vl); | |
3450 | } | |
3451 | ||
3452 | for (size_t i = 0; i < n; i++) | |
3453 | { | |
3454 | vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl); | |
3455 | __riscv_vse8_v_i8mf8 (out + i, v, vl); | |
3456 | ||
3457 | vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl); | |
3458 | __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl); | |
3459 | } | |
3460 | } | |
3461 | ||
3462 | bb 0: li a5, 101 (killed avl) | |
3463 | ... | |
3464 | bb 1: vsetvli zero, a5, ta | |
3465 | ... | |
3466 | bb 2: li a5, 101 (killed avl) | |
3467 | ... | |
3468 | bb 3: vsetvli zero, a3, tu | |
3469 | ||
3470 | We want to fuse VSETVLI instructions on bb 1 and bb 3. However, there is | |
3471 | an AVL kill instruction in bb 2 that we can't backward fuse bb 3 or | |
3472 | forward bb 1 arbitrarily. We need available information of each block to | |
3473 | help for such cases. */ | |
6b6b9c68 JZZ |
3474 | changed_p |= backward_demand_fusion (); |
3475 | changed_p |= forward_demand_fusion (); | |
3476 | } | |
3477 | ||
3478 | changed_p = true; | |
3479 | while (changed_p) | |
3480 | { | |
3481 | changed_p = false; | |
3482 | prune_expressions (); | |
3483 | m_vector_manager->create_bitmap_vectors (); | |
3484 | compute_local_properties (); | |
4f673c5e JZZ |
3485 | compute_available (m_vector_manager->vector_comp, |
3486 | m_vector_manager->vector_kill, | |
3487 | m_vector_manager->vector_avout, | |
3488 | m_vector_manager->vector_avin); | |
6b6b9c68 | 3489 | changed_p |= cleanup_illegal_dirty_blocks (); |
4f673c5e JZZ |
3490 | m_vector_manager->free_bitmap_vectors (); |
3491 | if (!m_vector_manager->vector_exprs.is_empty ()) | |
3492 | m_vector_manager->vector_exprs.release (); | |
387cd9d3 | 3493 | } |
9243c3d1 JZZ |
3494 | |
3495 | if (dump_file) | |
3496 | { | |
3497 | fprintf (dump_file, "\n\nDirty blocks list: "); | |
681a5632 JZZ |
3498 | for (const bb_info *bb : crtl->ssa->bbs ()) |
3499 | if (m_vector_manager->vector_block_infos[bb->index ()] | |
3500 | .reaching_out.dirty_p ()) | |
3501 | fprintf (dump_file, "%d ", bb->index ()); | |
9243c3d1 JZZ |
3502 | fprintf (dump_file, "\n\n"); |
3503 | } | |
3504 | } | |
3505 | ||
6b6b9c68 JZZ |
3506 | /* Cleanup illegal dirty blocks. */ |
3507 | bool | |
3508 | pass_vsetvl::cleanup_illegal_dirty_blocks (void) | |
3509 | { | |
3510 | bool changed_p = false; | |
3511 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
3512 | { | |
3513 | basic_block cfg_bb = bb->cfg_bb (); | |
3514 | const auto &prop | |
3515 | = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out; | |
3516 | ||
3517 | /* If there is nothing to cleanup, just skip it. */ | |
3518 | if (!prop.valid_or_dirty_p ()) | |
3519 | continue; | |
3520 | ||
3521 | if (hard_empty_block_p (bb, prop)) | |
3522 | { | |
3523 | m_vector_manager->vector_block_infos[cfg_bb->index].local_dem | |
3524 | = vector_insn_info::get_hard_empty (); | |
3525 | m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out | |
3526 | = vector_insn_info::get_hard_empty (); | |
3527 | changed_p = true; | |
3528 | continue; | |
3529 | } | |
3530 | } | |
3531 | return changed_p; | |
3532 | } | |
3533 | ||
9243c3d1 JZZ |
3534 | /* Assemble the candidates expressions for LCM. */ |
3535 | void | |
3536 | pass_vsetvl::prune_expressions (void) | |
3537 | { | |
681a5632 | 3538 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 3539 | { |
681a5632 JZZ |
3540 | if (m_vector_manager->vector_block_infos[bb->index ()] |
3541 | .local_dem.valid_or_dirty_p ()) | |
9243c3d1 | 3542 | m_vector_manager->create_expr ( |
681a5632 JZZ |
3543 | m_vector_manager->vector_block_infos[bb->index ()].local_dem); |
3544 | if (m_vector_manager->vector_block_infos[bb->index ()] | |
9243c3d1 JZZ |
3545 | .reaching_out.valid_or_dirty_p ()) |
3546 | m_vector_manager->create_expr ( | |
681a5632 | 3547 | m_vector_manager->vector_block_infos[bb->index ()].reaching_out); |
9243c3d1 JZZ |
3548 | } |
3549 | ||
3550 | if (dump_file) | |
3551 | { | |
3552 | fprintf (dump_file, "\nThe total VSETVL expression num = %d\n", | |
3553 | m_vector_manager->vector_exprs.length ()); | |
3554 | fprintf (dump_file, "Expression List:\n"); | |
3555 | for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) | |
3556 | { | |
3557 | fprintf (dump_file, "Expr[%ld]:\n", i); | |
3558 | m_vector_manager->vector_exprs[i]->dump (dump_file); | |
3559 | fprintf (dump_file, "\n"); | |
3560 | } | |
3561 | } | |
3562 | } | |
3563 | ||
4f673c5e JZZ |
3564 | /* Compute the local properties of each recorded expression. |
3565 | ||
3566 | Local properties are those that are defined by the block, irrespective of | |
3567 | other blocks. | |
3568 | ||
3569 | An expression is transparent in a block if its operands are not modified | |
3570 | in the block. | |
3571 | ||
3572 | An expression is computed (locally available) in a block if it is computed | |
3573 | at least once and expression would contain the same value if the | |
3574 | computation was moved to the end of the block. | |
3575 | ||
3576 | An expression is locally anticipatable in a block if it is computed at | |
3577 | least once and expression would contain the same value if the computation | |
3578 | was moved to the beginning of the block. */ | |
9243c3d1 JZZ |
3579 | void |
3580 | pass_vsetvl::compute_local_properties (void) | |
3581 | { | |
3582 | /* - If T is locally available at the end of a block, then T' must be | |
3583 | available at the end of the same block. Since some optimization has | |
3584 | occurred earlier, T' might not be locally available, however, it must | |
3585 | have been previously computed on all paths. As a formula, T at AVLOC(B) | |
3586 | implies that T' at AVOUT(B). | |
3587 | An "available occurrence" is one that is the last occurrence in the | |
3588 | basic block and the operands are not modified by following statements in | |
3589 | the basic block [including this insn]. | |
3590 | ||
3591 | - If T is locally anticipated at the beginning of a block, then either | |
3592 | T', is locally anticipated or it is already available from previous | |
3593 | blocks. As a formula, this means that T at ANTLOC(B) implies that T' at | |
3594 | ANTLOC(B) at AVIN(B). | |
3595 | An "anticipatable occurrence" is one that is the first occurrence in the | |
3596 | basic block, the operands are not modified in the basic block prior | |
3597 | to the occurrence and the output is not used between the start of | |
3598 | the block and the occurrence. */ | |
3599 | ||
3600 | basic_block cfg_bb; | |
4f673c5e | 3601 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 3602 | { |
4f673c5e | 3603 | unsigned int curr_bb_idx = bb->index (); |
9243c3d1 JZZ |
3604 | const auto local_dem |
3605 | = m_vector_manager->vector_block_infos[curr_bb_idx].local_dem; | |
3606 | const auto reaching_out | |
3607 | = m_vector_manager->vector_block_infos[curr_bb_idx].reaching_out; | |
3608 | ||
4f673c5e JZZ |
3609 | /* Compute transparent. */ |
3610 | for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) | |
9243c3d1 | 3611 | { |
4f673c5e JZZ |
3612 | const vector_insn_info *expr = m_vector_manager->vector_exprs[i]; |
3613 | if (local_dem.real_dirty_p () || local_dem.valid_p () | |
3614 | || local_dem.unknown_p () | |
3615 | || has_vsetvl_killed_avl_p (bb, local_dem)) | |
9243c3d1 | 3616 | bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx], i); |
4f673c5e JZZ |
3617 | /* FIXME: Here we set the block as non-transparent (killed) if there |
3618 | is an instruction that kills the value of AVL according to the | |
3619 | definition of local transparency. This is true for the following | |
3620 | case: | |
3621 | ||
3622 | bb 0 (Loop label): | |
3623 | vsetvl zero, a5, e8, mf8 | |
3624 | bb 1: | |
3625 | def a5 | |
3626 | bb 2: | |
3627 | branch bb 0 (Loop label). | |
3628 | ||
3629 | In this case, we know there is a loop bb 0->bb 1->bb 2. According | |
3630 | to LCM definition, it is correct when we set vsetvl zero, a5, e8, | |
3631 | mf8 as non-transparent (killed) so that LCM will not hoist outside | |
3632 | the bb 0. | |
3633 | ||
3634 | However, such conservative configuration will forbid optimization | |
3635 | on some unlucky case. For example: | |
3636 | ||
3637 | bb 0: | |
3638 | li a5, 101 | |
3639 | bb 1: | |
3640 | vsetvl zero, a5, e8, mf8 | |
3641 | bb 2: | |
3642 | li a5, 101 | |
3643 | bb 3: | |
3644 | vsetvl zero, a5, e8, mf8. | |
3645 | So we also relax def a5 as transparent to gain more optimizations | |
3646 | as long as all the real def insns of avl do not come from this | |
3647 | block. This configuration may be still missing some optimization | |
3648 | opportunities. */ | |
6b6b9c68 | 3649 | if (find_reg_killed_by (bb, expr->get_avl ())) |
4f673c5e | 3650 | { |
6b6b9c68 JZZ |
3651 | hash_set<set_info *> sets |
3652 | = get_all_sets (expr->get_avl_source (), true, false, false); | |
3653 | if (any_set_in_bb_p (sets, bb)) | |
4f673c5e JZZ |
3654 | bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx], |
3655 | i); | |
3656 | } | |
9243c3d1 JZZ |
3657 | } |
3658 | ||
4f673c5e | 3659 | /* Compute anticipatable occurrences. */ |
6b6b9c68 JZZ |
3660 | if (local_dem.valid_p () || local_dem.real_dirty_p () |
3661 | || (has_vsetvl_killed_avl_p (bb, local_dem) | |
3662 | && vlmax_avl_p (local_dem.get_avl ()))) | |
4f673c5e JZZ |
3663 | if (anticipatable_occurrence_p (bb, local_dem)) |
3664 | bitmap_set_bit (m_vector_manager->vector_antic[curr_bb_idx], | |
3665 | m_vector_manager->get_expr_id (local_dem)); | |
9243c3d1 | 3666 | |
4f673c5e | 3667 | /* Compute available occurrences. */ |
9243c3d1 JZZ |
3668 | if (reaching_out.valid_or_dirty_p ()) |
3669 | { | |
9243c3d1 JZZ |
3670 | auto_vec<size_t> available_list |
3671 | = m_vector_manager->get_all_available_exprs (reaching_out); | |
3672 | for (size_t i = 0; i < available_list.length (); i++) | |
4f673c5e JZZ |
3673 | { |
3674 | const vector_insn_info *expr | |
3675 | = m_vector_manager->vector_exprs[available_list[i]]; | |
3676 | if (reaching_out.real_dirty_p () | |
3677 | || has_vsetvl_killed_avl_p (bb, reaching_out) | |
3678 | || available_occurrence_p (bb, *expr)) | |
3679 | bitmap_set_bit (m_vector_manager->vector_comp[curr_bb_idx], | |
3680 | available_list[i]); | |
3681 | } | |
9243c3d1 JZZ |
3682 | } |
3683 | } | |
3684 | ||
3685 | /* Compute kill for each basic block using: | |
3686 | ||
3687 | ~(TRANSP | COMP) | |
3688 | */ | |
3689 | ||
3690 | FOR_EACH_BB_FN (cfg_bb, cfun) | |
3691 | { | |
3692 | bitmap_ior (m_vector_manager->vector_kill[cfg_bb->index], | |
3693 | m_vector_manager->vector_transp[cfg_bb->index], | |
3694 | m_vector_manager->vector_comp[cfg_bb->index]); | |
3695 | bitmap_not (m_vector_manager->vector_kill[cfg_bb->index], | |
3696 | m_vector_manager->vector_kill[cfg_bb->index]); | |
3697 | } | |
3698 | ||
3699 | FOR_EACH_BB_FN (cfg_bb, cfun) | |
3700 | { | |
3701 | edge e; | |
3702 | edge_iterator ei; | |
3703 | ||
3704 | /* If the current block is the destination of an abnormal edge, we | |
3705 | kill all trapping (for PRE) and memory (for hoist) expressions | |
3706 | because we won't be able to properly place the instruction on | |
3707 | the edge. So make them neither anticipatable nor transparent. | |
3708 | This is fairly conservative. | |
3709 | ||
3710 | ??? For hoisting it may be necessary to check for set-and-jump | |
3711 | instructions here, not just for abnormal edges. The general problem | |
3712 | is that when an expression cannot be placed right at the end of | |
3713 | a basic block we should account for any side-effects of a subsequent | |
3714 | jump instructions that could clobber the expression. It would | |
3715 | be best to implement this check along the lines of | |
3716 | should_hoist_expr_to_dom where the target block is already known | |
3717 | and, hence, there's no need to conservatively prune expressions on | |
3718 | "intermediate" set-and-jump instructions. */ | |
3719 | FOR_EACH_EDGE (e, ei, cfg_bb->preds) | |
3720 | if (e->flags & EDGE_COMPLEX) | |
3721 | { | |
3722 | bitmap_clear (m_vector_manager->vector_antic[cfg_bb->index]); | |
3723 | bitmap_clear (m_vector_manager->vector_transp[cfg_bb->index]); | |
3724 | } | |
3725 | } | |
3726 | } | |
3727 | ||
3728 | /* Return true if VSETVL in the block can be refined as vsetvl zero,zero. */ | |
3729 | bool | |
6b6b9c68 JZZ |
3730 | pass_vsetvl::can_refine_vsetvl_p (const basic_block cfg_bb, |
3731 | const vector_insn_info &info) const | |
9243c3d1 JZZ |
3732 | { |
3733 | if (!m_vector_manager->all_same_ratio_p ( | |
3734 | m_vector_manager->vector_avin[cfg_bb->index])) | |
3735 | return false; | |
3736 | ||
005fad9d JZZ |
3737 | if (!m_vector_manager->all_same_avl_p ( |
3738 | cfg_bb, m_vector_manager->vector_avin[cfg_bb->index])) | |
3739 | return false; | |
3740 | ||
9243c3d1 JZZ |
3741 | size_t expr_id |
3742 | = bitmap_first_set_bit (m_vector_manager->vector_avin[cfg_bb->index]); | |
6b6b9c68 JZZ |
3743 | if (!m_vector_manager->vector_exprs[expr_id]->same_vlmax_p (info)) |
3744 | return false; | |
3745 | if (!m_vector_manager->vector_exprs[expr_id]->compatible_avl_p (info)) | |
9243c3d1 JZZ |
3746 | return false; |
3747 | ||
3748 | edge e; | |
3749 | edge_iterator ei; | |
3750 | bool all_valid_p = true; | |
3751 | FOR_EACH_EDGE (e, ei, cfg_bb->preds) | |
3752 | { | |
3753 | if (bitmap_empty_p (m_vector_manager->vector_avout[e->src->index])) | |
3754 | { | |
3755 | all_valid_p = false; | |
3756 | break; | |
3757 | } | |
3758 | } | |
3759 | ||
3760 | if (!all_valid_p) | |
3761 | return false; | |
3762 | return true; | |
3763 | } | |
3764 | ||
3765 | /* Optimize the case like this: | |
3766 | ||
3767 | bb 0: | |
3768 | vsetvl 0 a5,zero,e8,mf8 | |
3769 | insn 0 (demand SEW + LMUL) | |
3770 | bb 1: | |
3771 | vsetvl 1 a5,zero,e16,mf4 | |
3772 | insn 1 (demand SEW + LMUL) | |
3773 | ||
3774 | In this case, we should be able to refine | |
3775 | vsetvl 1 into vsetvl zero, zero according to AVIN. */ | |
3776 | void | |
3777 | pass_vsetvl::refine_vsetvls (void) const | |
3778 | { | |
3779 | basic_block cfg_bb; | |
3780 | FOR_EACH_BB_FN (cfg_bb, cfun) | |
3781 | { | |
3782 | auto info = m_vector_manager->vector_block_infos[cfg_bb->index].local_dem; | |
3783 | insn_info *insn = info.get_insn (); | |
3784 | if (!info.valid_p ()) | |
3785 | continue; | |
3786 | ||
3787 | rtx_insn *rinsn = insn->rtl (); | |
6b6b9c68 | 3788 | if (!can_refine_vsetvl_p (cfg_bb, info)) |
9243c3d1 JZZ |
3789 | continue; |
3790 | ||
ec99ffab JZZ |
3791 | /* We can't refine user vsetvl into vsetvl zero,zero since the dest |
3792 | will be used by the following instructions. */ | |
3793 | if (vector_config_insn_p (rinsn)) | |
3794 | { | |
3795 | m_vector_manager->to_refine_vsetvls.add (rinsn); | |
3796 | continue; | |
3797 | } | |
ff8f9544 JZ |
3798 | |
3799 | /* If all incoming edges to a block have a vector state that is compatible | |
3800 | with the block, then we need not emit a vsetvl in the current | |
3801 | block. */ | |
3802 | ||
3803 | gcc_assert (has_vtype_op (insn->rtl ())); | |
3804 | rinsn = PREV_INSN (insn->rtl ()); | |
3805 | gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ()))); | |
3806 | if (m_vector_manager->all_avail_in_compatible_p (cfg_bb)) | |
3807 | { | |
3808 | size_t id = m_vector_manager->get_expr_id (info); | |
3809 | if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], id)) | |
3810 | continue; | |
3811 | eliminate_insn (rinsn); | |
3812 | } | |
3813 | else | |
3814 | { | |
3815 | rtx new_pat | |
3816 | = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX); | |
3817 | change_insn (rinsn, new_pat); | |
3818 | } | |
9243c3d1 JZZ |
3819 | } |
3820 | } | |
3821 | ||
3822 | void | |
3823 | pass_vsetvl::cleanup_vsetvls () | |
3824 | { | |
3825 | basic_block cfg_bb; | |
3826 | FOR_EACH_BB_FN (cfg_bb, cfun) | |
3827 | { | |
3828 | auto &info | |
3829 | = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out; | |
3830 | gcc_assert (m_vector_manager->expr_set_num ( | |
3831 | m_vector_manager->vector_del[cfg_bb->index]) | |
3832 | <= 1); | |
3833 | for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) | |
3834 | { | |
3835 | if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], i)) | |
3836 | { | |
3837 | if (info.dirty_p ()) | |
3838 | info.set_unknown (); | |
3839 | else | |
3840 | { | |
4f673c5e JZZ |
3841 | const auto dem |
3842 | = m_vector_manager->vector_block_infos[cfg_bb->index] | |
3843 | .local_dem; | |
3844 | gcc_assert (dem == *m_vector_manager->vector_exprs[i]); | |
3845 | insn_info *insn = dem.get_insn (); | |
9243c3d1 JZZ |
3846 | gcc_assert (insn && insn->rtl ()); |
3847 | rtx_insn *rinsn; | |
ec99ffab JZZ |
3848 | /* We can't eliminate user vsetvl since the dest will be used |
3849 | * by the following instructions. */ | |
9243c3d1 | 3850 | if (vector_config_insn_p (insn->rtl ())) |
9243c3d1 | 3851 | { |
ec99ffab JZZ |
3852 | m_vector_manager->to_delete_vsetvls.add (insn->rtl ()); |
3853 | continue; | |
9243c3d1 | 3854 | } |
ec99ffab JZZ |
3855 | |
3856 | gcc_assert (has_vtype_op (insn->rtl ())); | |
3857 | rinsn = PREV_INSN (insn->rtl ()); | |
3858 | gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ()))); | |
9243c3d1 JZZ |
3859 | eliminate_insn (rinsn); |
3860 | } | |
3861 | } | |
3862 | } | |
3863 | } | |
3864 | } | |
3865 | ||
3866 | bool | |
3867 | pass_vsetvl::commit_vsetvls (void) | |
3868 | { | |
3869 | bool need_commit = false; | |
3870 | ||
3871 | for (int ed = 0; ed < NUM_EDGES (m_vector_manager->vector_edge_list); ed++) | |
3872 | { | |
3873 | for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) | |
3874 | { | |
3875 | edge eg = INDEX_EDGE (m_vector_manager->vector_edge_list, ed); | |
3876 | if (bitmap_bit_p (m_vector_manager->vector_insert[ed], i)) | |
3877 | { | |
3878 | const vector_insn_info *require | |
3879 | = m_vector_manager->vector_exprs[i]; | |
3880 | gcc_assert (require->valid_or_dirty_p ()); | |
3881 | rtl_profile_for_edge (eg); | |
3882 | start_sequence (); | |
3883 | ||
3884 | insn_info *insn = require->get_insn (); | |
3885 | vector_insn_info prev_info = vector_insn_info (); | |
005fad9d JZZ |
3886 | sbitmap bitdata = m_vector_manager->vector_avout[eg->src->index]; |
3887 | if (m_vector_manager->all_same_ratio_p (bitdata) | |
3888 | && m_vector_manager->all_same_avl_p (eg->dest, bitdata)) | |
9243c3d1 | 3889 | { |
005fad9d | 3890 | size_t first = bitmap_first_set_bit (bitdata); |
9243c3d1 JZZ |
3891 | prev_info = *m_vector_manager->vector_exprs[first]; |
3892 | } | |
3893 | ||
3894 | insert_vsetvl (EMIT_DIRECT, insn->rtl (), *require, prev_info); | |
3895 | rtx_insn *rinsn = get_insns (); | |
3896 | end_sequence (); | |
3897 | default_rtl_profile (); | |
3898 | ||
3899 | /* We should not get an abnormal edge here. */ | |
3900 | gcc_assert (!(eg->flags & EDGE_ABNORMAL)); | |
3901 | need_commit = true; | |
3902 | insert_insn_on_edge (rinsn, eg); | |
3903 | } | |
3904 | } | |
3905 | } | |
3906 | ||
4f673c5e | 3907 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 3908 | { |
4f673c5e | 3909 | basic_block cfg_bb = bb->cfg_bb (); |
9243c3d1 JZZ |
3910 | const auto reaching_out |
3911 | = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out; | |
3912 | if (!reaching_out.dirty_p ()) | |
3913 | continue; | |
3914 | ||
4f673c5e JZZ |
3915 | if (reaching_out.dirty_with_killed_avl_p ()) |
3916 | { | |
3917 | if (!has_vsetvl_killed_avl_p (bb, reaching_out)) | |
3918 | continue; | |
3919 | ||
3920 | unsigned int bb_index; | |
3921 | sbitmap_iterator sbi; | |
3922 | sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index]; | |
3923 | bool available_p = false; | |
3924 | EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi) | |
3925 | { | |
ec99ffab JZZ |
3926 | if (m_vector_manager->vector_exprs[bb_index]->available_p ( |
3927 | reaching_out)) | |
4f673c5e JZZ |
3928 | { |
3929 | available_p = true; | |
3930 | break; | |
3931 | } | |
3932 | } | |
3933 | if (available_p) | |
3934 | continue; | |
3935 | } | |
7ae4d1df JZZ |
3936 | |
3937 | rtx new_pat; | |
ec99ffab JZZ |
3938 | if (!reaching_out.demand_p (DEMAND_AVL)) |
3939 | { | |
3940 | vl_vtype_info new_info = reaching_out; | |
3941 | new_info.set_avl_info (avl_info (const0_rtx, nullptr)); | |
3942 | new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX); | |
3943 | } | |
3944 | else if (can_refine_vsetvl_p (cfg_bb, reaching_out)) | |
9243c3d1 JZZ |
3945 | new_pat |
3946 | = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, reaching_out, NULL_RTX); | |
7ae4d1df JZZ |
3947 | else if (vlmax_avl_p (reaching_out.get_avl ())) |
3948 | new_pat = gen_vsetvl_pat (VSETVL_NORMAL, reaching_out, | |
ec99ffab | 3949 | reaching_out.get_avl_reg_rtx ()); |
7ae4d1df JZZ |
3950 | else |
3951 | new_pat | |
3952 | = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, reaching_out, NULL_RTX); | |
9243c3d1 JZZ |
3953 | |
3954 | start_sequence (); | |
3955 | emit_insn (new_pat); | |
3956 | rtx_insn *rinsn = get_insns (); | |
3957 | end_sequence (); | |
3958 | insert_insn_end_basic_block (rinsn, cfg_bb); | |
3959 | if (dump_file) | |
3960 | { | |
3961 | fprintf (dump_file, | |
3962 | "\nInsert vsetvl insn %d at the end of <bb %d>:\n", | |
3963 | INSN_UID (rinsn), cfg_bb->index); | |
3964 | print_rtl_single (dump_file, rinsn); | |
3965 | } | |
3966 | } | |
3967 | ||
3968 | return need_commit; | |
3969 | } | |
3970 | ||
3971 | void | |
3972 | pass_vsetvl::pre_vsetvl (void) | |
3973 | { | |
3974 | /* Compute entity list. */ | |
3975 | prune_expressions (); | |
3976 | ||
cfe3fbc6 | 3977 | m_vector_manager->create_bitmap_vectors (); |
9243c3d1 JZZ |
3978 | compute_local_properties (); |
3979 | m_vector_manager->vector_edge_list = pre_edge_lcm_avs ( | |
3980 | m_vector_manager->vector_exprs.length (), m_vector_manager->vector_transp, | |
3981 | m_vector_manager->vector_comp, m_vector_manager->vector_antic, | |
3982 | m_vector_manager->vector_kill, m_vector_manager->vector_avin, | |
3983 | m_vector_manager->vector_avout, &m_vector_manager->vector_insert, | |
3984 | &m_vector_manager->vector_del); | |
3985 | ||
3986 | /* We should dump the information before CFG is changed. Otherwise it will | |
3987 | produce ICE (internal compiler error). */ | |
3988 | if (dump_file) | |
3989 | m_vector_manager->dump (dump_file); | |
3990 | ||
3991 | refine_vsetvls (); | |
3992 | cleanup_vsetvls (); | |
3993 | bool need_commit = commit_vsetvls (); | |
3994 | if (need_commit) | |
3995 | commit_edge_insertions (); | |
3996 | } | |
3997 | ||
c5a1fa59 JZ |
3998 | /* Before VSETVL PASS, RVV instructions pattern is depending on AVL operand |
3999 | implicitly. Since we will emit VSETVL instruction and make RVV instructions | |
4000 | depending on VL/VTYPE global status registers, we remove such an AVL operand | |
4001 | in the RVV instructions pattern here in order to remove AVL dependencies when | |
4002 | AVL operand is a register operand. | |
4003 | ||
4004 | Before the VSETVL PASS: | |
4005 | li a5,32 | |
4006 | ... | |
4007 | vadd.vv (..., a5) | |
4008 | After the VSETVL PASS: | |
4009 | li a5,32 | |
4010 | vsetvli zero, a5, ... | |
4011 | ... | |
4012 | vadd.vv (..., const_int 0). */ | |
9243c3d1 JZZ |
4013 | void |
4014 | pass_vsetvl::cleanup_insns (void) const | |
4015 | { | |
4016 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
4017 | { | |
4018 | for (insn_info *insn : bb->real_nondebug_insns ()) | |
4019 | { | |
4020 | rtx_insn *rinsn = insn->rtl (); | |
d51f2456 JZ |
4021 | const auto &dem = m_vector_manager->vector_insn_infos[insn->uid ()]; |
4022 | /* Eliminate local vsetvl: | |
4023 | bb 0: | |
4024 | vsetvl a5,a6,... | |
4025 | vsetvl zero,a5. | |
4026 | ||
4027 | Eliminate vsetvl in bb2 when a5 is only coming from | |
4028 | bb 0. */ | |
4029 | local_eliminate_vsetvl_insn (dem); | |
9243c3d1 JZZ |
4030 | |
4031 | if (vlmax_avl_insn_p (rinsn)) | |
4032 | { | |
4033 | eliminate_insn (rinsn); | |
4034 | continue; | |
4035 | } | |
4036 | ||
4037 | /* Erase the AVL operand from the instruction. */ | |
4038 | if (!has_vl_op (rinsn) || !REG_P (get_vl (rinsn))) | |
4039 | continue; | |
4040 | rtx avl = get_vl (rinsn); | |
a2d12abe | 4041 | if (count_regno_occurrences (rinsn, REGNO (avl)) == 1) |
9243c3d1 JZZ |
4042 | { |
4043 | /* Get the list of uses for the new instruction. */ | |
4044 | auto attempt = crtl->ssa->new_change_attempt (); | |
4045 | insn_change change (insn); | |
4046 | /* Remove the use of the substituted value. */ | |
4047 | access_array_builder uses_builder (attempt); | |
4048 | uses_builder.reserve (insn->num_uses () - 1); | |
4049 | for (use_info *use : insn->uses ()) | |
4050 | if (use != find_access (insn->uses (), REGNO (avl))) | |
4051 | uses_builder.quick_push (use); | |
4052 | use_array new_uses = use_array (uses_builder.finish ()); | |
4053 | change.new_uses = new_uses; | |
4054 | change.move_range = insn->ebb ()->insn_range (); | |
60bd33bc JZZ |
4055 | rtx pat; |
4056 | if (fault_first_load_p (rinsn)) | |
4057 | pat = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx); | |
4058 | else | |
4059 | { | |
4060 | rtx set = single_set (rinsn); | |
4061 | rtx src | |
4062 | = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx); | |
4063 | pat = gen_rtx_SET (SET_DEST (set), src); | |
4064 | } | |
9243c3d1 JZZ |
4065 | gcc_assert (change_insn (crtl->ssa, change, insn, pat)); |
4066 | } | |
4067 | } | |
4068 | } | |
4069 | } | |
4070 | ||
6b6b9c68 JZZ |
4071 | /* Phase 6 worker. Rebuild RTL_SSA, then walk every real non-debug insn: (1) for a discard-result vsetvl whose AVL register has a vsetvl as its single source, reuse that vsetvl's AVL when both have the same ratio; (2) for a vsetvl whose VL result has no non-debug use, delete it or rewrite it to a form that does not define VL. Insns collected in TO_DELETE are eliminated only after the walk. */ void |
4072 | pass_vsetvl::propagate_avl (void) const | |
4073 | { | |
4074 | /* Rebuild the RTL_SSA according to the new CFG generated by LCM. */ | |
4075 | /* Finalization of RTL_SSA. */ | |
4076 | free_dominance_info (CDI_DOMINATORS); | |
4077 | if (crtl->ssa->perform_pending_updates ()) | |
4078 | cleanup_cfg (0); | |
4079 | delete crtl->ssa; | |
4080 | crtl->ssa = nullptr; | |
4081 | /* Initialization of RTL_SSA. */ | |
4082 | calculate_dominance_info (CDI_DOMINATORS); | |
4083 | df_analyze (); | |
4084 | crtl->ssa = new function_info (cfun); | |
4085 | ||
4086 | hash_set<rtx_insn *> to_delete; /* Insns to eliminate once the walk below has finished. */ | |
4087 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
4088 | { | |
4089 | for (insn_info *insn : bb->real_nondebug_insns ()) | |
4090 | { | |
4091 | if (vsetvl_discard_result_insn_p (insn->rtl ())) | |
4092 | { | |
4093 | rtx avl = get_avl (insn->rtl ()); | |
4094 | if (!REG_P (avl)) /* Only a register AVL has a definition to look up below. */ | |
4095 | continue; | |
4096 | ||
4097 | set_info *set = find_access (insn->uses (), REGNO (avl))->def (); | |
4098 | insn_info *def_insn = extract_single_source (set); | |
4099 | if (!def_insn) /* extract_single_source gave no insn for the AVL definition. */ | |
4100 | continue; | |
4101 | ||
4102 | /* Handle this case, where the last vsetvli uses as its AVL the | |
4103 | register (a6) written by the first vsetvli: | |
4103 | vsetvli a6,zero,e32,m1,ta,mu | |
4104 | li a5,4096 | |
4105 | add a7,a0,a5 | |
4106 | addi a7,a7,-96 | |
4107 | vsetvli t1,zero,e8,mf8,ta,ma | |
4108 | vle8.v v24,0(a7) | |
4109 | add a5,a3,a5 | |
4110 | addi a5,a5,-96 | |
4111 | vse8.v v24,0(a5) | |
4112 | vsetvli zero,a6,e32,m1,tu,ma | |
4113 | */ | |
4114 | if (vsetvl_insn_p (def_insn->rtl ())) | |
4115 | { | |
4116 | vl_vtype_info def_info = get_vl_vtype_info (def_insn); | |
4117 | vl_vtype_info info = get_vl_vtype_info (insn); | |
4118 | rtx avl = get_avl (def_insn->rtl ()); /* NOTE: shadows the outer AVL; from here on AVL and VL are those of the defining vsetvl. */ | |
4119 | rtx vl = get_vl (def_insn->rtl ()); | |
4120 | if (def_info.get_ratio () == info.get_ratio ()) /* Only rewrite when both vsetvls have the same ratio. */ | |
4121 | { | |
4122 | if (vlmax_avl_p (def_info.get_avl ())) /* The def uses the VLMAX AVL: give INSN that AVL and regenerate it as a VSETVL_NORMAL pattern with the def's VL register. */ | |
4123 | { | |
4124 | info.set_avl_info ( | |
4125 | avl_info (def_info.get_avl (), nullptr)); | |
4126 | rtx new_pat | |
4127 | = gen_vsetvl_pat (VSETVL_NORMAL, info, vl); | |
4128 | validate_change (insn->rtl (), | |
4129 | &PATTERN (insn->rtl ()), new_pat, | |
4130 | false); | |
4131 | continue; | |
4132 | } | |
4133 | if (def_info.has_avl_imm () || rtx_equal_p (avl, vl)) /* The def's AVL is an immediate, or is the same rtx as its VL: emit a discard-result vsetvl with that AVL after INSN. */ | |
4134 | { | |
4135 | info.set_avl_info (avl_info (avl, nullptr)); | |
4136 | emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_AFTER, | |
4137 | info, NULL_RTX, insn->rtl ()); | |
4138 | if (set->single_nondebug_insn_use ()) /* SET has a single non-debug insn use: queue both INSN and its def for deletion. */ | |
4139 | { | |
4140 | to_delete.add (insn->rtl ()); | |
4141 | to_delete.add (def_insn->rtl ()); | |
4142 | } | |
4143 | continue; | |
4144 | } | |
4145 | } | |
4146 | } | |
4147 | } | |
4148 | ||
4149 | /* Change vsetvl rd, rs1 --> vsetvl zero, rs1, | |
4150 | if rd is not used by any nondebug instructions. | |
4151 | Even though this PASS runs after RA and so this does not help to | |
4152 | reduce register pressure, it can help instruction scheduling | |
4153 | since we remove the dependencies. */ | |
4154 | if (vsetvl_insn_p (insn->rtl ())) | |
4155 | { | |
4156 | rtx vl = get_vl (insn->rtl ()); | |
4157 | rtx avl = get_avl (insn->rtl ()); | |
6b6b9c68 JZZ |
4158 | def_info *def = find_access (insn->defs (), REGNO (vl)); |
4159 | set_info *set = safe_dyn_cast<set_info *> (def); | |
ec99ffab JZZ |
4160 | vector_insn_info info; |
4161 | info.parse_insn (insn); | |
6b6b9c68 | 4162 | gcc_assert (set); |
ec99ffab JZZ |
4163 | if (m_vector_manager->to_delete_vsetvls.contains (insn->rtl ())) /* INSN was recorded in to_delete_vsetvls: drop it from both recorded sets, and delete it if its VL result has no non-debug use. */ |
4164 | { | |
4165 | m_vector_manager->to_delete_vsetvls.remove (insn->rtl ()); | |
4166 | if (m_vector_manager->to_refine_vsetvls.contains ( | |
4167 | insn->rtl ())) | |
4168 | m_vector_manager->to_refine_vsetvls.remove (insn->rtl ()); | |
4169 | if (!set->has_nondebug_insn_uses ()) | |
4170 | { | |
4171 | to_delete.add (insn->rtl ()); | |
4172 | continue; | |
4173 | } | |
4174 | } | |
4175 | if (m_vector_manager->to_refine_vsetvls.contains (insn->rtl ())) /* INSN was recorded in to_refine_vsetvls: if its VL result has no non-debug use, regenerate it as a VSETVL_VTYPE_CHANGE_ONLY pattern. */ | |
4176 | { | |
4177 | m_vector_manager->to_refine_vsetvls.remove (insn->rtl ()); | |
4178 | if (!set->has_nondebug_insn_uses ()) | |
4179 | { | |
4180 | rtx new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, | |
4181 | info, NULL_RTX); | |
4182 | change_insn (insn->rtl (), new_pat); | |
4183 | continue; | |
4184 | } | |
4185 | } | |
4186 | if (vlmax_avl_p (avl)) /* A vsetvl with the VLMAX AVL is left as it is. */ | |
4187 | continue; | |
6b6b9c68 JZZ |
4188 | rtx new_pat |
4189 | = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, info, NULL_RTX); | |
4190 | if (!set->has_nondebug_insn_uses ()) /* VL result has no non-debug use: switch INSN to the discard-result pattern. */ | |
4191 | { | |
4192 | validate_change (insn->rtl (), &PATTERN (insn->rtl ()), | |
4193 | new_pat, false); | |
4194 | continue; | |
4195 | } | |
4196 | } | |
4197 | } | |
4198 | } | |
4199 | ||
4200 | for (rtx_insn *rinsn : to_delete) /* Deferred removal of everything queued during the walk. */ | |
4201 | eliminate_insn (rinsn); | |
4202 | } | |
4203 | ||
9243c3d1 JZZ |
4204 | /* Set up the state used by one run of the pass: build RTL_SSA when optimizing, create the vector infos manager, compute block probabilities, and dump the initial vector infos when a dump file is open. */ void |
4205 | pass_vsetvl::init (void) | |
4206 | { | |
4207 | if (optimize > 0) | |
4208 | { | |
4209 | /* Initialization of RTL_SSA. */ | |
4210 | calculate_dominance_info (CDI_DOMINATORS); | |
4211 | df_analyze (); | |
4212 | crtl->ssa = new function_info (cfun); | |
4213 | } | |
4214 | ||
4215 | m_vector_manager = new vector_infos_manager (); | |
4f673c5e | 4216 | compute_probabilities (); /* Returns immediately when not optimizing. */ |
9243c3d1 JZZ |
4217 | |
4218 | if (dump_file) | |
4219 | { | |
4220 | fprintf (dump_file, "\nPrologue: Initialize vector infos\n"); | |
4221 | m_vector_manager->dump (dump_file); | |
4222 | } | |
4223 | } | |
4224 | ||
4225 | /* Tear down the state of one run: when optimizing, finalize and delete RTL_SSA; then release and delete the vector infos manager. */ void | |
4226 | pass_vsetvl::done (void) | |
4227 | { | |
4228 | if (optimize > 0) | |
4229 | { | |
4230 | /* Finalization of RTL_SSA. */ | |
4231 | free_dominance_info (CDI_DOMINATORS); | |
4232 | if (crtl->ssa->perform_pending_updates ()) /* Clean up the CFG only when perform_pending_updates returns true. */ | |
4233 | cleanup_cfg (0); | |
4234 | delete crtl->ssa; | |
4235 | crtl->ssa = nullptr; | |
4236 | } | |
4237 | m_vector_manager->release (); | |
4238 | delete m_vector_manager; | |
4239 | m_vector_manager = nullptr; /* Do not leave dangling pointers behind. */ | |
4240 | } | |
4241 | ||
acc10c79 JZZ |
4242 | /* Compute probability for each block. The entry block is seeded with "always"; then, visiting blocks in the order of crtl->ssa->bbs (), the probability of each block times the probability of each outgoing edge is accumulated into the edge's destination block. Results are stored in m_vector_manager->vector_block_infos[index].probability. */ |
4243 | void | |
4244 | pass_vsetvl::compute_probabilities (void) | |
4245 | { | |
4246 | /* Don't compute it in -O0 since we don't need it. */ | |
4247 | if (!optimize) | |
4248 | return; | |
4249 | edge e; | |
4250 | edge_iterator ei; | |
4251 | ||
4252 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
4253 | { | |
4254 | basic_block cfg_bb = bb->cfg_bb (); | |
4255 | auto &curr_prob | |
4256 | = m_vector_manager->vector_block_infos[cfg_bb->index].probability; | |
c129d22d JZZ |
4257 |
4258 | /* GCC assumes the entry block (bb 0) is always executed, | |
4259 | so set its probability to "always". */ | |
acc10c79 JZZ |
4260 | if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb) |
4261 | curr_prob = profile_probability::always (); | |
c129d22d JZZ |
4262 | /* The exit block (bb 1) does not need to be processed. */ |
4263 | if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb) | |
4264 | continue; | |
4265 | ||
acc10c79 JZZ |
4266 | gcc_assert (curr_prob.initialized_p ()); /* Every block visited here must already have received a probability. */ |
4267 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
4268 | { | |
4269 | auto &new_prob | |
4270 | = m_vector_manager->vector_block_infos[e->dest->index].probability; | |
4271 | if (!new_prob.initialized_p ()) /* First edge reaching this successor: initialize its probability. */ | |
4272 | new_prob = curr_prob * e->probability; | |
4273 | else if (new_prob == profile_probability::always ()) /* Already "always": nothing more to add. */ | |
4274 | continue; | |
4275 | else /* Otherwise accumulate this edge's contribution. */ | |
4276 | new_prob += curr_prob * e->probability; | |
4277 | } | |
4278 | } | |
acc10c79 JZZ |
4279 | } |
4280 | ||
9243c3d1 JZZ |
4281 | /* Lazy vsetvl insertion for optimize > 0. Runs six phases in order: per-block backward info computation, per-block vsetvl emission, cross-block demand fusion, LCM-based PRE, AVL/VL operand cleanup and AVL propagation. When a dump file is open, a banner is written before each phase and the vector infos are dumped after phases 1-3. */ |
4282 | void | |
4283 | pass_vsetvl::lazy_vsetvl (void) | |
4284 | { | |
4285 | if (dump_file) | |
4286 | fprintf (dump_file, | |
4287 | "\nEntering Lazy VSETVL PASS and Handling %d basic blocks for " | |
4288 | "function:%s\n", | |
4289 | n_basic_blocks_for_fn (cfun), function_name (cfun)); | |
4290 | ||
4291 | /* Phase 1 - Compute the local dems within each block. | |
4292 | The data-flow analysis within each block is backward analysis. */ | |
4293 | if (dump_file) | |
4294 | fprintf (dump_file, "\nPhase 1: Compute local backward vector infos\n"); | |
4295 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
4296 | compute_local_backward_infos (bb); | |
4297 | if (dump_file) | |
4298 | m_vector_manager->dump (dump_file); | |
4299 | ||
4300 | /* Phase 2 - Emit vsetvl instructions within each basic block according to | |
4301 | demand, compute and save ANTLOC && AVLOC of each block. */ | |
4302 | if (dump_file) | |
4303 | fprintf (dump_file, | |
4304 | "\nPhase 2: Emit vsetvl instruction within each block\n"); | |
4305 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
4306 | emit_local_forward_vsetvls (bb); | |
4307 | if (dump_file) | |
4308 | m_vector_manager->dump (dump_file); | |
4309 | ||
4310 | /* Phase 3 - Propagate demanded info across blocks. */ | |
4311 | if (dump_file) | |
4312 | fprintf (dump_file, "\nPhase 3: Demands propagation across blocks\n"); | |
387cd9d3 | 4313 | demand_fusion (); |
9243c3d1 JZZ |
4314 | if (dump_file) |
4315 | m_vector_manager->dump (dump_file); | |
4316 | ||
4317 | /* Phase 4 - Lazy code motion. */ | |
4318 | if (dump_file) | |
4319 | fprintf (dump_file, "\nPhase 4: PRE vsetvl by Lazy code motion (LCM)\n"); | |
4320 | pre_vsetvl (); | |
4321 | ||
4322 | /* Phase 5 - Cleanup AVL && VL operand of RVV instruction. */ | |
4323 | if (dump_file) | |
4324 | fprintf (dump_file, "\nPhase 5: Cleanup AVL and VL operands\n"); | |
4325 | cleanup_insns (); | |
6b6b9c68 JZZ |
4326 |
4327 | /* Phase 6 - Rebuild RTL_SSA to propagate AVL between vsetvls. */ | |
4328 | if (dump_file) | |
4329 | fprintf (dump_file, | |
4330 | "\nPhase 6: Rebuild RTL_SSA to propagate AVL between vsetvls\n"); | |
4331 | propagate_avl (); | |
9243c3d1 JZZ |
4332 | } |
4333 | ||
4334 | /* Main entry point for this pass. Always returns 0. */ | |
4335 | unsigned int | |
4336 | pass_vsetvl::execute (function *) | |
4337 | { | |
4338 | if (n_basic_blocks_for_fn (cfun) <= 0) /* Nothing to do for a function without basic blocks. */ | |
4339 | return 0; | |
4340 | ||
ca8fb009 JZZ |
4341 | /* An RVV instruction may still change when it is split, i.e. it is |
4342 | not yet in its stable form. Split all insns here to avoid potential | |
4343 | issues, since the VSETVL pass is inserted before the split pass. */ | |
4344 | split_all_insns (); | |
9243c3d1 JZZ |
4345 |
4346 | /* Early return if there are no vector instructions. */ | |
4347 | if (!has_vector_insn (cfun)) | |
4348 | return 0; | |
4349 | ||
4350 | init (); | |
4351 | ||
4352 | if (!optimize) /* -O0 uses simple_vsetvl; any other level uses lazy_vsetvl. */ | |
4353 | simple_vsetvl (); | |
4354 | else | |
4355 | lazy_vsetvl (); | |
4356 | ||
4357 | done (); | |
4358 | return 0; | |
4359 | } | |
4360 | ||
4361 | /* Create and return a new pass_vsetvl object constructed with CTXT. */ rtl_opt_pass * | |
4362 | make_pass_vsetvl (gcc::context *ctxt) | |
4363 | { | |
4364 | return new pass_vsetvl (ctxt); | |
4365 | } |