]>
Commit | Line | Data |
---|---|---|
9243c3d1 | 1 | /* VSETVL pass for RISC-V 'V' Extension for GNU compiler. |
a945c346 | 2 | Copyright (C) 2022-2024 Free Software Foundation, Inc. |
9243c3d1 JZZ |
3 | Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd. |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or(at your option) | |
10 | any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
29331e72 LD |
21 | /* The values of the vl and vtype registers will affect the behavior of RVV |
22 | insns. That is, when we need to execute an RVV instruction, we need to set | |
23 | the correct vl and vtype values by executing the vsetvl instruction before. | |
24 | Executing the fewest number of vsetvl instructions while keeping the behavior | |
25 | the same is the problem this pass is trying to solve. This vsetvl pass is | |
26 | divided into 5 phases: | |
27 | ||
28 | - Phase 1 (fuse local vsetvl infos): traverses each Basic Block, parses | |
29 | each instruction in it that affects vl and vtype state and generates an | |
30 | array of vsetvl_info objects. Then traverse the vsetvl_info array from | |
31 | front to back and perform fusion according to the fusion rules. The fused | |
32 | vsetvl infos are stored in the vsetvl_block_info object's `infos` field. | |
33 | ||
34 | - Phase 2 (earliest fuse global vsetvl infos): The header_info and | |
35 | footer_info of vsetvl_block_info are used as expressions, and the | |
36 | earliest of each expression is computed. Based on the earliest | |
37 | information, try to lift up the corresponding vsetvl info to the src | |
38 | basic block of the edge (mainly to reduce the total number of vsetvl | |
39 | instructions, this uplift will cause some execution paths to execute | |
40 | vsetvl instructions that shouldn't be there). | |
41 | ||
42 | - Phase 3 (pre global vsetvl info): The header_info and footer_info of | |
43 | vsetvl_block_info are used as expressions, and the LCM algorithm is used | |
44 | to compute the header_info that needs to be deleted and the one that | |
45 | needs to be inserted in some edges. | |
46 | ||
47 | - Phase 4 (emit vsetvl insns) : Based on the fusion result of Phase 1 and | |
48 | the deletion and insertion information of Phase 3, the mandatory vsetvl | |
49 | instruction insertion, modification and deletion are performed. | |
50 | ||
51 | - Phase 5 (cleanup): Clean up the avl operand in the RVV operator | |
52 | instruction and cleanup the unused dest operand of the vsetvl insn. | |
53 | ||
54 | After the Phase 1 a virtual CFG of vsetvl_info is generated. The virtual | |
55 | basic block is represented by vsetvl_block_info, and the virtual vsetvl | |
56 | statements inside are represented by vsetvl_info. The later phases 2 and 3 | |
57 | are constantly modifying and adjusting this virtual CFG. Phase 4 performs | |
58 | insertion, modification and deletion of vsetvl instructions based on the | |
59 | optimized virtual CFG. The Phase 1, 2 and 3 do not involve modifications to | |
60 | the RTL. | |
61 | */ | |
9243c3d1 JZZ |
62 | |
63 | #define IN_TARGET_CODE 1 | |
64 | #define INCLUDE_ALGORITHM | |
65 | #define INCLUDE_FUNCTIONAL | |
66 | ||
67 | #include "config.h" | |
68 | #include "system.h" | |
69 | #include "coretypes.h" | |
70 | #include "tm.h" | |
71 | #include "backend.h" | |
72 | #include "rtl.h" | |
73 | #include "target.h" | |
74 | #include "tree-pass.h" | |
75 | #include "df.h" | |
76 | #include "rtl-ssa.h" | |
77 | #include "cfgcleanup.h" | |
78 | #include "insn-config.h" | |
79 | #include "insn-attr.h" | |
80 | #include "insn-opinit.h" | |
81 | #include "tm-constrs.h" | |
82 | #include "cfgrtl.h" | |
83 | #include "cfganal.h" | |
84 | #include "lcm.h" | |
85 | #include "predict.h" | |
86 | #include "profile-count.h" | |
a3ad2301 | 87 | #include "gcse.h" |
4a0a8dc1 | 88 | #include "cfgloop.h" |
9243c3d1 JZZ |
89 | |
90 | using namespace rtl_ssa; | |
91 | using namespace riscv_vector; | |
92 | ||
29331e72 LD |
93 | /* Set the bitmap DST to the union of SRC of predecessors of |
94 | basic block B. | |
95 | It's a bit different from bitmap_union_of_preds in cfganal.cc. This function | |
96 | takes into account the case where pred is ENTRY basic block. The main reason | |
97 | for this difference is to make it easier to insert some special value into | |
d83070ae | 98 | the ENTRY basic block. For example, vsetvl_info with a status of UNKNOWN. */ |
29331e72 LD |
99 | static void |
100 | bitmap_union_of_preds_with_entry (sbitmap dst, sbitmap *src, basic_block b) | |
101 | { | |
102 | unsigned int set_size = dst->size; | |
103 | edge e; | |
104 | unsigned ix; | |
105 | ||
106 | for (ix = 0; ix < EDGE_COUNT (b->preds); ix++) | |
107 | { | |
108 | e = EDGE_PRED (b, ix); | |
109 | bitmap_copy (dst, src[e->src->index]); | |
110 | break; | |
111 | } | |
ec99ffab | 112 | |
29331e72 LD |
113 | if (ix == EDGE_COUNT (b->preds)) |
114 | bitmap_clear (dst); | |
115 | else | |
116 | for (ix++; ix < EDGE_COUNT (b->preds); ix++) | |
117 | { | |
118 | unsigned int i; | |
119 | SBITMAP_ELT_TYPE *p, *r; | |
120 | ||
121 | e = EDGE_PRED (b, ix); | |
122 | p = src[e->src->index]->elms; | |
123 | r = dst->elms; | |
124 | for (i = 0; i < set_size; i++) | |
125 | *r++ |= *p++; | |
126 | } | |
127 | } | |
128 | ||
d83070ae KC |
129 | /* Compute the reaching definition in and out based on the gen and kill |
130 | information in each basic block. | |
131 | This function references the compute_available implementation in lcm.cc */ | |
29331e72 LD |
132 | static void |
133 | compute_reaching_defintion (sbitmap *gen, sbitmap *kill, sbitmap *in, | |
134 | sbitmap *out) | |
9243c3d1 | 135 | { |
29331e72 LD |
136 | edge e; |
137 | basic_block *worklist, *qin, *qout, *qend, bb; | |
138 | unsigned int qlen; | |
139 | edge_iterator ei; | |
140 | ||
141 | /* Allocate a worklist array/queue. Entries are only added to the | |
142 | list if they were not already on the list. So the size is | |
143 | bounded by the number of basic blocks. */ | |
144 | qin = qout = worklist | |
145 | = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); | |
146 | ||
147 | /* Put every block on the worklist; this is necessary because of the | |
148 | optimistic initialization of AVOUT above. Use reverse postorder | |
149 | to make the forward dataflow problem require less iterations. */ | |
150 | int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); | |
151 | int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false); | |
152 | for (int i = 0; i < n; ++i) | |
153 | { | |
154 | bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]); | |
155 | *qin++ = bb; | |
156 | bb->aux = bb; | |
157 | } | |
158 | free (rpo); | |
159 | ||
160 | qin = worklist; | |
161 | qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS]; | |
162 | qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; | |
163 | ||
164 | /* Mark blocks which are successors of the entry block so that we | |
165 | can easily identify them below. */ | |
166 | FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) | |
167 | e->dest->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun); | |
168 | ||
169 | /* Iterate until the worklist is empty. */ | |
170 | while (qlen) | |
171 | { | |
172 | /* Take the first entry off the worklist. */ | |
173 | bb = *qout++; | |
174 | qlen--; | |
175 | ||
176 | if (qout >= qend) | |
177 | qout = worklist; | |
178 | ||
179 | /* Do not clear the aux field for blocks which are successors of the | |
180 | ENTRY block. That way we never add then to the worklist again. */ | |
181 | if (bb->aux != ENTRY_BLOCK_PTR_FOR_FN (cfun)) | |
182 | bb->aux = NULL; | |
183 | ||
184 | bitmap_union_of_preds_with_entry (in[bb->index], out, bb); | |
185 | ||
186 | if (bitmap_ior_and_compl (out[bb->index], gen[bb->index], in[bb->index], | |
187 | kill[bb->index])) | |
188 | /* If the out state of this block changed, then we need | |
189 | to add the successors of this block to the worklist | |
190 | if they are not already on the worklist. */ | |
191 | FOR_EACH_EDGE (e, ei, bb->succs) | |
192 | if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)) | |
193 | { | |
194 | *qin++ = e->dest; | |
195 | e->dest->aux = e; | |
196 | qlen++; | |
197 | ||
198 | if (qin >= qend) | |
199 | qin = worklist; | |
200 | } | |
201 | } | |
202 | ||
203 | clear_aux_for_edges (); | |
204 | clear_aux_for_blocks (); | |
205 | free (worklist); | |
9243c3d1 JZZ |
206 | } |
207 | ||
29331e72 LD |
208 | /* Classification of vsetvl instruction. */ |
enum vsetvl_type
{
  VSETVL_NORMAL = 0,
  VSETVL_VTYPE_CHANGE_ONLY = 1,
  VSETVL_DISCARD_RESULT = 2,
  /* Number of classifications above; keep this entry last.  */
  NUM_VSETVL_TYPE = 3
};
9243c3d1 | 216 | |
/* How a generated insn is to be emitted.  */
enum emit_type
{
  /* emit_insn directly.  */
  EMIT_DIRECT = 0,
  EMIT_BEFORE = 1,
  EMIT_AFTER = 2,
};
224 | ||
225 | /* dump helper functions */ | |
226 | static const char * | |
227 | vlmul_to_str (vlmul_type vlmul) | |
228 | { | |
229 | switch (vlmul) | |
230 | { | |
231 | case LMUL_1: | |
232 | return "m1"; | |
233 | case LMUL_2: | |
234 | return "m2"; | |
235 | case LMUL_4: | |
236 | return "m4"; | |
237 | case LMUL_8: | |
238 | return "m8"; | |
239 | case LMUL_RESERVED: | |
240 | return "INVALID LMUL"; | |
241 | case LMUL_F8: | |
242 | return "mf8"; | |
243 | case LMUL_F4: | |
244 | return "mf4"; | |
245 | case LMUL_F2: | |
246 | return "mf2"; | |
247 | ||
248 | default: | |
249 | gcc_unreachable (); | |
250 | } | |
9243c3d1 JZZ |
251 | } |
252 | ||
29331e72 LD |
/* Dump helper: spell out a tail/mask policy flag.  */
static const char *
policy_to_str (bool agnostic_p)
{
  if (agnostic_p)
    return "agnostic";
  return "undisturbed";
}
258 | ||
9243c3d1 JZZ |
259 | /* Return true if it is an RVV instruction depends on VTYPE global |
260 | status register. */ | |
261 | static bool | |
262 | has_vtype_op (rtx_insn *rinsn) | |
263 | { | |
264 | return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn); | |
265 | } | |
266 | ||
ec99ffab JZZ |
267 | /* Return true if the instruction ignores VLMUL field of VTYPE. */ |
268 | static bool | |
269 | ignore_vlmul_insn_p (rtx_insn *rinsn) | |
270 | { | |
271 | return get_attr_type (rinsn) == TYPE_VIMOVVX | |
272 | || get_attr_type (rinsn) == TYPE_VFMOVVF | |
273 | || get_attr_type (rinsn) == TYPE_VIMOVXV | |
274 | || get_attr_type (rinsn) == TYPE_VFMOVFV; | |
275 | } | |
276 | ||
277 | /* Return true if the instruction is scalar move instruction. */ | |
278 | static bool | |
279 | scalar_move_insn_p (rtx_insn *rinsn) | |
280 | { | |
281 | return get_attr_type (rinsn) == TYPE_VIMOVXV | |
282 | || get_attr_type (rinsn) == TYPE_VFMOVFV; | |
283 | } | |
284 | ||
60bd33bc JZZ |
285 | /* Return true if the instruction is fault first load instruction. */ |
286 | static bool | |
287 | fault_first_load_p (rtx_insn *rinsn) | |
288 | { | |
6313b045 JZZ |
289 | return recog_memoized (rinsn) >= 0 |
290 | && (get_attr_type (rinsn) == TYPE_VLDFF | |
291 | || get_attr_type (rinsn) == TYPE_VLSEGDFF); | |
60bd33bc JZZ |
292 | } |
293 | ||
294 | /* Return true if the instruction is read vl instruction. */ | |
295 | static bool | |
296 | read_vl_insn_p (rtx_insn *rinsn) | |
297 | { | |
298 | return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_RDVL; | |
299 | } | |
300 | ||
9243c3d1 JZZ |
301 | /* Return true if it is a vsetvl instruction. */ |
302 | static bool | |
303 | vector_config_insn_p (rtx_insn *rinsn) | |
304 | { | |
305 | return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL; | |
306 | } | |
307 | ||
308 | /* Return true if it is vsetvldi or vsetvlsi. */ | |
309 | static bool | |
310 | vsetvl_insn_p (rtx_insn *rinsn) | |
311 | { | |
29331e72 | 312 | if (!rinsn || !vector_config_insn_p (rinsn)) |
6b6b9c68 | 313 | return false; |
85112fbb | 314 | return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi |
6b6b9c68 JZZ |
315 | || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi); |
316 | } | |
317 | ||
22622a5a JZ |
318 | /* Return true if it is the bogus vsetvl_pre instruction: |
319 | ||
320 | (define_insn "@vlmax_avl<mode>" | |
321 | [(set (match_operand:P 0 "register_operand" "=r") | |
322 | (unspec:P [(match_operand:P 1 "const_int_operand" "i")] UNSPEC_VLMAX))] | |
323 | "TARGET_VECTOR" | |
324 | "" | |
325 | [(set_attr "type" "vsetvl_pre")]) | |
326 | ||
327 | As described above, it's a bogus instruction which doesn't emit any assembler | |
328 | and should be removed eventually. It's used for occupying a scalar register | |
329 | for VLMAX avl RVV instruction before register allocation. | |
330 | ||
331 | Before RA: | |
332 | ||
333 | ... | |
334 | vsetvl_pre (set r136) | |
335 | vadd.vv (use r136 with VLMAX avl) | |
336 | ... | |
337 | ||
338 | After RA: | |
339 | ||
340 | ... | |
341 | vsetvl_pre (set a5) | |
342 | vadd.vv (use a5 with VLMAX avl) | |
343 | ... | |
344 | ||
345 | VSETVL PASS: | |
346 | ||
347 | ... | |
348 | vsetvl_pre (set a5) ---> removed. | |
349 | vsetvl a5,zero,... ---> Inserted. | |
350 | vadd.vv | |
351 | ... | |
352 | */ | |
353 | static bool | |
354 | vsetvl_pre_insn_p (rtx_insn *rinsn) | |
355 | { | |
356 | return recog_memoized (rinsn) >= 0 | |
357 | && get_attr_type (rinsn) == TYPE_VSETVL_PRE; | |
358 | } | |
359 | ||
6b6b9c68 JZZ |
360 | /* Return true if it is vsetvl zero, rs1. */ |
361 | static bool | |
362 | vsetvl_discard_result_insn_p (rtx_insn *rinsn) | |
363 | { | |
364 | if (!vector_config_insn_p (rinsn)) | |
365 | return false; | |
366 | return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi | |
367 | || INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi); | |
9243c3d1 JZZ |
368 | } |
369 | ||
9243c3d1 | 370 | static bool |
4f673c5e | 371 | real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb) |
9243c3d1 | 372 | { |
4f673c5e | 373 | return insn != nullptr && insn->is_real () && insn->bb () == bb; |
9243c3d1 JZZ |
374 | } |
375 | ||
29331e72 | 376 | /* Helper function to get VL operand for VLMAX insn. */ |
6b6b9c68 JZZ |
377 | static rtx |
378 | get_vl (rtx_insn *rinsn) | |
379 | { | |
380 | if (has_vl_op (rinsn)) | |
381 | { | |
382 | extract_insn_cached (rinsn); | |
383 | return recog_data.operand[get_attr_vl_op_idx (rinsn)]; | |
384 | } | |
385 | return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0)); | |
4f673c5e JZZ |
386 | } |
387 | ||
6b6b9c68 JZZ |
388 | /* Helper function to get AVL operand. */ |
389 | static rtx | |
390 | get_avl (rtx_insn *rinsn) | |
391 | { | |
392 | if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn)) | |
393 | return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0); | |
394 | ||
395 | if (!has_vl_op (rinsn)) | |
396 | return NULL_RTX; | |
5e714992 | 397 | if (vlmax_avl_type_p (rinsn)) |
6b6b9c68 JZZ |
398 | return RVV_VLMAX; |
399 | extract_insn_cached (rinsn); | |
400 | return recog_data.operand[get_attr_vl_op_idx (rinsn)]; | |
401 | } | |
402 | ||
9243c3d1 JZZ |
403 | /* Get default mask policy. */ |
404 | static bool | |
405 | get_default_ma () | |
406 | { | |
407 | /* For the instruction that doesn't require MA, we still need a default value | |
408 | to emit vsetvl. We pick up the default value according to prefer policy. */ | |
409 | return (bool) (get_prefer_mask_policy () & 0x1 | |
410 | || (get_prefer_mask_policy () >> 1 & 0x1)); | |
411 | } | |
412 | ||
9243c3d1 JZZ |
413 | /* Helper function to get MA operand. */ |
414 | static bool | |
415 | mask_agnostic_p (rtx_insn *rinsn) | |
416 | { | |
417 | /* If it doesn't have MA, we return agnostic by default. */ | |
418 | extract_insn_cached (rinsn); | |
419 | int ma = get_attr_ma (rinsn); | |
420 | return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma); | |
421 | } | |
422 | ||
423 | /* Return true if FN has a vector instruction that use VL/VTYPE. */ | |
424 | static bool | |
425 | has_vector_insn (function *fn) | |
426 | { | |
427 | basic_block cfg_bb; | |
428 | rtx_insn *rinsn; | |
429 | FOR_ALL_BB_FN (cfg_bb, fn) | |
430 | FOR_BB_INSNS (cfg_bb, rinsn) | |
431 | if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn)) | |
432 | return true; | |
433 | return false; | |
434 | } | |
435 | ||
29331e72 LD |
436 | static vlmul_type |
437 | calculate_vlmul (unsigned int sew, unsigned int ratio) | |
9243c3d1 | 438 | { |
29331e72 LD |
439 | const vlmul_type ALL_LMUL[] |
440 | = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2}; | |
441 | for (const vlmul_type vlmul : ALL_LMUL) | |
442 | if (calculate_ratio (sew, vlmul) == ratio) | |
443 | return vlmul; | |
444 | return LMUL_RESERVED; | |
9243c3d1 JZZ |
445 | } |
446 | ||
29331e72 LD |
447 | /* Get the currently supported maximum sew used in the int rvv instructions. */ |
448 | static uint8_t | |
449 | get_max_int_sew () | |
9243c3d1 | 450 | { |
29331e72 LD |
451 | if (TARGET_VECTOR_ELEN_64) |
452 | return 64; | |
453 | else if (TARGET_VECTOR_ELEN_32) | |
454 | return 32; | |
455 | gcc_unreachable (); | |
9243c3d1 JZZ |
456 | } |
457 | ||
29331e72 LD |
458 | /* Get the currently supported maximum sew used in the float rvv instructions. |
459 | */ | |
460 | static uint8_t | |
461 | get_max_float_sew () | |
462 | { | |
463 | if (TARGET_VECTOR_ELEN_FP_64) | |
464 | return 64; | |
465 | else if (TARGET_VECTOR_ELEN_FP_32) | |
466 | return 32; | |
467 | else if (TARGET_VECTOR_ELEN_FP_16) | |
468 | return 16; | |
469 | gcc_unreachable (); | |
9243c3d1 JZZ |
470 | } |
471 | ||
/* Bit flags selecting which kinds of definition are collected; they are
   meant to be OR-ed together into a mask.  */
enum def_type
{
  REAL_SET = 0x01,
  PHI_SET = 0x02,
  BB_HEAD_SET = 0x04,
  BB_END_SET = 0x08,
  /* ??? TODO: In RTL_SSA framework, we have REAL_SET,
     PHI_SET, BB_HEAD_SET, BB_END_SET and
     CLOBBER_DEF def_info types.  Currently,
     we conservatively do not optimize clobber
     def since we don't see the case that we
     need to optimize it.  */
  CLOBBER_DEF = 0x10
};
9243c3d1 | 486 | |
29331e72 LD |
487 | static bool |
488 | insn_should_be_added_p (const insn_info *insn, unsigned int types) | |
da93c41c | 489 | { |
29331e72 LD |
490 | if (insn->is_real () && (types & REAL_SET)) |
491 | return true; | |
492 | if (insn->is_phi () && (types & PHI_SET)) | |
493 | return true; | |
494 | if (insn->is_bb_head () && (types & BB_HEAD_SET)) | |
495 | return true; | |
496 | if (insn->is_bb_end () && (types & BB_END_SET)) | |
497 | return true; | |
498 | return false; | |
da93c41c JZ |
499 | } |
500 | ||
29331e72 LD |
501 | static const hash_set<use_info *> |
502 | get_all_real_uses (insn_info *insn, unsigned regno) | |
9243c3d1 | 503 | { |
29331e72 | 504 | gcc_assert (insn->is_real ()); |
9243c3d1 | 505 | |
29331e72 LD |
506 | hash_set<use_info *> uses; |
507 | auto_vec<phi_info *> work_list; | |
508 | hash_set<phi_info *> visited_list; | |
9243c3d1 | 509 | |
29331e72 | 510 | for (def_info *def : insn->defs ()) |
9243c3d1 | 511 | { |
29331e72 LD |
512 | if (!def->is_reg () || def->regno () != regno) |
513 | continue; | |
514 | set_info *set = safe_dyn_cast<set_info *> (def); | |
515 | if (!set) | |
516 | continue; | |
517 | for (use_info *use : set->nondebug_insn_uses ()) | |
518 | if (use->insn ()->is_real ()) | |
519 | uses.add (use); | |
520 | for (use_info *use : set->phi_uses ()) | |
521 | work_list.safe_push (use->phi ()); | |
9243c3d1 | 522 | } |
9243c3d1 | 523 | |
29331e72 | 524 | while (!work_list.is_empty ()) |
60bd33bc | 525 | { |
29331e72 LD |
526 | phi_info *phi = work_list.pop (); |
527 | visited_list.add (phi); | |
60bd33bc | 528 | |
29331e72 LD |
529 | for (use_info *use : phi->nondebug_insn_uses ()) |
530 | if (use->insn ()->is_real ()) | |
531 | uses.add (use); | |
532 | for (use_info *use : phi->phi_uses ()) | |
533 | if (!visited_list.contains (use->phi ())) | |
534 | work_list.safe_push (use->phi ()); | |
60bd33bc | 535 | } |
29331e72 | 536 | return uses; |
60bd33bc JZZ |
537 | } |
538 | ||
29331e72 LD |
539 | /* Recursively find all define instructions. The kind of instruction is |
540 | specified by the DEF_TYPE. */ | |
541 | static hash_set<set_info *> | |
542 | get_all_sets (phi_info *phi, unsigned int types) | |
9243c3d1 | 543 | { |
29331e72 LD |
544 | hash_set<set_info *> insns; |
545 | auto_vec<phi_info *> work_list; | |
546 | hash_set<phi_info *> visited_list; | |
547 | if (!phi) | |
548 | return hash_set<set_info *> (); | |
549 | work_list.safe_push (phi); | |
9243c3d1 | 550 | |
29331e72 | 551 | while (!work_list.is_empty ()) |
9243c3d1 | 552 | { |
29331e72 LD |
553 | phi_info *phi = work_list.pop (); |
554 | visited_list.add (phi); | |
555 | for (use_info *use : phi->inputs ()) | |
556 | { | |
557 | def_info *def = use->def (); | |
558 | set_info *set = safe_dyn_cast<set_info *> (def); | |
559 | if (!set) | |
560 | return hash_set<set_info *> (); | |
a1e42094 | 561 | |
29331e72 | 562 | gcc_assert (!set->insn ()->is_debug_insn ()); |
9243c3d1 | 563 | |
29331e72 LD |
564 | if (insn_should_be_added_p (set->insn (), types)) |
565 | insns.add (set); | |
566 | if (set->insn ()->is_phi ()) | |
567 | { | |
568 | phi_info *new_phi = as_a<phi_info *> (set); | |
569 | if (!visited_list.contains (new_phi)) | |
570 | work_list.safe_push (new_phi); | |
571 | } | |
572 | } | |
9243c3d1 | 573 | } |
29331e72 | 574 | return insns; |
9243c3d1 JZZ |
575 | } |
576 | ||
29331e72 LD |
577 | static hash_set<set_info *> |
578 | get_all_sets (set_info *set, bool /* get_real_inst */ real_p, | |
579 | bool /*get_phi*/ phi_p, bool /* get_function_parameter*/ param_p) | |
aef20243 | 580 | { |
29331e72 LD |
581 | if (real_p && phi_p && param_p) |
582 | return get_all_sets (safe_dyn_cast<phi_info *> (set), | |
583 | REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET); | |
aef20243 | 584 | |
29331e72 LD |
585 | else if (real_p && param_p) |
586 | return get_all_sets (safe_dyn_cast<phi_info *> (set), | |
587 | REAL_SET | BB_HEAD_SET | BB_END_SET); | |
588 | ||
589 | else if (real_p) | |
590 | return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET); | |
591 | return hash_set<set_info *> (); | |
69f39144 JZ |
592 | } |
593 | ||
4f673c5e | 594 | static bool |
6b6b9c68 | 595 | source_equal_p (insn_info *insn1, insn_info *insn2) |
4f673c5e | 596 | { |
6b6b9c68 JZZ |
597 | if (!insn1 || !insn2) |
598 | return false; | |
599 | rtx_insn *rinsn1 = insn1->rtl (); | |
600 | rtx_insn *rinsn2 = insn2->rtl (); | |
4f673c5e JZZ |
601 | if (!rinsn1 || !rinsn2) |
602 | return false; | |
29331e72 | 603 | |
4f673c5e JZZ |
604 | rtx note1 = find_reg_equal_equiv_note (rinsn1); |
605 | rtx note2 = find_reg_equal_equiv_note (rinsn2); | |
2020bce3 RD |
606 | /* We could handle the case of similar-looking REG_EQUALs as well but |
607 | would need to verify that no insn in between modifies any of the source | |
608 | operands. */ | |
609 | if (note1 && note2 && rtx_equal_p (note1, note2) | |
610 | && REG_NOTE_KIND (note1) == REG_EQUIV) | |
4f673c5e | 611 | return true; |
29331e72 | 612 | return false; |
4f673c5e JZZ |
613 | } |
614 | ||
6b6b9c68 | 615 | static insn_info * |
4f673c5e JZZ |
616 | extract_single_source (set_info *set) |
617 | { | |
618 | if (!set) | |
619 | return nullptr; | |
620 | if (set->insn ()->is_real ()) | |
6b6b9c68 | 621 | return set->insn (); |
4f673c5e JZZ |
622 | if (!set->insn ()->is_phi ()) |
623 | return nullptr; | |
6b6b9c68 | 624 | hash_set<set_info *> sets = get_all_sets (set, true, false, true); |
330bb064 JZ |
625 | if (sets.is_empty ()) |
626 | return nullptr; | |
4f673c5e | 627 | |
6b6b9c68 | 628 | insn_info *first_insn = (*sets.begin ())->insn (); |
4f673c5e JZZ |
629 | if (first_insn->is_artificial ()) |
630 | return nullptr; | |
6b6b9c68 | 631 | for (const set_info *set : sets) |
4f673c5e JZZ |
632 | { |
633 | /* If there is a head or end insn, we conservative return | |
634 | NULL so that VSETVL PASS will insert vsetvl directly. */ | |
6b6b9c68 | 635 | if (set->insn ()->is_artificial ()) |
4f673c5e | 636 | return nullptr; |
29331e72 | 637 | if (set != *sets.begin () && !source_equal_p (set->insn (), first_insn)) |
4f673c5e JZZ |
638 | return nullptr; |
639 | } | |
640 | ||
6b6b9c68 | 641 | return first_insn; |
4f673c5e JZZ |
642 | } |
643 | ||
29331e72 LD |
644 | static bool |
645 | same_equiv_note_p (set_info *set1, set_info *set2) | |
ec99ffab | 646 | { |
29331e72 LD |
647 | insn_info *insn1 = extract_single_source (set1); |
648 | insn_info *insn2 = extract_single_source (set2); | |
649 | if (!insn1 || !insn2) | |
650 | return false; | |
651 | return source_equal_p (insn1, insn2); | |
ec99ffab JZZ |
652 | } |
653 | ||
29331e72 | 654 | /* Return true if the SET result is not used by any instructions. */ |
ec99ffab | 655 | static bool |
29331e72 | 656 | has_no_uses (basic_block cfg_bb, rtx_insn *rinsn, int regno) |
ec99ffab | 657 | { |
29331e72 LD |
658 | if (bitmap_bit_p (df_get_live_out (cfg_bb), regno)) |
659 | return false; | |
ec99ffab | 660 | |
29331e72 LD |
661 | rtx_insn *iter; |
662 | for (iter = NEXT_INSN (rinsn); iter && iter != NEXT_INSN (BB_END (cfg_bb)); | |
663 | iter = NEXT_INSN (iter)) | |
664 | if (df_find_use (iter, regno_reg_rtx[regno])) | |
665 | return false; | |
ec99ffab | 666 | |
29331e72 | 667 | return true; |
ec99ffab JZZ |
668 | } |
669 | ||
4a0a8dc1 JZ |
670 | /* Return true for the special block that we can't apply LCM optimization. */ |
671 | static bool | |
672 | invalid_opt_bb_p (basic_block cfg_bb) | |
673 | { | |
674 | edge e; | |
675 | edge_iterator ei; | |
676 | ||
677 | /* We don't do LCM optimizations on complex edges. */ | |
678 | FOR_EACH_EDGE (e, ei, cfg_bb->preds) | |
679 | if (e->flags & EDGE_COMPLEX) | |
680 | return true; | |
681 | ||
682 | /* We only do LCM optimizations on blocks that are post dominated by | |
683 | EXIT block, that is, we don't do LCM optimizations on infinite loop. */ | |
684 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
685 | if (e->flags & EDGE_FAKE) | |
686 | return true; | |
687 | ||
688 | return false; | |
689 | } | |
690 | ||
c6c2a1d7 JZ |
691 | /* Get all predecessors of BB. */ |
692 | static hash_set<basic_block> | |
693 | get_all_predecessors (basic_block bb) | |
694 | { | |
695 | hash_set<basic_block> blocks; | |
696 | auto_vec<basic_block> work_list; | |
697 | hash_set<basic_block> visited_list; | |
698 | work_list.safe_push (bb); | |
699 | ||
700 | while (!work_list.is_empty ()) | |
701 | { | |
702 | basic_block new_bb = work_list.pop (); | |
703 | visited_list.add (new_bb); | |
704 | edge e; | |
705 | edge_iterator ei; | |
706 | FOR_EACH_EDGE (e, ei, new_bb->preds) | |
707 | { | |
708 | if (!visited_list.contains (e->src)) | |
709 | work_list.safe_push (e->src); | |
710 | blocks.add (e->src); | |
711 | } | |
712 | } | |
713 | return blocks; | |
714 | } | |
715 | ||
29331e72 LD |
716 | /* These flags indicate the minimum demand of the vl and vtype values by the |
717 | RVV instruction. For example, DEMAND_RATIO_P indicates that this RVV | |
718 | instruction only needs the SEW/LMUL ratio to remain the same, and does not | |
719 | require SEW and LMUL to be fixed. | |
720 | Therefore, if the former RVV instruction needs DEMAND_RATIO_P and the latter | |
721 | instruction needs DEMAND_SEW_LMUL_P and its SEW/LMUL is the same as that of | |
d83070ae | 722 | the former instruction, then we can make the minimum demand of the former |
29331e72 LD |
723 | instruction strict to DEMAND_SEW_LMUL_P, and its required SEW and LMUL are |
724 | the SEW and LMUL of the latter instruction, and the vsetvl instruction | |
725 | generated according to the new demand can also be used for the latter | |
726 | instruction, so there is no need to insert a separate vsetvl instruction for | |
727 | the latter instruction. */ | |
enum demand_flags : unsigned
{
  /* No demand at all.  */
  DEMAND_EMPTY_P = 0x00,
  /* One bit per individual demand, so that they can be OR-ed.  */
  DEMAND_SEW_P = 0x01,
  DEMAND_LMUL_P = 0x02,
  DEMAND_RATIO_P = 0x04,
  DEMAND_GE_SEW_P = 0x08,
  DEMAND_TAIL_POLICY_P = 0x10,
  DEMAND_MASK_POLICY_P = 0x20,
  DEMAND_AVL_P = 0x40,
  DEMAND_NON_ZERO_AVL_P = 0x80,
};
ec99ffab | 740 | |
29331e72 LD |
741 | /* We split the demand information into three parts. They are sew and lmul |
742 | related (sew_lmul_demand_type), tail and mask policy related | |
743 | (policy_demand_type) and avl related (avl_demand_type). Then we define three | |
d83070ae | 744 | interfaces available_p, compatible_p and merge. available_p is |
29331e72 LD |
745 | used to determine whether the two vsetvl infos prev_info and next_info are |
746 | available or not. If prev_info is available for next_info, it means that the | |
747 | RVV insn corresponding to next_info on the path from prev_info to next_info | |
748 | can be used without inserting a separate vsetvl instruction. compatible_p | |
749 | is used to determine whether prev_info is compatible with next_info, and if | |
750 | so, merge can be used to merge the stricter demand information from | |
751 | next_info into prev_info so that prev_info becomes available to next_info. | |
752 | */ | |
ec99ffab | 753 | |
29331e72 | 754 | enum class sew_lmul_demand_type : unsigned |
ec99ffab | 755 | { |
29331e72 LD |
756 | sew_lmul = demand_flags::DEMAND_SEW_P | demand_flags::DEMAND_LMUL_P, |
757 | ratio_only = demand_flags::DEMAND_RATIO_P, | |
758 | sew_only = demand_flags::DEMAND_SEW_P, | |
759 | ge_sew = demand_flags::DEMAND_GE_SEW_P, | |
760 | ratio_and_ge_sew | |
761 | = demand_flags::DEMAND_RATIO_P | demand_flags::DEMAND_GE_SEW_P, | |
762 | }; | |
ec99ffab | 763 | |
29331e72 | 764 | enum class policy_demand_type : unsigned |
29547511 | 765 | { |
29331e72 LD |
766 | tail_mask_policy |
767 | = demand_flags::DEMAND_TAIL_POLICY_P | demand_flags::DEMAND_MASK_POLICY_P, | |
768 | tail_policy_only = demand_flags::DEMAND_TAIL_POLICY_P, | |
769 | mask_policy_only = demand_flags::DEMAND_MASK_POLICY_P, | |
770 | ignore_policy = demand_flags::DEMAND_EMPTY_P, | |
771 | }; | |
29547511 | 772 | |
29331e72 | 773 | enum class avl_demand_type : unsigned |
ec99ffab | 774 | { |
29331e72 LD |
775 | avl = demand_flags::DEMAND_AVL_P, |
776 | non_zero_avl = demand_flags::DEMAND_NON_ZERO_AVL_P, | |
777 | ignore_avl = demand_flags::DEMAND_EMPTY_P, | |
778 | }; | |
ec99ffab | 779 | |
29331e72 | 780 | class vsetvl_info |
ec99ffab | 781 | { |
29331e72 LD |
782 | private: |
783 | insn_info *m_insn; | |
784 | bb_info *m_bb; | |
785 | rtx m_avl; | |
786 | rtx m_vl; | |
787 | set_info *m_avl_def; | |
788 | uint8_t m_sew; | |
789 | uint8_t m_max_sew; | |
790 | vlmul_type m_vlmul; | |
791 | uint8_t m_ratio; | |
792 | bool m_ta; | |
793 | bool m_ma; | |
794 | ||
795 | sew_lmul_demand_type m_sew_lmul_demand; | |
796 | policy_demand_type m_policy_demand; | |
797 | avl_demand_type m_avl_demand; | |
798 | ||
799 | enum class state_type | |
800 | { | |
801 | UNINITIALIZED, | |
802 | VALID, | |
803 | UNKNOWN, | |
804 | EMPTY, | |
805 | }; | |
806 | state_type m_state; | |
807 | ||
808 | bool m_delete; | |
809 | bool m_change_vtype_only; | |
810 | insn_info *m_read_vl_insn; | |
811 | bool m_vl_used_by_non_rvv_insn; | |
ec99ffab | 812 | |
29331e72 LD |
813 | public: |
814 | vsetvl_info () | |
815 | : m_insn (nullptr), m_bb (nullptr), m_avl (NULL_RTX), m_vl (NULL_RTX), | |
816 | m_avl_def (nullptr), m_sew (0), m_max_sew (0), m_vlmul (LMUL_RESERVED), | |
817 | m_ratio (0), m_ta (false), m_ma (false), | |
818 | m_sew_lmul_demand (sew_lmul_demand_type::sew_lmul), | |
819 | m_policy_demand (policy_demand_type::tail_mask_policy), | |
820 | m_avl_demand (avl_demand_type::avl), m_state (state_type::UNINITIALIZED), | |
821 | m_delete (false), m_change_vtype_only (false), m_read_vl_insn (nullptr), | |
822 | m_vl_used_by_non_rvv_insn (false) | |
823 | {} | |
824 | ||
825 | vsetvl_info (insn_info *insn) : vsetvl_info () { parse_insn (insn); } | |
826 | ||
827 | vsetvl_info (rtx_insn *insn) : vsetvl_info () { parse_insn (insn); } | |
828 | ||
829 | void set_avl (rtx avl) { m_avl = avl; } | |
830 | void set_vl (rtx vl) { m_vl = vl; } | |
831 | void set_avl_def (set_info *avl_def) { m_avl_def = avl_def; } | |
832 | void set_sew (uint8_t sew) { m_sew = sew; } | |
833 | void set_vlmul (vlmul_type vlmul) { m_vlmul = vlmul; } | |
834 | void set_ratio (uint8_t ratio) { m_ratio = ratio; } | |
835 | void set_ta (bool ta) { m_ta = ta; } | |
836 | void set_ma (bool ma) { m_ma = ma; } | |
837 | void set_delete () { m_delete = true; } | |
838 | void set_bb (bb_info *bb) { m_bb = bb; } | |
839 | void set_max_sew (uint8_t max_sew) { m_max_sew = max_sew; } | |
840 | void set_change_vtype_only () { m_change_vtype_only = true; } | |
841 | void set_read_vl_insn (insn_info *insn) { m_read_vl_insn = insn; } | |
842 | ||
843 | rtx get_avl () const { return m_avl; } | |
844 | rtx get_vl () const { return m_vl; } | |
845 | set_info *get_avl_def () const { return m_avl_def; } | |
846 | uint8_t get_sew () const { return m_sew; } | |
847 | vlmul_type get_vlmul () const { return m_vlmul; } | |
848 | uint8_t get_ratio () const { return m_ratio; } | |
849 | bool get_ta () const { return m_ta; } | |
850 | bool get_ma () const { return m_ma; } | |
851 | insn_info *get_insn () const { return m_insn; } | |
852 | bool delete_p () const { return m_delete; } | |
853 | bb_info *get_bb () const { return m_bb; } | |
854 | uint8_t get_max_sew () const { return m_max_sew; } | |
855 | insn_info *get_read_vl_insn () const { return m_read_vl_insn; } | |
4cd4c34a | 856 | bool vl_used_by_non_rvv_insn_p () const { return m_vl_used_by_non_rvv_insn; } |
29331e72 LD |
857 | |
858 | bool has_imm_avl () const { return m_avl && CONST_INT_P (m_avl); } | |
859 | bool has_vlmax_avl () const { return vlmax_avl_p (m_avl); } | |
860 | bool has_nonvlmax_reg_avl () const | |
861 | { | |
862 | return m_avl && REG_P (m_avl) && !has_vlmax_avl (); | |
863 | } | |
864 | bool has_non_zero_avl () const | |
865 | { | |
866 | if (has_imm_avl ()) | |
867 | return INTVAL (m_avl) > 0; | |
868 | return has_vlmax_avl (); | |
869 | } | |
870 | bool has_vl () const | |
871 | { | |
872 | /* The VL operand can only be either a NULL_RTX or a register. */ | |
873 | gcc_assert (!m_vl || REG_P (m_vl)); | |
874 | return m_vl != NULL_RTX; | |
875 | } | |
876 | bool has_same_ratio (const vsetvl_info &other) const | |
877 | { | |
878 | return get_ratio () == other.get_ratio (); | |
879 | } | |
880 | ||
881 | /* The block of INSN isn't always same as the block of the VSETVL_INFO, | |
882 | meaning we may have 'get_insn ()->bb () != get_bb ()'. | |
883 | ||
884 | E.g. BB 2 (Empty) ---> BB 3 (VALID, has rvv insn 1) | |
885 | ||
886 | BB 2 has empty VSETVL_INFO, wheras BB 3 has VSETVL_INFO that satisfies | |
887 | get_insn ()->bb () == get_bb (). In earliest fusion, we may fuse bb 3 and | |
888 | bb 2 so that the 'get_bb ()' of BB2 VSETVL_INFO will be BB2 wheras the | |
889 | 'get_insn ()' of BB2 VSETVL INFO will be the rvv insn 1 (which is located | |
890 | at BB3). */ | |
891 | bool insn_inside_bb_p () const { return get_insn ()->bb () == get_bb (); } | |
892 | void update_avl (const vsetvl_info &other) | |
893 | { | |
894 | m_avl = other.get_avl (); | |
895 | m_vl = other.get_vl (); | |
896 | m_avl_def = other.get_avl_def (); | |
897 | } | |
898 | ||
899 | bool uninit_p () const { return m_state == state_type::UNINITIALIZED; } | |
900 | bool valid_p () const { return m_state == state_type::VALID; } | |
901 | bool unknown_p () const { return m_state == state_type::UNKNOWN; } | |
902 | bool empty_p () const { return m_state == state_type::EMPTY; } | |
903 | bool change_vtype_only_p () const { return m_change_vtype_only; } | |
904 | ||
905 | void set_valid () { m_state = state_type::VALID; } | |
906 | void set_unknown () { m_state = state_type::UNKNOWN; } | |
907 | void set_empty () { m_state = state_type::EMPTY; } | |
908 | ||
909 | void set_sew_lmul_demand (sew_lmul_demand_type demand) | |
910 | { | |
911 | m_sew_lmul_demand = demand; | |
912 | } | |
913 | void set_policy_demand (policy_demand_type demand) | |
914 | { | |
915 | m_policy_demand = demand; | |
916 | } | |
917 | void set_avl_demand (avl_demand_type demand) { m_avl_demand = demand; } | |
918 | ||
919 | sew_lmul_demand_type get_sew_lmul_demand () const | |
920 | { | |
921 | return m_sew_lmul_demand; | |
922 | } | |
923 | policy_demand_type get_policy_demand () const { return m_policy_demand; } | |
924 | avl_demand_type get_avl_demand () const { return m_avl_demand; } | |
925 | ||
926 | void normalize_demand (unsigned demand_flags) | |
927 | { | |
928 | switch (demand_flags | |
929 | & (DEMAND_SEW_P | DEMAND_LMUL_P | DEMAND_RATIO_P | DEMAND_GE_SEW_P)) | |
930 | { | |
931 | case (unsigned) sew_lmul_demand_type::sew_lmul: | |
932 | m_sew_lmul_demand = sew_lmul_demand_type::sew_lmul; | |
933 | break; | |
934 | case (unsigned) sew_lmul_demand_type::ratio_only: | |
935 | m_sew_lmul_demand = sew_lmul_demand_type::ratio_only; | |
936 | break; | |
937 | case (unsigned) sew_lmul_demand_type::sew_only: | |
938 | m_sew_lmul_demand = sew_lmul_demand_type::sew_only; | |
939 | break; | |
940 | case (unsigned) sew_lmul_demand_type::ge_sew: | |
941 | m_sew_lmul_demand = sew_lmul_demand_type::ge_sew; | |
942 | break; | |
943 | case (unsigned) sew_lmul_demand_type::ratio_and_ge_sew: | |
944 | m_sew_lmul_demand = sew_lmul_demand_type::ratio_and_ge_sew; | |
945 | break; | |
946 | default: | |
947 | gcc_unreachable (); | |
948 | } | |
949 | ||
950 | switch (demand_flags & (DEMAND_TAIL_POLICY_P | DEMAND_MASK_POLICY_P)) | |
951 | { | |
952 | case (unsigned) policy_demand_type::tail_mask_policy: | |
953 | m_policy_demand = policy_demand_type::tail_mask_policy; | |
954 | break; | |
955 | case (unsigned) policy_demand_type::tail_policy_only: | |
956 | m_policy_demand = policy_demand_type::tail_policy_only; | |
957 | break; | |
958 | case (unsigned) policy_demand_type::mask_policy_only: | |
959 | m_policy_demand = policy_demand_type::mask_policy_only; | |
960 | break; | |
961 | case (unsigned) policy_demand_type::ignore_policy: | |
962 | m_policy_demand = policy_demand_type::ignore_policy; | |
963 | break; | |
964 | default: | |
965 | gcc_unreachable (); | |
966 | } | |
967 | ||
968 | switch (demand_flags & (DEMAND_AVL_P | DEMAND_NON_ZERO_AVL_P)) | |
969 | { | |
970 | case (unsigned) avl_demand_type::avl: | |
971 | m_avl_demand = avl_demand_type::avl; | |
972 | break; | |
973 | case (unsigned) avl_demand_type::non_zero_avl: | |
974 | m_avl_demand = avl_demand_type::non_zero_avl; | |
975 | break; | |
976 | case (unsigned) avl_demand_type::ignore_avl: | |
977 | m_avl_demand = avl_demand_type::ignore_avl; | |
978 | break; | |
979 | default: | |
980 | gcc_unreachable (); | |
981 | } | |
982 | } | |
983 | ||
984 | void parse_insn (rtx_insn *rinsn) | |
985 | { | |
986 | if (!NONDEBUG_INSN_P (rinsn)) | |
987 | return; | |
988 | if (optimize == 0 && !has_vtype_op (rinsn)) | |
989 | return; | |
990 | gcc_assert (!vsetvl_discard_result_insn_p (rinsn)); | |
991 | set_valid (); | |
992 | extract_insn_cached (rinsn); | |
993 | m_avl = ::get_avl (rinsn); | |
994 | if (has_vlmax_avl () || vsetvl_insn_p (rinsn)) | |
995 | m_vl = ::get_vl (rinsn); | |
996 | m_sew = ::get_sew (rinsn); | |
997 | m_vlmul = ::get_vlmul (rinsn); | |
998 | m_ta = tail_agnostic_p (rinsn); | |
999 | m_ma = mask_agnostic_p (rinsn); | |
1000 | } | |
1001 | ||
1002 | void parse_insn (insn_info *insn) | |
1003 | { | |
1004 | m_insn = insn; | |
1005 | m_bb = insn->bb (); | |
1006 | /* Return if it is debug insn for the consistency with optimize == 0. */ | |
1007 | if (insn->is_debug_insn ()) | |
1008 | return; | |
ec99ffab | 1009 | |
29331e72 LD |
1010 | /* We set it as unknown since we don't what will happen in CALL or ASM. */ |
1011 | if (insn->is_call () || insn->is_asm ()) | |
1012 | { | |
1013 | set_unknown (); | |
1014 | return; | |
1015 | } | |
1016 | ||
1017 | /* If this is something that updates VL/VTYPE that we don't know about, set | |
1018 | the state to unknown. */ | |
1019 | if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()) | |
1020 | && (find_access (insn->defs (), VL_REGNUM) | |
1021 | || find_access (insn->defs (), VTYPE_REGNUM))) | |
1022 | { | |
1023 | set_unknown (); | |
1024 | return; | |
1025 | } | |
1026 | ||
1027 | if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())) | |
1028 | /* uninitialized */ | |
1029 | return; | |
ec99ffab | 1030 | |
29331e72 LD |
1031 | set_valid (); |
1032 | ||
1033 | m_avl = ::get_avl (insn->rtl ()); | |
1034 | if (m_avl) | |
1035 | { | |
1036 | if (vsetvl_insn_p (insn->rtl ()) || has_vlmax_avl ()) | |
1037 | m_vl = ::get_vl (insn->rtl ()); | |
1038 | ||
1039 | if (has_nonvlmax_reg_avl ()) | |
1040 | m_avl_def = find_access (insn->uses (), REGNO (m_avl))->def (); | |
1041 | } | |
1042 | ||
1043 | m_sew = ::get_sew (insn->rtl ()); | |
1044 | m_vlmul = ::get_vlmul (insn->rtl ()); | |
1045 | m_ratio = get_attr_ratio (insn->rtl ()); | |
1046 | /* when get_attr_ratio is invalid, this kind of instructions | |
1047 | doesn't care about ratio. However, we still need this value | |
1048 | in demand info backward analysis. */ | |
1049 | if (m_ratio == INVALID_ATTRIBUTE) | |
1050 | m_ratio = calculate_ratio (m_sew, m_vlmul); | |
1051 | m_ta = tail_agnostic_p (insn->rtl ()); | |
1052 | m_ma = mask_agnostic_p (insn->rtl ()); | |
1053 | ||
1054 | /* If merge operand is undef value, we prefer agnostic. */ | |
1055 | int merge_op_idx = get_attr_merge_op_idx (insn->rtl ()); | |
1056 | if (merge_op_idx != INVALID_ATTRIBUTE | |
1057 | && satisfies_constraint_vu (recog_data.operand[merge_op_idx])) | |
1058 | { | |
1059 | m_ta = true; | |
1060 | m_ma = true; | |
1061 | } | |
1062 | ||
1063 | /* Determine the demand info of the RVV insn. */ | |
1064 | m_max_sew = get_max_int_sew (); | |
193ef02a | 1065 | unsigned dflags = 0; |
29331e72 LD |
1066 | if (vector_config_insn_p (insn->rtl ())) |
1067 | { | |
193ef02a RS |
1068 | dflags |= demand_flags::DEMAND_AVL_P; |
1069 | dflags |= demand_flags::DEMAND_RATIO_P; | |
29331e72 LD |
1070 | } |
1071 | else | |
1072 | { | |
1073 | if (has_vl_op (insn->rtl ())) | |
1074 | { | |
1075 | if (scalar_move_insn_p (insn->rtl ())) | |
1076 | { | |
1077 | /* If the avl for vmv.s.x comes from the vsetvl instruction, we | |
1078 | don't know if the avl is non-zero, so it is set to | |
1079 | DEMAND_AVL_P for now. it may be corrected to | |
1080 | DEMAND_NON_ZERO_AVL_P later when more information is | |
1081 | available. | |
1082 | */ | |
1083 | if (has_non_zero_avl ()) | |
193ef02a | 1084 | dflags |= demand_flags::DEMAND_NON_ZERO_AVL_P; |
29331e72 | 1085 | else |
193ef02a | 1086 | dflags |= demand_flags::DEMAND_AVL_P; |
29331e72 LD |
1087 | } |
1088 | else | |
193ef02a | 1089 | dflags |= demand_flags::DEMAND_AVL_P; |
29331e72 | 1090 | } |
ec99ffab | 1091 | |
29331e72 | 1092 | if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE) |
193ef02a | 1093 | dflags |= demand_flags::DEMAND_RATIO_P; |
29331e72 LD |
1094 | else |
1095 | { | |
1096 | if (scalar_move_insn_p (insn->rtl ()) && m_ta) | |
1097 | { | |
193ef02a | 1098 | dflags |= demand_flags::DEMAND_GE_SEW_P; |
29331e72 LD |
1099 | m_max_sew = get_attr_type (insn->rtl ()) == TYPE_VFMOVFV |
1100 | ? get_max_float_sew () | |
1101 | : get_max_int_sew (); | |
1102 | } | |
1103 | else | |
193ef02a | 1104 | dflags |= demand_flags::DEMAND_SEW_P; |
29331e72 LD |
1105 | |
1106 | if (!ignore_vlmul_insn_p (insn->rtl ())) | |
193ef02a | 1107 | dflags |= demand_flags::DEMAND_LMUL_P; |
29331e72 | 1108 | } |
ec99ffab | 1109 | |
29331e72 | 1110 | if (!m_ta) |
193ef02a | 1111 | dflags |= demand_flags::DEMAND_TAIL_POLICY_P; |
29331e72 | 1112 | if (!m_ma) |
193ef02a | 1113 | dflags |= demand_flags::DEMAND_MASK_POLICY_P; |
29331e72 LD |
1114 | } |
1115 | ||
193ef02a | 1116 | normalize_demand (dflags); |
29331e72 LD |
1117 | |
1118 | /* Optimize AVL from the vsetvl instruction. */ | |
1119 | insn_info *def_insn = extract_single_source (get_avl_def ()); | |
1120 | if (def_insn && vsetvl_insn_p (def_insn->rtl ())) | |
1121 | { | |
1122 | vsetvl_info def_info = vsetvl_info (def_insn); | |
1123 | if ((scalar_move_insn_p (insn->rtl ()) | |
1124 | || def_info.get_ratio () == get_ratio ()) | |
1125 | && (def_info.has_vlmax_avl () || def_info.has_imm_avl ())) | |
1126 | { | |
1127 | update_avl (def_info); | |
1128 | if (scalar_move_insn_p (insn->rtl ()) && has_non_zero_avl ()) | |
1129 | m_avl_demand = avl_demand_type::non_zero_avl; | |
1130 | } | |
1131 | } | |
1132 | ||
1133 | /* Determine if dest operand(vl) has been used by non-RVV instructions. */ | |
1134 | if (has_vl ()) | |
1135 | { | |
1136 | const hash_set<use_info *> vl_uses | |
1137 | = get_all_real_uses (get_insn (), REGNO (get_vl ())); | |
1138 | for (use_info *use : vl_uses) | |
1139 | { | |
1140 | gcc_assert (use->insn ()->is_real ()); | |
1141 | rtx_insn *rinsn = use->insn ()->rtl (); | |
1142 | if (!has_vl_op (rinsn) | |
1143 | || count_regno_occurrences (rinsn, REGNO (get_vl ())) != 1) | |
1144 | { | |
1145 | m_vl_used_by_non_rvv_insn = true; | |
1146 | break; | |
1147 | } | |
1148 | rtx avl = ::get_avl (rinsn); | |
c2f23514 | 1149 | if (!avl || !REG_P (avl) || REGNO (get_vl ()) != REGNO (avl)) |
29331e72 LD |
1150 | { |
1151 | m_vl_used_by_non_rvv_insn = true; | |
1152 | break; | |
1153 | } | |
1154 | } | |
1155 | } | |
ec99ffab | 1156 | |
29331e72 LD |
1157 | /* Collect the read vl insn for the fault-only-first rvv loads. */ |
1158 | if (fault_first_load_p (insn->rtl ())) | |
1159 | { | |
1160 | for (insn_info *i = insn->next_nondebug_insn (); | |
1161 | i->bb () == insn->bb (); i = i->next_nondebug_insn ()) | |
1162 | { | |
1163 | if (find_access (i->defs (), VL_REGNUM)) | |
1164 | break; | |
1165 | if (i->rtl () && read_vl_insn_p (i->rtl ())) | |
1166 | { | |
1167 | m_read_vl_insn = i; | |
1168 | break; | |
1169 | } | |
1170 | } | |
1171 | } | |
1172 | } | |
1173 | ||
1174 | /* Returns the corresponding vsetvl rtx pat. */ | |
1175 | rtx get_vsetvl_pat (bool ignore_vl = false) const | |
1176 | { | |
1177 | rtx avl = get_avl (); | |
1178 | /* if optimization == 0 and the instruction is vmv.x.s/vfmv.f.s, | |
1179 | set the value of avl to (const_int 0) so that VSETVL PASS will | |
1180 | insert vsetvl correctly.*/ | |
1181 | if (!get_avl ()) | |
1182 | avl = GEN_INT (0); | |
1183 | rtx sew = gen_int_mode (get_sew (), Pmode); | |
1184 | rtx vlmul = gen_int_mode (get_vlmul (), Pmode); | |
1185 | rtx ta = gen_int_mode (get_ta (), Pmode); | |
1186 | rtx ma = gen_int_mode (get_ma (), Pmode); | |
1187 | ||
1188 | if (change_vtype_only_p ()) | |
1189 | return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma); | |
1190 | else if (has_vl () && !ignore_vl) | |
1191 | return gen_vsetvl (Pmode, get_vl (), avl, sew, vlmul, ta, ma); | |
1192 | else | |
1193 | return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma); | |
1194 | } | |
1195 | ||
d82bb518 JZ |
1196 | /* Return true that the non-AVL operands of THIS will be modified |
1197 | if we fuse the VL modification from OTHER into THIS. */ | |
1198 | bool vl_modify_non_avl_op_p (const vsetvl_info &other) const | |
1199 | { | |
1200 | /* We don't need to worry about any operands from THIS be | |
1201 | modified by OTHER vsetvl since we OTHER vsetvl doesn't | |
1202 | modify any operand. */ | |
1203 | if (!other.has_vl ()) | |
1204 | return false; | |
1205 | ||
1206 | /* THIS VL operand always preempt OTHER VL operand. */ | |
1207 | if (this->has_vl ()) | |
1208 | return false; | |
1209 | ||
1210 | /* If THIS has non IMM AVL and THIS is AVL compatible with | |
1211 | OTHER, the AVL value of THIS is same as VL value of OTHER. */ | |
1212 | if (!this->has_imm_avl ()) | |
1213 | return false; | |
1214 | return find_access (this->get_insn ()->uses (), REGNO (other.get_vl ())); | |
1215 | } | |
1216 | ||
29331e72 LD |
1217 | bool operator== (const vsetvl_info &other) const |
1218 | { | |
1219 | gcc_assert (!uninit_p () && !other.uninit_p () | |
1220 | && "Uninitialization should not happen"); | |
1221 | ||
1222 | if (empty_p ()) | |
1223 | return other.empty_p (); | |
1224 | if (unknown_p ()) | |
1225 | return other.unknown_p (); | |
1226 | ||
1227 | return get_insn () == other.get_insn () && get_bb () == other.get_bb () | |
1228 | && get_avl () == other.get_avl () && get_vl () == other.get_vl () | |
1229 | && get_avl_def () == other.get_avl_def () | |
1230 | && get_sew () == other.get_sew () | |
1231 | && get_vlmul () == other.get_vlmul () && get_ta () == other.get_ta () | |
1232 | && get_ma () == other.get_ma () | |
1233 | && get_avl_demand () == other.get_avl_demand () | |
1234 | && get_sew_lmul_demand () == other.get_sew_lmul_demand () | |
1235 | && get_policy_demand () == other.get_policy_demand (); | |
1236 | } | |
1237 | ||
1238 | void dump (FILE *file, const char *indent = "") const | |
1239 | { | |
1240 | if (uninit_p ()) | |
1241 | { | |
1242 | fprintf (file, "UNINITIALIZED.\n"); | |
1243 | return; | |
1244 | } | |
1245 | else if (unknown_p ()) | |
1246 | { | |
1247 | fprintf (file, "UNKNOWN.\n"); | |
1248 | return; | |
1249 | } | |
1250 | else if (empty_p ()) | |
1251 | { | |
1252 | fprintf (file, "EMPTY.\n"); | |
1253 | return; | |
1254 | } | |
1255 | else if (valid_p ()) | |
1256 | fprintf (file, "VALID (insn %u, bb %u)%s\n", get_insn ()->uid (), | |
1257 | get_bb ()->index (), delete_p () ? " (deleted)" : ""); | |
1258 | else | |
1259 | gcc_unreachable (); | |
ec99ffab | 1260 | |
29331e72 LD |
1261 | fprintf (file, "%sDemand fields:", indent); |
1262 | if (m_sew_lmul_demand == sew_lmul_demand_type::sew_lmul) | |
1263 | fprintf (file, " demand_sew_lmul"); | |
1264 | else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_only) | |
1265 | fprintf (file, " demand_ratio_only"); | |
1266 | else if (m_sew_lmul_demand == sew_lmul_demand_type::sew_only) | |
1267 | fprintf (file, " demand_sew_only"); | |
1268 | else if (m_sew_lmul_demand == sew_lmul_demand_type::ge_sew) | |
1269 | fprintf (file, " demand_ge_sew"); | |
1270 | else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_and_ge_sew) | |
1271 | fprintf (file, " demand_ratio_and_ge_sew"); | |
1272 | ||
1273 | if (m_policy_demand == policy_demand_type::tail_mask_policy) | |
1274 | fprintf (file, " demand_tail_mask_policy"); | |
1275 | else if (m_policy_demand == policy_demand_type::tail_policy_only) | |
1276 | fprintf (file, " demand_tail_policy_only"); | |
1277 | else if (m_policy_demand == policy_demand_type::mask_policy_only) | |
1278 | fprintf (file, " demand_mask_policy_only"); | |
1279 | ||
1280 | if (m_avl_demand == avl_demand_type::avl) | |
1281 | fprintf (file, " demand_avl"); | |
1282 | else if (m_avl_demand == avl_demand_type::non_zero_avl) | |
1283 | fprintf (file, " demand_non_zero_avl"); | |
1284 | fprintf (file, "\n"); | |
1285 | ||
1286 | fprintf (file, "%sSEW=%d, ", indent, get_sew ()); | |
1287 | fprintf (file, "VLMUL=%s, ", vlmul_to_str (get_vlmul ())); | |
1288 | fprintf (file, "RATIO=%d, ", get_ratio ()); | |
1289 | fprintf (file, "MAX_SEW=%d\n", get_max_sew ()); | |
1290 | ||
1291 | fprintf (file, "%sTAIL_POLICY=%s, ", indent, policy_to_str (get_ta ())); | |
1292 | fprintf (file, "MASK_POLICY=%s\n", policy_to_str (get_ma ())); | |
1293 | ||
1294 | fprintf (file, "%sAVL=", indent); | |
1295 | print_rtl_single (file, get_avl ()); | |
1296 | fprintf (file, "%sVL=", indent); | |
1297 | print_rtl_single (file, get_vl ()); | |
1298 | if (change_vtype_only_p ()) | |
1299 | fprintf (file, "%schange vtype only\n", indent); | |
1300 | if (get_read_vl_insn ()) | |
1301 | fprintf (file, "%sread_vl_insn: insn %u\n", indent, | |
1302 | get_read_vl_insn ()->uid ()); | |
4cd4c34a | 1303 | if (vl_used_by_non_rvv_insn_p ()) |
29331e72 LD |
1304 | fprintf (file, "%suse_by_non_rvv_insn=true\n", indent); |
1305 | } | |
1306 | }; | |
8fbc0871 | 1307 | |
29331e72 | 1308 | class vsetvl_block_info |
ec99ffab | 1309 | { |
29331e72 LD |
1310 | public: |
1311 | /* The static execute probability of the demand info. */ | |
1312 | profile_probability probability; | |
1313 | ||
4fd09aed JZ |
1314 | auto_vec<vsetvl_info> local_infos; |
1315 | vsetvl_info global_info; | |
1316 | bb_info *bb; | |
29331e72 | 1317 | |
5ee45f5e | 1318 | vsetvl_block_info () : bb (nullptr) |
29331e72 | 1319 | { |
4fd09aed JZ |
1320 | local_infos.safe_grow_cleared (0); |
1321 | global_info.set_empty (); | |
29331e72 LD |
1322 | } |
1323 | vsetvl_block_info (const vsetvl_block_info &other) | |
4fd09aed JZ |
1324 | : probability (other.probability), local_infos (other.local_infos.copy ()), |
1325 | global_info (other.global_info), bb (other.bb) | |
29331e72 LD |
1326 | {} |
1327 | ||
1328 | vsetvl_info &get_entry_info () | |
1329 | { | |
1330 | gcc_assert (!empty_p ()); | |
4fd09aed | 1331 | return local_infos.is_empty () ? global_info : local_infos[0]; |
29331e72 LD |
1332 | } |
1333 | vsetvl_info &get_exit_info () | |
1334 | { | |
1335 | gcc_assert (!empty_p ()); | |
4fd09aed JZ |
1336 | return local_infos.is_empty () ? global_info |
1337 | : local_infos[local_infos.length () - 1]; | |
29331e72 LD |
1338 | } |
1339 | const vsetvl_info &get_entry_info () const | |
1340 | { | |
1341 | gcc_assert (!empty_p ()); | |
4fd09aed | 1342 | return local_infos.is_empty () ? global_info : local_infos[0]; |
29331e72 LD |
1343 | } |
1344 | const vsetvl_info &get_exit_info () const | |
1345 | { | |
1346 | gcc_assert (!empty_p ()); | |
4fd09aed JZ |
1347 | return local_infos.is_empty () ? global_info |
1348 | : local_infos[local_infos.length () - 1]; | |
29331e72 LD |
1349 | } |
1350 | ||
4fd09aed JZ |
1351 | bool empty_p () const { return local_infos.is_empty () && !has_info (); } |
1352 | bool has_info () const { return !global_info.empty_p (); } | |
29331e72 LD |
1353 | void set_info (const vsetvl_info &info) |
1354 | { | |
4fd09aed JZ |
1355 | gcc_assert (local_infos.is_empty ()); |
1356 | global_info = info; | |
1357 | global_info.set_bb (bb); | |
29331e72 | 1358 | } |
4fd09aed | 1359 | void set_empty_info () { global_info.set_empty (); } |
ec99ffab JZZ |
1360 | }; |
1361 | ||
29331e72 LD |
1362 | /* Demand system is the RVV-based VSETVL info analysis tools wrapper. |
1363 | It defines compatible rules for SEW/LMUL, POLICY and AVL. | |
d83070ae | 1364 | Also, it provides 3 interfaces available_p, compatible_p and |
29331e72 LD |
1365 | merge for the VSETVL PASS analysis and optimization. |
1366 | ||
d83070ae KC |
1367 | - available_p: Determine whether the next info can get the |
1368 | available VSETVL status from previous info. | |
29331e72 LD |
1369 | e.g. bb 2 (demand SEW = 32, LMUL = M2) -> bb 3 (demand RATIO = 16). |
1370 | Since bb 2 demand info (SEW/LMUL = 32/2 = 16) satisfies the bb 3 | |
1371 | demand, the VSETVL instruction in bb 3 can be elided. | |
d83070ae | 1372 | available_p (previous, next) is true in such situation. |
29331e72 | 1373 | - compatible_p: Determine whether prev_info is compatible with next_info |
d83070ae | 1374 | so that we can have a new merged info that is available to both of them. |
29331e72 LD |
1375 | - merge: Merge the stricter demand information from |
1376 | next_info into prev_info so that prev_info becomes available to | |
1377 | next_info. */ | |
1378 | class demand_system | |
ec99ffab | 1379 | { |
29331e72 | 1380 | private: |
29331e72 | 1381 | /* predictors. */ |
ec99ffab | 1382 | |
29331e72 LD |
1383 | inline bool always_true (const vsetvl_info &prev ATTRIBUTE_UNUSED, |
1384 | const vsetvl_info &next ATTRIBUTE_UNUSED) | |
1385 | { | |
1386 | return true; | |
1387 | } | |
1388 | inline bool always_false (const vsetvl_info &prev ATTRIBUTE_UNUSED, | |
1389 | const vsetvl_info &next ATTRIBUTE_UNUSED) | |
1390 | { | |
ec99ffab | 1391 | return false; |
29331e72 LD |
1392 | } |
1393 | ||
1394 | /* predictors for sew and lmul */ | |
1395 | ||
1396 | inline bool lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1397 | { | |
1398 | return prev.get_vlmul () == next.get_vlmul (); | |
1399 | } | |
1400 | inline bool sew_eq_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1401 | { | |
1402 | return prev.get_sew () == next.get_sew (); | |
1403 | } | |
1404 | inline bool sew_lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1405 | { | |
1406 | return lmul_eq_p (prev, next) && sew_eq_p (prev, next); | |
1407 | } | |
1408 | inline bool sew_ge_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1409 | { | |
1410 | return prev.get_sew () == next.get_sew () | |
1411 | || (next.get_ta () && prev.get_sew () > next.get_sew ()); | |
1412 | } | |
1413 | inline bool sew_le_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1414 | { | |
1415 | return prev.get_sew () == next.get_sew () | |
1416 | || (prev.get_ta () && prev.get_sew () < next.get_sew ()); | |
1417 | } | |
1418 | inline bool prev_sew_le_next_max_sew_p (const vsetvl_info &prev, | |
1419 | const vsetvl_info &next) | |
1420 | { | |
1421 | return prev.get_sew () <= next.get_max_sew (); | |
1422 | } | |
1423 | inline bool next_sew_le_prev_max_sew_p (const vsetvl_info &prev, | |
1424 | const vsetvl_info &next) | |
1425 | { | |
1426 | return next.get_sew () <= prev.get_max_sew (); | |
1427 | } | |
1428 | inline bool max_sew_overlap_p (const vsetvl_info &prev, | |
1429 | const vsetvl_info &next) | |
1430 | { | |
1431 | return !(prev.get_sew () > next.get_max_sew () | |
1432 | || next.get_sew () > prev.get_max_sew ()); | |
1433 | } | |
1434 | inline bool ratio_eq_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1435 | { | |
1436 | return prev.has_same_ratio (next); | |
1437 | } | |
1438 | inline bool prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev, | |
1439 | const vsetvl_info &next) | |
1440 | { | |
1441 | return prev.get_ratio () >= (next.get_sew () / 8); | |
1442 | } | |
1443 | inline bool next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev, | |
1444 | const vsetvl_info &next) | |
1445 | { | |
1446 | return next.get_ratio () >= (prev.get_sew () / 8); | |
1447 | } | |
1448 | ||
1449 | inline bool sew_ge_and_ratio_eq_p (const vsetvl_info &prev, | |
1450 | const vsetvl_info &next) | |
1451 | { | |
1452 | return sew_ge_p (prev, next) && ratio_eq_p (prev, next); | |
1453 | } | |
1454 | inline bool sew_ge_and_prev_sew_le_next_max_sew_p (const vsetvl_info &prev, | |
1455 | const vsetvl_info &next) | |
1456 | { | |
1457 | return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next); | |
1458 | } | |
1459 | inline bool | |
1460 | sew_ge_and_prev_sew_le_next_max_sew_and_next_ratio_valid_for_prev_sew_p ( | |
1461 | const vsetvl_info &prev, const vsetvl_info &next) | |
1462 | { | |
1463 | return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next) | |
1464 | && next_ratio_valid_for_prev_sew_p (prev, next); | |
1465 | } | |
1466 | inline bool sew_le_and_next_sew_le_prev_max_sew_p (const vsetvl_info &prev, | |
1467 | const vsetvl_info &next) | |
1468 | { | |
1469 | return sew_le_p (prev, next) && next_sew_le_prev_max_sew_p (prev, next); | |
1470 | } | |
1471 | inline bool | |
1472 | max_sew_overlap_and_next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev, | |
1473 | const vsetvl_info &next) | |
1474 | { | |
1475 | return next_ratio_valid_for_prev_sew_p (prev, next) | |
1476 | && max_sew_overlap_p (prev, next); | |
1477 | } | |
1478 | inline bool | |
1479 | sew_le_and_next_sew_le_prev_max_sew_and_ratio_eq_p (const vsetvl_info &prev, | |
1480 | const vsetvl_info &next) | |
1481 | { | |
1482 | return sew_le_p (prev, next) && ratio_eq_p (prev, next) | |
1483 | && next_sew_le_prev_max_sew_p (prev, next); | |
1484 | } | |
1485 | inline bool | |
1486 | max_sew_overlap_and_prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev, | |
1487 | const vsetvl_info &next) | |
1488 | { | |
1489 | return prev_ratio_valid_for_next_sew_p (prev, next) | |
1490 | && max_sew_overlap_p (prev, next); | |
1491 | } | |
1492 | inline bool | |
1493 | sew_le_and_next_sew_le_prev_max_sew_and_prev_ratio_valid_for_next_sew_p ( | |
1494 | const vsetvl_info &prev, const vsetvl_info &next) | |
1495 | { | |
1496 | return sew_le_p (prev, next) && prev_ratio_valid_for_next_sew_p (prev, next) | |
1497 | && next_sew_le_prev_max_sew_p (prev, next); | |
1498 | } | |
1499 | inline bool max_sew_overlap_and_ratio_eq_p (const vsetvl_info &prev, | |
1500 | const vsetvl_info &next) | |
1501 | { | |
1502 | return ratio_eq_p (prev, next) && max_sew_overlap_p (prev, next); | |
1503 | } | |
1504 | ||
1505 | /* predictors for tail and mask policy */ | |
1506 | ||
1507 | inline bool tail_policy_eq_p (const vsetvl_info &prev, | |
1508 | const vsetvl_info &next) | |
1509 | { | |
1510 | return prev.get_ta () == next.get_ta (); | |
1511 | } | |
1512 | inline bool mask_policy_eq_p (const vsetvl_info &prev, | |
1513 | const vsetvl_info &next) | |
1514 | { | |
1515 | return prev.get_ma () == next.get_ma (); | |
1516 | } | |
1517 | inline bool tail_mask_policy_eq_p (const vsetvl_info &prev, | |
1518 | const vsetvl_info &next) | |
1519 | { | |
1520 | return tail_policy_eq_p (prev, next) && mask_policy_eq_p (prev, next); | |
1521 | } | |
1522 | ||
1523 | /* predictors for avl */ | |
1524 | ||
1525 | inline bool modify_or_use_vl_p (insn_info *i, const vsetvl_info &info) | |
1526 | { | |
9c16ca93 JZ |
1527 | if (info.has_vl ()) |
1528 | { | |
1529 | if (find_access (i->defs (), REGNO (info.get_vl ()))) | |
1530 | return true; | |
1531 | if (find_access (i->uses (), REGNO (info.get_vl ()))) | |
1532 | { | |
1533 | resource_info resource = full_register (REGNO (info.get_vl ())); | |
1534 | def_lookup dl1 = crtl->ssa->find_def (resource, i); | |
1535 | def_lookup dl2 = crtl->ssa->find_def (resource, info.get_insn ()); | |
1536 | if (dl1.matching_set () || dl2.matching_set ()) | |
1537 | return true; | |
1538 | /* If their VLs are coming from same def, we still want to fuse | |
1539 | their VSETVL demand info to gain better performance. */ | |
1540 | return dl1.prev_def (i) != dl2.prev_def (i); | |
1541 | } | |
1542 | } | |
1543 | return false; | |
29331e72 LD |
1544 | } |
1545 | inline bool modify_avl_p (insn_info *i, const vsetvl_info &info) | |
1546 | { | |
1547 | return info.has_nonvlmax_reg_avl () | |
1548 | && find_access (i->defs (), REGNO (info.get_avl ())); | |
1549 | } | |
1550 | ||
1551 | inline bool modify_reg_between_p (insn_info *prev_insn, insn_info *curr_insn, | |
1552 | unsigned regno) | |
1553 | { | |
1554 | gcc_assert (prev_insn->compare_with (curr_insn) < 0); | |
1555 | for (insn_info *i = curr_insn->prev_nondebug_insn (); i != prev_insn; | |
1556 | i = i->prev_nondebug_insn ()) | |
1557 | { | |
1558 | // no def of regno | |
1559 | if (find_access (i->defs (), regno)) | |
1560 | return true; | |
1561 | } | |
1562 | return false; | |
1563 | } | |
ec99ffab | 1564 | |
29331e72 LD |
1565 | inline bool reg_avl_equal_p (const vsetvl_info &prev, const vsetvl_info &next) |
1566 | { | |
1567 | if (!prev.has_nonvlmax_reg_avl () || !next.has_nonvlmax_reg_avl ()) | |
1568 | return false; | |
ec99ffab | 1569 | |
29331e72 LD |
1570 | if (same_equiv_note_p (prev.get_avl_def (), next.get_avl_def ())) |
1571 | return true; | |
ec99ffab | 1572 | |
29331e72 LD |
1573 | if (REGNO (prev.get_avl ()) != REGNO (next.get_avl ())) |
1574 | return false; | |
ec99ffab | 1575 | |
29331e72 LD |
1576 | insn_info *prev_insn = prev.get_insn (); |
1577 | if (prev.get_bb () != prev_insn->bb ()) | |
1578 | prev_insn = prev.get_bb ()->end_insn (); | |
ec99ffab | 1579 | |
29331e72 LD |
1580 | insn_info *next_insn = next.get_insn (); |
1581 | if (next.get_bb () != next_insn->bb ()) | |
1582 | next_insn = next.get_bb ()->end_insn (); | |
ec99ffab | 1583 | |
29331e72 LD |
1584 | return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false); |
1585 | } | |
ec99ffab | 1586 | |
29331e72 LD |
1587 | inline bool avl_equal_p (const vsetvl_info &prev, const vsetvl_info &next) |
1588 | { | |
1589 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
ec99ffab | 1590 | |
4cd4c34a | 1591 | if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ()) |
29331e72 | 1592 | return false; |
e030af3e | 1593 | |
29331e72 LD |
1594 | if (vector_config_insn_p (prev.get_insn ()->rtl ()) && next.get_avl_def () |
1595 | && next.get_avl_def ()->insn () == prev.get_insn ()) | |
1596 | return true; | |
e030af3e | 1597 | |
29331e72 LD |
1598 | if (prev.get_read_vl_insn ()) |
1599 | { | |
1600 | if (!next.has_nonvlmax_reg_avl () || !next.get_avl_def ()) | |
1601 | return false; | |
1602 | insn_info *avl_def_insn = extract_single_source (next.get_avl_def ()); | |
1603 | return avl_def_insn == prev.get_read_vl_insn (); | |
1604 | } | |
1605 | ||
1606 | if (prev == next && prev.has_nonvlmax_reg_avl ()) | |
1607 | { | |
1608 | insn_info *insn = prev.get_insn (); | |
1609 | bb_info *bb = insn->bb (); | |
1610 | for (insn_info *i = insn; real_insn_and_same_bb_p (i, bb); | |
1611 | i = i->next_nondebug_insn ()) | |
1612 | if (find_access (i->defs (), REGNO (prev.get_avl ()))) | |
e030af3e | 1613 | return false; |
29331e72 | 1614 | } |
60bd33bc | 1615 | |
29331e72 LD |
1616 | if (prev.has_vlmax_avl () && next.has_vlmax_avl ()) |
1617 | return true; | |
1618 | else if (prev.has_imm_avl () && next.has_imm_avl ()) | |
1619 | return INTVAL (prev.get_avl ()) == INTVAL (next.get_avl ()); | |
1620 | else if (prev.has_vl () && next.has_nonvlmax_reg_avl () | |
1621 | && REGNO (prev.get_vl ()) == REGNO (next.get_avl ())) | |
1622 | { | |
1623 | insn_info *prev_insn = prev.insn_inside_bb_p () | |
1624 | ? prev.get_insn () | |
1625 | : prev.get_bb ()->end_insn (); | |
1626 | ||
1627 | insn_info *next_insn = next.insn_inside_bb_p () | |
1628 | ? next.get_insn () | |
1629 | : next.get_bb ()->end_insn (); | |
1630 | return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false); | |
1631 | } | |
1632 | else if (prev.has_nonvlmax_reg_avl () && next.has_nonvlmax_reg_avl ()) | |
1633 | return reg_avl_equal_p (prev, next); | |
e030af3e | 1634 | |
e030af3e | 1635 | return false; |
29331e72 LD |
1636 | } |
1637 | inline bool avl_equal_or_prev_avl_non_zero_p (const vsetvl_info &prev, | |
1638 | const vsetvl_info &next) | |
1639 | { | |
1640 | return avl_equal_p (prev, next) || prev.has_non_zero_avl (); | |
1641 | } | |
1642 | ||
1643 | inline bool can_use_next_avl_p (const vsetvl_info &prev, | |
1644 | const vsetvl_info &next) | |
1645 | { | |
0c4bd132 JZ |
1646 | /* Forbid the AVL/VL propagation if VL of NEXT is used |
1647 | by non-RVV instructions. This is because: | |
1648 | ||
1649 | bb 2: | |
1650 | PREV: scalar move (no AVL) | |
1651 | bb 3: | |
1652 | NEXT: vsetvl a5(VL), a4(AVL) ... | |
1653 | branch a5,zero | |
1654 | ||
1655 | Since user vsetvl instruction is no side effect instruction | |
1656 | which should be placed in the correct and optimal location | |
1657 | of the program by the previous PASS, it is unreasonable that | |
1658 | VSETVL PASS tries to move it to another places if it used by | |
1659 | non-RVV instructions. | |
1660 | ||
1661 | Note: We only forbid the cases that VL is used by the following | |
1662 | non-RVV instructions which will cause issues. We don't forbid | |
1663 | other cases since it won't cause correctness issues and we still | |
1664 | more demand info are fused backward. The later LCM algorithm | |
1665 | should know the optimal location of the vsetvl. */ | |
1666 | if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ()) | |
1667 | return false; | |
1668 | ||
29331e72 LD |
1669 | if (!next.has_nonvlmax_reg_avl () && !next.has_vl ()) |
1670 | return true; | |
e030af3e | 1671 | |
29331e72 LD |
1672 | insn_info *prev_insn = prev.get_insn (); |
1673 | if (prev.get_bb () != prev_insn->bb ()) | |
1674 | prev_insn = prev.get_bb ()->end_insn (); | |
1675 | ||
1676 | insn_info *next_insn = next.get_insn (); | |
1677 | if (next.get_bb () != next_insn->bb ()) | |
1678 | next_insn = next.get_bb ()->end_insn (); | |
1679 | ||
1680 | return avl_vl_unmodified_between_p (prev_insn, next_insn, next); | |
1681 | } | |
1682 | ||
1683 | inline bool avl_equal_or_next_avl_non_zero_and_can_use_next_avl_p ( | |
1684 | const vsetvl_info &prev, const vsetvl_info &next) | |
1685 | { | |
1686 | return avl_equal_p (prev, next) | |
1687 | || (next.has_non_zero_avl () && can_use_next_avl_p (prev, next)); | |
1688 | } | |
1689 | ||
1690 | /* modifiers */ | |
1691 | ||
1692 | inline void nop (const vsetvl_info &prev ATTRIBUTE_UNUSED, | |
1693 | const vsetvl_info &next ATTRIBUTE_UNUSED) | |
1694 | {} | |
1695 | ||
1696 | /* modifiers for sew and lmul */ | |
1697 | ||
1698 | inline void use_min_of_max_sew (vsetvl_info &prev, const vsetvl_info &next) | |
1699 | { | |
1700 | prev.set_max_sew (MIN (prev.get_max_sew (), next.get_max_sew ())); | |
1701 | } | |
1702 | inline void use_next_sew (vsetvl_info &prev, const vsetvl_info &next) | |
1703 | { | |
1704 | prev.set_sew (next.get_sew ()); | |
1705 | use_min_of_max_sew (prev, next); | |
1706 | } | |
1707 | inline void use_max_sew (vsetvl_info &prev, const vsetvl_info &next) | |
1708 | { | |
e74c37aa | 1709 | int max_sew = MAX (prev.get_sew (), next.get_sew ()); |
29331e72 LD |
1710 | prev.set_sew (max_sew); |
1711 | use_min_of_max_sew (prev, next); | |
1712 | } | |
1713 | inline void use_next_sew_lmul (vsetvl_info &prev, const vsetvl_info &next) | |
1714 | { | |
1715 | use_next_sew (prev, next); | |
1716 | prev.set_vlmul (next.get_vlmul ()); | |
1717 | prev.set_ratio (next.get_ratio ()); | |
1718 | } | |
1719 | inline void use_next_sew_with_prev_ratio (vsetvl_info &prev, | |
1720 | const vsetvl_info &next) | |
1721 | { | |
1722 | use_next_sew (prev, next); | |
1723 | prev.set_vlmul (calculate_vlmul (next.get_sew (), prev.get_ratio ())); | |
1724 | } | |
1725 | inline void modify_lmul_with_next_ratio (vsetvl_info &prev, | |
1726 | const vsetvl_info &next) | |
1727 | { | |
1728 | prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ())); | |
1729 | prev.set_ratio (next.get_ratio ()); | |
1730 | } | |
1731 | ||
1732 | inline void use_max_sew_and_lmul_with_next_ratio (vsetvl_info &prev, | |
1733 | const vsetvl_info &next) | |
1734 | { | |
1735 | prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ())); | |
1736 | use_max_sew (prev, next); | |
1737 | prev.set_ratio (next.get_ratio ()); | |
1738 | } | |
1739 | ||
1740 | inline void use_max_sew_and_lmul_with_prev_ratio (vsetvl_info &prev, | |
1741 | const vsetvl_info &next) | |
1742 | { | |
e74c37aa | 1743 | int max_sew = MAX (prev.get_sew (), next.get_sew ()); |
29331e72 LD |
1744 | prev.set_vlmul (calculate_vlmul (max_sew, prev.get_ratio ())); |
1745 | prev.set_sew (max_sew); | |
1746 | } | |
1747 | ||
1748 | /* modifiers for tail and mask policy */ | |
1749 | ||
1750 | inline void use_tail_policy (vsetvl_info &prev, const vsetvl_info &next) | |
1751 | { | |
1752 | if (!next.get_ta ()) | |
1753 | prev.set_ta (next.get_ta ()); | |
1754 | } | |
1755 | inline void use_mask_policy (vsetvl_info &prev, const vsetvl_info &next) | |
1756 | { | |
1757 | if (!next.get_ma ()) | |
1758 | prev.set_ma (next.get_ma ()); | |
1759 | } | |
1760 | inline void use_tail_mask_policy (vsetvl_info &prev, const vsetvl_info &next) | |
1761 | { | |
1762 | use_tail_policy (prev, next); | |
1763 | use_mask_policy (prev, next); | |
1764 | } | |
1765 | ||
1766 | /* modifiers for avl */ | |
1767 | ||
1768 | inline void use_next_avl (vsetvl_info &prev, const vsetvl_info &next) | |
1769 | { | |
1770 | gcc_assert (can_use_next_avl_p (prev, next)); | |
1771 | prev.update_avl (next); | |
1772 | } | |
1773 | ||
1774 | inline void use_next_avl_when_not_equal (vsetvl_info &prev, | |
1775 | const vsetvl_info &next) | |
1776 | { | |
1777 | if (avl_equal_p (prev, next)) | |
1778 | return; | |
1779 | gcc_assert (next.has_non_zero_avl ()); | |
1780 | use_next_avl (prev, next); | |
1781 | } | |
e030af3e | 1782 | |
29331e72 | 1783 | public: |
29331e72 LD |
1784 | /* Can we move vsetvl info between prev_insn and next_insn safe? */ |
1785 | bool avl_vl_unmodified_between_p (insn_info *prev_insn, insn_info *next_insn, | |
1786 | const vsetvl_info &info, | |
1787 | bool ignore_vl = false) | |
1788 | { | |
1789 | gcc_assert ((ignore_vl && info.has_nonvlmax_reg_avl ()) | |
1790 | || (info.has_nonvlmax_reg_avl () || info.has_vl ())); | |
1791 | ||
1792 | gcc_assert (!prev_insn->is_debug_insn () && !next_insn->is_debug_insn ()); | |
1793 | if (prev_insn->bb () == next_insn->bb () | |
1794 | && prev_insn->compare_with (next_insn) < 0) | |
1795 | { | |
1796 | for (insn_info *i = next_insn->prev_nondebug_insn (); i != prev_insn; | |
1797 | i = i->prev_nondebug_insn ()) | |
1798 | { | |
9c16ca93 | 1799 | // no def and use of vl |
29331e72 LD |
1800 | if (!ignore_vl && modify_or_use_vl_p (i, info)) |
1801 | return false; | |
e030af3e | 1802 | |
29331e72 LD |
1803 | // no def of avl |
1804 | if (modify_avl_p (i, info)) | |
1805 | return false; | |
1806 | } | |
1807 | return true; | |
1808 | } | |
1809 | else | |
1810 | { | |
3132d2d3 | 1811 | basic_block prev_cfg_bb = prev_insn->bb ()->cfg_bb (); |
29331e72 LD |
1812 | if (!ignore_vl && info.has_vl ()) |
1813 | { | |
3132d2d3 | 1814 | bitmap live_out = df_get_live_out (prev_cfg_bb); |
29331e72 LD |
1815 | if (bitmap_bit_p (live_out, REGNO (info.get_vl ()))) |
1816 | return false; | |
1817 | } | |
a2d12abe | 1818 | |
3132d2d3 JZ |
1819 | /* Find set_info at location of PREV_INSN and NEXT_INSN, Return |
1820 | false if those 2 set_info are different. | |
1821 | ||
1822 | PREV_INSN --- multiple nested blocks --- NEXT_INSN. | |
1823 | ||
1824 | Return false if there is any modifications of AVL inside those | |
1825 | multiple nested blocks. */ | |
1826 | if (info.has_nonvlmax_reg_avl ()) | |
29331e72 | 1827 | { |
3132d2d3 JZ |
1828 | resource_info resource = full_register (REGNO (info.get_avl ())); |
1829 | def_lookup dl1 = crtl->ssa->find_def (resource, prev_insn); | |
1830 | def_lookup dl2 = crtl->ssa->find_def (resource, next_insn); | |
1831 | if (dl2.matching_set ()) | |
1832 | return false; | |
1833 | ||
1834 | auto is_phi_or_real | |
1835 | = [&] (insn_info *h) { return h->is_real () || h->is_phi (); }; | |
1836 | ||
1837 | def_info *def1 = dl1.matching_set_or_last_def_of_prev_group (); | |
1838 | def_info *def2 = dl2.prev_def (next_insn); | |
1839 | set_info *set1 = safe_dyn_cast<set_info *> (def1); | |
1840 | set_info *set2 = safe_dyn_cast<set_info *> (def2); | |
1841 | if (!set1 || !set2) | |
1842 | return false; | |
1843 | ||
1844 | auto is_same_ultimate_def = [&] (set_info *s1, set_info *s2) { | |
1845 | return s1->insn ()->is_phi () && s2->insn ()->is_phi () | |
1846 | && look_through_degenerate_phi (s1) | |
1847 | == look_through_degenerate_phi (s2); | |
1848 | }; | |
1849 | ||
1850 | if (set1 != set2 && !is_same_ultimate_def (set1, set2)) | |
29331e72 | 1851 | { |
3132d2d3 JZ |
1852 | if (!is_phi_or_real (set1->insn ()) |
1853 | || !is_phi_or_real (set2->insn ())) | |
29331e72 | 1854 | return false; |
3132d2d3 JZ |
1855 | |
1856 | if (set1->insn ()->is_real () && set2->insn ()->is_phi ()) | |
1857 | { | |
1858 | hash_set<set_info *> sets | |
1859 | = get_all_sets (set2, true, false, true); | |
1860 | if (!sets.contains (set1)) | |
1861 | return false; | |
1862 | } | |
1863 | else | |
1864 | { | |
1865 | insn_info *def_insn1 = extract_single_source (set1); | |
1866 | insn_info *def_insn2 = extract_single_source (set2); | |
1867 | if (!def_insn1 || !def_insn2 || def_insn1 != def_insn2) | |
1868 | return false; | |
1869 | } | |
29331e72 | 1870 | } |
29331e72 | 1871 | } |
12b23c71 | 1872 | |
29331e72 LD |
1873 | for (insn_info *i = next_insn; i != next_insn->bb ()->head_insn (); |
1874 | i = i->prev_nondebug_insn ()) | |
1875 | { | |
d83070ae | 1876 | // no def and use of vl |
29331e72 LD |
1877 | if (!ignore_vl && modify_or_use_vl_p (i, info)) |
1878 | return false; | |
9243c3d1 | 1879 | |
29331e72 LD |
1880 | // no def of avl |
1881 | if (modify_avl_p (i, info)) | |
1882 | return false; | |
1883 | } | |
6b6b9c68 | 1884 | |
29331e72 LD |
1885 | for (insn_info *i = prev_insn->bb ()->end_insn (); i != prev_insn; |
1886 | i = i->prev_nondebug_insn ()) | |
1887 | { | |
d83070ae | 1888 | // no def mad use of vl |
29331e72 LD |
1889 | if (!ignore_vl && modify_or_use_vl_p (i, info)) |
1890 | return false; | |
1891 | ||
1892 | // no def of avl | |
1893 | if (modify_avl_p (i, info)) | |
1894 | return false; | |
1895 | } | |
1896 | } | |
d875d756 | 1897 | return true; |
29331e72 LD |
1898 | } |
1899 | ||
1900 | bool sew_lmul_compatible_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1901 | { | |
1902 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1903 | sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand (); | |
1904 | sew_lmul_demand_type next_flags = next.get_sew_lmul_demand (); | |
1905 | #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1906 | AVAILABLE_P, FUSE) \ | |
1907 | if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \ | |
1908 | && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \ | |
1909 | return COMPATIBLE_P (prev, next); | |
6b6b9c68 | 1910 | |
29331e72 | 1911 | #include "riscv-vsetvl.def" |
6b6b9c68 | 1912 | |
29331e72 LD |
1913 | gcc_unreachable (); |
1914 | } | |
6b6b9c68 | 1915 | |
29331e72 LD |
1916 | bool sew_lmul_available_p (const vsetvl_info &prev, const vsetvl_info &next) |
1917 | { | |
1918 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1919 | sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand (); | |
1920 | sew_lmul_demand_type next_flags = next.get_sew_lmul_demand (); | |
1921 | #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1922 | AVAILABLE_P, FUSE) \ | |
1923 | if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \ | |
1924 | && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \ | |
1925 | return AVAILABLE_P (prev, next); | |
d875d756 | 1926 | |
29331e72 | 1927 | #include "riscv-vsetvl.def" |
4f673c5e | 1928 | |
29331e72 LD |
1929 | gcc_unreachable (); |
1930 | } | |
1931 | ||
1932 | void merge_sew_lmul (vsetvl_info &prev, const vsetvl_info &next) | |
1933 | { | |
1934 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1935 | sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand (); | |
1936 | sew_lmul_demand_type next_flags = next.get_sew_lmul_demand (); | |
1937 | #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1938 | AVAILABLE_P, FUSE) \ | |
1939 | if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \ | |
1940 | && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \ | |
1941 | { \ | |
1942 | gcc_assert (COMPATIBLE_P (prev, next)); \ | |
1943 | FUSE (prev, next); \ | |
1944 | prev.set_sew_lmul_demand (sew_lmul_demand_type::NEW_FLAGS); \ | |
1945 | return; \ | |
1946 | } | |
9243c3d1 | 1947 | |
29331e72 | 1948 | #include "riscv-vsetvl.def" |
9243c3d1 | 1949 | |
29331e72 LD |
1950 | gcc_unreachable (); |
1951 | } | |
9243c3d1 | 1952 | |
29331e72 LD |
1953 | bool policy_compatible_p (const vsetvl_info &prev, const vsetvl_info &next) |
1954 | { | |
1955 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1956 | policy_demand_type prev_flags = prev.get_policy_demand (); | |
1957 | policy_demand_type next_flags = next.get_policy_demand (); | |
1958 | #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1959 | AVAILABLE_P, FUSE) \ | |
1960 | if (prev_flags == policy_demand_type::PREV_FLAGS \ | |
1961 | && next_flags == policy_demand_type::NEXT_FLAGS) \ | |
1962 | return COMPATIBLE_P (prev, next); | |
9243c3d1 | 1963 | |
29331e72 | 1964 | #include "riscv-vsetvl.def" |
9243c3d1 | 1965 | |
29331e72 LD |
1966 | gcc_unreachable (); |
1967 | } | |
4f673c5e | 1968 | |
29331e72 LD |
1969 | bool policy_available_p (const vsetvl_info &prev, const vsetvl_info &next) |
1970 | { | |
1971 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1972 | policy_demand_type prev_flags = prev.get_policy_demand (); | |
1973 | policy_demand_type next_flags = next.get_policy_demand (); | |
1974 | #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1975 | AVAILABLE_P, FUSE) \ | |
1976 | if (prev_flags == policy_demand_type::PREV_FLAGS \ | |
1977 | && next_flags == policy_demand_type::NEXT_FLAGS) \ | |
1978 | return AVAILABLE_P (prev, next); | |
4f673c5e | 1979 | |
29331e72 | 1980 | #include "riscv-vsetvl.def" |
9243c3d1 | 1981 | |
29331e72 LD |
1982 | gcc_unreachable (); |
1983 | } | |
1984 | ||
1985 | void merge_policy (vsetvl_info &prev, const vsetvl_info &next) | |
1986 | { | |
1987 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1988 | policy_demand_type prev_flags = prev.get_policy_demand (); | |
1989 | policy_demand_type next_flags = next.get_policy_demand (); | |
1990 | #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1991 | AVAILABLE_P, FUSE) \ | |
1992 | if (prev_flags == policy_demand_type::PREV_FLAGS \ | |
1993 | && next_flags == policy_demand_type::NEXT_FLAGS) \ | |
1994 | { \ | |
1995 | gcc_assert (COMPATIBLE_P (prev, next)); \ | |
1996 | FUSE (prev, next); \ | |
1997 | prev.set_policy_demand (policy_demand_type::NEW_FLAGS); \ | |
1998 | return; \ | |
1999 | } | |
9243c3d1 | 2000 | |
29331e72 | 2001 | #include "riscv-vsetvl.def" |
ec99ffab | 2002 | |
29331e72 LD |
2003 | gcc_unreachable (); |
2004 | } | |
9243c3d1 | 2005 | |
d82bb518 JZ |
2006 | bool vl_not_in_conflict_p (const vsetvl_info &prev, const vsetvl_info &next) |
2007 | { | |
2008 | /* We don't fuse this following case: | |
2009 | ||
2010 | li a5, -1 | |
2011 | vmv.s.x v0, a5 -- PREV | |
2012 | vsetvli a5, ... -- NEXT | |
2013 | ||
2014 | Don't fuse NEXT into PREV. | |
2015 | */ | |
2016 | return !prev.vl_modify_non_avl_op_p (next) | |
2017 | && !next.vl_modify_non_avl_op_p (prev); | |
2018 | } | |
2019 | ||
29331e72 LD |
2020 | bool avl_compatible_p (const vsetvl_info &prev, const vsetvl_info &next) |
2021 | { | |
2022 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
2023 | avl_demand_type prev_flags = prev.get_avl_demand (); | |
2024 | avl_demand_type next_flags = next.get_avl_demand (); | |
2025 | #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
2026 | AVAILABLE_P, FUSE) \ | |
2027 | if (prev_flags == avl_demand_type::PREV_FLAGS \ | |
2028 | && next_flags == avl_demand_type::NEXT_FLAGS) \ | |
2029 | return COMPATIBLE_P (prev, next); | |
9243c3d1 | 2030 | |
29331e72 | 2031 | #include "riscv-vsetvl.def" |
9243c3d1 | 2032 | |
29331e72 LD |
2033 | gcc_unreachable (); |
2034 | } | |
9243c3d1 | 2035 | |
29331e72 LD |
2036 | bool avl_available_p (const vsetvl_info &prev, const vsetvl_info &next) |
2037 | { | |
2038 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
2039 | avl_demand_type prev_flags = prev.get_avl_demand (); | |
2040 | avl_demand_type next_flags = next.get_avl_demand (); | |
2041 | #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
2042 | AVAILABLE_P, FUSE) \ | |
2043 | if (prev_flags == avl_demand_type::PREV_FLAGS \ | |
2044 | && next_flags == avl_demand_type::NEXT_FLAGS) \ | |
2045 | return AVAILABLE_P (prev, next); | |
9243c3d1 | 2046 | |
29331e72 | 2047 | #include "riscv-vsetvl.def" |
9243c3d1 | 2048 | |
29331e72 LD |
2049 | gcc_unreachable (); |
2050 | } | |
2051 | ||
2052 | void merge_avl (vsetvl_info &prev, const vsetvl_info &next) | |
2053 | { | |
2054 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
2055 | avl_demand_type prev_flags = prev.get_avl_demand (); | |
2056 | avl_demand_type next_flags = next.get_avl_demand (); | |
2057 | #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
2058 | AVAILABLE_P, FUSE) \ | |
2059 | if (prev_flags == avl_demand_type::PREV_FLAGS \ | |
2060 | && next_flags == avl_demand_type::NEXT_FLAGS) \ | |
2061 | { \ | |
2062 | gcc_assert (COMPATIBLE_P (prev, next)); \ | |
2063 | FUSE (prev, next); \ | |
2064 | prev.set_avl_demand (avl_demand_type::NEW_FLAGS); \ | |
2065 | return; \ | |
60bd33bc JZZ |
2066 | } |
2067 | ||
29331e72 | 2068 | #include "riscv-vsetvl.def" |
9243c3d1 | 2069 | |
29331e72 LD |
2070 | gcc_unreachable (); |
2071 | } | |
2072 | ||
2073 | bool compatible_p (const vsetvl_info &prev, const vsetvl_info &next) | |
2074 | { | |
2075 | bool compatible_p = sew_lmul_compatible_p (prev, next) | |
2076 | && policy_compatible_p (prev, next) | |
d82bb518 JZ |
2077 | && avl_compatible_p (prev, next) |
2078 | && vl_not_in_conflict_p (prev, next); | |
29331e72 LD |
2079 | return compatible_p; |
2080 | } | |
2081 | ||
2082 | bool available_p (const vsetvl_info &prev, const vsetvl_info &next) | |
2083 | { | |
2084 | bool available_p = sew_lmul_available_p (prev, next) | |
2085 | && policy_available_p (prev, next) | |
d82bb518 JZ |
2086 | && avl_available_p (prev, next) |
2087 | && vl_not_in_conflict_p (prev, next); | |
29331e72 LD |
2088 | gcc_assert (!available_p || compatible_p (prev, next)); |
2089 | return available_p; | |
2090 | } | |
2091 | ||
2092 | void merge (vsetvl_info &prev, const vsetvl_info &next) | |
2093 | { | |
2094 | gcc_assert (compatible_p (prev, next)); | |
2095 | merge_sew_lmul (prev, next); | |
2096 | merge_policy (prev, next); | |
2097 | merge_avl (prev, next); | |
2098 | gcc_assert (available_p (prev, next)); | |
2099 | } | |
2100 | }; | |
9243c3d1 | 2101 | |
9243c3d1 | 2102 | |
29331e72 | 2103 | class pre_vsetvl |
9243c3d1 | 2104 | { |
29331e72 LD |
2105 | private: |
2106 | demand_system m_dem; | |
2107 | auto_vec<vsetvl_block_info> m_vector_block_infos; | |
2108 | ||
d83070ae | 2109 | /* data for avl reaching definition. */ |
29331e72 LD |
2110 | sbitmap *m_reg_def_loc; |
2111 | ||
d83070ae KC |
2112 | /* data for vsetvl info reaching definition. */ |
2113 | vsetvl_info m_unknown_info; | |
29331e72 LD |
2114 | auto_vec<vsetvl_info *> m_vsetvl_def_exprs; |
2115 | sbitmap *m_vsetvl_def_in; | |
2116 | sbitmap *m_vsetvl_def_out; | |
2117 | ||
2118 | /* data for lcm */ | |
2119 | auto_vec<vsetvl_info *> m_exprs; | |
2120 | sbitmap *m_avloc; | |
2121 | sbitmap *m_avin; | |
2122 | sbitmap *m_avout; | |
2123 | sbitmap *m_kill; | |
2124 | sbitmap *m_antloc; | |
2125 | sbitmap *m_transp; | |
2126 | sbitmap *m_insert; | |
2127 | sbitmap *m_del; | |
2128 | struct edge_list *m_edges; | |
2129 | ||
2130 | auto_vec<vsetvl_info> m_delete_list; | |
2131 | ||
2132 | vsetvl_block_info &get_block_info (const bb_info *bb) | |
2133 | { | |
2134 | return m_vector_block_infos[bb->index ()]; | |
2135 | } | |
2136 | const vsetvl_block_info &get_block_info (const basic_block bb) const | |
2137 | { | |
2138 | return m_vector_block_infos[bb->index]; | |
2139 | } | |
2140 | ||
2141 | vsetvl_block_info &get_block_info (const basic_block bb) | |
2142 | { | |
2143 | return m_vector_block_infos[bb->index]; | |
2144 | } | |
2145 | ||
2146 | void add_expr (auto_vec<vsetvl_info *> &m_exprs, vsetvl_info &info) | |
2147 | { | |
2148 | for (vsetvl_info *item : m_exprs) | |
2149 | { | |
2150 | if (*item == info) | |
2151 | return; | |
2152 | } | |
2153 | m_exprs.safe_push (&info); | |
2154 | } | |
2155 | ||
2156 | unsigned get_expr_index (auto_vec<vsetvl_info *> &m_exprs, | |
2157 | const vsetvl_info &info) | |
2158 | { | |
2159 | for (size_t i = 0; i < m_exprs.length (); i += 1) | |
2160 | { | |
2161 | if (*m_exprs[i] == info) | |
2162 | return i; | |
2163 | } | |
2164 | gcc_unreachable (); | |
2165 | } | |
2166 | ||
c9d5b46a | 2167 | bool anticipated_exp_p (const vsetvl_info &header_info) |
29331e72 LD |
2168 | { |
2169 | if (!header_info.has_nonvlmax_reg_avl () && !header_info.has_vl ()) | |
2170 | return true; | |
9243c3d1 | 2171 | |
29331e72 LD |
2172 | bb_info *bb = header_info.get_bb (); |
2173 | insn_info *prev_insn = bb->head_insn (); | |
2174 | insn_info *next_insn = header_info.insn_inside_bb_p () | |
2175 | ? header_info.get_insn () | |
2176 | : header_info.get_bb ()->end_insn (); | |
2177 | ||
2178 | return m_dem.avl_vl_unmodified_between_p (prev_insn, next_insn, | |
2179 | header_info); | |
2180 | } | |
2181 | ||
2182 | bool available_exp_p (const vsetvl_info &prev_info, | |
2183 | const vsetvl_info &next_info) | |
2184 | { | |
2185 | return m_dem.available_p (prev_info, next_info); | |
2186 | } | |
2187 | ||
2188 | void compute_probabilities () | |
2189 | { | |
2190 | edge e; | |
2191 | edge_iterator ei; | |
2192 | ||
2193 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2194 | { | |
2195 | basic_block cfg_bb = bb->cfg_bb (); | |
2196 | auto &curr_prob = get_block_info (cfg_bb).probability; | |
2197 | ||
2198 | /* GCC assume entry block (bb 0) are always so | |
2199 | executed so set its probability as "always". */ | |
2200 | if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb) | |
2201 | curr_prob = profile_probability::always (); | |
2202 | /* Exit block (bb 1) is the block we don't need to process. */ | |
2203 | if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb) | |
2204 | continue; | |
9243c3d1 | 2205 | |
29331e72 LD |
2206 | gcc_assert (curr_prob.initialized_p ()); |
2207 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
2208 | { | |
2209 | auto &new_prob = get_block_info (e->dest).probability; | |
2210 | /* Normally, the edge probability should be initialized. | |
2211 | However, some special testing code which is written in | |
d83070ae | 2212 | GIMPLE IR style force the edge probability uninitialized, |
29331e72 | 2213 | we conservatively set it as never so that it will not |
d83070ae | 2214 | affect PRE (Phase 3 && Phase 4). */ |
29331e72 LD |
2215 | if (!e->probability.initialized_p ()) |
2216 | new_prob = profile_probability::never (); | |
2217 | else if (!new_prob.initialized_p ()) | |
2218 | new_prob = curr_prob * e->probability; | |
2219 | else if (new_prob == profile_probability::always ()) | |
2220 | continue; | |
2221 | else | |
2222 | new_prob += curr_prob * e->probability; | |
2223 | } | |
2224 | } | |
2225 | } | |
2226 | ||
2227 | void insert_vsetvl_insn (enum emit_type emit_type, const vsetvl_info &info) | |
2228 | { | |
2229 | rtx pat = info.get_vsetvl_pat (); | |
2230 | rtx_insn *rinsn = info.get_insn ()->rtl (); | |
2231 | ||
2232 | if (emit_type == EMIT_DIRECT) | |
2233 | { | |
2234 | emit_insn (pat); | |
2235 | if (dump_file) | |
2236 | { | |
2237 | fprintf (dump_file, " Insert vsetvl insn %d:\n", | |
2238 | INSN_UID (get_last_insn ())); | |
2239 | print_rtl_single (dump_file, get_last_insn ()); | |
2240 | } | |
2241 | } | |
2242 | else if (emit_type == EMIT_BEFORE) | |
2243 | { | |
2244 | emit_insn_before (pat, rinsn); | |
2245 | if (dump_file) | |
2246 | { | |
2247 | fprintf (dump_file, " Insert vsetvl insn before insn %d:\n", | |
2248 | INSN_UID (rinsn)); | |
2249 | print_rtl_single (dump_file, PREV_INSN (rinsn)); | |
2250 | } | |
2251 | } | |
2252 | else | |
2253 | { | |
2254 | emit_insn_after (pat, rinsn); | |
2255 | if (dump_file) | |
2256 | { | |
2257 | fprintf (dump_file, " Insert vsetvl insn after insn %d:\n", | |
2258 | INSN_UID (rinsn)); | |
2259 | print_rtl_single (dump_file, NEXT_INSN (rinsn)); | |
2260 | } | |
2261 | } | |
2262 | } | |
2263 | ||
2264 | void change_vsetvl_insn (const vsetvl_info &info) | |
2265 | { | |
2266 | rtx_insn *rinsn = info.get_insn ()->rtl (); | |
2267 | rtx new_pat = info.get_vsetvl_pat (); | |
2268 | ||
2269 | if (dump_file) | |
2270 | { | |
2271 | fprintf (dump_file, " Change insn %d from:\n", INSN_UID (rinsn)); | |
2272 | print_rtl_single (dump_file, rinsn); | |
2273 | } | |
2274 | ||
2275 | validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false); | |
2276 | ||
2277 | if (dump_file) | |
2278 | { | |
2279 | fprintf (dump_file, "\n to:\n"); | |
2280 | print_rtl_single (dump_file, rinsn); | |
2281 | } | |
2282 | } | |
2283 | ||
d29136ad | 2284 | void remove_vsetvl_insn (rtx_insn *rinsn) |
29331e72 | 2285 | { |
29331e72 LD |
2286 | if (dump_file) |
2287 | { | |
2288 | fprintf (dump_file, " Eliminate insn %d:\n", INSN_UID (rinsn)); | |
2289 | print_rtl_single (dump_file, rinsn); | |
2290 | } | |
2291 | if (in_sequence_p ()) | |
2292 | remove_insn (rinsn); | |
2293 | else | |
2294 | delete_insn (rinsn); | |
2295 | } | |
2296 | ||
2297 | bool successors_probability_equal_p (const basic_block cfg_bb) const | |
2298 | { | |
2299 | edge e; | |
2300 | edge_iterator ei; | |
2301 | profile_probability prob = profile_probability::uninitialized (); | |
2302 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
2303 | { | |
2304 | if (prob == profile_probability::uninitialized ()) | |
2305 | prob = m_vector_block_infos[e->dest->index].probability; | |
2306 | else if (prob == m_vector_block_infos[e->dest->index].probability) | |
2307 | continue; | |
2308 | else | |
2309 | /* We pick the highest probability among those incompatible VSETVL | |
d83070ae | 2310 | infos. When all incompatible VSETVL infos have same probability, we |
29331e72 LD |
2311 | don't pick any of them. */ |
2312 | return false; | |
2313 | } | |
ec99ffab | 2314 | return true; |
29331e72 LD |
2315 | } |
2316 | ||
e935c066 JZ |
2317 | bool has_compatible_reaching_vsetvl_p (vsetvl_info info) |
2318 | { | |
2319 | unsigned int index; | |
2320 | sbitmap_iterator sbi; | |
2321 | EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[info.get_bb ()->index ()], 0, | |
2322 | index, sbi) | |
2323 | { | |
2324 | const auto prev_info = *m_vsetvl_def_exprs[index]; | |
2325 | if (!prev_info.valid_p ()) | |
2326 | continue; | |
2327 | if (m_dem.compatible_p (prev_info, info)) | |
2328 | return true; | |
2329 | } | |
2330 | return false; | |
2331 | } | |
2332 | ||
923a67f1 | 2333 | bool preds_all_same_avl_and_ratio_p (const vsetvl_info &curr_info) |
29331e72 LD |
2334 | { |
2335 | gcc_assert ( | |
2336 | !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()])); | |
2337 | ||
2338 | unsigned expr_index; | |
2339 | sbitmap_iterator sbi; | |
2340 | EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[curr_info.get_bb ()->index ()], 0, | |
2341 | expr_index, sbi) | |
2342 | { | |
2343 | const vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index]; | |
2344 | if (!prev_info.valid_p () | |
923a67f1 JZ |
2345 | || !m_dem.avl_available_p (prev_info, curr_info) |
2346 | || prev_info.get_ratio () != curr_info.get_ratio ()) | |
29331e72 LD |
2347 | return false; |
2348 | } | |
005fad9d | 2349 | |
005fad9d | 2350 | return true; |
29331e72 | 2351 | } |
005fad9d | 2352 | |
29331e72 LD |
2353 | public: |
2354 | pre_vsetvl () | |
3132d2d3 | 2355 | : m_vsetvl_def_in (nullptr), m_vsetvl_def_out (nullptr), m_avloc (nullptr), |
29331e72 LD |
2356 | m_avin (nullptr), m_avout (nullptr), m_kill (nullptr), m_antloc (nullptr), |
2357 | m_transp (nullptr), m_insert (nullptr), m_del (nullptr), m_edges (nullptr) | |
2358 | { | |
2359 | /* Initialization of RTL_SSA. */ | |
2360 | calculate_dominance_info (CDI_DOMINATORS); | |
4a0a8dc1 JZ |
2361 | loop_optimizer_init (LOOPS_NORMAL); |
2362 | /* Create FAKE edges for infinite loops. */ | |
2363 | connect_infinite_loops_to_exit (); | |
29331e72 LD |
2364 | df_analyze (); |
2365 | crtl->ssa = new function_info (cfun); | |
2366 | m_vector_block_infos.safe_grow_cleared (last_basic_block_for_fn (cfun)); | |
2367 | compute_probabilities (); | |
d83070ae | 2368 | m_unknown_info.set_unknown (); |
29331e72 LD |
2369 | } |
2370 | ||
2371 | void finish () | |
2372 | { | |
2373 | free_dominance_info (CDI_DOMINATORS); | |
4a0a8dc1 | 2374 | loop_optimizer_finalize (); |
29331e72 LD |
2375 | if (crtl->ssa->perform_pending_updates ()) |
2376 | cleanup_cfg (0); | |
2377 | delete crtl->ssa; | |
2378 | crtl->ssa = nullptr; | |
2379 | ||
29331e72 LD |
2380 | if (m_reg_def_loc) |
2381 | sbitmap_vector_free (m_reg_def_loc); | |
2382 | ||
29331e72 LD |
2383 | if (m_vsetvl_def_in) |
2384 | sbitmap_vector_free (m_vsetvl_def_in); | |
2385 | if (m_vsetvl_def_out) | |
2386 | sbitmap_vector_free (m_vsetvl_def_out); | |
2387 | ||
2388 | if (m_avloc) | |
2389 | sbitmap_vector_free (m_avloc); | |
2390 | if (m_kill) | |
2391 | sbitmap_vector_free (m_kill); | |
2392 | if (m_antloc) | |
2393 | sbitmap_vector_free (m_antloc); | |
2394 | if (m_transp) | |
2395 | sbitmap_vector_free (m_transp); | |
2396 | if (m_insert) | |
2397 | sbitmap_vector_free (m_insert); | |
2398 | if (m_del) | |
2399 | sbitmap_vector_free (m_del); | |
2400 | if (m_avin) | |
2401 | sbitmap_vector_free (m_avin); | |
2402 | if (m_avout) | |
2403 | sbitmap_vector_free (m_avout); | |
2404 | ||
2405 | if (m_edges) | |
2406 | free_edge_list (m_edges); | |
2407 | } | |
2408 | ||
29331e72 | 2409 | void compute_vsetvl_def_data (); |
9dd10de1 | 2410 | void compute_transparent (const bb_info *); |
29331e72 LD |
2411 | void compute_lcm_local_properties (); |
2412 | ||
2413 | void fuse_local_vsetvl_info (); | |
33408780 | 2414 | bool earliest_fuse_vsetvl_info (int iter); |
29331e72 LD |
2415 | void pre_global_vsetvl_info (); |
2416 | void emit_vsetvl (); | |
d83070ae | 2417 | void cleanup (); |
29331e72 LD |
2418 | void remove_avl_operand (); |
2419 | void remove_unused_dest_operand (); | |
22622a5a | 2420 | void remove_vsetvl_pre_insns (); |
29331e72 LD |
2421 | |
2422 | void dump (FILE *file, const char *title) const | |
2423 | { | |
2424 | fprintf (file, "\nVSETVL infos after %s\n\n", title); | |
2425 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2426 | { | |
2427 | const auto &block_info = m_vector_block_infos[bb->index ()]; | |
2428 | fprintf (file, " bb %d:\n", bb->index ()); | |
2429 | fprintf (file, " probability: "); | |
2430 | block_info.probability.dump (file); | |
2431 | fprintf (file, "\n"); | |
2432 | if (!block_info.empty_p ()) | |
2433 | { | |
2434 | fprintf (file, " Header vsetvl info:"); | |
2435 | block_info.get_entry_info ().dump (file, " "); | |
2436 | fprintf (file, " Footer vsetvl info:"); | |
2437 | block_info.get_exit_info ().dump (file, " "); | |
4fd09aed | 2438 | for (const auto &info : block_info.local_infos) |
29331e72 LD |
2439 | { |
2440 | fprintf (file, | |
2441 | " insn %d vsetvl info:", info.get_insn ()->uid ()); | |
2442 | info.dump (file, " "); | |
2443 | } | |
2444 | } | |
2445 | } | |
2446 | } | |
2447 | }; | |
c139f5e1 | 2448 | |
9243c3d1 | 2449 | void |
29331e72 | 2450 | pre_vsetvl::compute_vsetvl_def_data () |
9243c3d1 | 2451 | { |
29331e72 | 2452 | m_vsetvl_def_exprs.truncate (0); |
d83070ae | 2453 | add_expr (m_vsetvl_def_exprs, m_unknown_info); |
29331e72 | 2454 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 2455 | { |
29331e72 LD |
2456 | vsetvl_block_info &block_info = get_block_info (bb); |
2457 | if (block_info.empty_p ()) | |
2458 | continue; | |
2459 | vsetvl_info &footer_info = block_info.get_exit_info (); | |
2460 | gcc_assert (footer_info.valid_p () || footer_info.unknown_p ()); | |
2461 | add_expr (m_vsetvl_def_exprs, footer_info); | |
9243c3d1 JZZ |
2462 | } |
2463 | ||
29331e72 LD |
2464 | if (m_vsetvl_def_in) |
2465 | sbitmap_vector_free (m_vsetvl_def_in); | |
2466 | if (m_vsetvl_def_out) | |
2467 | sbitmap_vector_free (m_vsetvl_def_out); | |
9243c3d1 | 2468 | |
29331e72 LD |
2469 | sbitmap *def_loc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), |
2470 | m_vsetvl_def_exprs.length ()); | |
2471 | sbitmap *m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2472 | m_vsetvl_def_exprs.length ()); | |
9243c3d1 | 2473 | |
29331e72 LD |
2474 | m_vsetvl_def_in = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), |
2475 | m_vsetvl_def_exprs.length ()); | |
2476 | m_vsetvl_def_out = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2477 | m_vsetvl_def_exprs.length ()); | |
9243c3d1 | 2478 | |
29331e72 LD |
2479 | bitmap_vector_clear (def_loc, last_basic_block_for_fn (cfun)); |
2480 | bitmap_vector_clear (m_kill, last_basic_block_for_fn (cfun)); | |
2481 | bitmap_vector_clear (m_vsetvl_def_out, last_basic_block_for_fn (cfun)); | |
9243c3d1 | 2482 | |
29331e72 LD |
2483 | for (const bb_info *bb : crtl->ssa->bbs ()) |
2484 | { | |
2485 | vsetvl_block_info &block_info = get_block_info (bb); | |
2486 | if (block_info.empty_p ()) | |
9243c3d1 | 2487 | { |
29331e72 | 2488 | for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i += 1) |
9243c3d1 | 2489 | { |
9dd10de1 JZ |
2490 | auto *info = m_vsetvl_def_exprs[i]; |
2491 | if (info->has_nonvlmax_reg_avl () | |
2492 | && bitmap_bit_p (m_reg_def_loc[bb->index ()], | |
2493 | REGNO (info->get_avl ()))) | |
2494 | { | |
2495 | bitmap_set_bit (m_kill[bb->index ()], i); | |
2496 | bitmap_set_bit (def_loc[bb->index ()], | |
2497 | get_expr_index (m_vsetvl_def_exprs, | |
d83070ae | 2498 | m_unknown_info)); |
9dd10de1 | 2499 | } |
9243c3d1 | 2500 | } |
29331e72 | 2501 | continue; |
9243c3d1 JZZ |
2502 | } |
2503 | ||
29331e72 LD |
2504 | vsetvl_info &footer_info = block_info.get_exit_info (); |
2505 | bitmap_ones (m_kill[bb->index ()]); | |
2506 | bitmap_set_bit (def_loc[bb->index ()], | |
2507 | get_expr_index (m_vsetvl_def_exprs, footer_info)); | |
9243c3d1 JZZ |
2508 | } |
2509 | ||
d83070ae | 2510 | /* Set the def_out of the ENTRY basic block to m_unknown_info expr. */ |
29331e72 LD |
2511 | basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun); |
2512 | bitmap_set_bit (m_vsetvl_def_out[entry->index], | |
d83070ae | 2513 | get_expr_index (m_vsetvl_def_exprs, m_unknown_info)); |
9243c3d1 | 2514 | |
29331e72 LD |
2515 | compute_reaching_defintion (def_loc, m_kill, m_vsetvl_def_in, |
2516 | m_vsetvl_def_out); | |
2517 | ||
2518 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
e030af3e | 2519 | { |
29331e72 | 2520 | fprintf (dump_file, |
d83070ae | 2521 | "\n Compute vsetvl info reaching definition data:\n\n"); |
29331e72 LD |
2522 | fprintf (dump_file, " Expression List (%d):\n", |
2523 | m_vsetvl_def_exprs.length ()); | |
2524 | for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i++) | |
2525 | { | |
2526 | const auto &info = *m_vsetvl_def_exprs[i]; | |
2527 | fprintf (dump_file, " Expr[%u]: ", i); | |
2528 | info.dump (dump_file, " "); | |
2529 | } | |
2530 | fprintf (dump_file, "\n bitmap data:\n"); | |
2531 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2532 | { | |
2533 | unsigned int i = bb->index (); | |
2534 | fprintf (dump_file, " BB %u:\n", i); | |
2535 | fprintf (dump_file, " def_loc: "); | |
2536 | dump_bitmap_file (dump_file, def_loc[i]); | |
2537 | fprintf (dump_file, " kill: "); | |
2538 | dump_bitmap_file (dump_file, m_kill[i]); | |
2539 | fprintf (dump_file, " vsetvl_def_in: "); | |
2540 | dump_bitmap_file (dump_file, m_vsetvl_def_in[i]); | |
2541 | fprintf (dump_file, " vsetvl_def_out: "); | |
2542 | dump_bitmap_file (dump_file, m_vsetvl_def_out[i]); | |
2543 | } | |
e030af3e | 2544 | } |
4f673c5e | 2545 | |
29331e72 LD |
2546 | sbitmap_vector_free (def_loc); |
2547 | sbitmap_vector_free (m_kill); | |
e030af3e | 2548 | } |
9243c3d1 | 2549 | |
9dd10de1 JZ |
2550 | /* Subroutine of compute_lcm_local_properties which Compute local transparent |
2551 | BB. Note that the compile time is very sensitive to compute_transparent and | |
2552 | compute_lcm_local_properties, any change of these 2 functions should be | |
2553 | aware of the compile time changing of the program which has a large number of | |
2554 | blocks, e.g SPEC 2017 wrf. | |
2555 | ||
2556 | Current compile time profile of SPEC 2017 wrf: | |
2557 | ||
2558 | 1. scheduling - 27% | |
2559 | 2. machine dep reorg (VSETVL PASS) - 18% | |
2560 | ||
2561 | VSETVL pass should not spend more time than scheduling in compilation. */ | |
2562 | void | |
2563 | pre_vsetvl::compute_transparent (const bb_info *bb) | |
2564 | { | |
2565 | int num_exprs = m_exprs.length (); | |
2566 | unsigned bb_index = bb->index (); | |
2567 | for (int i = 0; i < num_exprs; i++) | |
2568 | { | |
2569 | auto *info = m_exprs[i]; | |
2570 | if (info->has_nonvlmax_reg_avl () | |
2571 | && bitmap_bit_p (m_reg_def_loc[bb_index], REGNO (info->get_avl ()))) | |
2572 | bitmap_clear_bit (m_transp[bb_index], i); | |
2573 | else if (info->has_vl () | |
2574 | && bitmap_bit_p (m_reg_def_loc[bb_index], | |
2575 | REGNO (info->get_vl ()))) | |
2576 | bitmap_clear_bit (m_transp[bb_index], i); | |
2577 | } | |
2578 | } | |
2579 | ||
e030af3e | 2580 | /* Compute the local properties of each recorded expression. |
6b6b9c68 | 2581 | |
e030af3e JZ |
2582 | Local properties are those that are defined by the block, irrespective of |
2583 | other blocks. | |
6b6b9c68 | 2584 | |
e030af3e JZ |
2585 | An expression is transparent in a block if its operands are not modified |
2586 | in the block. | |
6b6b9c68 | 2587 | |
e030af3e JZ |
2588 | An expression is computed (locally available) in a block if it is computed |
2589 | at least once and expression would contain the same value if the | |
2590 | computation was moved to the end of the block. | |
2591 | ||
2592 | An expression is locally anticipatable in a block if it is computed at | |
2593 | least once and expression would contain the same value if the computation | |
2594 | was moved to the beginning of the block. */ | |
2595 | void | |
29331e72 | 2596 | pre_vsetvl::compute_lcm_local_properties () |
6b6b9c68 | 2597 | { |
29331e72 LD |
2598 | m_exprs.truncate (0); |
2599 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2600 | { | |
2601 | vsetvl_block_info &block_info = get_block_info (bb); | |
2602 | if (block_info.empty_p ()) | |
2603 | continue; | |
2604 | vsetvl_info &header_info = block_info.get_entry_info (); | |
2605 | vsetvl_info &footer_info = block_info.get_exit_info (); | |
2606 | gcc_assert (footer_info.valid_p () || footer_info.unknown_p ()); | |
d40b3c1e JZ |
2607 | if (header_info.valid_p ()) |
2608 | add_expr (m_exprs, header_info); | |
2609 | if (footer_info.valid_p ()) | |
2610 | add_expr (m_exprs, footer_info); | |
29331e72 LD |
2611 | } |
2612 | ||
2613 | int num_exprs = m_exprs.length (); | |
2614 | if (m_avloc) | |
2615 | sbitmap_vector_free (m_avloc); | |
2616 | if (m_kill) | |
2617 | sbitmap_vector_free (m_kill); | |
2618 | if (m_antloc) | |
2619 | sbitmap_vector_free (m_antloc); | |
2620 | if (m_transp) | |
2621 | sbitmap_vector_free (m_transp); | |
2622 | if (m_avin) | |
2623 | sbitmap_vector_free (m_avin); | |
2624 | if (m_avout) | |
2625 | sbitmap_vector_free (m_avout); | |
2626 | ||
2627 | m_avloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2628 | m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2629 | m_antloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2630 | m_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2631 | m_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2632 | m_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2633 | ||
2634 | bitmap_vector_clear (m_avloc, last_basic_block_for_fn (cfun)); | |
2635 | bitmap_vector_clear (m_antloc, last_basic_block_for_fn (cfun)); | |
9dd10de1 | 2636 | bitmap_vector_ones (m_transp, last_basic_block_for_fn (cfun)); |
29331e72 | 2637 | |
e030af3e JZ |
2638 | /* - If T is locally available at the end of a block, then T' must be |
2639 | available at the end of the same block. Since some optimization has | |
2640 | occurred earlier, T' might not be locally available, however, it must | |
2641 | have been previously computed on all paths. As a formula, T at AVLOC(B) | |
2642 | implies that T' at AVOUT(B). | |
2643 | An "available occurrence" is one that is the last occurrence in the | |
2644 | basic block and the operands are not modified by following statements in | |
2645 | the basic block [including this insn]. | |
6b6b9c68 | 2646 | |
e030af3e JZ |
2647 | - If T is locally anticipated at the beginning of a block, then either |
2648 | T', is locally anticipated or it is already available from previous | |
2649 | blocks. As a formula, this means that T at ANTLOC(B) implies that T' at | |
2650 | ANTLOC(B) at AVIN(B). | |
2651 | An "anticipatable occurrence" is one that is the first occurrence in the | |
2652 | basic block, the operands are not modified in the basic block prior | |
2653 | to the occurrence and the output is not used between the start of | |
2654 | the block and the occurrence. */ | |
e030af3e | 2655 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 2656 | { |
29331e72 LD |
2657 | unsigned bb_index = bb->index (); |
2658 | vsetvl_block_info &block_info = get_block_info (bb); | |
9243c3d1 | 2659 | |
29331e72 LD |
2660 | /* Compute m_transp */ |
2661 | if (block_info.empty_p ()) | |
9dd10de1 JZ |
2662 | compute_transparent (bb); |
2663 | else | |
9243c3d1 | 2664 | { |
9dd10de1 JZ |
2665 | bitmap_clear (m_transp[bb_index]); |
2666 | vsetvl_info &header_info = block_info.get_entry_info (); | |
2667 | vsetvl_info &footer_info = block_info.get_exit_info (); | |
29331e72 | 2668 | |
9dd10de1 JZ |
2669 | if (header_info.valid_p () && anticipated_exp_p (header_info)) |
2670 | bitmap_set_bit (m_antloc[bb_index], | |
2671 | get_expr_index (m_exprs, header_info)); | |
9243c3d1 | 2672 | |
9dd10de1 JZ |
2673 | if (footer_info.valid_p ()) |
2674 | for (int i = 0; i < num_exprs; i += 1) | |
2675 | { | |
2676 | const vsetvl_info &info = *m_exprs[i]; | |
2677 | if (!info.valid_p ()) | |
2678 | continue; | |
2679 | if (available_exp_p (footer_info, info)) | |
2680 | bitmap_set_bit (m_avloc[bb_index], i); | |
2681 | } | |
9243c3d1 | 2682 | } |
e030af3e | 2683 | |
4a0a8dc1 JZ |
2684 | if (invalid_opt_bb_p (bb->cfg_bb ())) |
2685 | { | |
2686 | bitmap_clear (m_antloc[bb_index]); | |
2687 | bitmap_clear (m_transp[bb_index]); | |
2688 | } | |
9dd10de1 | 2689 | |
d40b3c1e JZ |
2690 | /* Compute ae_kill for each basic block using: |
2691 | ||
2692 | ~(TRANSP | COMP) | |
2693 | */ | |
2694 | bitmap_ior (m_kill[bb_index], m_transp[bb_index], m_avloc[bb_index]); | |
2695 | bitmap_not (m_kill[bb_index], m_kill[bb_index]); | |
9243c3d1 JZZ |
2696 | } |
2697 | } | |
2698 | ||
29331e72 LD |
2699 | void |
2700 | pre_vsetvl::fuse_local_vsetvl_info () | |
e030af3e | 2701 | { |
29331e72 LD |
2702 | m_reg_def_loc |
2703 | = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), GP_REG_LAST + 1); | |
2704 | bitmap_vector_clear (m_reg_def_loc, last_basic_block_for_fn (cfun)); | |
2705 | bitmap_ones (m_reg_def_loc[ENTRY_BLOCK_PTR_FOR_FN (cfun)->index]); | |
2706 | ||
2707 | for (bb_info *bb : crtl->ssa->bbs ()) | |
e030af3e | 2708 | { |
29331e72 | 2709 | auto &block_info = get_block_info (bb); |
4fd09aed | 2710 | block_info.bb = bb; |
29331e72 | 2711 | if (dump_file && (dump_flags & TDF_DETAILS)) |
e030af3e | 2712 | { |
29331e72 LD |
2713 | fprintf (dump_file, " Try fuse basic block %d\n", bb->index ()); |
2714 | } | |
2715 | auto_vec<vsetvl_info> infos; | |
2716 | for (insn_info *insn : bb->real_nondebug_insns ()) | |
2717 | { | |
2718 | vsetvl_info curr_info = vsetvl_info (insn); | |
2719 | if (curr_info.valid_p () || curr_info.unknown_p ()) | |
2720 | infos.safe_push (curr_info); | |
2721 | ||
2722 | /* Collecting GP registers modified by the current bb. */ | |
2723 | if (insn->is_real ()) | |
2724 | for (def_info *def : insn->defs ()) | |
2725 | if (def->is_reg () && GP_REG_P (def->regno ())) | |
2726 | bitmap_set_bit (m_reg_def_loc[bb->index ()], def->regno ()); | |
2727 | } | |
e030af3e | 2728 | |
29331e72 LD |
2729 | vsetvl_info prev_info = vsetvl_info (); |
2730 | prev_info.set_empty (); | |
2731 | for (auto &curr_info : infos) | |
2732 | { | |
2733 | if (prev_info.empty_p ()) | |
2734 | prev_info = curr_info; | |
2735 | else if ((curr_info.unknown_p () && prev_info.valid_p ()) | |
2736 | || (curr_info.valid_p () && prev_info.unknown_p ())) | |
2737 | { | |
4fd09aed | 2738 | block_info.local_infos.safe_push (prev_info); |
29331e72 LD |
2739 | prev_info = curr_info; |
2740 | } | |
2741 | else if (curr_info.valid_p () && prev_info.valid_p ()) | |
2742 | { | |
2743 | if (m_dem.available_p (prev_info, curr_info)) | |
e7b585a4 | 2744 | { |
29331e72 | 2745 | if (dump_file && (dump_flags & TDF_DETAILS)) |
e7b585a4 | 2746 | { |
29331e72 LD |
2747 | fprintf (dump_file, |
2748 | " Ignore curr info since prev info " | |
2749 | "available with it:\n"); | |
2750 | fprintf (dump_file, " prev_info: "); | |
2751 | prev_info.dump (dump_file, " "); | |
2752 | fprintf (dump_file, " curr_info: "); | |
2753 | curr_info.dump (dump_file, " "); | |
2754 | fprintf (dump_file, "\n"); | |
e7b585a4 | 2755 | } |
6cf47447 JZ |
2756 | /* Even though prev_info is available with curr_info, |
2757 | we need to update the MAX_SEW of prev_info since | |
2758 | we don't check MAX_SEW in available_p check. | |
2759 | ||
2760 | prev_info: | |
2761 | Demand fields: demand_ratio_and_ge_sew demand_avl | |
2762 | SEW=16, VLMUL=mf4, RATIO=64, MAX_SEW=64 | |
2763 | ||
2764 | curr_info: | |
2765 | Demand fields: demand_ge_sew demand_non_zero_avl | |
2766 | SEW=16, VLMUL=m1, RATIO=16, MAX_SEW=32 | |
2767 | ||
2768 | In the example above, prev_info is available with | |
2769 | curr_info, we need to update prev_info MAX_SEW from | |
2770 | 64 into 32. */ | |
2771 | prev_info.set_max_sew ( | |
2772 | MIN (prev_info.get_max_sew (), curr_info.get_max_sew ())); | |
4cd4c34a | 2773 | if (!curr_info.vl_used_by_non_rvv_insn_p () |
29331e72 LD |
2774 | && vsetvl_insn_p (curr_info.get_insn ()->rtl ())) |
2775 | m_delete_list.safe_push (curr_info); | |
e030af3e | 2776 | |
29331e72 LD |
2777 | if (curr_info.get_read_vl_insn ()) |
2778 | prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ()); | |
e030af3e | 2779 | } |
29331e72 | 2780 | else if (m_dem.compatible_p (prev_info, curr_info)) |
e030af3e | 2781 | { |
29331e72 | 2782 | if (dump_file && (dump_flags & TDF_DETAILS)) |
e030af3e | 2783 | { |
29331e72 LD |
2784 | fprintf (dump_file, " Fuse curr info since prev info " |
2785 | "compatible with it:\n"); | |
2786 | fprintf (dump_file, " prev_info: "); | |
2787 | prev_info.dump (dump_file, " "); | |
2788 | fprintf (dump_file, " curr_info: "); | |
2789 | curr_info.dump (dump_file, " "); | |
e030af3e | 2790 | } |
29331e72 LD |
2791 | m_dem.merge (prev_info, curr_info); |
2792 | if (curr_info.get_read_vl_insn ()) | |
2793 | prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ()); | |
2794 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
e030af3e | 2795 | { |
29331e72 LD |
2796 | fprintf (dump_file, " prev_info after fused: "); |
2797 | prev_info.dump (dump_file, " "); | |
2798 | fprintf (dump_file, "\n"); | |
e030af3e | 2799 | } |
e030af3e JZ |
2800 | } |
2801 | else | |
2802 | { | |
29331e72 LD |
2803 | if (dump_file && (dump_flags & TDF_DETAILS)) |
2804 | { | |
2805 | fprintf (dump_file, | |
d83070ae | 2806 | " Cannot fuse incompatible infos:\n"); |
29331e72 LD |
2807 | fprintf (dump_file, " prev_info: "); |
2808 | prev_info.dump (dump_file, " "); | |
2809 | fprintf (dump_file, " curr_info: "); | |
2810 | curr_info.dump (dump_file, " "); | |
2811 | } | |
4fd09aed | 2812 | block_info.local_infos.safe_push (prev_info); |
29331e72 | 2813 | prev_info = curr_info; |
e030af3e JZ |
2814 | } |
2815 | } | |
2816 | } | |
29331e72 LD |
2817 | |
2818 | if (prev_info.valid_p () || prev_info.unknown_p ()) | |
4fd09aed | 2819 | block_info.local_infos.safe_push (prev_info); |
e030af3e | 2820 | } |
e030af3e JZ |
2821 | } |
2822 | ||
29331e72 | 2823 | |
9243c3d1 | 2824 | bool |
33408780 | 2825 | pre_vsetvl::earliest_fuse_vsetvl_info (int iter) |
9243c3d1 | 2826 | { |
29331e72 LD |
2827 | compute_vsetvl_def_data (); |
2828 | compute_lcm_local_properties (); | |
9243c3d1 | 2829 | |
29331e72 LD |
2830 | unsigned num_exprs = m_exprs.length (); |
2831 | struct edge_list *m_edges = create_edge_list (); | |
2832 | unsigned num_edges = NUM_EDGES (m_edges); | |
2833 | sbitmap *antin | |
2834 | = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2835 | sbitmap *antout | |
2836 | = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
005fad9d | 2837 | |
29331e72 | 2838 | sbitmap *earliest = sbitmap_vector_alloc (num_edges, num_exprs); |
9243c3d1 | 2839 | |
29331e72 LD |
2840 | compute_available (m_avloc, m_kill, m_avout, m_avin); |
2841 | compute_antinout_edge (m_antloc, m_transp, antin, antout); | |
2842 | compute_earliest (m_edges, num_exprs, antin, antout, m_avout, m_kill, | |
2843 | earliest); | |
2844 | ||
2845 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
9243c3d1 | 2846 | { |
33408780 VG |
2847 | fprintf (dump_file, "\n Compute LCM earliest insert data (lift %d):\n\n", |
2848 | iter); | |
29331e72 LD |
2849 | fprintf (dump_file, " Expression List (%u):\n", num_exprs); |
2850 | for (unsigned i = 0; i < num_exprs; i++) | |
9243c3d1 | 2851 | { |
29331e72 LD |
2852 | const auto &info = *m_exprs[i]; |
2853 | fprintf (dump_file, " Expr[%u]: ", i); | |
2854 | info.dump (dump_file, " "); | |
9243c3d1 | 2855 | } |
29331e72 LD |
2856 | fprintf (dump_file, "\n bitmap data:\n"); |
2857 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2858 | { | |
2859 | unsigned int i = bb->index (); | |
2860 | fprintf (dump_file, " BB %u:\n", i); | |
2861 | fprintf (dump_file, " avloc: "); | |
2862 | dump_bitmap_file (dump_file, m_avloc[i]); | |
2863 | fprintf (dump_file, " kill: "); | |
2864 | dump_bitmap_file (dump_file, m_kill[i]); | |
2865 | fprintf (dump_file, " antloc: "); | |
2866 | dump_bitmap_file (dump_file, m_antloc[i]); | |
2867 | fprintf (dump_file, " transp: "); | |
2868 | dump_bitmap_file (dump_file, m_transp[i]); | |
2869 | ||
2870 | fprintf (dump_file, " avin: "); | |
2871 | dump_bitmap_file (dump_file, m_avin[i]); | |
2872 | fprintf (dump_file, " avout: "); | |
2873 | dump_bitmap_file (dump_file, m_avout[i]); | |
2874 | fprintf (dump_file, " antin: "); | |
2875 | dump_bitmap_file (dump_file, antin[i]); | |
2876 | fprintf (dump_file, " antout: "); | |
2877 | dump_bitmap_file (dump_file, antout[i]); | |
2878 | } | |
2879 | fprintf (dump_file, "\n"); | |
2880 | fprintf (dump_file, " earliest:\n"); | |
2881 | for (unsigned ed = 0; ed < num_edges; ed++) | |
2882 | { | |
2883 | edge eg = INDEX_EDGE (m_edges, ed); | |
9243c3d1 | 2884 | |
29331e72 LD |
2885 | if (bitmap_empty_p (earliest[ed])) |
2886 | continue; | |
2887 | fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index, | |
2888 | eg->dest->index); | |
2889 | dump_bitmap_file (dump_file, earliest[ed]); | |
2890 | } | |
2891 | fprintf (dump_file, "\n"); | |
2892 | } | |
9243c3d1 | 2893 | |
29331e72 | 2894 | if (dump_file && (dump_flags & TDF_DETAILS)) |
9243c3d1 | 2895 | { |
33408780 | 2896 | fprintf (dump_file, " Fused global info result (lift %d):\n", iter); |
29331e72 | 2897 | } |
9243c3d1 | 2898 | |
29331e72 LD |
2899 | bool changed = false; |
2900 | for (unsigned ed = 0; ed < num_edges; ed++) | |
2901 | { | |
2902 | sbitmap e = earliest[ed]; | |
2903 | if (bitmap_empty_p (e)) | |
9243c3d1 JZZ |
2904 | continue; |
2905 | ||
29331e72 LD |
2906 | unsigned int expr_index; |
2907 | sbitmap_iterator sbi; | |
2908 | EXECUTE_IF_SET_IN_BITMAP (e, 0, expr_index, sbi) | |
ec99ffab | 2909 | { |
29331e72 | 2910 | vsetvl_info &curr_info = *m_exprs[expr_index]; |
29331e72 | 2911 | edge eg = INDEX_EDGE (m_edges, ed); |
29331e72 LD |
2912 | vsetvl_block_info &src_block_info = get_block_info (eg->src); |
2913 | vsetvl_block_info &dest_block_info = get_block_info (eg->dest); | |
ff8f9544 | 2914 | |
bf23a62e JZ |
2915 | if (!curr_info.valid_p () |
2916 | || eg->probability == profile_probability::never () | |
2917 | || src_block_info.probability | |
2918 | == profile_probability::uninitialized () | |
2919 | /* When multiple set bits in earliest edge, such edge may | |
2920 | have infinite loop in preds or succs or multiple conflict | |
2921 | vsetvl expression which make such edge is unrelated. We | |
2922 | don't perform fusion for such situation. */ | |
2923 | || bitmap_count_bits (e) != 1) | |
ff8f9544 | 2924 | continue; |
9243c3d1 | 2925 | |
29331e72 | 2926 | if (src_block_info.empty_p ()) |
9243c3d1 | 2927 | { |
29331e72 LD |
2928 | vsetvl_info new_curr_info = curr_info; |
2929 | new_curr_info.set_bb (crtl->ssa->bb (eg->dest)); | |
e935c066 JZ |
2930 | bool has_compatible_p |
2931 | = has_compatible_reaching_vsetvl_p (new_curr_info); | |
29331e72 | 2932 | if (!has_compatible_p) |
9243c3d1 | 2933 | { |
29331e72 LD |
2934 | if (dump_file && (dump_flags & TDF_DETAILS)) |
2935 | { | |
2936 | fprintf (dump_file, | |
2937 | " Forbidden lift up vsetvl info into bb %u " | |
2938 | "since there is no vsetvl info that reaching in " | |
2939 | "is compatible with it:", | |
2940 | eg->src->index); | |
2941 | curr_info.dump (dump_file, " "); | |
2942 | } | |
2943 | continue; | |
9243c3d1 JZZ |
2944 | } |
2945 | ||
29331e72 | 2946 | if (dump_file && (dump_flags & TDF_DETAILS)) |
e030af3e JZ |
2947 | { |
2948 | fprintf (dump_file, | |
29331e72 LD |
2949 | " Set empty bb %u to info:", eg->src->index); |
2950 | curr_info.dump (dump_file, " "); | |
e030af3e | 2951 | } |
29331e72 LD |
2952 | src_block_info.set_info (curr_info); |
2953 | src_block_info.probability = dest_block_info.probability; | |
2954 | changed = true; | |
9243c3d1 | 2955 | } |
29331e72 LD |
2956 | else if (src_block_info.has_info ()) |
2957 | { | |
2958 | vsetvl_info &prev_info = src_block_info.get_exit_info (); | |
2959 | gcc_assert (prev_info.valid_p ()); | |
2960 | ||
2961 | if (m_dem.compatible_p (prev_info, curr_info)) | |
2962 | { | |
2963 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2964 | { | |
2965 | fprintf (dump_file, " Fuse curr info since prev info " | |
2966 | "compatible with it:\n"); | |
2967 | fprintf (dump_file, " prev_info: "); | |
2968 | prev_info.dump (dump_file, " "); | |
2969 | fprintf (dump_file, " curr_info: "); | |
2970 | curr_info.dump (dump_file, " "); | |
2971 | } | |
2972 | m_dem.merge (prev_info, curr_info); | |
2973 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2974 | { | |
2975 | fprintf (dump_file, " prev_info after fused: "); | |
2976 | prev_info.dump (dump_file, " "); | |
2977 | fprintf (dump_file, "\n"); | |
2978 | } | |
2979 | changed = true; | |
2980 | if (src_block_info.has_info ()) | |
2981 | src_block_info.probability += dest_block_info.probability; | |
2982 | } | |
33408780 | 2983 | else |
29331e72 LD |
2984 | { |
2985 | /* Cancel lift up if probabilities are equal. */ | |
e935c066 JZ |
2986 | if (successors_probability_equal_p (eg->src) |
2987 | || (dest_block_info.probability | |
2988 | > src_block_info.probability | |
2989 | && !has_compatible_reaching_vsetvl_p (curr_info))) | |
29331e72 LD |
2990 | { |
2991 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2992 | { | |
2993 | fprintf (dump_file, | |
33408780 | 2994 | " Reset bb %u:", |
29331e72 LD |
2995 | eg->src->index); |
2996 | prev_info.dump (dump_file, " "); | |
e935c066 JZ |
2997 | fprintf (dump_file, " due to (same probability or no " |
2998 | "compatible reaching):"); | |
29331e72 LD |
2999 | curr_info.dump (dump_file, " "); |
3000 | } | |
3001 | src_block_info.set_empty_info (); | |
3002 | src_block_info.probability | |
3003 | = profile_probability::uninitialized (); | |
60820248 JZ |
3004 | /* See PR113696, we should reset immediate dominator to |
3005 | empty since we may uplift ineffective vsetvl which | |
3006 | locate at low probability block. */ | |
3007 | basic_block dom | |
3008 | = get_immediate_dominator (CDI_DOMINATORS, eg->src); | |
3009 | auto &dom_block_info = get_block_info (dom); | |
3010 | if (dom_block_info.has_info () | |
3011 | && !m_dem.compatible_p ( | |
3012 | dom_block_info.get_exit_info (), curr_info)) | |
3013 | { | |
3014 | dom_block_info.set_empty_info (); | |
3015 | dom_block_info.probability | |
3016 | = profile_probability::uninitialized (); | |
3017 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3018 | { | |
3019 | fprintf (dump_file, | |
3020 | " Reset dominator bb %u:", | |
3021 | dom->index); | |
3022 | prev_info.dump (dump_file, " "); | |
3023 | fprintf (dump_file, | |
3024 | " due to (same probability or no " | |
3025 | "compatible reaching):"); | |
3026 | curr_info.dump (dump_file, " "); | |
3027 | } | |
3028 | } | |
29331e72 LD |
3029 | changed = true; |
3030 | } | |
3031 | /* Choose the one with higher probability. */ | |
3032 | else if (dest_block_info.probability | |
3033 | > src_block_info.probability) | |
3034 | { | |
3035 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3036 | { | |
3037 | fprintf (dump_file, | |
33408780 | 3038 | " Change bb %u from:", |
29331e72 LD |
3039 | eg->src->index); |
3040 | prev_info.dump (dump_file, " "); | |
3041 | fprintf (dump_file, | |
3042 | " to (higher probability):"); | |
3043 | curr_info.dump (dump_file, " "); | |
3044 | } | |
3045 | src_block_info.set_info (curr_info); | |
3046 | src_block_info.probability = dest_block_info.probability; | |
3047 | changed = true; | |
3048 | } | |
3049 | } | |
3050 | } | |
3051 | else | |
e030af3e | 3052 | { |
29331e72 LD |
3053 | vsetvl_info &prev_info = src_block_info.get_exit_info (); |
3054 | if (!prev_info.valid_p () | |
bf23a62e JZ |
3055 | || m_dem.available_p (prev_info, curr_info) |
3056 | || !m_dem.compatible_p (prev_info, curr_info)) | |
29331e72 LD |
3057 | continue; |
3058 | ||
bf23a62e | 3059 | if (dump_file && (dump_flags & TDF_DETAILS)) |
29331e72 | 3060 | { |
bf23a62e JZ |
3061 | fprintf (dump_file, " Fuse curr info since prev info " |
3062 | "compatible with it:\n"); | |
3063 | fprintf (dump_file, " prev_info: "); | |
3064 | prev_info.dump (dump_file, " "); | |
3065 | fprintf (dump_file, " curr_info: "); | |
3066 | curr_info.dump (dump_file, " "); | |
3067 | } | |
3068 | m_dem.merge (prev_info, curr_info); | |
3069 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3070 | { | |
3071 | fprintf (dump_file, " prev_info after fused: "); | |
3072 | prev_info.dump (dump_file, " "); | |
3073 | fprintf (dump_file, "\n"); | |
29331e72 | 3074 | } |
bf23a62e | 3075 | changed = true; |
e030af3e | 3076 | } |
9243c3d1 JZZ |
3077 | } |
3078 | } | |
3079 | ||
0d50facd | 3080 | if (dump_file && (dump_flags & TDF_DETAILS)) |
c919d059 | 3081 | { |
29331e72 | 3082 | fprintf (dump_file, "\n"); |
c919d059 | 3083 | } |
c919d059 | 3084 | |
29331e72 LD |
3085 | sbitmap_vector_free (antin); |
3086 | sbitmap_vector_free (antout); | |
3087 | sbitmap_vector_free (earliest); | |
3088 | free_edge_list (m_edges); | |
c919d059 | 3089 | |
29331e72 | 3090 | return changed; |
c919d059 KC |
3091 | } |
3092 | ||
8421f279 | 3093 | void |
29331e72 | 3094 | pre_vsetvl::pre_global_vsetvl_info () |
c919d059 | 3095 | { |
29331e72 LD |
3096 | compute_vsetvl_def_data (); |
3097 | compute_lcm_local_properties (); | |
c919d059 | 3098 | |
29331e72 LD |
3099 | unsigned num_exprs = m_exprs.length (); |
3100 | m_edges = pre_edge_lcm_avs (num_exprs, m_transp, m_avloc, m_antloc, m_kill, | |
3101 | m_avin, m_avout, &m_insert, &m_del); | |
3102 | unsigned num_edges = NUM_EDGES (m_edges); | |
c919d059 | 3103 | |
29331e72 LD |
3104 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3105 | { | |
3106 | fprintf (dump_file, "\n Compute LCM insert and delete data:\n\n"); | |
3107 | fprintf (dump_file, " Expression List (%u):\n", num_exprs); | |
3108 | for (unsigned i = 0; i < num_exprs; i++) | |
c919d059 | 3109 | { |
29331e72 LD |
3110 | const auto &info = *m_exprs[i]; |
3111 | fprintf (dump_file, " Expr[%u]: ", i); | |
3112 | info.dump (dump_file, " "); | |
c919d059 | 3113 | } |
29331e72 LD |
3114 | fprintf (dump_file, "\n bitmap data:\n"); |
3115 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
c919d059 | 3116 | { |
29331e72 LD |
3117 | unsigned i = bb->index (); |
3118 | fprintf (dump_file, " BB %u:\n", i); | |
3119 | fprintf (dump_file, " avloc: "); | |
3120 | dump_bitmap_file (dump_file, m_avloc[i]); | |
3121 | fprintf (dump_file, " kill: "); | |
3122 | dump_bitmap_file (dump_file, m_kill[i]); | |
3123 | fprintf (dump_file, " antloc: "); | |
3124 | dump_bitmap_file (dump_file, m_antloc[i]); | |
3125 | fprintf (dump_file, " transp: "); | |
3126 | dump_bitmap_file (dump_file, m_transp[i]); | |
3127 | ||
3128 | fprintf (dump_file, " avin: "); | |
3129 | dump_bitmap_file (dump_file, m_avin[i]); | |
3130 | fprintf (dump_file, " avout: "); | |
3131 | dump_bitmap_file (dump_file, m_avout[i]); | |
3132 | fprintf (dump_file, " del: "); | |
3133 | dump_bitmap_file (dump_file, m_del[i]); | |
c919d059 | 3134 | } |
29331e72 LD |
3135 | fprintf (dump_file, "\n"); |
3136 | fprintf (dump_file, " insert:\n"); | |
3137 | for (unsigned ed = 0; ed < num_edges; ed++) | |
8421f279 | 3138 | { |
29331e72 | 3139 | edge eg = INDEX_EDGE (m_edges, ed); |
c919d059 | 3140 | |
29331e72 LD |
3141 | if (bitmap_empty_p (m_insert[ed])) |
3142 | continue; | |
3143 | fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index, | |
3144 | eg->dest->index); | |
3145 | dump_bitmap_file (dump_file, m_insert[ed]); | |
c919d059 | 3146 | } |
29331e72 LD |
3147 | } |
3148 | ||
3149 | /* Remove vsetvl infos as LCM suggest */ | |
3150 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
3151 | { | |
3152 | sbitmap d = m_del[bb->index ()]; | |
3153 | if (bitmap_count_bits (d) == 0) | |
c919d059 | 3154 | continue; |
29331e72 LD |
3155 | gcc_assert (bitmap_count_bits (d) == 1); |
3156 | unsigned expr_index = bitmap_first_set_bit (d); | |
3157 | vsetvl_info &info = *m_exprs[expr_index]; | |
3158 | gcc_assert (info.valid_p ()); | |
3159 | gcc_assert (info.get_bb () == bb); | |
3160 | const vsetvl_block_info &block_info = get_block_info (info.get_bb ()); | |
3161 | gcc_assert (block_info.get_entry_info () == info); | |
3162 | info.set_delete (); | |
c6c2a1d7 JZ |
3163 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3164 | { | |
3165 | fprintf (dump_file, | |
3166 | "\nLCM deleting vsetvl of block %d, it has predecessors: \n", | |
3167 | bb->index ()); | |
3168 | hash_set<basic_block> all_preds | |
3169 | = get_all_predecessors (bb->cfg_bb ()); | |
3170 | int i = 0; | |
3171 | for (const auto pred : all_preds) | |
3172 | { | |
3173 | fprintf (dump_file, "%d ", pred->index); | |
3174 | i++; | |
3175 | if (i % 32 == 0) | |
3176 | fprintf (dump_file, "\n"); | |
3177 | } | |
3178 | fprintf (dump_file, "\n"); | |
3179 | } | |
29331e72 | 3180 | } |
c919d059 | 3181 | |
d83070ae | 3182 | /* Remove vsetvl infos if all predecessors are available to the block. */ |
ef21ae5c JZ |
3183 | for (const bb_info *bb : crtl->ssa->bbs ()) |
3184 | { | |
3185 | vsetvl_block_info &block_info = get_block_info (bb); | |
5ee45f5e JZ |
3186 | if (block_info.empty_p ()) |
3187 | continue; | |
3188 | vsetvl_info &curr_info = block_info.get_entry_info (); | |
3189 | if (!curr_info.valid_p ()) | |
ef21ae5c JZ |
3190 | continue; |
3191 | ||
5ee45f5e JZ |
3192 | unsigned int expr_index; |
3193 | sbitmap_iterator sbi; | |
3194 | gcc_assert ( | |
3195 | !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()])); | |
3196 | bool full_available = true; | |
3197 | EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[bb->index ()], 0, expr_index, | |
3198 | sbi) | |
3199 | { | |
3200 | vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index]; | |
3201 | if (!prev_info.valid_p () | |
3202 | || !m_dem.available_p (prev_info, curr_info)) | |
3203 | { | |
3204 | full_available = false; | |
3205 | break; | |
3206 | } | |
3207 | } | |
3208 | if (full_available) | |
3209 | curr_info.set_delete (); | |
ef21ae5c JZ |
3210 | } |
3211 | ||
29331e72 LD |
3212 | for (const bb_info *bb : crtl->ssa->bbs ()) |
3213 | { | |
3214 | vsetvl_block_info &block_info = get_block_info (bb); | |
3215 | if (block_info.empty_p ()) | |
3216 | continue; | |
3217 | vsetvl_info &curr_info = block_info.get_entry_info (); | |
3218 | if (curr_info.delete_p ()) | |
c919d059 | 3219 | { |
4fd09aed | 3220 | if (block_info.local_infos.is_empty ()) |
29331e72 | 3221 | continue; |
4fd09aed | 3222 | curr_info = block_info.local_infos[0]; |
c919d059 | 3223 | } |
4cd4c34a | 3224 | if (curr_info.valid_p () && !curr_info.vl_used_by_non_rvv_insn_p () |
923a67f1 | 3225 | && preds_all_same_avl_and_ratio_p (curr_info)) |
29331e72 | 3226 | curr_info.set_change_vtype_only (); |
c919d059 | 3227 | |
29331e72 LD |
3228 | vsetvl_info prev_info = vsetvl_info (); |
3229 | prev_info.set_empty (); | |
4fd09aed | 3230 | for (auto &curr_info : block_info.local_infos) |
c919d059 | 3231 | { |
29331e72 | 3232 | if (prev_info.valid_p () && curr_info.valid_p () |
923a67f1 JZ |
3233 | && m_dem.avl_available_p (prev_info, curr_info) |
3234 | && prev_info.get_ratio () == curr_info.get_ratio ()) | |
29331e72 LD |
3235 | curr_info.set_change_vtype_only (); |
3236 | prev_info = curr_info; | |
c919d059 | 3237 | } |
20c85207 | 3238 | } |
20c85207 JZ |
3239 | } |
3240 | ||
29331e72 LD |
3241 | void |
3242 | pre_vsetvl::emit_vsetvl () | |
20c85207 | 3243 | { |
29331e72 | 3244 | bool need_commit = false; |
20c85207 | 3245 | |
4a0a8dc1 JZ |
3246 | /* Fake edge is created by connect infinite loops to exit function. |
3247 | We should commit vsetvl edge after fake edges removes, otherwise, | |
3248 | it will cause ICE. */ | |
3249 | remove_fake_exit_edges (); | |
29331e72 | 3250 | for (const bb_info *bb : crtl->ssa->bbs ()) |
20c85207 | 3251 | { |
4fd09aed | 3252 | for (const auto &curr_info : get_block_info (bb).local_infos) |
29331e72 LD |
3253 | { |
3254 | insn_info *insn = curr_info.get_insn (); | |
3255 | if (curr_info.delete_p ()) | |
3256 | { | |
3257 | if (vsetvl_insn_p (insn->rtl ())) | |
d29136ad | 3258 | remove_vsetvl_insn (curr_info.get_insn ()->rtl ()); |
29331e72 LD |
3259 | continue; |
3260 | } | |
3261 | else if (curr_info.valid_p ()) | |
3262 | { | |
3263 | if (vsetvl_insn_p (insn->rtl ())) | |
3264 | { | |
3265 | const vsetvl_info temp = vsetvl_info (insn); | |
3266 | if (!(curr_info == temp)) | |
3267 | { | |
3268 | if (dump_file) | |
3269 | { | |
3270 | fprintf (dump_file, "\n Change vsetvl info from: "); | |
3271 | temp.dump (dump_file, " "); | |
3272 | fprintf (dump_file, " to: "); | |
3273 | curr_info.dump (dump_file, " "); | |
3274 | } | |
3275 | change_vsetvl_insn (curr_info); | |
3276 | } | |
3277 | } | |
3278 | else | |
3279 | { | |
3280 | if (dump_file) | |
3281 | { | |
3282 | fprintf (dump_file, | |
3283 | "\n Insert vsetvl info before insn %d: ", | |
3284 | insn->uid ()); | |
3285 | curr_info.dump (dump_file, " "); | |
3286 | } | |
3287 | insert_vsetvl_insn (EMIT_BEFORE, curr_info); | |
3288 | } | |
3289 | } | |
3290 | } | |
20c85207 | 3291 | } |
20c85207 | 3292 | |
29331e72 | 3293 | for (const vsetvl_info &item : m_delete_list) |
20c85207 | 3294 | { |
29331e72 | 3295 | gcc_assert (vsetvl_insn_p (item.get_insn ()->rtl ())); |
d29136ad | 3296 | remove_vsetvl_insn (item.get_insn ()->rtl ()); |
20c85207 JZ |
3297 | } |
3298 | ||
d1189cee JZ |
3299 | /* Insert vsetvl info that was not deleted after lift up. */ |
3300 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
3301 | { | |
3302 | const vsetvl_block_info &block_info = get_block_info (bb); | |
3303 | if (!block_info.has_info ()) | |
3304 | continue; | |
3305 | ||
3306 | const vsetvl_info &footer_info = block_info.get_exit_info (); | |
3307 | ||
3308 | if (footer_info.delete_p ()) | |
3309 | continue; | |
3310 | ||
3311 | edge eg; | |
3312 | edge_iterator eg_iterator; | |
3313 | FOR_EACH_EDGE (eg, eg_iterator, bb->cfg_bb ()->succs) | |
3314 | { | |
3315 | gcc_assert (!(eg->flags & EDGE_ABNORMAL)); | |
3316 | if (dump_file) | |
3317 | { | |
3318 | fprintf ( | |
3319 | dump_file, | |
3320 | "\n Insert missed vsetvl info at edge(bb %u -> bb %u): ", | |
3321 | eg->src->index, eg->dest->index); | |
3322 | footer_info.dump (dump_file, " "); | |
3323 | } | |
3324 | start_sequence (); | |
3325 | insert_vsetvl_insn (EMIT_DIRECT, footer_info); | |
3326 | rtx_insn *rinsn = get_insns (); | |
3327 | end_sequence (); | |
3328 | default_rtl_profile (); | |
3329 | insert_insn_on_edge (rinsn, eg); | |
3330 | need_commit = true; | |
3331 | } | |
3332 | } | |
3333 | ||
29331e72 LD |
3334 | /* m_insert vsetvl as LCM suggest. */ |
3335 | for (int ed = 0; ed < NUM_EDGES (m_edges); ed++) | |
20c85207 | 3336 | { |
29331e72 LD |
3337 | edge eg = INDEX_EDGE (m_edges, ed); |
3338 | sbitmap i = m_insert[ed]; | |
bf23a62e | 3339 | if (bitmap_count_bits (i) != 1) |
29331e72 LD |
3340 | /* For code with infinite loop (e.g. pr61634.c), The data flow is |
3341 | completely wrong. */ | |
3342 | continue; | |
3343 | ||
29331e72 LD |
3344 | unsigned expr_index = bitmap_first_set_bit (i); |
3345 | const vsetvl_info &info = *m_exprs[expr_index]; | |
3346 | gcc_assert (info.valid_p ()); | |
3347 | if (dump_file) | |
20c85207 | 3348 | { |
29331e72 LD |
3349 | fprintf (dump_file, |
3350 | "\n Insert vsetvl info at edge(bb %u -> bb %u): ", | |
3351 | eg->src->index, eg->dest->index); | |
3352 | info.dump (dump_file, " "); | |
20c85207 | 3353 | } |
29331e72 LD |
3354 | rtl_profile_for_edge (eg); |
3355 | start_sequence (); | |
3356 | ||
3357 | insert_vsetvl_insn (EMIT_DIRECT, info); | |
3358 | rtx_insn *rinsn = get_insns (); | |
3359 | end_sequence (); | |
3360 | default_rtl_profile (); | |
3361 | ||
3362 | /* We should not get an abnormal edge here. */ | |
3363 | gcc_assert (!(eg->flags & EDGE_ABNORMAL)); | |
3364 | need_commit = true; | |
3365 | insert_insn_on_edge (rinsn, eg); | |
20c85207 JZ |
3366 | } |
3367 | ||
29331e72 LD |
3368 | if (need_commit) |
3369 | commit_edge_insertions (); | |
20c85207 JZ |
3370 | } |
3371 | ||
9243c3d1 | 3372 | void |
d83070ae | 3373 | pre_vsetvl::cleanup () |
9243c3d1 | 3374 | { |
29331e72 LD |
3375 | remove_avl_operand (); |
3376 | remove_unused_dest_operand (); | |
22622a5a | 3377 | remove_vsetvl_pre_insns (); |
29331e72 | 3378 | } |
9243c3d1 | 3379 | |
29331e72 LD |
3380 | void |
3381 | pre_vsetvl::remove_avl_operand () | |
3382 | { | |
3383 | basic_block cfg_bb; | |
3384 | rtx_insn *rinsn; | |
3385 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
3386 | FOR_BB_INSNS (cfg_bb, rinsn) | |
3387 | if (NONDEBUG_INSN_P (rinsn) && has_vl_op (rinsn) | |
3388 | && REG_P (get_vl (rinsn))) | |
3389 | { | |
9243c3d1 | 3390 | rtx avl = get_vl (rinsn); |
a2d12abe | 3391 | if (count_regno_occurrences (rinsn, REGNO (avl)) == 1) |
9243c3d1 | 3392 | { |
29331e72 | 3393 | rtx new_pat; |
60bd33bc | 3394 | if (fault_first_load_p (rinsn)) |
29331e72 LD |
3395 | new_pat |
3396 | = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx); | |
60bd33bc JZZ |
3397 | else |
3398 | { | |
3399 | rtx set = single_set (rinsn); | |
3400 | rtx src | |
3401 | = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx); | |
29331e72 LD |
3402 | new_pat = gen_rtx_SET (SET_DEST (set), src); |
3403 | } | |
3404 | if (dump_file) | |
3405 | { | |
3406 | fprintf (dump_file, " Cleanup insn %u's avl operand:\n", | |
3407 | INSN_UID (rinsn)); | |
3408 | print_rtl_single (dump_file, rinsn); | |
60bd33bc | 3409 | } |
29331e72 | 3410 | validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false); |
9243c3d1 JZZ |
3411 | } |
3412 | } | |
20c85207 JZ |
3413 | } |
3414 | ||
6b6b9c68 | 3415 | void |
29331e72 | 3416 | pre_vsetvl::remove_unused_dest_operand () |
20c85207 | 3417 | { |
6b6b9c68 | 3418 | df_analyze (); |
20c85207 JZ |
3419 | basic_block cfg_bb; |
3420 | rtx_insn *rinsn; | |
3421 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
29331e72 LD |
3422 | FOR_BB_INSNS (cfg_bb, rinsn) |
3423 | if (NONDEBUG_INSN_P (rinsn) && vsetvl_insn_p (rinsn)) | |
6b6b9c68 | 3424 | { |
29331e72 LD |
3425 | rtx vl = get_vl (rinsn); |
3426 | vsetvl_info info = vsetvl_info (rinsn); | |
3427 | if (has_no_uses (cfg_bb, rinsn, REGNO (vl))) | |
3428 | if (!info.has_vlmax_avl ()) | |
3429 | { | |
3430 | rtx new_pat = info.get_vsetvl_pat (true); | |
3431 | if (dump_file) | |
3432 | { | |
3433 | fprintf (dump_file, | |
3434 | " Remove vsetvl insn %u's dest(vl) operand since " | |
3435 | "it unused:\n", | |
3436 | INSN_UID (rinsn)); | |
3437 | print_rtl_single (dump_file, rinsn); | |
3438 | } | |
3439 | validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, | |
3440 | false); | |
3441 | } | |
6b6b9c68 | 3442 | } |
6b6b9c68 JZZ |
3443 | } |
3444 | ||
22622a5a JZ |
3445 | /* Remove all bogus vsetvl_pre instructions. */ |
3446 | void | |
3447 | pre_vsetvl::remove_vsetvl_pre_insns () | |
3448 | { | |
3449 | basic_block cfg_bb; | |
3450 | rtx_insn *rinsn; | |
3451 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
3452 | FOR_BB_INSNS (cfg_bb, rinsn) | |
3453 | if (NONDEBUG_INSN_P (rinsn) && vsetvl_pre_insn_p (rinsn)) | |
3454 | { | |
3455 | if (dump_file) | |
3456 | { | |
3457 | fprintf (dump_file, " Eliminate vsetvl_pre insn %d:\n", | |
3458 | INSN_UID (rinsn)); | |
3459 | print_rtl_single (dump_file, rinsn); | |
3460 | } | |
d29136ad | 3461 | remove_vsetvl_insn (rinsn); |
22622a5a JZ |
3462 | } |
3463 | } | |
3464 | ||
29331e72 LD |
3465 | const pass_data pass_data_vsetvl = { |
3466 | RTL_PASS, /* type */ | |
3467 | "vsetvl", /* name */ | |
3468 | OPTGROUP_NONE, /* optinfo_flags */ | |
01260a82 | 3469 | TV_MACH_DEP, /* tv_id */ |
29331e72 LD |
3470 | 0, /* properties_required */ |
3471 | 0, /* properties_provided */ | |
3472 | 0, /* properties_destroyed */ | |
3473 | 0, /* todo_flags_start */ | |
3474 | 0, /* todo_flags_finish */ | |
3475 | }; | |
9243c3d1 | 3476 | |
29331e72 LD |
3477 | class pass_vsetvl : public rtl_opt_pass |
3478 | { | |
3479 | private: | |
3480 | void simple_vsetvl (); | |
3481 | void lazy_vsetvl (); | |
9243c3d1 | 3482 | |
29331e72 LD |
3483 | public: |
3484 | pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {} | |
9243c3d1 | 3485 | |
29331e72 LD |
3486 | /* opt_pass methods: */ |
3487 | virtual bool gate (function *) final override { return TARGET_VECTOR; } | |
3488 | virtual unsigned int execute (function *) final override; | |
3489 | }; // class pass_vsetvl | |
9243c3d1 | 3490 | |
acc10c79 | 3491 | void |
29331e72 | 3492 | pass_vsetvl::simple_vsetvl () |
acc10c79 | 3493 | { |
29331e72 LD |
3494 | if (dump_file) |
3495 | fprintf (dump_file, "\nEntering Simple VSETVL PASS\n"); | |
acc10c79 | 3496 | |
29331e72 LD |
3497 | basic_block cfg_bb; |
3498 | rtx_insn *rinsn; | |
3499 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
acc10c79 | 3500 | { |
29331e72 | 3501 | FOR_BB_INSNS (cfg_bb, rinsn) |
acc10c79 | 3502 | { |
29331e72 | 3503 | if (!NONDEBUG_INSN_P (rinsn)) |
acc10c79 | 3504 | continue; |
29331e72 LD |
3505 | if (has_vtype_op (rinsn)) |
3506 | { | |
3507 | const auto &info = vsetvl_info (rinsn); | |
3508 | rtx pat = info.get_vsetvl_pat (); | |
3509 | emit_insn_before (pat, rinsn); | |
3510 | if (dump_file) | |
3511 | { | |
3512 | fprintf (dump_file, " Insert vsetvl insn before insn %d:\n", | |
3513 | INSN_UID (rinsn)); | |
3514 | print_rtl_single (dump_file, PREV_INSN (rinsn)); | |
3515 | } | |
3516 | } | |
acc10c79 JZZ |
3517 | } |
3518 | } | |
acc10c79 JZZ |
3519 | } |
3520 | ||
9243c3d1 JZZ |
3521 | /* Lazy vsetvl insertion for optimize > 0. */ |
3522 | void | |
29331e72 | 3523 | pass_vsetvl::lazy_vsetvl () |
9243c3d1 JZZ |
3524 | { |
3525 | if (dump_file) | |
29331e72 LD |
3526 | fprintf (dump_file, "\nEntering Lazy VSETVL PASS\n\n"); |
3527 | ||
3528 | pre_vsetvl pre = pre_vsetvl (); | |
9243c3d1 | 3529 | |
9243c3d1 | 3530 | if (dump_file) |
29331e72 LD |
3531 | fprintf (dump_file, "\nPhase 1: Fuse local vsetvl infos.\n\n"); |
3532 | pre.fuse_local_vsetvl_info (); | |
0d50facd | 3533 | if (dump_file && (dump_flags & TDF_DETAILS)) |
29331e72 | 3534 | pre.dump (dump_file, "phase 1"); |
9243c3d1 | 3535 | |
29331e72 | 3536 | /* Phase 2: Fuse header and footer vsetvl infos between basic blocks. */ |
9243c3d1 | 3537 | if (dump_file) |
29331e72 | 3538 | fprintf (dump_file, "\nPhase 2: Lift up vsetvl info.\n\n"); |
1a8bebb1 | 3539 | if (vsetvl_strategy != VSETVL_OPT_NO_FUSION) |
29331e72 | 3540 | { |
1a8bebb1 JZ |
3541 | bool changed = true; |
3542 | int fused_count = 0; | |
3543 | do | |
3544 | { | |
3545 | if (dump_file) | |
3546 | fprintf (dump_file, " Try lift up %d.\n\n", fused_count); | |
3547 | changed = pre.earliest_fuse_vsetvl_info (fused_count); | |
3548 | fused_count += 1; | |
3549 | } while (changed); | |
3550 | } | |
0d50facd | 3551 | if (dump_file && (dump_flags & TDF_DETAILS)) |
29331e72 | 3552 | pre.dump (dump_file, "phase 2"); |
9243c3d1 | 3553 | |
29331e72 | 3554 | /* Phase 3: Reducing redundant vsetvl infos using LCM. */ |
9243c3d1 | 3555 | if (dump_file) |
29331e72 LD |
3556 | fprintf (dump_file, "\nPhase 3: Reduce global vsetvl infos.\n\n"); |
3557 | pre.pre_global_vsetvl_info (); | |
3558 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3559 | pre.dump (dump_file, "phase 3"); | |
9243c3d1 | 3560 | |
29331e72 | 3561 | /* Phase 4: Insert, modify and remove vsetvl insns. */ |
9243c3d1 | 3562 | if (dump_file) |
29331e72 LD |
3563 | fprintf (dump_file, |
3564 | "\nPhase 4: Insert, modify and remove vsetvl insns.\n\n"); | |
3565 | pre.emit_vsetvl (); | |
9243c3d1 | 3566 | |
d83070ae | 3567 | /* Phase 5: Cleanup */ |
9243c3d1 | 3568 | if (dump_file) |
d83070ae KC |
3569 | fprintf (dump_file, "\nPhase 5: Cleanup\n\n"); |
3570 | pre.cleanup (); | |
6b6b9c68 | 3571 | |
29331e72 | 3572 | pre.finish (); |
9243c3d1 JZZ |
3573 | } |
3574 | ||
3575 | /* Main entry point for this pass. */ | |
3576 | unsigned int | |
3577 | pass_vsetvl::execute (function *) | |
3578 | { | |
3579 | if (n_basic_blocks_for_fn (cfun) <= 0) | |
3580 | return 0; | |
3581 | ||
ca8fb009 JZZ |
3582 | /* The RVV instruction may change after split which is not a stable |
3583 | instruction. We need to split it here to avoid potential issue | |
3584 | since the VSETVL PASS is insert before split PASS. */ | |
3585 | split_all_insns (); | |
9243c3d1 JZZ |
3586 | |
3587 | /* Early return for there is no vector instructions. */ | |
3588 | if (!has_vector_insn (cfun)) | |
3589 | return 0; | |
3590 | ||
1a8bebb1 | 3591 | if (!optimize || vsetvl_strategy == VSETVL_SIMPLE) |
9243c3d1 JZZ |
3592 | simple_vsetvl (); |
3593 | else | |
3594 | lazy_vsetvl (); | |
3595 | ||
9243c3d1 JZZ |
3596 | return 0; |
3597 | } | |
3598 | ||
3599 | rtl_opt_pass * | |
3600 | make_pass_vsetvl (gcc::context *ctxt) | |
3601 | { | |
3602 | return new pass_vsetvl (ctxt); | |
3603 | } |