]>
Commit | Line | Data |
---|---|---|
9243c3d1 | 1 | /* VSETVL pass for RISC-V 'V' Extension for GNU compiler. |
c841bde5 | 2 | Copyright (C) 2022-2023 Free Software Foundation, Inc. |
9243c3d1 JZZ |
3 | Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd. |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or(at your option) | |
10 | any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
29331e72 LD |
21 | /* The values of the vl and vtype registers will affect the behavior of RVV |
22 | insns. That is, when we need to execute an RVV instruction, we need to set | |
23 | the correct vl and vtype values by executing the vsetvl instruction before. | |
24 | Executing the fewest number of vsetvl instructions while keeping the behavior | |
25 | the same is the problem this pass is trying to solve. This vsetvl pass is | |
26 | divided into 5 phases: | |
27 | ||
28 | - Phase 1 (fuse local vsetvl infos): traverses each Basic Block, parses | |
29 | each instruction in it that affects vl and vtype state and generates an | |
30 | array of vsetvl_info objects. Then traverse the vsetvl_info array from | |
31 | front to back and perform fusion according to the fusion rules. The fused | |
32 | vsetvl infos are stored in the vsetvl_block_info object's `infos` field. | |
33 | ||
34 | - Phase 2 (earliest fuse global vsetvl infos): The header_info and | |
35 | footer_info of vsetvl_block_info are used as expressions, and the | |
36 | earliest of each expression is computed. Based on the earliest | |
37 | information, try to lift up the corresponding vsetvl info to the src | |
38 | basic block of the edge (mainly to reduce the total number of vsetvl | |
39 | instructions, this uplift will cause some execution paths to execute | |
40 | vsetvl instructions that shouldn't be there). | |
41 | ||
42 | - Phase 3 (pre global vsetvl info): The header_info and footer_info of | |
43 | vsetvl_block_info are used as expressions, and the LCM algorithm is used | |
44 | to compute the header_info that needs to be deleted and the one that | |
45 | needs to be inserted in some edges. | |
46 | ||
47 | - Phase 4 (emit vsetvl insns) : Based on the fusion result of Phase 1 and | |
48 | the deletion and insertion information of Phase 3, the mandatory vsetvl | |
49 | instruction insertion, modification and deletion are performed. | |
50 | ||
51 | - Phase 5 (cleanup): Clean up the avl operand in the RVV operator | |
52 | instruction and cleanup the unused dest operand of the vsetvl insn. | |
53 | ||
54 | After the Phase 1 a virtual CFG of vsetvl_info is generated. The virtual | |
55 | basic block is represented by vsetvl_block_info, and the virtual vsetvl | |
56 | statements inside are represented by vsetvl_info. The later phases 2 and 3 | |
57 | are constantly modifying and adjusting this virtual CFG. Phase 4 performs | |
58 | insertion, modification and deletion of vsetvl instructions based on the | |
59 | optimized virtual CFG. The Phase 1, 2 and 3 do not involve modifications to | |
60 | the RTL. | |
61 | */ | |
9243c3d1 JZZ |
62 | |
63 | #define IN_TARGET_CODE 1 | |
64 | #define INCLUDE_ALGORITHM | |
65 | #define INCLUDE_FUNCTIONAL | |
66 | ||
67 | #include "config.h" | |
68 | #include "system.h" | |
69 | #include "coretypes.h" | |
70 | #include "tm.h" | |
71 | #include "backend.h" | |
72 | #include "rtl.h" | |
73 | #include "target.h" | |
74 | #include "tree-pass.h" | |
75 | #include "df.h" | |
76 | #include "rtl-ssa.h" | |
77 | #include "cfgcleanup.h" | |
78 | #include "insn-config.h" | |
79 | #include "insn-attr.h" | |
80 | #include "insn-opinit.h" | |
81 | #include "tm-constrs.h" | |
82 | #include "cfgrtl.h" | |
83 | #include "cfganal.h" | |
84 | #include "lcm.h" | |
85 | #include "predict.h" | |
86 | #include "profile-count.h" | |
a3ad2301 | 87 | #include "gcse.h" |
9243c3d1 JZZ |
88 | |
89 | using namespace rtl_ssa; | |
90 | using namespace riscv_vector; | |
91 | ||
29331e72 LD |
92 | /* Set the bitmap DST to the union of SRC of predecessors of |
93 | basic block B. | |
94 | It's a bit different from bitmap_union_of_preds in cfganal.cc. This function | |
95 | takes into account the case where pred is ENTRY basic block. The main reason | |
96 | for this difference is to make it easier to insert some special value into | |
97 | the ENTRY base block. For example, vsetvl_info with a status of UNKNOW. */ | |
98 | static void | |
99 | bitmap_union_of_preds_with_entry (sbitmap dst, sbitmap *src, basic_block b) | |
100 | { | |
101 | unsigned int set_size = dst->size; | |
102 | edge e; | |
103 | unsigned ix; | |
104 | ||
105 | for (ix = 0; ix < EDGE_COUNT (b->preds); ix++) | |
106 | { | |
107 | e = EDGE_PRED (b, ix); | |
108 | bitmap_copy (dst, src[e->src->index]); | |
109 | break; | |
110 | } | |
ec99ffab | 111 | |
29331e72 LD |
112 | if (ix == EDGE_COUNT (b->preds)) |
113 | bitmap_clear (dst); | |
114 | else | |
115 | for (ix++; ix < EDGE_COUNT (b->preds); ix++) | |
116 | { | |
117 | unsigned int i; | |
118 | SBITMAP_ELT_TYPE *p, *r; | |
119 | ||
120 | e = EDGE_PRED (b, ix); | |
121 | p = src[e->src->index]->elms; | |
122 | r = dst->elms; | |
123 | for (i = 0; i < set_size; i++) | |
124 | *r++ |= *p++; | |
125 | } | |
126 | } | |
127 | ||
128 | /* Compute the reaching defintion in and out based on the gen and KILL | |
129 | informations in each Base Blocks. | |
130 | This function references the compute_avaiable implementation in lcm.cc */ | |
131 | static void | |
132 | compute_reaching_defintion (sbitmap *gen, sbitmap *kill, sbitmap *in, | |
133 | sbitmap *out) | |
9243c3d1 | 134 | { |
29331e72 LD |
135 | edge e; |
136 | basic_block *worklist, *qin, *qout, *qend, bb; | |
137 | unsigned int qlen; | |
138 | edge_iterator ei; | |
139 | ||
140 | /* Allocate a worklist array/queue. Entries are only added to the | |
141 | list if they were not already on the list. So the size is | |
142 | bounded by the number of basic blocks. */ | |
143 | qin = qout = worklist | |
144 | = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); | |
145 | ||
146 | /* Put every block on the worklist; this is necessary because of the | |
147 | optimistic initialization of AVOUT above. Use reverse postorder | |
148 | to make the forward dataflow problem require less iterations. */ | |
149 | int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); | |
150 | int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false); | |
151 | for (int i = 0; i < n; ++i) | |
152 | { | |
153 | bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]); | |
154 | *qin++ = bb; | |
155 | bb->aux = bb; | |
156 | } | |
157 | free (rpo); | |
158 | ||
159 | qin = worklist; | |
160 | qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS]; | |
161 | qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; | |
162 | ||
163 | /* Mark blocks which are successors of the entry block so that we | |
164 | can easily identify them below. */ | |
165 | FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) | |
166 | e->dest->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun); | |
167 | ||
168 | /* Iterate until the worklist is empty. */ | |
169 | while (qlen) | |
170 | { | |
171 | /* Take the first entry off the worklist. */ | |
172 | bb = *qout++; | |
173 | qlen--; | |
174 | ||
175 | if (qout >= qend) | |
176 | qout = worklist; | |
177 | ||
178 | /* Do not clear the aux field for blocks which are successors of the | |
179 | ENTRY block. That way we never add then to the worklist again. */ | |
180 | if (bb->aux != ENTRY_BLOCK_PTR_FOR_FN (cfun)) | |
181 | bb->aux = NULL; | |
182 | ||
183 | bitmap_union_of_preds_with_entry (in[bb->index], out, bb); | |
184 | ||
185 | if (bitmap_ior_and_compl (out[bb->index], gen[bb->index], in[bb->index], | |
186 | kill[bb->index])) | |
187 | /* If the out state of this block changed, then we need | |
188 | to add the successors of this block to the worklist | |
189 | if they are not already on the worklist. */ | |
190 | FOR_EACH_EDGE (e, ei, bb->succs) | |
191 | if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)) | |
192 | { | |
193 | *qin++ = e->dest; | |
194 | e->dest->aux = e; | |
195 | qlen++; | |
196 | ||
197 | if (qin >= qend) | |
198 | qin = worklist; | |
199 | } | |
200 | } | |
201 | ||
202 | clear_aux_for_edges (); | |
203 | clear_aux_for_blocks (); | |
204 | free (worklist); | |
9243c3d1 JZZ |
205 | } |
206 | ||
29331e72 LD |
/* The kinds of vsetvl instruction distinguished by this pass.
   NUM_VSETVL_TYPE is not a kind itself; it is the number of kinds.  */
enum vsetvl_type
{
  VSETVL_NORMAL = 0,
  VSETVL_VTYPE_CHANGE_ONLY = 1,
  VSETVL_DISCARD_RESULT = 2,
  NUM_VSETVL_TYPE = 3
};
9243c3d1 | 215 | |
/* How an instruction is to be emitted.  */
enum emit_type
{
  /* Emit with emit_insn directly.  */
  EMIT_DIRECT = 0,
  /* NOTE(review): presumably emit before / after a given instruction;
     confirm against the users of this enum.  */
  EMIT_BEFORE = 1,
  EMIT_AFTER = 2,
};
223 | ||
224 | /* dump helper functions */ | |
225 | static const char * | |
226 | vlmul_to_str (vlmul_type vlmul) | |
227 | { | |
228 | switch (vlmul) | |
229 | { | |
230 | case LMUL_1: | |
231 | return "m1"; | |
232 | case LMUL_2: | |
233 | return "m2"; | |
234 | case LMUL_4: | |
235 | return "m4"; | |
236 | case LMUL_8: | |
237 | return "m8"; | |
238 | case LMUL_RESERVED: | |
239 | return "INVALID LMUL"; | |
240 | case LMUL_F8: | |
241 | return "mf8"; | |
242 | case LMUL_F4: | |
243 | return "mf4"; | |
244 | case LMUL_F2: | |
245 | return "mf2"; | |
246 | ||
247 | default: | |
248 | gcc_unreachable (); | |
249 | } | |
9243c3d1 JZZ |
250 | } |
251 | ||
29331e72 LD |
/* Return "agnostic" when AGNOSTIC_P is true and "undisturbed" otherwise.  */
static const char *
policy_to_str (bool agnostic_p)
{
  if (agnostic_p)
    return "agnostic";
  return "undisturbed";
}
257 | ||
9243c3d1 JZZ |
258 | /* Return true if it is an RVV instruction depends on VTYPE global |
259 | status register. */ | |
260 | static bool | |
261 | has_vtype_op (rtx_insn *rinsn) | |
262 | { | |
263 | return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn); | |
264 | } | |
265 | ||
ec99ffab JZZ |
266 | /* Return true if the instruction ignores VLMUL field of VTYPE. */ |
267 | static bool | |
268 | ignore_vlmul_insn_p (rtx_insn *rinsn) | |
269 | { | |
270 | return get_attr_type (rinsn) == TYPE_VIMOVVX | |
271 | || get_attr_type (rinsn) == TYPE_VFMOVVF | |
272 | || get_attr_type (rinsn) == TYPE_VIMOVXV | |
273 | || get_attr_type (rinsn) == TYPE_VFMOVFV; | |
274 | } | |
275 | ||
276 | /* Return true if the instruction is scalar move instruction. */ | |
277 | static bool | |
278 | scalar_move_insn_p (rtx_insn *rinsn) | |
279 | { | |
280 | return get_attr_type (rinsn) == TYPE_VIMOVXV | |
281 | || get_attr_type (rinsn) == TYPE_VFMOVFV; | |
282 | } | |
283 | ||
60bd33bc JZZ |
284 | /* Return true if the instruction is fault first load instruction. */ |
285 | static bool | |
286 | fault_first_load_p (rtx_insn *rinsn) | |
287 | { | |
6313b045 JZZ |
288 | return recog_memoized (rinsn) >= 0 |
289 | && (get_attr_type (rinsn) == TYPE_VLDFF | |
290 | || get_attr_type (rinsn) == TYPE_VLSEGDFF); | |
60bd33bc JZZ |
291 | } |
292 | ||
293 | /* Return true if the instruction is read vl instruction. */ | |
294 | static bool | |
295 | read_vl_insn_p (rtx_insn *rinsn) | |
296 | { | |
297 | return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_RDVL; | |
298 | } | |
299 | ||
9243c3d1 JZZ |
300 | /* Return true if it is a vsetvl instruction. */ |
301 | static bool | |
302 | vector_config_insn_p (rtx_insn *rinsn) | |
303 | { | |
304 | return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL; | |
305 | } | |
306 | ||
307 | /* Return true if it is vsetvldi or vsetvlsi. */ | |
308 | static bool | |
309 | vsetvl_insn_p (rtx_insn *rinsn) | |
310 | { | |
29331e72 | 311 | if (!rinsn || !vector_config_insn_p (rinsn)) |
6b6b9c68 | 312 | return false; |
85112fbb | 313 | return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi |
6b6b9c68 JZZ |
314 | || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi); |
315 | } | |
316 | ||
317 | /* Return true if it is vsetvl zero, rs1. */ | |
318 | static bool | |
319 | vsetvl_discard_result_insn_p (rtx_insn *rinsn) | |
320 | { | |
321 | if (!vector_config_insn_p (rinsn)) | |
322 | return false; | |
323 | return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi | |
324 | || INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi); | |
9243c3d1 JZZ |
325 | } |
326 | ||
9243c3d1 | 327 | static bool |
4f673c5e | 328 | real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb) |
9243c3d1 | 329 | { |
4f673c5e | 330 | return insn != nullptr && insn->is_real () && insn->bb () == bb; |
9243c3d1 JZZ |
331 | } |
332 | ||
29331e72 | 333 | /* Helper function to get VL operand for VLMAX insn. */ |
6b6b9c68 JZZ |
334 | static rtx |
335 | get_vl (rtx_insn *rinsn) | |
336 | { | |
337 | if (has_vl_op (rinsn)) | |
338 | { | |
339 | extract_insn_cached (rinsn); | |
340 | return recog_data.operand[get_attr_vl_op_idx (rinsn)]; | |
341 | } | |
342 | return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0)); | |
4f673c5e JZZ |
343 | } |
344 | ||
6b6b9c68 JZZ |
345 | /* Helper function to get AVL operand. */ |
346 | static rtx | |
347 | get_avl (rtx_insn *rinsn) | |
348 | { | |
349 | if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn)) | |
350 | return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0); | |
351 | ||
352 | if (!has_vl_op (rinsn)) | |
353 | return NULL_RTX; | |
5e714992 | 354 | if (vlmax_avl_type_p (rinsn)) |
6b6b9c68 JZZ |
355 | return RVV_VLMAX; |
356 | extract_insn_cached (rinsn); | |
357 | return recog_data.operand[get_attr_vl_op_idx (rinsn)]; | |
358 | } | |
359 | ||
9243c3d1 JZZ |
360 | /* Get default mask policy. */ |
361 | static bool | |
362 | get_default_ma () | |
363 | { | |
364 | /* For the instruction that doesn't require MA, we still need a default value | |
365 | to emit vsetvl. We pick up the default value according to prefer policy. */ | |
366 | return (bool) (get_prefer_mask_policy () & 0x1 | |
367 | || (get_prefer_mask_policy () >> 1 & 0x1)); | |
368 | } | |
369 | ||
9243c3d1 JZZ |
370 | /* Helper function to get MA operand. */ |
371 | static bool | |
372 | mask_agnostic_p (rtx_insn *rinsn) | |
373 | { | |
374 | /* If it doesn't have MA, we return agnostic by default. */ | |
375 | extract_insn_cached (rinsn); | |
376 | int ma = get_attr_ma (rinsn); | |
377 | return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma); | |
378 | } | |
379 | ||
380 | /* Return true if FN has a vector instruction that use VL/VTYPE. */ | |
381 | static bool | |
382 | has_vector_insn (function *fn) | |
383 | { | |
384 | basic_block cfg_bb; | |
385 | rtx_insn *rinsn; | |
386 | FOR_ALL_BB_FN (cfg_bb, fn) | |
387 | FOR_BB_INSNS (cfg_bb, rinsn) | |
388 | if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn)) | |
389 | return true; | |
390 | return false; | |
391 | } | |
392 | ||
29331e72 LD |
393 | static vlmul_type |
394 | calculate_vlmul (unsigned int sew, unsigned int ratio) | |
9243c3d1 | 395 | { |
29331e72 LD |
396 | const vlmul_type ALL_LMUL[] |
397 | = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2}; | |
398 | for (const vlmul_type vlmul : ALL_LMUL) | |
399 | if (calculate_ratio (sew, vlmul) == ratio) | |
400 | return vlmul; | |
401 | return LMUL_RESERVED; | |
9243c3d1 JZZ |
402 | } |
403 | ||
29331e72 LD |
404 | /* Get the currently supported maximum sew used in the int rvv instructions. */ |
405 | static uint8_t | |
406 | get_max_int_sew () | |
9243c3d1 | 407 | { |
29331e72 LD |
408 | if (TARGET_VECTOR_ELEN_64) |
409 | return 64; | |
410 | else if (TARGET_VECTOR_ELEN_32) | |
411 | return 32; | |
412 | gcc_unreachable (); | |
9243c3d1 JZZ |
413 | } |
414 | ||
29331e72 LD |
415 | /* Get the currently supported maximum sew used in the float rvv instructions. |
416 | */ | |
417 | static uint8_t | |
418 | get_max_float_sew () | |
419 | { | |
420 | if (TARGET_VECTOR_ELEN_FP_64) | |
421 | return 64; | |
422 | else if (TARGET_VECTOR_ELEN_FP_32) | |
423 | return 32; | |
424 | else if (TARGET_VECTOR_ELEN_FP_16) | |
425 | return 16; | |
426 | gcc_unreachable (); | |
9243c3d1 JZZ |
427 | } |
428 | ||
/* Bit flags naming the kinds of definition.  They can be OR-ed together
   to form a mask of wanted kinds.  */
enum def_type
{
  REAL_SET = 0x01,
  PHI_SET = 0x02,
  BB_HEAD_SET = 0x04,
  BB_END_SET = 0x08,
  /* ??? TODO: The RTL_SSA framework has REAL_SET, PHI_SET, BB_HEAD_SET,
     BB_END_SET and CLOBBER_DEF def_info types.  Currently we
     conservatively do not optimize clobber defs, since we have not seen a
     case that needs it.  */
  CLOBBER_DEF = 0x10
};
9243c3d1 | 443 | |
29331e72 LD |
444 | static bool |
445 | insn_should_be_added_p (const insn_info *insn, unsigned int types) | |
da93c41c | 446 | { |
29331e72 LD |
447 | if (insn->is_real () && (types & REAL_SET)) |
448 | return true; | |
449 | if (insn->is_phi () && (types & PHI_SET)) | |
450 | return true; | |
451 | if (insn->is_bb_head () && (types & BB_HEAD_SET)) | |
452 | return true; | |
453 | if (insn->is_bb_end () && (types & BB_END_SET)) | |
454 | return true; | |
455 | return false; | |
da93c41c JZ |
456 | } |
457 | ||
29331e72 LD |
458 | static const hash_set<use_info *> |
459 | get_all_real_uses (insn_info *insn, unsigned regno) | |
9243c3d1 | 460 | { |
29331e72 | 461 | gcc_assert (insn->is_real ()); |
9243c3d1 | 462 | |
29331e72 LD |
463 | hash_set<use_info *> uses; |
464 | auto_vec<phi_info *> work_list; | |
465 | hash_set<phi_info *> visited_list; | |
9243c3d1 | 466 | |
29331e72 | 467 | for (def_info *def : insn->defs ()) |
9243c3d1 | 468 | { |
29331e72 LD |
469 | if (!def->is_reg () || def->regno () != regno) |
470 | continue; | |
471 | set_info *set = safe_dyn_cast<set_info *> (def); | |
472 | if (!set) | |
473 | continue; | |
474 | for (use_info *use : set->nondebug_insn_uses ()) | |
475 | if (use->insn ()->is_real ()) | |
476 | uses.add (use); | |
477 | for (use_info *use : set->phi_uses ()) | |
478 | work_list.safe_push (use->phi ()); | |
9243c3d1 | 479 | } |
9243c3d1 | 480 | |
29331e72 | 481 | while (!work_list.is_empty ()) |
60bd33bc | 482 | { |
29331e72 LD |
483 | phi_info *phi = work_list.pop (); |
484 | visited_list.add (phi); | |
60bd33bc | 485 | |
29331e72 LD |
486 | for (use_info *use : phi->nondebug_insn_uses ()) |
487 | if (use->insn ()->is_real ()) | |
488 | uses.add (use); | |
489 | for (use_info *use : phi->phi_uses ()) | |
490 | if (!visited_list.contains (use->phi ())) | |
491 | work_list.safe_push (use->phi ()); | |
60bd33bc | 492 | } |
29331e72 | 493 | return uses; |
60bd33bc JZZ |
494 | } |
495 | ||
29331e72 LD |
496 | /* Recursively find all define instructions. The kind of instruction is |
497 | specified by the DEF_TYPE. */ | |
498 | static hash_set<set_info *> | |
499 | get_all_sets (phi_info *phi, unsigned int types) | |
9243c3d1 | 500 | { |
29331e72 LD |
501 | hash_set<set_info *> insns; |
502 | auto_vec<phi_info *> work_list; | |
503 | hash_set<phi_info *> visited_list; | |
504 | if (!phi) | |
505 | return hash_set<set_info *> (); | |
506 | work_list.safe_push (phi); | |
9243c3d1 | 507 | |
29331e72 | 508 | while (!work_list.is_empty ()) |
9243c3d1 | 509 | { |
29331e72 LD |
510 | phi_info *phi = work_list.pop (); |
511 | visited_list.add (phi); | |
512 | for (use_info *use : phi->inputs ()) | |
513 | { | |
514 | def_info *def = use->def (); | |
515 | set_info *set = safe_dyn_cast<set_info *> (def); | |
516 | if (!set) | |
517 | return hash_set<set_info *> (); | |
a1e42094 | 518 | |
29331e72 | 519 | gcc_assert (!set->insn ()->is_debug_insn ()); |
9243c3d1 | 520 | |
29331e72 LD |
521 | if (insn_should_be_added_p (set->insn (), types)) |
522 | insns.add (set); | |
523 | if (set->insn ()->is_phi ()) | |
524 | { | |
525 | phi_info *new_phi = as_a<phi_info *> (set); | |
526 | if (!visited_list.contains (new_phi)) | |
527 | work_list.safe_push (new_phi); | |
528 | } | |
529 | } | |
9243c3d1 | 530 | } |
29331e72 | 531 | return insns; |
9243c3d1 JZZ |
532 | } |
533 | ||
29331e72 LD |
534 | static hash_set<set_info *> |
535 | get_all_sets (set_info *set, bool /* get_real_inst */ real_p, | |
536 | bool /*get_phi*/ phi_p, bool /* get_function_parameter*/ param_p) | |
aef20243 | 537 | { |
29331e72 LD |
538 | if (real_p && phi_p && param_p) |
539 | return get_all_sets (safe_dyn_cast<phi_info *> (set), | |
540 | REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET); | |
aef20243 | 541 | |
29331e72 LD |
542 | else if (real_p && param_p) |
543 | return get_all_sets (safe_dyn_cast<phi_info *> (set), | |
544 | REAL_SET | BB_HEAD_SET | BB_END_SET); | |
545 | ||
546 | else if (real_p) | |
547 | return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET); | |
548 | return hash_set<set_info *> (); | |
69f39144 JZ |
549 | } |
550 | ||
4f673c5e | 551 | static bool |
6b6b9c68 | 552 | source_equal_p (insn_info *insn1, insn_info *insn2) |
4f673c5e | 553 | { |
6b6b9c68 JZZ |
554 | if (!insn1 || !insn2) |
555 | return false; | |
556 | rtx_insn *rinsn1 = insn1->rtl (); | |
557 | rtx_insn *rinsn2 = insn2->rtl (); | |
4f673c5e JZZ |
558 | if (!rinsn1 || !rinsn2) |
559 | return false; | |
29331e72 | 560 | |
4f673c5e JZZ |
561 | rtx note1 = find_reg_equal_equiv_note (rinsn1); |
562 | rtx note2 = find_reg_equal_equiv_note (rinsn2); | |
2020bce3 RD |
563 | /* We could handle the case of similar-looking REG_EQUALs as well but |
564 | would need to verify that no insn in between modifies any of the source | |
565 | operands. */ | |
566 | if (note1 && note2 && rtx_equal_p (note1, note2) | |
567 | && REG_NOTE_KIND (note1) == REG_EQUIV) | |
4f673c5e | 568 | return true; |
29331e72 | 569 | return false; |
4f673c5e JZZ |
570 | } |
571 | ||
6b6b9c68 | 572 | static insn_info * |
4f673c5e JZZ |
573 | extract_single_source (set_info *set) |
574 | { | |
575 | if (!set) | |
576 | return nullptr; | |
577 | if (set->insn ()->is_real ()) | |
6b6b9c68 | 578 | return set->insn (); |
4f673c5e JZZ |
579 | if (!set->insn ()->is_phi ()) |
580 | return nullptr; | |
6b6b9c68 | 581 | hash_set<set_info *> sets = get_all_sets (set, true, false, true); |
4f673c5e | 582 | |
6b6b9c68 | 583 | insn_info *first_insn = (*sets.begin ())->insn (); |
4f673c5e JZZ |
584 | if (first_insn->is_artificial ()) |
585 | return nullptr; | |
6b6b9c68 | 586 | for (const set_info *set : sets) |
4f673c5e JZZ |
587 | { |
588 | /* If there is a head or end insn, we conservative return | |
589 | NULL so that VSETVL PASS will insert vsetvl directly. */ | |
6b6b9c68 | 590 | if (set->insn ()->is_artificial ()) |
4f673c5e | 591 | return nullptr; |
29331e72 | 592 | if (set != *sets.begin () && !source_equal_p (set->insn (), first_insn)) |
4f673c5e JZZ |
593 | return nullptr; |
594 | } | |
595 | ||
6b6b9c68 | 596 | return first_insn; |
4f673c5e JZZ |
597 | } |
598 | ||
29331e72 LD |
599 | static bool |
600 | same_equiv_note_p (set_info *set1, set_info *set2) | |
ec99ffab | 601 | { |
29331e72 LD |
602 | insn_info *insn1 = extract_single_source (set1); |
603 | insn_info *insn2 = extract_single_source (set2); | |
604 | if (!insn1 || !insn2) | |
605 | return false; | |
606 | return source_equal_p (insn1, insn2); | |
ec99ffab JZZ |
607 | } |
608 | ||
29331e72 LD |
609 | static unsigned |
610 | get_expr_id (unsigned bb_index, unsigned regno, unsigned num_bbs) | |
ec99ffab | 611 | { |
29331e72 | 612 | return regno * num_bbs + bb_index; |
ec99ffab | 613 | } |
29331e72 LD |
614 | static unsigned |
615 | get_regno (unsigned expr_id, unsigned num_bb) | |
ec99ffab | 616 | { |
29331e72 | 617 | return expr_id / num_bb; |
ec99ffab | 618 | } |
29331e72 LD |
619 | static unsigned |
620 | get_bb_index (unsigned expr_id, unsigned num_bb) | |
ec99ffab | 621 | { |
29331e72 | 622 | return expr_id % num_bb; |
ec99ffab JZZ |
623 | } |
624 | ||
29331e72 | 625 | /* Return true if the SET result is not used by any instructions. */ |
ec99ffab | 626 | static bool |
29331e72 | 627 | has_no_uses (basic_block cfg_bb, rtx_insn *rinsn, int regno) |
ec99ffab | 628 | { |
29331e72 LD |
629 | if (bitmap_bit_p (df_get_live_out (cfg_bb), regno)) |
630 | return false; | |
ec99ffab | 631 | |
29331e72 LD |
632 | rtx_insn *iter; |
633 | for (iter = NEXT_INSN (rinsn); iter && iter != NEXT_INSN (BB_END (cfg_bb)); | |
634 | iter = NEXT_INSN (iter)) | |
635 | if (df_find_use (iter, regno_reg_rtx[regno])) | |
636 | return false; | |
ec99ffab | 637 | |
29331e72 | 638 | return true; |
ec99ffab JZZ |
639 | } |
640 | ||
29331e72 LD |
/* These flags describe the minimum demand an RVV instruction places on the
   vl and vtype values.  For example, DEMAND_RATIO_P says that the
   instruction only needs the SEW/LMUL ratio to stay the same; SEW and LMUL
   themselves need not be fixed.

   Consequently, if a former RVV instruction demands DEMAND_RATIO_P, a
   latter one demands both SEW and LMUL, and the SEW/LMUL ratio of the two
   is the same, the minimum demand of the former can be tightened to the
   SEW and LMUL of the latter.  The vsetvl instruction generated for this
   new demand then serves the latter instruction as well, so no separate
   vsetvl instruction has to be inserted for it.  */
enum demand_flags : unsigned
{
  DEMAND_EMPTY_P = 0u,
  DEMAND_SEW_P = 1u << 0,
  DEMAND_LMUL_P = 1u << 1,
  DEMAND_RATIO_P = 1u << 2,
  DEMAND_GE_SEW_P = 1u << 3,
  DEMAND_TAIL_POLICY_P = 1u << 4,
  DEMAND_MASK_POLICY_P = 1u << 5,
  DEMAND_AVL_P = 1u << 6,
  DEMAND_NON_ZERO_AVL_P = 1u << 7,
};
ec99ffab | 665 | |
29331e72 LD |
666 | /* We split the demand information into three parts. They are sew and lmul |
667 | related (sew_lmul_demand_type), tail and mask policy related | |
668 | (policy_demand_type) and avl related (avl_demand_type). Then we define three | |
669 | interfaces avaiable_with, compatible_p and merge. avaiable_with is | |
670 | used to determine whether the two vsetvl infos prev_info and next_info are | |
671 | available or not. If prev_info is available for next_info, it means that the | |
672 | RVV insn corresponding to next_info on the path from prev_info to next_info | |
673 | can be used without inserting a separate vsetvl instruction. compatible_p | |
674 | is used to determine whether prev_info is compatible with next_info, and if | |
675 | so, merge can be used to merge the stricter demand information from | |
676 | next_info into prev_info so that prev_info becomes available to next_info. | |
677 | */ | |
ec99ffab | 678 | |
29331e72 | 679 | enum class sew_lmul_demand_type : unsigned |
ec99ffab | 680 | { |
29331e72 LD |
681 | sew_lmul = demand_flags::DEMAND_SEW_P | demand_flags::DEMAND_LMUL_P, |
682 | ratio_only = demand_flags::DEMAND_RATIO_P, | |
683 | sew_only = demand_flags::DEMAND_SEW_P, | |
684 | ge_sew = demand_flags::DEMAND_GE_SEW_P, | |
685 | ratio_and_ge_sew | |
686 | = demand_flags::DEMAND_RATIO_P | demand_flags::DEMAND_GE_SEW_P, | |
687 | }; | |
ec99ffab | 688 | |
29331e72 | 689 | enum class policy_demand_type : unsigned |
29547511 | 690 | { |
29331e72 LD |
691 | tail_mask_policy |
692 | = demand_flags::DEMAND_TAIL_POLICY_P | demand_flags::DEMAND_MASK_POLICY_P, | |
693 | tail_policy_only = demand_flags::DEMAND_TAIL_POLICY_P, | |
694 | mask_policy_only = demand_flags::DEMAND_MASK_POLICY_P, | |
695 | ignore_policy = demand_flags::DEMAND_EMPTY_P, | |
696 | }; | |
29547511 | 697 | |
29331e72 | 698 | enum class avl_demand_type : unsigned |
ec99ffab | 699 | { |
29331e72 LD |
700 | avl = demand_flags::DEMAND_AVL_P, |
701 | non_zero_avl = demand_flags::DEMAND_NON_ZERO_AVL_P, | |
702 | ignore_avl = demand_flags::DEMAND_EMPTY_P, | |
703 | }; | |
ec99ffab | 704 | |
29331e72 | 705 | class vsetvl_info |
ec99ffab | 706 | { |
29331e72 LD |
707 | private: |
708 | insn_info *m_insn; | |
709 | bb_info *m_bb; | |
710 | rtx m_avl; | |
711 | rtx m_vl; | |
712 | set_info *m_avl_def; | |
713 | uint8_t m_sew; | |
714 | uint8_t m_max_sew; | |
715 | vlmul_type m_vlmul; | |
716 | uint8_t m_ratio; | |
717 | bool m_ta; | |
718 | bool m_ma; | |
719 | ||
720 | sew_lmul_demand_type m_sew_lmul_demand; | |
721 | policy_demand_type m_policy_demand; | |
722 | avl_demand_type m_avl_demand; | |
723 | ||
724 | enum class state_type | |
725 | { | |
726 | UNINITIALIZED, | |
727 | VALID, | |
728 | UNKNOWN, | |
729 | EMPTY, | |
730 | }; | |
731 | state_type m_state; | |
732 | ||
733 | bool m_delete; | |
734 | bool m_change_vtype_only; | |
735 | insn_info *m_read_vl_insn; | |
736 | bool m_vl_used_by_non_rvv_insn; | |
ec99ffab | 737 | |
29331e72 LD |
738 | public: |
739 | vsetvl_info () | |
740 | : m_insn (nullptr), m_bb (nullptr), m_avl (NULL_RTX), m_vl (NULL_RTX), | |
741 | m_avl_def (nullptr), m_sew (0), m_max_sew (0), m_vlmul (LMUL_RESERVED), | |
742 | m_ratio (0), m_ta (false), m_ma (false), | |
743 | m_sew_lmul_demand (sew_lmul_demand_type::sew_lmul), | |
744 | m_policy_demand (policy_demand_type::tail_mask_policy), | |
745 | m_avl_demand (avl_demand_type::avl), m_state (state_type::UNINITIALIZED), | |
746 | m_delete (false), m_change_vtype_only (false), m_read_vl_insn (nullptr), | |
747 | m_vl_used_by_non_rvv_insn (false) | |
748 | {} | |
749 | ||
750 | vsetvl_info (insn_info *insn) : vsetvl_info () { parse_insn (insn); } | |
751 | ||
752 | vsetvl_info (rtx_insn *insn) : vsetvl_info () { parse_insn (insn); } | |
753 | ||
754 | void set_avl (rtx avl) { m_avl = avl; } | |
755 | void set_vl (rtx vl) { m_vl = vl; } | |
756 | void set_avl_def (set_info *avl_def) { m_avl_def = avl_def; } | |
757 | void set_sew (uint8_t sew) { m_sew = sew; } | |
758 | void set_vlmul (vlmul_type vlmul) { m_vlmul = vlmul; } | |
759 | void set_ratio (uint8_t ratio) { m_ratio = ratio; } | |
760 | void set_ta (bool ta) { m_ta = ta; } | |
761 | void set_ma (bool ma) { m_ma = ma; } | |
762 | void set_delete () { m_delete = true; } | |
763 | void set_bb (bb_info *bb) { m_bb = bb; } | |
764 | void set_max_sew (uint8_t max_sew) { m_max_sew = max_sew; } | |
765 | void set_change_vtype_only () { m_change_vtype_only = true; } | |
766 | void set_read_vl_insn (insn_info *insn) { m_read_vl_insn = insn; } | |
767 | ||
768 | rtx get_avl () const { return m_avl; } | |
769 | rtx get_vl () const { return m_vl; } | |
770 | set_info *get_avl_def () const { return m_avl_def; } | |
771 | uint8_t get_sew () const { return m_sew; } | |
772 | vlmul_type get_vlmul () const { return m_vlmul; } | |
773 | uint8_t get_ratio () const { return m_ratio; } | |
774 | bool get_ta () const { return m_ta; } | |
775 | bool get_ma () const { return m_ma; } | |
776 | insn_info *get_insn () const { return m_insn; } | |
777 | bool delete_p () const { return m_delete; } | |
778 | bb_info *get_bb () const { return m_bb; } | |
779 | uint8_t get_max_sew () const { return m_max_sew; } | |
780 | insn_info *get_read_vl_insn () const { return m_read_vl_insn; } | |
4cd4c34a | 781 | bool vl_used_by_non_rvv_insn_p () const { return m_vl_used_by_non_rvv_insn; } |
29331e72 LD |
782 | |
783 | bool has_imm_avl () const { return m_avl && CONST_INT_P (m_avl); } | |
784 | bool has_vlmax_avl () const { return vlmax_avl_p (m_avl); } | |
785 | bool has_nonvlmax_reg_avl () const | |
786 | { | |
787 | return m_avl && REG_P (m_avl) && !has_vlmax_avl (); | |
788 | } | |
789 | bool has_non_zero_avl () const | |
790 | { | |
791 | if (has_imm_avl ()) | |
792 | return INTVAL (m_avl) > 0; | |
793 | return has_vlmax_avl (); | |
794 | } | |
795 | bool has_vl () const | |
796 | { | |
797 | /* The VL operand can only be either a NULL_RTX or a register. */ | |
798 | gcc_assert (!m_vl || REG_P (m_vl)); | |
799 | return m_vl != NULL_RTX; | |
800 | } | |
801 | bool has_same_ratio (const vsetvl_info &other) const | |
802 | { | |
803 | return get_ratio () == other.get_ratio (); | |
804 | } | |
805 | ||
806 | /* The block of INSN isn't always same as the block of the VSETVL_INFO, | |
807 | meaning we may have 'get_insn ()->bb () != get_bb ()'. | |
808 | ||
809 | E.g. BB 2 (Empty) ---> BB 3 (VALID, has rvv insn 1) | |
810 | ||
811 | BB 2 has empty VSETVL_INFO, wheras BB 3 has VSETVL_INFO that satisfies | |
812 | get_insn ()->bb () == get_bb (). In earliest fusion, we may fuse bb 3 and | |
813 | bb 2 so that the 'get_bb ()' of BB2 VSETVL_INFO will be BB2 wheras the | |
814 | 'get_insn ()' of BB2 VSETVL INFO will be the rvv insn 1 (which is located | |
815 | at BB3). */ | |
816 | bool insn_inside_bb_p () const { return get_insn ()->bb () == get_bb (); } | |
817 | void update_avl (const vsetvl_info &other) | |
818 | { | |
819 | m_avl = other.get_avl (); | |
820 | m_vl = other.get_vl (); | |
821 | m_avl_def = other.get_avl_def (); | |
822 | } | |
823 | ||
824 | bool uninit_p () const { return m_state == state_type::UNINITIALIZED; } | |
825 | bool valid_p () const { return m_state == state_type::VALID; } | |
826 | bool unknown_p () const { return m_state == state_type::UNKNOWN; } | |
827 | bool empty_p () const { return m_state == state_type::EMPTY; } | |
828 | bool change_vtype_only_p () const { return m_change_vtype_only; } | |
829 | ||
830 | void set_valid () { m_state = state_type::VALID; } | |
831 | void set_unknown () { m_state = state_type::UNKNOWN; } | |
832 | void set_empty () { m_state = state_type::EMPTY; } | |
833 | ||
834 | void set_sew_lmul_demand (sew_lmul_demand_type demand) | |
835 | { | |
836 | m_sew_lmul_demand = demand; | |
837 | } | |
838 | void set_policy_demand (policy_demand_type demand) | |
839 | { | |
840 | m_policy_demand = demand; | |
841 | } | |
842 | void set_avl_demand (avl_demand_type demand) { m_avl_demand = demand; } | |
843 | ||
844 | sew_lmul_demand_type get_sew_lmul_demand () const | |
845 | { | |
846 | return m_sew_lmul_demand; | |
847 | } | |
848 | policy_demand_type get_policy_demand () const { return m_policy_demand; } | |
849 | avl_demand_type get_avl_demand () const { return m_avl_demand; } | |
850 | ||
851 | void normalize_demand (unsigned demand_flags) | |
852 | { | |
853 | switch (demand_flags | |
854 | & (DEMAND_SEW_P | DEMAND_LMUL_P | DEMAND_RATIO_P | DEMAND_GE_SEW_P)) | |
855 | { | |
856 | case (unsigned) sew_lmul_demand_type::sew_lmul: | |
857 | m_sew_lmul_demand = sew_lmul_demand_type::sew_lmul; | |
858 | break; | |
859 | case (unsigned) sew_lmul_demand_type::ratio_only: | |
860 | m_sew_lmul_demand = sew_lmul_demand_type::ratio_only; | |
861 | break; | |
862 | case (unsigned) sew_lmul_demand_type::sew_only: | |
863 | m_sew_lmul_demand = sew_lmul_demand_type::sew_only; | |
864 | break; | |
865 | case (unsigned) sew_lmul_demand_type::ge_sew: | |
866 | m_sew_lmul_demand = sew_lmul_demand_type::ge_sew; | |
867 | break; | |
868 | case (unsigned) sew_lmul_demand_type::ratio_and_ge_sew: | |
869 | m_sew_lmul_demand = sew_lmul_demand_type::ratio_and_ge_sew; | |
870 | break; | |
871 | default: | |
872 | gcc_unreachable (); | |
873 | } | |
874 | ||
875 | switch (demand_flags & (DEMAND_TAIL_POLICY_P | DEMAND_MASK_POLICY_P)) | |
876 | { | |
877 | case (unsigned) policy_demand_type::tail_mask_policy: | |
878 | m_policy_demand = policy_demand_type::tail_mask_policy; | |
879 | break; | |
880 | case (unsigned) policy_demand_type::tail_policy_only: | |
881 | m_policy_demand = policy_demand_type::tail_policy_only; | |
882 | break; | |
883 | case (unsigned) policy_demand_type::mask_policy_only: | |
884 | m_policy_demand = policy_demand_type::mask_policy_only; | |
885 | break; | |
886 | case (unsigned) policy_demand_type::ignore_policy: | |
887 | m_policy_demand = policy_demand_type::ignore_policy; | |
888 | break; | |
889 | default: | |
890 | gcc_unreachable (); | |
891 | } | |
892 | ||
893 | switch (demand_flags & (DEMAND_AVL_P | DEMAND_NON_ZERO_AVL_P)) | |
894 | { | |
895 | case (unsigned) avl_demand_type::avl: | |
896 | m_avl_demand = avl_demand_type::avl; | |
897 | break; | |
898 | case (unsigned) avl_demand_type::non_zero_avl: | |
899 | m_avl_demand = avl_demand_type::non_zero_avl; | |
900 | break; | |
901 | case (unsigned) avl_demand_type::ignore_avl: | |
902 | m_avl_demand = avl_demand_type::ignore_avl; | |
903 | break; | |
904 | default: | |
905 | gcc_unreachable (); | |
906 | } | |
907 | } | |
908 | ||
909 | void parse_insn (rtx_insn *rinsn) | |
910 | { | |
911 | if (!NONDEBUG_INSN_P (rinsn)) | |
912 | return; | |
913 | if (optimize == 0 && !has_vtype_op (rinsn)) | |
914 | return; | |
915 | gcc_assert (!vsetvl_discard_result_insn_p (rinsn)); | |
916 | set_valid (); | |
917 | extract_insn_cached (rinsn); | |
918 | m_avl = ::get_avl (rinsn); | |
919 | if (has_vlmax_avl () || vsetvl_insn_p (rinsn)) | |
920 | m_vl = ::get_vl (rinsn); | |
921 | m_sew = ::get_sew (rinsn); | |
922 | m_vlmul = ::get_vlmul (rinsn); | |
923 | m_ta = tail_agnostic_p (rinsn); | |
924 | m_ma = mask_agnostic_p (rinsn); | |
925 | } | |
926 | ||
/* Fill in this info from the RTL-SSA instruction INSN.

   M_INSN and M_BB are always recorded.  The state is then decided as
   follows:
   - debug insns and insns that neither configure the vector unit nor
     have a vtype operand leave the state untouched;
   - calls, asms and any other insn that defines VL or VTYPE without being
     a known vector insn set the state to UNKNOWN;
   - everything else sets the state to VALID and records AVL/VL, SEW,
     VLMUL, ratio, policies, the demand fields, whether VL is used by
     non-RVV insns and, for fault-only-first loads, the read-vl insn.  */
void parse_insn (insn_info *insn)
{
  m_insn = insn;
  m_bb = insn->bb ();
  /* Return if it is debug insn for the consistency with optimize == 0. */
  if (insn->is_debug_insn ())
    return;

  /* We set it as unknown since we don't know what will happen in CALL or
     ASM. */
  if (insn->is_call () || insn->is_asm ())
    {
      set_unknown ();
      return;
    }

  /* If this is something that updates VL/VTYPE that we don't know about, set
     the state to unknown. */
  if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())
      && (find_access (insn->defs (), VL_REGNUM)
          || find_access (insn->defs (), VTYPE_REGNUM)))
    {
      set_unknown ();
      return;
    }

  if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()))
    /* uninitialized */
    return;

  set_valid ();

  m_avl = ::get_avl (insn->rtl ());
  if (m_avl)
    {
      /* VL is only recorded for vsetvl insns and for VLMAX AVLs.  */
      if (vsetvl_insn_p (insn->rtl ()) || has_vlmax_avl ())
        m_vl = ::get_vl (insn->rtl ());

      /* For a register AVL remember the definition feeding this use.
         NOTE(review): the result of find_access is dereferenced without a
         null check; presumably the AVL register always appears in
         insn->uses () -- confirm.  */
      if (has_nonvlmax_reg_avl ())
        m_avl_def = find_access (insn->uses (), REGNO (m_avl))->def ();
    }

  m_sew = ::get_sew (insn->rtl ());
  m_vlmul = ::get_vlmul (insn->rtl ());
  m_ratio = get_attr_ratio (insn->rtl ());
  /* when get_attr_ratio is invalid, this kind of instructions
     doesn't care about ratio. However, we still need this value
     in demand info backward analysis. */
  if (m_ratio == INVALID_ATTRIBUTE)
    m_ratio = calculate_ratio (m_sew, m_vlmul);
  m_ta = tail_agnostic_p (insn->rtl ());
  m_ma = mask_agnostic_p (insn->rtl ());

  /* If merge operand is undef value, we prefer agnostic. */
  int merge_op_idx = get_attr_merge_op_idx (insn->rtl ());
  if (merge_op_idx != INVALID_ATTRIBUTE
      && satisfies_constraint_vu (recog_data.operand[merge_op_idx]))
    {
      m_ta = true;
      m_ma = true;
    }

  /* Determine the demand info of the RVV insn. */
  m_max_sew = get_max_int_sew ();
  unsigned dflags = 0;
  if (vector_config_insn_p (insn->rtl ()))
    {
      /* A vector configuration insn demands exactly AVL and ratio.  */
      dflags |= demand_flags::DEMAND_AVL_P;
      dflags |= demand_flags::DEMAND_RATIO_P;
    }
  else
    {
      if (has_vl_op (insn->rtl ()))
        {
          if (scalar_move_insn_p (insn->rtl ()))
            {
              /* If the avl for vmv.s.x comes from the vsetvl instruction, we
                 don't know if the avl is non-zero, so it is set to
                 DEMAND_AVL_P for now. it may be corrected to
                 DEMAND_NON_ZERO_AVL_P later when more information is
                 available.
              */
              if (has_non_zero_avl ())
                dflags |= demand_flags::DEMAND_NON_ZERO_AVL_P;
              else
                dflags |= demand_flags::DEMAND_AVL_P;
            }
          else
            dflags |= demand_flags::DEMAND_AVL_P;
        }

      /* An insn with a valid ratio attribute demands the ratio; otherwise
         it demands SEW (or only "SEW >= this one" for a tail-agnostic
         scalar move) and, unless the insn ignores it, LMUL.  */
      if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE)
        dflags |= demand_flags::DEMAND_RATIO_P;
      else
        {
          if (scalar_move_insn_p (insn->rtl ()) && m_ta)
            {
              dflags |= demand_flags::DEMAND_GE_SEW_P;
              m_max_sew = get_attr_type (insn->rtl ()) == TYPE_VFMOVFV
                            ? get_max_float_sew ()
                            : get_max_int_sew ();
            }
          else
            dflags |= demand_flags::DEMAND_SEW_P;

          if (!ignore_vlmul_insn_p (insn->rtl ()))
            dflags |= demand_flags::DEMAND_LMUL_P;
        }

      /* A non-agnostic policy must be preserved, so it is demanded.  */
      if (!m_ta)
        dflags |= demand_flags::DEMAND_TAIL_POLICY_P;
      if (!m_ma)
        dflags |= demand_flags::DEMAND_MASK_POLICY_P;
    }

  normalize_demand (dflags);

  /* Optimize AVL from the vsetvl instruction. */
  insn_info *def_insn = extract_single_source (get_avl_def ());
  if (def_insn && vsetvl_insn_p (def_insn->rtl ()))
    {
      vsetvl_info def_info = vsetvl_info (def_insn);
      /* Only take over the defining vsetvl's AVL when it is VLMAX or an
         immediate, and either this is a scalar move or both have the same
         ratio.  */
      if ((scalar_move_insn_p (insn->rtl ())
           || def_info.get_ratio () == get_ratio ())
          && (def_info.has_vlmax_avl () || def_info.has_imm_avl ()))
        {
          update_avl (def_info);
          /* This is the later correction mentioned above: with the new AVL
             a scalar move may now be known to have a non-zero AVL.  */
          if (scalar_move_insn_p (insn->rtl ()) && has_non_zero_avl ())
            m_avl_demand = avl_demand_type::non_zero_avl;
        }
    }

  /* Determine if dest operand(vl) has been used by non-RVV instructions. */
  if (has_vl ())
    {
      const hash_set<use_info *> vl_uses
        = get_all_real_uses (get_insn (), REGNO (get_vl ()));
      for (use_info *use : vl_uses)
        {
          gcc_assert (use->insn ()->is_real ());
          rtx_insn *rinsn = use->insn ()->rtl ();
          /* A use counts as "non-RVV" when the user has no VL operand or
             mentions the VL register other than exactly once ...  */
          if (!has_vl_op (rinsn)
              || count_regno_occurrences (rinsn, REGNO (get_vl ())) != 1)
            {
              m_vl_used_by_non_rvv_insn = true;
              break;
            }
          /* ... or when that single mention is not the user's AVL.  */
          rtx avl = ::get_avl (rinsn);
          if (!avl || !REG_P (avl) || REGNO (get_vl ()) != REGNO (avl))
            {
              m_vl_used_by_non_rvv_insn = true;
              break;
            }
        }
    }

  /* Collect the read vl insn for the fault-only-first rvv loads. */
  if (fault_first_load_p (insn->rtl ()))
    {
      /* Scan forward inside the same block; stop at the first insn that
         defines VL, or at the first read-vl insn, which is recorded.  */
      for (insn_info *i = insn->next_nondebug_insn ();
           i->bb () == insn->bb (); i = i->next_nondebug_insn ())
        {
          if (find_access (i->defs (), VL_REGNUM))
            break;
          if (i->rtl () && read_vl_insn_p (i->rtl ()))
            {
              m_read_vl_insn = i;
              break;
            }
        }
    }
}
1098 | ||
1099 | /* Returns the corresponding vsetvl rtx pat. */ | |
1100 | rtx get_vsetvl_pat (bool ignore_vl = false) const | |
1101 | { | |
1102 | rtx avl = get_avl (); | |
1103 | /* if optimization == 0 and the instruction is vmv.x.s/vfmv.f.s, | |
1104 | set the value of avl to (const_int 0) so that VSETVL PASS will | |
1105 | insert vsetvl correctly.*/ | |
1106 | if (!get_avl ()) | |
1107 | avl = GEN_INT (0); | |
1108 | rtx sew = gen_int_mode (get_sew (), Pmode); | |
1109 | rtx vlmul = gen_int_mode (get_vlmul (), Pmode); | |
1110 | rtx ta = gen_int_mode (get_ta (), Pmode); | |
1111 | rtx ma = gen_int_mode (get_ma (), Pmode); | |
1112 | ||
1113 | if (change_vtype_only_p ()) | |
1114 | return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma); | |
1115 | else if (has_vl () && !ignore_vl) | |
1116 | return gen_vsetvl (Pmode, get_vl (), avl, sew, vlmul, ta, ma); | |
1117 | else | |
1118 | return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma); | |
1119 | } | |
1120 | ||
1121 | bool operator== (const vsetvl_info &other) const | |
1122 | { | |
1123 | gcc_assert (!uninit_p () && !other.uninit_p () | |
1124 | && "Uninitialization should not happen"); | |
1125 | ||
1126 | if (empty_p ()) | |
1127 | return other.empty_p (); | |
1128 | if (unknown_p ()) | |
1129 | return other.unknown_p (); | |
1130 | ||
1131 | return get_insn () == other.get_insn () && get_bb () == other.get_bb () | |
1132 | && get_avl () == other.get_avl () && get_vl () == other.get_vl () | |
1133 | && get_avl_def () == other.get_avl_def () | |
1134 | && get_sew () == other.get_sew () | |
1135 | && get_vlmul () == other.get_vlmul () && get_ta () == other.get_ta () | |
1136 | && get_ma () == other.get_ma () | |
1137 | && get_avl_demand () == other.get_avl_demand () | |
1138 | && get_sew_lmul_demand () == other.get_sew_lmul_demand () | |
1139 | && get_policy_demand () == other.get_policy_demand (); | |
1140 | } | |
1141 | ||
1142 | void dump (FILE *file, const char *indent = "") const | |
1143 | { | |
1144 | if (uninit_p ()) | |
1145 | { | |
1146 | fprintf (file, "UNINITIALIZED.\n"); | |
1147 | return; | |
1148 | } | |
1149 | else if (unknown_p ()) | |
1150 | { | |
1151 | fprintf (file, "UNKNOWN.\n"); | |
1152 | return; | |
1153 | } | |
1154 | else if (empty_p ()) | |
1155 | { | |
1156 | fprintf (file, "EMPTY.\n"); | |
1157 | return; | |
1158 | } | |
1159 | else if (valid_p ()) | |
1160 | fprintf (file, "VALID (insn %u, bb %u)%s\n", get_insn ()->uid (), | |
1161 | get_bb ()->index (), delete_p () ? " (deleted)" : ""); | |
1162 | else | |
1163 | gcc_unreachable (); | |
ec99ffab | 1164 | |
29331e72 LD |
1165 | fprintf (file, "%sDemand fields:", indent); |
1166 | if (m_sew_lmul_demand == sew_lmul_demand_type::sew_lmul) | |
1167 | fprintf (file, " demand_sew_lmul"); | |
1168 | else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_only) | |
1169 | fprintf (file, " demand_ratio_only"); | |
1170 | else if (m_sew_lmul_demand == sew_lmul_demand_type::sew_only) | |
1171 | fprintf (file, " demand_sew_only"); | |
1172 | else if (m_sew_lmul_demand == sew_lmul_demand_type::ge_sew) | |
1173 | fprintf (file, " demand_ge_sew"); | |
1174 | else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_and_ge_sew) | |
1175 | fprintf (file, " demand_ratio_and_ge_sew"); | |
1176 | ||
1177 | if (m_policy_demand == policy_demand_type::tail_mask_policy) | |
1178 | fprintf (file, " demand_tail_mask_policy"); | |
1179 | else if (m_policy_demand == policy_demand_type::tail_policy_only) | |
1180 | fprintf (file, " demand_tail_policy_only"); | |
1181 | else if (m_policy_demand == policy_demand_type::mask_policy_only) | |
1182 | fprintf (file, " demand_mask_policy_only"); | |
1183 | ||
1184 | if (m_avl_demand == avl_demand_type::avl) | |
1185 | fprintf (file, " demand_avl"); | |
1186 | else if (m_avl_demand == avl_demand_type::non_zero_avl) | |
1187 | fprintf (file, " demand_non_zero_avl"); | |
1188 | fprintf (file, "\n"); | |
1189 | ||
1190 | fprintf (file, "%sSEW=%d, ", indent, get_sew ()); | |
1191 | fprintf (file, "VLMUL=%s, ", vlmul_to_str (get_vlmul ())); | |
1192 | fprintf (file, "RATIO=%d, ", get_ratio ()); | |
1193 | fprintf (file, "MAX_SEW=%d\n", get_max_sew ()); | |
1194 | ||
1195 | fprintf (file, "%sTAIL_POLICY=%s, ", indent, policy_to_str (get_ta ())); | |
1196 | fprintf (file, "MASK_POLICY=%s\n", policy_to_str (get_ma ())); | |
1197 | ||
1198 | fprintf (file, "%sAVL=", indent); | |
1199 | print_rtl_single (file, get_avl ()); | |
1200 | fprintf (file, "%sVL=", indent); | |
1201 | print_rtl_single (file, get_vl ()); | |
1202 | if (change_vtype_only_p ()) | |
1203 | fprintf (file, "%schange vtype only\n", indent); | |
1204 | if (get_read_vl_insn ()) | |
1205 | fprintf (file, "%sread_vl_insn: insn %u\n", indent, | |
1206 | get_read_vl_insn ()->uid ()); | |
4cd4c34a | 1207 | if (vl_used_by_non_rvv_insn_p ()) |
29331e72 LD |
1208 | fprintf (file, "%suse_by_non_rvv_insn=true\n", indent); |
1209 | } | |
1210 | }; | |
8fbc0871 | 1211 | |
29331e72 | 1212 | class vsetvl_block_info |
ec99ffab | 1213 | { |
29331e72 LD |
1214 | public: |
1215 | /* The static execute probability of the demand info. */ | |
1216 | profile_probability probability; | |
1217 | ||
4fd09aed JZ |
1218 | auto_vec<vsetvl_info> local_infos; |
1219 | vsetvl_info global_info; | |
1220 | bb_info *bb; | |
29331e72 LD |
1221 | |
1222 | bool full_available; | |
1223 | ||
4fd09aed | 1224 | vsetvl_block_info () : bb (nullptr), full_available (false) |
29331e72 | 1225 | { |
4fd09aed JZ |
1226 | local_infos.safe_grow_cleared (0); |
1227 | global_info.set_empty (); | |
29331e72 LD |
1228 | } |
1229 | vsetvl_block_info (const vsetvl_block_info &other) | |
4fd09aed JZ |
1230 | : probability (other.probability), local_infos (other.local_infos.copy ()), |
1231 | global_info (other.global_info), bb (other.bb) | |
29331e72 LD |
1232 | {} |
1233 | ||
1234 | vsetvl_info &get_entry_info () | |
1235 | { | |
1236 | gcc_assert (!empty_p ()); | |
4fd09aed | 1237 | return local_infos.is_empty () ? global_info : local_infos[0]; |
29331e72 LD |
1238 | } |
1239 | vsetvl_info &get_exit_info () | |
1240 | { | |
1241 | gcc_assert (!empty_p ()); | |
4fd09aed JZ |
1242 | return local_infos.is_empty () ? global_info |
1243 | : local_infos[local_infos.length () - 1]; | |
29331e72 LD |
1244 | } |
1245 | const vsetvl_info &get_entry_info () const | |
1246 | { | |
1247 | gcc_assert (!empty_p ()); | |
4fd09aed | 1248 | return local_infos.is_empty () ? global_info : local_infos[0]; |
29331e72 LD |
1249 | } |
1250 | const vsetvl_info &get_exit_info () const | |
1251 | { | |
1252 | gcc_assert (!empty_p ()); | |
4fd09aed JZ |
1253 | return local_infos.is_empty () ? global_info |
1254 | : local_infos[local_infos.length () - 1]; | |
29331e72 LD |
1255 | } |
1256 | ||
4fd09aed JZ |
1257 | bool empty_p () const { return local_infos.is_empty () && !has_info (); } |
1258 | bool has_info () const { return !global_info.empty_p (); } | |
29331e72 LD |
1259 | void set_info (const vsetvl_info &info) |
1260 | { | |
4fd09aed JZ |
1261 | gcc_assert (local_infos.is_empty ()); |
1262 | global_info = info; | |
1263 | global_info.set_bb (bb); | |
29331e72 | 1264 | } |
4fd09aed | 1265 | void set_empty_info () { global_info.set_empty (); } |
ec99ffab JZZ |
1266 | }; |
1267 | ||
29331e72 LD |
1268 | /* Demand system is the RVV-based VSETVL info analysis tools wrapper. |
1269 | It defines compatible rules for SEW/LMUL, POLICY and AVL. | |
1270 | Also, it provides 3 interfaces avaiable_p, compatible_p and | |
1271 | merge for the VSETVL PASS analysis and optimization. | |
1272 | ||
1273 | - avaiable_p: Determine whether the next info can get the | |
1274 | available VSETVL status from previous info. | |
1275 | e.g. bb 2 (demand SEW = 32, LMUL = M2) -> bb 3 (demand RATIO = 16). | |
1276 | Since bb 2 demand info (SEW/LMUL = 32/2 = 16) satisfies the bb 3 | |
1277 | demand, the VSETVL instruction in bb 3 can be elided. | |
1278 | avaiable_p (previous, next) is true in such situation. | |
1279 | - compatible_p: Determine whether prev_info is compatible with next_info | |
1280 | so that we can have a new merged info that is available to both of them. | |
1281 | - merge: Merge the stricter demand information from | |
1282 | next_info into prev_info so that prev_info becomes available to | |
1283 | next_info. */ | |
1284 | class demand_system | |
ec99ffab | 1285 | { |
29331e72 LD |
1286 | private: |
1287 | sbitmap *m_avl_def_in; | |
1288 | sbitmap *m_avl_def_out; | |
ec99ffab | 1289 | |
29331e72 | 1290 | /* predictors. */ |
ec99ffab | 1291 | |
29331e72 LD |
1292 | inline bool always_true (const vsetvl_info &prev ATTRIBUTE_UNUSED, |
1293 | const vsetvl_info &next ATTRIBUTE_UNUSED) | |
1294 | { | |
1295 | return true; | |
1296 | } | |
1297 | inline bool always_false (const vsetvl_info &prev ATTRIBUTE_UNUSED, | |
1298 | const vsetvl_info &next ATTRIBUTE_UNUSED) | |
1299 | { | |
ec99ffab | 1300 | return false; |
29331e72 LD |
1301 | } |
1302 | ||
1303 | /* predictors for sew and lmul */ | |
1304 | ||
1305 | inline bool lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1306 | { | |
1307 | return prev.get_vlmul () == next.get_vlmul (); | |
1308 | } | |
1309 | inline bool sew_eq_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1310 | { | |
1311 | return prev.get_sew () == next.get_sew (); | |
1312 | } | |
1313 | inline bool sew_lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1314 | { | |
1315 | return lmul_eq_p (prev, next) && sew_eq_p (prev, next); | |
1316 | } | |
1317 | inline bool sew_ge_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1318 | { | |
1319 | return prev.get_sew () == next.get_sew () | |
1320 | || (next.get_ta () && prev.get_sew () > next.get_sew ()); | |
1321 | } | |
1322 | inline bool sew_le_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1323 | { | |
1324 | return prev.get_sew () == next.get_sew () | |
1325 | || (prev.get_ta () && prev.get_sew () < next.get_sew ()); | |
1326 | } | |
1327 | inline bool prev_sew_le_next_max_sew_p (const vsetvl_info &prev, | |
1328 | const vsetvl_info &next) | |
1329 | { | |
1330 | return prev.get_sew () <= next.get_max_sew (); | |
1331 | } | |
1332 | inline bool next_sew_le_prev_max_sew_p (const vsetvl_info &prev, | |
1333 | const vsetvl_info &next) | |
1334 | { | |
1335 | return next.get_sew () <= prev.get_max_sew (); | |
1336 | } | |
1337 | inline bool max_sew_overlap_p (const vsetvl_info &prev, | |
1338 | const vsetvl_info &next) | |
1339 | { | |
1340 | return !(prev.get_sew () > next.get_max_sew () | |
1341 | || next.get_sew () > prev.get_max_sew ()); | |
1342 | } | |
1343 | inline bool ratio_eq_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1344 | { | |
1345 | return prev.has_same_ratio (next); | |
1346 | } | |
1347 | inline bool prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev, | |
1348 | const vsetvl_info &next) | |
1349 | { | |
1350 | return prev.get_ratio () >= (next.get_sew () / 8); | |
1351 | } | |
1352 | inline bool next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev, | |
1353 | const vsetvl_info &next) | |
1354 | { | |
1355 | return next.get_ratio () >= (prev.get_sew () / 8); | |
1356 | } | |
1357 | ||
1358 | inline bool sew_ge_and_ratio_eq_p (const vsetvl_info &prev, | |
1359 | const vsetvl_info &next) | |
1360 | { | |
1361 | return sew_ge_p (prev, next) && ratio_eq_p (prev, next); | |
1362 | } | |
1363 | inline bool sew_ge_and_prev_sew_le_next_max_sew_p (const vsetvl_info &prev, | |
1364 | const vsetvl_info &next) | |
1365 | { | |
1366 | return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next); | |
1367 | } | |
1368 | inline bool | |
1369 | sew_ge_and_prev_sew_le_next_max_sew_and_next_ratio_valid_for_prev_sew_p ( | |
1370 | const vsetvl_info &prev, const vsetvl_info &next) | |
1371 | { | |
1372 | return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next) | |
1373 | && next_ratio_valid_for_prev_sew_p (prev, next); | |
1374 | } | |
1375 | inline bool sew_le_and_next_sew_le_prev_max_sew_p (const vsetvl_info &prev, | |
1376 | const vsetvl_info &next) | |
1377 | { | |
1378 | return sew_le_p (prev, next) && next_sew_le_prev_max_sew_p (prev, next); | |
1379 | } | |
1380 | inline bool | |
1381 | max_sew_overlap_and_next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev, | |
1382 | const vsetvl_info &next) | |
1383 | { | |
1384 | return next_ratio_valid_for_prev_sew_p (prev, next) | |
1385 | && max_sew_overlap_p (prev, next); | |
1386 | } | |
1387 | inline bool | |
1388 | sew_le_and_next_sew_le_prev_max_sew_and_ratio_eq_p (const vsetvl_info &prev, | |
1389 | const vsetvl_info &next) | |
1390 | { | |
1391 | return sew_le_p (prev, next) && ratio_eq_p (prev, next) | |
1392 | && next_sew_le_prev_max_sew_p (prev, next); | |
1393 | } | |
1394 | inline bool | |
1395 | max_sew_overlap_and_prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev, | |
1396 | const vsetvl_info &next) | |
1397 | { | |
1398 | return prev_ratio_valid_for_next_sew_p (prev, next) | |
1399 | && max_sew_overlap_p (prev, next); | |
1400 | } | |
1401 | inline bool | |
1402 | sew_le_and_next_sew_le_prev_max_sew_and_prev_ratio_valid_for_next_sew_p ( | |
1403 | const vsetvl_info &prev, const vsetvl_info &next) | |
1404 | { | |
1405 | return sew_le_p (prev, next) && prev_ratio_valid_for_next_sew_p (prev, next) | |
1406 | && next_sew_le_prev_max_sew_p (prev, next); | |
1407 | } | |
1408 | inline bool max_sew_overlap_and_ratio_eq_p (const vsetvl_info &prev, | |
1409 | const vsetvl_info &next) | |
1410 | { | |
1411 | return ratio_eq_p (prev, next) && max_sew_overlap_p (prev, next); | |
1412 | } | |
1413 | ||
1414 | /* predictors for tail and mask policy */ | |
1415 | ||
1416 | inline bool tail_policy_eq_p (const vsetvl_info &prev, | |
1417 | const vsetvl_info &next) | |
1418 | { | |
1419 | return prev.get_ta () == next.get_ta (); | |
1420 | } | |
1421 | inline bool mask_policy_eq_p (const vsetvl_info &prev, | |
1422 | const vsetvl_info &next) | |
1423 | { | |
1424 | return prev.get_ma () == next.get_ma (); | |
1425 | } | |
1426 | inline bool tail_mask_policy_eq_p (const vsetvl_info &prev, | |
1427 | const vsetvl_info &next) | |
1428 | { | |
1429 | return tail_policy_eq_p (prev, next) && mask_policy_eq_p (prev, next); | |
1430 | } | |
1431 | ||
1432 | /* predictors for avl */ | |
1433 | ||
1434 | inline bool modify_or_use_vl_p (insn_info *i, const vsetvl_info &info) | |
1435 | { | |
9c16ca93 JZ |
1436 | if (info.has_vl ()) |
1437 | { | |
1438 | if (find_access (i->defs (), REGNO (info.get_vl ()))) | |
1439 | return true; | |
1440 | if (find_access (i->uses (), REGNO (info.get_vl ()))) | |
1441 | { | |
1442 | resource_info resource = full_register (REGNO (info.get_vl ())); | |
1443 | def_lookup dl1 = crtl->ssa->find_def (resource, i); | |
1444 | def_lookup dl2 = crtl->ssa->find_def (resource, info.get_insn ()); | |
1445 | if (dl1.matching_set () || dl2.matching_set ()) | |
1446 | return true; | |
1447 | /* If their VLs are coming from same def, we still want to fuse | |
1448 | their VSETVL demand info to gain better performance. */ | |
1449 | return dl1.prev_def (i) != dl2.prev_def (i); | |
1450 | } | |
1451 | } | |
1452 | return false; | |
29331e72 LD |
1453 | } |
1454 | inline bool modify_avl_p (insn_info *i, const vsetvl_info &info) | |
1455 | { | |
1456 | return info.has_nonvlmax_reg_avl () | |
1457 | && find_access (i->defs (), REGNO (info.get_avl ())); | |
1458 | } | |
1459 | ||
1460 | inline bool modify_reg_between_p (insn_info *prev_insn, insn_info *curr_insn, | |
1461 | unsigned regno) | |
1462 | { | |
1463 | gcc_assert (prev_insn->compare_with (curr_insn) < 0); | |
1464 | for (insn_info *i = curr_insn->prev_nondebug_insn (); i != prev_insn; | |
1465 | i = i->prev_nondebug_insn ()) | |
1466 | { | |
1467 | // no def of regno | |
1468 | if (find_access (i->defs (), regno)) | |
1469 | return true; | |
1470 | } | |
1471 | return false; | |
1472 | } | |
ec99ffab | 1473 | |
29331e72 LD |
1474 | inline bool reg_avl_equal_p (const vsetvl_info &prev, const vsetvl_info &next) |
1475 | { | |
1476 | if (!prev.has_nonvlmax_reg_avl () || !next.has_nonvlmax_reg_avl ()) | |
1477 | return false; | |
ec99ffab | 1478 | |
29331e72 LD |
1479 | if (same_equiv_note_p (prev.get_avl_def (), next.get_avl_def ())) |
1480 | return true; | |
ec99ffab | 1481 | |
29331e72 LD |
1482 | if (REGNO (prev.get_avl ()) != REGNO (next.get_avl ())) |
1483 | return false; | |
ec99ffab | 1484 | |
29331e72 LD |
1485 | insn_info *prev_insn = prev.get_insn (); |
1486 | if (prev.get_bb () != prev_insn->bb ()) | |
1487 | prev_insn = prev.get_bb ()->end_insn (); | |
ec99ffab | 1488 | |
29331e72 LD |
1489 | insn_info *next_insn = next.get_insn (); |
1490 | if (next.get_bb () != next_insn->bb ()) | |
1491 | next_insn = next.get_bb ()->end_insn (); | |
ec99ffab | 1492 | |
29331e72 LD |
1493 | return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false); |
1494 | } | |
ec99ffab | 1495 | |
29331e72 LD |
/* Return true if the AVL of PREV is known to equal the AVL of NEXT.  Both
   infos must be VALID.  The checks below are tried in order and the first
   one that applies decides the result.  */
inline bool avl_equal_p (const vsetvl_info &prev, const vsetvl_info &next)
{
  gcc_assert (prev.valid_p () && next.valid_p ());

  /* Never treat the AVLs as equal when NEXT's VL result is consumed by
     non-RVV instructions.  */
  if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
    return false;

  /* NEXT's AVL is defined by PREV's own vector configuration insn.  */
  if (vector_config_insn_p (prev.get_insn ()->rtl ()) && next.get_avl_def ()
      && next.get_avl_def ()->insn () == prev.get_insn ())
    return true;

  /* PREV has a recorded read-vl insn: the AVLs are equal only if NEXT's
     register AVL has that insn as its single source.  */
  if (prev.get_read_vl_insn ())
    {
      if (!next.has_nonvlmax_reg_avl () || !next.get_avl_def ())
        return false;
      insn_info *avl_def_insn = extract_single_source (next.get_avl_def ());
      return avl_def_insn == prev.get_read_vl_insn ();
    }

  /* PREV and NEXT compare equal and use a register AVL: give up if that
     register is defined anywhere from the insn to the end of its
     block.  */
  if (prev == next && prev.has_nonvlmax_reg_avl ())
    {
      insn_info *insn = prev.get_insn ();
      bb_info *bb = insn->bb ();
      for (insn_info *i = insn; real_insn_and_same_bb_p (i, bb);
           i = i->next_nondebug_insn ())
        if (find_access (i->defs (), REGNO (prev.get_avl ())))
          return false;
    }

  if (prev.has_vlmax_avl () && next.has_vlmax_avl ())
    return true;
  else if (prev.has_imm_avl () && next.has_imm_avl ())
    return INTVAL (prev.get_avl ()) == INTVAL (next.get_avl ());
  else if (prev.has_vl () && next.has_nonvlmax_reg_avl ()
           && REGNO (prev.get_vl ()) == REGNO (next.get_avl ()))
    {
      /* NEXT's AVL register is PREV's VL register.  Use the end of the
         info's block as position when the insn lives in another
         block.  */
      insn_info *prev_insn = prev.insn_inside_bb_p ()
                               ? prev.get_insn ()
                               : prev.get_bb ()->end_insn ();

      insn_info *next_insn = next.insn_inside_bb_p ()
                               ? next.get_insn ()
                               : next.get_bb ()->end_insn ();
      return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false);
    }
  else if (prev.has_nonvlmax_reg_avl () && next.has_nonvlmax_reg_avl ())
    return reg_avl_equal_p (prev, next);

  return false;
}
1546 | inline bool avl_equal_or_prev_avl_non_zero_p (const vsetvl_info &prev, | |
1547 | const vsetvl_info &next) | |
1548 | { | |
1549 | return avl_equal_p (prev, next) || prev.has_non_zero_avl (); | |
1550 | } | |
1551 | ||
1552 | inline bool can_use_next_avl_p (const vsetvl_info &prev, | |
1553 | const vsetvl_info &next) | |
1554 | { | |
0c4bd132 JZ |
1555 | /* Forbid the AVL/VL propagation if VL of NEXT is used |
1556 | by non-RVV instructions. This is because: | |
1557 | ||
1558 | bb 2: | |
1559 | PREV: scalar move (no AVL) | |
1560 | bb 3: | |
1561 | NEXT: vsetvl a5(VL), a4(AVL) ... | |
1562 | branch a5,zero | |
1563 | ||
1564 | Since user vsetvl instruction is no side effect instruction | |
1565 | which should be placed in the correct and optimal location | |
1566 | of the program by the previous PASS, it is unreasonable that | |
1567 | VSETVL PASS tries to move it to another places if it used by | |
1568 | non-RVV instructions. | |
1569 | ||
1570 | Note: We only forbid the cases that VL is used by the following | |
1571 | non-RVV instructions which will cause issues. We don't forbid | |
1572 | other cases since it won't cause correctness issues and we still | |
1573 | more demand info are fused backward. The later LCM algorithm | |
1574 | should know the optimal location of the vsetvl. */ | |
1575 | if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ()) | |
1576 | return false; | |
1577 | ||
29331e72 LD |
1578 | if (!next.has_nonvlmax_reg_avl () && !next.has_vl ()) |
1579 | return true; | |
e030af3e | 1580 | |
29331e72 LD |
1581 | insn_info *prev_insn = prev.get_insn (); |
1582 | if (prev.get_bb () != prev_insn->bb ()) | |
1583 | prev_insn = prev.get_bb ()->end_insn (); | |
1584 | ||
1585 | insn_info *next_insn = next.get_insn (); | |
1586 | if (next.get_bb () != next_insn->bb ()) | |
1587 | next_insn = next.get_bb ()->end_insn (); | |
1588 | ||
1589 | return avl_vl_unmodified_between_p (prev_insn, next_insn, next); | |
1590 | } | |
1591 | ||
1592 | inline bool avl_equal_or_next_avl_non_zero_and_can_use_next_avl_p ( | |
1593 | const vsetvl_info &prev, const vsetvl_info &next) | |
1594 | { | |
1595 | return avl_equal_p (prev, next) | |
1596 | || (next.has_non_zero_avl () && can_use_next_avl_p (prev, next)); | |
1597 | } | |
1598 | ||
1599 | /* modifiers */ | |
1600 | ||
1601 | inline void nop (const vsetvl_info &prev ATTRIBUTE_UNUSED, | |
1602 | const vsetvl_info &next ATTRIBUTE_UNUSED) | |
1603 | {} | |
1604 | ||
1605 | /* modifiers for sew and lmul */ | |
1606 | ||
1607 | inline void use_min_of_max_sew (vsetvl_info &prev, const vsetvl_info &next) | |
1608 | { | |
1609 | prev.set_max_sew (MIN (prev.get_max_sew (), next.get_max_sew ())); | |
1610 | } | |
1611 | inline void use_next_sew (vsetvl_info &prev, const vsetvl_info &next) | |
1612 | { | |
1613 | prev.set_sew (next.get_sew ()); | |
1614 | use_min_of_max_sew (prev, next); | |
1615 | } | |
1616 | inline void use_max_sew (vsetvl_info &prev, const vsetvl_info &next) | |
1617 | { | |
1618 | auto max_sew = std::max (prev.get_sew (), next.get_sew ()); | |
1619 | prev.set_sew (max_sew); | |
1620 | use_min_of_max_sew (prev, next); | |
1621 | } | |
1622 | inline void use_next_sew_lmul (vsetvl_info &prev, const vsetvl_info &next) | |
1623 | { | |
1624 | use_next_sew (prev, next); | |
1625 | prev.set_vlmul (next.get_vlmul ()); | |
1626 | prev.set_ratio (next.get_ratio ()); | |
1627 | } | |
1628 | inline void use_next_sew_with_prev_ratio (vsetvl_info &prev, | |
1629 | const vsetvl_info &next) | |
1630 | { | |
1631 | use_next_sew (prev, next); | |
1632 | prev.set_vlmul (calculate_vlmul (next.get_sew (), prev.get_ratio ())); | |
1633 | } | |
1634 | inline void modify_lmul_with_next_ratio (vsetvl_info &prev, | |
1635 | const vsetvl_info &next) | |
1636 | { | |
1637 | prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ())); | |
1638 | prev.set_ratio (next.get_ratio ()); | |
1639 | } | |
1640 | ||
1641 | inline void use_max_sew_and_lmul_with_next_ratio (vsetvl_info &prev, | |
1642 | const vsetvl_info &next) | |
1643 | { | |
1644 | prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ())); | |
1645 | use_max_sew (prev, next); | |
1646 | prev.set_ratio (next.get_ratio ()); | |
1647 | } | |
1648 | ||
1649 | inline void use_max_sew_and_lmul_with_prev_ratio (vsetvl_info &prev, | |
1650 | const vsetvl_info &next) | |
1651 | { | |
1652 | auto max_sew = std::max (prev.get_sew (), next.get_sew ()); | |
1653 | prev.set_vlmul (calculate_vlmul (max_sew, prev.get_ratio ())); | |
1654 | prev.set_sew (max_sew); | |
1655 | } | |
1656 | ||
1657 | /* modifiers for tail and mask policy */ | |
1658 | ||
1659 | inline void use_tail_policy (vsetvl_info &prev, const vsetvl_info &next) | |
1660 | { | |
1661 | if (!next.get_ta ()) | |
1662 | prev.set_ta (next.get_ta ()); | |
1663 | } | |
1664 | inline void use_mask_policy (vsetvl_info &prev, const vsetvl_info &next) | |
1665 | { | |
1666 | if (!next.get_ma ()) | |
1667 | prev.set_ma (next.get_ma ()); | |
1668 | } | |
1669 | inline void use_tail_mask_policy (vsetvl_info &prev, const vsetvl_info &next) | |
1670 | { | |
1671 | use_tail_policy (prev, next); | |
1672 | use_mask_policy (prev, next); | |
1673 | } | |
1674 | ||
1675 | /* modifiers for avl */ | |
1676 | ||
1677 | inline void use_next_avl (vsetvl_info &prev, const vsetvl_info &next) | |
1678 | { | |
1679 | gcc_assert (can_use_next_avl_p (prev, next)); | |
1680 | prev.update_avl (next); | |
1681 | } | |
1682 | ||
1683 | inline void use_next_avl_when_not_equal (vsetvl_info &prev, | |
1684 | const vsetvl_info &next) | |
1685 | { | |
1686 | if (avl_equal_p (prev, next)) | |
1687 | return; | |
1688 | gcc_assert (next.has_non_zero_avl ()); | |
1689 | use_next_avl (prev, next); | |
1690 | } | |
e030af3e | 1691 | |
29331e72 LD |
1692 | public: |
1693 | demand_system () : m_avl_def_in (nullptr), m_avl_def_out (nullptr) {} | |
1694 | ||
1695 | void set_avl_in_out_data (sbitmap *m_avl_def_in, sbitmap *m_avl_def_out) | |
1696 | { | |
1697 | m_avl_def_in = m_avl_def_in; | |
1698 | m_avl_def_out = m_avl_def_out; | |
1699 | } | |
1700 | ||
1701 | /* Can we move vsetvl info between prev_insn and next_insn safe? */ | |
1702 | bool avl_vl_unmodified_between_p (insn_info *prev_insn, insn_info *next_insn, | |
1703 | const vsetvl_info &info, | |
1704 | bool ignore_vl = false) | |
1705 | { | |
1706 | gcc_assert ((ignore_vl && info.has_nonvlmax_reg_avl ()) | |
1707 | || (info.has_nonvlmax_reg_avl () || info.has_vl ())); | |
1708 | ||
1709 | gcc_assert (!prev_insn->is_debug_insn () && !next_insn->is_debug_insn ()); | |
1710 | if (prev_insn->bb () == next_insn->bb () | |
1711 | && prev_insn->compare_with (next_insn) < 0) | |
1712 | { | |
1713 | for (insn_info *i = next_insn->prev_nondebug_insn (); i != prev_insn; | |
1714 | i = i->prev_nondebug_insn ()) | |
1715 | { | |
9c16ca93 | 1716 | // no def and use of vl |
29331e72 LD |
1717 | if (!ignore_vl && modify_or_use_vl_p (i, info)) |
1718 | return false; | |
e030af3e | 1719 | |
29331e72 LD |
1720 | // no def of avl |
1721 | if (modify_avl_p (i, info)) | |
1722 | return false; | |
1723 | } | |
1724 | return true; | |
1725 | } | |
1726 | else | |
1727 | { | |
1728 | if (!ignore_vl && info.has_vl ()) | |
1729 | { | |
1730 | bitmap live_out = df_get_live_out (prev_insn->bb ()->cfg_bb ()); | |
1731 | if (bitmap_bit_p (live_out, REGNO (info.get_vl ()))) | |
1732 | return false; | |
1733 | } | |
a2d12abe | 1734 | |
29331e72 LD |
1735 | if (info.has_nonvlmax_reg_avl () && m_avl_def_in && m_avl_def_out) |
1736 | { | |
1737 | bool has_avl_out = false; | |
1738 | unsigned regno = REGNO (info.get_avl ()); | |
1739 | unsigned expr_id; | |
1740 | sbitmap_iterator sbi; | |
1741 | EXECUTE_IF_SET_IN_BITMAP (m_avl_def_out[prev_insn->bb ()->index ()], | |
1742 | 0, expr_id, sbi) | |
1743 | { | |
1744 | if (get_regno (expr_id, last_basic_block_for_fn (cfun)) | |
1745 | != regno) | |
1746 | continue; | |
1747 | has_avl_out = true; | |
1748 | if (!bitmap_bit_p (m_avl_def_in[next_insn->bb ()->index ()], | |
1749 | expr_id)) | |
1750 | return false; | |
1751 | } | |
1752 | if (!has_avl_out) | |
1753 | return false; | |
1754 | } | |
12b23c71 | 1755 | |
29331e72 LD |
1756 | for (insn_info *i = next_insn; i != next_insn->bb ()->head_insn (); |
1757 | i = i->prev_nondebug_insn ()) | |
1758 | { | |
1759 | // no def amd use of vl | |
1760 | if (!ignore_vl && modify_or_use_vl_p (i, info)) | |
1761 | return false; | |
9243c3d1 | 1762 | |
29331e72 LD |
1763 | // no def of avl |
1764 | if (modify_avl_p (i, info)) | |
1765 | return false; | |
1766 | } | |
6b6b9c68 | 1767 | |
29331e72 LD |
1768 | for (insn_info *i = prev_insn->bb ()->end_insn (); i != prev_insn; |
1769 | i = i->prev_nondebug_insn ()) | |
1770 | { | |
1771 | // no def amd use of vl | |
1772 | if (!ignore_vl && modify_or_use_vl_p (i, info)) | |
1773 | return false; | |
1774 | ||
1775 | // no def of avl | |
1776 | if (modify_avl_p (i, info)) | |
1777 | return false; | |
1778 | } | |
1779 | } | |
d875d756 | 1780 | return true; |
29331e72 LD |
1781 | } |
1782 | ||
1783 | bool sew_lmul_compatible_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1784 | { | |
1785 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1786 | sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand (); | |
1787 | sew_lmul_demand_type next_flags = next.get_sew_lmul_demand (); | |
1788 | #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1789 | AVAILABLE_P, FUSE) \ | |
1790 | if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \ | |
1791 | && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \ | |
1792 | return COMPATIBLE_P (prev, next); | |
6b6b9c68 | 1793 | |
29331e72 | 1794 | #include "riscv-vsetvl.def" |
6b6b9c68 | 1795 | |
29331e72 LD |
1796 | gcc_unreachable (); |
1797 | } | |
6b6b9c68 | 1798 | |
29331e72 LD |
1799 | bool sew_lmul_available_p (const vsetvl_info &prev, const vsetvl_info &next) |
1800 | { | |
1801 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1802 | sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand (); | |
1803 | sew_lmul_demand_type next_flags = next.get_sew_lmul_demand (); | |
1804 | #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1805 | AVAILABLE_P, FUSE) \ | |
1806 | if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \ | |
1807 | && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \ | |
1808 | return AVAILABLE_P (prev, next); | |
d875d756 | 1809 | |
29331e72 | 1810 | #include "riscv-vsetvl.def" |
4f673c5e | 1811 | |
29331e72 LD |
1812 | gcc_unreachable (); |
1813 | } | |
1814 | ||
1815 | void merge_sew_lmul (vsetvl_info &prev, const vsetvl_info &next) | |
1816 | { | |
1817 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1818 | sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand (); | |
1819 | sew_lmul_demand_type next_flags = next.get_sew_lmul_demand (); | |
1820 | #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1821 | AVAILABLE_P, FUSE) \ | |
1822 | if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \ | |
1823 | && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \ | |
1824 | { \ | |
1825 | gcc_assert (COMPATIBLE_P (prev, next)); \ | |
1826 | FUSE (prev, next); \ | |
1827 | prev.set_sew_lmul_demand (sew_lmul_demand_type::NEW_FLAGS); \ | |
1828 | return; \ | |
1829 | } | |
9243c3d1 | 1830 | |
29331e72 | 1831 | #include "riscv-vsetvl.def" |
9243c3d1 | 1832 | |
29331e72 LD |
1833 | gcc_unreachable (); |
1834 | } | |
9243c3d1 | 1835 | |
29331e72 LD |
1836 | bool policy_compatible_p (const vsetvl_info &prev, const vsetvl_info &next) |
1837 | { | |
1838 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1839 | policy_demand_type prev_flags = prev.get_policy_demand (); | |
1840 | policy_demand_type next_flags = next.get_policy_demand (); | |
1841 | #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1842 | AVAILABLE_P, FUSE) \ | |
1843 | if (prev_flags == policy_demand_type::PREV_FLAGS \ | |
1844 | && next_flags == policy_demand_type::NEXT_FLAGS) \ | |
1845 | return COMPATIBLE_P (prev, next); | |
9243c3d1 | 1846 | |
29331e72 | 1847 | #include "riscv-vsetvl.def" |
9243c3d1 | 1848 | |
29331e72 LD |
1849 | gcc_unreachable (); |
1850 | } | |
4f673c5e | 1851 | |
29331e72 LD |
1852 | bool policy_available_p (const vsetvl_info &prev, const vsetvl_info &next) |
1853 | { | |
1854 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1855 | policy_demand_type prev_flags = prev.get_policy_demand (); | |
1856 | policy_demand_type next_flags = next.get_policy_demand (); | |
1857 | #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1858 | AVAILABLE_P, FUSE) \ | |
1859 | if (prev_flags == policy_demand_type::PREV_FLAGS \ | |
1860 | && next_flags == policy_demand_type::NEXT_FLAGS) \ | |
1861 | return AVAILABLE_P (prev, next); | |
4f673c5e | 1862 | |
29331e72 | 1863 | #include "riscv-vsetvl.def" |
9243c3d1 | 1864 | |
29331e72 LD |
1865 | gcc_unreachable (); |
1866 | } | |
1867 | ||
1868 | void merge_policy (vsetvl_info &prev, const vsetvl_info &next) | |
1869 | { | |
1870 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1871 | policy_demand_type prev_flags = prev.get_policy_demand (); | |
1872 | policy_demand_type next_flags = next.get_policy_demand (); | |
1873 | #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1874 | AVAILABLE_P, FUSE) \ | |
1875 | if (prev_flags == policy_demand_type::PREV_FLAGS \ | |
1876 | && next_flags == policy_demand_type::NEXT_FLAGS) \ | |
1877 | { \ | |
1878 | gcc_assert (COMPATIBLE_P (prev, next)); \ | |
1879 | FUSE (prev, next); \ | |
1880 | prev.set_policy_demand (policy_demand_type::NEW_FLAGS); \ | |
1881 | return; \ | |
1882 | } | |
9243c3d1 | 1883 | |
29331e72 | 1884 | #include "riscv-vsetvl.def" |
ec99ffab | 1885 | |
29331e72 LD |
1886 | gcc_unreachable (); |
1887 | } | |
9243c3d1 | 1888 | |
29331e72 LD |
1889 | bool avl_compatible_p (const vsetvl_info &prev, const vsetvl_info &next) |
1890 | { | |
1891 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1892 | avl_demand_type prev_flags = prev.get_avl_demand (); | |
1893 | avl_demand_type next_flags = next.get_avl_demand (); | |
1894 | #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1895 | AVAILABLE_P, FUSE) \ | |
1896 | if (prev_flags == avl_demand_type::PREV_FLAGS \ | |
1897 | && next_flags == avl_demand_type::NEXT_FLAGS) \ | |
1898 | return COMPATIBLE_P (prev, next); | |
9243c3d1 | 1899 | |
29331e72 | 1900 | #include "riscv-vsetvl.def" |
9243c3d1 | 1901 | |
29331e72 LD |
1902 | gcc_unreachable (); |
1903 | } | |
9243c3d1 | 1904 | |
29331e72 LD |
1905 | bool avl_available_p (const vsetvl_info &prev, const vsetvl_info &next) |
1906 | { | |
1907 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1908 | avl_demand_type prev_flags = prev.get_avl_demand (); | |
1909 | avl_demand_type next_flags = next.get_avl_demand (); | |
1910 | #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1911 | AVAILABLE_P, FUSE) \ | |
1912 | if (prev_flags == avl_demand_type::PREV_FLAGS \ | |
1913 | && next_flags == avl_demand_type::NEXT_FLAGS) \ | |
1914 | return AVAILABLE_P (prev, next); | |
9243c3d1 | 1915 | |
29331e72 | 1916 | #include "riscv-vsetvl.def" |
9243c3d1 | 1917 | |
29331e72 LD |
1918 | gcc_unreachable (); |
1919 | } | |
1920 | ||
1921 | void merge_avl (vsetvl_info &prev, const vsetvl_info &next) | |
1922 | { | |
1923 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1924 | avl_demand_type prev_flags = prev.get_avl_demand (); | |
1925 | avl_demand_type next_flags = next.get_avl_demand (); | |
1926 | #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1927 | AVAILABLE_P, FUSE) \ | |
1928 | if (prev_flags == avl_demand_type::PREV_FLAGS \ | |
1929 | && next_flags == avl_demand_type::NEXT_FLAGS) \ | |
1930 | { \ | |
1931 | gcc_assert (COMPATIBLE_P (prev, next)); \ | |
1932 | FUSE (prev, next); \ | |
1933 | prev.set_avl_demand (avl_demand_type::NEW_FLAGS); \ | |
1934 | return; \ | |
60bd33bc JZZ |
1935 | } |
1936 | ||
29331e72 | 1937 | #include "riscv-vsetvl.def" |
9243c3d1 | 1938 | |
29331e72 LD |
1939 | gcc_unreachable (); |
1940 | } | |
1941 | ||
1942 | bool compatible_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1943 | { | |
1944 | bool compatible_p = sew_lmul_compatible_p (prev, next) | |
1945 | && policy_compatible_p (prev, next) | |
1946 | && avl_compatible_p (prev, next); | |
1947 | return compatible_p; | |
1948 | } | |
1949 | ||
1950 | bool available_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1951 | { | |
1952 | bool available_p = sew_lmul_available_p (prev, next) | |
1953 | && policy_available_p (prev, next) | |
1954 | && avl_available_p (prev, next); | |
1955 | gcc_assert (!available_p || compatible_p (prev, next)); | |
1956 | return available_p; | |
1957 | } | |
1958 | ||
1959 | void merge (vsetvl_info &prev, const vsetvl_info &next) | |
1960 | { | |
1961 | gcc_assert (compatible_p (prev, next)); | |
1962 | merge_sew_lmul (prev, next); | |
1963 | merge_policy (prev, next); | |
1964 | merge_avl (prev, next); | |
1965 | gcc_assert (available_p (prev, next)); | |
1966 | } | |
1967 | }; | |
9243c3d1 | 1968 | |
9243c3d1 | 1969 | |
29331e72 | 1970 | class pre_vsetvl |
9243c3d1 | 1971 | { |
29331e72 LD |
1972 | private: |
1973 | demand_system m_dem; | |
1974 | auto_vec<vsetvl_block_info> m_vector_block_infos; | |
1975 | ||
1976 | /* data for avl reaching definition. */ |
1977 | sbitmap m_avl_regs; | |
1978 | sbitmap *m_avl_def_in; | |
1979 | sbitmap *m_avl_def_out; | |
1980 | sbitmap *m_reg_def_loc; | |
1981 | ||
1982 | /* data for vsetvl info reaching definition. */ |
1983 | vsetvl_info m_unknow_info; | |
1984 | auto_vec<vsetvl_info *> m_vsetvl_def_exprs; | |
1985 | sbitmap *m_vsetvl_def_in; | |
1986 | sbitmap *m_vsetvl_def_out; | |
1987 | ||
1988 | /* data for lcm */ | |
1989 | auto_vec<vsetvl_info *> m_exprs; | |
1990 | sbitmap *m_avloc; | |
1991 | sbitmap *m_avin; | |
1992 | sbitmap *m_avout; | |
1993 | sbitmap *m_kill; | |
1994 | sbitmap *m_antloc; | |
1995 | sbitmap *m_transp; | |
1996 | sbitmap *m_insert; | |
1997 | sbitmap *m_del; | |
1998 | struct edge_list *m_edges; | |
1999 | ||
2000 | auto_vec<vsetvl_info> m_delete_list; | |
2001 | ||
2002 | vsetvl_block_info &get_block_info (const bb_info *bb) | |
2003 | { | |
2004 | return m_vector_block_infos[bb->index ()]; | |
2005 | } | |
2006 | const vsetvl_block_info &get_block_info (const basic_block bb) const | |
2007 | { | |
2008 | return m_vector_block_infos[bb->index]; | |
2009 | } | |
2010 | ||
2011 | vsetvl_block_info &get_block_info (const basic_block bb) | |
2012 | { | |
2013 | return m_vector_block_infos[bb->index]; | |
2014 | } | |
2015 | ||
2016 | void add_expr (auto_vec<vsetvl_info *> &m_exprs, vsetvl_info &info) | |
2017 | { | |
2018 | for (vsetvl_info *item : m_exprs) | |
2019 | { | |
2020 | if (*item == info) | |
2021 | return; | |
2022 | } | |
2023 | m_exprs.safe_push (&info); | |
2024 | } | |
2025 | ||
2026 | unsigned get_expr_index (auto_vec<vsetvl_info *> &m_exprs, | |
2027 | const vsetvl_info &info) | |
2028 | { | |
2029 | for (size_t i = 0; i < m_exprs.length (); i += 1) | |
2030 | { | |
2031 | if (*m_exprs[i] == info) | |
2032 | return i; | |
2033 | } | |
2034 | gcc_unreachable (); | |
2035 | } | |
2036 | ||
2037 | bool anticpatable_exp_p (const vsetvl_info &header_info) | |
2038 | { | |
2039 | if (!header_info.has_nonvlmax_reg_avl () && !header_info.has_vl ()) | |
2040 | return true; | |
9243c3d1 | 2041 | |
29331e72 LD |
2042 | bb_info *bb = header_info.get_bb (); |
2043 | insn_info *prev_insn = bb->head_insn (); | |
2044 | insn_info *next_insn = header_info.insn_inside_bb_p () | |
2045 | ? header_info.get_insn () | |
2046 | : header_info.get_bb ()->end_insn (); | |
2047 | ||
2048 | return m_dem.avl_vl_unmodified_between_p (prev_insn, next_insn, | |
2049 | header_info); | |
2050 | } | |
2051 | ||
2052 | bool available_exp_p (const vsetvl_info &prev_info, | |
2053 | const vsetvl_info &next_info) | |
2054 | { | |
2055 | return m_dem.available_p (prev_info, next_info); | |
2056 | } | |
2057 | ||
2058 | void compute_probabilities () | |
2059 | { | |
2060 | edge e; | |
2061 | edge_iterator ei; | |
2062 | ||
2063 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2064 | { | |
2065 | basic_block cfg_bb = bb->cfg_bb (); | |
2066 | auto &curr_prob = get_block_info (cfg_bb).probability; | |
2067 | ||
2068 | /* GCC assume entry block (bb 0) are always so | |
2069 | executed so set its probability as "always". */ | |
2070 | if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb) | |
2071 | curr_prob = profile_probability::always (); | |
2072 | /* Exit block (bb 1) is the block we don't need to process. */ | |
2073 | if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb) | |
2074 | continue; | |
9243c3d1 | 2075 | |
29331e72 LD |
2076 | gcc_assert (curr_prob.initialized_p ()); |
2077 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
2078 | { | |
2079 | auto &new_prob = get_block_info (e->dest).probability; | |
2080 | /* Normally, the edge probability should be initialized. | |
2081 | However, some special testing code which is written in | |
2082 | GIMPLE IR style force the edge probility uninitialized, | |
2083 | we conservatively set it as never so that it will not | |
2084 | affect PRE (Phase 3 && Phse 4). */ | |
2085 | if (!e->probability.initialized_p ()) | |
2086 | new_prob = profile_probability::never (); | |
2087 | else if (!new_prob.initialized_p ()) | |
2088 | new_prob = curr_prob * e->probability; | |
2089 | else if (new_prob == profile_probability::always ()) | |
2090 | continue; | |
2091 | else | |
2092 | new_prob += curr_prob * e->probability; | |
2093 | } | |
2094 | } | |
2095 | } | |
2096 | ||
2097 | void insert_vsetvl_insn (enum emit_type emit_type, const vsetvl_info &info) | |
2098 | { | |
2099 | rtx pat = info.get_vsetvl_pat (); | |
2100 | rtx_insn *rinsn = info.get_insn ()->rtl (); | |
2101 | ||
2102 | if (emit_type == EMIT_DIRECT) | |
2103 | { | |
2104 | emit_insn (pat); | |
2105 | if (dump_file) | |
2106 | { | |
2107 | fprintf (dump_file, " Insert vsetvl insn %d:\n", | |
2108 | INSN_UID (get_last_insn ())); | |
2109 | print_rtl_single (dump_file, get_last_insn ()); | |
2110 | } | |
2111 | } | |
2112 | else if (emit_type == EMIT_BEFORE) | |
2113 | { | |
2114 | emit_insn_before (pat, rinsn); | |
2115 | if (dump_file) | |
2116 | { | |
2117 | fprintf (dump_file, " Insert vsetvl insn before insn %d:\n", | |
2118 | INSN_UID (rinsn)); | |
2119 | print_rtl_single (dump_file, PREV_INSN (rinsn)); | |
2120 | } | |
2121 | } | |
2122 | else | |
2123 | { | |
2124 | emit_insn_after (pat, rinsn); | |
2125 | if (dump_file) | |
2126 | { | |
2127 | fprintf (dump_file, " Insert vsetvl insn after insn %d:\n", | |
2128 | INSN_UID (rinsn)); | |
2129 | print_rtl_single (dump_file, NEXT_INSN (rinsn)); | |
2130 | } | |
2131 | } | |
2132 | } | |
2133 | ||
2134 | void change_vsetvl_insn (const vsetvl_info &info) | |
2135 | { | |
2136 | rtx_insn *rinsn = info.get_insn ()->rtl (); | |
2137 | rtx new_pat = info.get_vsetvl_pat (); | |
2138 | ||
2139 | if (dump_file) | |
2140 | { | |
2141 | fprintf (dump_file, " Change insn %d from:\n", INSN_UID (rinsn)); | |
2142 | print_rtl_single (dump_file, rinsn); | |
2143 | } | |
2144 | ||
2145 | validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false); | |
2146 | ||
2147 | if (dump_file) | |
2148 | { | |
2149 | fprintf (dump_file, "\n to:\n"); | |
2150 | print_rtl_single (dump_file, rinsn); | |
2151 | } | |
2152 | } | |
2153 | ||
2154 | void remove_vsetvl_insn (const vsetvl_info &info) | |
2155 | { | |
2156 | rtx_insn *rinsn = info.get_insn ()->rtl (); | |
2157 | if (dump_file) | |
2158 | { | |
2159 | fprintf (dump_file, " Eliminate insn %d:\n", INSN_UID (rinsn)); | |
2160 | print_rtl_single (dump_file, rinsn); | |
2161 | } | |
2162 | if (in_sequence_p ()) | |
2163 | remove_insn (rinsn); | |
2164 | else | |
2165 | delete_insn (rinsn); | |
2166 | } | |
2167 | ||
2168 | bool successors_probability_equal_p (const basic_block cfg_bb) const | |
2169 | { | |
2170 | edge e; | |
2171 | edge_iterator ei; | |
2172 | profile_probability prob = profile_probability::uninitialized (); | |
2173 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
2174 | { | |
2175 | if (prob == profile_probability::uninitialized ()) | |
2176 | prob = m_vector_block_infos[e->dest->index].probability; | |
2177 | else if (prob == m_vector_block_infos[e->dest->index].probability) | |
2178 | continue; | |
2179 | else | |
2180 | /* We pick the highest probability among those incompatible VSETVL | |
2181 | infos. When all incompatible VSTEVL infos have same probability, we | |
2182 | don't pick any of them. */ | |
2183 | return false; | |
2184 | } | |
ec99ffab | 2185 | return true; |
29331e72 LD |
2186 | } |
2187 | ||
923a67f1 | 2188 | bool preds_all_same_avl_and_ratio_p (const vsetvl_info &curr_info) |
29331e72 LD |
2189 | { |
2190 | gcc_assert ( | |
2191 | !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()])); | |
2192 | ||
2193 | unsigned expr_index; | |
2194 | sbitmap_iterator sbi; | |
2195 | EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[curr_info.get_bb ()->index ()], 0, | |
2196 | expr_index, sbi) | |
2197 | { | |
2198 | const vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index]; | |
2199 | if (!prev_info.valid_p () | |
923a67f1 JZ |
2200 | || !m_dem.avl_available_p (prev_info, curr_info) |
2201 | || prev_info.get_ratio () != curr_info.get_ratio ()) | |
29331e72 LD |
2202 | return false; |
2203 | } | |
005fad9d | 2204 | |
005fad9d | 2205 | return true; |
29331e72 | 2206 | } |
005fad9d | 2207 | |
29331e72 LD |
2208 | public: |
2209 | pre_vsetvl () | |
2210 | : m_avl_def_in (nullptr), m_avl_def_out (nullptr), | |
2211 | m_vsetvl_def_in (nullptr), m_vsetvl_def_out (nullptr), m_avloc (nullptr), | |
2212 | m_avin (nullptr), m_avout (nullptr), m_kill (nullptr), m_antloc (nullptr), | |
2213 | m_transp (nullptr), m_insert (nullptr), m_del (nullptr), m_edges (nullptr) | |
2214 | { | |
2215 | /* Initialization of RTL_SSA. */ | |
2216 | calculate_dominance_info (CDI_DOMINATORS); | |
2217 | df_analyze (); | |
2218 | crtl->ssa = new function_info (cfun); | |
2219 | m_vector_block_infos.safe_grow_cleared (last_basic_block_for_fn (cfun)); | |
2220 | compute_probabilities (); | |
2221 | m_unknow_info.set_unknown (); | |
2222 | } | |
2223 | ||
2224 | void finish () | |
2225 | { | |
2226 | free_dominance_info (CDI_DOMINATORS); | |
2227 | if (crtl->ssa->perform_pending_updates ()) | |
2228 | cleanup_cfg (0); | |
2229 | delete crtl->ssa; | |
2230 | crtl->ssa = nullptr; | |
2231 | ||
2232 | if (m_avl_regs) | |
2233 | sbitmap_free (m_avl_regs); | |
2234 | if (m_reg_def_loc) | |
2235 | sbitmap_vector_free (m_reg_def_loc); | |
2236 | ||
2237 | if (m_avl_def_in) | |
2238 | sbitmap_vector_free (m_avl_def_in); | |
2239 | if (m_avl_def_out) | |
2240 | sbitmap_vector_free (m_avl_def_out); | |
2241 | ||
2242 | if (m_vsetvl_def_in) | |
2243 | sbitmap_vector_free (m_vsetvl_def_in); | |
2244 | if (m_vsetvl_def_out) | |
2245 | sbitmap_vector_free (m_vsetvl_def_out); | |
2246 | ||
2247 | if (m_avloc) | |
2248 | sbitmap_vector_free (m_avloc); | |
2249 | if (m_kill) | |
2250 | sbitmap_vector_free (m_kill); | |
2251 | if (m_antloc) | |
2252 | sbitmap_vector_free (m_antloc); | |
2253 | if (m_transp) | |
2254 | sbitmap_vector_free (m_transp); | |
2255 | if (m_insert) | |
2256 | sbitmap_vector_free (m_insert); | |
2257 | if (m_del) | |
2258 | sbitmap_vector_free (m_del); | |
2259 | if (m_avin) | |
2260 | sbitmap_vector_free (m_avin); | |
2261 | if (m_avout) | |
2262 | sbitmap_vector_free (m_avout); | |
2263 | ||
2264 | if (m_edges) | |
2265 | free_edge_list (m_edges); | |
2266 | } | |
2267 | ||
2268 | void compute_avl_def_data (); | |
2269 | void compute_vsetvl_def_data (); | |
2270 | void compute_lcm_local_properties (); | |
2271 | ||
2272 | void fuse_local_vsetvl_info (); | |
2273 | bool earliest_fuse_vsetvl_info (); | |
2274 | void pre_global_vsetvl_info (); | |
2275 | void emit_vsetvl (); | |
2276 | void cleaup (); | |
2277 | void remove_avl_operand (); | |
2278 | void remove_unused_dest_operand (); | |
2279 | ||
2280 | void dump (FILE *file, const char *title) const | |
2281 | { | |
2282 | fprintf (file, "\nVSETVL infos after %s\n\n", title); | |
2283 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2284 | { | |
2285 | const auto &block_info = m_vector_block_infos[bb->index ()]; | |
2286 | fprintf (file, " bb %d:\n", bb->index ()); | |
2287 | fprintf (file, " probability: "); | |
2288 | block_info.probability.dump (file); | |
2289 | fprintf (file, "\n"); | |
2290 | if (!block_info.empty_p ()) | |
2291 | { | |
2292 | fprintf (file, " Header vsetvl info:"); | |
2293 | block_info.get_entry_info ().dump (file, " "); | |
2294 | fprintf (file, " Footer vsetvl info:"); | |
2295 | block_info.get_exit_info ().dump (file, " "); | |
4fd09aed | 2296 | for (const auto &info : block_info.local_infos) |
29331e72 LD |
2297 | { |
2298 | fprintf (file, | |
2299 | " insn %d vsetvl info:", info.get_insn ()->uid ()); | |
2300 | info.dump (file, " "); | |
2301 | } | |
2302 | } | |
2303 | } | |
2304 | } | |
2305 | }; | |
c139f5e1 | 2306 | |
e030af3e | 2307 | void |
29331e72 | 2308 | pre_vsetvl::compute_avl_def_data () |
e030af3e | 2309 | { |
29331e72 LD |
2310 | if (bitmap_empty_p (m_avl_regs)) |
2311 | return; | |
e030af3e | 2312 | |
29331e72 LD |
2313 | unsigned num_regs = GP_REG_LAST + 1; |
2314 | unsigned num_bbs = last_basic_block_for_fn (cfun); | |
9243c3d1 | 2315 | |
29331e72 LD |
2316 | sbitmap *avl_def_loc_temp = sbitmap_vector_alloc (num_bbs, num_regs); |
2317 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
9243c3d1 | 2318 | { |
29331e72 LD |
2319 | bitmap_and (avl_def_loc_temp[bb->index ()], m_avl_regs, |
2320 | m_reg_def_loc[bb->index ()]); | |
2321 | ||
2322 | vsetvl_block_info &block_info = get_block_info (bb); | |
2323 | if (block_info.has_info ()) | |
9243c3d1 | 2324 | { |
29331e72 LD |
2325 | vsetvl_info &footer_info = block_info.get_exit_info (); |
2326 | gcc_assert (footer_info.valid_p ()); | |
2327 | if (footer_info.has_vl ()) | |
2328 | bitmap_set_bit (avl_def_loc_temp[bb->index ()], | |
2329 | REGNO (footer_info.get_vl ())); | |
9243c3d1 JZZ |
2330 | } |
2331 | } | |
9243c3d1 | 2332 | |
29331e72 LD |
2333 | if (m_avl_def_in) |
2334 | sbitmap_vector_free (m_avl_def_in); | |
2335 | if (m_avl_def_out) | |
2336 | sbitmap_vector_free (m_avl_def_out); | |
9243c3d1 | 2337 | |
29331e72 LD |
2338 | unsigned num_exprs = num_bbs * num_regs; |
2339 | sbitmap *avl_def_loc = sbitmap_vector_alloc (num_bbs, num_exprs); | |
2340 | sbitmap *m_kill = sbitmap_vector_alloc (num_bbs, num_exprs); | |
2341 | m_avl_def_in = sbitmap_vector_alloc (num_bbs, num_exprs); | |
2342 | m_avl_def_out = sbitmap_vector_alloc (num_bbs, num_exprs); | |
9243c3d1 | 2343 | |
29331e72 LD |
2344 | bitmap_vector_clear (avl_def_loc, num_bbs); |
2345 | bitmap_vector_clear (m_kill, num_bbs); | |
2346 | bitmap_vector_clear (m_avl_def_out, num_bbs); | |
2347 | ||
2348 | unsigned regno; | |
2349 | sbitmap_iterator sbi; | |
2350 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2351 | EXECUTE_IF_SET_IN_BITMAP (avl_def_loc_temp[bb->index ()], 0, regno, sbi) | |
2352 | { | |
2353 | bitmap_set_bit (avl_def_loc[bb->index ()], | |
2354 | get_expr_id (bb->index (), regno, num_bbs)); | |
2355 | bitmap_set_range (m_kill[bb->index ()], regno * num_bbs, num_bbs); | |
2356 | } | |
2357 | ||
2358 | basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun); | |
2359 | EXECUTE_IF_SET_IN_BITMAP (m_avl_regs, 0, regno, sbi) | |
2360 | bitmap_set_bit (m_avl_def_out[entry->index], | |
2361 | get_expr_id (entry->index, regno, num_bbs)); | |
2362 | ||
2363 | compute_reaching_defintion (avl_def_loc, m_kill, m_avl_def_in, m_avl_def_out); | |
2364 | ||
2365 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
9243c3d1 | 2366 | { |
29331e72 LD |
2367 | fprintf (dump_file, |
2368 | " Compute avl reaching defition data (num_bbs %d, num_regs " | |
2369 | "%d):\n\n", | |
2370 | num_bbs, num_regs); | |
2371 | fprintf (dump_file, " avl_regs: "); | |
2372 | dump_bitmap_file (dump_file, m_avl_regs); | |
2373 | fprintf (dump_file, "\n bitmap data:\n"); | |
2374 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
9243c3d1 | 2375 | { |
29331e72 LD |
2376 | unsigned int i = bb->index (); |
2377 | fprintf (dump_file, " BB %u:\n", i); | |
2378 | fprintf (dump_file, " avl_def_loc:"); | |
2379 | unsigned expr_id; | |
2380 | sbitmap_iterator sbi; | |
2381 | EXECUTE_IF_SET_IN_BITMAP (avl_def_loc[i], 0, expr_id, sbi) | |
ec99ffab | 2382 | { |
29331e72 LD |
2383 | fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs), |
2384 | get_bb_index (expr_id, num_bbs)); | |
2385 | } | |
2386 | fprintf (dump_file, "\n kill:"); | |
2387 | EXECUTE_IF_SET_IN_BITMAP (m_kill[i], 0, expr_id, sbi) | |
2388 | { | |
2389 | fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs), | |
2390 | get_bb_index (expr_id, num_bbs)); | |
2391 | } | |
2392 | fprintf (dump_file, "\n avl_def_in:"); | |
2393 | EXECUTE_IF_SET_IN_BITMAP (m_avl_def_in[i], 0, expr_id, sbi) | |
2394 | { | |
2395 | fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs), | |
2396 | get_bb_index (expr_id, num_bbs)); | |
2397 | } | |
2398 | fprintf (dump_file, "\n avl_def_out:"); | |
2399 | EXECUTE_IF_SET_IN_BITMAP (m_avl_def_out[i], 0, expr_id, sbi) | |
2400 | { | |
2401 | fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs), | |
2402 | get_bb_index (expr_id, num_bbs)); | |
ec99ffab | 2403 | } |
29331e72 | 2404 | fprintf (dump_file, "\n"); |
9243c3d1 JZZ |
2405 | } |
2406 | } | |
2407 | ||
29331e72 LD |
2408 | sbitmap_vector_free (avl_def_loc); |
2409 | sbitmap_vector_free (m_kill); | |
2410 | sbitmap_vector_free (avl_def_loc_temp); | |
9243c3d1 | 2411 | |
29331e72 | 2412 | m_dem.set_avl_in_out_data (m_avl_def_in, m_avl_def_out); |
9243c3d1 JZZ |
2413 | } |
2414 | ||
9243c3d1 | 2415 | void |
29331e72 | 2416 | pre_vsetvl::compute_vsetvl_def_data () |
9243c3d1 | 2417 | { |
29331e72 LD |
2418 | m_vsetvl_def_exprs.truncate (0); |
2419 | add_expr (m_vsetvl_def_exprs, m_unknow_info); | |
2420 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
9243c3d1 | 2421 | { |
29331e72 LD |
2422 | vsetvl_block_info &block_info = get_block_info (bb); |
2423 | if (block_info.empty_p ()) | |
2424 | continue; | |
2425 | vsetvl_info &footer_info = block_info.get_exit_info (); | |
2426 | gcc_assert (footer_info.valid_p () || footer_info.unknown_p ()); | |
2427 | add_expr (m_vsetvl_def_exprs, footer_info); | |
9243c3d1 JZZ |
2428 | } |
2429 | ||
29331e72 LD |
2430 | if (m_vsetvl_def_in) |
2431 | sbitmap_vector_free (m_vsetvl_def_in); | |
2432 | if (m_vsetvl_def_out) | |
2433 | sbitmap_vector_free (m_vsetvl_def_out); | |
9243c3d1 | 2434 | |
29331e72 LD |
2435 | sbitmap *def_loc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), |
2436 | m_vsetvl_def_exprs.length ()); | |
2437 | sbitmap *m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2438 | m_vsetvl_def_exprs.length ()); | |
9243c3d1 | 2439 | |
29331e72 LD |
2440 | m_vsetvl_def_in = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), |
2441 | m_vsetvl_def_exprs.length ()); | |
2442 | m_vsetvl_def_out = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2443 | m_vsetvl_def_exprs.length ()); | |
9243c3d1 | 2444 | |
29331e72 LD |
2445 | bitmap_vector_clear (def_loc, last_basic_block_for_fn (cfun)); |
2446 | bitmap_vector_clear (m_kill, last_basic_block_for_fn (cfun)); | |
2447 | bitmap_vector_clear (m_vsetvl_def_out, last_basic_block_for_fn (cfun)); | |
9243c3d1 | 2448 | |
29331e72 LD |
2449 | for (const bb_info *bb : crtl->ssa->bbs ()) |
2450 | { | |
2451 | vsetvl_block_info &block_info = get_block_info (bb); | |
2452 | if (block_info.empty_p ()) | |
9243c3d1 | 2453 | { |
29331e72 | 2454 | for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i += 1) |
9243c3d1 | 2455 | { |
29331e72 LD |
2456 | const vsetvl_info &info = *m_vsetvl_def_exprs[i]; |
2457 | if (!info.has_nonvlmax_reg_avl ()) | |
2458 | continue; | |
2459 | unsigned int regno; | |
2460 | sbitmap_iterator sbi; | |
2461 | EXECUTE_IF_SET_IN_BITMAP (m_reg_def_loc[bb->index ()], 0, regno, | |
2462 | sbi) | |
2463 | if (regno == REGNO (info.get_avl ())) | |
2464 | { | |
2465 | bitmap_set_bit (m_kill[bb->index ()], i); | |
2466 | bitmap_set_bit (def_loc[bb->index ()], | |
2467 | get_expr_index (m_vsetvl_def_exprs, | |
2468 | m_unknow_info)); | |
2469 | } | |
9243c3d1 | 2470 | } |
29331e72 | 2471 | continue; |
9243c3d1 JZZ |
2472 | } |
2473 | ||
29331e72 LD |
2474 | vsetvl_info &footer_info = block_info.get_exit_info (); |
2475 | bitmap_ones (m_kill[bb->index ()]); | |
2476 | bitmap_set_bit (def_loc[bb->index ()], | |
2477 | get_expr_index (m_vsetvl_def_exprs, footer_info)); | |
9243c3d1 JZZ |
2478 | } |
2479 | ||
29331e72 LD |
2480 | /* Set the def_out of the ENTRY basic block to m_unknow_info expr. */ |
2481 | basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun); | |
2482 | bitmap_set_bit (m_vsetvl_def_out[entry->index], | |
2483 | get_expr_index (m_vsetvl_def_exprs, m_unknow_info)); | |
9243c3d1 | 2484 | |
29331e72 LD |
2485 | compute_reaching_defintion (def_loc, m_kill, m_vsetvl_def_in, |
2486 | m_vsetvl_def_out); | |
2487 | ||
2488 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
e030af3e | 2489 | { |
29331e72 LD |
2490 | fprintf (dump_file, |
2491 | "\n Compute vsetvl info reaching defition data:\n\n"); | |
2492 | fprintf (dump_file, " Expression List (%d):\n", | |
2493 | m_vsetvl_def_exprs.length ()); | |
2494 | for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i++) | |
2495 | { | |
2496 | const auto &info = *m_vsetvl_def_exprs[i]; | |
2497 | fprintf (dump_file, " Expr[%u]: ", i); | |
2498 | info.dump (dump_file, " "); | |
2499 | } | |
2500 | fprintf (dump_file, "\n bitmap data:\n"); | |
2501 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2502 | { | |
2503 | unsigned int i = bb->index (); | |
2504 | fprintf (dump_file, " BB %u:\n", i); | |
2505 | fprintf (dump_file, " def_loc: "); | |
2506 | dump_bitmap_file (dump_file, def_loc[i]); | |
2507 | fprintf (dump_file, " kill: "); | |
2508 | dump_bitmap_file (dump_file, m_kill[i]); | |
2509 | fprintf (dump_file, " vsetvl_def_in: "); | |
2510 | dump_bitmap_file (dump_file, m_vsetvl_def_in[i]); | |
2511 | fprintf (dump_file, " vsetvl_def_out: "); | |
2512 | dump_bitmap_file (dump_file, m_vsetvl_def_out[i]); | |
2513 | } | |
e030af3e | 2514 | } |
4f673c5e | 2515 | |
29331e72 | 2516 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 2517 | { |
29331e72 LD |
2518 | vsetvl_block_info &block_info = get_block_info (bb); |
2519 | if (block_info.empty_p ()) | |
2520 | continue; | |
2521 | vsetvl_info &curr_info = block_info.get_entry_info (); | |
2522 | if (!curr_info.valid_p ()) | |
2523 | continue; | |
2524 | ||
2525 | unsigned int expr_index; | |
2526 | sbitmap_iterator sbi; | |
2527 | gcc_assert ( | |
2528 | !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()])); | |
2529 | bool full_available = true; | |
2530 | EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[bb->index ()], 0, expr_index, | |
2531 | sbi) | |
4f673c5e | 2532 | { |
29331e72 LD |
2533 | vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index]; |
2534 | if (!prev_info.valid_p () | |
2535 | || !m_dem.available_p (prev_info, curr_info)) | |
2536 | { | |
2537 | full_available = false; | |
2538 | break; | |
2539 | } | |
4f673c5e | 2540 | } |
29331e72 | 2541 | block_info.full_available = full_available; |
4f673c5e | 2542 | } |
29331e72 LD |
2543 | |
2544 | sbitmap_vector_free (def_loc); | |
2545 | sbitmap_vector_free (m_kill); | |
e030af3e | 2546 | } |
9243c3d1 | 2547 | |
e030af3e | 2548 | /* Compute the local properties of each recorded expression. |
6b6b9c68 | 2549 | |
e030af3e JZ |
2550 | Local properties are those that are defined by the block, irrespective of |
2551 | other blocks. | |
6b6b9c68 | 2552 | |
e030af3e JZ |
2553 | An expression is transparent in a block if its operands are not modified |
2554 | in the block. | |
6b6b9c68 | 2555 | |
e030af3e JZ |
2556 | An expression is computed (locally available) in a block if it is computed |
2557 | at least once and expression would contain the same value if the | |
2558 | computation was moved to the end of the block. | |
2559 | ||
2560 | An expression is locally anticipatable in a block if it is computed at | |
2561 | least once and expression would contain the same value if the computation | |
2562 | was moved to the beginning of the block. */ | |
2563 | void | |
29331e72 | 2564 | pre_vsetvl::compute_lcm_local_properties () |
6b6b9c68 | 2565 | { |
29331e72 LD |
2566 | m_exprs.truncate (0); |
2567 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2568 | { | |
2569 | vsetvl_block_info &block_info = get_block_info (bb); | |
2570 | if (block_info.empty_p ()) | |
2571 | continue; | |
2572 | vsetvl_info &header_info = block_info.get_entry_info (); | |
2573 | vsetvl_info &footer_info = block_info.get_exit_info (); | |
2574 | gcc_assert (footer_info.valid_p () || footer_info.unknown_p ()); | |
2575 | add_expr (m_exprs, header_info); | |
2576 | add_expr (m_exprs, footer_info); | |
2577 | } | |
2578 | ||
2579 | int num_exprs = m_exprs.length (); | |
2580 | if (m_avloc) | |
2581 | sbitmap_vector_free (m_avloc); | |
2582 | if (m_kill) | |
2583 | sbitmap_vector_free (m_kill); | |
2584 | if (m_antloc) | |
2585 | sbitmap_vector_free (m_antloc); | |
2586 | if (m_transp) | |
2587 | sbitmap_vector_free (m_transp); | |
2588 | if (m_avin) | |
2589 | sbitmap_vector_free (m_avin); | |
2590 | if (m_avout) | |
2591 | sbitmap_vector_free (m_avout); | |
2592 | ||
2593 | m_avloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2594 | m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2595 | m_antloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2596 | m_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2597 | m_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2598 | m_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2599 | ||
2600 | bitmap_vector_clear (m_avloc, last_basic_block_for_fn (cfun)); | |
2601 | bitmap_vector_clear (m_antloc, last_basic_block_for_fn (cfun)); | |
2602 | bitmap_vector_clear (m_transp, last_basic_block_for_fn (cfun)); | |
2603 | ||
e030af3e JZ |
2604 | /* - If T is locally available at the end of a block, then T' must be |
2605 | available at the end of the same block. Since some optimization has | |
2606 | occurred earlier, T' might not be locally available, however, it must | |
2607 | have been previously computed on all paths. As a formula, T at AVLOC(B) | |
2608 | implies that T' at AVOUT(B). | |
2609 | An "available occurrence" is one that is the last occurrence in the | |
2610 | basic block and the operands are not modified by following statements in | |
2611 | the basic block [including this insn]. | |
6b6b9c68 | 2612 | |
e030af3e JZ |
2613 | - If T is locally anticipated at the beginning of a block, then either |
2614 | T', is locally anticipated or it is already available from previous | |
2615 | blocks. As a formula, this means that T at ANTLOC(B) implies that T' at | |
2616 | ANTLOC(B) at AVIN(B). | |
2617 | An "anticipatable occurrence" is one that is the first occurrence in the | |
2618 | basic block, the operands are not modified in the basic block prior | |
2619 | to the occurrence and the output is not used between the start of | |
2620 | the block and the occurrence. */ | |
e030af3e | 2621 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 2622 | { |
29331e72 LD |
2623 | unsigned bb_index = bb->index (); |
2624 | vsetvl_block_info &block_info = get_block_info (bb); | |
9243c3d1 | 2625 | |
29331e72 LD |
2626 | /* Compute m_transp */ |
2627 | if (block_info.empty_p ()) | |
9243c3d1 | 2628 | { |
29331e72 LD |
2629 | bitmap_ones (m_transp[bb_index]); |
2630 | for (int i = 0; i < num_exprs; i += 1) | |
4f673c5e | 2631 | { |
29331e72 LD |
2632 | const vsetvl_info &info = *m_exprs[i]; |
2633 | if (!info.has_nonvlmax_reg_avl () && !info.has_vl ()) | |
2634 | continue; | |
2635 | ||
7b2984ad | 2636 | if (info.has_nonvlmax_reg_avl ()) |
29331e72 | 2637 | { |
7b2984ad JZ |
2638 | unsigned int regno; |
2639 | sbitmap_iterator sbi; | |
2640 | EXECUTE_IF_SET_IN_BITMAP (m_reg_def_loc[bb->index ()], 0, | |
2641 | regno, sbi) | |
2642 | { | |
2643 | if (regno == REGNO (info.get_avl ())) | |
2644 | bitmap_clear_bit (m_transp[bb->index ()], i); | |
2645 | } | |
29331e72 LD |
2646 | } |
2647 | ||
e030af3e JZ |
2648 | for (const insn_info *insn : bb->real_nondebug_insns ()) |
2649 | { | |
9c16ca93 JZ |
2650 | if (info.has_nonvlmax_reg_avl () |
2651 | && find_access (insn->defs (), REGNO (info.get_avl ()))) | |
e030af3e | 2652 | { |
29331e72 | 2653 | bitmap_clear_bit (m_transp[bb_index], i); |
e030af3e JZ |
2654 | break; |
2655 | } | |
2656 | } | |
4f673c5e | 2657 | } |
9243c3d1 | 2658 | |
29331e72 | 2659 | continue; |
9243c3d1 | 2660 | } |
e030af3e | 2661 | |
29331e72 LD |
2662 | vsetvl_info &header_info = block_info.get_entry_info (); |
2663 | vsetvl_info &footer_info = block_info.get_exit_info (); | |
9243c3d1 | 2664 | |
29331e72 LD |
2665 | if (header_info.valid_p () |
2666 | && (anticpatable_exp_p (header_info) || block_info.full_available)) | |
2667 | bitmap_set_bit (m_antloc[bb_index], | |
2668 | get_expr_index (m_exprs, header_info)); | |
9243c3d1 | 2669 | |
29331e72 LD |
2670 | if (footer_info.valid_p ()) |
2671 | for (int i = 0; i < num_exprs; i += 1) | |
2672 | { | |
2673 | const vsetvl_info &info = *m_exprs[i]; | |
2674 | if (!info.valid_p ()) | |
2675 | continue; | |
2676 | if (available_exp_p (footer_info, info)) | |
2677 | bitmap_set_bit (m_avloc[bb_index], i); | |
2678 | } | |
2679 | } | |
9243c3d1 | 2680 | |
29331e72 | 2681 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 2682 | { |
29331e72 LD |
2683 | unsigned bb_index = bb->index (); |
2684 | bitmap_ior (m_kill[bb_index], m_transp[bb_index], m_avloc[bb_index]); | |
2685 | bitmap_not (m_kill[bb_index], m_kill[bb_index]); | |
9243c3d1 JZZ |
2686 | } |
2687 | ||
29331e72 | 2688 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 2689 | { |
29331e72 | 2690 | unsigned bb_index = bb->index (); |
9243c3d1 JZZ |
2691 | edge e; |
2692 | edge_iterator ei; | |
29331e72 | 2693 | FOR_EACH_EDGE (e, ei, bb->cfg_bb ()->preds) |
9243c3d1 JZZ |
2694 | if (e->flags & EDGE_COMPLEX) |
2695 | { | |
29331e72 LD |
2696 | bitmap_clear (m_antloc[bb_index]); |
2697 | bitmap_clear (m_transp[bb_index]); | |
9243c3d1 JZZ |
2698 | } |
2699 | } | |
2700 | } | |
2701 | ||
29331e72 LD |
2702 | void |
2703 | pre_vsetvl::fuse_local_vsetvl_info () | |
e030af3e | 2704 | { |
29331e72 LD |
2705 | m_reg_def_loc |
2706 | = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), GP_REG_LAST + 1); | |
2707 | bitmap_vector_clear (m_reg_def_loc, last_basic_block_for_fn (cfun)); | |
2708 | bitmap_ones (m_reg_def_loc[ENTRY_BLOCK_PTR_FOR_FN (cfun)->index]); | |
2709 | ||
2710 | for (bb_info *bb : crtl->ssa->bbs ()) | |
e030af3e | 2711 | { |
29331e72 | 2712 | auto &block_info = get_block_info (bb); |
4fd09aed | 2713 | block_info.bb = bb; |
29331e72 | 2714 | if (dump_file && (dump_flags & TDF_DETAILS)) |
e030af3e | 2715 | { |
29331e72 LD |
2716 | fprintf (dump_file, " Try fuse basic block %d\n", bb->index ()); |
2717 | } | |
2718 | auto_vec<vsetvl_info> infos; | |
2719 | for (insn_info *insn : bb->real_nondebug_insns ()) | |
2720 | { | |
2721 | vsetvl_info curr_info = vsetvl_info (insn); | |
2722 | if (curr_info.valid_p () || curr_info.unknown_p ()) | |
2723 | infos.safe_push (curr_info); | |
2724 | ||
2725 | /* Collecting GP registers modified by the current bb. */ | |
2726 | if (insn->is_real ()) | |
2727 | for (def_info *def : insn->defs ()) | |
2728 | if (def->is_reg () && GP_REG_P (def->regno ())) | |
2729 | bitmap_set_bit (m_reg_def_loc[bb->index ()], def->regno ()); | |
2730 | } | |
e030af3e | 2731 | |
29331e72 LD |
2732 | vsetvl_info prev_info = vsetvl_info (); |
2733 | prev_info.set_empty (); | |
2734 | for (auto &curr_info : infos) | |
2735 | { | |
2736 | if (prev_info.empty_p ()) | |
2737 | prev_info = curr_info; | |
2738 | else if ((curr_info.unknown_p () && prev_info.valid_p ()) | |
2739 | || (curr_info.valid_p () && prev_info.unknown_p ())) | |
2740 | { | |
4fd09aed | 2741 | block_info.local_infos.safe_push (prev_info); |
29331e72 LD |
2742 | prev_info = curr_info; |
2743 | } | |
2744 | else if (curr_info.valid_p () && prev_info.valid_p ()) | |
2745 | { | |
2746 | if (m_dem.available_p (prev_info, curr_info)) | |
e7b585a4 | 2747 | { |
29331e72 | 2748 | if (dump_file && (dump_flags & TDF_DETAILS)) |
e7b585a4 | 2749 | { |
29331e72 LD |
2750 | fprintf (dump_file, |
2751 | " Ignore curr info since prev info " | |
2752 | "available with it:\n"); | |
2753 | fprintf (dump_file, " prev_info: "); | |
2754 | prev_info.dump (dump_file, " "); | |
2755 | fprintf (dump_file, " curr_info: "); | |
2756 | curr_info.dump (dump_file, " "); | |
2757 | fprintf (dump_file, "\n"); | |
e7b585a4 | 2758 | } |
4cd4c34a | 2759 | if (!curr_info.vl_used_by_non_rvv_insn_p () |
29331e72 LD |
2760 | && vsetvl_insn_p (curr_info.get_insn ()->rtl ())) |
2761 | m_delete_list.safe_push (curr_info); | |
e030af3e | 2762 | |
29331e72 LD |
2763 | if (curr_info.get_read_vl_insn ()) |
2764 | prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ()); | |
e030af3e | 2765 | } |
29331e72 | 2766 | else if (m_dem.compatible_p (prev_info, curr_info)) |
e030af3e | 2767 | { |
29331e72 | 2768 | if (dump_file && (dump_flags & TDF_DETAILS)) |
e030af3e | 2769 | { |
29331e72 LD |
2770 | fprintf (dump_file, " Fuse curr info since prev info " |
2771 | "compatible with it:\n"); | |
2772 | fprintf (dump_file, " prev_info: "); | |
2773 | prev_info.dump (dump_file, " "); | |
2774 | fprintf (dump_file, " curr_info: "); | |
2775 | curr_info.dump (dump_file, " "); | |
e030af3e | 2776 | } |
29331e72 LD |
2777 | m_dem.merge (prev_info, curr_info); |
2778 | if (curr_info.get_read_vl_insn ()) | |
2779 | prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ()); | |
2780 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
e030af3e | 2781 | { |
29331e72 LD |
2782 | fprintf (dump_file, " prev_info after fused: "); |
2783 | prev_info.dump (dump_file, " "); | |
2784 | fprintf (dump_file, "\n"); | |
e030af3e | 2785 | } |
e030af3e JZ |
2786 | } |
2787 | else | |
2788 | { | |
29331e72 LD |
2789 | if (dump_file && (dump_flags & TDF_DETAILS)) |
2790 | { | |
2791 | fprintf (dump_file, | |
2792 | " Cannot fuse uncompatible infos:\n"); | |
2793 | fprintf (dump_file, " prev_info: "); | |
2794 | prev_info.dump (dump_file, " "); | |
2795 | fprintf (dump_file, " curr_info: "); | |
2796 | curr_info.dump (dump_file, " "); | |
2797 | } | |
4fd09aed | 2798 | block_info.local_infos.safe_push (prev_info); |
29331e72 | 2799 | prev_info = curr_info; |
e030af3e JZ |
2800 | } |
2801 | } | |
2802 | } | |
29331e72 LD |
2803 | |
2804 | if (prev_info.valid_p () || prev_info.unknown_p ()) | |
4fd09aed | 2805 | block_info.local_infos.safe_push (prev_info); |
e030af3e | 2806 | } |
e030af3e | 2807 | |
29331e72 LD |
2808 | m_avl_regs = sbitmap_alloc (GP_REG_LAST + 1); |
2809 | bitmap_clear (m_avl_regs); | |
2810 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
e030af3e | 2811 | { |
29331e72 LD |
2812 | vsetvl_block_info &block_info = get_block_info (bb); |
2813 | if (block_info.empty_p ()) | |
2814 | continue; | |
2815 | ||
2816 | vsetvl_info &header_info = block_info.get_entry_info (); | |
2817 | if (header_info.valid_p () && header_info.has_nonvlmax_reg_avl ()) | |
e030af3e | 2818 | { |
29331e72 LD |
2819 | gcc_assert (GP_REG_P (REGNO (header_info.get_avl ()))); |
2820 | bitmap_set_bit (m_avl_regs, REGNO (header_info.get_avl ())); | |
e030af3e | 2821 | } |
e030af3e JZ |
2822 | } |
2823 | } | |
2824 | ||
29331e72 | 2825 | |
9243c3d1 | 2826 | bool |
29331e72 | 2827 | pre_vsetvl::earliest_fuse_vsetvl_info () |
9243c3d1 | 2828 | { |
29331e72 LD |
2829 | compute_avl_def_data (); |
2830 | compute_vsetvl_def_data (); | |
2831 | compute_lcm_local_properties (); | |
9243c3d1 | 2832 | |
29331e72 LD |
2833 | unsigned num_exprs = m_exprs.length (); |
2834 | struct edge_list *m_edges = create_edge_list (); | |
2835 | unsigned num_edges = NUM_EDGES (m_edges); | |
2836 | sbitmap *antin | |
2837 | = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2838 | sbitmap *antout | |
2839 | = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
005fad9d | 2840 | |
29331e72 | 2841 | sbitmap *earliest = sbitmap_vector_alloc (num_edges, num_exprs); |
9243c3d1 | 2842 | |
29331e72 LD |
2843 | compute_available (m_avloc, m_kill, m_avout, m_avin); |
2844 | compute_antinout_edge (m_antloc, m_transp, antin, antout); | |
2845 | compute_earliest (m_edges, num_exprs, antin, antout, m_avout, m_kill, | |
2846 | earliest); | |
2847 | ||
2848 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
9243c3d1 | 2849 | { |
29331e72 LD |
2850 | fprintf (dump_file, "\n Compute LCM earliest insert data:\n\n"); |
2851 | fprintf (dump_file, " Expression List (%u):\n", num_exprs); | |
2852 | for (unsigned i = 0; i < num_exprs; i++) | |
9243c3d1 | 2853 | { |
29331e72 LD |
2854 | const auto &info = *m_exprs[i]; |
2855 | fprintf (dump_file, " Expr[%u]: ", i); | |
2856 | info.dump (dump_file, " "); | |
9243c3d1 | 2857 | } |
29331e72 LD |
2858 | fprintf (dump_file, "\n bitmap data:\n"); |
2859 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2860 | { | |
2861 | unsigned int i = bb->index (); | |
2862 | fprintf (dump_file, " BB %u:\n", i); | |
2863 | fprintf (dump_file, " avloc: "); | |
2864 | dump_bitmap_file (dump_file, m_avloc[i]); | |
2865 | fprintf (dump_file, " kill: "); | |
2866 | dump_bitmap_file (dump_file, m_kill[i]); | |
2867 | fprintf (dump_file, " antloc: "); | |
2868 | dump_bitmap_file (dump_file, m_antloc[i]); | |
2869 | fprintf (dump_file, " transp: "); | |
2870 | dump_bitmap_file (dump_file, m_transp[i]); | |
2871 | ||
2872 | fprintf (dump_file, " avin: "); | |
2873 | dump_bitmap_file (dump_file, m_avin[i]); | |
2874 | fprintf (dump_file, " avout: "); | |
2875 | dump_bitmap_file (dump_file, m_avout[i]); | |
2876 | fprintf (dump_file, " antin: "); | |
2877 | dump_bitmap_file (dump_file, antin[i]); | |
2878 | fprintf (dump_file, " antout: "); | |
2879 | dump_bitmap_file (dump_file, antout[i]); | |
2880 | } | |
2881 | fprintf (dump_file, "\n"); | |
2882 | fprintf (dump_file, " earliest:\n"); | |
2883 | for (unsigned ed = 0; ed < num_edges; ed++) | |
2884 | { | |
2885 | edge eg = INDEX_EDGE (m_edges, ed); | |
9243c3d1 | 2886 | |
29331e72 LD |
2887 | if (bitmap_empty_p (earliest[ed])) |
2888 | continue; | |
2889 | fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index, | |
2890 | eg->dest->index); | |
2891 | dump_bitmap_file (dump_file, earliest[ed]); | |
2892 | } | |
2893 | fprintf (dump_file, "\n"); | |
2894 | } | |
9243c3d1 | 2895 | |
29331e72 | 2896 | if (dump_file && (dump_flags & TDF_DETAILS)) |
9243c3d1 | 2897 | { |
29331e72 LD |
2898 | fprintf (dump_file, " Fused global info result:\n"); |
2899 | } | |
9243c3d1 | 2900 | |
29331e72 LD |
2901 | bool changed = false; |
2902 | for (unsigned ed = 0; ed < num_edges; ed++) | |
2903 | { | |
2904 | sbitmap e = earliest[ed]; | |
2905 | if (bitmap_empty_p (e)) | |
9243c3d1 JZZ |
2906 | continue; |
2907 | ||
29331e72 LD |
2908 | unsigned int expr_index; |
2909 | sbitmap_iterator sbi; | |
2910 | EXECUTE_IF_SET_IN_BITMAP (e, 0, expr_index, sbi) | |
ec99ffab | 2911 | { |
29331e72 LD |
2912 | vsetvl_info &curr_info = *m_exprs[expr_index]; |
2913 | if (!curr_info.valid_p ()) | |
2914 | continue; | |
2915 | ||
2916 | edge eg = INDEX_EDGE (m_edges, ed); | |
2917 | if (eg->probability == profile_probability::never ()) | |
2918 | continue; | |
2919 | if (eg->src == ENTRY_BLOCK_PTR_FOR_FN (cfun) | |
2920 | || eg->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) | |
2921 | continue; | |
ff8f9544 | 2922 | |
29331e72 LD |
2923 | vsetvl_block_info &src_block_info = get_block_info (eg->src); |
2924 | vsetvl_block_info &dest_block_info = get_block_info (eg->dest); | |
ff8f9544 | 2925 | |
29331e72 LD |
2926 | if (src_block_info.probability |
2927 | == profile_probability::uninitialized ()) | |
ff8f9544 | 2928 | continue; |
9243c3d1 | 2929 | |
29331e72 | 2930 | if (src_block_info.empty_p ()) |
9243c3d1 | 2931 | { |
29331e72 LD |
2932 | vsetvl_info new_curr_info = curr_info; |
2933 | new_curr_info.set_bb (crtl->ssa->bb (eg->dest)); | |
2934 | bool has_compatible_p = false; | |
2935 | unsigned int def_expr_index; | |
2936 | sbitmap_iterator sbi2; | |
2937 | EXECUTE_IF_SET_IN_BITMAP ( | |
2938 | m_vsetvl_def_in[new_curr_info.get_bb ()->index ()], 0, | |
2939 | def_expr_index, sbi2) | |
9243c3d1 | 2940 | { |
29331e72 LD |
2941 | vsetvl_info &prev_info = *m_vsetvl_def_exprs[def_expr_index]; |
2942 | if (!prev_info.valid_p ()) | |
2943 | continue; | |
2944 | if (m_dem.compatible_p (prev_info, new_curr_info)) | |
9243c3d1 | 2945 | { |
29331e72 LD |
2946 | has_compatible_p = true; |
2947 | break; | |
9243c3d1 | 2948 | } |
9243c3d1 | 2949 | } |
29331e72 | 2950 | if (!has_compatible_p) |
9243c3d1 | 2951 | { |
29331e72 LD |
2952 | if (dump_file && (dump_flags & TDF_DETAILS)) |
2953 | { | |
2954 | fprintf (dump_file, | |
2955 | " Forbidden lift up vsetvl info into bb %u " | |
2956 | "since there is no vsetvl info that reaching in " | |
2957 | "is compatible with it:", | |
2958 | eg->src->index); | |
2959 | curr_info.dump (dump_file, " "); | |
2960 | } | |
2961 | continue; | |
9243c3d1 JZZ |
2962 | } |
2963 | ||
29331e72 | 2964 | if (dump_file && (dump_flags & TDF_DETAILS)) |
e030af3e JZ |
2965 | { |
2966 | fprintf (dump_file, | |
29331e72 LD |
2967 | " Set empty bb %u to info:", eg->src->index); |
2968 | curr_info.dump (dump_file, " "); | |
e030af3e | 2969 | } |
29331e72 LD |
2970 | src_block_info.set_info (curr_info); |
2971 | src_block_info.probability = dest_block_info.probability; | |
2972 | changed = true; | |
9243c3d1 | 2973 | } |
29331e72 LD |
2974 | else if (src_block_info.has_info ()) |
2975 | { | |
2976 | vsetvl_info &prev_info = src_block_info.get_exit_info (); | |
2977 | gcc_assert (prev_info.valid_p ()); | |
2978 | ||
2979 | if (m_dem.compatible_p (prev_info, curr_info)) | |
2980 | { | |
2981 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2982 | { | |
2983 | fprintf (dump_file, " Fuse curr info since prev info " | |
2984 | "compatible with it:\n"); | |
2985 | fprintf (dump_file, " prev_info: "); | |
2986 | prev_info.dump (dump_file, " "); | |
2987 | fprintf (dump_file, " curr_info: "); | |
2988 | curr_info.dump (dump_file, " "); | |
2989 | } | |
2990 | m_dem.merge (prev_info, curr_info); | |
2991 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
2992 | { | |
2993 | fprintf (dump_file, " prev_info after fused: "); | |
2994 | prev_info.dump (dump_file, " "); | |
2995 | fprintf (dump_file, "\n"); | |
2996 | } | |
2997 | changed = true; | |
2998 | if (src_block_info.has_info ()) | |
2999 | src_block_info.probability += dest_block_info.probability; | |
3000 | } | |
3001 | else if (src_block_info.has_info () | |
3002 | && !m_dem.compatible_p (prev_info, curr_info)) | |
3003 | { | |
3004 | /* Cancel lift up if probabilities are equal. */ | |
3005 | if (successors_probability_equal_p (eg->src)) | |
3006 | { | |
3007 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3008 | { | |
3009 | fprintf (dump_file, | |
3010 | " Change empty bb %u to from:", | |
3011 | eg->src->index); | |
3012 | prev_info.dump (dump_file, " "); | |
3013 | fprintf (dump_file, | |
3014 | " to (higher probability):"); | |
3015 | curr_info.dump (dump_file, " "); | |
3016 | } | |
3017 | src_block_info.set_empty_info (); | |
3018 | src_block_info.probability | |
3019 | = profile_probability::uninitialized (); | |
3020 | changed = true; | |
3021 | } | |
3022 | /* Choose the one with higher probability. */ | |
3023 | else if (dest_block_info.probability | |
3024 | > src_block_info.probability) | |
3025 | { | |
3026 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3027 | { | |
3028 | fprintf (dump_file, | |
3029 | " Change empty bb %u to from:", | |
3030 | eg->src->index); | |
3031 | prev_info.dump (dump_file, " "); | |
3032 | fprintf (dump_file, | |
3033 | " to (higher probability):"); | |
3034 | curr_info.dump (dump_file, " "); | |
3035 | } | |
3036 | src_block_info.set_info (curr_info); | |
3037 | src_block_info.probability = dest_block_info.probability; | |
3038 | changed = true; | |
3039 | } | |
3040 | } | |
3041 | } | |
3042 | else | |
e030af3e | 3043 | { |
29331e72 LD |
3044 | vsetvl_info &prev_info = src_block_info.get_exit_info (); |
3045 | if (!prev_info.valid_p () | |
3046 | || m_dem.available_p (prev_info, curr_info)) | |
3047 | continue; | |
3048 | ||
3049 | if (m_dem.compatible_p (prev_info, curr_info)) | |
3050 | { | |
3051 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3052 | { | |
3053 | fprintf (dump_file, " Fuse curr info since prev info " | |
3054 | "compatible with it:\n"); | |
3055 | fprintf (dump_file, " prev_info: "); | |
3056 | prev_info.dump (dump_file, " "); | |
3057 | fprintf (dump_file, " curr_info: "); | |
3058 | curr_info.dump (dump_file, " "); | |
3059 | } | |
3060 | m_dem.merge (prev_info, curr_info); | |
3061 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3062 | { | |
3063 | fprintf (dump_file, " prev_info after fused: "); | |
3064 | prev_info.dump (dump_file, " "); | |
3065 | fprintf (dump_file, "\n"); | |
3066 | } | |
3067 | changed = true; | |
3068 | } | |
e030af3e | 3069 | } |
9243c3d1 JZZ |
3070 | } |
3071 | } | |
3072 | ||
0d50facd | 3073 | if (dump_file && (dump_flags & TDF_DETAILS)) |
c919d059 | 3074 | { |
29331e72 | 3075 | fprintf (dump_file, "\n"); |
c919d059 | 3076 | } |
c919d059 | 3077 | |
29331e72 LD |
3078 | sbitmap_vector_free (antin); |
3079 | sbitmap_vector_free (antout); | |
3080 | sbitmap_vector_free (earliest); | |
3081 | free_edge_list (m_edges); | |
c919d059 | 3082 | |
29331e72 | 3083 | return changed; |
c919d059 KC |
3084 | } |
3085 | ||
8421f279 | 3086 | void |
29331e72 | 3087 | pre_vsetvl::pre_global_vsetvl_info () |
c919d059 | 3088 | { |
29331e72 LD |
3089 | compute_avl_def_data (); |
3090 | compute_vsetvl_def_data (); | |
3091 | compute_lcm_local_properties (); | |
c919d059 | 3092 | |
29331e72 LD |
3093 | unsigned num_exprs = m_exprs.length (); |
3094 | m_edges = pre_edge_lcm_avs (num_exprs, m_transp, m_avloc, m_antloc, m_kill, | |
3095 | m_avin, m_avout, &m_insert, &m_del); | |
3096 | unsigned num_edges = NUM_EDGES (m_edges); | |
c919d059 | 3097 | |
29331e72 LD |
3098 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3099 | { | |
3100 | fprintf (dump_file, "\n Compute LCM insert and delete data:\n\n"); | |
3101 | fprintf (dump_file, " Expression List (%u):\n", num_exprs); | |
3102 | for (unsigned i = 0; i < num_exprs; i++) | |
c919d059 | 3103 | { |
29331e72 LD |
3104 | const auto &info = *m_exprs[i]; |
3105 | fprintf (dump_file, " Expr[%u]: ", i); | |
3106 | info.dump (dump_file, " "); | |
c919d059 | 3107 | } |
29331e72 LD |
3108 | fprintf (dump_file, "\n bitmap data:\n"); |
3109 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
c919d059 | 3110 | { |
29331e72 LD |
3111 | unsigned i = bb->index (); |
3112 | fprintf (dump_file, " BB %u:\n", i); | |
3113 | fprintf (dump_file, " avloc: "); | |
3114 | dump_bitmap_file (dump_file, m_avloc[i]); | |
3115 | fprintf (dump_file, " kill: "); | |
3116 | dump_bitmap_file (dump_file, m_kill[i]); | |
3117 | fprintf (dump_file, " antloc: "); | |
3118 | dump_bitmap_file (dump_file, m_antloc[i]); | |
3119 | fprintf (dump_file, " transp: "); | |
3120 | dump_bitmap_file (dump_file, m_transp[i]); | |
3121 | ||
3122 | fprintf (dump_file, " avin: "); | |
3123 | dump_bitmap_file (dump_file, m_avin[i]); | |
3124 | fprintf (dump_file, " avout: "); | |
3125 | dump_bitmap_file (dump_file, m_avout[i]); | |
3126 | fprintf (dump_file, " del: "); | |
3127 | dump_bitmap_file (dump_file, m_del[i]); | |
c919d059 | 3128 | } |
29331e72 LD |
3129 | fprintf (dump_file, "\n"); |
3130 | fprintf (dump_file, " insert:\n"); | |
3131 | for (unsigned ed = 0; ed < num_edges; ed++) | |
8421f279 | 3132 | { |
29331e72 | 3133 | edge eg = INDEX_EDGE (m_edges, ed); |
c919d059 | 3134 | |
29331e72 LD |
3135 | if (bitmap_empty_p (m_insert[ed])) |
3136 | continue; | |
3137 | fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index, | |
3138 | eg->dest->index); | |
3139 | dump_bitmap_file (dump_file, m_insert[ed]); | |
c919d059 | 3140 | } |
29331e72 LD |
3141 | } |
3142 | ||
3143 | /* Remove vsetvl infos as LCM suggest */ | |
3144 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
3145 | { | |
3146 | sbitmap d = m_del[bb->index ()]; | |
3147 | if (bitmap_count_bits (d) == 0) | |
c919d059 | 3148 | continue; |
29331e72 LD |
3149 | gcc_assert (bitmap_count_bits (d) == 1); |
3150 | unsigned expr_index = bitmap_first_set_bit (d); | |
3151 | vsetvl_info &info = *m_exprs[expr_index]; | |
3152 | gcc_assert (info.valid_p ()); | |
3153 | gcc_assert (info.get_bb () == bb); | |
3154 | const vsetvl_block_info &block_info = get_block_info (info.get_bb ()); | |
3155 | gcc_assert (block_info.get_entry_info () == info); | |
3156 | info.set_delete (); | |
3157 | } | |
c919d059 | 3158 | |
29331e72 LD |
3159 | for (const bb_info *bb : crtl->ssa->bbs ()) |
3160 | { | |
3161 | vsetvl_block_info &block_info = get_block_info (bb); | |
3162 | if (block_info.empty_p ()) | |
3163 | continue; | |
3164 | vsetvl_info &curr_info = block_info.get_entry_info (); | |
3165 | if (curr_info.delete_p ()) | |
c919d059 | 3166 | { |
4fd09aed | 3167 | if (block_info.local_infos.is_empty ()) |
29331e72 | 3168 | continue; |
4fd09aed | 3169 | curr_info = block_info.local_infos[0]; |
c919d059 | 3170 | } |
4cd4c34a | 3171 | if (curr_info.valid_p () && !curr_info.vl_used_by_non_rvv_insn_p () |
923a67f1 | 3172 | && preds_all_same_avl_and_ratio_p (curr_info)) |
29331e72 | 3173 | curr_info.set_change_vtype_only (); |
c919d059 | 3174 | |
29331e72 LD |
3175 | vsetvl_info prev_info = vsetvl_info (); |
3176 | prev_info.set_empty (); | |
4fd09aed | 3177 | for (auto &curr_info : block_info.local_infos) |
c919d059 | 3178 | { |
29331e72 | 3179 | if (prev_info.valid_p () && curr_info.valid_p () |
923a67f1 JZ |
3180 | && m_dem.avl_available_p (prev_info, curr_info) |
3181 | && prev_info.get_ratio () == curr_info.get_ratio ()) | |
29331e72 LD |
3182 | curr_info.set_change_vtype_only (); |
3183 | prev_info = curr_info; | |
c919d059 | 3184 | } |
20c85207 | 3185 | } |
20c85207 JZ |
3186 | } |
3187 | ||
29331e72 LD |
3188 | void |
3189 | pre_vsetvl::emit_vsetvl () | |
20c85207 | 3190 | { |
29331e72 | 3191 | bool need_commit = false; |
20c85207 | 3192 | |
29331e72 | 3193 | for (const bb_info *bb : crtl->ssa->bbs ()) |
20c85207 | 3194 | { |
4fd09aed | 3195 | for (const auto &curr_info : get_block_info (bb).local_infos) |
29331e72 LD |
3196 | { |
3197 | insn_info *insn = curr_info.get_insn (); | |
3198 | if (curr_info.delete_p ()) | |
3199 | { | |
3200 | if (vsetvl_insn_p (insn->rtl ())) | |
3201 | remove_vsetvl_insn (curr_info); | |
3202 | continue; | |
3203 | } | |
3204 | else if (curr_info.valid_p ()) | |
3205 | { | |
3206 | if (vsetvl_insn_p (insn->rtl ())) | |
3207 | { | |
3208 | const vsetvl_info temp = vsetvl_info (insn); | |
3209 | if (!(curr_info == temp)) | |
3210 | { | |
3211 | if (dump_file) | |
3212 | { | |
3213 | fprintf (dump_file, "\n Change vsetvl info from: "); | |
3214 | temp.dump (dump_file, " "); | |
3215 | fprintf (dump_file, " to: "); | |
3216 | curr_info.dump (dump_file, " "); | |
3217 | } | |
3218 | change_vsetvl_insn (curr_info); | |
3219 | } | |
3220 | } | |
3221 | else | |
3222 | { | |
3223 | if (dump_file) | |
3224 | { | |
3225 | fprintf (dump_file, | |
3226 | "\n Insert vsetvl info before insn %d: ", | |
3227 | insn->uid ()); | |
3228 | curr_info.dump (dump_file, " "); | |
3229 | } | |
3230 | insert_vsetvl_insn (EMIT_BEFORE, curr_info); | |
3231 | } | |
3232 | } | |
3233 | } | |
20c85207 | 3234 | } |
20c85207 | 3235 | |
29331e72 | 3236 | for (const vsetvl_info &item : m_delete_list) |
20c85207 | 3237 | { |
29331e72 LD |
3238 | gcc_assert (vsetvl_insn_p (item.get_insn ()->rtl ())); |
3239 | remove_vsetvl_insn (item); | |
20c85207 JZ |
3240 | } |
3241 | ||
d1189cee JZ |
3242 | /* Insert vsetvl info that was not deleted after lift up. */ |
3243 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
3244 | { | |
3245 | const vsetvl_block_info &block_info = get_block_info (bb); | |
3246 | if (!block_info.has_info ()) | |
3247 | continue; | |
3248 | ||
3249 | const vsetvl_info &footer_info = block_info.get_exit_info (); | |
3250 | ||
3251 | if (footer_info.delete_p ()) | |
3252 | continue; | |
3253 | ||
3254 | edge eg; | |
3255 | edge_iterator eg_iterator; | |
3256 | FOR_EACH_EDGE (eg, eg_iterator, bb->cfg_bb ()->succs) | |
3257 | { | |
3258 | gcc_assert (!(eg->flags & EDGE_ABNORMAL)); | |
3259 | if (dump_file) | |
3260 | { | |
3261 | fprintf ( | |
3262 | dump_file, | |
3263 | "\n Insert missed vsetvl info at edge(bb %u -> bb %u): ", | |
3264 | eg->src->index, eg->dest->index); | |
3265 | footer_info.dump (dump_file, " "); | |
3266 | } | |
3267 | start_sequence (); | |
3268 | insert_vsetvl_insn (EMIT_DIRECT, footer_info); | |
3269 | rtx_insn *rinsn = get_insns (); | |
3270 | end_sequence (); | |
3271 | default_rtl_profile (); | |
3272 | insert_insn_on_edge (rinsn, eg); | |
3273 | need_commit = true; | |
3274 | } | |
3275 | } | |
3276 | ||
29331e72 LD |
3277 | /* m_insert vsetvl as LCM suggest. */ |
3278 | for (int ed = 0; ed < NUM_EDGES (m_edges); ed++) | |
20c85207 | 3279 | { |
29331e72 LD |
3280 | edge eg = INDEX_EDGE (m_edges, ed); |
3281 | sbitmap i = m_insert[ed]; | |
3282 | if (bitmap_count_bits (i) < 1) | |
3283 | continue; | |
3284 | ||
3285 | if (bitmap_count_bits (i) > 1) | |
3286 | /* For code with infinite loop (e.g. pr61634.c), The data flow is | |
3287 | completely wrong. */ | |
3288 | continue; | |
3289 | ||
3290 | gcc_assert (bitmap_count_bits (i) == 1); | |
3291 | unsigned expr_index = bitmap_first_set_bit (i); | |
3292 | const vsetvl_info &info = *m_exprs[expr_index]; | |
3293 | gcc_assert (info.valid_p ()); | |
3294 | if (dump_file) | |
20c85207 | 3295 | { |
29331e72 LD |
3296 | fprintf (dump_file, |
3297 | "\n Insert vsetvl info at edge(bb %u -> bb %u): ", | |
3298 | eg->src->index, eg->dest->index); | |
3299 | info.dump (dump_file, " "); | |
20c85207 | 3300 | } |
29331e72 LD |
3301 | rtl_profile_for_edge (eg); |
3302 | start_sequence (); | |
3303 | ||
3304 | insert_vsetvl_insn (EMIT_DIRECT, info); | |
3305 | rtx_insn *rinsn = get_insns (); | |
3306 | end_sequence (); | |
3307 | default_rtl_profile (); | |
3308 | ||
3309 | /* We should not get an abnormal edge here. */ | |
3310 | gcc_assert (!(eg->flags & EDGE_ABNORMAL)); | |
3311 | need_commit = true; | |
3312 | insert_insn_on_edge (rinsn, eg); | |
20c85207 JZ |
3313 | } |
3314 | ||
29331e72 LD |
3315 | if (need_commit) |
3316 | commit_edge_insertions (); | |
20c85207 JZ |
3317 | } |
3318 | ||
9243c3d1 | 3319 | void |
29331e72 | 3320 | pre_vsetvl::cleaup () |
9243c3d1 | 3321 | { |
29331e72 LD |
3322 | remove_avl_operand (); |
3323 | remove_unused_dest_operand (); | |
3324 | } | |
9243c3d1 | 3325 | |
29331e72 LD |
3326 | void |
3327 | pre_vsetvl::remove_avl_operand () | |
3328 | { | |
3329 | basic_block cfg_bb; | |
3330 | rtx_insn *rinsn; | |
3331 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
3332 | FOR_BB_INSNS (cfg_bb, rinsn) | |
3333 | if (NONDEBUG_INSN_P (rinsn) && has_vl_op (rinsn) | |
3334 | && REG_P (get_vl (rinsn))) | |
3335 | { | |
9243c3d1 | 3336 | rtx avl = get_vl (rinsn); |
a2d12abe | 3337 | if (count_regno_occurrences (rinsn, REGNO (avl)) == 1) |
9243c3d1 | 3338 | { |
29331e72 | 3339 | rtx new_pat; |
60bd33bc | 3340 | if (fault_first_load_p (rinsn)) |
29331e72 LD |
3341 | new_pat |
3342 | = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx); | |
60bd33bc JZZ |
3343 | else |
3344 | { | |
3345 | rtx set = single_set (rinsn); | |
3346 | rtx src | |
3347 | = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx); | |
29331e72 LD |
3348 | new_pat = gen_rtx_SET (SET_DEST (set), src); |
3349 | } | |
3350 | if (dump_file) | |
3351 | { | |
3352 | fprintf (dump_file, " Cleanup insn %u's avl operand:\n", | |
3353 | INSN_UID (rinsn)); | |
3354 | print_rtl_single (dump_file, rinsn); | |
60bd33bc | 3355 | } |
29331e72 | 3356 | validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false); |
9243c3d1 JZZ |
3357 | } |
3358 | } | |
20c85207 JZ |
3359 | } |
3360 | ||
6b6b9c68 | 3361 | void |
29331e72 | 3362 | pre_vsetvl::remove_unused_dest_operand () |
20c85207 | 3363 | { |
6b6b9c68 | 3364 | df_analyze (); |
20c85207 JZ |
3365 | basic_block cfg_bb; |
3366 | rtx_insn *rinsn; | |
3367 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
29331e72 LD |
3368 | FOR_BB_INSNS (cfg_bb, rinsn) |
3369 | if (NONDEBUG_INSN_P (rinsn) && vsetvl_insn_p (rinsn)) | |
6b6b9c68 | 3370 | { |
29331e72 LD |
3371 | rtx vl = get_vl (rinsn); |
3372 | vsetvl_info info = vsetvl_info (rinsn); | |
3373 | if (has_no_uses (cfg_bb, rinsn, REGNO (vl))) | |
3374 | if (!info.has_vlmax_avl ()) | |
3375 | { | |
3376 | rtx new_pat = info.get_vsetvl_pat (true); | |
3377 | if (dump_file) | |
3378 | { | |
3379 | fprintf (dump_file, | |
3380 | " Remove vsetvl insn %u's dest(vl) operand since " | |
3381 | "it unused:\n", | |
3382 | INSN_UID (rinsn)); | |
3383 | print_rtl_single (dump_file, rinsn); | |
3384 | } | |
3385 | validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, | |
3386 | false); | |
3387 | } | |
6b6b9c68 | 3388 | } |
6b6b9c68 JZZ |
3389 | } |
3390 | ||
29331e72 LD |
3391 | const pass_data pass_data_vsetvl = { |
3392 | RTL_PASS, /* type */ | |
3393 | "vsetvl", /* name */ | |
3394 | OPTGROUP_NONE, /* optinfo_flags */ | |
3395 | TV_NONE, /* tv_id */ | |
3396 | 0, /* properties_required */ | |
3397 | 0, /* properties_provided */ | |
3398 | 0, /* properties_destroyed */ | |
3399 | 0, /* todo_flags_start */ | |
3400 | 0, /* todo_flags_finish */ | |
3401 | }; | |
9243c3d1 | 3402 | |
29331e72 LD |
3403 | class pass_vsetvl : public rtl_opt_pass |
3404 | { | |
3405 | private: | |
3406 | void simple_vsetvl (); | |
3407 | void lazy_vsetvl (); | |
9243c3d1 | 3408 | |
29331e72 LD |
3409 | public: |
3410 | pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {} | |
9243c3d1 | 3411 | |
29331e72 LD |
3412 | /* opt_pass methods: */ |
3413 | virtual bool gate (function *) final override { return TARGET_VECTOR; } | |
3414 | virtual unsigned int execute (function *) final override; | |
3415 | }; // class pass_vsetvl | |
9243c3d1 | 3416 | |
acc10c79 | 3417 | void |
29331e72 | 3418 | pass_vsetvl::simple_vsetvl () |
acc10c79 | 3419 | { |
29331e72 LD |
3420 | if (dump_file) |
3421 | fprintf (dump_file, "\nEntering Simple VSETVL PASS\n"); | |
acc10c79 | 3422 | |
29331e72 LD |
3423 | basic_block cfg_bb; |
3424 | rtx_insn *rinsn; | |
3425 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
acc10c79 | 3426 | { |
29331e72 | 3427 | FOR_BB_INSNS (cfg_bb, rinsn) |
acc10c79 | 3428 | { |
29331e72 | 3429 | if (!NONDEBUG_INSN_P (rinsn)) |
acc10c79 | 3430 | continue; |
29331e72 LD |
3431 | if (has_vtype_op (rinsn)) |
3432 | { | |
3433 | const auto &info = vsetvl_info (rinsn); | |
3434 | rtx pat = info.get_vsetvl_pat (); | |
3435 | emit_insn_before (pat, rinsn); | |
3436 | if (dump_file) | |
3437 | { | |
3438 | fprintf (dump_file, " Insert vsetvl insn before insn %d:\n", | |
3439 | INSN_UID (rinsn)); | |
3440 | print_rtl_single (dump_file, PREV_INSN (rinsn)); | |
3441 | } | |
3442 | } | |
acc10c79 JZZ |
3443 | } |
3444 | } | |
acc10c79 JZZ |
3445 | } |
3446 | ||
9243c3d1 JZZ |
3447 | /* Lazy vsetvl insertion for optimize > 0. */ |
3448 | void | |
29331e72 | 3449 | pass_vsetvl::lazy_vsetvl () |
9243c3d1 JZZ |
3450 | { |
3451 | if (dump_file) | |
29331e72 LD |
3452 | fprintf (dump_file, "\nEntering Lazy VSETVL PASS\n\n"); |
3453 | ||
3454 | pre_vsetvl pre = pre_vsetvl (); | |
9243c3d1 | 3455 | |
9243c3d1 | 3456 | if (dump_file) |
29331e72 LD |
3457 | fprintf (dump_file, "\nPhase 1: Fuse local vsetvl infos.\n\n"); |
3458 | pre.fuse_local_vsetvl_info (); | |
0d50facd | 3459 | if (dump_file && (dump_flags & TDF_DETAILS)) |
29331e72 | 3460 | pre.dump (dump_file, "phase 1"); |
9243c3d1 | 3461 | |
29331e72 | 3462 | /* Phase 2: Fuse header and footer vsetvl infos between basic blocks. */ |
9243c3d1 | 3463 | if (dump_file) |
29331e72 LD |
3464 | fprintf (dump_file, "\nPhase 2: Lift up vsetvl info.\n\n"); |
3465 | bool changed; | |
3466 | int fused_count = 0; | |
3467 | do | |
3468 | { | |
3469 | if (dump_file) | |
3470 | fprintf (dump_file, " Try lift up %d.\n\n", fused_count); | |
3471 | changed = pre.earliest_fuse_vsetvl_info (); | |
3472 | fused_count += 1; | |
3473 | } while (changed); | |
3474 | ||
0d50facd | 3475 | if (dump_file && (dump_flags & TDF_DETAILS)) |
29331e72 | 3476 | pre.dump (dump_file, "phase 2"); |
9243c3d1 | 3477 | |
29331e72 | 3478 | /* Phase 3: Reducing redundant vsetvl infos using LCM. */ |
9243c3d1 | 3479 | if (dump_file) |
29331e72 LD |
3480 | fprintf (dump_file, "\nPhase 3: Reduce global vsetvl infos.\n\n"); |
3481 | pre.pre_global_vsetvl_info (); | |
3482 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3483 | pre.dump (dump_file, "phase 3"); | |
9243c3d1 | 3484 | |
29331e72 | 3485 | /* Phase 4: Insert, modify and remove vsetvl insns. */ |
9243c3d1 | 3486 | if (dump_file) |
29331e72 LD |
3487 | fprintf (dump_file, |
3488 | "\nPhase 4: Insert, modify and remove vsetvl insns.\n\n"); | |
3489 | pre.emit_vsetvl (); | |
9243c3d1 | 3490 | |
29331e72 | 3491 | /* Phase 5: Cleaup */ |
9243c3d1 | 3492 | if (dump_file) |
29331e72 LD |
3493 | fprintf (dump_file, "\nPhase 5: Cleaup\n\n"); |
3494 | pre.cleaup (); | |
6b6b9c68 | 3495 | |
29331e72 | 3496 | pre.finish (); |
9243c3d1 JZZ |
3497 | } |
3498 | ||
3499 | /* Main entry point for this pass. */ | |
3500 | unsigned int | |
3501 | pass_vsetvl::execute (function *) | |
3502 | { | |
3503 | if (n_basic_blocks_for_fn (cfun) <= 0) | |
3504 | return 0; | |
3505 | ||
ca8fb009 JZZ |
3506 | /* The RVV instruction may change after split which is not a stable |
3507 | instruction. We need to split it here to avoid potential issue | |
3508 | since the VSETVL PASS is insert before split PASS. */ | |
3509 | split_all_insns (); | |
9243c3d1 JZZ |
3510 | |
3511 | /* Early return for there is no vector instructions. */ | |
3512 | if (!has_vector_insn (cfun)) | |
3513 | return 0; | |
3514 | ||
9243c3d1 JZZ |
3515 | if (!optimize) |
3516 | simple_vsetvl (); | |
3517 | else | |
3518 | lazy_vsetvl (); | |
3519 | ||
9243c3d1 JZZ |
3520 | return 0; |
3521 | } | |
3522 | ||
3523 | rtl_opt_pass * | |
3524 | make_pass_vsetvl (gcc::context *ctxt) | |
3525 | { | |
3526 | return new pass_vsetvl (ctxt); | |
3527 | } |