]>
Commit | Line | Data |
---|---|---|
9243c3d1 | 1 | /* VSETVL pass for RISC-V 'V' Extension for GNU compiler. |
a945c346 | 2 | Copyright (C) 2022-2024 Free Software Foundation, Inc. |
9243c3d1 JZZ |
3 | Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd. |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or(at your option) | |
10 | any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
29331e72 LD |
21 | /* The values of the vl and vtype registers will affect the behavior of RVV |
22 | insns. That is, when we need to execute an RVV instruction, we need to set | |
23 | the correct vl and vtype values by executing the vsetvl instruction before. | |
24 | Executing the fewest number of vsetvl instructions while keeping the behavior | |
25 | the same is the problem this pass is trying to solve. This vsetvl pass is | |
26 | divided into 5 phases: | |
27 | ||
28 | - Phase 1 (fuse local vsetvl infos): traverses each Basic Block, parses | |
29 | each instruction in it that affects vl and vtype state and generates an | |
30 | array of vsetvl_info objects. Then traverse the vsetvl_info array from | |
31 | front to back and perform fusion according to the fusion rules. The fused | |
32 | vsetvl infos are stored in the vsetvl_block_info object's `infos` field. | |
33 | ||
34 | - Phase 2 (earliest fuse global vsetvl infos): The header_info and | |
35 | footer_info of vsetvl_block_info are used as expressions, and the | |
36 | earliest of each expression is computed. Based on the earliest | |
37 | information, try to lift up the corresponding vsetvl info to the src | |
38 | basic block of the edge (mainly to reduce the total number of vsetvl | |
39 | instructions, this uplift will cause some execution paths to execute | |
40 | vsetvl instructions that shouldn't be there). | |
41 | ||
42 | - Phase 3 (pre global vsetvl info): The header_info and footer_info of | |
43 | vsetvl_block_info are used as expressions, and the LCM algorithm is used | |
44 | to compute the header_info that needs to be deleted and the one that | |
45 | needs to be inserted in some edges. | |
46 | ||
47 | - Phase 4 (emit vsetvl insns) : Based on the fusion result of Phase 1 and | |
48 | the deletion and insertion information of Phase 3, the mandatory vsetvl | |
49 | instruction insertion, modification and deletion are performed. | |
50 | ||
51 | - Phase 5 (cleanup): Clean up the avl operand in the RVV operator | |
52 | instruction and cleanup the unused dest operand of the vsetvl insn. | |
53 | ||
54 | After the Phase 1 a virtual CFG of vsetvl_info is generated. The virtual | |
55 | basic block is represented by vsetvl_block_info, and the virtual vsetvl | |
56 | statements inside are represented by vsetvl_info. The later phases 2 and 3 | |
57 | are constantly modifying and adjusting this virtual CFG. Phase 4 performs | |
58 | insertion, modification and deletion of vsetvl instructions based on the | |
59 | optimized virtual CFG. The Phase 1, 2 and 3 do not involve modifications to | |
60 | the RTL. | |
61 | */ | |
9243c3d1 JZZ |
62 | |
63 | #define IN_TARGET_CODE 1 | |
64 | #define INCLUDE_ALGORITHM | |
65 | #define INCLUDE_FUNCTIONAL | |
66 | ||
67 | #include "config.h" | |
68 | #include "system.h" | |
69 | #include "coretypes.h" | |
70 | #include "tm.h" | |
71 | #include "backend.h" | |
72 | #include "rtl.h" | |
73 | #include "target.h" | |
74 | #include "tree-pass.h" | |
75 | #include "df.h" | |
76 | #include "rtl-ssa.h" | |
77 | #include "cfgcleanup.h" | |
78 | #include "insn-config.h" | |
79 | #include "insn-attr.h" | |
80 | #include "insn-opinit.h" | |
81 | #include "tm-constrs.h" | |
82 | #include "cfgrtl.h" | |
83 | #include "cfganal.h" | |
84 | #include "lcm.h" | |
85 | #include "predict.h" | |
86 | #include "profile-count.h" | |
a3ad2301 | 87 | #include "gcse.h" |
9243c3d1 JZZ |
88 | |
89 | using namespace rtl_ssa; | |
90 | using namespace riscv_vector; | |
91 | ||
29331e72 LD |
92 | /* Set the bitmap DST to the union of SRC of predecessors of |
93 | basic block B. | |
94 | It's a bit different from bitmap_union_of_preds in cfganal.cc. This function | |
95 | takes into account the case where pred is ENTRY basic block. The main reason | |
96 | for this difference is to make it easier to insert some special value into | |
97 | the ENTRY base block. For example, vsetvl_info with a status of UNKNOW. */ | |
98 | static void | |
99 | bitmap_union_of_preds_with_entry (sbitmap dst, sbitmap *src, basic_block b) | |
100 | { | |
101 | unsigned int set_size = dst->size; | |
102 | edge e; | |
103 | unsigned ix; | |
104 | ||
105 | for (ix = 0; ix < EDGE_COUNT (b->preds); ix++) | |
106 | { | |
107 | e = EDGE_PRED (b, ix); | |
108 | bitmap_copy (dst, src[e->src->index]); | |
109 | break; | |
110 | } | |
ec99ffab | 111 | |
29331e72 LD |
112 | if (ix == EDGE_COUNT (b->preds)) |
113 | bitmap_clear (dst); | |
114 | else | |
115 | for (ix++; ix < EDGE_COUNT (b->preds); ix++) | |
116 | { | |
117 | unsigned int i; | |
118 | SBITMAP_ELT_TYPE *p, *r; | |
119 | ||
120 | e = EDGE_PRED (b, ix); | |
121 | p = src[e->src->index]->elms; | |
122 | r = dst->elms; | |
123 | for (i = 0; i < set_size; i++) | |
124 | *r++ |= *p++; | |
125 | } | |
126 | } | |
127 | ||
128 | /* Compute the reaching defintion in and out based on the gen and KILL | |
129 | informations in each Base Blocks. | |
130 | This function references the compute_avaiable implementation in lcm.cc */ | |
131 | static void | |
132 | compute_reaching_defintion (sbitmap *gen, sbitmap *kill, sbitmap *in, | |
133 | sbitmap *out) | |
9243c3d1 | 134 | { |
29331e72 LD |
135 | edge e; |
136 | basic_block *worklist, *qin, *qout, *qend, bb; | |
137 | unsigned int qlen; | |
138 | edge_iterator ei; | |
139 | ||
140 | /* Allocate a worklist array/queue. Entries are only added to the | |
141 | list if they were not already on the list. So the size is | |
142 | bounded by the number of basic blocks. */ | |
143 | qin = qout = worklist | |
144 | = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); | |
145 | ||
146 | /* Put every block on the worklist; this is necessary because of the | |
147 | optimistic initialization of AVOUT above. Use reverse postorder | |
148 | to make the forward dataflow problem require less iterations. */ | |
149 | int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); | |
150 | int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false); | |
151 | for (int i = 0; i < n; ++i) | |
152 | { | |
153 | bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]); | |
154 | *qin++ = bb; | |
155 | bb->aux = bb; | |
156 | } | |
157 | free (rpo); | |
158 | ||
159 | qin = worklist; | |
160 | qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS]; | |
161 | qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; | |
162 | ||
163 | /* Mark blocks which are successors of the entry block so that we | |
164 | can easily identify them below. */ | |
165 | FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) | |
166 | e->dest->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun); | |
167 | ||
168 | /* Iterate until the worklist is empty. */ | |
169 | while (qlen) | |
170 | { | |
171 | /* Take the first entry off the worklist. */ | |
172 | bb = *qout++; | |
173 | qlen--; | |
174 | ||
175 | if (qout >= qend) | |
176 | qout = worklist; | |
177 | ||
178 | /* Do not clear the aux field for blocks which are successors of the | |
179 | ENTRY block. That way we never add then to the worklist again. */ | |
180 | if (bb->aux != ENTRY_BLOCK_PTR_FOR_FN (cfun)) | |
181 | bb->aux = NULL; | |
182 | ||
183 | bitmap_union_of_preds_with_entry (in[bb->index], out, bb); | |
184 | ||
185 | if (bitmap_ior_and_compl (out[bb->index], gen[bb->index], in[bb->index], | |
186 | kill[bb->index])) | |
187 | /* If the out state of this block changed, then we need | |
188 | to add the successors of this block to the worklist | |
189 | if they are not already on the worklist. */ | |
190 | FOR_EACH_EDGE (e, ei, bb->succs) | |
191 | if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)) | |
192 | { | |
193 | *qin++ = e->dest; | |
194 | e->dest->aux = e; | |
195 | qlen++; | |
196 | ||
197 | if (qin >= qend) | |
198 | qin = worklist; | |
199 | } | |
200 | } | |
201 | ||
202 | clear_aux_for_edges (); | |
203 | clear_aux_for_blocks (); | |
204 | free (worklist); | |
9243c3d1 JZZ |
205 | } |
206 | ||
29331e72 LD |
/* The kinds of vsetvl instruction.  NUM_VSETVL_TYPE is not a kind itself;
   its value is the number of kinds listed before it.  */
enum vsetvl_type
{
  VSETVL_NORMAL = 0,
  VSETVL_VTYPE_CHANGE_ONLY = 1,
  VSETVL_DISCARD_RESULT = 2,
  NUM_VSETVL_TYPE = 3
};
9243c3d1 | 215 | |
/* How an insn is to be emitted.  */
enum emit_type
{
  /* emit_insn directly.  */
  EMIT_DIRECT = 0,
  EMIT_BEFORE = 1,
  EMIT_AFTER = 2
};
223 | ||
224 | /* dump helper functions */ | |
225 | static const char * | |
226 | vlmul_to_str (vlmul_type vlmul) | |
227 | { | |
228 | switch (vlmul) | |
229 | { | |
230 | case LMUL_1: | |
231 | return "m1"; | |
232 | case LMUL_2: | |
233 | return "m2"; | |
234 | case LMUL_4: | |
235 | return "m4"; | |
236 | case LMUL_8: | |
237 | return "m8"; | |
238 | case LMUL_RESERVED: | |
239 | return "INVALID LMUL"; | |
240 | case LMUL_F8: | |
241 | return "mf8"; | |
242 | case LMUL_F4: | |
243 | return "mf4"; | |
244 | case LMUL_F2: | |
245 | return "mf2"; | |
246 | ||
247 | default: | |
248 | gcc_unreachable (); | |
249 | } | |
9243c3d1 JZZ |
250 | } |
251 | ||
29331e72 LD |
/* Return the name used in dumps for a tail/mask policy: "agnostic" when
   AGNOSTIC_P is true, "undisturbed" otherwise.  */
static const char *
policy_to_str (bool agnostic_p)
{
  if (agnostic_p)
    return "agnostic";
  return "undisturbed";
}
257 | ||
9243c3d1 JZZ |
258 | /* Return true if it is an RVV instruction depends on VTYPE global |
259 | status register. */ | |
260 | static bool | |
261 | has_vtype_op (rtx_insn *rinsn) | |
262 | { | |
263 | return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn); | |
264 | } | |
265 | ||
ec99ffab JZZ |
266 | /* Return true if the instruction ignores VLMUL field of VTYPE. */ |
267 | static bool | |
268 | ignore_vlmul_insn_p (rtx_insn *rinsn) | |
269 | { | |
270 | return get_attr_type (rinsn) == TYPE_VIMOVVX | |
271 | || get_attr_type (rinsn) == TYPE_VFMOVVF | |
272 | || get_attr_type (rinsn) == TYPE_VIMOVXV | |
273 | || get_attr_type (rinsn) == TYPE_VFMOVFV; | |
274 | } | |
275 | ||
276 | /* Return true if the instruction is scalar move instruction. */ | |
277 | static bool | |
278 | scalar_move_insn_p (rtx_insn *rinsn) | |
279 | { | |
280 | return get_attr_type (rinsn) == TYPE_VIMOVXV | |
281 | || get_attr_type (rinsn) == TYPE_VFMOVFV; | |
282 | } | |
283 | ||
60bd33bc JZZ |
284 | /* Return true if the instruction is fault first load instruction. */ |
285 | static bool | |
286 | fault_first_load_p (rtx_insn *rinsn) | |
287 | { | |
6313b045 JZZ |
288 | return recog_memoized (rinsn) >= 0 |
289 | && (get_attr_type (rinsn) == TYPE_VLDFF | |
290 | || get_attr_type (rinsn) == TYPE_VLSEGDFF); | |
60bd33bc JZZ |
291 | } |
292 | ||
293 | /* Return true if the instruction is read vl instruction. */ | |
294 | static bool | |
295 | read_vl_insn_p (rtx_insn *rinsn) | |
296 | { | |
297 | return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_RDVL; | |
298 | } | |
299 | ||
9243c3d1 JZZ |
300 | /* Return true if it is a vsetvl instruction. */ |
301 | static bool | |
302 | vector_config_insn_p (rtx_insn *rinsn) | |
303 | { | |
304 | return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL; | |
305 | } | |
306 | ||
307 | /* Return true if it is vsetvldi or vsetvlsi. */ | |
308 | static bool | |
309 | vsetvl_insn_p (rtx_insn *rinsn) | |
310 | { | |
29331e72 | 311 | if (!rinsn || !vector_config_insn_p (rinsn)) |
6b6b9c68 | 312 | return false; |
85112fbb | 313 | return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi |
6b6b9c68 JZZ |
314 | || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi); |
315 | } | |
316 | ||
317 | /* Return true if it is vsetvl zero, rs1. */ | |
318 | static bool | |
319 | vsetvl_discard_result_insn_p (rtx_insn *rinsn) | |
320 | { | |
321 | if (!vector_config_insn_p (rinsn)) | |
322 | return false; | |
323 | return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi | |
324 | || INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi); | |
9243c3d1 JZZ |
325 | } |
326 | ||
9243c3d1 | 327 | static bool |
4f673c5e | 328 | real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb) |
9243c3d1 | 329 | { |
4f673c5e | 330 | return insn != nullptr && insn->is_real () && insn->bb () == bb; |
9243c3d1 JZZ |
331 | } |
332 | ||
29331e72 | 333 | /* Helper function to get VL operand for VLMAX insn. */ |
6b6b9c68 JZZ |
334 | static rtx |
335 | get_vl (rtx_insn *rinsn) | |
336 | { | |
337 | if (has_vl_op (rinsn)) | |
338 | { | |
339 | extract_insn_cached (rinsn); | |
340 | return recog_data.operand[get_attr_vl_op_idx (rinsn)]; | |
341 | } | |
342 | return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0)); | |
4f673c5e JZZ |
343 | } |
344 | ||
6b6b9c68 JZZ |
345 | /* Helper function to get AVL operand. */ |
346 | static rtx | |
347 | get_avl (rtx_insn *rinsn) | |
348 | { | |
349 | if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn)) | |
350 | return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0); | |
351 | ||
352 | if (!has_vl_op (rinsn)) | |
353 | return NULL_RTX; | |
5e714992 | 354 | if (vlmax_avl_type_p (rinsn)) |
6b6b9c68 JZZ |
355 | return RVV_VLMAX; |
356 | extract_insn_cached (rinsn); | |
357 | return recog_data.operand[get_attr_vl_op_idx (rinsn)]; | |
358 | } | |
359 | ||
9243c3d1 JZZ |
360 | /* Get default mask policy. */ |
361 | static bool | |
362 | get_default_ma () | |
363 | { | |
364 | /* For the instruction that doesn't require MA, we still need a default value | |
365 | to emit vsetvl. We pick up the default value according to prefer policy. */ | |
366 | return (bool) (get_prefer_mask_policy () & 0x1 | |
367 | || (get_prefer_mask_policy () >> 1 & 0x1)); | |
368 | } | |
369 | ||
9243c3d1 JZZ |
370 | /* Helper function to get MA operand. */ |
371 | static bool | |
372 | mask_agnostic_p (rtx_insn *rinsn) | |
373 | { | |
374 | /* If it doesn't have MA, we return agnostic by default. */ | |
375 | extract_insn_cached (rinsn); | |
376 | int ma = get_attr_ma (rinsn); | |
377 | return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma); | |
378 | } | |
379 | ||
380 | /* Return true if FN has a vector instruction that use VL/VTYPE. */ | |
381 | static bool | |
382 | has_vector_insn (function *fn) | |
383 | { | |
384 | basic_block cfg_bb; | |
385 | rtx_insn *rinsn; | |
386 | FOR_ALL_BB_FN (cfg_bb, fn) | |
387 | FOR_BB_INSNS (cfg_bb, rinsn) | |
388 | if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn)) | |
389 | return true; | |
390 | return false; | |
391 | } | |
392 | ||
29331e72 LD |
393 | static vlmul_type |
394 | calculate_vlmul (unsigned int sew, unsigned int ratio) | |
9243c3d1 | 395 | { |
29331e72 LD |
396 | const vlmul_type ALL_LMUL[] |
397 | = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2}; | |
398 | for (const vlmul_type vlmul : ALL_LMUL) | |
399 | if (calculate_ratio (sew, vlmul) == ratio) | |
400 | return vlmul; | |
401 | return LMUL_RESERVED; | |
9243c3d1 JZZ |
402 | } |
403 | ||
29331e72 LD |
404 | /* Get the currently supported maximum sew used in the int rvv instructions. */ |
405 | static uint8_t | |
406 | get_max_int_sew () | |
9243c3d1 | 407 | { |
29331e72 LD |
408 | if (TARGET_VECTOR_ELEN_64) |
409 | return 64; | |
410 | else if (TARGET_VECTOR_ELEN_32) | |
411 | return 32; | |
412 | gcc_unreachable (); | |
9243c3d1 JZZ |
413 | } |
414 | ||
29331e72 LD |
415 | /* Get the currently supported maximum sew used in the float rvv instructions. |
416 | */ | |
417 | static uint8_t | |
418 | get_max_float_sew () | |
419 | { | |
420 | if (TARGET_VECTOR_ELEN_FP_64) | |
421 | return 64; | |
422 | else if (TARGET_VECTOR_ELEN_FP_32) | |
423 | return 32; | |
424 | else if (TARGET_VECTOR_ELEN_FP_16) | |
425 | return 16; | |
426 | gcc_unreachable (); | |
9243c3d1 JZZ |
427 | } |
428 | ||
/* Bit flags selecting kinds of definition.  The flags can be ORed together
   to form a mask (see insn_should_be_added_p).  */
enum def_type
{
  REAL_SET = 0x01,
  PHI_SET = 0x02,
  BB_HEAD_SET = 0x04,
  BB_END_SET = 0x08,
  /* ??? TODO: The RTL_SSA framework has REAL_SET, PHI_SET, BB_HEAD_SET,
     BB_END_SET and CLOBBER_DEF def_info types.  Currently, clobber defs are
     conservatively not optimized since no case that needs it has been
     seen.  */
  CLOBBER_DEF = 0x10
};
9243c3d1 | 443 | |
29331e72 LD |
444 | static bool |
445 | insn_should_be_added_p (const insn_info *insn, unsigned int types) | |
da93c41c | 446 | { |
29331e72 LD |
447 | if (insn->is_real () && (types & REAL_SET)) |
448 | return true; | |
449 | if (insn->is_phi () && (types & PHI_SET)) | |
450 | return true; | |
451 | if (insn->is_bb_head () && (types & BB_HEAD_SET)) | |
452 | return true; | |
453 | if (insn->is_bb_end () && (types & BB_END_SET)) | |
454 | return true; | |
455 | return false; | |
da93c41c JZ |
456 | } |
457 | ||
29331e72 LD |
458 | static const hash_set<use_info *> |
459 | get_all_real_uses (insn_info *insn, unsigned regno) | |
9243c3d1 | 460 | { |
29331e72 | 461 | gcc_assert (insn->is_real ()); |
9243c3d1 | 462 | |
29331e72 LD |
463 | hash_set<use_info *> uses; |
464 | auto_vec<phi_info *> work_list; | |
465 | hash_set<phi_info *> visited_list; | |
9243c3d1 | 466 | |
29331e72 | 467 | for (def_info *def : insn->defs ()) |
9243c3d1 | 468 | { |
29331e72 LD |
469 | if (!def->is_reg () || def->regno () != regno) |
470 | continue; | |
471 | set_info *set = safe_dyn_cast<set_info *> (def); | |
472 | if (!set) | |
473 | continue; | |
474 | for (use_info *use : set->nondebug_insn_uses ()) | |
475 | if (use->insn ()->is_real ()) | |
476 | uses.add (use); | |
477 | for (use_info *use : set->phi_uses ()) | |
478 | work_list.safe_push (use->phi ()); | |
9243c3d1 | 479 | } |
9243c3d1 | 480 | |
29331e72 | 481 | while (!work_list.is_empty ()) |
60bd33bc | 482 | { |
29331e72 LD |
483 | phi_info *phi = work_list.pop (); |
484 | visited_list.add (phi); | |
60bd33bc | 485 | |
29331e72 LD |
486 | for (use_info *use : phi->nondebug_insn_uses ()) |
487 | if (use->insn ()->is_real ()) | |
488 | uses.add (use); | |
489 | for (use_info *use : phi->phi_uses ()) | |
490 | if (!visited_list.contains (use->phi ())) | |
491 | work_list.safe_push (use->phi ()); | |
60bd33bc | 492 | } |
29331e72 | 493 | return uses; |
60bd33bc JZZ |
494 | } |
495 | ||
29331e72 LD |
496 | /* Recursively find all define instructions. The kind of instruction is |
497 | specified by the DEF_TYPE. */ | |
498 | static hash_set<set_info *> | |
499 | get_all_sets (phi_info *phi, unsigned int types) | |
9243c3d1 | 500 | { |
29331e72 LD |
501 | hash_set<set_info *> insns; |
502 | auto_vec<phi_info *> work_list; | |
503 | hash_set<phi_info *> visited_list; | |
504 | if (!phi) | |
505 | return hash_set<set_info *> (); | |
506 | work_list.safe_push (phi); | |
9243c3d1 | 507 | |
29331e72 | 508 | while (!work_list.is_empty ()) |
9243c3d1 | 509 | { |
29331e72 LD |
510 | phi_info *phi = work_list.pop (); |
511 | visited_list.add (phi); | |
512 | for (use_info *use : phi->inputs ()) | |
513 | { | |
514 | def_info *def = use->def (); | |
515 | set_info *set = safe_dyn_cast<set_info *> (def); | |
516 | if (!set) | |
517 | return hash_set<set_info *> (); | |
a1e42094 | 518 | |
29331e72 | 519 | gcc_assert (!set->insn ()->is_debug_insn ()); |
9243c3d1 | 520 | |
29331e72 LD |
521 | if (insn_should_be_added_p (set->insn (), types)) |
522 | insns.add (set); | |
523 | if (set->insn ()->is_phi ()) | |
524 | { | |
525 | phi_info *new_phi = as_a<phi_info *> (set); | |
526 | if (!visited_list.contains (new_phi)) | |
527 | work_list.safe_push (new_phi); | |
528 | } | |
529 | } | |
9243c3d1 | 530 | } |
29331e72 | 531 | return insns; |
9243c3d1 JZZ |
532 | } |
533 | ||
29331e72 LD |
534 | static hash_set<set_info *> |
535 | get_all_sets (set_info *set, bool /* get_real_inst */ real_p, | |
536 | bool /*get_phi*/ phi_p, bool /* get_function_parameter*/ param_p) | |
aef20243 | 537 | { |
29331e72 LD |
538 | if (real_p && phi_p && param_p) |
539 | return get_all_sets (safe_dyn_cast<phi_info *> (set), | |
540 | REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET); | |
aef20243 | 541 | |
29331e72 LD |
542 | else if (real_p && param_p) |
543 | return get_all_sets (safe_dyn_cast<phi_info *> (set), | |
544 | REAL_SET | BB_HEAD_SET | BB_END_SET); | |
545 | ||
546 | else if (real_p) | |
547 | return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET); | |
548 | return hash_set<set_info *> (); | |
69f39144 JZ |
549 | } |
550 | ||
4f673c5e | 551 | static bool |
6b6b9c68 | 552 | source_equal_p (insn_info *insn1, insn_info *insn2) |
4f673c5e | 553 | { |
6b6b9c68 JZZ |
554 | if (!insn1 || !insn2) |
555 | return false; | |
556 | rtx_insn *rinsn1 = insn1->rtl (); | |
557 | rtx_insn *rinsn2 = insn2->rtl (); | |
4f673c5e JZZ |
558 | if (!rinsn1 || !rinsn2) |
559 | return false; | |
29331e72 | 560 | |
4f673c5e JZZ |
561 | rtx note1 = find_reg_equal_equiv_note (rinsn1); |
562 | rtx note2 = find_reg_equal_equiv_note (rinsn2); | |
2020bce3 RD |
563 | /* We could handle the case of similar-looking REG_EQUALs as well but |
564 | would need to verify that no insn in between modifies any of the source | |
565 | operands. */ | |
566 | if (note1 && note2 && rtx_equal_p (note1, note2) | |
567 | && REG_NOTE_KIND (note1) == REG_EQUIV) | |
4f673c5e | 568 | return true; |
29331e72 | 569 | return false; |
4f673c5e JZZ |
570 | } |
571 | ||
6b6b9c68 | 572 | static insn_info * |
4f673c5e JZZ |
573 | extract_single_source (set_info *set) |
574 | { | |
575 | if (!set) | |
576 | return nullptr; | |
577 | if (set->insn ()->is_real ()) | |
6b6b9c68 | 578 | return set->insn (); |
4f673c5e JZZ |
579 | if (!set->insn ()->is_phi ()) |
580 | return nullptr; | |
6b6b9c68 | 581 | hash_set<set_info *> sets = get_all_sets (set, true, false, true); |
330bb064 JZ |
582 | if (sets.is_empty ()) |
583 | return nullptr; | |
4f673c5e | 584 | |
6b6b9c68 | 585 | insn_info *first_insn = (*sets.begin ())->insn (); |
4f673c5e JZZ |
586 | if (first_insn->is_artificial ()) |
587 | return nullptr; | |
6b6b9c68 | 588 | for (const set_info *set : sets) |
4f673c5e JZZ |
589 | { |
590 | /* If there is a head or end insn, we conservative return | |
591 | NULL so that VSETVL PASS will insert vsetvl directly. */ | |
6b6b9c68 | 592 | if (set->insn ()->is_artificial ()) |
4f673c5e | 593 | return nullptr; |
29331e72 | 594 | if (set != *sets.begin () && !source_equal_p (set->insn (), first_insn)) |
4f673c5e JZZ |
595 | return nullptr; |
596 | } | |
597 | ||
6b6b9c68 | 598 | return first_insn; |
4f673c5e JZZ |
599 | } |
600 | ||
db642d60 JZ |
601 | static insn_info * |
602 | extract_single_source (def_info *def) | |
603 | { | |
604 | if (!def) | |
605 | return nullptr; | |
606 | return extract_single_source (dyn_cast<set_info *> (def)); | |
607 | } | |
608 | ||
29331e72 LD |
609 | static bool |
610 | same_equiv_note_p (set_info *set1, set_info *set2) | |
ec99ffab | 611 | { |
29331e72 LD |
612 | insn_info *insn1 = extract_single_source (set1); |
613 | insn_info *insn2 = extract_single_source (set2); | |
614 | if (!insn1 || !insn2) | |
615 | return false; | |
616 | return source_equal_p (insn1, insn2); | |
ec99ffab JZZ |
617 | } |
618 | ||
29331e72 LD |
/* Encode the pair (BB_INDEX, REGNO) as a single expression id, given that
   there are NUM_BBS basic blocks.  get_regno and get_bb_index decode it.  */
static unsigned
get_expr_id (unsigned bb_index, unsigned regno, unsigned num_bbs)
{
  const unsigned regno_base = regno * num_bbs;
  return regno_base + bb_index;
}
29331e72 LD |
/* Return the register number encoded in EXPR_ID, an id built by get_expr_id
   for NUM_BB basic blocks.  */
static unsigned
get_regno (unsigned expr_id, unsigned num_bb)
{
  const unsigned regno = expr_id / num_bb;
  return regno;
}
29331e72 LD |
/* Return the basic block index encoded in EXPR_ID, an id built by
   get_expr_id for NUM_BB basic blocks.  */
static unsigned
get_bb_index (unsigned expr_id, unsigned num_bb)
{
  const unsigned bb_index = expr_id % num_bb;
  return bb_index;
}
634 | ||
29331e72 | 635 | /* Return true if the SET result is not used by any instructions. */ |
ec99ffab | 636 | static bool |
29331e72 | 637 | has_no_uses (basic_block cfg_bb, rtx_insn *rinsn, int regno) |
ec99ffab | 638 | { |
29331e72 LD |
639 | if (bitmap_bit_p (df_get_live_out (cfg_bb), regno)) |
640 | return false; | |
ec99ffab | 641 | |
29331e72 LD |
642 | rtx_insn *iter; |
643 | for (iter = NEXT_INSN (rinsn); iter && iter != NEXT_INSN (BB_END (cfg_bb)); | |
644 | iter = NEXT_INSN (iter)) | |
645 | if (df_find_use (iter, regno_reg_rtx[regno])) | |
646 | return false; | |
ec99ffab | 647 | |
29331e72 | 648 | return true; |
ec99ffab JZZ |
649 | } |
650 | ||
29331e72 LD |
/* These flags describe the minimum demand that an RVV instruction places on
   the vl and vtype values.  For example, DEMAND_RATIO_P means that the
   instruction only needs the SEW/LMUL ratio to stay the same; SEW and LMUL
   themselves need not be fixed.

   Hence, when an earlier RVV instruction demands DEMAND_RATIO_P and a later
   one demands both SEW and LMUL with the same SEW/LMUL ratio, the demand of
   the earlier instruction can be tightened to the SEW and LMUL of the later
   one.  The vsetvl instruction generated for the tightened demand then
   serves the later instruction as well, so no separate vsetvl instruction
   has to be inserted for it.  */
enum demand_flags : unsigned
{
  DEMAND_EMPTY_P = 0x00,
  DEMAND_SEW_P = 0x01,
  DEMAND_LMUL_P = 0x02,
  DEMAND_RATIO_P = 0x04,
  DEMAND_GE_SEW_P = 0x08,
  DEMAND_TAIL_POLICY_P = 0x10,
  DEMAND_MASK_POLICY_P = 0x20,
  DEMAND_AVL_P = 0x40,
  DEMAND_NON_ZERO_AVL_P = 0x80,
};
ec99ffab | 675 | |
29331e72 LD |
676 | /* We split the demand information into three parts. They are sew and lmul |
677 | related (sew_lmul_demand_type), tail and mask policy related | |
678 | (policy_demand_type) and avl related (avl_demand_type). Then we define three | |
679 | interfaces avaiable_with, compatible_p and merge. avaiable_with is | |
680 | used to determine whether the two vsetvl infos prev_info and next_info are | |
681 | available or not. If prev_info is available for next_info, it means that the | |
682 | RVV insn corresponding to next_info on the path from prev_info to next_info | |
683 | can be used without inserting a separate vsetvl instruction. compatible_p | |
684 | is used to determine whether prev_info is compatible with next_info, and if | |
685 | so, merge can be used to merge the stricter demand information from | |
686 | next_info into prev_info so that prev_info becomes available to next_info. | |
687 | */ | |
ec99ffab | 688 | |
29331e72 | 689 | enum class sew_lmul_demand_type : unsigned |
ec99ffab | 690 | { |
29331e72 LD |
691 | sew_lmul = demand_flags::DEMAND_SEW_P | demand_flags::DEMAND_LMUL_P, |
692 | ratio_only = demand_flags::DEMAND_RATIO_P, | |
693 | sew_only = demand_flags::DEMAND_SEW_P, | |
694 | ge_sew = demand_flags::DEMAND_GE_SEW_P, | |
695 | ratio_and_ge_sew | |
696 | = demand_flags::DEMAND_RATIO_P | demand_flags::DEMAND_GE_SEW_P, | |
697 | }; | |
ec99ffab | 698 | |
29331e72 | 699 | enum class policy_demand_type : unsigned |
29547511 | 700 | { |
29331e72 LD |
701 | tail_mask_policy |
702 | = demand_flags::DEMAND_TAIL_POLICY_P | demand_flags::DEMAND_MASK_POLICY_P, | |
703 | tail_policy_only = demand_flags::DEMAND_TAIL_POLICY_P, | |
704 | mask_policy_only = demand_flags::DEMAND_MASK_POLICY_P, | |
705 | ignore_policy = demand_flags::DEMAND_EMPTY_P, | |
706 | }; | |
29547511 | 707 | |
29331e72 | 708 | enum class avl_demand_type : unsigned |
ec99ffab | 709 | { |
29331e72 LD |
710 | avl = demand_flags::DEMAND_AVL_P, |
711 | non_zero_avl = demand_flags::DEMAND_NON_ZERO_AVL_P, | |
712 | ignore_avl = demand_flags::DEMAND_EMPTY_P, | |
713 | }; | |
ec99ffab | 714 | |
29331e72 | 715 | class vsetvl_info |
ec99ffab | 716 | { |
29331e72 LD |
717 | private: |
718 | insn_info *m_insn; | |
719 | bb_info *m_bb; | |
720 | rtx m_avl; | |
721 | rtx m_vl; | |
722 | set_info *m_avl_def; | |
723 | uint8_t m_sew; | |
724 | uint8_t m_max_sew; | |
725 | vlmul_type m_vlmul; | |
726 | uint8_t m_ratio; | |
727 | bool m_ta; | |
728 | bool m_ma; | |
729 | ||
730 | sew_lmul_demand_type m_sew_lmul_demand; | |
731 | policy_demand_type m_policy_demand; | |
732 | avl_demand_type m_avl_demand; | |
733 | ||
734 | enum class state_type | |
735 | { | |
736 | UNINITIALIZED, | |
737 | VALID, | |
738 | UNKNOWN, | |
739 | EMPTY, | |
740 | }; | |
741 | state_type m_state; | |
742 | ||
743 | bool m_delete; | |
744 | bool m_change_vtype_only; | |
745 | insn_info *m_read_vl_insn; | |
746 | bool m_vl_used_by_non_rvv_insn; | |
ec99ffab | 747 | |
29331e72 LD |
748 | public: |
749 | vsetvl_info () | |
750 | : m_insn (nullptr), m_bb (nullptr), m_avl (NULL_RTX), m_vl (NULL_RTX), | |
751 | m_avl_def (nullptr), m_sew (0), m_max_sew (0), m_vlmul (LMUL_RESERVED), | |
752 | m_ratio (0), m_ta (false), m_ma (false), | |
753 | m_sew_lmul_demand (sew_lmul_demand_type::sew_lmul), | |
754 | m_policy_demand (policy_demand_type::tail_mask_policy), | |
755 | m_avl_demand (avl_demand_type::avl), m_state (state_type::UNINITIALIZED), | |
756 | m_delete (false), m_change_vtype_only (false), m_read_vl_insn (nullptr), | |
757 | m_vl_used_by_non_rvv_insn (false) | |
758 | {} | |
759 | ||
760 | vsetvl_info (insn_info *insn) : vsetvl_info () { parse_insn (insn); } | |
761 | ||
762 | vsetvl_info (rtx_insn *insn) : vsetvl_info () { parse_insn (insn); } | |
763 | ||
764 | void set_avl (rtx avl) { m_avl = avl; } | |
765 | void set_vl (rtx vl) { m_vl = vl; } | |
766 | void set_avl_def (set_info *avl_def) { m_avl_def = avl_def; } | |
767 | void set_sew (uint8_t sew) { m_sew = sew; } | |
768 | void set_vlmul (vlmul_type vlmul) { m_vlmul = vlmul; } | |
769 | void set_ratio (uint8_t ratio) { m_ratio = ratio; } | |
770 | void set_ta (bool ta) { m_ta = ta; } | |
771 | void set_ma (bool ma) { m_ma = ma; } | |
772 | void set_delete () { m_delete = true; } | |
773 | void set_bb (bb_info *bb) { m_bb = bb; } | |
774 | void set_max_sew (uint8_t max_sew) { m_max_sew = max_sew; } | |
775 | void set_change_vtype_only () { m_change_vtype_only = true; } | |
776 | void set_read_vl_insn (insn_info *insn) { m_read_vl_insn = insn; } | |
777 | ||
778 | rtx get_avl () const { return m_avl; } | |
779 | rtx get_vl () const { return m_vl; } | |
780 | set_info *get_avl_def () const { return m_avl_def; } | |
781 | uint8_t get_sew () const { return m_sew; } | |
782 | vlmul_type get_vlmul () const { return m_vlmul; } | |
783 | uint8_t get_ratio () const { return m_ratio; } | |
784 | bool get_ta () const { return m_ta; } | |
785 | bool get_ma () const { return m_ma; } | |
786 | insn_info *get_insn () const { return m_insn; } | |
787 | bool delete_p () const { return m_delete; } | |
788 | bb_info *get_bb () const { return m_bb; } | |
789 | uint8_t get_max_sew () const { return m_max_sew; } | |
790 | insn_info *get_read_vl_insn () const { return m_read_vl_insn; } | |
4cd4c34a | 791 | bool vl_used_by_non_rvv_insn_p () const { return m_vl_used_by_non_rvv_insn; } |
29331e72 LD |
792 | |
793 | bool has_imm_avl () const { return m_avl && CONST_INT_P (m_avl); } | |
794 | bool has_vlmax_avl () const { return vlmax_avl_p (m_avl); } | |
795 | bool has_nonvlmax_reg_avl () const | |
796 | { | |
797 | return m_avl && REG_P (m_avl) && !has_vlmax_avl (); | |
798 | } | |
799 | bool has_non_zero_avl () const | |
800 | { | |
801 | if (has_imm_avl ()) | |
802 | return INTVAL (m_avl) > 0; | |
803 | return has_vlmax_avl (); | |
804 | } | |
805 | bool has_vl () const | |
806 | { | |
807 | /* The VL operand can only be either a NULL_RTX or a register. */ | |
808 | gcc_assert (!m_vl || REG_P (m_vl)); | |
809 | return m_vl != NULL_RTX; | |
810 | } | |
811 | bool has_same_ratio (const vsetvl_info &other) const | |
812 | { | |
813 | return get_ratio () == other.get_ratio (); | |
814 | } | |
815 | ||
816 | /* The block of INSN isn't always same as the block of the VSETVL_INFO, | |
817 | meaning we may have 'get_insn ()->bb () != get_bb ()'. | |
818 | ||
819 | E.g. BB 2 (Empty) ---> BB 3 (VALID, has rvv insn 1) | |
820 | ||
821 | BB 2 has empty VSETVL_INFO, wheras BB 3 has VSETVL_INFO that satisfies | |
822 | get_insn ()->bb () == get_bb (). In earliest fusion, we may fuse bb 3 and | |
823 | bb 2 so that the 'get_bb ()' of BB2 VSETVL_INFO will be BB2 wheras the | |
824 | 'get_insn ()' of BB2 VSETVL INFO will be the rvv insn 1 (which is located | |
825 | at BB3). */ | |
826 | bool insn_inside_bb_p () const { return get_insn ()->bb () == get_bb (); } | |
827 | void update_avl (const vsetvl_info &other) | |
828 | { | |
829 | m_avl = other.get_avl (); | |
830 | m_vl = other.get_vl (); | |
831 | m_avl_def = other.get_avl_def (); | |
832 | } | |
833 | ||
834 | bool uninit_p () const { return m_state == state_type::UNINITIALIZED; } | |
835 | bool valid_p () const { return m_state == state_type::VALID; } | |
836 | bool unknown_p () const { return m_state == state_type::UNKNOWN; } | |
837 | bool empty_p () const { return m_state == state_type::EMPTY; } | |
838 | bool change_vtype_only_p () const { return m_change_vtype_only; } | |
839 | ||
840 | void set_valid () { m_state = state_type::VALID; } | |
841 | void set_unknown () { m_state = state_type::UNKNOWN; } | |
842 | void set_empty () { m_state = state_type::EMPTY; } | |
843 | ||
844 | void set_sew_lmul_demand (sew_lmul_demand_type demand) | |
845 | { | |
846 | m_sew_lmul_demand = demand; | |
847 | } | |
848 | void set_policy_demand (policy_demand_type demand) | |
849 | { | |
850 | m_policy_demand = demand; | |
851 | } | |
852 | void set_avl_demand (avl_demand_type demand) { m_avl_demand = demand; } | |
853 | ||
854 | sew_lmul_demand_type get_sew_lmul_demand () const | |
855 | { | |
856 | return m_sew_lmul_demand; | |
857 | } | |
858 | policy_demand_type get_policy_demand () const { return m_policy_demand; } | |
859 | avl_demand_type get_avl_demand () const { return m_avl_demand; } | |
860 | ||
861 | void normalize_demand (unsigned demand_flags) | |
862 | { | |
863 | switch (demand_flags | |
864 | & (DEMAND_SEW_P | DEMAND_LMUL_P | DEMAND_RATIO_P | DEMAND_GE_SEW_P)) | |
865 | { | |
866 | case (unsigned) sew_lmul_demand_type::sew_lmul: | |
867 | m_sew_lmul_demand = sew_lmul_demand_type::sew_lmul; | |
868 | break; | |
869 | case (unsigned) sew_lmul_demand_type::ratio_only: | |
870 | m_sew_lmul_demand = sew_lmul_demand_type::ratio_only; | |
871 | break; | |
872 | case (unsigned) sew_lmul_demand_type::sew_only: | |
873 | m_sew_lmul_demand = sew_lmul_demand_type::sew_only; | |
874 | break; | |
875 | case (unsigned) sew_lmul_demand_type::ge_sew: | |
876 | m_sew_lmul_demand = sew_lmul_demand_type::ge_sew; | |
877 | break; | |
878 | case (unsigned) sew_lmul_demand_type::ratio_and_ge_sew: | |
879 | m_sew_lmul_demand = sew_lmul_demand_type::ratio_and_ge_sew; | |
880 | break; | |
881 | default: | |
882 | gcc_unreachable (); | |
883 | } | |
884 | ||
885 | switch (demand_flags & (DEMAND_TAIL_POLICY_P | DEMAND_MASK_POLICY_P)) | |
886 | { | |
887 | case (unsigned) policy_demand_type::tail_mask_policy: | |
888 | m_policy_demand = policy_demand_type::tail_mask_policy; | |
889 | break; | |
890 | case (unsigned) policy_demand_type::tail_policy_only: | |
891 | m_policy_demand = policy_demand_type::tail_policy_only; | |
892 | break; | |
893 | case (unsigned) policy_demand_type::mask_policy_only: | |
894 | m_policy_demand = policy_demand_type::mask_policy_only; | |
895 | break; | |
896 | case (unsigned) policy_demand_type::ignore_policy: | |
897 | m_policy_demand = policy_demand_type::ignore_policy; | |
898 | break; | |
899 | default: | |
900 | gcc_unreachable (); | |
901 | } | |
902 | ||
903 | switch (demand_flags & (DEMAND_AVL_P | DEMAND_NON_ZERO_AVL_P)) | |
904 | { | |
905 | case (unsigned) avl_demand_type::avl: | |
906 | m_avl_demand = avl_demand_type::avl; | |
907 | break; | |
908 | case (unsigned) avl_demand_type::non_zero_avl: | |
909 | m_avl_demand = avl_demand_type::non_zero_avl; | |
910 | break; | |
911 | case (unsigned) avl_demand_type::ignore_avl: | |
912 | m_avl_demand = avl_demand_type::ignore_avl; | |
913 | break; | |
914 | default: | |
915 | gcc_unreachable (); | |
916 | } | |
917 | } | |
918 | ||
919 | void parse_insn (rtx_insn *rinsn) | |
920 | { | |
921 | if (!NONDEBUG_INSN_P (rinsn)) | |
922 | return; | |
923 | if (optimize == 0 && !has_vtype_op (rinsn)) | |
924 | return; | |
925 | gcc_assert (!vsetvl_discard_result_insn_p (rinsn)); | |
926 | set_valid (); | |
927 | extract_insn_cached (rinsn); | |
928 | m_avl = ::get_avl (rinsn); | |
929 | if (has_vlmax_avl () || vsetvl_insn_p (rinsn)) | |
930 | m_vl = ::get_vl (rinsn); | |
931 | m_sew = ::get_sew (rinsn); | |
932 | m_vlmul = ::get_vlmul (rinsn); | |
933 | m_ta = tail_agnostic_p (rinsn); | |
934 | m_ma = mask_agnostic_p (rinsn); | |
935 | } | |
936 | ||
937 | void parse_insn (insn_info *insn) | |
938 | { | |
939 | m_insn = insn; | |
940 | m_bb = insn->bb (); | |
941 | /* Return if it is debug insn for the consistency with optimize == 0. */ | |
942 | if (insn->is_debug_insn ()) | |
943 | return; | |
ec99ffab | 944 | |
29331e72 LD |
945 | /* We set it as unknown since we don't what will happen in CALL or ASM. */ |
946 | if (insn->is_call () || insn->is_asm ()) | |
947 | { | |
948 | set_unknown (); | |
949 | return; | |
950 | } | |
951 | ||
952 | /* If this is something that updates VL/VTYPE that we don't know about, set | |
953 | the state to unknown. */ | |
954 | if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()) | |
955 | && (find_access (insn->defs (), VL_REGNUM) | |
956 | || find_access (insn->defs (), VTYPE_REGNUM))) | |
957 | { | |
958 | set_unknown (); | |
959 | return; | |
960 | } | |
961 | ||
962 | if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())) | |
963 | /* uninitialized */ | |
964 | return; | |
ec99ffab | 965 | |
29331e72 LD |
966 | set_valid (); |
967 | ||
968 | m_avl = ::get_avl (insn->rtl ()); | |
969 | if (m_avl) | |
970 | { | |
971 | if (vsetvl_insn_p (insn->rtl ()) || has_vlmax_avl ()) | |
972 | m_vl = ::get_vl (insn->rtl ()); | |
973 | ||
974 | if (has_nonvlmax_reg_avl ()) | |
975 | m_avl_def = find_access (insn->uses (), REGNO (m_avl))->def (); | |
976 | } | |
977 | ||
978 | m_sew = ::get_sew (insn->rtl ()); | |
979 | m_vlmul = ::get_vlmul (insn->rtl ()); | |
980 | m_ratio = get_attr_ratio (insn->rtl ()); | |
981 | /* when get_attr_ratio is invalid, this kind of instructions | |
982 | doesn't care about ratio. However, we still need this value | |
983 | in demand info backward analysis. */ | |
984 | if (m_ratio == INVALID_ATTRIBUTE) | |
985 | m_ratio = calculate_ratio (m_sew, m_vlmul); | |
986 | m_ta = tail_agnostic_p (insn->rtl ()); | |
987 | m_ma = mask_agnostic_p (insn->rtl ()); | |
988 | ||
989 | /* If merge operand is undef value, we prefer agnostic. */ | |
990 | int merge_op_idx = get_attr_merge_op_idx (insn->rtl ()); | |
991 | if (merge_op_idx != INVALID_ATTRIBUTE | |
992 | && satisfies_constraint_vu (recog_data.operand[merge_op_idx])) | |
993 | { | |
994 | m_ta = true; | |
995 | m_ma = true; | |
996 | } | |
997 | ||
998 | /* Determine the demand info of the RVV insn. */ | |
999 | m_max_sew = get_max_int_sew (); | |
193ef02a | 1000 | unsigned dflags = 0; |
29331e72 LD |
1001 | if (vector_config_insn_p (insn->rtl ())) |
1002 | { | |
193ef02a RS |
1003 | dflags |= demand_flags::DEMAND_AVL_P; |
1004 | dflags |= demand_flags::DEMAND_RATIO_P; | |
29331e72 LD |
1005 | } |
1006 | else | |
1007 | { | |
1008 | if (has_vl_op (insn->rtl ())) | |
1009 | { | |
1010 | if (scalar_move_insn_p (insn->rtl ())) | |
1011 | { | |
1012 | /* If the avl for vmv.s.x comes from the vsetvl instruction, we | |
1013 | don't know if the avl is non-zero, so it is set to | |
1014 | DEMAND_AVL_P for now. it may be corrected to | |
1015 | DEMAND_NON_ZERO_AVL_P later when more information is | |
1016 | available. | |
1017 | */ | |
1018 | if (has_non_zero_avl ()) | |
193ef02a | 1019 | dflags |= demand_flags::DEMAND_NON_ZERO_AVL_P; |
29331e72 | 1020 | else |
193ef02a | 1021 | dflags |= demand_flags::DEMAND_AVL_P; |
29331e72 LD |
1022 | } |
1023 | else | |
193ef02a | 1024 | dflags |= demand_flags::DEMAND_AVL_P; |
29331e72 | 1025 | } |
ec99ffab | 1026 | |
29331e72 | 1027 | if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE) |
193ef02a | 1028 | dflags |= demand_flags::DEMAND_RATIO_P; |
29331e72 LD |
1029 | else |
1030 | { | |
1031 | if (scalar_move_insn_p (insn->rtl ()) && m_ta) | |
1032 | { | |
193ef02a | 1033 | dflags |= demand_flags::DEMAND_GE_SEW_P; |
29331e72 LD |
1034 | m_max_sew = get_attr_type (insn->rtl ()) == TYPE_VFMOVFV |
1035 | ? get_max_float_sew () | |
1036 | : get_max_int_sew (); | |
1037 | } | |
1038 | else | |
193ef02a | 1039 | dflags |= demand_flags::DEMAND_SEW_P; |
29331e72 LD |
1040 | |
1041 | if (!ignore_vlmul_insn_p (insn->rtl ())) | |
193ef02a | 1042 | dflags |= demand_flags::DEMAND_LMUL_P; |
29331e72 | 1043 | } |
ec99ffab | 1044 | |
29331e72 | 1045 | if (!m_ta) |
193ef02a | 1046 | dflags |= demand_flags::DEMAND_TAIL_POLICY_P; |
29331e72 | 1047 | if (!m_ma) |
193ef02a | 1048 | dflags |= demand_flags::DEMAND_MASK_POLICY_P; |
29331e72 LD |
1049 | } |
1050 | ||
193ef02a | 1051 | normalize_demand (dflags); |
29331e72 LD |
1052 | |
1053 | /* Optimize AVL from the vsetvl instruction. */ | |
1054 | insn_info *def_insn = extract_single_source (get_avl_def ()); | |
1055 | if (def_insn && vsetvl_insn_p (def_insn->rtl ())) | |
1056 | { | |
1057 | vsetvl_info def_info = vsetvl_info (def_insn); | |
1058 | if ((scalar_move_insn_p (insn->rtl ()) | |
1059 | || def_info.get_ratio () == get_ratio ()) | |
1060 | && (def_info.has_vlmax_avl () || def_info.has_imm_avl ())) | |
1061 | { | |
1062 | update_avl (def_info); | |
1063 | if (scalar_move_insn_p (insn->rtl ()) && has_non_zero_avl ()) | |
1064 | m_avl_demand = avl_demand_type::non_zero_avl; | |
1065 | } | |
1066 | } | |
1067 | ||
1068 | /* Determine if dest operand(vl) has been used by non-RVV instructions. */ | |
1069 | if (has_vl ()) | |
1070 | { | |
1071 | const hash_set<use_info *> vl_uses | |
1072 | = get_all_real_uses (get_insn (), REGNO (get_vl ())); | |
1073 | for (use_info *use : vl_uses) | |
1074 | { | |
1075 | gcc_assert (use->insn ()->is_real ()); | |
1076 | rtx_insn *rinsn = use->insn ()->rtl (); | |
1077 | if (!has_vl_op (rinsn) | |
1078 | || count_regno_occurrences (rinsn, REGNO (get_vl ())) != 1) | |
1079 | { | |
1080 | m_vl_used_by_non_rvv_insn = true; | |
1081 | break; | |
1082 | } | |
1083 | rtx avl = ::get_avl (rinsn); | |
c2f23514 | 1084 | if (!avl || !REG_P (avl) || REGNO (get_vl ()) != REGNO (avl)) |
29331e72 LD |
1085 | { |
1086 | m_vl_used_by_non_rvv_insn = true; | |
1087 | break; | |
1088 | } | |
1089 | } | |
1090 | } | |
ec99ffab | 1091 | |
29331e72 LD |
1092 | /* Collect the read vl insn for the fault-only-first rvv loads. */ |
1093 | if (fault_first_load_p (insn->rtl ())) | |
1094 | { | |
1095 | for (insn_info *i = insn->next_nondebug_insn (); | |
1096 | i->bb () == insn->bb (); i = i->next_nondebug_insn ()) | |
1097 | { | |
1098 | if (find_access (i->defs (), VL_REGNUM)) | |
1099 | break; | |
1100 | if (i->rtl () && read_vl_insn_p (i->rtl ())) | |
1101 | { | |
1102 | m_read_vl_insn = i; | |
1103 | break; | |
1104 | } | |
1105 | } | |
1106 | } | |
1107 | } | |
1108 | ||
1109 | /* Returns the corresponding vsetvl rtx pat. */ | |
1110 | rtx get_vsetvl_pat (bool ignore_vl = false) const | |
1111 | { | |
1112 | rtx avl = get_avl (); | |
1113 | /* if optimization == 0 and the instruction is vmv.x.s/vfmv.f.s, | |
1114 | set the value of avl to (const_int 0) so that VSETVL PASS will | |
1115 | insert vsetvl correctly.*/ | |
1116 | if (!get_avl ()) | |
1117 | avl = GEN_INT (0); | |
1118 | rtx sew = gen_int_mode (get_sew (), Pmode); | |
1119 | rtx vlmul = gen_int_mode (get_vlmul (), Pmode); | |
1120 | rtx ta = gen_int_mode (get_ta (), Pmode); | |
1121 | rtx ma = gen_int_mode (get_ma (), Pmode); | |
1122 | ||
1123 | if (change_vtype_only_p ()) | |
1124 | return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma); | |
1125 | else if (has_vl () && !ignore_vl) | |
1126 | return gen_vsetvl (Pmode, get_vl (), avl, sew, vlmul, ta, ma); | |
1127 | else | |
1128 | return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma); | |
1129 | } | |
1130 | ||
d82bb518 JZ |
1131 | /* Return true that the non-AVL operands of THIS will be modified |
1132 | if we fuse the VL modification from OTHER into THIS. */ | |
1133 | bool vl_modify_non_avl_op_p (const vsetvl_info &other) const | |
1134 | { | |
1135 | /* We don't need to worry about any operands from THIS be | |
1136 | modified by OTHER vsetvl since we OTHER vsetvl doesn't | |
1137 | modify any operand. */ | |
1138 | if (!other.has_vl ()) | |
1139 | return false; | |
1140 | ||
1141 | /* THIS VL operand always preempt OTHER VL operand. */ | |
1142 | if (this->has_vl ()) | |
1143 | return false; | |
1144 | ||
1145 | /* If THIS has non IMM AVL and THIS is AVL compatible with | |
1146 | OTHER, the AVL value of THIS is same as VL value of OTHER. */ | |
1147 | if (!this->has_imm_avl ()) | |
1148 | return false; | |
1149 | return find_access (this->get_insn ()->uses (), REGNO (other.get_vl ())); | |
1150 | } | |
1151 | ||
29331e72 LD |
1152 | bool operator== (const vsetvl_info &other) const |
1153 | { | |
1154 | gcc_assert (!uninit_p () && !other.uninit_p () | |
1155 | && "Uninitialization should not happen"); | |
1156 | ||
1157 | if (empty_p ()) | |
1158 | return other.empty_p (); | |
1159 | if (unknown_p ()) | |
1160 | return other.unknown_p (); | |
1161 | ||
1162 | return get_insn () == other.get_insn () && get_bb () == other.get_bb () | |
1163 | && get_avl () == other.get_avl () && get_vl () == other.get_vl () | |
1164 | && get_avl_def () == other.get_avl_def () | |
1165 | && get_sew () == other.get_sew () | |
1166 | && get_vlmul () == other.get_vlmul () && get_ta () == other.get_ta () | |
1167 | && get_ma () == other.get_ma () | |
1168 | && get_avl_demand () == other.get_avl_demand () | |
1169 | && get_sew_lmul_demand () == other.get_sew_lmul_demand () | |
1170 | && get_policy_demand () == other.get_policy_demand (); | |
1171 | } | |
1172 | ||
1173 | void dump (FILE *file, const char *indent = "") const | |
1174 | { | |
1175 | if (uninit_p ()) | |
1176 | { | |
1177 | fprintf (file, "UNINITIALIZED.\n"); | |
1178 | return; | |
1179 | } | |
1180 | else if (unknown_p ()) | |
1181 | { | |
1182 | fprintf (file, "UNKNOWN.\n"); | |
1183 | return; | |
1184 | } | |
1185 | else if (empty_p ()) | |
1186 | { | |
1187 | fprintf (file, "EMPTY.\n"); | |
1188 | return; | |
1189 | } | |
1190 | else if (valid_p ()) | |
1191 | fprintf (file, "VALID (insn %u, bb %u)%s\n", get_insn ()->uid (), | |
1192 | get_bb ()->index (), delete_p () ? " (deleted)" : ""); | |
1193 | else | |
1194 | gcc_unreachable (); | |
ec99ffab | 1195 | |
29331e72 LD |
1196 | fprintf (file, "%sDemand fields:", indent); |
1197 | if (m_sew_lmul_demand == sew_lmul_demand_type::sew_lmul) | |
1198 | fprintf (file, " demand_sew_lmul"); | |
1199 | else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_only) | |
1200 | fprintf (file, " demand_ratio_only"); | |
1201 | else if (m_sew_lmul_demand == sew_lmul_demand_type::sew_only) | |
1202 | fprintf (file, " demand_sew_only"); | |
1203 | else if (m_sew_lmul_demand == sew_lmul_demand_type::ge_sew) | |
1204 | fprintf (file, " demand_ge_sew"); | |
1205 | else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_and_ge_sew) | |
1206 | fprintf (file, " demand_ratio_and_ge_sew"); | |
1207 | ||
1208 | if (m_policy_demand == policy_demand_type::tail_mask_policy) | |
1209 | fprintf (file, " demand_tail_mask_policy"); | |
1210 | else if (m_policy_demand == policy_demand_type::tail_policy_only) | |
1211 | fprintf (file, " demand_tail_policy_only"); | |
1212 | else if (m_policy_demand == policy_demand_type::mask_policy_only) | |
1213 | fprintf (file, " demand_mask_policy_only"); | |
1214 | ||
1215 | if (m_avl_demand == avl_demand_type::avl) | |
1216 | fprintf (file, " demand_avl"); | |
1217 | else if (m_avl_demand == avl_demand_type::non_zero_avl) | |
1218 | fprintf (file, " demand_non_zero_avl"); | |
1219 | fprintf (file, "\n"); | |
1220 | ||
1221 | fprintf (file, "%sSEW=%d, ", indent, get_sew ()); | |
1222 | fprintf (file, "VLMUL=%s, ", vlmul_to_str (get_vlmul ())); | |
1223 | fprintf (file, "RATIO=%d, ", get_ratio ()); | |
1224 | fprintf (file, "MAX_SEW=%d\n", get_max_sew ()); | |
1225 | ||
1226 | fprintf (file, "%sTAIL_POLICY=%s, ", indent, policy_to_str (get_ta ())); | |
1227 | fprintf (file, "MASK_POLICY=%s\n", policy_to_str (get_ma ())); | |
1228 | ||
1229 | fprintf (file, "%sAVL=", indent); | |
1230 | print_rtl_single (file, get_avl ()); | |
1231 | fprintf (file, "%sVL=", indent); | |
1232 | print_rtl_single (file, get_vl ()); | |
1233 | if (change_vtype_only_p ()) | |
1234 | fprintf (file, "%schange vtype only\n", indent); | |
1235 | if (get_read_vl_insn ()) | |
1236 | fprintf (file, "%sread_vl_insn: insn %u\n", indent, | |
1237 | get_read_vl_insn ()->uid ()); | |
4cd4c34a | 1238 | if (vl_used_by_non_rvv_insn_p ()) |
29331e72 LD |
1239 | fprintf (file, "%suse_by_non_rvv_insn=true\n", indent); |
1240 | } | |
1241 | }; | |
8fbc0871 | 1242 | |
29331e72 | 1243 | class vsetvl_block_info |
ec99ffab | 1244 | { |
29331e72 LD |
1245 | public: |
1246 | /* The static execute probability of the demand info. */ | |
1247 | profile_probability probability; | |
1248 | ||
4fd09aed JZ |
1249 | auto_vec<vsetvl_info> local_infos; |
1250 | vsetvl_info global_info; | |
1251 | bb_info *bb; | |
29331e72 LD |
1252 | |
1253 | bool full_available; | |
1254 | ||
4fd09aed | 1255 | vsetvl_block_info () : bb (nullptr), full_available (false) |
29331e72 | 1256 | { |
4fd09aed JZ |
1257 | local_infos.safe_grow_cleared (0); |
1258 | global_info.set_empty (); | |
29331e72 LD |
1259 | } |
1260 | vsetvl_block_info (const vsetvl_block_info &other) | |
4fd09aed JZ |
1261 | : probability (other.probability), local_infos (other.local_infos.copy ()), |
1262 | global_info (other.global_info), bb (other.bb) | |
29331e72 LD |
1263 | {} |
1264 | ||
1265 | vsetvl_info &get_entry_info () | |
1266 | { | |
1267 | gcc_assert (!empty_p ()); | |
4fd09aed | 1268 | return local_infos.is_empty () ? global_info : local_infos[0]; |
29331e72 LD |
1269 | } |
1270 | vsetvl_info &get_exit_info () | |
1271 | { | |
1272 | gcc_assert (!empty_p ()); | |
4fd09aed JZ |
1273 | return local_infos.is_empty () ? global_info |
1274 | : local_infos[local_infos.length () - 1]; | |
29331e72 LD |
1275 | } |
1276 | const vsetvl_info &get_entry_info () const | |
1277 | { | |
1278 | gcc_assert (!empty_p ()); | |
4fd09aed | 1279 | return local_infos.is_empty () ? global_info : local_infos[0]; |
29331e72 LD |
1280 | } |
1281 | const vsetvl_info &get_exit_info () const | |
1282 | { | |
1283 | gcc_assert (!empty_p ()); | |
4fd09aed JZ |
1284 | return local_infos.is_empty () ? global_info |
1285 | : local_infos[local_infos.length () - 1]; | |
29331e72 LD |
1286 | } |
1287 | ||
4fd09aed JZ |
1288 | bool empty_p () const { return local_infos.is_empty () && !has_info (); } |
1289 | bool has_info () const { return !global_info.empty_p (); } | |
29331e72 LD |
1290 | void set_info (const vsetvl_info &info) |
1291 | { | |
4fd09aed JZ |
1292 | gcc_assert (local_infos.is_empty ()); |
1293 | global_info = info; | |
1294 | global_info.set_bb (bb); | |
29331e72 | 1295 | } |
4fd09aed | 1296 | void set_empty_info () { global_info.set_empty (); } |
ec99ffab JZZ |
1297 | }; |
1298 | ||
29331e72 LD |
/* Demand system is the RVV-based VSETVL info analysis tools wrapper.
   It defines compatible rules for SEW/LMUL, POLICY and AVL.
   Also, it provides 3 interfaces available_p, compatible_p and
   merge for the VSETVL PASS analysis and optimization.

   - available_p: Determine whether the next info can get the
     available VSETVL status from previous info.
     e.g. bb 2 (demand SEW = 32, LMUL = M2) -> bb 3 (demand RATIO = 16).
     Since bb 2 demand info (SEW/LMUL = 32/2 = 16) satisfies the bb 3
     demand, the VSETVL instruction in bb 3 can be elided.
     available_p (previous, next) is true in such situation.
   - compatible_p: Determine whether prev_info is compatible with next_info
     so that we can have a new merged info that is available to both of them.
   - merge: Merge the stricter demand information from
     next_info into prev_info so that prev_info becomes available to
     next_info.  */
1315 | class demand_system | |
ec99ffab | 1316 | { |
29331e72 LD |
1317 | private: |
1318 | sbitmap *m_avl_def_in; | |
1319 | sbitmap *m_avl_def_out; | |
ec99ffab | 1320 | |
29331e72 | 1321 | /* predictors. */ |
ec99ffab | 1322 | |
29331e72 LD |
1323 | inline bool always_true (const vsetvl_info &prev ATTRIBUTE_UNUSED, |
1324 | const vsetvl_info &next ATTRIBUTE_UNUSED) | |
1325 | { | |
1326 | return true; | |
1327 | } | |
1328 | inline bool always_false (const vsetvl_info &prev ATTRIBUTE_UNUSED, | |
1329 | const vsetvl_info &next ATTRIBUTE_UNUSED) | |
1330 | { | |
ec99ffab | 1331 | return false; |
29331e72 LD |
1332 | } |
1333 | ||
1334 | /* predictors for sew and lmul */ | |
1335 | ||
1336 | inline bool lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1337 | { | |
1338 | return prev.get_vlmul () == next.get_vlmul (); | |
1339 | } | |
1340 | inline bool sew_eq_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1341 | { | |
1342 | return prev.get_sew () == next.get_sew (); | |
1343 | } | |
1344 | inline bool sew_lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1345 | { | |
1346 | return lmul_eq_p (prev, next) && sew_eq_p (prev, next); | |
1347 | } | |
1348 | inline bool sew_ge_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1349 | { | |
1350 | return prev.get_sew () == next.get_sew () | |
1351 | || (next.get_ta () && prev.get_sew () > next.get_sew ()); | |
1352 | } | |
1353 | inline bool sew_le_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1354 | { | |
1355 | return prev.get_sew () == next.get_sew () | |
1356 | || (prev.get_ta () && prev.get_sew () < next.get_sew ()); | |
1357 | } | |
1358 | inline bool prev_sew_le_next_max_sew_p (const vsetvl_info &prev, | |
1359 | const vsetvl_info &next) | |
1360 | { | |
1361 | return prev.get_sew () <= next.get_max_sew (); | |
1362 | } | |
1363 | inline bool next_sew_le_prev_max_sew_p (const vsetvl_info &prev, | |
1364 | const vsetvl_info &next) | |
1365 | { | |
1366 | return next.get_sew () <= prev.get_max_sew (); | |
1367 | } | |
1368 | inline bool max_sew_overlap_p (const vsetvl_info &prev, | |
1369 | const vsetvl_info &next) | |
1370 | { | |
1371 | return !(prev.get_sew () > next.get_max_sew () | |
1372 | || next.get_sew () > prev.get_max_sew ()); | |
1373 | } | |
1374 | inline bool ratio_eq_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1375 | { | |
1376 | return prev.has_same_ratio (next); | |
1377 | } | |
1378 | inline bool prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev, | |
1379 | const vsetvl_info &next) | |
1380 | { | |
1381 | return prev.get_ratio () >= (next.get_sew () / 8); | |
1382 | } | |
1383 | inline bool next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev, | |
1384 | const vsetvl_info &next) | |
1385 | { | |
1386 | return next.get_ratio () >= (prev.get_sew () / 8); | |
1387 | } | |
1388 | ||
1389 | inline bool sew_ge_and_ratio_eq_p (const vsetvl_info &prev, | |
1390 | const vsetvl_info &next) | |
1391 | { | |
1392 | return sew_ge_p (prev, next) && ratio_eq_p (prev, next); | |
1393 | } | |
1394 | inline bool sew_ge_and_prev_sew_le_next_max_sew_p (const vsetvl_info &prev, | |
1395 | const vsetvl_info &next) | |
1396 | { | |
1397 | return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next); | |
1398 | } | |
1399 | inline bool | |
1400 | sew_ge_and_prev_sew_le_next_max_sew_and_next_ratio_valid_for_prev_sew_p ( | |
1401 | const vsetvl_info &prev, const vsetvl_info &next) | |
1402 | { | |
1403 | return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next) | |
1404 | && next_ratio_valid_for_prev_sew_p (prev, next); | |
1405 | } | |
1406 | inline bool sew_le_and_next_sew_le_prev_max_sew_p (const vsetvl_info &prev, | |
1407 | const vsetvl_info &next) | |
1408 | { | |
1409 | return sew_le_p (prev, next) && next_sew_le_prev_max_sew_p (prev, next); | |
1410 | } | |
1411 | inline bool | |
1412 | max_sew_overlap_and_next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev, | |
1413 | const vsetvl_info &next) | |
1414 | { | |
1415 | return next_ratio_valid_for_prev_sew_p (prev, next) | |
1416 | && max_sew_overlap_p (prev, next); | |
1417 | } | |
1418 | inline bool | |
1419 | sew_le_and_next_sew_le_prev_max_sew_and_ratio_eq_p (const vsetvl_info &prev, | |
1420 | const vsetvl_info &next) | |
1421 | { | |
1422 | return sew_le_p (prev, next) && ratio_eq_p (prev, next) | |
1423 | && next_sew_le_prev_max_sew_p (prev, next); | |
1424 | } | |
1425 | inline bool | |
1426 | max_sew_overlap_and_prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev, | |
1427 | const vsetvl_info &next) | |
1428 | { | |
1429 | return prev_ratio_valid_for_next_sew_p (prev, next) | |
1430 | && max_sew_overlap_p (prev, next); | |
1431 | } | |
1432 | inline bool | |
1433 | sew_le_and_next_sew_le_prev_max_sew_and_prev_ratio_valid_for_next_sew_p ( | |
1434 | const vsetvl_info &prev, const vsetvl_info &next) | |
1435 | { | |
1436 | return sew_le_p (prev, next) && prev_ratio_valid_for_next_sew_p (prev, next) | |
1437 | && next_sew_le_prev_max_sew_p (prev, next); | |
1438 | } | |
1439 | inline bool max_sew_overlap_and_ratio_eq_p (const vsetvl_info &prev, | |
1440 | const vsetvl_info &next) | |
1441 | { | |
1442 | return ratio_eq_p (prev, next) && max_sew_overlap_p (prev, next); | |
1443 | } | |
1444 | ||
1445 | /* predictors for tail and mask policy */ | |
1446 | ||
1447 | inline bool tail_policy_eq_p (const vsetvl_info &prev, | |
1448 | const vsetvl_info &next) | |
1449 | { | |
1450 | return prev.get_ta () == next.get_ta (); | |
1451 | } | |
1452 | inline bool mask_policy_eq_p (const vsetvl_info &prev, | |
1453 | const vsetvl_info &next) | |
1454 | { | |
1455 | return prev.get_ma () == next.get_ma (); | |
1456 | } | |
1457 | inline bool tail_mask_policy_eq_p (const vsetvl_info &prev, | |
1458 | const vsetvl_info &next) | |
1459 | { | |
1460 | return tail_policy_eq_p (prev, next) && mask_policy_eq_p (prev, next); | |
1461 | } | |
1462 | ||
1463 | /* predictors for avl */ | |
1464 | ||
1465 | inline bool modify_or_use_vl_p (insn_info *i, const vsetvl_info &info) | |
1466 | { | |
9c16ca93 JZ |
1467 | if (info.has_vl ()) |
1468 | { | |
1469 | if (find_access (i->defs (), REGNO (info.get_vl ()))) | |
1470 | return true; | |
1471 | if (find_access (i->uses (), REGNO (info.get_vl ()))) | |
1472 | { | |
1473 | resource_info resource = full_register (REGNO (info.get_vl ())); | |
1474 | def_lookup dl1 = crtl->ssa->find_def (resource, i); | |
1475 | def_lookup dl2 = crtl->ssa->find_def (resource, info.get_insn ()); | |
1476 | if (dl1.matching_set () || dl2.matching_set ()) | |
1477 | return true; | |
1478 | /* If their VLs are coming from same def, we still want to fuse | |
1479 | their VSETVL demand info to gain better performance. */ | |
1480 | return dl1.prev_def (i) != dl2.prev_def (i); | |
1481 | } | |
1482 | } | |
1483 | return false; | |
29331e72 LD |
1484 | } |
1485 | inline bool modify_avl_p (insn_info *i, const vsetvl_info &info) | |
1486 | { | |
1487 | return info.has_nonvlmax_reg_avl () | |
1488 | && find_access (i->defs (), REGNO (info.get_avl ())); | |
1489 | } | |
1490 | ||
1491 | inline bool modify_reg_between_p (insn_info *prev_insn, insn_info *curr_insn, | |
1492 | unsigned regno) | |
1493 | { | |
1494 | gcc_assert (prev_insn->compare_with (curr_insn) < 0); | |
1495 | for (insn_info *i = curr_insn->prev_nondebug_insn (); i != prev_insn; | |
1496 | i = i->prev_nondebug_insn ()) | |
1497 | { | |
1498 | // no def of regno | |
1499 | if (find_access (i->defs (), regno)) | |
1500 | return true; | |
1501 | } | |
1502 | return false; | |
1503 | } | |
ec99ffab | 1504 | |
29331e72 LD |
1505 | inline bool reg_avl_equal_p (const vsetvl_info &prev, const vsetvl_info &next) |
1506 | { | |
1507 | if (!prev.has_nonvlmax_reg_avl () || !next.has_nonvlmax_reg_avl ()) | |
1508 | return false; | |
ec99ffab | 1509 | |
29331e72 LD |
1510 | if (same_equiv_note_p (prev.get_avl_def (), next.get_avl_def ())) |
1511 | return true; | |
ec99ffab | 1512 | |
29331e72 LD |
1513 | if (REGNO (prev.get_avl ()) != REGNO (next.get_avl ())) |
1514 | return false; | |
ec99ffab | 1515 | |
29331e72 LD |
1516 | insn_info *prev_insn = prev.get_insn (); |
1517 | if (prev.get_bb () != prev_insn->bb ()) | |
1518 | prev_insn = prev.get_bb ()->end_insn (); | |
ec99ffab | 1519 | |
29331e72 LD |
1520 | insn_info *next_insn = next.get_insn (); |
1521 | if (next.get_bb () != next_insn->bb ()) | |
1522 | next_insn = next.get_bb ()->end_insn (); | |
ec99ffab | 1523 | |
29331e72 LD |
1524 | return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false); |
1525 | } | |
ec99ffab | 1526 | |
29331e72 LD |
1527 | inline bool avl_equal_p (const vsetvl_info &prev, const vsetvl_info &next) |
1528 | { | |
1529 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
ec99ffab | 1530 | |
4cd4c34a | 1531 | if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ()) |
29331e72 | 1532 | return false; |
e030af3e | 1533 | |
29331e72 LD |
1534 | if (vector_config_insn_p (prev.get_insn ()->rtl ()) && next.get_avl_def () |
1535 | && next.get_avl_def ()->insn () == prev.get_insn ()) | |
1536 | return true; | |
e030af3e | 1537 | |
29331e72 LD |
1538 | if (prev.get_read_vl_insn ()) |
1539 | { | |
1540 | if (!next.has_nonvlmax_reg_avl () || !next.get_avl_def ()) | |
1541 | return false; | |
1542 | insn_info *avl_def_insn = extract_single_source (next.get_avl_def ()); | |
1543 | return avl_def_insn == prev.get_read_vl_insn (); | |
1544 | } | |
1545 | ||
1546 | if (prev == next && prev.has_nonvlmax_reg_avl ()) | |
1547 | { | |
1548 | insn_info *insn = prev.get_insn (); | |
1549 | bb_info *bb = insn->bb (); | |
1550 | for (insn_info *i = insn; real_insn_and_same_bb_p (i, bb); | |
1551 | i = i->next_nondebug_insn ()) | |
1552 | if (find_access (i->defs (), REGNO (prev.get_avl ()))) | |
e030af3e | 1553 | return false; |
29331e72 | 1554 | } |
60bd33bc | 1555 | |
29331e72 LD |
1556 | if (prev.has_vlmax_avl () && next.has_vlmax_avl ()) |
1557 | return true; | |
1558 | else if (prev.has_imm_avl () && next.has_imm_avl ()) | |
1559 | return INTVAL (prev.get_avl ()) == INTVAL (next.get_avl ()); | |
1560 | else if (prev.has_vl () && next.has_nonvlmax_reg_avl () | |
1561 | && REGNO (prev.get_vl ()) == REGNO (next.get_avl ())) | |
1562 | { | |
1563 | insn_info *prev_insn = prev.insn_inside_bb_p () | |
1564 | ? prev.get_insn () | |
1565 | : prev.get_bb ()->end_insn (); | |
1566 | ||
1567 | insn_info *next_insn = next.insn_inside_bb_p () | |
1568 | ? next.get_insn () | |
1569 | : next.get_bb ()->end_insn (); | |
1570 | return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false); | |
1571 | } | |
1572 | else if (prev.has_nonvlmax_reg_avl () && next.has_nonvlmax_reg_avl ()) | |
1573 | return reg_avl_equal_p (prev, next); | |
e030af3e | 1574 | |
e030af3e | 1575 | return false; |
29331e72 LD |
1576 | } |
1577 | inline bool avl_equal_or_prev_avl_non_zero_p (const vsetvl_info &prev, | |
1578 | const vsetvl_info &next) | |
1579 | { | |
1580 | return avl_equal_p (prev, next) || prev.has_non_zero_avl (); | |
1581 | } | |
1582 | ||
1583 | inline bool can_use_next_avl_p (const vsetvl_info &prev, | |
1584 | const vsetvl_info &next) | |
1585 | { | |
0c4bd132 JZ |
1586 | /* Forbid the AVL/VL propagation if VL of NEXT is used |
1587 | by non-RVV instructions. This is because: | |
1588 | ||
1589 | bb 2: | |
1590 | PREV: scalar move (no AVL) | |
1591 | bb 3: | |
1592 | NEXT: vsetvl a5(VL), a4(AVL) ... | |
1593 | branch a5,zero | |
1594 | ||
1595 | Since user vsetvl instruction is no side effect instruction | |
1596 | which should be placed in the correct and optimal location | |
1597 | of the program by the previous PASS, it is unreasonable that | |
1598 | VSETVL PASS tries to move it to another places if it used by | |
1599 | non-RVV instructions. | |
1600 | ||
1601 | Note: We only forbid the cases that VL is used by the following | |
1602 | non-RVV instructions which will cause issues. We don't forbid | |
1603 | other cases since it won't cause correctness issues and we still | |
1604 | more demand info are fused backward. The later LCM algorithm | |
1605 | should know the optimal location of the vsetvl. */ | |
1606 | if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ()) | |
1607 | return false; | |
1608 | ||
29331e72 LD |
1609 | if (!next.has_nonvlmax_reg_avl () && !next.has_vl ()) |
1610 | return true; | |
e030af3e | 1611 | |
29331e72 LD |
1612 | insn_info *prev_insn = prev.get_insn (); |
1613 | if (prev.get_bb () != prev_insn->bb ()) | |
1614 | prev_insn = prev.get_bb ()->end_insn (); | |
1615 | ||
1616 | insn_info *next_insn = next.get_insn (); | |
1617 | if (next.get_bb () != next_insn->bb ()) | |
1618 | next_insn = next.get_bb ()->end_insn (); | |
1619 | ||
1620 | return avl_vl_unmodified_between_p (prev_insn, next_insn, next); | |
1621 | } | |
1622 | ||
1623 | inline bool avl_equal_or_next_avl_non_zero_and_can_use_next_avl_p ( | |
1624 | const vsetvl_info &prev, const vsetvl_info &next) | |
1625 | { | |
1626 | return avl_equal_p (prev, next) | |
1627 | || (next.has_non_zero_avl () && can_use_next_avl_p (prev, next)); | |
1628 | } | |
1629 | ||
1630 | /* modifiers */ | |
1631 | ||
1632 | inline void nop (const vsetvl_info &prev ATTRIBUTE_UNUSED, | |
1633 | const vsetvl_info &next ATTRIBUTE_UNUSED) | |
1634 | {} | |
1635 | ||
1636 | /* modifiers for sew and lmul */ | |
1637 | ||
1638 | inline void use_min_of_max_sew (vsetvl_info &prev, const vsetvl_info &next) | |
1639 | { | |
1640 | prev.set_max_sew (MIN (prev.get_max_sew (), next.get_max_sew ())); | |
1641 | } | |
1642 | inline void use_next_sew (vsetvl_info &prev, const vsetvl_info &next) | |
1643 | { | |
1644 | prev.set_sew (next.get_sew ()); | |
1645 | use_min_of_max_sew (prev, next); | |
1646 | } | |
1647 | inline void use_max_sew (vsetvl_info &prev, const vsetvl_info &next) | |
1648 | { | |
1649 | auto max_sew = std::max (prev.get_sew (), next.get_sew ()); | |
1650 | prev.set_sew (max_sew); | |
1651 | use_min_of_max_sew (prev, next); | |
1652 | } | |
1653 | inline void use_next_sew_lmul (vsetvl_info &prev, const vsetvl_info &next) | |
1654 | { | |
1655 | use_next_sew (prev, next); | |
1656 | prev.set_vlmul (next.get_vlmul ()); | |
1657 | prev.set_ratio (next.get_ratio ()); | |
1658 | } | |
1659 | inline void use_next_sew_with_prev_ratio (vsetvl_info &prev, | |
1660 | const vsetvl_info &next) | |
1661 | { | |
1662 | use_next_sew (prev, next); | |
1663 | prev.set_vlmul (calculate_vlmul (next.get_sew (), prev.get_ratio ())); | |
1664 | } | |
1665 | inline void modify_lmul_with_next_ratio (vsetvl_info &prev, | |
1666 | const vsetvl_info &next) | |
1667 | { | |
1668 | prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ())); | |
1669 | prev.set_ratio (next.get_ratio ()); | |
1670 | } | |
1671 | ||
1672 | inline void use_max_sew_and_lmul_with_next_ratio (vsetvl_info &prev, | |
1673 | const vsetvl_info &next) | |
1674 | { | |
1675 | prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ())); | |
1676 | use_max_sew (prev, next); | |
1677 | prev.set_ratio (next.get_ratio ()); | |
1678 | } | |
1679 | ||
1680 | inline void use_max_sew_and_lmul_with_prev_ratio (vsetvl_info &prev, | |
1681 | const vsetvl_info &next) | |
1682 | { | |
1683 | auto max_sew = std::max (prev.get_sew (), next.get_sew ()); | |
1684 | prev.set_vlmul (calculate_vlmul (max_sew, prev.get_ratio ())); | |
1685 | prev.set_sew (max_sew); | |
1686 | } | |
1687 | ||
1688 | /* modifiers for tail and mask policy */ | |
1689 | ||
1690 | inline void use_tail_policy (vsetvl_info &prev, const vsetvl_info &next) | |
1691 | { | |
1692 | if (!next.get_ta ()) | |
1693 | prev.set_ta (next.get_ta ()); | |
1694 | } | |
1695 | inline void use_mask_policy (vsetvl_info &prev, const vsetvl_info &next) | |
1696 | { | |
1697 | if (!next.get_ma ()) | |
1698 | prev.set_ma (next.get_ma ()); | |
1699 | } | |
1700 | inline void use_tail_mask_policy (vsetvl_info &prev, const vsetvl_info &next) | |
1701 | { | |
1702 | use_tail_policy (prev, next); | |
1703 | use_mask_policy (prev, next); | |
1704 | } | |
1705 | ||
1706 | /* modifiers for avl */ | |
1707 | ||
1708 | inline void use_next_avl (vsetvl_info &prev, const vsetvl_info &next) | |
1709 | { | |
1710 | gcc_assert (can_use_next_avl_p (prev, next)); | |
1711 | prev.update_avl (next); | |
1712 | } | |
1713 | ||
1714 | inline void use_next_avl_when_not_equal (vsetvl_info &prev, | |
1715 | const vsetvl_info &next) | |
1716 | { | |
1717 | if (avl_equal_p (prev, next)) | |
1718 | return; | |
1719 | gcc_assert (next.has_non_zero_avl ()); | |
1720 | use_next_avl (prev, next); | |
1721 | } | |
e030af3e | 1722 | |
29331e72 LD |
1723 | public: |
1724 | demand_system () : m_avl_def_in (nullptr), m_avl_def_out (nullptr) {} | |
1725 | ||
1726 | void set_avl_in_out_data (sbitmap *m_avl_def_in, sbitmap *m_avl_def_out) | |
1727 | { | |
1728 | m_avl_def_in = m_avl_def_in; | |
1729 | m_avl_def_out = m_avl_def_out; | |
1730 | } | |
1731 | ||
1732 | /* Can we move vsetvl info between prev_insn and next_insn safe? */ | |
1733 | bool avl_vl_unmodified_between_p (insn_info *prev_insn, insn_info *next_insn, | |
1734 | const vsetvl_info &info, | |
1735 | bool ignore_vl = false) | |
1736 | { | |
1737 | gcc_assert ((ignore_vl && info.has_nonvlmax_reg_avl ()) | |
1738 | || (info.has_nonvlmax_reg_avl () || info.has_vl ())); | |
1739 | ||
1740 | gcc_assert (!prev_insn->is_debug_insn () && !next_insn->is_debug_insn ()); | |
1741 | if (prev_insn->bb () == next_insn->bb () | |
1742 | && prev_insn->compare_with (next_insn) < 0) | |
1743 | { | |
1744 | for (insn_info *i = next_insn->prev_nondebug_insn (); i != prev_insn; | |
1745 | i = i->prev_nondebug_insn ()) | |
1746 | { | |
9c16ca93 | 1747 | // no def and use of vl |
29331e72 LD |
1748 | if (!ignore_vl && modify_or_use_vl_p (i, info)) |
1749 | return false; | |
e030af3e | 1750 | |
29331e72 LD |
1751 | // no def of avl |
1752 | if (modify_avl_p (i, info)) | |
1753 | return false; | |
1754 | } | |
1755 | return true; | |
1756 | } | |
1757 | else | |
1758 | { | |
1759 | if (!ignore_vl && info.has_vl ()) | |
1760 | { | |
1761 | bitmap live_out = df_get_live_out (prev_insn->bb ()->cfg_bb ()); | |
1762 | if (bitmap_bit_p (live_out, REGNO (info.get_vl ()))) | |
1763 | return false; | |
1764 | } | |
a2d12abe | 1765 | |
29331e72 LD |
1766 | if (info.has_nonvlmax_reg_avl () && m_avl_def_in && m_avl_def_out) |
1767 | { | |
1768 | bool has_avl_out = false; | |
1769 | unsigned regno = REGNO (info.get_avl ()); | |
1770 | unsigned expr_id; | |
1771 | sbitmap_iterator sbi; | |
1772 | EXECUTE_IF_SET_IN_BITMAP (m_avl_def_out[prev_insn->bb ()->index ()], | |
1773 | 0, expr_id, sbi) | |
1774 | { | |
1775 | if (get_regno (expr_id, last_basic_block_for_fn (cfun)) | |
1776 | != regno) | |
1777 | continue; | |
1778 | has_avl_out = true; | |
1779 | if (!bitmap_bit_p (m_avl_def_in[next_insn->bb ()->index ()], | |
1780 | expr_id)) | |
1781 | return false; | |
1782 | } | |
1783 | if (!has_avl_out) | |
1784 | return false; | |
1785 | } | |
12b23c71 | 1786 | |
29331e72 LD |
1787 | for (insn_info *i = next_insn; i != next_insn->bb ()->head_insn (); |
1788 | i = i->prev_nondebug_insn ()) | |
1789 | { | |
1790 | // no def amd use of vl | |
1791 | if (!ignore_vl && modify_or_use_vl_p (i, info)) | |
1792 | return false; | |
9243c3d1 | 1793 | |
29331e72 LD |
1794 | // no def of avl |
1795 | if (modify_avl_p (i, info)) | |
1796 | return false; | |
1797 | } | |
6b6b9c68 | 1798 | |
29331e72 LD |
1799 | for (insn_info *i = prev_insn->bb ()->end_insn (); i != prev_insn; |
1800 | i = i->prev_nondebug_insn ()) | |
1801 | { | |
1802 | // no def amd use of vl | |
1803 | if (!ignore_vl && modify_or_use_vl_p (i, info)) | |
1804 | return false; | |
1805 | ||
1806 | // no def of avl | |
1807 | if (modify_avl_p (i, info)) | |
1808 | return false; | |
1809 | } | |
1810 | } | |
d875d756 | 1811 | return true; |
29331e72 LD |
1812 | } |
1813 | ||
1814 | bool sew_lmul_compatible_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1815 | { | |
1816 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1817 | sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand (); | |
1818 | sew_lmul_demand_type next_flags = next.get_sew_lmul_demand (); | |
1819 | #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1820 | AVAILABLE_P, FUSE) \ | |
1821 | if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \ | |
1822 | && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \ | |
1823 | return COMPATIBLE_P (prev, next); | |
6b6b9c68 | 1824 | |
29331e72 | 1825 | #include "riscv-vsetvl.def" |
6b6b9c68 | 1826 | |
29331e72 LD |
1827 | gcc_unreachable (); |
1828 | } | |
6b6b9c68 | 1829 | |
29331e72 LD |
1830 | bool sew_lmul_available_p (const vsetvl_info &prev, const vsetvl_info &next) |
1831 | { | |
1832 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1833 | sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand (); | |
1834 | sew_lmul_demand_type next_flags = next.get_sew_lmul_demand (); | |
1835 | #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1836 | AVAILABLE_P, FUSE) \ | |
1837 | if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \ | |
1838 | && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \ | |
1839 | return AVAILABLE_P (prev, next); | |
d875d756 | 1840 | |
29331e72 | 1841 | #include "riscv-vsetvl.def" |
4f673c5e | 1842 | |
29331e72 LD |
1843 | gcc_unreachable (); |
1844 | } | |
1845 | ||
1846 | void merge_sew_lmul (vsetvl_info &prev, const vsetvl_info &next) | |
1847 | { | |
1848 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1849 | sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand (); | |
1850 | sew_lmul_demand_type next_flags = next.get_sew_lmul_demand (); | |
1851 | #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1852 | AVAILABLE_P, FUSE) \ | |
1853 | if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \ | |
1854 | && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \ | |
1855 | { \ | |
1856 | gcc_assert (COMPATIBLE_P (prev, next)); \ | |
1857 | FUSE (prev, next); \ | |
1858 | prev.set_sew_lmul_demand (sew_lmul_demand_type::NEW_FLAGS); \ | |
1859 | return; \ | |
1860 | } | |
9243c3d1 | 1861 | |
29331e72 | 1862 | #include "riscv-vsetvl.def" |
9243c3d1 | 1863 | |
29331e72 LD |
1864 | gcc_unreachable (); |
1865 | } | |
9243c3d1 | 1866 | |
29331e72 LD |
1867 | bool policy_compatible_p (const vsetvl_info &prev, const vsetvl_info &next) |
1868 | { | |
1869 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1870 | policy_demand_type prev_flags = prev.get_policy_demand (); | |
1871 | policy_demand_type next_flags = next.get_policy_demand (); | |
1872 | #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1873 | AVAILABLE_P, FUSE) \ | |
1874 | if (prev_flags == policy_demand_type::PREV_FLAGS \ | |
1875 | && next_flags == policy_demand_type::NEXT_FLAGS) \ | |
1876 | return COMPATIBLE_P (prev, next); | |
9243c3d1 | 1877 | |
29331e72 | 1878 | #include "riscv-vsetvl.def" |
9243c3d1 | 1879 | |
29331e72 LD |
1880 | gcc_unreachable (); |
1881 | } | |
4f673c5e | 1882 | |
29331e72 LD |
1883 | bool policy_available_p (const vsetvl_info &prev, const vsetvl_info &next) |
1884 | { | |
1885 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1886 | policy_demand_type prev_flags = prev.get_policy_demand (); | |
1887 | policy_demand_type next_flags = next.get_policy_demand (); | |
1888 | #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1889 | AVAILABLE_P, FUSE) \ | |
1890 | if (prev_flags == policy_demand_type::PREV_FLAGS \ | |
1891 | && next_flags == policy_demand_type::NEXT_FLAGS) \ | |
1892 | return AVAILABLE_P (prev, next); | |
4f673c5e | 1893 | |
29331e72 | 1894 | #include "riscv-vsetvl.def" |
9243c3d1 | 1895 | |
29331e72 LD |
1896 | gcc_unreachable (); |
1897 | } | |
1898 | ||
1899 | void merge_policy (vsetvl_info &prev, const vsetvl_info &next) | |
1900 | { | |
1901 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1902 | policy_demand_type prev_flags = prev.get_policy_demand (); | |
1903 | policy_demand_type next_flags = next.get_policy_demand (); | |
1904 | #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1905 | AVAILABLE_P, FUSE) \ | |
1906 | if (prev_flags == policy_demand_type::PREV_FLAGS \ | |
1907 | && next_flags == policy_demand_type::NEXT_FLAGS) \ | |
1908 | { \ | |
1909 | gcc_assert (COMPATIBLE_P (prev, next)); \ | |
1910 | FUSE (prev, next); \ | |
1911 | prev.set_policy_demand (policy_demand_type::NEW_FLAGS); \ | |
1912 | return; \ | |
1913 | } | |
9243c3d1 | 1914 | |
29331e72 | 1915 | #include "riscv-vsetvl.def" |
ec99ffab | 1916 | |
29331e72 LD |
1917 | gcc_unreachable (); |
1918 | } | |
9243c3d1 | 1919 | |
d82bb518 JZ |
1920 | bool vl_not_in_conflict_p (const vsetvl_info &prev, const vsetvl_info &next) |
1921 | { | |
1922 | /* We don't fuse this following case: | |
1923 | ||
1924 | li a5, -1 | |
1925 | vmv.s.x v0, a5 -- PREV | |
1926 | vsetvli a5, ... -- NEXT | |
1927 | ||
1928 | Don't fuse NEXT into PREV. | |
1929 | */ | |
1930 | return !prev.vl_modify_non_avl_op_p (next) | |
1931 | && !next.vl_modify_non_avl_op_p (prev); | |
1932 | } | |
1933 | ||
29331e72 LD |
1934 | bool avl_compatible_p (const vsetvl_info &prev, const vsetvl_info &next) |
1935 | { | |
1936 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1937 | avl_demand_type prev_flags = prev.get_avl_demand (); | |
1938 | avl_demand_type next_flags = next.get_avl_demand (); | |
1939 | #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1940 | AVAILABLE_P, FUSE) \ | |
1941 | if (prev_flags == avl_demand_type::PREV_FLAGS \ | |
1942 | && next_flags == avl_demand_type::NEXT_FLAGS) \ | |
1943 | return COMPATIBLE_P (prev, next); | |
9243c3d1 | 1944 | |
29331e72 | 1945 | #include "riscv-vsetvl.def" |
9243c3d1 | 1946 | |
29331e72 LD |
1947 | gcc_unreachable (); |
1948 | } | |
9243c3d1 | 1949 | |
29331e72 LD |
1950 | bool avl_available_p (const vsetvl_info &prev, const vsetvl_info &next) |
1951 | { | |
1952 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1953 | avl_demand_type prev_flags = prev.get_avl_demand (); | |
1954 | avl_demand_type next_flags = next.get_avl_demand (); | |
1955 | #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1956 | AVAILABLE_P, FUSE) \ | |
1957 | if (prev_flags == avl_demand_type::PREV_FLAGS \ | |
1958 | && next_flags == avl_demand_type::NEXT_FLAGS) \ | |
1959 | return AVAILABLE_P (prev, next); | |
9243c3d1 | 1960 | |
29331e72 | 1961 | #include "riscv-vsetvl.def" |
9243c3d1 | 1962 | |
29331e72 LD |
1963 | gcc_unreachable (); |
1964 | } | |
1965 | ||
1966 | void merge_avl (vsetvl_info &prev, const vsetvl_info &next) | |
1967 | { | |
1968 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1969 | avl_demand_type prev_flags = prev.get_avl_demand (); | |
1970 | avl_demand_type next_flags = next.get_avl_demand (); | |
1971 | #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1972 | AVAILABLE_P, FUSE) \ | |
1973 | if (prev_flags == avl_demand_type::PREV_FLAGS \ | |
1974 | && next_flags == avl_demand_type::NEXT_FLAGS) \ | |
1975 | { \ | |
1976 | gcc_assert (COMPATIBLE_P (prev, next)); \ | |
1977 | FUSE (prev, next); \ | |
1978 | prev.set_avl_demand (avl_demand_type::NEW_FLAGS); \ | |
1979 | return; \ | |
60bd33bc JZZ |
1980 | } |
1981 | ||
29331e72 | 1982 | #include "riscv-vsetvl.def" |
9243c3d1 | 1983 | |
29331e72 LD |
1984 | gcc_unreachable (); |
1985 | } | |
1986 | ||
1987 | bool compatible_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1988 | { | |
1989 | bool compatible_p = sew_lmul_compatible_p (prev, next) | |
1990 | && policy_compatible_p (prev, next) | |
d82bb518 JZ |
1991 | && avl_compatible_p (prev, next) |
1992 | && vl_not_in_conflict_p (prev, next); | |
29331e72 LD |
1993 | return compatible_p; |
1994 | } | |
1995 | ||
1996 | bool available_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1997 | { | |
1998 | bool available_p = sew_lmul_available_p (prev, next) | |
1999 | && policy_available_p (prev, next) | |
d82bb518 JZ |
2000 | && avl_available_p (prev, next) |
2001 | && vl_not_in_conflict_p (prev, next); | |
29331e72 LD |
2002 | gcc_assert (!available_p || compatible_p (prev, next)); |
2003 | return available_p; | |
2004 | } | |
2005 | ||
2006 | void merge (vsetvl_info &prev, const vsetvl_info &next) | |
2007 | { | |
2008 | gcc_assert (compatible_p (prev, next)); | |
2009 | merge_sew_lmul (prev, next); | |
2010 | merge_policy (prev, next); | |
2011 | merge_avl (prev, next); | |
2012 | gcc_assert (available_p (prev, next)); | |
2013 | } | |
2014 | }; | |
9243c3d1 | 2015 | |
9243c3d1 | 2016 | |
29331e72 | 2017 | class pre_vsetvl |
9243c3d1 | 2018 | { |
29331e72 LD |
2019 | private: |
2020 | demand_system m_dem; | |
2021 | auto_vec<vsetvl_block_info> m_vector_block_infos; | |
2022 | ||
2023 | /* data for avl reaching defintion. */ | |
2024 | sbitmap m_avl_regs; | |
2025 | sbitmap *m_avl_def_in; | |
2026 | sbitmap *m_avl_def_out; | |
2027 | sbitmap *m_reg_def_loc; | |
2028 | ||
2029 | /* data for vsetvl info reaching defintion. */ | |
2030 | vsetvl_info m_unknow_info; | |
2031 | auto_vec<vsetvl_info *> m_vsetvl_def_exprs; | |
2032 | sbitmap *m_vsetvl_def_in; | |
2033 | sbitmap *m_vsetvl_def_out; | |
2034 | ||
2035 | /* data for lcm */ | |
2036 | auto_vec<vsetvl_info *> m_exprs; | |
2037 | sbitmap *m_avloc; | |
2038 | sbitmap *m_avin; | |
2039 | sbitmap *m_avout; | |
2040 | sbitmap *m_kill; | |
2041 | sbitmap *m_antloc; | |
2042 | sbitmap *m_transp; | |
2043 | sbitmap *m_insert; | |
2044 | sbitmap *m_del; | |
2045 | struct edge_list *m_edges; | |
2046 | ||
2047 | auto_vec<vsetvl_info> m_delete_list; | |
2048 | ||
2049 | vsetvl_block_info &get_block_info (const bb_info *bb) | |
2050 | { | |
2051 | return m_vector_block_infos[bb->index ()]; | |
2052 | } | |
2053 | const vsetvl_block_info &get_block_info (const basic_block bb) const | |
2054 | { | |
2055 | return m_vector_block_infos[bb->index]; | |
2056 | } | |
2057 | ||
2058 | vsetvl_block_info &get_block_info (const basic_block bb) | |
2059 | { | |
2060 | return m_vector_block_infos[bb->index]; | |
2061 | } | |
2062 | ||
2063 | void add_expr (auto_vec<vsetvl_info *> &m_exprs, vsetvl_info &info) | |
2064 | { | |
2065 | for (vsetvl_info *item : m_exprs) | |
2066 | { | |
2067 | if (*item == info) | |
2068 | return; | |
2069 | } | |
2070 | m_exprs.safe_push (&info); | |
2071 | } | |
2072 | ||
2073 | unsigned get_expr_index (auto_vec<vsetvl_info *> &m_exprs, | |
2074 | const vsetvl_info &info) | |
2075 | { | |
2076 | for (size_t i = 0; i < m_exprs.length (); i += 1) | |
2077 | { | |
2078 | if (*m_exprs[i] == info) | |
2079 | return i; | |
2080 | } | |
2081 | gcc_unreachable (); | |
2082 | } | |
2083 | ||
c9d5b46a | 2084 | bool anticipated_exp_p (const vsetvl_info &header_info) |
29331e72 LD |
2085 | { |
2086 | if (!header_info.has_nonvlmax_reg_avl () && !header_info.has_vl ()) | |
2087 | return true; | |
9243c3d1 | 2088 | |
29331e72 LD |
2089 | bb_info *bb = header_info.get_bb (); |
2090 | insn_info *prev_insn = bb->head_insn (); | |
2091 | insn_info *next_insn = header_info.insn_inside_bb_p () | |
2092 | ? header_info.get_insn () | |
2093 | : header_info.get_bb ()->end_insn (); | |
2094 | ||
2095 | return m_dem.avl_vl_unmodified_between_p (prev_insn, next_insn, | |
2096 | header_info); | |
2097 | } | |
2098 | ||
2099 | bool available_exp_p (const vsetvl_info &prev_info, | |
2100 | const vsetvl_info &next_info) | |
2101 | { | |
2102 | return m_dem.available_p (prev_info, next_info); | |
2103 | } | |
2104 | ||
2105 | void compute_probabilities () | |
2106 | { | |
2107 | edge e; | |
2108 | edge_iterator ei; | |
2109 | ||
2110 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2111 | { | |
2112 | basic_block cfg_bb = bb->cfg_bb (); | |
2113 | auto &curr_prob = get_block_info (cfg_bb).probability; | |
2114 | ||
2115 | /* GCC assume entry block (bb 0) are always so | |
2116 | executed so set its probability as "always". */ | |
2117 | if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb) | |
2118 | curr_prob = profile_probability::always (); | |
2119 | /* Exit block (bb 1) is the block we don't need to process. */ | |
2120 | if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb) | |
2121 | continue; | |
9243c3d1 | 2122 | |
29331e72 LD |
2123 | gcc_assert (curr_prob.initialized_p ()); |
2124 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
2125 | { | |
2126 | auto &new_prob = get_block_info (e->dest).probability; | |
2127 | /* Normally, the edge probability should be initialized. | |
2128 | However, some special testing code which is written in | |
2129 | GIMPLE IR style force the edge probility uninitialized, | |
2130 | we conservatively set it as never so that it will not | |
2131 | affect PRE (Phase 3 && Phse 4). */ | |
2132 | if (!e->probability.initialized_p ()) | |
2133 | new_prob = profile_probability::never (); | |
2134 | else if (!new_prob.initialized_p ()) | |
2135 | new_prob = curr_prob * e->probability; | |
2136 | else if (new_prob == profile_probability::always ()) | |
2137 | continue; | |
2138 | else | |
2139 | new_prob += curr_prob * e->probability; | |
2140 | } | |
2141 | } | |
2142 | } | |
2143 | ||
2144 | void insert_vsetvl_insn (enum emit_type emit_type, const vsetvl_info &info) | |
2145 | { | |
2146 | rtx pat = info.get_vsetvl_pat (); | |
2147 | rtx_insn *rinsn = info.get_insn ()->rtl (); | |
2148 | ||
2149 | if (emit_type == EMIT_DIRECT) | |
2150 | { | |
2151 | emit_insn (pat); | |
2152 | if (dump_file) | |
2153 | { | |
2154 | fprintf (dump_file, " Insert vsetvl insn %d:\n", | |
2155 | INSN_UID (get_last_insn ())); | |
2156 | print_rtl_single (dump_file, get_last_insn ()); | |
2157 | } | |
2158 | } | |
2159 | else if (emit_type == EMIT_BEFORE) | |
2160 | { | |
2161 | emit_insn_before (pat, rinsn); | |
2162 | if (dump_file) | |
2163 | { | |
2164 | fprintf (dump_file, " Insert vsetvl insn before insn %d:\n", | |
2165 | INSN_UID (rinsn)); | |
2166 | print_rtl_single (dump_file, PREV_INSN (rinsn)); | |
2167 | } | |
2168 | } | |
2169 | else | |
2170 | { | |
2171 | emit_insn_after (pat, rinsn); | |
2172 | if (dump_file) | |
2173 | { | |
2174 | fprintf (dump_file, " Insert vsetvl insn after insn %d:\n", | |
2175 | INSN_UID (rinsn)); | |
2176 | print_rtl_single (dump_file, NEXT_INSN (rinsn)); | |
2177 | } | |
2178 | } | |
2179 | } | |
2180 | ||
2181 | void change_vsetvl_insn (const vsetvl_info &info) | |
2182 | { | |
2183 | rtx_insn *rinsn = info.get_insn ()->rtl (); | |
2184 | rtx new_pat = info.get_vsetvl_pat (); | |
2185 | ||
2186 | if (dump_file) | |
2187 | { | |
2188 | fprintf (dump_file, " Change insn %d from:\n", INSN_UID (rinsn)); | |
2189 | print_rtl_single (dump_file, rinsn); | |
2190 | } | |
2191 | ||
2192 | validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false); | |
2193 | ||
2194 | if (dump_file) | |
2195 | { | |
2196 | fprintf (dump_file, "\n to:\n"); | |
2197 | print_rtl_single (dump_file, rinsn); | |
2198 | } | |
2199 | } | |
2200 | ||
2201 | void remove_vsetvl_insn (const vsetvl_info &info) | |
2202 | { | |
2203 | rtx_insn *rinsn = info.get_insn ()->rtl (); | |
2204 | if (dump_file) | |
2205 | { | |
2206 | fprintf (dump_file, " Eliminate insn %d:\n", INSN_UID (rinsn)); | |
2207 | print_rtl_single (dump_file, rinsn); | |
2208 | } | |
2209 | if (in_sequence_p ()) | |
2210 | remove_insn (rinsn); | |
2211 | else | |
2212 | delete_insn (rinsn); | |
2213 | } | |
2214 | ||
2215 | bool successors_probability_equal_p (const basic_block cfg_bb) const | |
2216 | { | |
2217 | edge e; | |
2218 | edge_iterator ei; | |
2219 | profile_probability prob = profile_probability::uninitialized (); | |
2220 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
2221 | { | |
2222 | if (prob == profile_probability::uninitialized ()) | |
2223 | prob = m_vector_block_infos[e->dest->index].probability; | |
2224 | else if (prob == m_vector_block_infos[e->dest->index].probability) | |
2225 | continue; | |
2226 | else | |
2227 | /* We pick the highest probability among those incompatible VSETVL | |
2228 | infos. When all incompatible VSTEVL infos have same probability, we | |
2229 | don't pick any of them. */ | |
2230 | return false; | |
2231 | } | |
ec99ffab | 2232 | return true; |
29331e72 LD |
2233 | } |
2234 | ||
923a67f1 | 2235 | bool preds_all_same_avl_and_ratio_p (const vsetvl_info &curr_info) |
29331e72 LD |
2236 | { |
2237 | gcc_assert ( | |
2238 | !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()])); | |
2239 | ||
2240 | unsigned expr_index; | |
2241 | sbitmap_iterator sbi; | |
2242 | EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[curr_info.get_bb ()->index ()], 0, | |
2243 | expr_index, sbi) | |
2244 | { | |
2245 | const vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index]; | |
2246 | if (!prev_info.valid_p () | |
923a67f1 JZ |
2247 | || !m_dem.avl_available_p (prev_info, curr_info) |
2248 | || prev_info.get_ratio () != curr_info.get_ratio ()) | |
29331e72 LD |
2249 | return false; |
2250 | } | |
005fad9d | 2251 | |
005fad9d | 2252 | return true; |
29331e72 | 2253 | } |
005fad9d | 2254 | |
29331e72 LD |
2255 | public: |
2256 | pre_vsetvl () | |
2257 | : m_avl_def_in (nullptr), m_avl_def_out (nullptr), | |
2258 | m_vsetvl_def_in (nullptr), m_vsetvl_def_out (nullptr), m_avloc (nullptr), | |
2259 | m_avin (nullptr), m_avout (nullptr), m_kill (nullptr), m_antloc (nullptr), | |
2260 | m_transp (nullptr), m_insert (nullptr), m_del (nullptr), m_edges (nullptr) | |
2261 | { | |
2262 | /* Initialization of RTL_SSA. */ | |
2263 | calculate_dominance_info (CDI_DOMINATORS); | |
2264 | df_analyze (); | |
2265 | crtl->ssa = new function_info (cfun); | |
2266 | m_vector_block_infos.safe_grow_cleared (last_basic_block_for_fn (cfun)); | |
2267 | compute_probabilities (); | |
2268 | m_unknow_info.set_unknown (); | |
2269 | } | |
2270 | ||
2271 | void finish () | |
2272 | { | |
2273 | free_dominance_info (CDI_DOMINATORS); | |
2274 | if (crtl->ssa->perform_pending_updates ()) | |
2275 | cleanup_cfg (0); | |
2276 | delete crtl->ssa; | |
2277 | crtl->ssa = nullptr; | |
2278 | ||
2279 | if (m_avl_regs) | |
2280 | sbitmap_free (m_avl_regs); | |
2281 | if (m_reg_def_loc) | |
2282 | sbitmap_vector_free (m_reg_def_loc); | |
2283 | ||
2284 | if (m_avl_def_in) | |
2285 | sbitmap_vector_free (m_avl_def_in); | |
2286 | if (m_avl_def_out) | |
2287 | sbitmap_vector_free (m_avl_def_out); | |
2288 | ||
2289 | if (m_vsetvl_def_in) | |
2290 | sbitmap_vector_free (m_vsetvl_def_in); | |
2291 | if (m_vsetvl_def_out) | |
2292 | sbitmap_vector_free (m_vsetvl_def_out); | |
2293 | ||
2294 | if (m_avloc) | |
2295 | sbitmap_vector_free (m_avloc); | |
2296 | if (m_kill) | |
2297 | sbitmap_vector_free (m_kill); | |
2298 | if (m_antloc) | |
2299 | sbitmap_vector_free (m_antloc); | |
2300 | if (m_transp) | |
2301 | sbitmap_vector_free (m_transp); | |
2302 | if (m_insert) | |
2303 | sbitmap_vector_free (m_insert); | |
2304 | if (m_del) | |
2305 | sbitmap_vector_free (m_del); | |
2306 | if (m_avin) | |
2307 | sbitmap_vector_free (m_avin); | |
2308 | if (m_avout) | |
2309 | sbitmap_vector_free (m_avout); | |
2310 | ||
2311 | if (m_edges) | |
2312 | free_edge_list (m_edges); | |
2313 | } | |
2314 | ||
2315 | void compute_avl_def_data (); | |
2316 | void compute_vsetvl_def_data (); | |
2317 | void compute_lcm_local_properties (); | |
2318 | ||
2319 | void fuse_local_vsetvl_info (); | |
2320 | bool earliest_fuse_vsetvl_info (); | |
2321 | void pre_global_vsetvl_info (); | |
2322 | void emit_vsetvl (); | |
2323 | void cleaup (); | |
2324 | void remove_avl_operand (); | |
2325 | void remove_unused_dest_operand (); | |
2326 | ||
2327 | void dump (FILE *file, const char *title) const | |
2328 | { | |
2329 | fprintf (file, "\nVSETVL infos after %s\n\n", title); | |
2330 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2331 | { | |
2332 | const auto &block_info = m_vector_block_infos[bb->index ()]; | |
2333 | fprintf (file, " bb %d:\n", bb->index ()); | |
2334 | fprintf (file, " probability: "); | |
2335 | block_info.probability.dump (file); | |
2336 | fprintf (file, "\n"); | |
2337 | if (!block_info.empty_p ()) | |
2338 | { | |
2339 | fprintf (file, " Header vsetvl info:"); | |
2340 | block_info.get_entry_info ().dump (file, " "); | |
2341 | fprintf (file, " Footer vsetvl info:"); | |
2342 | block_info.get_exit_info ().dump (file, " "); | |
4fd09aed | 2343 | for (const auto &info : block_info.local_infos) |
29331e72 LD |
2344 | { |
2345 | fprintf (file, | |
2346 | " insn %d vsetvl info:", info.get_insn ()->uid ()); | |
2347 | info.dump (file, " "); | |
2348 | } | |
2349 | } | |
2350 | } | |
2351 | } | |
2352 | }; | |
c139f5e1 | 2353 | |
e030af3e | 2354 | void |
29331e72 | 2355 | pre_vsetvl::compute_avl_def_data () |
e030af3e | 2356 | { |
29331e72 LD |
2357 | if (bitmap_empty_p (m_avl_regs)) |
2358 | return; | |
e030af3e | 2359 | |
29331e72 LD |
2360 | unsigned num_regs = GP_REG_LAST + 1; |
2361 | unsigned num_bbs = last_basic_block_for_fn (cfun); | |
9243c3d1 | 2362 | |
29331e72 LD |
2363 | sbitmap *avl_def_loc_temp = sbitmap_vector_alloc (num_bbs, num_regs); |
2364 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
9243c3d1 | 2365 | { |
29331e72 LD |
2366 | bitmap_and (avl_def_loc_temp[bb->index ()], m_avl_regs, |
2367 | m_reg_def_loc[bb->index ()]); | |
2368 | ||
2369 | vsetvl_block_info &block_info = get_block_info (bb); | |
2370 | if (block_info.has_info ()) | |
9243c3d1 | 2371 | { |
29331e72 LD |
2372 | vsetvl_info &footer_info = block_info.get_exit_info (); |
2373 | gcc_assert (footer_info.valid_p ()); | |
2374 | if (footer_info.has_vl ()) | |
2375 | bitmap_set_bit (avl_def_loc_temp[bb->index ()], | |
2376 | REGNO (footer_info.get_vl ())); | |
9243c3d1 JZZ |
2377 | } |
2378 | } | |
9243c3d1 | 2379 | |
29331e72 LD |
2380 | if (m_avl_def_in) |
2381 | sbitmap_vector_free (m_avl_def_in); | |
2382 | if (m_avl_def_out) | |
2383 | sbitmap_vector_free (m_avl_def_out); | |
9243c3d1 | 2384 | |
29331e72 LD |
2385 | unsigned num_exprs = num_bbs * num_regs; |
2386 | sbitmap *avl_def_loc = sbitmap_vector_alloc (num_bbs, num_exprs); | |
2387 | sbitmap *m_kill = sbitmap_vector_alloc (num_bbs, num_exprs); | |
2388 | m_avl_def_in = sbitmap_vector_alloc (num_bbs, num_exprs); | |
2389 | m_avl_def_out = sbitmap_vector_alloc (num_bbs, num_exprs); | |
9243c3d1 | 2390 | |
29331e72 LD |
2391 | bitmap_vector_clear (avl_def_loc, num_bbs); |
2392 | bitmap_vector_clear (m_kill, num_bbs); | |
2393 | bitmap_vector_clear (m_avl_def_out, num_bbs); | |
2394 | ||
2395 | unsigned regno; | |
2396 | sbitmap_iterator sbi; | |
2397 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2398 | EXECUTE_IF_SET_IN_BITMAP (avl_def_loc_temp[bb->index ()], 0, regno, sbi) | |
2399 | { | |
2400 | bitmap_set_bit (avl_def_loc[bb->index ()], | |
2401 | get_expr_id (bb->index (), regno, num_bbs)); | |
2402 | bitmap_set_range (m_kill[bb->index ()], regno * num_bbs, num_bbs); | |
2403 | } | |
2404 | ||
2405 | basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun); | |
2406 | EXECUTE_IF_SET_IN_BITMAP (m_avl_regs, 0, regno, sbi) | |
2407 | bitmap_set_bit (m_avl_def_out[entry->index], | |
2408 | get_expr_id (entry->index, regno, num_bbs)); | |
2409 | ||
2410 | compute_reaching_defintion (avl_def_loc, m_kill, m_avl_def_in, m_avl_def_out); | |
2411 | ||
2412 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
9243c3d1 | 2413 | { |
29331e72 LD |
2414 | fprintf (dump_file, |
2415 | " Compute avl reaching defition data (num_bbs %d, num_regs " | |
2416 | "%d):\n\n", | |
2417 | num_bbs, num_regs); | |
2418 | fprintf (dump_file, " avl_regs: "); | |
2419 | dump_bitmap_file (dump_file, m_avl_regs); | |
2420 | fprintf (dump_file, "\n bitmap data:\n"); | |
2421 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
9243c3d1 | 2422 | { |
29331e72 LD |
2423 | unsigned int i = bb->index (); |
2424 | fprintf (dump_file, " BB %u:\n", i); | |
2425 | fprintf (dump_file, " avl_def_loc:"); | |
2426 | unsigned expr_id; | |
2427 | sbitmap_iterator sbi; | |
2428 | EXECUTE_IF_SET_IN_BITMAP (avl_def_loc[i], 0, expr_id, sbi) | |
ec99ffab | 2429 | { |
29331e72 LD |
2430 | fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs), |
2431 | get_bb_index (expr_id, num_bbs)); | |
2432 | } | |
2433 | fprintf (dump_file, "\n kill:"); | |
2434 | EXECUTE_IF_SET_IN_BITMAP (m_kill[i], 0, expr_id, sbi) | |
2435 | { | |
2436 | fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs), | |
2437 | get_bb_index (expr_id, num_bbs)); | |
2438 | } | |
2439 | fprintf (dump_file, "\n avl_def_in:"); | |
2440 | EXECUTE_IF_SET_IN_BITMAP (m_avl_def_in[i], 0, expr_id, sbi) | |
2441 | { | |
2442 | fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs), | |
2443 | get_bb_index (expr_id, num_bbs)); | |
2444 | } | |
2445 | fprintf (dump_file, "\n avl_def_out:"); | |
2446 | EXECUTE_IF_SET_IN_BITMAP (m_avl_def_out[i], 0, expr_id, sbi) | |
2447 | { | |
2448 | fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs), | |
2449 | get_bb_index (expr_id, num_bbs)); | |
ec99ffab | 2450 | } |
29331e72 | 2451 | fprintf (dump_file, "\n"); |
9243c3d1 JZZ |
2452 | } |
2453 | } | |
2454 | ||
29331e72 LD |
2455 | sbitmap_vector_free (avl_def_loc); |
2456 | sbitmap_vector_free (m_kill); | |
2457 | sbitmap_vector_free (avl_def_loc_temp); | |
9243c3d1 | 2458 | |
29331e72 | 2459 | m_dem.set_avl_in_out_data (m_avl_def_in, m_avl_def_out); |
9243c3d1 JZZ |
2460 | } |
2461 | ||
9243c3d1 | 2462 | void |
29331e72 | 2463 | pre_vsetvl::compute_vsetvl_def_data () |
9243c3d1 | 2464 | { |
29331e72 LD |
2465 | m_vsetvl_def_exprs.truncate (0); |
2466 | add_expr (m_vsetvl_def_exprs, m_unknow_info); | |
2467 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
9243c3d1 | 2468 | { |
29331e72 LD |
2469 | vsetvl_block_info &block_info = get_block_info (bb); |
2470 | if (block_info.empty_p ()) | |
2471 | continue; | |
2472 | vsetvl_info &footer_info = block_info.get_exit_info (); | |
2473 | gcc_assert (footer_info.valid_p () || footer_info.unknown_p ()); | |
2474 | add_expr (m_vsetvl_def_exprs, footer_info); | |
9243c3d1 JZZ |
2475 | } |
2476 | ||
29331e72 LD |
2477 | if (m_vsetvl_def_in) |
2478 | sbitmap_vector_free (m_vsetvl_def_in); | |
2479 | if (m_vsetvl_def_out) | |
2480 | sbitmap_vector_free (m_vsetvl_def_out); | |
9243c3d1 | 2481 | |
29331e72 LD |
2482 | sbitmap *def_loc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), |
2483 | m_vsetvl_def_exprs.length ()); | |
2484 | sbitmap *m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2485 | m_vsetvl_def_exprs.length ()); | |
9243c3d1 | 2486 | |
29331e72 LD |
2487 | m_vsetvl_def_in = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), |
2488 | m_vsetvl_def_exprs.length ()); | |
2489 | m_vsetvl_def_out = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2490 | m_vsetvl_def_exprs.length ()); | |
9243c3d1 | 2491 | |
29331e72 LD |
2492 | bitmap_vector_clear (def_loc, last_basic_block_for_fn (cfun)); |
2493 | bitmap_vector_clear (m_kill, last_basic_block_for_fn (cfun)); | |
2494 | bitmap_vector_clear (m_vsetvl_def_out, last_basic_block_for_fn (cfun)); | |
9243c3d1 | 2495 | |
29331e72 LD |
2496 | for (const bb_info *bb : crtl->ssa->bbs ()) |
2497 | { | |
2498 | vsetvl_block_info &block_info = get_block_info (bb); | |
2499 | if (block_info.empty_p ()) | |
9243c3d1 | 2500 | { |
29331e72 | 2501 | for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i += 1) |
9243c3d1 | 2502 | { |
29331e72 LD |
2503 | const vsetvl_info &info = *m_vsetvl_def_exprs[i]; |
2504 | if (!info.has_nonvlmax_reg_avl ()) | |
2505 | continue; | |
2506 | unsigned int regno; | |
2507 | sbitmap_iterator sbi; | |
2508 | EXECUTE_IF_SET_IN_BITMAP (m_reg_def_loc[bb->index ()], 0, regno, | |
2509 | sbi) | |
2510 | if (regno == REGNO (info.get_avl ())) | |
2511 | { | |
2512 | bitmap_set_bit (m_kill[bb->index ()], i); | |
2513 | bitmap_set_bit (def_loc[bb->index ()], | |
2514 | get_expr_index (m_vsetvl_def_exprs, | |
2515 | m_unknow_info)); | |
2516 | } | |
9243c3d1 | 2517 | } |
29331e72 | 2518 | continue; |
9243c3d1 JZZ |
2519 | } |
2520 | ||
29331e72 LD |
2521 | vsetvl_info &footer_info = block_info.get_exit_info (); |
2522 | bitmap_ones (m_kill[bb->index ()]); | |
2523 | bitmap_set_bit (def_loc[bb->index ()], | |
2524 | get_expr_index (m_vsetvl_def_exprs, footer_info)); | |
9243c3d1 JZZ |
2525 | } |
2526 | ||
29331e72 LD |
2527 | /* Set the def_out of the ENTRY basic block to m_unknow_info expr. */ |
2528 | basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun); | |
2529 | bitmap_set_bit (m_vsetvl_def_out[entry->index], | |
2530 | get_expr_index (m_vsetvl_def_exprs, m_unknow_info)); | |
9243c3d1 | 2531 | |
29331e72 LD |
2532 | compute_reaching_defintion (def_loc, m_kill, m_vsetvl_def_in, |
2533 | m_vsetvl_def_out); | |
2534 | ||
2535 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
e030af3e | 2536 | { |
29331e72 LD |
2537 | fprintf (dump_file, |
2538 | "\n Compute vsetvl info reaching defition data:\n\n"); | |
2539 | fprintf (dump_file, " Expression List (%d):\n", | |
2540 | m_vsetvl_def_exprs.length ()); | |
2541 | for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i++) | |
2542 | { | |
2543 | const auto &info = *m_vsetvl_def_exprs[i]; | |
2544 | fprintf (dump_file, " Expr[%u]: ", i); | |
2545 | info.dump (dump_file, " "); | |
2546 | } | |
2547 | fprintf (dump_file, "\n bitmap data:\n"); | |
2548 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2549 | { | |
2550 | unsigned int i = bb->index (); | |
2551 | fprintf (dump_file, " BB %u:\n", i); | |
2552 | fprintf (dump_file, " def_loc: "); | |
2553 | dump_bitmap_file (dump_file, def_loc[i]); | |
2554 | fprintf (dump_file, " kill: "); | |
2555 | dump_bitmap_file (dump_file, m_kill[i]); | |
2556 | fprintf (dump_file, " vsetvl_def_in: "); | |
2557 | dump_bitmap_file (dump_file, m_vsetvl_def_in[i]); | |
2558 | fprintf (dump_file, " vsetvl_def_out: "); | |
2559 | dump_bitmap_file (dump_file, m_vsetvl_def_out[i]); | |
2560 | } | |
e030af3e | 2561 | } |
4f673c5e | 2562 | |
29331e72 | 2563 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 2564 | { |
29331e72 LD |
2565 | vsetvl_block_info &block_info = get_block_info (bb); |
2566 | if (block_info.empty_p ()) | |
2567 | continue; | |
2568 | vsetvl_info &curr_info = block_info.get_entry_info (); | |
2569 | if (!curr_info.valid_p ()) | |
2570 | continue; | |
2571 | ||
2572 | unsigned int expr_index; | |
2573 | sbitmap_iterator sbi; | |
2574 | gcc_assert ( | |
2575 | !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()])); | |
2576 | bool full_available = true; | |
2577 | EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[bb->index ()], 0, expr_index, | |
2578 | sbi) | |
4f673c5e | 2579 | { |
29331e72 LD |
2580 | vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index]; |
2581 | if (!prev_info.valid_p () | |
2582 | || !m_dem.available_p (prev_info, curr_info)) | |
2583 | { | |
2584 | full_available = false; | |
2585 | break; | |
2586 | } | |
4f673c5e | 2587 | } |
29331e72 | 2588 | block_info.full_available = full_available; |
4f673c5e | 2589 | } |
29331e72 LD |
2590 | |
2591 | sbitmap_vector_free (def_loc); | |
2592 | sbitmap_vector_free (m_kill); | |
e030af3e | 2593 | } |
9243c3d1 | 2594 | |
e030af3e | 2595 | /* Compute the local properties of each recorded expression. |
6b6b9c68 | 2596 | |
e030af3e JZ |
2597 | Local properties are those that are defined by the block, irrespective of |
2598 | other blocks. | |
6b6b9c68 | 2599 | |
e030af3e JZ |
2600 | An expression is transparent in a block if its operands are not modified |
2601 | in the block. | |
6b6b9c68 | 2602 | |
e030af3e JZ |
2603 | An expression is computed (locally available) in a block if it is computed |
2604 | at least once and expression would contain the same value if the | |
2605 | computation was moved to the end of the block. | |
2606 | ||
2607 | An expression is locally anticipatable in a block if it is computed at | |
2608 | least once and expression would contain the same value if the computation | |
2609 | was moved to the beginning of the block. */ | |
2610 | void | |
29331e72 | 2611 | pre_vsetvl::compute_lcm_local_properties () |
6b6b9c68 | 2612 | { |
29331e72 LD |
2613 | m_exprs.truncate (0); |
2614 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2615 | { | |
2616 | vsetvl_block_info &block_info = get_block_info (bb); | |
2617 | if (block_info.empty_p ()) | |
2618 | continue; | |
2619 | vsetvl_info &header_info = block_info.get_entry_info (); | |
2620 | vsetvl_info &footer_info = block_info.get_exit_info (); | |
2621 | gcc_assert (footer_info.valid_p () || footer_info.unknown_p ()); | |
2622 | add_expr (m_exprs, header_info); | |
2623 | add_expr (m_exprs, footer_info); | |
2624 | } | |
2625 | ||
2626 | int num_exprs = m_exprs.length (); | |
2627 | if (m_avloc) | |
2628 | sbitmap_vector_free (m_avloc); | |
2629 | if (m_kill) | |
2630 | sbitmap_vector_free (m_kill); | |
2631 | if (m_antloc) | |
2632 | sbitmap_vector_free (m_antloc); | |
2633 | if (m_transp) | |
2634 | sbitmap_vector_free (m_transp); | |
2635 | if (m_avin) | |
2636 | sbitmap_vector_free (m_avin); | |
2637 | if (m_avout) | |
2638 | sbitmap_vector_free (m_avout); | |
2639 | ||
2640 | m_avloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2641 | m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2642 | m_antloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2643 | m_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2644 | m_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2645 | m_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2646 | ||
2647 | bitmap_vector_clear (m_avloc, last_basic_block_for_fn (cfun)); | |
2648 | bitmap_vector_clear (m_antloc, last_basic_block_for_fn (cfun)); | |
2649 | bitmap_vector_clear (m_transp, last_basic_block_for_fn (cfun)); | |
2650 | ||
e030af3e JZ |
2651 | /* - If T is locally available at the end of a block, then T' must be |
2652 | available at the end of the same block. Since some optimization has | |
2653 | occurred earlier, T' might not be locally available, however, it must | |
2654 | have been previously computed on all paths. As a formula, T at AVLOC(B) | |
2655 | implies that T' at AVOUT(B). | |
2656 | An "available occurrence" is one that is the last occurrence in the | |
2657 | basic block and the operands are not modified by following statements in | |
2658 | the basic block [including this insn]. | |
6b6b9c68 | 2659 | |
e030af3e JZ |
2660 | - If T is locally anticipated at the beginning of a block, then either |
2661 | T', is locally anticipated or it is already available from previous | |
2662 | blocks. As a formula, this means that T at ANTLOC(B) implies that T' at | |
2663 | ANTLOC(B) at AVIN(B). | |
2664 | An "anticipatable occurrence" is one that is the first occurrence in the | |
2665 | basic block, the operands are not modified in the basic block prior | |
2666 | to the occurrence and the output is not used between the start of | |
2667 | the block and the occurrence. */ | |
e030af3e | 2668 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 2669 | { |
29331e72 LD |
2670 | unsigned bb_index = bb->index (); |
2671 | vsetvl_block_info &block_info = get_block_info (bb); | |
9243c3d1 | 2672 | |
29331e72 LD |
2673 | /* Compute m_transp */ |
2674 | if (block_info.empty_p ()) | |
9243c3d1 | 2675 | { |
29331e72 LD |
2676 | bitmap_ones (m_transp[bb_index]); |
2677 | for (int i = 0; i < num_exprs; i += 1) | |
4f673c5e | 2678 | { |
29331e72 LD |
2679 | const vsetvl_info &info = *m_exprs[i]; |
2680 | if (!info.has_nonvlmax_reg_avl () && !info.has_vl ()) | |
2681 | continue; | |
2682 | ||
7b2984ad | 2683 | if (info.has_nonvlmax_reg_avl ()) |
29331e72 | 2684 | { |
7b2984ad JZ |
2685 | unsigned int regno; |
2686 | sbitmap_iterator sbi; | |
2687 | EXECUTE_IF_SET_IN_BITMAP (m_reg_def_loc[bb->index ()], 0, | |
2688 | regno, sbi) | |
2689 | { | |
2690 | if (regno == REGNO (info.get_avl ())) | |
2691 | bitmap_clear_bit (m_transp[bb->index ()], i); | |
2692 | } | |
29331e72 LD |
2693 | } |
2694 | ||
c9d5b46a | 2695 | for (insn_info *insn : bb->real_nondebug_insns ()) |
e030af3e | 2696 | { |
9c16ca93 JZ |
2697 | if (info.has_nonvlmax_reg_avl () |
2698 | && find_access (insn->defs (), REGNO (info.get_avl ()))) | |
e030af3e | 2699 | { |
29331e72 | 2700 | bitmap_clear_bit (m_transp[bb_index], i); |
e030af3e JZ |
2701 | break; |
2702 | } | |
c9d5b46a JZ |
2703 | |
2704 | if (info.has_vl () | |
2705 | && reg_mentioned_p (info.get_vl (), insn->rtl ())) | |
2706 | { | |
2707 | if (find_access (insn->defs (), REGNO (info.get_vl ()))) | |
2708 | /* We can't fuse vsetvl into the blocks that modify the | |
2709 | VL operand since successors of such blocks will need | |
2710 | the value of those blocks are defining. | |
2711 | ||
2712 | bb 4: def a5 | |
2713 | / \ | |
2714 | bb 5:use a5 bb 6:vsetvl a5, 5 | |
2715 | ||
2716 | The example above shows that we can't fuse vsetvl | |
2717 | from bb 6 into bb 4 since the successor bb 5 is using | |
2718 | the value defined in bb 4. */ | |
2719 | ; | |
2720 | else | |
2721 | { | |
2722 | /* We can't fuse vsetvl into the blocks that use the | |
2723 | VL operand which has different value from the | |
2724 | vsetvl info. | |
2725 | ||
2726 | bb 4: def a5 | |
2727 | | | |
2728 | bb 5: use a5 | |
2729 | | | |
2730 | bb 6: def a5 | |
2731 | | | |
2732 | bb 7: use a5 | |
2733 | ||
2734 | The example above shows that we can't fuse vsetvl | |
2735 | from bb 6 into bb 5 since their value is different. | |
2736 | */ | |
2737 | resource_info resource | |
2738 | = full_register (REGNO (info.get_vl ())); | |
2739 | def_lookup dl = crtl->ssa->find_def (resource, insn); | |
2740 | def_info *def | |
2741 | = dl.matching_set_or_last_def_of_prev_group (); | |
db642d60 | 2742 | insn_info *def_insn = extract_single_source (def); |
c9d5b46a JZ |
2743 | if (def_insn && vsetvl_insn_p (def_insn->rtl ())) |
2744 | { | |
2745 | vsetvl_info def_info = vsetvl_info (def_insn); | |
2746 | if (m_dem.compatible_p (def_info, info)) | |
2747 | continue; | |
2748 | } | |
2749 | } | |
2750 | ||
2751 | bitmap_clear_bit (m_transp[bb_index], i); | |
2752 | break; | |
2753 | } | |
e030af3e | 2754 | } |
4f673c5e | 2755 | } |
9243c3d1 | 2756 | |
29331e72 | 2757 | continue; |
9243c3d1 | 2758 | } |
e030af3e | 2759 | |
29331e72 LD |
2760 | vsetvl_info &header_info = block_info.get_entry_info (); |
2761 | vsetvl_info &footer_info = block_info.get_exit_info (); | |
9243c3d1 | 2762 | |
ef21ae5c | 2763 | if (header_info.valid_p () && anticipated_exp_p (header_info)) |
29331e72 LD |
2764 | bitmap_set_bit (m_antloc[bb_index], |
2765 | get_expr_index (m_exprs, header_info)); | |
9243c3d1 | 2766 | |
29331e72 LD |
2767 | if (footer_info.valid_p ()) |
2768 | for (int i = 0; i < num_exprs; i += 1) | |
2769 | { | |
2770 | const vsetvl_info &info = *m_exprs[i]; | |
2771 | if (!info.valid_p ()) | |
2772 | continue; | |
2773 | if (available_exp_p (footer_info, info)) | |
2774 | bitmap_set_bit (m_avloc[bb_index], i); | |
2775 | } | |
2776 | } | |
9243c3d1 | 2777 | |
29331e72 | 2778 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 2779 | { |
29331e72 LD |
2780 | unsigned bb_index = bb->index (); |
2781 | bitmap_ior (m_kill[bb_index], m_transp[bb_index], m_avloc[bb_index]); | |
2782 | bitmap_not (m_kill[bb_index], m_kill[bb_index]); | |
9243c3d1 JZZ |
2783 | } |
2784 | ||
29331e72 | 2785 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 2786 | { |
29331e72 | 2787 | unsigned bb_index = bb->index (); |
9243c3d1 JZZ |
2788 | edge e; |
2789 | edge_iterator ei; | |
29331e72 | 2790 | FOR_EACH_EDGE (e, ei, bb->cfg_bb ()->preds) |
9243c3d1 JZZ |
2791 | if (e->flags & EDGE_COMPLEX) |
2792 | { | |
29331e72 LD |
2793 | bitmap_clear (m_antloc[bb_index]); |
2794 | bitmap_clear (m_transp[bb_index]); | |
9243c3d1 JZZ |
2795 | } |
2796 | } | |
2797 | } | |
2798 | ||
29331e72 LD |
2799 | void |
2800 | pre_vsetvl::fuse_local_vsetvl_info () | |
e030af3e | 2801 | { |
29331e72 LD |
2802 | m_reg_def_loc |
2803 | = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), GP_REG_LAST + 1); | |
2804 | bitmap_vector_clear (m_reg_def_loc, last_basic_block_for_fn (cfun)); | |
2805 | bitmap_ones (m_reg_def_loc[ENTRY_BLOCK_PTR_FOR_FN (cfun)->index]); | |
2806 | ||
2807 | for (bb_info *bb : crtl->ssa->bbs ()) | |
e030af3e | 2808 | { |
29331e72 | 2809 | auto &block_info = get_block_info (bb); |
4fd09aed | 2810 | block_info.bb = bb; |
29331e72 | 2811 | if (dump_file && (dump_flags & TDF_DETAILS)) |
e030af3e | 2812 | { |
29331e72 LD |
2813 | fprintf (dump_file, " Try fuse basic block %d\n", bb->index ()); |
2814 | } | |
2815 | auto_vec<vsetvl_info> infos; | |
2816 | for (insn_info *insn : bb->real_nondebug_insns ()) | |
2817 | { | |
2818 | vsetvl_info curr_info = vsetvl_info (insn); | |
2819 | if (curr_info.valid_p () || curr_info.unknown_p ()) | |
2820 | infos.safe_push (curr_info); | |
2821 | ||
2822 | /* Collecting GP registers modified by the current bb. */ | |
2823 | if (insn->is_real ()) | |
2824 | for (def_info *def : insn->defs ()) | |
2825 | if (def->is_reg () && GP_REG_P (def->regno ())) | |
2826 | bitmap_set_bit (m_reg_def_loc[bb->index ()], def->regno ()); | |
2827 | } | |
e030af3e | 2828 | |
29331e72 LD |
2829 | vsetvl_info prev_info = vsetvl_info (); |
2830 | prev_info.set_empty (); | |
2831 | for (auto &curr_info : infos) | |
2832 | { | |
2833 | if (prev_info.empty_p ()) | |
2834 | prev_info = curr_info; | |
2835 | else if ((curr_info.unknown_p () && prev_info.valid_p ()) | |
2836 | || (curr_info.valid_p () && prev_info.unknown_p ())) | |
2837 | { | |
4fd09aed | 2838 | block_info.local_infos.safe_push (prev_info); |
29331e72 LD |
2839 | prev_info = curr_info; |
2840 | } | |
2841 | else if (curr_info.valid_p () && prev_info.valid_p ()) | |
2842 | { | |
2843 | if (m_dem.available_p (prev_info, curr_info)) | |
e7b585a4 | 2844 | { |
29331e72 | 2845 | if (dump_file && (dump_flags & TDF_DETAILS)) |
e7b585a4 | 2846 | { |
29331e72 LD |
2847 | fprintf (dump_file, |
2848 | " Ignore curr info since prev info " | |
2849 | "available with it:\n"); | |
2850 | fprintf (dump_file, " prev_info: "); | |
2851 | prev_info.dump (dump_file, " "); | |
2852 | fprintf (dump_file, " curr_info: "); | |
2853 | curr_info.dump (dump_file, " "); | |
2854 | fprintf (dump_file, "\n"); | |
e7b585a4 | 2855 | } |
4cd4c34a | 2856 | if (!curr_info.vl_used_by_non_rvv_insn_p () |
29331e72 LD |
2857 | && vsetvl_insn_p (curr_info.get_insn ()->rtl ())) |
2858 | m_delete_list.safe_push (curr_info); | |
e030af3e | 2859 | |
29331e72 LD |
2860 | if (curr_info.get_read_vl_insn ()) |
2861 | prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ()); | |
e030af3e | 2862 | } |
29331e72 | 2863 | else if (m_dem.compatible_p (prev_info, curr_info)) |
e030af3e | 2864 | { |
29331e72 | 2865 | if (dump_file && (dump_flags & TDF_DETAILS)) |
e030af3e | 2866 | { |
29331e72 LD |
2867 | fprintf (dump_file, " Fuse curr info since prev info " |
2868 | "compatible with it:\n"); | |
2869 | fprintf (dump_file, " prev_info: "); | |
2870 | prev_info.dump (dump_file, " "); | |
2871 | fprintf (dump_file, " curr_info: "); | |
2872 | curr_info.dump (dump_file, " "); | |
e030af3e | 2873 | } |
29331e72 LD |
2874 | m_dem.merge (prev_info, curr_info); |
2875 | if (curr_info.get_read_vl_insn ()) | |
2876 | prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ()); | |
2877 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
e030af3e | 2878 | { |
29331e72 LD |
2879 | fprintf (dump_file, " prev_info after fused: "); |
2880 | prev_info.dump (dump_file, " "); | |
2881 | fprintf (dump_file, "\n"); | |
e030af3e | 2882 | } |
e030af3e JZ |
2883 | } |
2884 | else | |
2885 | { | |
29331e72 LD |
2886 | if (dump_file && (dump_flags & TDF_DETAILS)) |
2887 | { | |
2888 | fprintf (dump_file, | |
2889 | " Cannot fuse uncompatible infos:\n"); | |
2890 | fprintf (dump_file, " prev_info: "); | |
2891 | prev_info.dump (dump_file, " "); | |
2892 | fprintf (dump_file, " curr_info: "); | |
2893 | curr_info.dump (dump_file, " "); | |
2894 | } | |
4fd09aed | 2895 | block_info.local_infos.safe_push (prev_info); |
29331e72 | 2896 | prev_info = curr_info; |
e030af3e JZ |
2897 | } |
2898 | } | |
2899 | } | |
29331e72 LD |
2900 | |
2901 | if (prev_info.valid_p () || prev_info.unknown_p ()) | |
4fd09aed | 2902 | block_info.local_infos.safe_push (prev_info); |
e030af3e | 2903 | } |
e030af3e | 2904 | |
29331e72 LD |
2905 | m_avl_regs = sbitmap_alloc (GP_REG_LAST + 1); |
2906 | bitmap_clear (m_avl_regs); | |
2907 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
e030af3e | 2908 | { |
29331e72 LD |
2909 | vsetvl_block_info &block_info = get_block_info (bb); |
2910 | if (block_info.empty_p ()) | |
2911 | continue; | |
2912 | ||
2913 | vsetvl_info &header_info = block_info.get_entry_info (); | |
2914 | if (header_info.valid_p () && header_info.has_nonvlmax_reg_avl ()) | |
e030af3e | 2915 | { |
29331e72 LD |
2916 | gcc_assert (GP_REG_P (REGNO (header_info.get_avl ()))); |
2917 | bitmap_set_bit (m_avl_regs, REGNO (header_info.get_avl ())); | |
e030af3e | 2918 | } |
e030af3e JZ |
2919 | } |
2920 | } | |
2921 | ||
29331e72 | 2922 | |
9243c3d1 | 2923 | bool |
29331e72 | 2924 | pre_vsetvl::earliest_fuse_vsetvl_info () |
9243c3d1 | 2925 | { |
29331e72 LD |
2926 | compute_avl_def_data (); |
2927 | compute_vsetvl_def_data (); | |
2928 | compute_lcm_local_properties (); | |
9243c3d1 | 2929 | |
29331e72 LD |
2930 | unsigned num_exprs = m_exprs.length (); |
2931 | struct edge_list *m_edges = create_edge_list (); | |
2932 | unsigned num_edges = NUM_EDGES (m_edges); | |
2933 | sbitmap *antin | |
2934 | = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2935 | sbitmap *antout | |
2936 | = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
005fad9d | 2937 | |
29331e72 | 2938 | sbitmap *earliest = sbitmap_vector_alloc (num_edges, num_exprs); |
9243c3d1 | 2939 | |
29331e72 LD |
2940 | compute_available (m_avloc, m_kill, m_avout, m_avin); |
2941 | compute_antinout_edge (m_antloc, m_transp, antin, antout); | |
2942 | compute_earliest (m_edges, num_exprs, antin, antout, m_avout, m_kill, | |
2943 | earliest); | |
2944 | ||
2945 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
9243c3d1 | 2946 | { |
29331e72 LD |
2947 | fprintf (dump_file, "\n Compute LCM earliest insert data:\n\n"); |
2948 | fprintf (dump_file, " Expression List (%u):\n", num_exprs); | |
2949 | for (unsigned i = 0; i < num_exprs; i++) | |
9243c3d1 | 2950 | { |
29331e72 LD |
2951 | const auto &info = *m_exprs[i]; |
2952 | fprintf (dump_file, " Expr[%u]: ", i); | |
2953 | info.dump (dump_file, " "); | |
9243c3d1 | 2954 | } |
29331e72 LD |
2955 | fprintf (dump_file, "\n bitmap data:\n"); |
2956 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2957 | { | |
2958 | unsigned int i = bb->index (); | |
2959 | fprintf (dump_file, " BB %u:\n", i); | |
2960 | fprintf (dump_file, " avloc: "); | |
2961 | dump_bitmap_file (dump_file, m_avloc[i]); | |
2962 | fprintf (dump_file, " kill: "); | |
2963 | dump_bitmap_file (dump_file, m_kill[i]); | |
2964 | fprintf (dump_file, " antloc: "); | |
2965 | dump_bitmap_file (dump_file, m_antloc[i]); | |
2966 | fprintf (dump_file, " transp: "); | |
2967 | dump_bitmap_file (dump_file, m_transp[i]); | |
2968 | ||
2969 | fprintf (dump_file, " avin: "); | |
2970 | dump_bitmap_file (dump_file, m_avin[i]); | |
2971 | fprintf (dump_file, " avout: "); | |
2972 | dump_bitmap_file (dump_file, m_avout[i]); | |
2973 | fprintf (dump_file, " antin: "); | |
2974 | dump_bitmap_file (dump_file, antin[i]); | |
2975 | fprintf (dump_file, " antout: "); | |
2976 | dump_bitmap_file (dump_file, antout[i]); | |
2977 | } | |
2978 | fprintf (dump_file, "\n"); | |
2979 | fprintf (dump_file, " earliest:\n"); | |
2980 | for (unsigned ed = 0; ed < num_edges; ed++) | |
2981 | { | |
2982 | edge eg = INDEX_EDGE (m_edges, ed); | |
9243c3d1 | 2983 | |
29331e72 LD |
2984 | if (bitmap_empty_p (earliest[ed])) |
2985 | continue; | |
2986 | fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index, | |
2987 | eg->dest->index); | |
2988 | dump_bitmap_file (dump_file, earliest[ed]); | |
2989 | } | |
2990 | fprintf (dump_file, "\n"); | |
2991 | } | |
9243c3d1 | 2992 | |
29331e72 | 2993 | if (dump_file && (dump_flags & TDF_DETAILS)) |
9243c3d1 | 2994 | { |
29331e72 LD |
2995 | fprintf (dump_file, " Fused global info result:\n"); |
2996 | } | |
9243c3d1 | 2997 | |
29331e72 LD |
2998 | bool changed = false; |
2999 | for (unsigned ed = 0; ed < num_edges; ed++) | |
3000 | { | |
3001 | sbitmap e = earliest[ed]; | |
3002 | if (bitmap_empty_p (e)) | |
9243c3d1 JZZ |
3003 | continue; |
3004 | ||
29331e72 LD |
3005 | unsigned int expr_index; |
3006 | sbitmap_iterator sbi; | |
3007 | EXECUTE_IF_SET_IN_BITMAP (e, 0, expr_index, sbi) | |
ec99ffab | 3008 | { |
29331e72 LD |
3009 | vsetvl_info &curr_info = *m_exprs[expr_index]; |
3010 | if (!curr_info.valid_p ()) | |
3011 | continue; | |
3012 | ||
3013 | edge eg = INDEX_EDGE (m_edges, ed); | |
3014 | if (eg->probability == profile_probability::never ()) | |
3015 | continue; | |
3016 | if (eg->src == ENTRY_BLOCK_PTR_FOR_FN (cfun) | |
3017 | || eg->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) | |
3018 | continue; | |
ff8f9544 | 3019 | |
c9d5b46a JZ |
3020 | /* When multiple set bits in earliest edge, such edge may |
3021 | have infinite loop in preds or succs or multiple conflict | |
3022 | vsetvl expression which make such edge is unrelated. We | |
3023 | don't perform fusion for such situation. */ | |
3024 | if (bitmap_count_bits (e) != 1) | |
3025 | continue; | |
3026 | ||
29331e72 LD |
3027 | vsetvl_block_info &src_block_info = get_block_info (eg->src); |
3028 | vsetvl_block_info &dest_block_info = get_block_info (eg->dest); | |
ff8f9544 | 3029 | |
29331e72 LD |
3030 | if (src_block_info.probability |
3031 | == profile_probability::uninitialized ()) | |
ff8f9544 | 3032 | continue; |
9243c3d1 | 3033 | |
29331e72 | 3034 | if (src_block_info.empty_p ()) |
9243c3d1 | 3035 | { |
29331e72 LD |
3036 | vsetvl_info new_curr_info = curr_info; |
3037 | new_curr_info.set_bb (crtl->ssa->bb (eg->dest)); | |
3038 | bool has_compatible_p = false; | |
3039 | unsigned int def_expr_index; | |
3040 | sbitmap_iterator sbi2; | |
3041 | EXECUTE_IF_SET_IN_BITMAP ( | |
3042 | m_vsetvl_def_in[new_curr_info.get_bb ()->index ()], 0, | |
3043 | def_expr_index, sbi2) | |
9243c3d1 | 3044 | { |
29331e72 LD |
3045 | vsetvl_info &prev_info = *m_vsetvl_def_exprs[def_expr_index]; |
3046 | if (!prev_info.valid_p ()) | |
3047 | continue; | |
3048 | if (m_dem.compatible_p (prev_info, new_curr_info)) | |
9243c3d1 | 3049 | { |
29331e72 LD |
3050 | has_compatible_p = true; |
3051 | break; | |
9243c3d1 | 3052 | } |
9243c3d1 | 3053 | } |
29331e72 | 3054 | if (!has_compatible_p) |
9243c3d1 | 3055 | { |
29331e72 LD |
3056 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3057 | { | |
3058 | fprintf (dump_file, | |
3059 | " Forbidden lift up vsetvl info into bb %u " | |
3060 | "since there is no vsetvl info that reaching in " | |
3061 | "is compatible with it:", | |
3062 | eg->src->index); | |
3063 | curr_info.dump (dump_file, " "); | |
3064 | } | |
3065 | continue; | |
9243c3d1 JZZ |
3066 | } |
3067 | ||
29331e72 | 3068 | if (dump_file && (dump_flags & TDF_DETAILS)) |
e030af3e JZ |
3069 | { |
3070 | fprintf (dump_file, | |
29331e72 LD |
3071 | " Set empty bb %u to info:", eg->src->index); |
3072 | curr_info.dump (dump_file, " "); | |
e030af3e | 3073 | } |
29331e72 LD |
3074 | src_block_info.set_info (curr_info); |
3075 | src_block_info.probability = dest_block_info.probability; | |
3076 | changed = true; | |
9243c3d1 | 3077 | } |
29331e72 LD |
3078 | else if (src_block_info.has_info ()) |
3079 | { | |
3080 | vsetvl_info &prev_info = src_block_info.get_exit_info (); | |
3081 | gcc_assert (prev_info.valid_p ()); | |
3082 | ||
3083 | if (m_dem.compatible_p (prev_info, curr_info)) | |
3084 | { | |
3085 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3086 | { | |
3087 | fprintf (dump_file, " Fuse curr info since prev info " | |
3088 | "compatible with it:\n"); | |
3089 | fprintf (dump_file, " prev_info: "); | |
3090 | prev_info.dump (dump_file, " "); | |
3091 | fprintf (dump_file, " curr_info: "); | |
3092 | curr_info.dump (dump_file, " "); | |
3093 | } | |
3094 | m_dem.merge (prev_info, curr_info); | |
3095 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3096 | { | |
3097 | fprintf (dump_file, " prev_info after fused: "); | |
3098 | prev_info.dump (dump_file, " "); | |
3099 | fprintf (dump_file, "\n"); | |
3100 | } | |
3101 | changed = true; | |
3102 | if (src_block_info.has_info ()) | |
3103 | src_block_info.probability += dest_block_info.probability; | |
3104 | } | |
3105 | else if (src_block_info.has_info () | |
3106 | && !m_dem.compatible_p (prev_info, curr_info)) | |
3107 | { | |
3108 | /* Cancel lift up if probabilities are equal. */ | |
3109 | if (successors_probability_equal_p (eg->src)) | |
3110 | { | |
3111 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3112 | { | |
3113 | fprintf (dump_file, | |
3114 | " Change empty bb %u to from:", | |
3115 | eg->src->index); | |
3116 | prev_info.dump (dump_file, " "); | |
3117 | fprintf (dump_file, | |
3118 | " to (higher probability):"); | |
3119 | curr_info.dump (dump_file, " "); | |
3120 | } | |
3121 | src_block_info.set_empty_info (); | |
3122 | src_block_info.probability | |
3123 | = profile_probability::uninitialized (); | |
3124 | changed = true; | |
3125 | } | |
3126 | /* Choose the one with higher probability. */ | |
3127 | else if (dest_block_info.probability | |
3128 | > src_block_info.probability) | |
3129 | { | |
3130 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3131 | { | |
3132 | fprintf (dump_file, | |
3133 | " Change empty bb %u to from:", | |
3134 | eg->src->index); | |
3135 | prev_info.dump (dump_file, " "); | |
3136 | fprintf (dump_file, | |
3137 | " to (higher probability):"); | |
3138 | curr_info.dump (dump_file, " "); | |
3139 | } | |
3140 | src_block_info.set_info (curr_info); | |
3141 | src_block_info.probability = dest_block_info.probability; | |
3142 | changed = true; | |
3143 | } | |
3144 | } | |
3145 | } | |
3146 | else | |
e030af3e | 3147 | { |
29331e72 LD |
3148 | vsetvl_info &prev_info = src_block_info.get_exit_info (); |
3149 | if (!prev_info.valid_p () | |
3150 | || m_dem.available_p (prev_info, curr_info)) | |
3151 | continue; | |
3152 | ||
3153 | if (m_dem.compatible_p (prev_info, curr_info)) | |
3154 | { | |
3155 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3156 | { | |
3157 | fprintf (dump_file, " Fuse curr info since prev info " | |
3158 | "compatible with it:\n"); | |
3159 | fprintf (dump_file, " prev_info: "); | |
3160 | prev_info.dump (dump_file, " "); | |
3161 | fprintf (dump_file, " curr_info: "); | |
3162 | curr_info.dump (dump_file, " "); | |
3163 | } | |
3164 | m_dem.merge (prev_info, curr_info); | |
3165 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3166 | { | |
3167 | fprintf (dump_file, " prev_info after fused: "); | |
3168 | prev_info.dump (dump_file, " "); | |
3169 | fprintf (dump_file, "\n"); | |
3170 | } | |
3171 | changed = true; | |
3172 | } | |
e030af3e | 3173 | } |
9243c3d1 JZZ |
3174 | } |
3175 | } | |
3176 | ||
0d50facd | 3177 | if (dump_file && (dump_flags & TDF_DETAILS)) |
c919d059 | 3178 | { |
29331e72 | 3179 | fprintf (dump_file, "\n"); |
c919d059 | 3180 | } |
c919d059 | 3181 | |
29331e72 LD |
3182 | sbitmap_vector_free (antin); |
3183 | sbitmap_vector_free (antout); | |
3184 | sbitmap_vector_free (earliest); | |
3185 | free_edge_list (m_edges); | |
c919d059 | 3186 | |
29331e72 | 3187 | return changed; |
c919d059 KC |
3188 | } |
3189 | ||
8421f279 | 3190 | void |
29331e72 | 3191 | pre_vsetvl::pre_global_vsetvl_info () |
c919d059 | 3192 | { |
29331e72 LD |
3193 | compute_avl_def_data (); |
3194 | compute_vsetvl_def_data (); | |
3195 | compute_lcm_local_properties (); | |
c919d059 | 3196 | |
29331e72 LD |
3197 | unsigned num_exprs = m_exprs.length (); |
3198 | m_edges = pre_edge_lcm_avs (num_exprs, m_transp, m_avloc, m_antloc, m_kill, | |
3199 | m_avin, m_avout, &m_insert, &m_del); | |
3200 | unsigned num_edges = NUM_EDGES (m_edges); | |
c919d059 | 3201 | |
29331e72 LD |
3202 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3203 | { | |
3204 | fprintf (dump_file, "\n Compute LCM insert and delete data:\n\n"); | |
3205 | fprintf (dump_file, " Expression List (%u):\n", num_exprs); | |
3206 | for (unsigned i = 0; i < num_exprs; i++) | |
c919d059 | 3207 | { |
29331e72 LD |
3208 | const auto &info = *m_exprs[i]; |
3209 | fprintf (dump_file, " Expr[%u]: ", i); | |
3210 | info.dump (dump_file, " "); | |
c919d059 | 3211 | } |
29331e72 LD |
3212 | fprintf (dump_file, "\n bitmap data:\n"); |
3213 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
c919d059 | 3214 | { |
29331e72 LD |
3215 | unsigned i = bb->index (); |
3216 | fprintf (dump_file, " BB %u:\n", i); | |
3217 | fprintf (dump_file, " avloc: "); | |
3218 | dump_bitmap_file (dump_file, m_avloc[i]); | |
3219 | fprintf (dump_file, " kill: "); | |
3220 | dump_bitmap_file (dump_file, m_kill[i]); | |
3221 | fprintf (dump_file, " antloc: "); | |
3222 | dump_bitmap_file (dump_file, m_antloc[i]); | |
3223 | fprintf (dump_file, " transp: "); | |
3224 | dump_bitmap_file (dump_file, m_transp[i]); | |
3225 | ||
3226 | fprintf (dump_file, " avin: "); | |
3227 | dump_bitmap_file (dump_file, m_avin[i]); | |
3228 | fprintf (dump_file, " avout: "); | |
3229 | dump_bitmap_file (dump_file, m_avout[i]); | |
3230 | fprintf (dump_file, " del: "); | |
3231 | dump_bitmap_file (dump_file, m_del[i]); | |
c919d059 | 3232 | } |
29331e72 LD |
3233 | fprintf (dump_file, "\n"); |
3234 | fprintf (dump_file, " insert:\n"); | |
3235 | for (unsigned ed = 0; ed < num_edges; ed++) | |
8421f279 | 3236 | { |
29331e72 | 3237 | edge eg = INDEX_EDGE (m_edges, ed); |
c919d059 | 3238 | |
29331e72 LD |
3239 | if (bitmap_empty_p (m_insert[ed])) |
3240 | continue; | |
3241 | fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index, | |
3242 | eg->dest->index); | |
3243 | dump_bitmap_file (dump_file, m_insert[ed]); | |
c919d059 | 3244 | } |
29331e72 LD |
3245 | } |
3246 | ||
3247 | /* Remove vsetvl infos as LCM suggest */ | |
3248 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
3249 | { | |
3250 | sbitmap d = m_del[bb->index ()]; | |
3251 | if (bitmap_count_bits (d) == 0) | |
c919d059 | 3252 | continue; |
29331e72 LD |
3253 | gcc_assert (bitmap_count_bits (d) == 1); |
3254 | unsigned expr_index = bitmap_first_set_bit (d); | |
3255 | vsetvl_info &info = *m_exprs[expr_index]; | |
3256 | gcc_assert (info.valid_p ()); | |
3257 | gcc_assert (info.get_bb () == bb); | |
3258 | const vsetvl_block_info &block_info = get_block_info (info.get_bb ()); | |
3259 | gcc_assert (block_info.get_entry_info () == info); | |
3260 | info.set_delete (); | |
3261 | } | |
c919d059 | 3262 | |
ef21ae5c JZ |
3263 | /* Remove vsetvl infos if all precessors are available to the block. */ |
3264 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
3265 | { | |
3266 | vsetvl_block_info &block_info = get_block_info (bb); | |
3267 | if (block_info.empty_p () || !block_info.full_available) | |
3268 | continue; | |
3269 | ||
3270 | vsetvl_info &info = block_info.get_entry_info (); | |
3271 | info.set_delete (); | |
3272 | } | |
3273 | ||
29331e72 LD |
3274 | for (const bb_info *bb : crtl->ssa->bbs ()) |
3275 | { | |
3276 | vsetvl_block_info &block_info = get_block_info (bb); | |
3277 | if (block_info.empty_p ()) | |
3278 | continue; | |
3279 | vsetvl_info &curr_info = block_info.get_entry_info (); | |
3280 | if (curr_info.delete_p ()) | |
c919d059 | 3281 | { |
4fd09aed | 3282 | if (block_info.local_infos.is_empty ()) |
29331e72 | 3283 | continue; |
4fd09aed | 3284 | curr_info = block_info.local_infos[0]; |
c919d059 | 3285 | } |
4cd4c34a | 3286 | if (curr_info.valid_p () && !curr_info.vl_used_by_non_rvv_insn_p () |
923a67f1 | 3287 | && preds_all_same_avl_and_ratio_p (curr_info)) |
29331e72 | 3288 | curr_info.set_change_vtype_only (); |
c919d059 | 3289 | |
29331e72 LD |
3290 | vsetvl_info prev_info = vsetvl_info (); |
3291 | prev_info.set_empty (); | |
4fd09aed | 3292 | for (auto &curr_info : block_info.local_infos) |
c919d059 | 3293 | { |
29331e72 | 3294 | if (prev_info.valid_p () && curr_info.valid_p () |
923a67f1 JZ |
3295 | && m_dem.avl_available_p (prev_info, curr_info) |
3296 | && prev_info.get_ratio () == curr_info.get_ratio ()) | |
29331e72 LD |
3297 | curr_info.set_change_vtype_only (); |
3298 | prev_info = curr_info; | |
c919d059 | 3299 | } |
20c85207 | 3300 | } |
20c85207 JZ |
3301 | } |
3302 | ||
29331e72 LD |
3303 | void |
3304 | pre_vsetvl::emit_vsetvl () | |
20c85207 | 3305 | { |
29331e72 | 3306 | bool need_commit = false; |
20c85207 | 3307 | |
29331e72 | 3308 | for (const bb_info *bb : crtl->ssa->bbs ()) |
20c85207 | 3309 | { |
4fd09aed | 3310 | for (const auto &curr_info : get_block_info (bb).local_infos) |
29331e72 LD |
3311 | { |
3312 | insn_info *insn = curr_info.get_insn (); | |
3313 | if (curr_info.delete_p ()) | |
3314 | { | |
3315 | if (vsetvl_insn_p (insn->rtl ())) | |
3316 | remove_vsetvl_insn (curr_info); | |
3317 | continue; | |
3318 | } | |
3319 | else if (curr_info.valid_p ()) | |
3320 | { | |
3321 | if (vsetvl_insn_p (insn->rtl ())) | |
3322 | { | |
3323 | const vsetvl_info temp = vsetvl_info (insn); | |
3324 | if (!(curr_info == temp)) | |
3325 | { | |
3326 | if (dump_file) | |
3327 | { | |
3328 | fprintf (dump_file, "\n Change vsetvl info from: "); | |
3329 | temp.dump (dump_file, " "); | |
3330 | fprintf (dump_file, " to: "); | |
3331 | curr_info.dump (dump_file, " "); | |
3332 | } | |
3333 | change_vsetvl_insn (curr_info); | |
3334 | } | |
3335 | } | |
3336 | else | |
3337 | { | |
3338 | if (dump_file) | |
3339 | { | |
3340 | fprintf (dump_file, | |
3341 | "\n Insert vsetvl info before insn %d: ", | |
3342 | insn->uid ()); | |
3343 | curr_info.dump (dump_file, " "); | |
3344 | } | |
3345 | insert_vsetvl_insn (EMIT_BEFORE, curr_info); | |
3346 | } | |
3347 | } | |
3348 | } | |
20c85207 | 3349 | } |
20c85207 | 3350 | |
29331e72 | 3351 | for (const vsetvl_info &item : m_delete_list) |
20c85207 | 3352 | { |
29331e72 LD |
3353 | gcc_assert (vsetvl_insn_p (item.get_insn ()->rtl ())); |
3354 | remove_vsetvl_insn (item); | |
20c85207 JZ |
3355 | } |
3356 | ||
d1189cee JZ |
3357 | /* Insert vsetvl info that was not deleted after lift up. */ |
3358 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
3359 | { | |
3360 | const vsetvl_block_info &block_info = get_block_info (bb); | |
3361 | if (!block_info.has_info ()) | |
3362 | continue; | |
3363 | ||
3364 | const vsetvl_info &footer_info = block_info.get_exit_info (); | |
3365 | ||
3366 | if (footer_info.delete_p ()) | |
3367 | continue; | |
3368 | ||
3369 | edge eg; | |
3370 | edge_iterator eg_iterator; | |
3371 | FOR_EACH_EDGE (eg, eg_iterator, bb->cfg_bb ()->succs) | |
3372 | { | |
3373 | gcc_assert (!(eg->flags & EDGE_ABNORMAL)); | |
3374 | if (dump_file) | |
3375 | { | |
3376 | fprintf ( | |
3377 | dump_file, | |
3378 | "\n Insert missed vsetvl info at edge(bb %u -> bb %u): ", | |
3379 | eg->src->index, eg->dest->index); | |
3380 | footer_info.dump (dump_file, " "); | |
3381 | } | |
3382 | start_sequence (); | |
3383 | insert_vsetvl_insn (EMIT_DIRECT, footer_info); | |
3384 | rtx_insn *rinsn = get_insns (); | |
3385 | end_sequence (); | |
3386 | default_rtl_profile (); | |
3387 | insert_insn_on_edge (rinsn, eg); | |
3388 | need_commit = true; | |
3389 | } | |
3390 | } | |
3391 | ||
29331e72 LD |
3392 | /* m_insert vsetvl as LCM suggest. */ |
3393 | for (int ed = 0; ed < NUM_EDGES (m_edges); ed++) | |
20c85207 | 3394 | { |
29331e72 LD |
3395 | edge eg = INDEX_EDGE (m_edges, ed); |
3396 | sbitmap i = m_insert[ed]; | |
3397 | if (bitmap_count_bits (i) < 1) | |
3398 | continue; | |
3399 | ||
3400 | if (bitmap_count_bits (i) > 1) | |
3401 | /* For code with infinite loop (e.g. pr61634.c), The data flow is | |
3402 | completely wrong. */ | |
3403 | continue; | |
3404 | ||
3405 | gcc_assert (bitmap_count_bits (i) == 1); | |
3406 | unsigned expr_index = bitmap_first_set_bit (i); | |
3407 | const vsetvl_info &info = *m_exprs[expr_index]; | |
3408 | gcc_assert (info.valid_p ()); | |
3409 | if (dump_file) | |
20c85207 | 3410 | { |
29331e72 LD |
3411 | fprintf (dump_file, |
3412 | "\n Insert vsetvl info at edge(bb %u -> bb %u): ", | |
3413 | eg->src->index, eg->dest->index); | |
3414 | info.dump (dump_file, " "); | |
20c85207 | 3415 | } |
29331e72 LD |
3416 | rtl_profile_for_edge (eg); |
3417 | start_sequence (); | |
3418 | ||
3419 | insert_vsetvl_insn (EMIT_DIRECT, info); | |
3420 | rtx_insn *rinsn = get_insns (); | |
3421 | end_sequence (); | |
3422 | default_rtl_profile (); | |
3423 | ||
3424 | /* We should not get an abnormal edge here. */ | |
3425 | gcc_assert (!(eg->flags & EDGE_ABNORMAL)); | |
3426 | need_commit = true; | |
3427 | insert_insn_on_edge (rinsn, eg); | |
20c85207 JZ |
3428 | } |
3429 | ||
29331e72 LD |
3430 | if (need_commit) |
3431 | commit_edge_insertions (); | |
20c85207 JZ |
3432 | } |
3433 | ||
9243c3d1 | 3434 | void |
29331e72 | 3435 | pre_vsetvl::cleaup () |
9243c3d1 | 3436 | { |
29331e72 LD |
3437 | remove_avl_operand (); |
3438 | remove_unused_dest_operand (); | |
3439 | } | |
9243c3d1 | 3440 | |
29331e72 LD |
3441 | void |
3442 | pre_vsetvl::remove_avl_operand () | |
3443 | { | |
3444 | basic_block cfg_bb; | |
3445 | rtx_insn *rinsn; | |
3446 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
3447 | FOR_BB_INSNS (cfg_bb, rinsn) | |
3448 | if (NONDEBUG_INSN_P (rinsn) && has_vl_op (rinsn) | |
3449 | && REG_P (get_vl (rinsn))) | |
3450 | { | |
9243c3d1 | 3451 | rtx avl = get_vl (rinsn); |
a2d12abe | 3452 | if (count_regno_occurrences (rinsn, REGNO (avl)) == 1) |
9243c3d1 | 3453 | { |
29331e72 | 3454 | rtx new_pat; |
60bd33bc | 3455 | if (fault_first_load_p (rinsn)) |
29331e72 LD |
3456 | new_pat |
3457 | = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx); | |
60bd33bc JZZ |
3458 | else |
3459 | { | |
3460 | rtx set = single_set (rinsn); | |
3461 | rtx src | |
3462 | = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx); | |
29331e72 LD |
3463 | new_pat = gen_rtx_SET (SET_DEST (set), src); |
3464 | } | |
3465 | if (dump_file) | |
3466 | { | |
3467 | fprintf (dump_file, " Cleanup insn %u's avl operand:\n", | |
3468 | INSN_UID (rinsn)); | |
3469 | print_rtl_single (dump_file, rinsn); | |
60bd33bc | 3470 | } |
29331e72 | 3471 | validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false); |
9243c3d1 JZZ |
3472 | } |
3473 | } | |
20c85207 JZ |
3474 | } |
3475 | ||
6b6b9c68 | 3476 | void |
29331e72 | 3477 | pre_vsetvl::remove_unused_dest_operand () |
20c85207 | 3478 | { |
6b6b9c68 | 3479 | df_analyze (); |
20c85207 JZ |
3480 | basic_block cfg_bb; |
3481 | rtx_insn *rinsn; | |
3482 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
29331e72 LD |
3483 | FOR_BB_INSNS (cfg_bb, rinsn) |
3484 | if (NONDEBUG_INSN_P (rinsn) && vsetvl_insn_p (rinsn)) | |
6b6b9c68 | 3485 | { |
29331e72 LD |
3486 | rtx vl = get_vl (rinsn); |
3487 | vsetvl_info info = vsetvl_info (rinsn); | |
3488 | if (has_no_uses (cfg_bb, rinsn, REGNO (vl))) | |
3489 | if (!info.has_vlmax_avl ()) | |
3490 | { | |
3491 | rtx new_pat = info.get_vsetvl_pat (true); | |
3492 | if (dump_file) | |
3493 | { | |
3494 | fprintf (dump_file, | |
3495 | " Remove vsetvl insn %u's dest(vl) operand since " | |
3496 | "it unused:\n", | |
3497 | INSN_UID (rinsn)); | |
3498 | print_rtl_single (dump_file, rinsn); | |
3499 | } | |
3500 | validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, | |
3501 | false); | |
3502 | } | |
6b6b9c68 | 3503 | } |
6b6b9c68 JZZ |
3504 | } |
3505 | ||
29331e72 LD |
3506 | const pass_data pass_data_vsetvl = { |
3507 | RTL_PASS, /* type */ | |
3508 | "vsetvl", /* name */ | |
3509 | OPTGROUP_NONE, /* optinfo_flags */ | |
3510 | TV_NONE, /* tv_id */ | |
3511 | 0, /* properties_required */ | |
3512 | 0, /* properties_provided */ | |
3513 | 0, /* properties_destroyed */ | |
3514 | 0, /* todo_flags_start */ | |
3515 | 0, /* todo_flags_finish */ | |
3516 | }; | |
9243c3d1 | 3517 | |
29331e72 LD |
3518 | class pass_vsetvl : public rtl_opt_pass |
3519 | { | |
3520 | private: | |
3521 | void simple_vsetvl (); | |
3522 | void lazy_vsetvl (); | |
9243c3d1 | 3523 | |
29331e72 LD |
3524 | public: |
3525 | pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {} | |
9243c3d1 | 3526 | |
29331e72 LD |
3527 | /* opt_pass methods: */ |
3528 | virtual bool gate (function *) final override { return TARGET_VECTOR; } | |
3529 | virtual unsigned int execute (function *) final override; | |
3530 | }; // class pass_vsetvl | |
9243c3d1 | 3531 | |
acc10c79 | 3532 | void |
29331e72 | 3533 | pass_vsetvl::simple_vsetvl () |
acc10c79 | 3534 | { |
29331e72 LD |
3535 | if (dump_file) |
3536 | fprintf (dump_file, "\nEntering Simple VSETVL PASS\n"); | |
acc10c79 | 3537 | |
29331e72 LD |
3538 | basic_block cfg_bb; |
3539 | rtx_insn *rinsn; | |
3540 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
acc10c79 | 3541 | { |
29331e72 | 3542 | FOR_BB_INSNS (cfg_bb, rinsn) |
acc10c79 | 3543 | { |
29331e72 | 3544 | if (!NONDEBUG_INSN_P (rinsn)) |
acc10c79 | 3545 | continue; |
29331e72 LD |
3546 | if (has_vtype_op (rinsn)) |
3547 | { | |
3548 | const auto &info = vsetvl_info (rinsn); | |
3549 | rtx pat = info.get_vsetvl_pat (); | |
3550 | emit_insn_before (pat, rinsn); | |
3551 | if (dump_file) | |
3552 | { | |
3553 | fprintf (dump_file, " Insert vsetvl insn before insn %d:\n", | |
3554 | INSN_UID (rinsn)); | |
3555 | print_rtl_single (dump_file, PREV_INSN (rinsn)); | |
3556 | } | |
3557 | } | |
acc10c79 JZZ |
3558 | } |
3559 | } | |
acc10c79 JZZ |
3560 | } |
3561 | ||
9243c3d1 JZZ |
3562 | /* Lazy vsetvl insertion for optimize > 0. */ |
3563 | void | |
29331e72 | 3564 | pass_vsetvl::lazy_vsetvl () |
9243c3d1 JZZ |
3565 | { |
3566 | if (dump_file) | |
29331e72 LD |
3567 | fprintf (dump_file, "\nEntering Lazy VSETVL PASS\n\n"); |
3568 | ||
3569 | pre_vsetvl pre = pre_vsetvl (); | |
9243c3d1 | 3570 | |
9243c3d1 | 3571 | if (dump_file) |
29331e72 LD |
3572 | fprintf (dump_file, "\nPhase 1: Fuse local vsetvl infos.\n\n"); |
3573 | pre.fuse_local_vsetvl_info (); | |
0d50facd | 3574 | if (dump_file && (dump_flags & TDF_DETAILS)) |
29331e72 | 3575 | pre.dump (dump_file, "phase 1"); |
9243c3d1 | 3576 | |
29331e72 | 3577 | /* Phase 2: Fuse header and footer vsetvl infos between basic blocks. */ |
9243c3d1 | 3578 | if (dump_file) |
29331e72 LD |
3579 | fprintf (dump_file, "\nPhase 2: Lift up vsetvl info.\n\n"); |
3580 | bool changed; | |
3581 | int fused_count = 0; | |
3582 | do | |
3583 | { | |
3584 | if (dump_file) | |
3585 | fprintf (dump_file, " Try lift up %d.\n\n", fused_count); | |
3586 | changed = pre.earliest_fuse_vsetvl_info (); | |
3587 | fused_count += 1; | |
3588 | } while (changed); | |
3589 | ||
0d50facd | 3590 | if (dump_file && (dump_flags & TDF_DETAILS)) |
29331e72 | 3591 | pre.dump (dump_file, "phase 2"); |
9243c3d1 | 3592 | |
29331e72 | 3593 | /* Phase 3: Reducing redundant vsetvl infos using LCM. */ |
9243c3d1 | 3594 | if (dump_file) |
29331e72 LD |
3595 | fprintf (dump_file, "\nPhase 3: Reduce global vsetvl infos.\n\n"); |
3596 | pre.pre_global_vsetvl_info (); | |
3597 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3598 | pre.dump (dump_file, "phase 3"); | |
9243c3d1 | 3599 | |
29331e72 | 3600 | /* Phase 4: Insert, modify and remove vsetvl insns. */ |
9243c3d1 | 3601 | if (dump_file) |
29331e72 LD |
3602 | fprintf (dump_file, |
3603 | "\nPhase 4: Insert, modify and remove vsetvl insns.\n\n"); | |
3604 | pre.emit_vsetvl (); | |
9243c3d1 | 3605 | |
29331e72 | 3606 | /* Phase 5: Cleaup */ |
9243c3d1 | 3607 | if (dump_file) |
29331e72 LD |
3608 | fprintf (dump_file, "\nPhase 5: Cleaup\n\n"); |
3609 | pre.cleaup (); | |
6b6b9c68 | 3610 | |
29331e72 | 3611 | pre.finish (); |
9243c3d1 JZZ |
3612 | } |
3613 | ||
3614 | /* Main entry point for this pass. */ | |
3615 | unsigned int | |
3616 | pass_vsetvl::execute (function *) | |
3617 | { | |
3618 | if (n_basic_blocks_for_fn (cfun) <= 0) | |
3619 | return 0; | |
3620 | ||
ca8fb009 JZZ |
3621 | /* The RVV instruction may change after split which is not a stable |
3622 | instruction. We need to split it here to avoid potential issue | |
3623 | since the VSETVL PASS is insert before split PASS. */ | |
3624 | split_all_insns (); | |
9243c3d1 JZZ |
3625 | |
3626 | /* Early return for there is no vector instructions. */ | |
3627 | if (!has_vector_insn (cfun)) | |
3628 | return 0; | |
3629 | ||
9243c3d1 JZZ |
3630 | if (!optimize) |
3631 | simple_vsetvl (); | |
3632 | else | |
3633 | lazy_vsetvl (); | |
3634 | ||
9243c3d1 JZZ |
3635 | return 0; |
3636 | } | |
3637 | ||
3638 | rtl_opt_pass * | |
3639 | make_pass_vsetvl (gcc::context *ctxt) | |
3640 | { | |
3641 | return new pass_vsetvl (ctxt); | |
3642 | } |