]>
Commit | Line | Data |
---|---|---|
9243c3d1 | 1 | /* VSETVL pass for RISC-V 'V' Extension for GNU compiler. |
c841bde5 | 2 | Copyright (C) 2022-2023 Free Software Foundation, Inc. |
9243c3d1 JZZ |
3 | Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd. |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or(at your option) | |
10 | any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
29331e72 LD |
21 | /* The values of the vl and vtype registers will affect the behavior of RVV |
22 | insns. That is, when we need to execute an RVV instruction, we need to set | |
23 | the correct vl and vtype values by executing the vsetvl instruction before. | |
24 | Executing the fewest number of vsetvl instructions while keeping the behavior | |
25 | the same is the problem this pass is trying to solve. This vsetvl pass is | |
26 | divided into 5 phases: | |
27 | ||
28 | - Phase 1 (fuse local vsetvl infos): traverses each Basic Block, parses | |
29 | each instruction in it that affects vl and vtype state and generates an | |
30 | array of vsetvl_info objects. Then traverse the vsetvl_info array from | |
31 | front to back and perform fusion according to the fusion rules. The fused | |
32 | vsetvl infos are stored in the vsetvl_block_info object's `infos` field. | |
33 | ||
34 | - Phase 2 (earliest fuse global vsetvl infos): The header_info and | |
35 | footer_info of vsetvl_block_info are used as expressions, and the | |
36 | earliest of each expression is computed. Based on the earliest | |
37 | information, try to lift up the corresponding vsetvl info to the src | |
38 | basic block of the edge (mainly to reduce the total number of vsetvl | |
39 | instructions, this uplift will cause some execution paths to execute | |
40 | vsetvl instructions that shouldn't be there). | |
41 | ||
42 | - Phase 3 (pre global vsetvl info): The header_info and footer_info of | |
43 | vsetvl_block_info are used as expressions, and the LCM algorithm is used | |
44 | to compute the header_info that needs to be deleted and the one that | |
45 | needs to be inserted in some edges. | |
46 | ||
47 | - Phase 4 (emit vsetvl insns) : Based on the fusion result of Phase 1 and | |
48 | the deletion and insertion information of Phase 3, the mandatory vsetvl | |
49 | instruction insertion, modification and deletion are performed. | |
50 | ||
51 | - Phase 5 (cleanup): Clean up the avl operand in the RVV operator | |
52 | instruction and cleanup the unused dest operand of the vsetvl insn. | |
53 | ||
54 | After the Phase 1 a virtual CFG of vsetvl_info is generated. The virtual | |
55 | basic block is represented by vsetvl_block_info, and the virtual vsetvl | |
56 | statements inside are represented by vsetvl_info. The later phases 2 and 3 | |
57 | are constantly modifying and adjusting this virtual CFG. Phase 4 performs | |
58 | insertion, modification and deletion of vsetvl instructions based on the | |
59 | optimized virtual CFG. The Phase 1, 2 and 3 do not involve modifications to | |
60 | the RTL. | |
61 | */ | |
9243c3d1 JZZ |
62 | |
63 | #define IN_TARGET_CODE 1 | |
64 | #define INCLUDE_ALGORITHM | |
65 | #define INCLUDE_FUNCTIONAL | |
66 | ||
67 | #include "config.h" | |
68 | #include "system.h" | |
69 | #include "coretypes.h" | |
70 | #include "tm.h" | |
71 | #include "backend.h" | |
72 | #include "rtl.h" | |
73 | #include "target.h" | |
74 | #include "tree-pass.h" | |
75 | #include "df.h" | |
76 | #include "rtl-ssa.h" | |
77 | #include "cfgcleanup.h" | |
78 | #include "insn-config.h" | |
79 | #include "insn-attr.h" | |
80 | #include "insn-opinit.h" | |
81 | #include "tm-constrs.h" | |
82 | #include "cfgrtl.h" | |
83 | #include "cfganal.h" | |
84 | #include "lcm.h" | |
85 | #include "predict.h" | |
86 | #include "profile-count.h" | |
a3ad2301 | 87 | #include "gcse.h" |
9243c3d1 JZZ |
88 | |
89 | using namespace rtl_ssa; | |
90 | using namespace riscv_vector; | |
91 | ||
29331e72 LD |
92 | /* Set the bitmap DST to the union of SRC of predecessors of |
93 | basic block B. | |
94 | It's a bit different from bitmap_union_of_preds in cfganal.cc. This function | |
95 | takes into account the case where pred is ENTRY basic block. The main reason | |
96 | for this difference is to make it easier to insert some special value into | |
97 | the ENTRY base block. For example, vsetvl_info with a status of UNKNOW. */ | |
98 | static void | |
99 | bitmap_union_of_preds_with_entry (sbitmap dst, sbitmap *src, basic_block b) | |
100 | { | |
101 | unsigned int set_size = dst->size; | |
102 | edge e; | |
103 | unsigned ix; | |
104 | ||
105 | for (ix = 0; ix < EDGE_COUNT (b->preds); ix++) | |
106 | { | |
107 | e = EDGE_PRED (b, ix); | |
108 | bitmap_copy (dst, src[e->src->index]); | |
109 | break; | |
110 | } | |
ec99ffab | 111 | |
29331e72 LD |
112 | if (ix == EDGE_COUNT (b->preds)) |
113 | bitmap_clear (dst); | |
114 | else | |
115 | for (ix++; ix < EDGE_COUNT (b->preds); ix++) | |
116 | { | |
117 | unsigned int i; | |
118 | SBITMAP_ELT_TYPE *p, *r; | |
119 | ||
120 | e = EDGE_PRED (b, ix); | |
121 | p = src[e->src->index]->elms; | |
122 | r = dst->elms; | |
123 | for (i = 0; i < set_size; i++) | |
124 | *r++ |= *p++; | |
125 | } | |
126 | } | |
127 | ||
128 | /* Compute the reaching defintion in and out based on the gen and KILL | |
129 | informations in each Base Blocks. | |
130 | This function references the compute_avaiable implementation in lcm.cc */ | |
131 | static void | |
132 | compute_reaching_defintion (sbitmap *gen, sbitmap *kill, sbitmap *in, | |
133 | sbitmap *out) | |
9243c3d1 | 134 | { |
29331e72 LD |
135 | edge e; |
136 | basic_block *worklist, *qin, *qout, *qend, bb; | |
137 | unsigned int qlen; | |
138 | edge_iterator ei; | |
139 | ||
140 | /* Allocate a worklist array/queue. Entries are only added to the | |
141 | list if they were not already on the list. So the size is | |
142 | bounded by the number of basic blocks. */ | |
143 | qin = qout = worklist | |
144 | = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); | |
145 | ||
146 | /* Put every block on the worklist; this is necessary because of the | |
147 | optimistic initialization of AVOUT above. Use reverse postorder | |
148 | to make the forward dataflow problem require less iterations. */ | |
149 | int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); | |
150 | int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false); | |
151 | for (int i = 0; i < n; ++i) | |
152 | { | |
153 | bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]); | |
154 | *qin++ = bb; | |
155 | bb->aux = bb; | |
156 | } | |
157 | free (rpo); | |
158 | ||
159 | qin = worklist; | |
160 | qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS]; | |
161 | qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; | |
162 | ||
163 | /* Mark blocks which are successors of the entry block so that we | |
164 | can easily identify them below. */ | |
165 | FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) | |
166 | e->dest->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun); | |
167 | ||
168 | /* Iterate until the worklist is empty. */ | |
169 | while (qlen) | |
170 | { | |
171 | /* Take the first entry off the worklist. */ | |
172 | bb = *qout++; | |
173 | qlen--; | |
174 | ||
175 | if (qout >= qend) | |
176 | qout = worklist; | |
177 | ||
178 | /* Do not clear the aux field for blocks which are successors of the | |
179 | ENTRY block. That way we never add then to the worklist again. */ | |
180 | if (bb->aux != ENTRY_BLOCK_PTR_FOR_FN (cfun)) | |
181 | bb->aux = NULL; | |
182 | ||
183 | bitmap_union_of_preds_with_entry (in[bb->index], out, bb); | |
184 | ||
185 | if (bitmap_ior_and_compl (out[bb->index], gen[bb->index], in[bb->index], | |
186 | kill[bb->index])) | |
187 | /* If the out state of this block changed, then we need | |
188 | to add the successors of this block to the worklist | |
189 | if they are not already on the worklist. */ | |
190 | FOR_EACH_EDGE (e, ei, bb->succs) | |
191 | if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)) | |
192 | { | |
193 | *qin++ = e->dest; | |
194 | e->dest->aux = e; | |
195 | qlen++; | |
196 | ||
197 | if (qin >= qend) | |
198 | qin = worklist; | |
199 | } | |
200 | } | |
201 | ||
202 | clear_aux_for_edges (); | |
203 | clear_aux_for_blocks (); | |
204 | free (worklist); | |
9243c3d1 JZZ |
205 | } |
206 | ||
29331e72 LD |
/* The kinds of vsetvl instruction distinguished by this pass.  */
enum vsetvl_type
{
  VSETVL_NORMAL = 0,
  VSETVL_VTYPE_CHANGE_ONLY = 1,
  VSETVL_DISCARD_RESULT = 2,
  /* Number of kinds listed above; keep this enumerator last.  */
  NUM_VSETVL_TYPE = 3
};
9243c3d1 | 215 | |
/* Where a newly generated instruction is to be emitted.  */
enum emit_type
{
  /* Emit with emit_insn directly.  */
  EMIT_DIRECT = 0,
  EMIT_BEFORE = 1,
  EMIT_AFTER = 2,
};
223 | ||
224 | /* dump helper functions */ | |
225 | static const char * | |
226 | vlmul_to_str (vlmul_type vlmul) | |
227 | { | |
228 | switch (vlmul) | |
229 | { | |
230 | case LMUL_1: | |
231 | return "m1"; | |
232 | case LMUL_2: | |
233 | return "m2"; | |
234 | case LMUL_4: | |
235 | return "m4"; | |
236 | case LMUL_8: | |
237 | return "m8"; | |
238 | case LMUL_RESERVED: | |
239 | return "INVALID LMUL"; | |
240 | case LMUL_F8: | |
241 | return "mf8"; | |
242 | case LMUL_F4: | |
243 | return "mf4"; | |
244 | case LMUL_F2: | |
245 | return "mf2"; | |
246 | ||
247 | default: | |
248 | gcc_unreachable (); | |
249 | } | |
9243c3d1 JZZ |
250 | } |
251 | ||
29331e72 LD |
/* Return the textual name of a tail/mask policy for dump output:
   "agnostic" when AGNOSTIC_P is true, "undisturbed" otherwise.  */
static const char *
policy_to_str (bool agnostic_p)
{
  if (agnostic_p)
    return "agnostic";
  return "undisturbed";
}
257 | ||
258 | static bool | |
29331e72 | 259 | vlmax_avl_p (rtx x) |
9243c3d1 | 260 | { |
29331e72 | 261 | return x && rtx_equal_p (x, RVV_VLMAX); |
9243c3d1 JZZ |
262 | } |
263 | ||
264 | /* Return true if it is an RVV instruction depends on VTYPE global | |
265 | status register. */ | |
266 | static bool | |
267 | has_vtype_op (rtx_insn *rinsn) | |
268 | { | |
269 | return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn); | |
270 | } | |
271 | ||
272 | /* Return true if it is an RVV instruction depends on VL global | |
273 | status register. */ | |
274 | static bool | |
275 | has_vl_op (rtx_insn *rinsn) | |
276 | { | |
277 | return recog_memoized (rinsn) >= 0 && get_attr_has_vl_op (rinsn); | |
278 | } | |
279 | ||
ec99ffab JZZ |
280 | /* Return true if the instruction ignores VLMUL field of VTYPE. */ |
281 | static bool | |
282 | ignore_vlmul_insn_p (rtx_insn *rinsn) | |
283 | { | |
284 | return get_attr_type (rinsn) == TYPE_VIMOVVX | |
285 | || get_attr_type (rinsn) == TYPE_VFMOVVF | |
286 | || get_attr_type (rinsn) == TYPE_VIMOVXV | |
287 | || get_attr_type (rinsn) == TYPE_VFMOVFV; | |
288 | } | |
289 | ||
290 | /* Return true if the instruction is scalar move instruction. */ | |
291 | static bool | |
292 | scalar_move_insn_p (rtx_insn *rinsn) | |
293 | { | |
294 | return get_attr_type (rinsn) == TYPE_VIMOVXV | |
295 | || get_attr_type (rinsn) == TYPE_VFMOVFV; | |
296 | } | |
297 | ||
60bd33bc JZZ |
298 | /* Return true if the instruction is fault first load instruction. */ |
299 | static bool | |
300 | fault_first_load_p (rtx_insn *rinsn) | |
301 | { | |
6313b045 JZZ |
302 | return recog_memoized (rinsn) >= 0 |
303 | && (get_attr_type (rinsn) == TYPE_VLDFF | |
304 | || get_attr_type (rinsn) == TYPE_VLSEGDFF); | |
60bd33bc JZZ |
305 | } |
306 | ||
307 | /* Return true if the instruction is read vl instruction. */ | |
308 | static bool | |
309 | read_vl_insn_p (rtx_insn *rinsn) | |
310 | { | |
311 | return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_RDVL; | |
312 | } | |
313 | ||
9243c3d1 JZZ |
314 | /* Return true if it is a vsetvl instruction. */ |
315 | static bool | |
316 | vector_config_insn_p (rtx_insn *rinsn) | |
317 | { | |
318 | return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL; | |
319 | } | |
320 | ||
321 | /* Return true if it is vsetvldi or vsetvlsi. */ | |
322 | static bool | |
323 | vsetvl_insn_p (rtx_insn *rinsn) | |
324 | { | |
29331e72 | 325 | if (!rinsn || !vector_config_insn_p (rinsn)) |
6b6b9c68 | 326 | return false; |
85112fbb | 327 | return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi |
6b6b9c68 JZZ |
328 | || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi); |
329 | } | |
330 | ||
331 | /* Return true if it is vsetvl zero, rs1. */ | |
332 | static bool | |
333 | vsetvl_discard_result_insn_p (rtx_insn *rinsn) | |
334 | { | |
335 | if (!vector_config_insn_p (rinsn)) | |
336 | return false; | |
337 | return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi | |
338 | || INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi); | |
9243c3d1 JZZ |
339 | } |
340 | ||
9243c3d1 | 341 | static bool |
4f673c5e | 342 | real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb) |
9243c3d1 | 343 | { |
4f673c5e | 344 | return insn != nullptr && insn->is_real () && insn->bb () == bb; |
9243c3d1 JZZ |
345 | } |
346 | ||
29331e72 | 347 | /* Helper function to get VL operand for VLMAX insn. */ |
6b6b9c68 JZZ |
348 | static rtx |
349 | get_vl (rtx_insn *rinsn) | |
350 | { | |
351 | if (has_vl_op (rinsn)) | |
352 | { | |
353 | extract_insn_cached (rinsn); | |
354 | return recog_data.operand[get_attr_vl_op_idx (rinsn)]; | |
355 | } | |
356 | return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0)); | |
4f673c5e JZZ |
357 | } |
358 | ||
6b6b9c68 JZZ |
359 | /* Helper function to get AVL operand. */ |
360 | static rtx | |
361 | get_avl (rtx_insn *rinsn) | |
362 | { | |
363 | if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn)) | |
364 | return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0); | |
365 | ||
366 | if (!has_vl_op (rinsn)) | |
367 | return NULL_RTX; | |
368 | if (get_attr_avl_type (rinsn) == VLMAX) | |
369 | return RVV_VLMAX; | |
370 | extract_insn_cached (rinsn); | |
371 | return recog_data.operand[get_attr_vl_op_idx (rinsn)]; | |
372 | } | |
373 | ||
9243c3d1 JZZ |
374 | /* Helper function to get SEW operand. We always have SEW value for |
375 | all RVV instructions that have VTYPE OP. */ | |
376 | static uint8_t | |
377 | get_sew (rtx_insn *rinsn) | |
378 | { | |
379 | return get_attr_sew (rinsn); | |
380 | } | |
381 | ||
382 | /* Helper function to get VLMUL operand. We always have VLMUL value for | |
383 | all RVV instructions that have VTYPE OP. */ | |
384 | static enum vlmul_type | |
385 | get_vlmul (rtx_insn *rinsn) | |
386 | { | |
387 | return (enum vlmul_type) get_attr_vlmul (rinsn); | |
388 | } | |
389 | ||
390 | /* Get default tail policy. */ | |
391 | static bool | |
392 | get_default_ta () | |
393 | { | |
394 | /* For the instruction that doesn't require TA, we still need a default value | |
395 | to emit vsetvl. We pick up the default value according to prefer policy. */ | |
396 | return (bool) (get_prefer_tail_policy () & 0x1 | |
397 | || (get_prefer_tail_policy () >> 1 & 0x1)); | |
398 | } | |
399 | ||
400 | /* Get default mask policy. */ | |
401 | static bool | |
402 | get_default_ma () | |
403 | { | |
404 | /* For the instruction that doesn't require MA, we still need a default value | |
405 | to emit vsetvl. We pick up the default value according to prefer policy. */ | |
406 | return (bool) (get_prefer_mask_policy () & 0x1 | |
407 | || (get_prefer_mask_policy () >> 1 & 0x1)); | |
408 | } | |
409 | ||
410 | /* Helper function to get TA operand. */ | |
411 | static bool | |
412 | tail_agnostic_p (rtx_insn *rinsn) | |
413 | { | |
414 | /* If it doesn't have TA, we return agnostic by default. */ | |
415 | extract_insn_cached (rinsn); | |
416 | int ta = get_attr_ta (rinsn); | |
417 | return ta == INVALID_ATTRIBUTE ? get_default_ta () : IS_AGNOSTIC (ta); | |
418 | } | |
419 | ||
420 | /* Helper function to get MA operand. */ | |
421 | static bool | |
422 | mask_agnostic_p (rtx_insn *rinsn) | |
423 | { | |
424 | /* If it doesn't have MA, we return agnostic by default. */ | |
425 | extract_insn_cached (rinsn); | |
426 | int ma = get_attr_ma (rinsn); | |
427 | return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma); | |
428 | } | |
429 | ||
430 | /* Return true if FN has a vector instruction that use VL/VTYPE. */ | |
431 | static bool | |
432 | has_vector_insn (function *fn) | |
433 | { | |
434 | basic_block cfg_bb; | |
435 | rtx_insn *rinsn; | |
436 | FOR_ALL_BB_FN (cfg_bb, fn) | |
437 | FOR_BB_INSNS (cfg_bb, rinsn) | |
438 | if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn)) | |
439 | return true; | |
440 | return false; | |
441 | } | |
442 | ||
29331e72 LD |
443 | static vlmul_type |
444 | calculate_vlmul (unsigned int sew, unsigned int ratio) | |
9243c3d1 | 445 | { |
29331e72 LD |
446 | const vlmul_type ALL_LMUL[] |
447 | = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2}; | |
448 | for (const vlmul_type vlmul : ALL_LMUL) | |
449 | if (calculate_ratio (sew, vlmul) == ratio) | |
450 | return vlmul; | |
451 | return LMUL_RESERVED; | |
9243c3d1 JZZ |
452 | } |
453 | ||
29331e72 LD |
454 | /* Get the currently supported maximum sew used in the int rvv instructions. */ |
455 | static uint8_t | |
456 | get_max_int_sew () | |
9243c3d1 | 457 | { |
29331e72 LD |
458 | if (TARGET_VECTOR_ELEN_64) |
459 | return 64; | |
460 | else if (TARGET_VECTOR_ELEN_32) | |
461 | return 32; | |
462 | gcc_unreachable (); | |
9243c3d1 JZZ |
463 | } |
464 | ||
29331e72 LD |
465 | /* Get the currently supported maximum sew used in the float rvv instructions. |
466 | */ | |
467 | static uint8_t | |
468 | get_max_float_sew () | |
469 | { | |
470 | if (TARGET_VECTOR_ELEN_FP_64) | |
471 | return 64; | |
472 | else if (TARGET_VECTOR_ELEN_FP_32) | |
473 | return 32; | |
474 | else if (TARGET_VECTOR_ELEN_FP_16) | |
475 | return 16; | |
476 | gcc_unreachable (); | |
9243c3d1 JZZ |
477 | } |
478 | ||
29331e72 LD |
479 | /* Count the number of REGNO in RINSN. */ |
480 | static int | |
481 | count_regno_occurrences (rtx_insn *rinsn, unsigned int regno) | |
9243c3d1 | 482 | { |
29331e72 LD |
483 | int count = 0; |
484 | extract_insn (rinsn); | |
485 | for (int i = 0; i < recog_data.n_operands; i++) | |
486 | if (refers_to_regno_p (regno, recog_data.operand[i])) | |
487 | count++; | |
488 | return count; | |
9243c3d1 JZZ |
489 | } |
490 | ||
/* Bit mask selecting the kinds of definition to collect; the values can be
   OR-ed together.  */
enum def_type
{
  REAL_SET = 0x01,
  PHI_SET = 0x02,
  BB_HEAD_SET = 0x04,
  BB_END_SET = 0x08,
  /* ??? TODO: In RTL_SSA framework, we have REAL_SET,
     PHI_SET, BB_HEAD_SET, BB_END_SET and
     CLOBBER_DEF def_info types.  Currently,
     we conservatively do not optimize clobber
     def since we don't see the case that we
     need to optimize it.  */
  CLOBBER_DEF = 0x10
};
9243c3d1 | 505 | |
29331e72 LD |
506 | static bool |
507 | insn_should_be_added_p (const insn_info *insn, unsigned int types) | |
da93c41c | 508 | { |
29331e72 LD |
509 | if (insn->is_real () && (types & REAL_SET)) |
510 | return true; | |
511 | if (insn->is_phi () && (types & PHI_SET)) | |
512 | return true; | |
513 | if (insn->is_bb_head () && (types & BB_HEAD_SET)) | |
514 | return true; | |
515 | if (insn->is_bb_end () && (types & BB_END_SET)) | |
516 | return true; | |
517 | return false; | |
da93c41c JZ |
518 | } |
519 | ||
29331e72 LD |
520 | static const hash_set<use_info *> |
521 | get_all_real_uses (insn_info *insn, unsigned regno) | |
9243c3d1 | 522 | { |
29331e72 | 523 | gcc_assert (insn->is_real ()); |
9243c3d1 | 524 | |
29331e72 LD |
525 | hash_set<use_info *> uses; |
526 | auto_vec<phi_info *> work_list; | |
527 | hash_set<phi_info *> visited_list; | |
9243c3d1 | 528 | |
29331e72 | 529 | for (def_info *def : insn->defs ()) |
9243c3d1 | 530 | { |
29331e72 LD |
531 | if (!def->is_reg () || def->regno () != regno) |
532 | continue; | |
533 | set_info *set = safe_dyn_cast<set_info *> (def); | |
534 | if (!set) | |
535 | continue; | |
536 | for (use_info *use : set->nondebug_insn_uses ()) | |
537 | if (use->insn ()->is_real ()) | |
538 | uses.add (use); | |
539 | for (use_info *use : set->phi_uses ()) | |
540 | work_list.safe_push (use->phi ()); | |
9243c3d1 | 541 | } |
9243c3d1 | 542 | |
29331e72 | 543 | while (!work_list.is_empty ()) |
60bd33bc | 544 | { |
29331e72 LD |
545 | phi_info *phi = work_list.pop (); |
546 | visited_list.add (phi); | |
60bd33bc | 547 | |
29331e72 LD |
548 | for (use_info *use : phi->nondebug_insn_uses ()) |
549 | if (use->insn ()->is_real ()) | |
550 | uses.add (use); | |
551 | for (use_info *use : phi->phi_uses ()) | |
552 | if (!visited_list.contains (use->phi ())) | |
553 | work_list.safe_push (use->phi ()); | |
60bd33bc | 554 | } |
29331e72 | 555 | return uses; |
60bd33bc JZZ |
556 | } |
557 | ||
29331e72 LD |
558 | /* Recursively find all define instructions. The kind of instruction is |
559 | specified by the DEF_TYPE. */ | |
560 | static hash_set<set_info *> | |
561 | get_all_sets (phi_info *phi, unsigned int types) | |
9243c3d1 | 562 | { |
29331e72 LD |
563 | hash_set<set_info *> insns; |
564 | auto_vec<phi_info *> work_list; | |
565 | hash_set<phi_info *> visited_list; | |
566 | if (!phi) | |
567 | return hash_set<set_info *> (); | |
568 | work_list.safe_push (phi); | |
9243c3d1 | 569 | |
29331e72 | 570 | while (!work_list.is_empty ()) |
9243c3d1 | 571 | { |
29331e72 LD |
572 | phi_info *phi = work_list.pop (); |
573 | visited_list.add (phi); | |
574 | for (use_info *use : phi->inputs ()) | |
575 | { | |
576 | def_info *def = use->def (); | |
577 | set_info *set = safe_dyn_cast<set_info *> (def); | |
578 | if (!set) | |
579 | return hash_set<set_info *> (); | |
a1e42094 | 580 | |
29331e72 | 581 | gcc_assert (!set->insn ()->is_debug_insn ()); |
9243c3d1 | 582 | |
29331e72 LD |
583 | if (insn_should_be_added_p (set->insn (), types)) |
584 | insns.add (set); | |
585 | if (set->insn ()->is_phi ()) | |
586 | { | |
587 | phi_info *new_phi = as_a<phi_info *> (set); | |
588 | if (!visited_list.contains (new_phi)) | |
589 | work_list.safe_push (new_phi); | |
590 | } | |
591 | } | |
9243c3d1 | 592 | } |
29331e72 | 593 | return insns; |
9243c3d1 JZZ |
594 | } |
595 | ||
29331e72 LD |
596 | static hash_set<set_info *> |
597 | get_all_sets (set_info *set, bool /* get_real_inst */ real_p, | |
598 | bool /*get_phi*/ phi_p, bool /* get_function_parameter*/ param_p) | |
aef20243 | 599 | { |
29331e72 LD |
600 | if (real_p && phi_p && param_p) |
601 | return get_all_sets (safe_dyn_cast<phi_info *> (set), | |
602 | REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET); | |
aef20243 | 603 | |
29331e72 LD |
604 | else if (real_p && param_p) |
605 | return get_all_sets (safe_dyn_cast<phi_info *> (set), | |
606 | REAL_SET | BB_HEAD_SET | BB_END_SET); | |
607 | ||
608 | else if (real_p) | |
609 | return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET); | |
610 | return hash_set<set_info *> (); | |
69f39144 JZ |
611 | } |
612 | ||
4f673c5e | 613 | static bool |
6b6b9c68 | 614 | source_equal_p (insn_info *insn1, insn_info *insn2) |
4f673c5e | 615 | { |
6b6b9c68 JZZ |
616 | if (!insn1 || !insn2) |
617 | return false; | |
618 | rtx_insn *rinsn1 = insn1->rtl (); | |
619 | rtx_insn *rinsn2 = insn2->rtl (); | |
4f673c5e JZZ |
620 | if (!rinsn1 || !rinsn2) |
621 | return false; | |
29331e72 | 622 | |
4f673c5e JZZ |
623 | rtx note1 = find_reg_equal_equiv_note (rinsn1); |
624 | rtx note2 = find_reg_equal_equiv_note (rinsn2); | |
4f673c5e JZZ |
625 | if (note1 && note2 && rtx_equal_p (note1, note2)) |
626 | return true; | |
29331e72 | 627 | return false; |
4f673c5e JZZ |
628 | } |
629 | ||
6b6b9c68 | 630 | static insn_info * |
4f673c5e JZZ |
631 | extract_single_source (set_info *set) |
632 | { | |
633 | if (!set) | |
634 | return nullptr; | |
635 | if (set->insn ()->is_real ()) | |
6b6b9c68 | 636 | return set->insn (); |
4f673c5e JZZ |
637 | if (!set->insn ()->is_phi ()) |
638 | return nullptr; | |
6b6b9c68 | 639 | hash_set<set_info *> sets = get_all_sets (set, true, false, true); |
4f673c5e | 640 | |
6b6b9c68 | 641 | insn_info *first_insn = (*sets.begin ())->insn (); |
4f673c5e JZZ |
642 | if (first_insn->is_artificial ()) |
643 | return nullptr; | |
6b6b9c68 | 644 | for (const set_info *set : sets) |
4f673c5e JZZ |
645 | { |
646 | /* If there is a head or end insn, we conservative return | |
647 | NULL so that VSETVL PASS will insert vsetvl directly. */ | |
6b6b9c68 | 648 | if (set->insn ()->is_artificial ()) |
4f673c5e | 649 | return nullptr; |
29331e72 | 650 | if (set != *sets.begin () && !source_equal_p (set->insn (), first_insn)) |
4f673c5e JZZ |
651 | return nullptr; |
652 | } | |
653 | ||
6b6b9c68 | 654 | return first_insn; |
4f673c5e JZZ |
655 | } |
656 | ||
29331e72 LD |
657 | static bool |
658 | same_equiv_note_p (set_info *set1, set_info *set2) | |
ec99ffab | 659 | { |
29331e72 LD |
660 | insn_info *insn1 = extract_single_source (set1); |
661 | insn_info *insn2 = extract_single_source (set2); | |
662 | if (!insn1 || !insn2) | |
663 | return false; | |
664 | return source_equal_p (insn1, insn2); | |
ec99ffab JZZ |
665 | } |
666 | ||
29331e72 LD |
/* Encode the pair (BB_INDEX, REGNO) into a single expression id, given that
   there are NUM_BBS basic blocks: REGNO * NUM_BBS + BB_INDEX.  */
static unsigned
get_expr_id (unsigned bb_index, unsigned regno, unsigned num_bbs)
{
  const unsigned base = regno * num_bbs;
  return base + bb_index;
}
29331e72 LD |
/* Recover the register number from EXPR_ID, an id of the form
   regno * NUM_BB + bb_index.  */
static unsigned
get_regno (unsigned expr_id, unsigned num_bb)
{
  const unsigned regno = expr_id / num_bb;
  return regno;
}
29331e72 LD |
/* Recover the basic block index from EXPR_ID, an id of the form
   regno * NUM_BB + bb_index.  */
static unsigned
get_bb_index (unsigned expr_id, unsigned num_bb)
{
  const unsigned bb_index = expr_id % num_bb;
  return bb_index;
}
682 | ||
29331e72 | 683 | /* Return true if the SET result is not used by any instructions. */ |
ec99ffab | 684 | static bool |
29331e72 | 685 | has_no_uses (basic_block cfg_bb, rtx_insn *rinsn, int regno) |
ec99ffab | 686 | { |
29331e72 LD |
687 | if (bitmap_bit_p (df_get_live_out (cfg_bb), regno)) |
688 | return false; | |
ec99ffab | 689 | |
29331e72 LD |
690 | rtx_insn *iter; |
691 | for (iter = NEXT_INSN (rinsn); iter && iter != NEXT_INSN (BB_END (cfg_bb)); | |
692 | iter = NEXT_INSN (iter)) | |
693 | if (df_find_use (iter, regno_reg_rtx[regno])) | |
694 | return false; | |
ec99ffab | 695 | |
29331e72 | 696 | return true; |
ec99ffab JZZ |
697 | } |
698 | ||
29331e72 LD |
699 | /* Change insn and Assert the change always happens. */ |
700 | static void | |
701 | validate_change_or_fail (rtx object, rtx *loc, rtx new_rtx, bool in_group) | |
ec99ffab | 702 | { |
29331e72 LD |
703 | bool change_p = validate_change (object, loc, new_rtx, in_group); |
704 | gcc_assert (change_p); | |
ec99ffab JZZ |
705 | } |
706 | ||
29331e72 LD |
/* These flags describe the minimum demand an RVV instruction places on the
   vl and vtype values.  For example, DEMAND_RATIO_P means that the
   instruction only needs the SEW/LMUL ratio to stay the same and does not
   require fixed SEW and LMUL values.

   Hence, if an earlier RVV instruction has DEMAND_RATIO_P and a later one
   needs DEMAND_SEW_LMUL_P with the same SEW/LMUL ratio, the minimum demand
   of the earlier instruction can be tightened to DEMAND_SEW_LMUL_P using
   the SEW and LMUL of the later instruction.  The vsetvl generated for the
   new demand then serves the later instruction as well, so no separate
   vsetvl has to be inserted for it.  */
enum demand_flags : unsigned
{
  DEMAND_EMPTY_P = 0x00,
  DEMAND_SEW_P = 0x01,
  DEMAND_LMUL_P = 0x02,
  DEMAND_RATIO_P = 0x04,
  DEMAND_GE_SEW_P = 0x08,
  DEMAND_TAIL_POLICY_P = 0x10,
  DEMAND_MASK_POLICY_P = 0x20,
  DEMAND_AVL_P = 0x40,
  DEMAND_NON_ZERO_AVL_P = 0x80,
};
ec99ffab | 731 | |
29331e72 LD |
732 | /* We split the demand information into three parts. They are sew and lmul |
733 | related (sew_lmul_demand_type), tail and mask policy related | |
734 | (policy_demand_type) and avl related (avl_demand_type). Then we define three | |
735 | interfaces avaiable_with, compatible_p and merge. avaiable_with is | |
736 | used to determine whether the two vsetvl infos prev_info and next_info are | |
737 | available or not. If prev_info is available for next_info, it means that the | |
738 | RVV insn corresponding to next_info on the path from prev_info to next_info | |
739 | can be used without inserting a separate vsetvl instruction. compatible_p | |
740 | is used to determine whether prev_info is compatible with next_info, and if | |
741 | so, merge can be used to merge the stricter demand information from | |
742 | next_info into prev_info so that prev_info becomes available to next_info. | |
743 | */ | |
ec99ffab | 744 | |
29331e72 | 745 | enum class sew_lmul_demand_type : unsigned |
ec99ffab | 746 | { |
29331e72 LD |
747 | sew_lmul = demand_flags::DEMAND_SEW_P | demand_flags::DEMAND_LMUL_P, |
748 | ratio_only = demand_flags::DEMAND_RATIO_P, | |
749 | sew_only = demand_flags::DEMAND_SEW_P, | |
750 | ge_sew = demand_flags::DEMAND_GE_SEW_P, | |
751 | ratio_and_ge_sew | |
752 | = demand_flags::DEMAND_RATIO_P | demand_flags::DEMAND_GE_SEW_P, | |
753 | }; | |
ec99ffab | 754 | |
29331e72 | 755 | enum class policy_demand_type : unsigned |
29547511 | 756 | { |
29331e72 LD |
757 | tail_mask_policy |
758 | = demand_flags::DEMAND_TAIL_POLICY_P | demand_flags::DEMAND_MASK_POLICY_P, | |
759 | tail_policy_only = demand_flags::DEMAND_TAIL_POLICY_P, | |
760 | mask_policy_only = demand_flags::DEMAND_MASK_POLICY_P, | |
761 | ignore_policy = demand_flags::DEMAND_EMPTY_P, | |
762 | }; | |
29547511 | 763 | |
29331e72 | 764 | enum class avl_demand_type : unsigned |
ec99ffab | 765 | { |
29331e72 LD |
766 | avl = demand_flags::DEMAND_AVL_P, |
767 | non_zero_avl = demand_flags::DEMAND_NON_ZERO_AVL_P, | |
768 | ignore_avl = demand_flags::DEMAND_EMPTY_P, | |
769 | }; | |
ec99ffab | 770 | |
/* Describes the VL/VTYPE configuration taken from a single insn: the AVL and
   VL operands, SEW, LMUL, SEW/LMUL ratio and the tail/mask policies, plus
   which of those fields the insn actually demands and the analysis state
   (uninitialized, valid, unknown or empty) of the record.  */
29331e72 | 771 | class vsetvl_info
ec99ffab | 772 | {
29331e72 LD |
773 | private: | |
/* Insn the info was parsed from and the block the info is attached to.
   The two may disagree, see insn_inside_bb_p.  */
774 | insn_info *m_insn; | |
775 | bb_info *m_bb; | |
/* AVL and VL operands; NULL_RTX when absent.  */
776 | rtx m_avl; | |
777 | rtx m_vl; | |
/* Definition feeding the AVL register; parse_insn only sets it for a
   non-VLMAX register AVL.  */
778 | set_info *m_avl_def; | |
/* SEW, the upper SEW bound used by the ge_sew demand kinds, LMUL and the
   SEW/LMUL ratio.  */
779 | uint8_t m_sew; | |
780 | uint8_t m_max_sew; | |
781 | vlmul_type m_vlmul; | |
782 | uint8_t m_ratio; | |
/* Tail agnostic and mask agnostic flags.  */
783 | bool m_ta; | |
784 | bool m_ma; | |
785 | |
/* Which parts of the configuration are demanded, see normalize_demand.  */
786 | sew_lmul_demand_type m_sew_lmul_demand; | |
787 | policy_demand_type m_policy_demand; | |
788 | avl_demand_type m_avl_demand; | |
789 | |
/* Analysis state of the record.  A default constructed info is
   UNINITIALIZED; parse_insn moves it to VALID or UNKNOWN and set_empty to
   EMPTY.  */
790 | enum class state_type | |
791 | { | |
792 | UNINITIALIZED, | |
793 | VALID, | |
794 | UNKNOWN, | |
795 | EMPTY, | |
796 | }; | |
797 | state_type m_state; | |
798 | |
/* m_delete is set by set_delete and reported by delete_p.
   m_change_vtype_only makes get_vsetvl_pat emit the vtype-only form.
   m_read_vl_insn is, for a fault-only-first load, the following insn of the
   same block that reads VL (if one is found before VL is redefined).
   m_vl_used_by_non_rvv_insn is true when some real use of the VL register is
   not simply the AVL operand of an insn with a VL operand.  */
799 | bool m_delete; | |
800 | bool m_change_vtype_only; | |
801 | insn_info *m_read_vl_insn; | |
802 | bool m_vl_used_by_non_rvv_insn; | |
ec99ffab | 803 |
29331e72 LD |
804 | public: | |
/* Build an UNINITIALIZED info with no insn, block, AVL or VL; the demand
   fields default to sew_lmul, tail_mask_policy and avl.  */
805 | vsetvl_info () | |
806 | : m_insn (nullptr), m_bb (nullptr), m_avl (NULL_RTX), m_vl (NULL_RTX), | |
807 | m_avl_def (nullptr), m_sew (0), m_max_sew (0), m_vlmul (LMUL_RESERVED), | |
808 | m_ratio (0), m_ta (false), m_ma (false), | |
809 | m_sew_lmul_demand (sew_lmul_demand_type::sew_lmul), | |
810 | m_policy_demand (policy_demand_type::tail_mask_policy), | |
811 | m_avl_demand (avl_demand_type::avl), m_state (state_type::UNINITIALIZED), | |
812 | m_delete (false), m_change_vtype_only (false), m_read_vl_insn (nullptr), | |
813 | m_vl_used_by_non_rvv_insn (false) | |
814 | {} | |
815 | |
/* Build an info by parsing an RTL-SSA insn.  */
816 | vsetvl_info (insn_info *insn) : vsetvl_info () { parse_insn (insn); } | |
817 | |
/* Build an info by parsing a plain RTL insn (no RTL-SSA data).  */
818 | vsetvl_info (rtx_insn *insn) : vsetvl_info () { parse_insn (insn); } | |
819 | |
/* Plain setters.  set_delete and set_change_vtype_only can only turn the
   flag on.  */
820 | void set_avl (rtx avl) { m_avl = avl; } | |
821 | void set_vl (rtx vl) { m_vl = vl; } | |
822 | void set_avl_def (set_info *avl_def) { m_avl_def = avl_def; } | |
823 | void set_sew (uint8_t sew) { m_sew = sew; } | |
824 | void set_vlmul (vlmul_type vlmul) { m_vlmul = vlmul; } | |
825 | void set_ratio (uint8_t ratio) { m_ratio = ratio; } | |
826 | void set_ta (bool ta) { m_ta = ta; } | |
827 | void set_ma (bool ma) { m_ma = ma; } | |
828 | void set_delete () { m_delete = true; } | |
829 | void set_bb (bb_info *bb) { m_bb = bb; } | |
830 | void set_max_sew (uint8_t max_sew) { m_max_sew = max_sew; } | |
831 | void set_change_vtype_only () { m_change_vtype_only = true; } | |
832 | void set_read_vl_insn (insn_info *insn) { m_read_vl_insn = insn; } | |
833 | |
/* Plain getters.  */
834 | rtx get_avl () const { return m_avl; } | |
835 | rtx get_vl () const { return m_vl; } | |
836 | set_info *get_avl_def () const { return m_avl_def; } | |
837 | uint8_t get_sew () const { return m_sew; } | |
838 | vlmul_type get_vlmul () const { return m_vlmul; } | |
839 | uint8_t get_ratio () const { return m_ratio; } | |
840 | bool get_ta () const { return m_ta; } | |
841 | bool get_ma () const { return m_ma; } | |
842 | insn_info *get_insn () const { return m_insn; } | |
843 | bool delete_p () const { return m_delete; } | |
844 | bb_info *get_bb () const { return m_bb; } | |
845 | uint8_t get_max_sew () const { return m_max_sew; } | |
846 | insn_info *get_read_vl_insn () const { return m_read_vl_insn; } | |
847 | bool vl_use_by_non_rvv_insn_p () const { return m_vl_used_by_non_rvv_insn; } | |
848 | |
/* Classification of the AVL operand: a CONST_INT, the VLMAX marker, or a
   register that is not the VLMAX marker.  */
849 | bool has_imm_avl () const { return m_avl && CONST_INT_P (m_avl); } | |
850 | bool has_vlmax_avl () const { return vlmax_avl_p (m_avl); } | |
851 | bool has_nonvlmax_reg_avl () const | |
852 | { | |
853 | return m_avl && REG_P (m_avl) && !has_vlmax_avl (); | |
854 | } | |
/* True only when the AVL is statically known to be non-zero: a positive
   immediate or VLMAX.  Any other AVL, including a register, yields false.  */
855 | bool has_non_zero_avl () const | |
856 | { | |
857 | if (has_imm_avl ()) | |
858 | return INTVAL (m_avl) > 0; | |
859 | return has_vlmax_avl (); | |
860 | } | |
/* True when a VL register is recorded.  */
861 | bool has_vl () const | |
862 | { | |
863 | /* The VL operand can only be either a NULL_RTX or a register. */ | |
864 | gcc_assert (!m_vl || REG_P (m_vl)); | |
865 | return m_vl != NULL_RTX; | |
866 | } | |
/* True when this info and OTHER have the same SEW/LMUL ratio.  */
867 | bool has_same_ratio (const vsetvl_info &other) const | |
868 | { | |
869 | return get_ratio () == other.get_ratio (); | |
870 | } | |
871 | |
872 | /* The block of INSN isn't always same as the block of the VSETVL_INFO, | |
873 | meaning we may have 'get_insn ()->bb () != get_bb ()'. | |
874 | |
875 | E.g. BB 2 (Empty) ---> BB 3 (VALID, has rvv insn 1) | |
876 | |
877 | BB 2 has empty VSETVL_INFO, whereas BB 3 has VSETVL_INFO that satisfies | |
878 | get_insn ()->bb () == get_bb (). In earliest fusion, we may fuse bb 3 and | |
879 | bb 2 so that the 'get_bb ()' of BB2 VSETVL_INFO will be BB2 whereas the | |
880 | 'get_insn ()' of BB2 VSETVL INFO will be the rvv insn 1 (which is located | |
881 | at BB3). */ | |
882 | bool insn_inside_bb_p () const { return get_insn ()->bb () == get_bb (); } | |
/* Copy the AVL, VL and AVL definition from OTHER; nothing else changes.  */
883 | void update_avl (const vsetvl_info &other) | |
884 | { | |
885 | m_avl = other.get_avl (); | |
886 | m_vl = other.get_vl (); | |
887 | m_avl_def = other.get_avl_def (); | |
888 | } | |
889 | |
/* State queries.  */
890 | bool uninit_p () const { return m_state == state_type::UNINITIALIZED; } | |
891 | bool valid_p () const { return m_state == state_type::VALID; } | |
892 | bool unknown_p () const { return m_state == state_type::UNKNOWN; } | |
893 | bool empty_p () const { return m_state == state_type::EMPTY; } | |
894 | bool change_vtype_only_p () const { return m_change_vtype_only; } | |
895 | |
/* State transitions.  There is no way back to UNINITIALIZED.  */
896 | void set_valid () { m_state = state_type::VALID; } | |
897 | void set_unknown () { m_state = state_type::UNKNOWN; } | |
898 | void set_empty () { m_state = state_type::EMPTY; } | |
899 | |
/* Direct accessors for the three demand fields.  */
900 | void set_sew_lmul_demand (sew_lmul_demand_type demand) | |
901 | { | |
902 | m_sew_lmul_demand = demand; | |
903 | } | |
904 | void set_policy_demand (policy_demand_type demand) | |
905 | { | |
906 | m_policy_demand = demand; | |
907 | } | |
908 | void set_avl_demand (avl_demand_type demand) { m_avl_demand = demand; } | |
909 | |
910 | sew_lmul_demand_type get_sew_lmul_demand () const | |
911 | { | |
912 | return m_sew_lmul_demand; | |
913 | } | |
914 | policy_demand_type get_policy_demand () const { return m_policy_demand; } | |
915 | avl_demand_type get_avl_demand () const { return m_avl_demand; } | |
916 | |
/* Translate a mask of DEMAND_* bits into the three typed demand fields.
   Each group of bits (SEW/LMUL/RATIO/GE_SEW, tail/mask policy, AVL) must
   equal one enumerator of the corresponding enum; any other combination,
   such as both AVL bits at once, reaches gcc_unreachable.  */
917 | void normalize_demand (unsigned demand_flags) | |
918 | { | |
919 | switch (demand_flags | |
920 | & (DEMAND_SEW_P | DEMAND_LMUL_P | DEMAND_RATIO_P | DEMAND_GE_SEW_P)) | |
921 | { | |
922 | case (unsigned) sew_lmul_demand_type::sew_lmul: | |
923 | m_sew_lmul_demand = sew_lmul_demand_type::sew_lmul; | |
924 | break; | |
925 | case (unsigned) sew_lmul_demand_type::ratio_only: | |
926 | m_sew_lmul_demand = sew_lmul_demand_type::ratio_only; | |
927 | break; | |
928 | case (unsigned) sew_lmul_demand_type::sew_only: | |
929 | m_sew_lmul_demand = sew_lmul_demand_type::sew_only; | |
930 | break; | |
931 | case (unsigned) sew_lmul_demand_type::ge_sew: | |
932 | m_sew_lmul_demand = sew_lmul_demand_type::ge_sew; | |
933 | break; | |
934 | case (unsigned) sew_lmul_demand_type::ratio_and_ge_sew: | |
935 | m_sew_lmul_demand = sew_lmul_demand_type::ratio_and_ge_sew; | |
936 | break; | |
937 | default: | |
938 | gcc_unreachable (); | |
939 | } | |
940 | |
941 | switch (demand_flags & (DEMAND_TAIL_POLICY_P | DEMAND_MASK_POLICY_P)) | |
942 | { | |
943 | case (unsigned) policy_demand_type::tail_mask_policy: | |
944 | m_policy_demand = policy_demand_type::tail_mask_policy; | |
945 | break; | |
946 | case (unsigned) policy_demand_type::tail_policy_only: | |
947 | m_policy_demand = policy_demand_type::tail_policy_only; | |
948 | break; | |
949 | case (unsigned) policy_demand_type::mask_policy_only: | |
950 | m_policy_demand = policy_demand_type::mask_policy_only; | |
951 | break; | |
952 | case (unsigned) policy_demand_type::ignore_policy: | |
953 | m_policy_demand = policy_demand_type::ignore_policy; | |
954 | break; | |
955 | default: | |
956 | gcc_unreachable (); | |
957 | } | |
958 | |
959 | switch (demand_flags & (DEMAND_AVL_P | DEMAND_NON_ZERO_AVL_P)) | |
960 | { | |
961 | case (unsigned) avl_demand_type::avl: | |
962 | m_avl_demand = avl_demand_type::avl; | |
963 | break; | |
964 | case (unsigned) avl_demand_type::non_zero_avl: | |
965 | m_avl_demand = avl_demand_type::non_zero_avl; | |
966 | break; | |
967 | case (unsigned) avl_demand_type::ignore_avl: | |
968 | m_avl_demand = avl_demand_type::ignore_avl; | |
969 | break; | |
970 | default: | |
971 | gcc_unreachable (); | |
972 | } | |
973 | } | |
974 | |
/* Parse a plain RTL insn.  Debug insns are ignored, and at optimize == 0 so
   are insns without a vtype operand; in both cases the info stays
   UNINITIALIZED.  Otherwise the info becomes VALID and AVL, VL (only for
   VLMAX AVL or a vsetvl insn), SEW, LMUL and the policies are filled in.
   m_insn, m_bb, m_ratio, m_avl_def and the demand fields are not touched.  */
975 | void parse_insn (rtx_insn *rinsn) | |
976 | { | |
977 | if (!NONDEBUG_INSN_P (rinsn)) | |
978 | return; | |
979 | if (optimize == 0 && !has_vtype_op (rinsn)) | |
980 | return; | |
981 | gcc_assert (!vsetvl_discard_result_insn_p (rinsn)); | |
982 | set_valid (); | |
983 | extract_insn_cached (rinsn); | |
984 | m_avl = ::get_avl (rinsn); | |
985 | if (has_vlmax_avl () || vsetvl_insn_p (rinsn)) | |
986 | m_vl = ::get_vl (rinsn); | |
987 | m_sew = ::get_sew (rinsn); | |
988 | m_vlmul = ::get_vlmul (rinsn); | |
989 | m_ta = tail_agnostic_p (rinsn); | |
990 | m_ma = mask_agnostic_p (rinsn); | |
991 | } | |
992 | |
/* Parse an RTL-SSA insn.  m_insn and m_bb are always recorded.  The state
   ends up as
     - UNINITIALIZED for debug insns and for insns that neither configure
       nor use VL/VTYPE,
     - UNKNOWN for calls, asms and other insns that define VL or VTYPE in a
       way this pass does not model,
     - VALID otherwise, in which case the configuration, the demand fields,
       the AVL taken over from a defining vsetvl, the use of VL by other
       insns and the read-vl insn of a fault-only-first load are computed
       in that order.  */
993 | void parse_insn (insn_info *insn) | |
994 | { | |
995 | m_insn = insn; | |
996 | m_bb = insn->bb (); | |
997 | /* Return if it is debug insn for the consistency with optimize == 0. */ | |
998 | if (insn->is_debug_insn ()) | |
999 | return; | |
ec99ffab | 1000 |
29331e72 LD |
1001 | /* We set it as unknown since we don't know what will happen in CALL or ASM. */ | |
1002 | if (insn->is_call () || insn->is_asm ()) | |
1003 | { | |
1004 | set_unknown (); | |
1005 | return; | |
1006 | } | |
1007 | |
1008 | /* If this is something that updates VL/VTYPE that we don't know about, set | |
1009 | the state to unknown. */ | |
1010 | if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()) | |
1011 | && (find_access (insn->defs (), VL_REGNUM) | |
1012 | || find_access (insn->defs (), VTYPE_REGNUM))) | |
1013 | { | |
1014 | set_unknown (); | |
1015 | return; | |
1016 | } | |
1017 | |
1018 | if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())) | |
1019 | /* uninitialized */ | |
1020 | return; | |
ec99ffab | 1021 |
29331e72 LD |
1022 | set_valid (); | |
1023 | |
1024 | m_avl = ::get_avl (insn->rtl ()); | |
1025 | if (m_avl) | |
1026 | { | |
/* A VL destination is only recorded for vsetvl insns and VLMAX AVL.  */
1027 | if (vsetvl_insn_p (insn->rtl ()) || has_vlmax_avl ()) | |
1028 | m_vl = ::get_vl (insn->rtl ()); | |
1029 | |
/* Remember which definition feeds a real register AVL.  */
1030 | if (has_nonvlmax_reg_avl ()) | |
1031 | m_avl_def = find_access (insn->uses (), REGNO (m_avl))->def (); | |
1032 | } | |
1033 | |
1034 | m_sew = ::get_sew (insn->rtl ()); | |
1035 | m_vlmul = ::get_vlmul (insn->rtl ()); | |
1036 | m_ratio = get_attr_ratio (insn->rtl ()); | |
1037 | /* when get_attr_ratio is invalid, this kind of instructions | |
1038 | doesn't care about ratio. However, we still need this value | |
1039 | in demand info backward analysis. */ | |
1040 | if (m_ratio == INVALID_ATTRIBUTE) | |
1041 | m_ratio = calculate_ratio (m_sew, m_vlmul); | |
1042 | m_ta = tail_agnostic_p (insn->rtl ()); | |
1043 | m_ma = mask_agnostic_p (insn->rtl ()); | |
1044 | |
1045 | /* If merge operand is undef value, we prefer agnostic. */ | |
1046 | int merge_op_idx = get_attr_merge_op_idx (insn->rtl ()); | |
1047 | if (merge_op_idx != INVALID_ATTRIBUTE | |
1048 | && satisfies_constraint_vu (recog_data.operand[merge_op_idx])) | |
1049 | { | |
1050 | m_ta = true; | |
1051 | m_ma = true; | |
1052 | } | |
1053 | |
1054 | /* Determine the demand info of the RVV insn. */ | |
1055 | m_max_sew = get_max_int_sew (); | |
1056 | unsigned demand_flags = 0; | |
/* A vector config insn demands exactly its AVL and its ratio.  */
1057 | if (vector_config_insn_p (insn->rtl ())) | |
1058 | { | |
1059 | demand_flags |= demand_flags::DEMAND_AVL_P; | |
1060 | demand_flags |= demand_flags::DEMAND_RATIO_P; | |
1061 | } | |
1062 | else | |
1063 | { | |
1064 | if (has_vl_op (insn->rtl ())) | |
1065 | { | |
1066 | if (scalar_move_insn_p (insn->rtl ())) | |
1067 | { | |
1068 | /* If the avl for vmv.s.x comes from the vsetvl instruction, we | |
1069 | don't know if the avl is non-zero, so it is set to | |
1070 | DEMAND_AVL_P for now. it may be corrected to | |
1071 | DEMAND_NON_ZERO_AVL_P later when more information is | |
1072 | available. | |
1073 | */ | |
1074 | if (has_non_zero_avl ()) | |
1075 | demand_flags |= demand_flags::DEMAND_NON_ZERO_AVL_P; | |
1076 | else | |
1077 | demand_flags |= demand_flags::DEMAND_AVL_P; | |
1078 | } | |
1079 | else | |
1080 | demand_flags |= demand_flags::DEMAND_AVL_P; | |
1081 | } | |
ec99ffab | 1082 |
29331e72 LD |
/* An insn with a valid ratio attribute demands only the ratio; otherwise
   it demands SEW (or just a lower bound on SEW for a tail agnostic scalar
   move) and, unless LMUL is ignored by the insn, LMUL.  */
1083 | if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE) | |
1084 | demand_flags |= demand_flags::DEMAND_RATIO_P; | |
1085 | else | |
1086 | { | |
1087 | if (scalar_move_insn_p (insn->rtl ()) && m_ta) | |
1088 | { | |
1089 | demand_flags |= demand_flags::DEMAND_GE_SEW_P; | |
1090 | m_max_sew = get_attr_type (insn->rtl ()) == TYPE_VFMOVFV | |
1091 | ? get_max_float_sew () | |
1092 | : get_max_int_sew (); | |
1093 | } | |
1094 | else | |
1095 | demand_flags |= demand_flags::DEMAND_SEW_P; | |
1096 | |
1097 | if (!ignore_vlmul_insn_p (insn->rtl ())) | |
1098 | demand_flags |= demand_flags::DEMAND_LMUL_P; | |
1099 | } | |
ec99ffab | 1100 |
29331e72 LD |
/* A policy is only demanded when it is not agnostic.  */
1101 | if (!m_ta) | |
1102 | demand_flags |= demand_flags::DEMAND_TAIL_POLICY_P; | |
1103 | if (!m_ma) | |
1104 | demand_flags |= demand_flags::DEMAND_MASK_POLICY_P; | |
1105 | } | |
1106 | |
1107 | normalize_demand (demand_flags); | |
1108 | |
1109 | /* Optimize AVL from the vsetvl instruction. */ | |
/* When the AVL register has a single defining vsetvl whose own AVL is VLMAX
   or an immediate, and this insn is a scalar move or both ratios match,
   take over the AVL, VL and AVL definition of that vsetvl.  */
1110 | insn_info *def_insn = extract_single_source (get_avl_def ()); | |
1111 | if (def_insn && vsetvl_insn_p (def_insn->rtl ())) | |
1112 | { | |
1113 | vsetvl_info def_info = vsetvl_info (def_insn); | |
1114 | if ((scalar_move_insn_p (insn->rtl ()) | |
1115 | || def_info.get_ratio () == get_ratio ()) | |
1116 | && (def_info.has_vlmax_avl () || def_info.has_imm_avl ())) | |
1117 | { | |
1118 | update_avl (def_info); | |
1119 | if (scalar_move_insn_p (insn->rtl ()) && has_non_zero_avl ()) | |
1120 | m_avl_demand = avl_demand_type::non_zero_avl; | |
1121 | } | |
1122 | } | |
1123 | |
1124 | /* Determine if dest operand(vl) has been used by non-RVV instructions. */ | |
1125 | if (has_vl ()) | |
1126 | { | |
1127 | const hash_set<use_info *> vl_uses | |
1128 | = get_all_real_uses (get_insn (), REGNO (get_vl ())); | |
1129 | for (use_info *use : vl_uses) | |
1130 | { | |
1131 | gcc_assert (use->insn ()->is_real ()); | |
1132 | rtx_insn *rinsn = use->insn ()->rtl (); | |
/* The use counts as a non-RVV use when the user has no VL operand or
   mentions the register other than exactly once ...  */
1133 | if (!has_vl_op (rinsn) | |
1134 | || count_regno_occurrences (rinsn, REGNO (get_vl ())) != 1) | |
1135 | { | |
1136 | m_vl_used_by_non_rvv_insn = true; | |
1137 | break; | |
1138 | } | |
/* ... or when that single mention is not the AVL operand.  */
1139 | rtx avl = ::get_avl (rinsn); | |
1140 | if (!avl || REGNO (get_vl ()) != REGNO (avl)) | |
1141 | { | |
1142 | m_vl_used_by_non_rvv_insn = true; | |
1143 | break; | |
1144 | } | |
1145 | } | |
1146 | } | |
ec99ffab | 1147 |
29331e72 LD |
1148 | /* Collect the read vl insn for the fault-only-first rvv loads. */ | |
1149 | if (fault_first_load_p (insn->rtl ())) | |
1150 | { | |
/* Scan forward inside the same block; stop at the first insn that defines
   VL or at the first read-vl insn, whichever comes first.  */
1151 | for (insn_info *i = insn->next_nondebug_insn (); | |
1152 | i->bb () == insn->bb (); i = i->next_nondebug_insn ()) | |
1153 | { | |
1154 | if (find_access (i->defs (), VL_REGNUM)) | |
1155 | break; | |
1156 | if (i->rtl () && read_vl_insn_p (i->rtl ())) | |
1157 | { | |
1158 | m_read_vl_insn = i; | |
1159 | break; | |
1160 | } | |
1161 | } | |
1162 | } | |
1163 | } | |
1164 | |
1165 | /* Returns the corresponding vsetvl rtx pat. */ | |
/* change_vtype_only_p selects the vtype-only pattern.  Otherwise the
   VL-writing vsetvl is used when a VL register is recorded and IGNORE_VL is
   false, and the discard-result pattern in all remaining cases.  */
1166 | rtx get_vsetvl_pat (bool ignore_vl = false) const | |
1167 | { | |
1168 | rtx avl = get_avl (); | |
1169 | /* if optimization == 0 and the instruction is vmv.x.s/vfmv.f.s, | |
1170 | set the value of avl to (const_int 0) so that VSETVL PASS will | |
1171 | insert vsetvl correctly.*/ | |
1172 | if (!get_avl ()) | |
1173 | avl = GEN_INT (0); | |
1174 | rtx sew = gen_int_mode (get_sew (), Pmode); | |
1175 | rtx vlmul = gen_int_mode (get_vlmul (), Pmode); | |
1176 | rtx ta = gen_int_mode (get_ta (), Pmode); | |
1177 | rtx ma = gen_int_mode (get_ma (), Pmode); | |
1178 | |
1179 | if (change_vtype_only_p ()) | |
1180 | return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma); | |
1181 | else if (has_vl () && !ignore_vl) | |
1182 | return gen_vsetvl (Pmode, get_vl (), avl, sew, vlmul, ta, ma); | |
1183 | else | |
1184 | return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma); | |
1185 | } | |
1186 | |
/* Equality of two initialized infos.  An EMPTY left operand is equal only
   to an EMPTY right operand and an UNKNOWN one only to an UNKNOWN one;
   otherwise insn, block, AVL, VL, AVL definition, SEW, LMUL, both policies
   and the three demand fields must all match.  Ratio, max SEW and the flag
   members are not compared.  */
1187 | bool operator== (const vsetvl_info &other) const | |
1188 | { | |
1189 | gcc_assert (!uninit_p () && !other.uninit_p () | |
1190 | && "Uninitialization should not happen"); | |
1191 | |
1192 | if (empty_p ()) | |
1193 | return other.empty_p (); | |
1194 | if (unknown_p ()) | |
1195 | return other.unknown_p (); | |
1196 | |
1197 | return get_insn () == other.get_insn () && get_bb () == other.get_bb () | |
1198 | && get_avl () == other.get_avl () && get_vl () == other.get_vl () | |
1199 | && get_avl_def () == other.get_avl_def () | |
1200 | && get_sew () == other.get_sew () | |
1201 | && get_vlmul () == other.get_vlmul () && get_ta () == other.get_ta () | |
1202 | && get_ma () == other.get_ma () | |
1203 | && get_avl_demand () == other.get_avl_demand () | |
1204 | && get_sew_lmul_demand () == other.get_sew_lmul_demand () | |
1205 | && get_policy_demand () == other.get_policy_demand (); | |
1206 | } | |
1207 | |
/* Print the info to FILE.  Non-VALID states print a single word and
   return.  For a VALID info every line after the first is prefixed with
   INDENT; the ignore_policy and ignore_avl demands print nothing.  */
1208 | void dump (FILE *file, const char *indent = "") const | |
1209 | { | |
1210 | if (uninit_p ()) | |
1211 | { | |
1212 | fprintf (file, "UNINITIALIZED.\n"); | |
1213 | return; | |
1214 | } | |
1215 | else if (unknown_p ()) | |
1216 | { | |
1217 | fprintf (file, "UNKNOWN.\n"); | |
1218 | return; | |
1219 | } | |
1220 | else if (empty_p ()) | |
1221 | { | |
1222 | fprintf (file, "EMPTY.\n"); | |
1223 | return; | |
1224 | } | |
1225 | else if (valid_p ()) | |
1226 | fprintf (file, "VALID (insn %u, bb %u)%s\n", get_insn ()->uid (), | |
1227 | get_bb ()->index (), delete_p () ? " (deleted)" : ""); | |
1228 | else | |
1229 | gcc_unreachable (); | |
ec99ffab | 1230 |
29331e72 LD |
1231 | fprintf (file, "%sDemand fields:", indent); | |
1232 | if (m_sew_lmul_demand == sew_lmul_demand_type::sew_lmul) | |
1233 | fprintf (file, " demand_sew_lmul"); | |
1234 | else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_only) | |
1235 | fprintf (file, " demand_ratio_only"); | |
1236 | else if (m_sew_lmul_demand == sew_lmul_demand_type::sew_only) | |
1237 | fprintf (file, " demand_sew_only"); | |
1238 | else if (m_sew_lmul_demand == sew_lmul_demand_type::ge_sew) | |
1239 | fprintf (file, " demand_ge_sew"); | |
1240 | else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_and_ge_sew) | |
1241 | fprintf (file, " demand_ratio_and_ge_sew"); | |
1242 | |
1243 | if (m_policy_demand == policy_demand_type::tail_mask_policy) | |
1244 | fprintf (file, " demand_tail_mask_policy"); | |
1245 | else if (m_policy_demand == policy_demand_type::tail_policy_only) | |
1246 | fprintf (file, " demand_tail_policy_only"); | |
1247 | else if (m_policy_demand == policy_demand_type::mask_policy_only) | |
1248 | fprintf (file, " demand_mask_policy_only"); | |
1249 | |
1250 | if (m_avl_demand == avl_demand_type::avl) | |
1251 | fprintf (file, " demand_avl"); | |
1252 | else if (m_avl_demand == avl_demand_type::non_zero_avl) | |
1253 | fprintf (file, " demand_non_zero_avl"); | |
1254 | fprintf (file, "\n"); | |
1255 | |
1256 | fprintf (file, "%sSEW=%d, ", indent, get_sew ()); | |
1257 | fprintf (file, "VLMUL=%s, ", vlmul_to_str (get_vlmul ())); | |
1258 | fprintf (file, "RATIO=%d, ", get_ratio ()); | |
1259 | fprintf (file, "MAX_SEW=%d\n", get_max_sew ()); | |
1260 | |
1261 | fprintf (file, "%sTAIL_POLICY=%s, ", indent, policy_to_str (get_ta ())); | |
1262 | fprintf (file, "MASK_POLICY=%s\n", policy_to_str (get_ma ())); | |
1263 | |
1264 | fprintf (file, "%sAVL=", indent); | |
1265 | print_rtl_single (file, get_avl ()); | |
1266 | fprintf (file, "%sVL=", indent); | |
1267 | print_rtl_single (file, get_vl ()); | |
1268 | if (change_vtype_only_p ()) | |
1269 | fprintf (file, "%schange vtype only\n", indent); | |
1270 | if (get_read_vl_insn ()) | |
1271 | fprintf (file, "%sread_vl_insn: insn %u\n", indent, | |
1272 | get_read_vl_insn ()->uid ()); | |
1273 | if (vl_use_by_non_rvv_insn_p ()) | |
1274 | fprintf (file, "%suse_by_non_rvv_insn=true\n", indent); | |
1275 | } | |
1276 | }; | |
8fbc0871 | 1277 | |
29331e72 | 1278 | class vsetvl_block_info |
ec99ffab | 1279 | { |
29331e72 LD |
1280 | public: |
1281 | /* The static execute probability of the demand info. */ | |
1282 | profile_probability probability; | |
1283 | ||
4fd09aed JZ |
1284 | auto_vec<vsetvl_info> local_infos; |
1285 | vsetvl_info global_info; | |
1286 | bb_info *bb; | |
29331e72 LD |
1287 | |
1288 | bool full_available; | |
1289 | ||
4fd09aed | 1290 | vsetvl_block_info () : bb (nullptr), full_available (false) |
29331e72 | 1291 | { |
4fd09aed JZ |
1292 | local_infos.safe_grow_cleared (0); |
1293 | global_info.set_empty (); | |
29331e72 LD |
1294 | } |
1295 | vsetvl_block_info (const vsetvl_block_info &other) | |
4fd09aed JZ |
1296 | : probability (other.probability), local_infos (other.local_infos.copy ()), |
1297 | global_info (other.global_info), bb (other.bb) | |
29331e72 LD |
1298 | {} |
1299 | ||
1300 | vsetvl_info &get_entry_info () | |
1301 | { | |
1302 | gcc_assert (!empty_p ()); | |
4fd09aed | 1303 | return local_infos.is_empty () ? global_info : local_infos[0]; |
29331e72 LD |
1304 | } |
1305 | vsetvl_info &get_exit_info () | |
1306 | { | |
1307 | gcc_assert (!empty_p ()); | |
4fd09aed JZ |
1308 | return local_infos.is_empty () ? global_info |
1309 | : local_infos[local_infos.length () - 1]; | |
29331e72 LD |
1310 | } |
1311 | const vsetvl_info &get_entry_info () const | |
1312 | { | |
1313 | gcc_assert (!empty_p ()); | |
4fd09aed | 1314 | return local_infos.is_empty () ? global_info : local_infos[0]; |
29331e72 LD |
1315 | } |
1316 | const vsetvl_info &get_exit_info () const | |
1317 | { | |
1318 | gcc_assert (!empty_p ()); | |
4fd09aed JZ |
1319 | return local_infos.is_empty () ? global_info |
1320 | : local_infos[local_infos.length () - 1]; | |
29331e72 LD |
1321 | } |
1322 | ||
4fd09aed JZ |
1323 | bool empty_p () const { return local_infos.is_empty () && !has_info (); } |
1324 | bool has_info () const { return !global_info.empty_p (); } | |
29331e72 LD |
1325 | void set_info (const vsetvl_info &info) |
1326 | { | |
4fd09aed JZ |
1327 | gcc_assert (local_infos.is_empty ()); |
1328 | global_info = info; | |
1329 | global_info.set_bb (bb); | |
29331e72 | 1330 | } |
4fd09aed | 1331 | void set_empty_info () { global_info.set_empty (); } |
ec99ffab JZZ |
1332 | }; |
1333 | ||
29331e72 LD |
1334 | /* Demand system is the RVV-based VSETVL info analysis tools wrapper. |
1335 | It defines compatible rules for SEW/LMUL, POLICY and AVL. | |
1336 | Also, it provides 3 iterfaces avaiable_p, compatible_p and | |
1337 | merge for the VSETVL PASS analysis and optimization. | |
1338 | ||
1339 | - avaiable_p: Determine whether the next info can get the | |
1340 | avaiable VSETVL status from previous info. | |
1341 | e.g. bb 2 (demand SEW = 32, LMUL = M2) -> bb 3 (demand RATIO = 16). | |
1342 | Since bb 2 demand info (SEW/LMUL = 32/2 = 16) satisfies the bb 3 | |
1343 | demand, the VSETVL instruction in bb 3 can be elided. | |
1344 | avaiable_p (previous, next) is true in such situation. | |
1345 | - compatible_p: Determine whether prev_info is compatible with next_info | |
1346 | so that we can have a new merged info that is avaiable to both of them. | |
1347 | - merge: Merge the stricter demand information from | |
1348 | next_info into prev_info so that prev_info becomes available to | |
1349 | next_info. */ | |
1350 | class demand_system | |
ec99ffab | 1351 | { |
29331e72 LD |
1352 | private: |
1353 | sbitmap *m_avl_def_in; | |
1354 | sbitmap *m_avl_def_out; | |
ec99ffab | 1355 | |
29331e72 | 1356 | /* predictors. */ |
ec99ffab | 1357 | |
29331e72 LD |
1358 | inline bool always_true (const vsetvl_info &prev ATTRIBUTE_UNUSED, |
1359 | const vsetvl_info &next ATTRIBUTE_UNUSED) | |
1360 | { | |
1361 | return true; | |
1362 | } | |
1363 | inline bool always_false (const vsetvl_info &prev ATTRIBUTE_UNUSED, | |
1364 | const vsetvl_info &next ATTRIBUTE_UNUSED) | |
1365 | { | |
ec99ffab | 1366 | return false; |
29331e72 LD |
1367 | } |
1368 | ||
1369 | /* predictors for sew and lmul */ | |
1370 | ||
1371 | inline bool lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1372 | { | |
1373 | return prev.get_vlmul () == next.get_vlmul (); | |
1374 | } | |
1375 | inline bool sew_eq_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1376 | { | |
1377 | return prev.get_sew () == next.get_sew (); | |
1378 | } | |
1379 | inline bool sew_lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1380 | { | |
1381 | return lmul_eq_p (prev, next) && sew_eq_p (prev, next); | |
1382 | } | |
1383 | inline bool sew_ge_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1384 | { | |
1385 | return prev.get_sew () == next.get_sew () | |
1386 | || (next.get_ta () && prev.get_sew () > next.get_sew ()); | |
1387 | } | |
1388 | inline bool sew_le_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1389 | { | |
1390 | return prev.get_sew () == next.get_sew () | |
1391 | || (prev.get_ta () && prev.get_sew () < next.get_sew ()); | |
1392 | } | |
1393 | inline bool prev_sew_le_next_max_sew_p (const vsetvl_info &prev, | |
1394 | const vsetvl_info &next) | |
1395 | { | |
1396 | return prev.get_sew () <= next.get_max_sew (); | |
1397 | } | |
1398 | inline bool next_sew_le_prev_max_sew_p (const vsetvl_info &prev, | |
1399 | const vsetvl_info &next) | |
1400 | { | |
1401 | return next.get_sew () <= prev.get_max_sew (); | |
1402 | } | |
1403 | inline bool max_sew_overlap_p (const vsetvl_info &prev, | |
1404 | const vsetvl_info &next) | |
1405 | { | |
1406 | return !(prev.get_sew () > next.get_max_sew () | |
1407 | || next.get_sew () > prev.get_max_sew ()); | |
1408 | } | |
1409 | inline bool ratio_eq_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1410 | { | |
1411 | return prev.has_same_ratio (next); | |
1412 | } | |
1413 | inline bool prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev, | |
1414 | const vsetvl_info &next) | |
1415 | { | |
1416 | return prev.get_ratio () >= (next.get_sew () / 8); | |
1417 | } | |
1418 | inline bool next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev, | |
1419 | const vsetvl_info &next) | |
1420 | { | |
1421 | return next.get_ratio () >= (prev.get_sew () / 8); | |
1422 | } | |
1423 | ||
1424 | inline bool sew_ge_and_ratio_eq_p (const vsetvl_info &prev, | |
1425 | const vsetvl_info &next) | |
1426 | { | |
1427 | return sew_ge_p (prev, next) && ratio_eq_p (prev, next); | |
1428 | } | |
1429 | inline bool sew_ge_and_prev_sew_le_next_max_sew_p (const vsetvl_info &prev, | |
1430 | const vsetvl_info &next) | |
1431 | { | |
1432 | return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next); | |
1433 | } | |
1434 | inline bool | |
1435 | sew_ge_and_prev_sew_le_next_max_sew_and_next_ratio_valid_for_prev_sew_p ( | |
1436 | const vsetvl_info &prev, const vsetvl_info &next) | |
1437 | { | |
1438 | return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next) | |
1439 | && next_ratio_valid_for_prev_sew_p (prev, next); | |
1440 | } | |
1441 | inline bool sew_le_and_next_sew_le_prev_max_sew_p (const vsetvl_info &prev, | |
1442 | const vsetvl_info &next) | |
1443 | { | |
1444 | return sew_le_p (prev, next) && next_sew_le_prev_max_sew_p (prev, next); | |
1445 | } | |
1446 | inline bool | |
1447 | max_sew_overlap_and_next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev, | |
1448 | const vsetvl_info &next) | |
1449 | { | |
1450 | return next_ratio_valid_for_prev_sew_p (prev, next) | |
1451 | && max_sew_overlap_p (prev, next); | |
1452 | } | |
1453 | inline bool | |
1454 | sew_le_and_next_sew_le_prev_max_sew_and_ratio_eq_p (const vsetvl_info &prev, | |
1455 | const vsetvl_info &next) | |
1456 | { | |
1457 | return sew_le_p (prev, next) && ratio_eq_p (prev, next) | |
1458 | && next_sew_le_prev_max_sew_p (prev, next); | |
1459 | } | |
1460 | inline bool | |
1461 | max_sew_overlap_and_prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev, | |
1462 | const vsetvl_info &next) | |
1463 | { | |
1464 | return prev_ratio_valid_for_next_sew_p (prev, next) | |
1465 | && max_sew_overlap_p (prev, next); | |
1466 | } | |
1467 | inline bool | |
1468 | sew_le_and_next_sew_le_prev_max_sew_and_prev_ratio_valid_for_next_sew_p ( | |
1469 | const vsetvl_info &prev, const vsetvl_info &next) | |
1470 | { | |
1471 | return sew_le_p (prev, next) && prev_ratio_valid_for_next_sew_p (prev, next) | |
1472 | && next_sew_le_prev_max_sew_p (prev, next); | |
1473 | } | |
1474 | inline bool max_sew_overlap_and_ratio_eq_p (const vsetvl_info &prev, | |
1475 | const vsetvl_info &next) | |
1476 | { | |
1477 | return ratio_eq_p (prev, next) && max_sew_overlap_p (prev, next); | |
1478 | } | |
1479 | ||
1480 | /* predictors for tail and mask policy */ | |
1481 | ||
1482 | inline bool tail_policy_eq_p (const vsetvl_info &prev, | |
1483 | const vsetvl_info &next) | |
1484 | { | |
1485 | return prev.get_ta () == next.get_ta (); | |
1486 | } | |
1487 | inline bool mask_policy_eq_p (const vsetvl_info &prev, | |
1488 | const vsetvl_info &next) | |
1489 | { | |
1490 | return prev.get_ma () == next.get_ma (); | |
1491 | } | |
1492 | inline bool tail_mask_policy_eq_p (const vsetvl_info &prev, | |
1493 | const vsetvl_info &next) | |
1494 | { | |
1495 | return tail_policy_eq_p (prev, next) && mask_policy_eq_p (prev, next); | |
1496 | } | |
1497 | ||
1498 | /* predictors for avl */ | |
1499 | ||
1500 | inline bool modify_or_use_vl_p (insn_info *i, const vsetvl_info &info) | |
1501 | { | |
1502 | return info.has_vl () | |
1503 | && (find_access (i->uses (), REGNO (info.get_vl ())) | |
1504 | || find_access (i->defs (), REGNO (info.get_vl ()))); | |
1505 | } | |
1506 | inline bool modify_avl_p (insn_info *i, const vsetvl_info &info) | |
1507 | { | |
1508 | return info.has_nonvlmax_reg_avl () | |
1509 | && find_access (i->defs (), REGNO (info.get_avl ())); | |
1510 | } | |
1511 | ||
1512 | inline bool modify_reg_between_p (insn_info *prev_insn, insn_info *curr_insn, | |
1513 | unsigned regno) | |
1514 | { | |
1515 | gcc_assert (prev_insn->compare_with (curr_insn) < 0); | |
1516 | for (insn_info *i = curr_insn->prev_nondebug_insn (); i != prev_insn; | |
1517 | i = i->prev_nondebug_insn ()) | |
1518 | { | |
1519 | // no def of regno | |
1520 | if (find_access (i->defs (), regno)) | |
1521 | return true; | |
1522 | } | |
1523 | return false; | |
1524 | } | |
ec99ffab | 1525 | |
29331e72 LD |
1526 | inline bool reg_avl_equal_p (const vsetvl_info &prev, const vsetvl_info &next) |
1527 | { | |
1528 | if (!prev.has_nonvlmax_reg_avl () || !next.has_nonvlmax_reg_avl ()) | |
1529 | return false; | |
ec99ffab | 1530 | |
29331e72 LD |
1531 | if (same_equiv_note_p (prev.get_avl_def (), next.get_avl_def ())) |
1532 | return true; | |
ec99ffab | 1533 | |
29331e72 LD |
1534 | if (REGNO (prev.get_avl ()) != REGNO (next.get_avl ())) |
1535 | return false; | |
ec99ffab | 1536 | |
29331e72 LD |
1537 | insn_info *prev_insn = prev.get_insn (); |
1538 | if (prev.get_bb () != prev_insn->bb ()) | |
1539 | prev_insn = prev.get_bb ()->end_insn (); | |
ec99ffab | 1540 | |
29331e72 LD |
1541 | insn_info *next_insn = next.get_insn (); |
1542 | if (next.get_bb () != next_insn->bb ()) | |
1543 | next_insn = next.get_bb ()->end_insn (); | |
ec99ffab | 1544 | |
29331e72 LD |
1545 | return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false); |
1546 | } | |
ec99ffab | 1547 | |
29331e72 LD |
1548 | inline bool avl_equal_p (const vsetvl_info &prev, const vsetvl_info &next) |
1549 | { | |
1550 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
ec99ffab | 1551 | |
29331e72 LD |
1552 | if (prev.get_ratio () != next.get_ratio ()) |
1553 | return false; | |
60bd33bc | 1554 | |
29331e72 LD |
1555 | if (next.has_vl () && next.vl_use_by_non_rvv_insn_p ()) |
1556 | return false; | |
e030af3e | 1557 | |
29331e72 LD |
1558 | if (vector_config_insn_p (prev.get_insn ()->rtl ()) && next.get_avl_def () |
1559 | && next.get_avl_def ()->insn () == prev.get_insn ()) | |
1560 | return true; | |
e030af3e | 1561 | |
29331e72 LD |
1562 | if (prev.get_read_vl_insn ()) |
1563 | { | |
1564 | if (!next.has_nonvlmax_reg_avl () || !next.get_avl_def ()) | |
1565 | return false; | |
1566 | insn_info *avl_def_insn = extract_single_source (next.get_avl_def ()); | |
1567 | return avl_def_insn == prev.get_read_vl_insn (); | |
1568 | } | |
1569 | ||
1570 | if (prev == next && prev.has_nonvlmax_reg_avl ()) | |
1571 | { | |
1572 | insn_info *insn = prev.get_insn (); | |
1573 | bb_info *bb = insn->bb (); | |
1574 | for (insn_info *i = insn; real_insn_and_same_bb_p (i, bb); | |
1575 | i = i->next_nondebug_insn ()) | |
1576 | if (find_access (i->defs (), REGNO (prev.get_avl ()))) | |
e030af3e | 1577 | return false; |
29331e72 | 1578 | } |
60bd33bc | 1579 | |
29331e72 LD |
1580 | if (prev.has_vlmax_avl () && next.has_vlmax_avl ()) |
1581 | return true; | |
1582 | else if (prev.has_imm_avl () && next.has_imm_avl ()) | |
1583 | return INTVAL (prev.get_avl ()) == INTVAL (next.get_avl ()); | |
1584 | else if (prev.has_vl () && next.has_nonvlmax_reg_avl () | |
1585 | && REGNO (prev.get_vl ()) == REGNO (next.get_avl ())) | |
1586 | { | |
1587 | insn_info *prev_insn = prev.insn_inside_bb_p () | |
1588 | ? prev.get_insn () | |
1589 | : prev.get_bb ()->end_insn (); | |
1590 | ||
1591 | insn_info *next_insn = next.insn_inside_bb_p () | |
1592 | ? next.get_insn () | |
1593 | : next.get_bb ()->end_insn (); | |
1594 | return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false); | |
1595 | } | |
1596 | else if (prev.has_nonvlmax_reg_avl () && next.has_nonvlmax_reg_avl ()) | |
1597 | return reg_avl_equal_p (prev, next); | |
e030af3e | 1598 | |
e030af3e | 1599 | return false; |
29331e72 LD |
1600 | } |
1601 | inline bool avl_equal_or_prev_avl_non_zero_p (const vsetvl_info &prev, | |
1602 | const vsetvl_info &next) | |
1603 | { | |
1604 | return avl_equal_p (prev, next) || prev.has_non_zero_avl (); | |
1605 | } | |
1606 | ||
1607 | inline bool can_use_next_avl_p (const vsetvl_info &prev, | |
1608 | const vsetvl_info &next) | |
1609 | { | |
1610 | if (!next.has_nonvlmax_reg_avl () && !next.has_vl ()) | |
1611 | return true; | |
e030af3e | 1612 | |
29331e72 LD |
1613 | insn_info *prev_insn = prev.get_insn (); |
1614 | if (prev.get_bb () != prev_insn->bb ()) | |
1615 | prev_insn = prev.get_bb ()->end_insn (); | |
1616 | ||
1617 | insn_info *next_insn = next.get_insn (); | |
1618 | if (next.get_bb () != next_insn->bb ()) | |
1619 | next_insn = next.get_bb ()->end_insn (); | |
1620 | ||
1621 | return avl_vl_unmodified_between_p (prev_insn, next_insn, next); | |
1622 | } | |
1623 | ||
1624 | inline bool avl_equal_or_next_avl_non_zero_and_can_use_next_avl_p ( | |
1625 | const vsetvl_info &prev, const vsetvl_info &next) | |
1626 | { | |
1627 | return avl_equal_p (prev, next) | |
1628 | || (next.has_non_zero_avl () && can_use_next_avl_p (prev, next)); | |
1629 | } | |
1630 | ||
1631 | /* modifiers */ | |
1632 | ||
1633 | inline void nop (const vsetvl_info &prev ATTRIBUTE_UNUSED, | |
1634 | const vsetvl_info &next ATTRIBUTE_UNUSED) | |
1635 | {} | |
1636 | ||
1637 | /* modifiers for sew and lmul */ | |
1638 | ||
1639 | inline void use_min_of_max_sew (vsetvl_info &prev, const vsetvl_info &next) | |
1640 | { | |
1641 | prev.set_max_sew (MIN (prev.get_max_sew (), next.get_max_sew ())); | |
1642 | } | |
1643 | inline void use_next_sew (vsetvl_info &prev, const vsetvl_info &next) | |
1644 | { | |
1645 | prev.set_sew (next.get_sew ()); | |
1646 | use_min_of_max_sew (prev, next); | |
1647 | } | |
1648 | inline void use_max_sew (vsetvl_info &prev, const vsetvl_info &next) | |
1649 | { | |
1650 | auto max_sew = std::max (prev.get_sew (), next.get_sew ()); | |
1651 | prev.set_sew (max_sew); | |
1652 | use_min_of_max_sew (prev, next); | |
1653 | } | |
1654 | inline void use_next_sew_lmul (vsetvl_info &prev, const vsetvl_info &next) | |
1655 | { | |
1656 | use_next_sew (prev, next); | |
1657 | prev.set_vlmul (next.get_vlmul ()); | |
1658 | prev.set_ratio (next.get_ratio ()); | |
1659 | } | |
1660 | inline void use_next_sew_with_prev_ratio (vsetvl_info &prev, | |
1661 | const vsetvl_info &next) | |
1662 | { | |
1663 | use_next_sew (prev, next); | |
1664 | prev.set_vlmul (calculate_vlmul (next.get_sew (), prev.get_ratio ())); | |
1665 | } | |
1666 | inline void modify_lmul_with_next_ratio (vsetvl_info &prev, | |
1667 | const vsetvl_info &next) | |
1668 | { | |
1669 | prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ())); | |
1670 | prev.set_ratio (next.get_ratio ()); | |
1671 | } | |
1672 | ||
1673 | inline void use_max_sew_and_lmul_with_next_ratio (vsetvl_info &prev, | |
1674 | const vsetvl_info &next) | |
1675 | { | |
1676 | prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ())); | |
1677 | use_max_sew (prev, next); | |
1678 | prev.set_ratio (next.get_ratio ()); | |
1679 | } | |
1680 | ||
1681 | inline void use_max_sew_and_lmul_with_prev_ratio (vsetvl_info &prev, | |
1682 | const vsetvl_info &next) | |
1683 | { | |
1684 | auto max_sew = std::max (prev.get_sew (), next.get_sew ()); | |
1685 | prev.set_vlmul (calculate_vlmul (max_sew, prev.get_ratio ())); | |
1686 | prev.set_sew (max_sew); | |
1687 | } | |
1688 | ||
1689 | /* modifiers for tail and mask policy */ | |
1690 | ||
1691 | inline void use_tail_policy (vsetvl_info &prev, const vsetvl_info &next) | |
1692 | { | |
1693 | if (!next.get_ta ()) | |
1694 | prev.set_ta (next.get_ta ()); | |
1695 | } | |
1696 | inline void use_mask_policy (vsetvl_info &prev, const vsetvl_info &next) | |
1697 | { | |
1698 | if (!next.get_ma ()) | |
1699 | prev.set_ma (next.get_ma ()); | |
1700 | } | |
1701 | inline void use_tail_mask_policy (vsetvl_info &prev, const vsetvl_info &next) | |
1702 | { | |
1703 | use_tail_policy (prev, next); | |
1704 | use_mask_policy (prev, next); | |
1705 | } | |
1706 | ||
1707 | /* modifiers for avl */ | |
1708 | ||
1709 | inline void use_next_avl (vsetvl_info &prev, const vsetvl_info &next) | |
1710 | { | |
1711 | gcc_assert (can_use_next_avl_p (prev, next)); | |
1712 | prev.update_avl (next); | |
1713 | } | |
1714 | ||
1715 | inline void use_next_avl_when_not_equal (vsetvl_info &prev, | |
1716 | const vsetvl_info &next) | |
1717 | { | |
1718 | if (avl_equal_p (prev, next)) | |
1719 | return; | |
1720 | gcc_assert (next.has_non_zero_avl ()); | |
1721 | use_next_avl (prev, next); | |
1722 | } | |
e030af3e | 1723 | |
29331e72 LD |
1724 | public: |
1725 | demand_system () : m_avl_def_in (nullptr), m_avl_def_out (nullptr) {} | |
1726 | ||
1727 | void set_avl_in_out_data (sbitmap *m_avl_def_in, sbitmap *m_avl_def_out) | |
1728 | { | |
1729 | m_avl_def_in = m_avl_def_in; | |
1730 | m_avl_def_out = m_avl_def_out; | |
1731 | } | |
1732 | ||
1733 | /* Can we move vsetvl info between prev_insn and next_insn safe? */ | |
1734 | bool avl_vl_unmodified_between_p (insn_info *prev_insn, insn_info *next_insn, | |
1735 | const vsetvl_info &info, | |
1736 | bool ignore_vl = false) | |
1737 | { | |
1738 | gcc_assert ((ignore_vl && info.has_nonvlmax_reg_avl ()) | |
1739 | || (info.has_nonvlmax_reg_avl () || info.has_vl ())); | |
1740 | ||
1741 | gcc_assert (!prev_insn->is_debug_insn () && !next_insn->is_debug_insn ()); | |
1742 | if (prev_insn->bb () == next_insn->bb () | |
1743 | && prev_insn->compare_with (next_insn) < 0) | |
1744 | { | |
1745 | for (insn_info *i = next_insn->prev_nondebug_insn (); i != prev_insn; | |
1746 | i = i->prev_nondebug_insn ()) | |
1747 | { | |
1748 | // no def amd use of vl | |
1749 | if (!ignore_vl && modify_or_use_vl_p (i, info)) | |
1750 | return false; | |
e030af3e | 1751 | |
29331e72 LD |
1752 | // no def of avl |
1753 | if (modify_avl_p (i, info)) | |
1754 | return false; | |
1755 | } | |
1756 | return true; | |
1757 | } | |
1758 | else | |
1759 | { | |
1760 | if (!ignore_vl && info.has_vl ()) | |
1761 | { | |
1762 | bitmap live_out = df_get_live_out (prev_insn->bb ()->cfg_bb ()); | |
1763 | if (bitmap_bit_p (live_out, REGNO (info.get_vl ()))) | |
1764 | return false; | |
1765 | } | |
a2d12abe | 1766 | |
29331e72 LD |
1767 | if (info.has_nonvlmax_reg_avl () && m_avl_def_in && m_avl_def_out) |
1768 | { | |
1769 | bool has_avl_out = false; | |
1770 | unsigned regno = REGNO (info.get_avl ()); | |
1771 | unsigned expr_id; | |
1772 | sbitmap_iterator sbi; | |
1773 | EXECUTE_IF_SET_IN_BITMAP (m_avl_def_out[prev_insn->bb ()->index ()], | |
1774 | 0, expr_id, sbi) | |
1775 | { | |
1776 | if (get_regno (expr_id, last_basic_block_for_fn (cfun)) | |
1777 | != regno) | |
1778 | continue; | |
1779 | has_avl_out = true; | |
1780 | if (!bitmap_bit_p (m_avl_def_in[next_insn->bb ()->index ()], | |
1781 | expr_id)) | |
1782 | return false; | |
1783 | } | |
1784 | if (!has_avl_out) | |
1785 | return false; | |
1786 | } | |
12b23c71 | 1787 | |
29331e72 LD |
1788 | for (insn_info *i = next_insn; i != next_insn->bb ()->head_insn (); |
1789 | i = i->prev_nondebug_insn ()) | |
1790 | { | |
1791 | // no def amd use of vl | |
1792 | if (!ignore_vl && modify_or_use_vl_p (i, info)) | |
1793 | return false; | |
9243c3d1 | 1794 | |
29331e72 LD |
1795 | // no def of avl |
1796 | if (modify_avl_p (i, info)) | |
1797 | return false; | |
1798 | } | |
6b6b9c68 | 1799 | |
29331e72 LD |
1800 | for (insn_info *i = prev_insn->bb ()->end_insn (); i != prev_insn; |
1801 | i = i->prev_nondebug_insn ()) | |
1802 | { | |
1803 | // no def amd use of vl | |
1804 | if (!ignore_vl && modify_or_use_vl_p (i, info)) | |
1805 | return false; | |
1806 | ||
1807 | // no def of avl | |
1808 | if (modify_avl_p (i, info)) | |
1809 | return false; | |
1810 | } | |
1811 | } | |
d875d756 | 1812 | return true; |
29331e72 LD |
1813 | } |
1814 | ||
1815 | bool sew_lmul_compatible_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1816 | { | |
1817 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1818 | sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand (); | |
1819 | sew_lmul_demand_type next_flags = next.get_sew_lmul_demand (); | |
1820 | #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1821 | AVAILABLE_P, FUSE) \ | |
1822 | if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \ | |
1823 | && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \ | |
1824 | return COMPATIBLE_P (prev, next); | |
6b6b9c68 | 1825 | |
29331e72 | 1826 | #include "riscv-vsetvl.def" |
6b6b9c68 | 1827 | |
29331e72 LD |
1828 | gcc_unreachable (); |
1829 | } | |
6b6b9c68 | 1830 | |
29331e72 LD |
1831 | bool sew_lmul_available_p (const vsetvl_info &prev, const vsetvl_info &next) |
1832 | { | |
1833 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1834 | sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand (); | |
1835 | sew_lmul_demand_type next_flags = next.get_sew_lmul_demand (); | |
1836 | #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1837 | AVAILABLE_P, FUSE) \ | |
1838 | if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \ | |
1839 | && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \ | |
1840 | return AVAILABLE_P (prev, next); | |
d875d756 | 1841 | |
29331e72 | 1842 | #include "riscv-vsetvl.def" |
4f673c5e | 1843 | |
29331e72 LD |
1844 | gcc_unreachable (); |
1845 | } | |
1846 | ||
1847 | void merge_sew_lmul (vsetvl_info &prev, const vsetvl_info &next) | |
1848 | { | |
1849 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1850 | sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand (); | |
1851 | sew_lmul_demand_type next_flags = next.get_sew_lmul_demand (); | |
1852 | #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1853 | AVAILABLE_P, FUSE) \ | |
1854 | if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \ | |
1855 | && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \ | |
1856 | { \ | |
1857 | gcc_assert (COMPATIBLE_P (prev, next)); \ | |
1858 | FUSE (prev, next); \ | |
1859 | prev.set_sew_lmul_demand (sew_lmul_demand_type::NEW_FLAGS); \ | |
1860 | return; \ | |
1861 | } | |
9243c3d1 | 1862 | |
29331e72 | 1863 | #include "riscv-vsetvl.def" |
9243c3d1 | 1864 | |
29331e72 LD |
1865 | gcc_unreachable (); |
1866 | } | |
9243c3d1 | 1867 | |
29331e72 LD |
1868 | bool policy_compatible_p (const vsetvl_info &prev, const vsetvl_info &next) |
1869 | { | |
1870 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1871 | policy_demand_type prev_flags = prev.get_policy_demand (); | |
1872 | policy_demand_type next_flags = next.get_policy_demand (); | |
1873 | #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1874 | AVAILABLE_P, FUSE) \ | |
1875 | if (prev_flags == policy_demand_type::PREV_FLAGS \ | |
1876 | && next_flags == policy_demand_type::NEXT_FLAGS) \ | |
1877 | return COMPATIBLE_P (prev, next); | |
9243c3d1 | 1878 | |
29331e72 | 1879 | #include "riscv-vsetvl.def" |
9243c3d1 | 1880 | |
29331e72 LD |
1881 | gcc_unreachable (); |
1882 | } | |
4f673c5e | 1883 | |
29331e72 LD |
1884 | bool policy_available_p (const vsetvl_info &prev, const vsetvl_info &next) |
1885 | { | |
1886 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1887 | policy_demand_type prev_flags = prev.get_policy_demand (); | |
1888 | policy_demand_type next_flags = next.get_policy_demand (); | |
1889 | #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1890 | AVAILABLE_P, FUSE) \ | |
1891 | if (prev_flags == policy_demand_type::PREV_FLAGS \ | |
1892 | && next_flags == policy_demand_type::NEXT_FLAGS) \ | |
1893 | return AVAILABLE_P (prev, next); | |
4f673c5e | 1894 | |
29331e72 | 1895 | #include "riscv-vsetvl.def" |
9243c3d1 | 1896 | |
29331e72 LD |
1897 | gcc_unreachable (); |
1898 | } | |
1899 | ||
1900 | void merge_policy (vsetvl_info &prev, const vsetvl_info &next) | |
1901 | { | |
1902 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1903 | policy_demand_type prev_flags = prev.get_policy_demand (); | |
1904 | policy_demand_type next_flags = next.get_policy_demand (); | |
1905 | #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1906 | AVAILABLE_P, FUSE) \ | |
1907 | if (prev_flags == policy_demand_type::PREV_FLAGS \ | |
1908 | && next_flags == policy_demand_type::NEXT_FLAGS) \ | |
1909 | { \ | |
1910 | gcc_assert (COMPATIBLE_P (prev, next)); \ | |
1911 | FUSE (prev, next); \ | |
1912 | prev.set_policy_demand (policy_demand_type::NEW_FLAGS); \ | |
1913 | return; \ | |
1914 | } | |
9243c3d1 | 1915 | |
29331e72 | 1916 | #include "riscv-vsetvl.def" |
ec99ffab | 1917 | |
29331e72 LD |
1918 | gcc_unreachable (); |
1919 | } | |
9243c3d1 | 1920 | |
29331e72 LD |
1921 | bool avl_compatible_p (const vsetvl_info &prev, const vsetvl_info &next) |
1922 | { | |
1923 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1924 | avl_demand_type prev_flags = prev.get_avl_demand (); | |
1925 | avl_demand_type next_flags = next.get_avl_demand (); | |
1926 | #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1927 | AVAILABLE_P, FUSE) \ | |
1928 | if (prev_flags == avl_demand_type::PREV_FLAGS \ | |
1929 | && next_flags == avl_demand_type::NEXT_FLAGS) \ | |
1930 | return COMPATIBLE_P (prev, next); | |
9243c3d1 | 1931 | |
29331e72 | 1932 | #include "riscv-vsetvl.def" |
9243c3d1 | 1933 | |
29331e72 LD |
1934 | gcc_unreachable (); |
1935 | } | |
9243c3d1 | 1936 | |
29331e72 LD |
1937 | bool avl_available_p (const vsetvl_info &prev, const vsetvl_info &next) |
1938 | { | |
1939 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1940 | avl_demand_type prev_flags = prev.get_avl_demand (); | |
1941 | avl_demand_type next_flags = next.get_avl_demand (); | |
1942 | #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1943 | AVAILABLE_P, FUSE) \ | |
1944 | if (prev_flags == avl_demand_type::PREV_FLAGS \ | |
1945 | && next_flags == avl_demand_type::NEXT_FLAGS) \ | |
1946 | return AVAILABLE_P (prev, next); | |
9243c3d1 | 1947 | |
29331e72 | 1948 | #include "riscv-vsetvl.def" |
9243c3d1 | 1949 | |
29331e72 LD |
1950 | gcc_unreachable (); |
1951 | } | |
1952 | ||
1953 | void merge_avl (vsetvl_info &prev, const vsetvl_info &next) | |
1954 | { | |
1955 | gcc_assert (prev.valid_p () && next.valid_p ()); | |
1956 | avl_demand_type prev_flags = prev.get_avl_demand (); | |
1957 | avl_demand_type next_flags = next.get_avl_demand (); | |
1958 | #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \ | |
1959 | AVAILABLE_P, FUSE) \ | |
1960 | if (prev_flags == avl_demand_type::PREV_FLAGS \ | |
1961 | && next_flags == avl_demand_type::NEXT_FLAGS) \ | |
1962 | { \ | |
1963 | gcc_assert (COMPATIBLE_P (prev, next)); \ | |
1964 | FUSE (prev, next); \ | |
1965 | prev.set_avl_demand (avl_demand_type::NEW_FLAGS); \ | |
1966 | return; \ | |
60bd33bc JZZ |
1967 | } |
1968 | ||
29331e72 | 1969 | #include "riscv-vsetvl.def" |
9243c3d1 | 1970 | |
29331e72 LD |
1971 | gcc_unreachable (); |
1972 | } | |
1973 | ||
1974 | bool compatible_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1975 | { | |
1976 | bool compatible_p = sew_lmul_compatible_p (prev, next) | |
1977 | && policy_compatible_p (prev, next) | |
1978 | && avl_compatible_p (prev, next); | |
1979 | return compatible_p; | |
1980 | } | |
1981 | ||
1982 | bool available_p (const vsetvl_info &prev, const vsetvl_info &next) | |
1983 | { | |
1984 | bool available_p = sew_lmul_available_p (prev, next) | |
1985 | && policy_available_p (prev, next) | |
1986 | && avl_available_p (prev, next); | |
1987 | gcc_assert (!available_p || compatible_p (prev, next)); | |
1988 | return available_p; | |
1989 | } | |
1990 | ||
1991 | void merge (vsetvl_info &prev, const vsetvl_info &next) | |
1992 | { | |
1993 | gcc_assert (compatible_p (prev, next)); | |
1994 | merge_sew_lmul (prev, next); | |
1995 | merge_policy (prev, next); | |
1996 | merge_avl (prev, next); | |
1997 | gcc_assert (available_p (prev, next)); | |
1998 | } | |
1999 | }; | |
9243c3d1 | 2000 | |
9243c3d1 | 2001 | |
29331e72 | 2002 | class pre_vsetvl |
9243c3d1 | 2003 | { |
29331e72 LD |
2004 | private: |
2005 | demand_system m_dem; | |
2006 | auto_vec<vsetvl_block_info> m_vector_block_infos; | |
2007 | ||
2008 | /* data for avl reaching defintion. */ | |
2009 | sbitmap m_avl_regs; | |
2010 | sbitmap *m_avl_def_in; | |
2011 | sbitmap *m_avl_def_out; | |
2012 | sbitmap *m_reg_def_loc; | |
2013 | ||
2014 | /* data for vsetvl info reaching defintion. */ | |
2015 | vsetvl_info m_unknow_info; | |
2016 | auto_vec<vsetvl_info *> m_vsetvl_def_exprs; | |
2017 | sbitmap *m_vsetvl_def_in; | |
2018 | sbitmap *m_vsetvl_def_out; | |
2019 | ||
2020 | /* data for lcm */ | |
2021 | auto_vec<vsetvl_info *> m_exprs; | |
2022 | sbitmap *m_avloc; | |
2023 | sbitmap *m_avin; | |
2024 | sbitmap *m_avout; | |
2025 | sbitmap *m_kill; | |
2026 | sbitmap *m_antloc; | |
2027 | sbitmap *m_transp; | |
2028 | sbitmap *m_insert; | |
2029 | sbitmap *m_del; | |
2030 | struct edge_list *m_edges; | |
2031 | ||
2032 | auto_vec<vsetvl_info> m_delete_list; | |
2033 | ||
2034 | vsetvl_block_info &get_block_info (const bb_info *bb) | |
2035 | { | |
2036 | return m_vector_block_infos[bb->index ()]; | |
2037 | } | |
2038 | const vsetvl_block_info &get_block_info (const basic_block bb) const | |
2039 | { | |
2040 | return m_vector_block_infos[bb->index]; | |
2041 | } | |
2042 | ||
2043 | vsetvl_block_info &get_block_info (const basic_block bb) | |
2044 | { | |
2045 | return m_vector_block_infos[bb->index]; | |
2046 | } | |
2047 | ||
2048 | void add_expr (auto_vec<vsetvl_info *> &m_exprs, vsetvl_info &info) | |
2049 | { | |
2050 | for (vsetvl_info *item : m_exprs) | |
2051 | { | |
2052 | if (*item == info) | |
2053 | return; | |
2054 | } | |
2055 | m_exprs.safe_push (&info); | |
2056 | } | |
2057 | ||
2058 | unsigned get_expr_index (auto_vec<vsetvl_info *> &m_exprs, | |
2059 | const vsetvl_info &info) | |
2060 | { | |
2061 | for (size_t i = 0; i < m_exprs.length (); i += 1) | |
2062 | { | |
2063 | if (*m_exprs[i] == info) | |
2064 | return i; | |
2065 | } | |
2066 | gcc_unreachable (); | |
2067 | } | |
2068 | ||
2069 | bool anticpatable_exp_p (const vsetvl_info &header_info) | |
2070 | { | |
2071 | if (!header_info.has_nonvlmax_reg_avl () && !header_info.has_vl ()) | |
2072 | return true; | |
9243c3d1 | 2073 | |
29331e72 LD |
2074 | bb_info *bb = header_info.get_bb (); |
2075 | insn_info *prev_insn = bb->head_insn (); | |
2076 | insn_info *next_insn = header_info.insn_inside_bb_p () | |
2077 | ? header_info.get_insn () | |
2078 | : header_info.get_bb ()->end_insn (); | |
2079 | ||
2080 | return m_dem.avl_vl_unmodified_between_p (prev_insn, next_insn, | |
2081 | header_info); | |
2082 | } | |
2083 | ||
2084 | bool available_exp_p (const vsetvl_info &prev_info, | |
2085 | const vsetvl_info &next_info) | |
2086 | { | |
2087 | return m_dem.available_p (prev_info, next_info); | |
2088 | } | |
2089 | ||
2090 | void compute_probabilities () | |
2091 | { | |
2092 | edge e; | |
2093 | edge_iterator ei; | |
2094 | ||
2095 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2096 | { | |
2097 | basic_block cfg_bb = bb->cfg_bb (); | |
2098 | auto &curr_prob = get_block_info (cfg_bb).probability; | |
2099 | ||
2100 | /* GCC assume entry block (bb 0) are always so | |
2101 | executed so set its probability as "always". */ | |
2102 | if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb) | |
2103 | curr_prob = profile_probability::always (); | |
2104 | /* Exit block (bb 1) is the block we don't need to process. */ | |
2105 | if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb) | |
2106 | continue; | |
9243c3d1 | 2107 | |
29331e72 LD |
2108 | gcc_assert (curr_prob.initialized_p ()); |
2109 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
2110 | { | |
2111 | auto &new_prob = get_block_info (e->dest).probability; | |
2112 | /* Normally, the edge probability should be initialized. | |
2113 | However, some special testing code which is written in | |
2114 | GIMPLE IR style force the edge probility uninitialized, | |
2115 | we conservatively set it as never so that it will not | |
2116 | affect PRE (Phase 3 && Phse 4). */ | |
2117 | if (!e->probability.initialized_p ()) | |
2118 | new_prob = profile_probability::never (); | |
2119 | else if (!new_prob.initialized_p ()) | |
2120 | new_prob = curr_prob * e->probability; | |
2121 | else if (new_prob == profile_probability::always ()) | |
2122 | continue; | |
2123 | else | |
2124 | new_prob += curr_prob * e->probability; | |
2125 | } | |
2126 | } | |
2127 | } | |
2128 | ||
2129 | void insert_vsetvl_insn (enum emit_type emit_type, const vsetvl_info &info) | |
2130 | { | |
2131 | rtx pat = info.get_vsetvl_pat (); | |
2132 | rtx_insn *rinsn = info.get_insn ()->rtl (); | |
2133 | ||
2134 | if (emit_type == EMIT_DIRECT) | |
2135 | { | |
2136 | emit_insn (pat); | |
2137 | if (dump_file) | |
2138 | { | |
2139 | fprintf (dump_file, " Insert vsetvl insn %d:\n", | |
2140 | INSN_UID (get_last_insn ())); | |
2141 | print_rtl_single (dump_file, get_last_insn ()); | |
2142 | } | |
2143 | } | |
2144 | else if (emit_type == EMIT_BEFORE) | |
2145 | { | |
2146 | emit_insn_before (pat, rinsn); | |
2147 | if (dump_file) | |
2148 | { | |
2149 | fprintf (dump_file, " Insert vsetvl insn before insn %d:\n", | |
2150 | INSN_UID (rinsn)); | |
2151 | print_rtl_single (dump_file, PREV_INSN (rinsn)); | |
2152 | } | |
2153 | } | |
2154 | else | |
2155 | { | |
2156 | emit_insn_after (pat, rinsn); | |
2157 | if (dump_file) | |
2158 | { | |
2159 | fprintf (dump_file, " Insert vsetvl insn after insn %d:\n", | |
2160 | INSN_UID (rinsn)); | |
2161 | print_rtl_single (dump_file, NEXT_INSN (rinsn)); | |
2162 | } | |
2163 | } | |
2164 | } | |
2165 | ||
2166 | void change_vsetvl_insn (const vsetvl_info &info) | |
2167 | { | |
2168 | rtx_insn *rinsn = info.get_insn ()->rtl (); | |
2169 | rtx new_pat = info.get_vsetvl_pat (); | |
2170 | ||
2171 | if (dump_file) | |
2172 | { | |
2173 | fprintf (dump_file, " Change insn %d from:\n", INSN_UID (rinsn)); | |
2174 | print_rtl_single (dump_file, rinsn); | |
2175 | } | |
2176 | ||
2177 | validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false); | |
2178 | ||
2179 | if (dump_file) | |
2180 | { | |
2181 | fprintf (dump_file, "\n to:\n"); | |
2182 | print_rtl_single (dump_file, rinsn); | |
2183 | } | |
2184 | } | |
2185 | ||
2186 | void remove_vsetvl_insn (const vsetvl_info &info) | |
2187 | { | |
2188 | rtx_insn *rinsn = info.get_insn ()->rtl (); | |
2189 | if (dump_file) | |
2190 | { | |
2191 | fprintf (dump_file, " Eliminate insn %d:\n", INSN_UID (rinsn)); | |
2192 | print_rtl_single (dump_file, rinsn); | |
2193 | } | |
2194 | if (in_sequence_p ()) | |
2195 | remove_insn (rinsn); | |
2196 | else | |
2197 | delete_insn (rinsn); | |
2198 | } | |
2199 | ||
2200 | bool successors_probability_equal_p (const basic_block cfg_bb) const | |
2201 | { | |
2202 | edge e; | |
2203 | edge_iterator ei; | |
2204 | profile_probability prob = profile_probability::uninitialized (); | |
2205 | FOR_EACH_EDGE (e, ei, cfg_bb->succs) | |
2206 | { | |
2207 | if (prob == profile_probability::uninitialized ()) | |
2208 | prob = m_vector_block_infos[e->dest->index].probability; | |
2209 | else if (prob == m_vector_block_infos[e->dest->index].probability) | |
2210 | continue; | |
2211 | else | |
2212 | /* We pick the highest probability among those incompatible VSETVL | |
2213 | infos. When all incompatible VSTEVL infos have same probability, we | |
2214 | don't pick any of them. */ | |
2215 | return false; | |
2216 | } | |
ec99ffab | 2217 | return true; |
29331e72 LD |
2218 | } |
2219 | ||
2220 | bool preds_has_same_avl_p (const vsetvl_info &curr_info) | |
2221 | { | |
2222 | gcc_assert ( | |
2223 | !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()])); | |
2224 | ||
2225 | unsigned expr_index; | |
2226 | sbitmap_iterator sbi; | |
2227 | EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[curr_info.get_bb ()->index ()], 0, | |
2228 | expr_index, sbi) | |
2229 | { | |
2230 | const vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index]; | |
2231 | if (!prev_info.valid_p () | |
2232 | || !m_dem.avl_available_p (prev_info, curr_info)) | |
2233 | return false; | |
2234 | } | |
005fad9d | 2235 | |
005fad9d | 2236 | return true; |
29331e72 | 2237 | } |
005fad9d | 2238 | |
29331e72 LD |
2239 | public: |
2240 | pre_vsetvl () | |
2241 | : m_avl_def_in (nullptr), m_avl_def_out (nullptr), | |
2242 | m_vsetvl_def_in (nullptr), m_vsetvl_def_out (nullptr), m_avloc (nullptr), | |
2243 | m_avin (nullptr), m_avout (nullptr), m_kill (nullptr), m_antloc (nullptr), | |
2244 | m_transp (nullptr), m_insert (nullptr), m_del (nullptr), m_edges (nullptr) | |
2245 | { | |
2246 | /* Initialization of RTL_SSA. */ | |
2247 | calculate_dominance_info (CDI_DOMINATORS); | |
2248 | df_analyze (); | |
2249 | crtl->ssa = new function_info (cfun); | |
2250 | m_vector_block_infos.safe_grow_cleared (last_basic_block_for_fn (cfun)); | |
2251 | compute_probabilities (); | |
2252 | m_unknow_info.set_unknown (); | |
2253 | } | |
2254 | ||
2255 | void finish () | |
2256 | { | |
2257 | free_dominance_info (CDI_DOMINATORS); | |
2258 | if (crtl->ssa->perform_pending_updates ()) | |
2259 | cleanup_cfg (0); | |
2260 | delete crtl->ssa; | |
2261 | crtl->ssa = nullptr; | |
2262 | ||
2263 | if (m_avl_regs) | |
2264 | sbitmap_free (m_avl_regs); | |
2265 | if (m_reg_def_loc) | |
2266 | sbitmap_vector_free (m_reg_def_loc); | |
2267 | ||
2268 | if (m_avl_def_in) | |
2269 | sbitmap_vector_free (m_avl_def_in); | |
2270 | if (m_avl_def_out) | |
2271 | sbitmap_vector_free (m_avl_def_out); | |
2272 | ||
2273 | if (m_vsetvl_def_in) | |
2274 | sbitmap_vector_free (m_vsetvl_def_in); | |
2275 | if (m_vsetvl_def_out) | |
2276 | sbitmap_vector_free (m_vsetvl_def_out); | |
2277 | ||
2278 | if (m_avloc) | |
2279 | sbitmap_vector_free (m_avloc); | |
2280 | if (m_kill) | |
2281 | sbitmap_vector_free (m_kill); | |
2282 | if (m_antloc) | |
2283 | sbitmap_vector_free (m_antloc); | |
2284 | if (m_transp) | |
2285 | sbitmap_vector_free (m_transp); | |
2286 | if (m_insert) | |
2287 | sbitmap_vector_free (m_insert); | |
2288 | if (m_del) | |
2289 | sbitmap_vector_free (m_del); | |
2290 | if (m_avin) | |
2291 | sbitmap_vector_free (m_avin); | |
2292 | if (m_avout) | |
2293 | sbitmap_vector_free (m_avout); | |
2294 | ||
2295 | if (m_edges) | |
2296 | free_edge_list (m_edges); | |
2297 | } | |
2298 | ||
2299 | void compute_avl_def_data (); | |
2300 | void compute_vsetvl_def_data (); | |
2301 | void compute_lcm_local_properties (); | |
2302 | ||
2303 | void fuse_local_vsetvl_info (); | |
2304 | bool earliest_fuse_vsetvl_info (); | |
2305 | void pre_global_vsetvl_info (); | |
2306 | void emit_vsetvl (); | |
2307 | void cleaup (); | |
2308 | void remove_avl_operand (); | |
2309 | void remove_unused_dest_operand (); | |
2310 | ||
2311 | void dump (FILE *file, const char *title) const | |
2312 | { | |
2313 | fprintf (file, "\nVSETVL infos after %s\n\n", title); | |
2314 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2315 | { | |
2316 | const auto &block_info = m_vector_block_infos[bb->index ()]; | |
2317 | fprintf (file, " bb %d:\n", bb->index ()); | |
2318 | fprintf (file, " probability: "); | |
2319 | block_info.probability.dump (file); | |
2320 | fprintf (file, "\n"); | |
2321 | if (!block_info.empty_p ()) | |
2322 | { | |
2323 | fprintf (file, " Header vsetvl info:"); | |
2324 | block_info.get_entry_info ().dump (file, " "); | |
2325 | fprintf (file, " Footer vsetvl info:"); | |
2326 | block_info.get_exit_info ().dump (file, " "); | |
4fd09aed | 2327 | for (const auto &info : block_info.local_infos) |
29331e72 LD |
2328 | { |
2329 | fprintf (file, | |
2330 | " insn %d vsetvl info:", info.get_insn ()->uid ()); | |
2331 | info.dump (file, " "); | |
2332 | } | |
2333 | } | |
2334 | } | |
2335 | } | |
2336 | }; | |
c139f5e1 | 2337 | |
e030af3e | 2338 | void |
29331e72 | 2339 | pre_vsetvl::compute_avl_def_data () |
e030af3e | 2340 | { |
29331e72 LD |
2341 | if (bitmap_empty_p (m_avl_regs)) |
2342 | return; | |
e030af3e | 2343 | |
29331e72 LD |
2344 | unsigned num_regs = GP_REG_LAST + 1; |
2345 | unsigned num_bbs = last_basic_block_for_fn (cfun); | |
9243c3d1 | 2346 | |
29331e72 LD |
2347 | sbitmap *avl_def_loc_temp = sbitmap_vector_alloc (num_bbs, num_regs); |
2348 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
9243c3d1 | 2349 | { |
29331e72 LD |
2350 | bitmap_and (avl_def_loc_temp[bb->index ()], m_avl_regs, |
2351 | m_reg_def_loc[bb->index ()]); | |
2352 | ||
2353 | vsetvl_block_info &block_info = get_block_info (bb); | |
2354 | if (block_info.has_info ()) | |
9243c3d1 | 2355 | { |
29331e72 LD |
2356 | vsetvl_info &footer_info = block_info.get_exit_info (); |
2357 | gcc_assert (footer_info.valid_p ()); | |
2358 | if (footer_info.has_vl ()) | |
2359 | bitmap_set_bit (avl_def_loc_temp[bb->index ()], | |
2360 | REGNO (footer_info.get_vl ())); | |
9243c3d1 JZZ |
2361 | } |
2362 | } | |
9243c3d1 | 2363 | |
29331e72 LD |
2364 | if (m_avl_def_in) |
2365 | sbitmap_vector_free (m_avl_def_in); | |
2366 | if (m_avl_def_out) | |
2367 | sbitmap_vector_free (m_avl_def_out); | |
9243c3d1 | 2368 | |
29331e72 LD |
2369 | unsigned num_exprs = num_bbs * num_regs; |
2370 | sbitmap *avl_def_loc = sbitmap_vector_alloc (num_bbs, num_exprs); | |
2371 | sbitmap *m_kill = sbitmap_vector_alloc (num_bbs, num_exprs); | |
2372 | m_avl_def_in = sbitmap_vector_alloc (num_bbs, num_exprs); | |
2373 | m_avl_def_out = sbitmap_vector_alloc (num_bbs, num_exprs); | |
9243c3d1 | 2374 | |
29331e72 LD |
2375 | bitmap_vector_clear (avl_def_loc, num_bbs); |
2376 | bitmap_vector_clear (m_kill, num_bbs); | |
2377 | bitmap_vector_clear (m_avl_def_out, num_bbs); | |
2378 | ||
2379 | unsigned regno; | |
2380 | sbitmap_iterator sbi; | |
2381 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2382 | EXECUTE_IF_SET_IN_BITMAP (avl_def_loc_temp[bb->index ()], 0, regno, sbi) | |
2383 | { | |
2384 | bitmap_set_bit (avl_def_loc[bb->index ()], | |
2385 | get_expr_id (bb->index (), regno, num_bbs)); | |
2386 | bitmap_set_range (m_kill[bb->index ()], regno * num_bbs, num_bbs); | |
2387 | } | |
2388 | ||
2389 | basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun); | |
2390 | EXECUTE_IF_SET_IN_BITMAP (m_avl_regs, 0, regno, sbi) | |
2391 | bitmap_set_bit (m_avl_def_out[entry->index], | |
2392 | get_expr_id (entry->index, regno, num_bbs)); | |
2393 | ||
2394 | compute_reaching_defintion (avl_def_loc, m_kill, m_avl_def_in, m_avl_def_out); | |
2395 | ||
2396 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
9243c3d1 | 2397 | { |
29331e72 LD |
2398 | fprintf (dump_file, |
2399 | " Compute avl reaching defition data (num_bbs %d, num_regs " | |
2400 | "%d):\n\n", | |
2401 | num_bbs, num_regs); | |
2402 | fprintf (dump_file, " avl_regs: "); | |
2403 | dump_bitmap_file (dump_file, m_avl_regs); | |
2404 | fprintf (dump_file, "\n bitmap data:\n"); | |
2405 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
9243c3d1 | 2406 | { |
29331e72 LD |
2407 | unsigned int i = bb->index (); |
2408 | fprintf (dump_file, " BB %u:\n", i); | |
2409 | fprintf (dump_file, " avl_def_loc:"); | |
2410 | unsigned expr_id; | |
2411 | sbitmap_iterator sbi; | |
2412 | EXECUTE_IF_SET_IN_BITMAP (avl_def_loc[i], 0, expr_id, sbi) | |
ec99ffab | 2413 | { |
29331e72 LD |
2414 | fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs), |
2415 | get_bb_index (expr_id, num_bbs)); | |
2416 | } | |
2417 | fprintf (dump_file, "\n kill:"); | |
2418 | EXECUTE_IF_SET_IN_BITMAP (m_kill[i], 0, expr_id, sbi) | |
2419 | { | |
2420 | fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs), | |
2421 | get_bb_index (expr_id, num_bbs)); | |
2422 | } | |
2423 | fprintf (dump_file, "\n avl_def_in:"); | |
2424 | EXECUTE_IF_SET_IN_BITMAP (m_avl_def_in[i], 0, expr_id, sbi) | |
2425 | { | |
2426 | fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs), | |
2427 | get_bb_index (expr_id, num_bbs)); | |
2428 | } | |
2429 | fprintf (dump_file, "\n avl_def_out:"); | |
2430 | EXECUTE_IF_SET_IN_BITMAP (m_avl_def_out[i], 0, expr_id, sbi) | |
2431 | { | |
2432 | fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs), | |
2433 | get_bb_index (expr_id, num_bbs)); | |
ec99ffab | 2434 | } |
29331e72 | 2435 | fprintf (dump_file, "\n"); |
9243c3d1 JZZ |
2436 | } |
2437 | } | |
2438 | ||
29331e72 LD |
2439 | sbitmap_vector_free (avl_def_loc); |
2440 | sbitmap_vector_free (m_kill); | |
2441 | sbitmap_vector_free (avl_def_loc_temp); | |
9243c3d1 | 2442 | |
29331e72 | 2443 | m_dem.set_avl_in_out_data (m_avl_def_in, m_avl_def_out); |
9243c3d1 JZZ |
2444 | } |
2445 | ||
9243c3d1 | 2446 | void |
29331e72 | 2447 | pre_vsetvl::compute_vsetvl_def_data () |
9243c3d1 | 2448 | { |
29331e72 LD |
2449 | m_vsetvl_def_exprs.truncate (0); |
2450 | add_expr (m_vsetvl_def_exprs, m_unknow_info); | |
2451 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
9243c3d1 | 2452 | { |
29331e72 LD |
2453 | vsetvl_block_info &block_info = get_block_info (bb); |
2454 | if (block_info.empty_p ()) | |
2455 | continue; | |
2456 | vsetvl_info &footer_info = block_info.get_exit_info (); | |
2457 | gcc_assert (footer_info.valid_p () || footer_info.unknown_p ()); | |
2458 | add_expr (m_vsetvl_def_exprs, footer_info); | |
9243c3d1 JZZ |
2459 | } |
2460 | ||
29331e72 LD |
2461 | if (m_vsetvl_def_in) |
2462 | sbitmap_vector_free (m_vsetvl_def_in); | |
2463 | if (m_vsetvl_def_out) | |
2464 | sbitmap_vector_free (m_vsetvl_def_out); | |
9243c3d1 | 2465 | |
29331e72 LD |
2466 | sbitmap *def_loc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), |
2467 | m_vsetvl_def_exprs.length ()); | |
2468 | sbitmap *m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2469 | m_vsetvl_def_exprs.length ()); | |
9243c3d1 | 2470 | |
29331e72 LD |
2471 | m_vsetvl_def_in = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), |
2472 | m_vsetvl_def_exprs.length ()); | |
2473 | m_vsetvl_def_out = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), | |
2474 | m_vsetvl_def_exprs.length ()); | |
9243c3d1 | 2475 | |
29331e72 LD |
2476 | bitmap_vector_clear (def_loc, last_basic_block_for_fn (cfun)); |
2477 | bitmap_vector_clear (m_kill, last_basic_block_for_fn (cfun)); | |
2478 | bitmap_vector_clear (m_vsetvl_def_out, last_basic_block_for_fn (cfun)); | |
9243c3d1 | 2479 | |
29331e72 LD |
2480 | for (const bb_info *bb : crtl->ssa->bbs ()) |
2481 | { | |
2482 | vsetvl_block_info &block_info = get_block_info (bb); | |
2483 | if (block_info.empty_p ()) | |
9243c3d1 | 2484 | { |
29331e72 | 2485 | for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i += 1) |
9243c3d1 | 2486 | { |
29331e72 LD |
2487 | const vsetvl_info &info = *m_vsetvl_def_exprs[i]; |
2488 | if (!info.has_nonvlmax_reg_avl ()) | |
2489 | continue; | |
2490 | unsigned int regno; | |
2491 | sbitmap_iterator sbi; | |
2492 | EXECUTE_IF_SET_IN_BITMAP (m_reg_def_loc[bb->index ()], 0, regno, | |
2493 | sbi) | |
2494 | if (regno == REGNO (info.get_avl ())) | |
2495 | { | |
2496 | bitmap_set_bit (m_kill[bb->index ()], i); | |
2497 | bitmap_set_bit (def_loc[bb->index ()], | |
2498 | get_expr_index (m_vsetvl_def_exprs, | |
2499 | m_unknow_info)); | |
2500 | } | |
9243c3d1 | 2501 | } |
29331e72 | 2502 | continue; |
9243c3d1 JZZ |
2503 | } |
2504 | ||
29331e72 LD |
2505 | vsetvl_info &footer_info = block_info.get_exit_info (); |
2506 | bitmap_ones (m_kill[bb->index ()]); | |
2507 | bitmap_set_bit (def_loc[bb->index ()], | |
2508 | get_expr_index (m_vsetvl_def_exprs, footer_info)); | |
9243c3d1 JZZ |
2509 | } |
2510 | ||
29331e72 LD |
2511 | /* Set the def_out of the ENTRY basic block to m_unknow_info expr. */ |
2512 | basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun); | |
2513 | bitmap_set_bit (m_vsetvl_def_out[entry->index], | |
2514 | get_expr_index (m_vsetvl_def_exprs, m_unknow_info)); | |
9243c3d1 | 2515 | |
29331e72 LD |
2516 | compute_reaching_defintion (def_loc, m_kill, m_vsetvl_def_in, |
2517 | m_vsetvl_def_out); | |
2518 | ||
2519 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
e030af3e | 2520 | { |
29331e72 LD |
2521 | fprintf (dump_file, |
2522 | "\n Compute vsetvl info reaching defition data:\n\n"); | |
2523 | fprintf (dump_file, " Expression List (%d):\n", | |
2524 | m_vsetvl_def_exprs.length ()); | |
2525 | for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i++) | |
2526 | { | |
2527 | const auto &info = *m_vsetvl_def_exprs[i]; | |
2528 | fprintf (dump_file, " Expr[%u]: ", i); | |
2529 | info.dump (dump_file, " "); | |
2530 | } | |
2531 | fprintf (dump_file, "\n bitmap data:\n"); | |
2532 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2533 | { | |
2534 | unsigned int i = bb->index (); | |
2535 | fprintf (dump_file, " BB %u:\n", i); | |
2536 | fprintf (dump_file, " def_loc: "); | |
2537 | dump_bitmap_file (dump_file, def_loc[i]); | |
2538 | fprintf (dump_file, " kill: "); | |
2539 | dump_bitmap_file (dump_file, m_kill[i]); | |
2540 | fprintf (dump_file, " vsetvl_def_in: "); | |
2541 | dump_bitmap_file (dump_file, m_vsetvl_def_in[i]); | |
2542 | fprintf (dump_file, " vsetvl_def_out: "); | |
2543 | dump_bitmap_file (dump_file, m_vsetvl_def_out[i]); | |
2544 | } | |
e030af3e | 2545 | } |
4f673c5e | 2546 | |
29331e72 | 2547 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 2548 | { |
29331e72 LD |
2549 | vsetvl_block_info &block_info = get_block_info (bb); |
2550 | if (block_info.empty_p ()) | |
2551 | continue; | |
2552 | vsetvl_info &curr_info = block_info.get_entry_info (); | |
2553 | if (!curr_info.valid_p ()) | |
2554 | continue; | |
2555 | ||
2556 | unsigned int expr_index; | |
2557 | sbitmap_iterator sbi; | |
2558 | gcc_assert ( | |
2559 | !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()])); | |
2560 | bool full_available = true; | |
2561 | EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[bb->index ()], 0, expr_index, | |
2562 | sbi) | |
4f673c5e | 2563 | { |
29331e72 LD |
2564 | vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index]; |
2565 | if (!prev_info.valid_p () | |
2566 | || !m_dem.available_p (prev_info, curr_info)) | |
2567 | { | |
2568 | full_available = false; | |
2569 | break; | |
2570 | } | |
4f673c5e | 2571 | } |
29331e72 | 2572 | block_info.full_available = full_available; |
4f673c5e | 2573 | } |
29331e72 LD |
2574 | |
2575 | sbitmap_vector_free (def_loc); | |
2576 | sbitmap_vector_free (m_kill); | |
e030af3e | 2577 | } |
9243c3d1 | 2578 | |
e030af3e | 2579 | /* Compute the local properties of each recorded expression. |
6b6b9c68 | 2580 | |
e030af3e JZ |
2581 | Local properties are those that are defined by the block, irrespective of |
2582 | other blocks. | |
6b6b9c68 | 2583 | |
e030af3e JZ |
2584 | An expression is transparent in a block if its operands are not modified |
2585 | in the block. | |
6b6b9c68 | 2586 | |
e030af3e JZ |
2587 | An expression is computed (locally available) in a block if it is computed |
2588 | at least once and expression would contain the same value if the | |
2589 | computation was moved to the end of the block. | |
2590 | ||
2591 | An expression is locally anticipatable in a block if it is computed at | |
2592 | least once and expression would contain the same value if the computation | |
2593 | was moved to the beginning of the block. */ | |
2594 | void | |
29331e72 | 2595 | pre_vsetvl::compute_lcm_local_properties () |
6b6b9c68 | 2596 | { |
29331e72 LD |
2597 | m_exprs.truncate (0); |
2598 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2599 | { | |
2600 | vsetvl_block_info &block_info = get_block_info (bb); | |
2601 | if (block_info.empty_p ()) | |
2602 | continue; | |
2603 | vsetvl_info &header_info = block_info.get_entry_info (); | |
2604 | vsetvl_info &footer_info = block_info.get_exit_info (); | |
2605 | gcc_assert (footer_info.valid_p () || footer_info.unknown_p ()); | |
2606 | add_expr (m_exprs, header_info); | |
2607 | add_expr (m_exprs, footer_info); | |
2608 | } | |
2609 | ||
2610 | int num_exprs = m_exprs.length (); | |
2611 | if (m_avloc) | |
2612 | sbitmap_vector_free (m_avloc); | |
2613 | if (m_kill) | |
2614 | sbitmap_vector_free (m_kill); | |
2615 | if (m_antloc) | |
2616 | sbitmap_vector_free (m_antloc); | |
2617 | if (m_transp) | |
2618 | sbitmap_vector_free (m_transp); | |
2619 | if (m_avin) | |
2620 | sbitmap_vector_free (m_avin); | |
2621 | if (m_avout) | |
2622 | sbitmap_vector_free (m_avout); | |
2623 | ||
2624 | m_avloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2625 | m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2626 | m_antloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2627 | m_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2628 | m_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2629 | m_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2630 | ||
2631 | bitmap_vector_clear (m_avloc, last_basic_block_for_fn (cfun)); | |
2632 | bitmap_vector_clear (m_antloc, last_basic_block_for_fn (cfun)); | |
2633 | bitmap_vector_clear (m_transp, last_basic_block_for_fn (cfun)); | |
2634 | ||
e030af3e JZ |
2635 | /* - If T is locally available at the end of a block, then T' must be |
2636 | available at the end of the same block. Since some optimization has | |
2637 | occurred earlier, T' might not be locally available, however, it must | |
2638 | have been previously computed on all paths. As a formula, T at AVLOC(B) | |
2639 | implies that T' at AVOUT(B). | |
2640 | An "available occurrence" is one that is the last occurrence in the | |
2641 | basic block and the operands are not modified by following statements in | |
2642 | the basic block [including this insn]. | |
6b6b9c68 | 2643 | |
e030af3e JZ |
2644 | - If T is locally anticipated at the beginning of a block, then either |
2645 | T', is locally anticipated or it is already available from previous | |
2646 | blocks. As a formula, this means that T at ANTLOC(B) implies that T' at | |
2647 | ANTLOC(B) at AVIN(B). | |
2648 | An "anticipatable occurrence" is one that is the first occurrence in the | |
2649 | basic block, the operands are not modified in the basic block prior | |
2650 | to the occurrence and the output is not used between the start of | |
2651 | the block and the occurrence. */ | |
e030af3e | 2652 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 2653 | { |
29331e72 LD |
2654 | unsigned bb_index = bb->index (); |
2655 | vsetvl_block_info &block_info = get_block_info (bb); | |
9243c3d1 | 2656 | |
29331e72 LD |
2657 | /* Compute m_transp */ |
2658 | if (block_info.empty_p ()) | |
9243c3d1 | 2659 | { |
29331e72 LD |
2660 | bitmap_ones (m_transp[bb_index]); |
2661 | for (int i = 0; i < num_exprs; i += 1) | |
4f673c5e | 2662 | { |
29331e72 LD |
2663 | const vsetvl_info &info = *m_exprs[i]; |
2664 | if (!info.has_nonvlmax_reg_avl () && !info.has_vl ()) | |
2665 | continue; | |
2666 | ||
2667 | unsigned int regno; | |
2668 | sbitmap_iterator sbi; | |
2669 | EXECUTE_IF_SET_IN_BITMAP (m_reg_def_loc[bb->index ()], 0, regno, | |
2670 | sbi) | |
2671 | { | |
2672 | if (regno == REGNO (info.get_avl ())) | |
2673 | bitmap_clear_bit (m_transp[bb->index ()], i); | |
2674 | } | |
2675 | ||
e030af3e JZ |
2676 | for (const insn_info *insn : bb->real_nondebug_insns ()) |
2677 | { | |
29331e72 LD |
2678 | if ((info.has_nonvlmax_reg_avl () |
2679 | && find_access (insn->defs (), REGNO (info.get_avl ()))) | |
2680 | || (info.has_vl () | |
2681 | && find_access (insn->uses (), | |
2682 | REGNO (info.get_vl ())))) | |
e030af3e | 2683 | { |
29331e72 | 2684 | bitmap_clear_bit (m_transp[bb_index], i); |
e030af3e JZ |
2685 | break; |
2686 | } | |
2687 | } | |
4f673c5e | 2688 | } |
9243c3d1 | 2689 | |
29331e72 | 2690 | continue; |
9243c3d1 | 2691 | } |
e030af3e | 2692 | |
29331e72 LD |
2693 | vsetvl_info &header_info = block_info.get_entry_info (); |
2694 | vsetvl_info &footer_info = block_info.get_exit_info (); | |
9243c3d1 | 2695 | |
29331e72 LD |
2696 | if (header_info.valid_p () |
2697 | && (anticpatable_exp_p (header_info) || block_info.full_available)) | |
2698 | bitmap_set_bit (m_antloc[bb_index], | |
2699 | get_expr_index (m_exprs, header_info)); | |
9243c3d1 | 2700 | |
29331e72 LD |
2701 | if (footer_info.valid_p ()) |
2702 | for (int i = 0; i < num_exprs; i += 1) | |
2703 | { | |
2704 | const vsetvl_info &info = *m_exprs[i]; | |
2705 | if (!info.valid_p ()) | |
2706 | continue; | |
2707 | if (available_exp_p (footer_info, info)) | |
2708 | bitmap_set_bit (m_avloc[bb_index], i); | |
2709 | } | |
2710 | } | |
9243c3d1 | 2711 | |
29331e72 | 2712 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 2713 | { |
29331e72 LD |
2714 | unsigned bb_index = bb->index (); |
2715 | bitmap_ior (m_kill[bb_index], m_transp[bb_index], m_avloc[bb_index]); | |
2716 | bitmap_not (m_kill[bb_index], m_kill[bb_index]); | |
9243c3d1 JZZ |
2717 | } |
2718 | ||
29331e72 | 2719 | for (const bb_info *bb : crtl->ssa->bbs ()) |
9243c3d1 | 2720 | { |
29331e72 | 2721 | unsigned bb_index = bb->index (); |
9243c3d1 JZZ |
2722 | edge e; |
2723 | edge_iterator ei; | |
29331e72 | 2724 | FOR_EACH_EDGE (e, ei, bb->cfg_bb ()->preds) |
9243c3d1 JZZ |
2725 | if (e->flags & EDGE_COMPLEX) |
2726 | { | |
29331e72 LD |
2727 | bitmap_clear (m_antloc[bb_index]); |
2728 | bitmap_clear (m_transp[bb_index]); | |
9243c3d1 JZZ |
2729 | } |
2730 | } | |
2731 | } | |
2732 | ||
29331e72 LD |
2733 | void |
2734 | pre_vsetvl::fuse_local_vsetvl_info () | |
e030af3e | 2735 | { |
29331e72 LD |
2736 | m_reg_def_loc |
2737 | = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), GP_REG_LAST + 1); | |
2738 | bitmap_vector_clear (m_reg_def_loc, last_basic_block_for_fn (cfun)); | |
2739 | bitmap_ones (m_reg_def_loc[ENTRY_BLOCK_PTR_FOR_FN (cfun)->index]); | |
2740 | ||
2741 | for (bb_info *bb : crtl->ssa->bbs ()) | |
e030af3e | 2742 | { |
29331e72 | 2743 | auto &block_info = get_block_info (bb); |
4fd09aed | 2744 | block_info.bb = bb; |
29331e72 | 2745 | if (dump_file && (dump_flags & TDF_DETAILS)) |
e030af3e | 2746 | { |
29331e72 LD |
2747 | fprintf (dump_file, " Try fuse basic block %d\n", bb->index ()); |
2748 | } | |
2749 | auto_vec<vsetvl_info> infos; | |
2750 | for (insn_info *insn : bb->real_nondebug_insns ()) | |
2751 | { | |
2752 | vsetvl_info curr_info = vsetvl_info (insn); | |
2753 | if (curr_info.valid_p () || curr_info.unknown_p ()) | |
2754 | infos.safe_push (curr_info); | |
2755 | ||
2756 | /* Collecting GP registers modified by the current bb. */ | |
2757 | if (insn->is_real ()) | |
2758 | for (def_info *def : insn->defs ()) | |
2759 | if (def->is_reg () && GP_REG_P (def->regno ())) | |
2760 | bitmap_set_bit (m_reg_def_loc[bb->index ()], def->regno ()); | |
2761 | } | |
e030af3e | 2762 | |
29331e72 LD |
2763 | vsetvl_info prev_info = vsetvl_info (); |
2764 | prev_info.set_empty (); | |
2765 | for (auto &curr_info : infos) | |
2766 | { | |
2767 | if (prev_info.empty_p ()) | |
2768 | prev_info = curr_info; | |
2769 | else if ((curr_info.unknown_p () && prev_info.valid_p ()) | |
2770 | || (curr_info.valid_p () && prev_info.unknown_p ())) | |
2771 | { | |
4fd09aed | 2772 | block_info.local_infos.safe_push (prev_info); |
29331e72 LD |
2773 | prev_info = curr_info; |
2774 | } | |
2775 | else if (curr_info.valid_p () && prev_info.valid_p ()) | |
2776 | { | |
2777 | if (m_dem.available_p (prev_info, curr_info)) | |
e7b585a4 | 2778 | { |
29331e72 | 2779 | if (dump_file && (dump_flags & TDF_DETAILS)) |
e7b585a4 | 2780 | { |
29331e72 LD |
2781 | fprintf (dump_file, |
2782 | " Ignore curr info since prev info " | |
2783 | "available with it:\n"); | |
2784 | fprintf (dump_file, " prev_info: "); | |
2785 | prev_info.dump (dump_file, " "); | |
2786 | fprintf (dump_file, " curr_info: "); | |
2787 | curr_info.dump (dump_file, " "); | |
2788 | fprintf (dump_file, "\n"); | |
e7b585a4 | 2789 | } |
29331e72 LD |
2790 | if (!curr_info.vl_use_by_non_rvv_insn_p () |
2791 | && vsetvl_insn_p (curr_info.get_insn ()->rtl ())) | |
2792 | m_delete_list.safe_push (curr_info); | |
e030af3e | 2793 | |
29331e72 LD |
2794 | if (curr_info.get_read_vl_insn ()) |
2795 | prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ()); | |
e030af3e | 2796 | } |
29331e72 | 2797 | else if (m_dem.compatible_p (prev_info, curr_info)) |
e030af3e | 2798 | { |
29331e72 | 2799 | if (dump_file && (dump_flags & TDF_DETAILS)) |
e030af3e | 2800 | { |
29331e72 LD |
2801 | fprintf (dump_file, " Fuse curr info since prev info " |
2802 | "compatible with it:\n"); | |
2803 | fprintf (dump_file, " prev_info: "); | |
2804 | prev_info.dump (dump_file, " "); | |
2805 | fprintf (dump_file, " curr_info: "); | |
2806 | curr_info.dump (dump_file, " "); | |
e030af3e | 2807 | } |
29331e72 LD |
2808 | m_dem.merge (prev_info, curr_info); |
2809 | if (curr_info.get_read_vl_insn ()) | |
2810 | prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ()); | |
2811 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
e030af3e | 2812 | { |
29331e72 LD |
2813 | fprintf (dump_file, " prev_info after fused: "); |
2814 | prev_info.dump (dump_file, " "); | |
2815 | fprintf (dump_file, "\n"); | |
e030af3e | 2816 | } |
e030af3e JZ |
2817 | } |
2818 | else | |
2819 | { | |
29331e72 LD |
2820 | if (dump_file && (dump_flags & TDF_DETAILS)) |
2821 | { | |
2822 | fprintf (dump_file, | |
2823 | " Cannot fuse uncompatible infos:\n"); | |
2824 | fprintf (dump_file, " prev_info: "); | |
2825 | prev_info.dump (dump_file, " "); | |
2826 | fprintf (dump_file, " curr_info: "); | |
2827 | curr_info.dump (dump_file, " "); | |
2828 | } | |
4fd09aed | 2829 | block_info.local_infos.safe_push (prev_info); |
29331e72 | 2830 | prev_info = curr_info; |
e030af3e JZ |
2831 | } |
2832 | } | |
2833 | } | |
29331e72 LD |
2834 | |
2835 | if (prev_info.valid_p () || prev_info.unknown_p ()) | |
4fd09aed | 2836 | block_info.local_infos.safe_push (prev_info); |
e030af3e | 2837 | } |
e030af3e | 2838 | |
29331e72 LD |
2839 | m_avl_regs = sbitmap_alloc (GP_REG_LAST + 1); |
2840 | bitmap_clear (m_avl_regs); | |
2841 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
e030af3e | 2842 | { |
29331e72 LD |
2843 | vsetvl_block_info &block_info = get_block_info (bb); |
2844 | if (block_info.empty_p ()) | |
2845 | continue; | |
2846 | ||
2847 | vsetvl_info &header_info = block_info.get_entry_info (); | |
2848 | if (header_info.valid_p () && header_info.has_nonvlmax_reg_avl ()) | |
e030af3e | 2849 | { |
29331e72 LD |
2850 | gcc_assert (GP_REG_P (REGNO (header_info.get_avl ()))); |
2851 | bitmap_set_bit (m_avl_regs, REGNO (header_info.get_avl ())); | |
e030af3e | 2852 | } |
e030af3e JZ |
2853 | } |
2854 | } | |
2855 | ||
29331e72 | 2856 | |
9243c3d1 | 2857 | bool |
29331e72 | 2858 | pre_vsetvl::earliest_fuse_vsetvl_info () |
9243c3d1 | 2859 | { |
29331e72 LD |
2860 | compute_avl_def_data (); |
2861 | compute_vsetvl_def_data (); | |
2862 | compute_lcm_local_properties (); | |
9243c3d1 | 2863 | |
29331e72 LD |
2864 | unsigned num_exprs = m_exprs.length (); |
2865 | struct edge_list *m_edges = create_edge_list (); | |
2866 | unsigned num_edges = NUM_EDGES (m_edges); | |
2867 | sbitmap *antin | |
2868 | = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
2869 | sbitmap *antout | |
2870 | = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); | |
005fad9d | 2871 | |
29331e72 | 2872 | sbitmap *earliest = sbitmap_vector_alloc (num_edges, num_exprs); |
9243c3d1 | 2873 | |
29331e72 LD |
2874 | compute_available (m_avloc, m_kill, m_avout, m_avin); |
2875 | compute_antinout_edge (m_antloc, m_transp, antin, antout); | |
2876 | compute_earliest (m_edges, num_exprs, antin, antout, m_avout, m_kill, | |
2877 | earliest); | |
2878 | ||
2879 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
9243c3d1 | 2880 | { |
29331e72 LD |
2881 | fprintf (dump_file, "\n Compute LCM earliest insert data:\n\n"); |
2882 | fprintf (dump_file, " Expression List (%u):\n", num_exprs); | |
2883 | for (unsigned i = 0; i < num_exprs; i++) | |
9243c3d1 | 2884 | { |
29331e72 LD |
2885 | const auto &info = *m_exprs[i]; |
2886 | fprintf (dump_file, " Expr[%u]: ", i); | |
2887 | info.dump (dump_file, " "); | |
9243c3d1 | 2888 | } |
29331e72 LD |
2889 | fprintf (dump_file, "\n bitmap data:\n"); |
2890 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
2891 | { | |
2892 | unsigned int i = bb->index (); | |
2893 | fprintf (dump_file, " BB %u:\n", i); | |
2894 | fprintf (dump_file, " avloc: "); | |
2895 | dump_bitmap_file (dump_file, m_avloc[i]); | |
2896 | fprintf (dump_file, " kill: "); | |
2897 | dump_bitmap_file (dump_file, m_kill[i]); | |
2898 | fprintf (dump_file, " antloc: "); | |
2899 | dump_bitmap_file (dump_file, m_antloc[i]); | |
2900 | fprintf (dump_file, " transp: "); | |
2901 | dump_bitmap_file (dump_file, m_transp[i]); | |
2902 | ||
2903 | fprintf (dump_file, " avin: "); | |
2904 | dump_bitmap_file (dump_file, m_avin[i]); | |
2905 | fprintf (dump_file, " avout: "); | |
2906 | dump_bitmap_file (dump_file, m_avout[i]); | |
2907 | fprintf (dump_file, " antin: "); | |
2908 | dump_bitmap_file (dump_file, antin[i]); | |
2909 | fprintf (dump_file, " antout: "); | |
2910 | dump_bitmap_file (dump_file, antout[i]); | |
2911 | } | |
2912 | fprintf (dump_file, "\n"); | |
2913 | fprintf (dump_file, " earliest:\n"); | |
2914 | for (unsigned ed = 0; ed < num_edges; ed++) | |
2915 | { | |
2916 | edge eg = INDEX_EDGE (m_edges, ed); | |
9243c3d1 | 2917 | |
29331e72 LD |
2918 | if (bitmap_empty_p (earliest[ed])) |
2919 | continue; | |
2920 | fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index, | |
2921 | eg->dest->index); | |
2922 | dump_bitmap_file (dump_file, earliest[ed]); | |
2923 | } | |
2924 | fprintf (dump_file, "\n"); | |
2925 | } | |
9243c3d1 | 2926 | |
29331e72 | 2927 | if (dump_file && (dump_flags & TDF_DETAILS)) |
9243c3d1 | 2928 | { |
29331e72 LD |
2929 | fprintf (dump_file, " Fused global info result:\n"); |
2930 | } | |
9243c3d1 | 2931 | |
29331e72 LD |
2932 | bool changed = false; |
2933 | for (unsigned ed = 0; ed < num_edges; ed++) | |
2934 | { | |
2935 | sbitmap e = earliest[ed]; | |
2936 | if (bitmap_empty_p (e)) | |
9243c3d1 JZZ |
2937 | continue; |
2938 | ||
29331e72 LD |
2939 | unsigned int expr_index; |
2940 | sbitmap_iterator sbi; | |
2941 | EXECUTE_IF_SET_IN_BITMAP (e, 0, expr_index, sbi) | |
ec99ffab | 2942 | { |
29331e72 LD |
2943 | vsetvl_info &curr_info = *m_exprs[expr_index]; |
2944 | if (!curr_info.valid_p ()) | |
2945 | continue; | |
2946 | ||
2947 | edge eg = INDEX_EDGE (m_edges, ed); | |
2948 | if (eg->probability == profile_probability::never ()) | |
2949 | continue; | |
2950 | if (eg->src == ENTRY_BLOCK_PTR_FOR_FN (cfun) | |
2951 | || eg->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) | |
2952 | continue; | |
ff8f9544 | 2953 | |
29331e72 LD |
2954 | vsetvl_block_info &src_block_info = get_block_info (eg->src); |
2955 | vsetvl_block_info &dest_block_info = get_block_info (eg->dest); | |
ff8f9544 | 2956 | |
29331e72 LD |
2957 | if (src_block_info.probability |
2958 | == profile_probability::uninitialized ()) | |
ff8f9544 | 2959 | continue; |
9243c3d1 | 2960 | |
29331e72 | 2961 | if (src_block_info.empty_p ()) |
9243c3d1 | 2962 | { |
29331e72 LD |
2963 | vsetvl_info new_curr_info = curr_info; |
2964 | new_curr_info.set_bb (crtl->ssa->bb (eg->dest)); | |
2965 | bool has_compatible_p = false; | |
2966 | unsigned int def_expr_index; | |
2967 | sbitmap_iterator sbi2; | |
2968 | EXECUTE_IF_SET_IN_BITMAP ( | |
2969 | m_vsetvl_def_in[new_curr_info.get_bb ()->index ()], 0, | |
2970 | def_expr_index, sbi2) | |
9243c3d1 | 2971 | { |
29331e72 LD |
2972 | vsetvl_info &prev_info = *m_vsetvl_def_exprs[def_expr_index]; |
2973 | if (!prev_info.valid_p ()) | |
2974 | continue; | |
2975 | if (m_dem.compatible_p (prev_info, new_curr_info)) | |
9243c3d1 | 2976 | { |
29331e72 LD |
2977 | has_compatible_p = true; |
2978 | break; | |
9243c3d1 | 2979 | } |
9243c3d1 | 2980 | } |
29331e72 | 2981 | if (!has_compatible_p) |
9243c3d1 | 2982 | { |
29331e72 LD |
2983 | if (dump_file && (dump_flags & TDF_DETAILS)) |
2984 | { | |
2985 | fprintf (dump_file, | |
2986 | " Forbidden lift up vsetvl info into bb %u " | |
2987 | "since there is no vsetvl info that reaching in " | |
2988 | "is compatible with it:", | |
2989 | eg->src->index); | |
2990 | curr_info.dump (dump_file, " "); | |
2991 | } | |
2992 | continue; | |
9243c3d1 JZZ |
2993 | } |
2994 | ||
29331e72 | 2995 | if (dump_file && (dump_flags & TDF_DETAILS)) |
e030af3e JZ |
2996 | { |
2997 | fprintf (dump_file, | |
29331e72 LD |
2998 | " Set empty bb %u to info:", eg->src->index); |
2999 | curr_info.dump (dump_file, " "); | |
e030af3e | 3000 | } |
29331e72 LD |
3001 | src_block_info.set_info (curr_info); |
3002 | src_block_info.probability = dest_block_info.probability; | |
3003 | changed = true; | |
9243c3d1 | 3004 | } |
29331e72 LD |
3005 | else if (src_block_info.has_info ()) |
3006 | { | |
3007 | vsetvl_info &prev_info = src_block_info.get_exit_info (); | |
3008 | gcc_assert (prev_info.valid_p ()); | |
3009 | ||
3010 | if (m_dem.compatible_p (prev_info, curr_info)) | |
3011 | { | |
3012 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3013 | { | |
3014 | fprintf (dump_file, " Fuse curr info since prev info " | |
3015 | "compatible with it:\n"); | |
3016 | fprintf (dump_file, " prev_info: "); | |
3017 | prev_info.dump (dump_file, " "); | |
3018 | fprintf (dump_file, " curr_info: "); | |
3019 | curr_info.dump (dump_file, " "); | |
3020 | } | |
3021 | m_dem.merge (prev_info, curr_info); | |
3022 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3023 | { | |
3024 | fprintf (dump_file, " prev_info after fused: "); | |
3025 | prev_info.dump (dump_file, " "); | |
3026 | fprintf (dump_file, "\n"); | |
3027 | } | |
3028 | changed = true; | |
3029 | if (src_block_info.has_info ()) | |
3030 | src_block_info.probability += dest_block_info.probability; | |
3031 | } | |
3032 | else if (src_block_info.has_info () | |
3033 | && !m_dem.compatible_p (prev_info, curr_info)) | |
3034 | { | |
3035 | /* Cancel lift up if probabilities are equal. */ | |
3036 | if (successors_probability_equal_p (eg->src)) | |
3037 | { | |
3038 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3039 | { | |
3040 | fprintf (dump_file, | |
3041 | " Change empty bb %u to from:", | |
3042 | eg->src->index); | |
3043 | prev_info.dump (dump_file, " "); | |
3044 | fprintf (dump_file, | |
3045 | " to (higher probability):"); | |
3046 | curr_info.dump (dump_file, " "); | |
3047 | } | |
3048 | src_block_info.set_empty_info (); | |
3049 | src_block_info.probability | |
3050 | = profile_probability::uninitialized (); | |
3051 | changed = true; | |
3052 | } | |
3053 | /* Choose the one with higher probability. */ | |
3054 | else if (dest_block_info.probability | |
3055 | > src_block_info.probability) | |
3056 | { | |
3057 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3058 | { | |
3059 | fprintf (dump_file, | |
3060 | " Change empty bb %u to from:", | |
3061 | eg->src->index); | |
3062 | prev_info.dump (dump_file, " "); | |
3063 | fprintf (dump_file, | |
3064 | " to (higher probability):"); | |
3065 | curr_info.dump (dump_file, " "); | |
3066 | } | |
3067 | src_block_info.set_info (curr_info); | |
3068 | src_block_info.probability = dest_block_info.probability; | |
3069 | changed = true; | |
3070 | } | |
3071 | } | |
3072 | } | |
3073 | else | |
e030af3e | 3074 | { |
29331e72 LD |
3075 | vsetvl_info &prev_info = src_block_info.get_exit_info (); |
3076 | if (!prev_info.valid_p () | |
3077 | || m_dem.available_p (prev_info, curr_info)) | |
3078 | continue; | |
3079 | ||
3080 | if (m_dem.compatible_p (prev_info, curr_info)) | |
3081 | { | |
3082 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3083 | { | |
3084 | fprintf (dump_file, " Fuse curr info since prev info " | |
3085 | "compatible with it:\n"); | |
3086 | fprintf (dump_file, " prev_info: "); | |
3087 | prev_info.dump (dump_file, " "); | |
3088 | fprintf (dump_file, " curr_info: "); | |
3089 | curr_info.dump (dump_file, " "); | |
3090 | } | |
3091 | m_dem.merge (prev_info, curr_info); | |
3092 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3093 | { | |
3094 | fprintf (dump_file, " prev_info after fused: "); | |
3095 | prev_info.dump (dump_file, " "); | |
3096 | fprintf (dump_file, "\n"); | |
3097 | } | |
3098 | changed = true; | |
3099 | } | |
e030af3e | 3100 | } |
9243c3d1 JZZ |
3101 | } |
3102 | } | |
3103 | ||
0d50facd | 3104 | if (dump_file && (dump_flags & TDF_DETAILS)) |
c919d059 | 3105 | { |
29331e72 | 3106 | fprintf (dump_file, "\n"); |
c919d059 | 3107 | } |
c919d059 | 3108 | |
29331e72 LD |
3109 | sbitmap_vector_free (antin); |
3110 | sbitmap_vector_free (antout); | |
3111 | sbitmap_vector_free (earliest); | |
3112 | free_edge_list (m_edges); | |
c919d059 | 3113 | |
29331e72 | 3114 | return changed; |
c919d059 KC |
3115 | } |
3116 | ||
8421f279 | 3117 | void |
29331e72 | 3118 | pre_vsetvl::pre_global_vsetvl_info () |
c919d059 | 3119 | { |
29331e72 LD |
3120 | compute_avl_def_data (); |
3121 | compute_vsetvl_def_data (); | |
3122 | compute_lcm_local_properties (); | |
c919d059 | 3123 | |
29331e72 LD |
3124 | unsigned num_exprs = m_exprs.length (); |
3125 | m_edges = pre_edge_lcm_avs (num_exprs, m_transp, m_avloc, m_antloc, m_kill, | |
3126 | m_avin, m_avout, &m_insert, &m_del); | |
3127 | unsigned num_edges = NUM_EDGES (m_edges); | |
c919d059 | 3128 | |
29331e72 LD |
3129 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3130 | { | |
3131 | fprintf (dump_file, "\n Compute LCM insert and delete data:\n\n"); | |
3132 | fprintf (dump_file, " Expression List (%u):\n", num_exprs); | |
3133 | for (unsigned i = 0; i < num_exprs; i++) | |
c919d059 | 3134 | { |
29331e72 LD |
3135 | const auto &info = *m_exprs[i]; |
3136 | fprintf (dump_file, " Expr[%u]: ", i); | |
3137 | info.dump (dump_file, " "); | |
c919d059 | 3138 | } |
29331e72 LD |
3139 | fprintf (dump_file, "\n bitmap data:\n"); |
3140 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
c919d059 | 3141 | { |
29331e72 LD |
3142 | unsigned i = bb->index (); |
3143 | fprintf (dump_file, " BB %u:\n", i); | |
3144 | fprintf (dump_file, " avloc: "); | |
3145 | dump_bitmap_file (dump_file, m_avloc[i]); | |
3146 | fprintf (dump_file, " kill: "); | |
3147 | dump_bitmap_file (dump_file, m_kill[i]); | |
3148 | fprintf (dump_file, " antloc: "); | |
3149 | dump_bitmap_file (dump_file, m_antloc[i]); | |
3150 | fprintf (dump_file, " transp: "); | |
3151 | dump_bitmap_file (dump_file, m_transp[i]); | |
3152 | ||
3153 | fprintf (dump_file, " avin: "); | |
3154 | dump_bitmap_file (dump_file, m_avin[i]); | |
3155 | fprintf (dump_file, " avout: "); | |
3156 | dump_bitmap_file (dump_file, m_avout[i]); | |
3157 | fprintf (dump_file, " del: "); | |
3158 | dump_bitmap_file (dump_file, m_del[i]); | |
c919d059 | 3159 | } |
29331e72 LD |
3160 | fprintf (dump_file, "\n"); |
3161 | fprintf (dump_file, " insert:\n"); | |
3162 | for (unsigned ed = 0; ed < num_edges; ed++) | |
8421f279 | 3163 | { |
29331e72 | 3164 | edge eg = INDEX_EDGE (m_edges, ed); |
c919d059 | 3165 | |
29331e72 LD |
3166 | if (bitmap_empty_p (m_insert[ed])) |
3167 | continue; | |
3168 | fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index, | |
3169 | eg->dest->index); | |
3170 | dump_bitmap_file (dump_file, m_insert[ed]); | |
c919d059 | 3171 | } |
29331e72 LD |
3172 | } |
3173 | ||
3174 | /* Remove vsetvl infos as LCM suggest */ | |
3175 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
3176 | { | |
3177 | sbitmap d = m_del[bb->index ()]; | |
3178 | if (bitmap_count_bits (d) == 0) | |
c919d059 | 3179 | continue; |
29331e72 LD |
3180 | gcc_assert (bitmap_count_bits (d) == 1); |
3181 | unsigned expr_index = bitmap_first_set_bit (d); | |
3182 | vsetvl_info &info = *m_exprs[expr_index]; | |
3183 | gcc_assert (info.valid_p ()); | |
3184 | gcc_assert (info.get_bb () == bb); | |
3185 | const vsetvl_block_info &block_info = get_block_info (info.get_bb ()); | |
3186 | gcc_assert (block_info.get_entry_info () == info); | |
3187 | info.set_delete (); | |
3188 | } | |
c919d059 | 3189 | |
29331e72 LD |
3190 | for (const bb_info *bb : crtl->ssa->bbs ()) |
3191 | { | |
3192 | vsetvl_block_info &block_info = get_block_info (bb); | |
3193 | if (block_info.empty_p ()) | |
3194 | continue; | |
3195 | vsetvl_info &curr_info = block_info.get_entry_info (); | |
3196 | if (curr_info.delete_p ()) | |
c919d059 | 3197 | { |
4fd09aed | 3198 | if (block_info.local_infos.is_empty ()) |
29331e72 | 3199 | continue; |
4fd09aed | 3200 | curr_info = block_info.local_infos[0]; |
c919d059 | 3201 | } |
29331e72 LD |
3202 | if (curr_info.valid_p () && !curr_info.vl_use_by_non_rvv_insn_p () |
3203 | && preds_has_same_avl_p (curr_info)) | |
3204 | curr_info.set_change_vtype_only (); | |
c919d059 | 3205 | |
29331e72 LD |
3206 | vsetvl_info prev_info = vsetvl_info (); |
3207 | prev_info.set_empty (); | |
4fd09aed | 3208 | for (auto &curr_info : block_info.local_infos) |
c919d059 | 3209 | { |
29331e72 LD |
3210 | if (prev_info.valid_p () && curr_info.valid_p () |
3211 | && m_dem.avl_available_p (prev_info, curr_info)) | |
3212 | curr_info.set_change_vtype_only (); | |
3213 | prev_info = curr_info; | |
c919d059 | 3214 | } |
20c85207 | 3215 | } |
20c85207 JZ |
3216 | } |
3217 | ||
29331e72 LD |
3218 | void |
3219 | pre_vsetvl::emit_vsetvl () | |
20c85207 | 3220 | { |
29331e72 | 3221 | bool need_commit = false; |
20c85207 | 3222 | |
29331e72 | 3223 | for (const bb_info *bb : crtl->ssa->bbs ()) |
20c85207 | 3224 | { |
4fd09aed | 3225 | for (const auto &curr_info : get_block_info (bb).local_infos) |
29331e72 LD |
3226 | { |
3227 | insn_info *insn = curr_info.get_insn (); | |
3228 | if (curr_info.delete_p ()) | |
3229 | { | |
3230 | if (vsetvl_insn_p (insn->rtl ())) | |
3231 | remove_vsetvl_insn (curr_info); | |
3232 | continue; | |
3233 | } | |
3234 | else if (curr_info.valid_p ()) | |
3235 | { | |
3236 | if (vsetvl_insn_p (insn->rtl ())) | |
3237 | { | |
3238 | const vsetvl_info temp = vsetvl_info (insn); | |
3239 | if (!(curr_info == temp)) | |
3240 | { | |
3241 | if (dump_file) | |
3242 | { | |
3243 | fprintf (dump_file, "\n Change vsetvl info from: "); | |
3244 | temp.dump (dump_file, " "); | |
3245 | fprintf (dump_file, " to: "); | |
3246 | curr_info.dump (dump_file, " "); | |
3247 | } | |
3248 | change_vsetvl_insn (curr_info); | |
3249 | } | |
3250 | } | |
3251 | else | |
3252 | { | |
3253 | if (dump_file) | |
3254 | { | |
3255 | fprintf (dump_file, | |
3256 | "\n Insert vsetvl info before insn %d: ", | |
3257 | insn->uid ()); | |
3258 | curr_info.dump (dump_file, " "); | |
3259 | } | |
3260 | insert_vsetvl_insn (EMIT_BEFORE, curr_info); | |
3261 | } | |
3262 | } | |
3263 | } | |
20c85207 | 3264 | } |
20c85207 | 3265 | |
29331e72 | 3266 | for (const vsetvl_info &item : m_delete_list) |
20c85207 | 3267 | { |
29331e72 LD |
3268 | gcc_assert (vsetvl_insn_p (item.get_insn ()->rtl ())); |
3269 | remove_vsetvl_insn (item); | |
20c85207 JZ |
3270 | } |
3271 | ||
29331e72 LD |
3272 | /* m_insert vsetvl as LCM suggest. */ |
3273 | for (int ed = 0; ed < NUM_EDGES (m_edges); ed++) | |
20c85207 | 3274 | { |
29331e72 LD |
3275 | edge eg = INDEX_EDGE (m_edges, ed); |
3276 | sbitmap i = m_insert[ed]; | |
3277 | if (bitmap_count_bits (i) < 1) | |
3278 | continue; | |
3279 | ||
3280 | if (bitmap_count_bits (i) > 1) | |
3281 | /* For code with infinite loop (e.g. pr61634.c), The data flow is | |
3282 | completely wrong. */ | |
3283 | continue; | |
3284 | ||
3285 | gcc_assert (bitmap_count_bits (i) == 1); | |
3286 | unsigned expr_index = bitmap_first_set_bit (i); | |
3287 | const vsetvl_info &info = *m_exprs[expr_index]; | |
3288 | gcc_assert (info.valid_p ()); | |
3289 | if (dump_file) | |
20c85207 | 3290 | { |
29331e72 LD |
3291 | fprintf (dump_file, |
3292 | "\n Insert vsetvl info at edge(bb %u -> bb %u): ", | |
3293 | eg->src->index, eg->dest->index); | |
3294 | info.dump (dump_file, " "); | |
20c85207 | 3295 | } |
29331e72 LD |
3296 | rtl_profile_for_edge (eg); |
3297 | start_sequence (); | |
3298 | ||
3299 | insert_vsetvl_insn (EMIT_DIRECT, info); | |
3300 | rtx_insn *rinsn = get_insns (); | |
3301 | end_sequence (); | |
3302 | default_rtl_profile (); | |
3303 | ||
3304 | /* We should not get an abnormal edge here. */ | |
3305 | gcc_assert (!(eg->flags & EDGE_ABNORMAL)); | |
3306 | need_commit = true; | |
3307 | insert_insn_on_edge (rinsn, eg); | |
20c85207 JZ |
3308 | } |
3309 | ||
29331e72 LD |
3310 | /* Insert vsetvl info that was not deleted after lift up. */ |
3311 | for (const bb_info *bb : crtl->ssa->bbs ()) | |
20c85207 | 3312 | { |
29331e72 LD |
3313 | const vsetvl_block_info &block_info = get_block_info (bb); |
3314 | if (!block_info.has_info ()) | |
3315 | continue; | |
3316 | ||
3317 | const vsetvl_info &footer_info = block_info.get_exit_info (); | |
3318 | ||
3319 | if (footer_info.delete_p ()) | |
3320 | continue; | |
3321 | ||
3322 | edge eg; | |
3323 | edge_iterator eg_iterator; | |
3324 | FOR_EACH_EDGE (eg, eg_iterator, bb->cfg_bb ()->succs) | |
20c85207 | 3325 | { |
29331e72 LD |
3326 | gcc_assert (!(eg->flags & EDGE_ABNORMAL)); |
3327 | if (dump_file) | |
3328 | { | |
3329 | fprintf ( | |
3330 | dump_file, | |
3331 | "\n Insert missed vsetvl info at edge(bb %u -> bb %u): ", | |
3332 | eg->src->index, eg->dest->index); | |
3333 | footer_info.dump (dump_file, " "); | |
3334 | } | |
3335 | start_sequence (); | |
3336 | insert_vsetvl_insn (EMIT_DIRECT, footer_info); | |
3337 | rtx_insn *rinsn = get_insns (); | |
3338 | end_sequence (); | |
3339 | default_rtl_profile (); | |
3340 | insert_insn_on_edge (rinsn, eg); | |
3341 | need_commit = true; | |
20c85207 JZ |
3342 | } |
3343 | } | |
3344 | ||
29331e72 LD |
3345 | if (need_commit) |
3346 | commit_edge_insertions (); | |
20c85207 JZ |
3347 | } |
3348 | ||
9243c3d1 | 3349 | void |
29331e72 | 3350 | pre_vsetvl::cleaup () |
9243c3d1 | 3351 | { |
29331e72 LD |
3352 | remove_avl_operand (); |
3353 | remove_unused_dest_operand (); | |
3354 | } | |
9243c3d1 | 3355 | |
29331e72 LD |
3356 | void |
3357 | pre_vsetvl::remove_avl_operand () | |
3358 | { | |
3359 | basic_block cfg_bb; | |
3360 | rtx_insn *rinsn; | |
3361 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
3362 | FOR_BB_INSNS (cfg_bb, rinsn) | |
3363 | if (NONDEBUG_INSN_P (rinsn) && has_vl_op (rinsn) | |
3364 | && REG_P (get_vl (rinsn))) | |
3365 | { | |
9243c3d1 | 3366 | rtx avl = get_vl (rinsn); |
a2d12abe | 3367 | if (count_regno_occurrences (rinsn, REGNO (avl)) == 1) |
9243c3d1 | 3368 | { |
29331e72 | 3369 | rtx new_pat; |
60bd33bc | 3370 | if (fault_first_load_p (rinsn)) |
29331e72 LD |
3371 | new_pat |
3372 | = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx); | |
60bd33bc JZZ |
3373 | else |
3374 | { | |
3375 | rtx set = single_set (rinsn); | |
3376 | rtx src | |
3377 | = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx); | |
29331e72 LD |
3378 | new_pat = gen_rtx_SET (SET_DEST (set), src); |
3379 | } | |
3380 | if (dump_file) | |
3381 | { | |
3382 | fprintf (dump_file, " Cleanup insn %u's avl operand:\n", | |
3383 | INSN_UID (rinsn)); | |
3384 | print_rtl_single (dump_file, rinsn); | |
60bd33bc | 3385 | } |
29331e72 | 3386 | validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false); |
9243c3d1 JZZ |
3387 | } |
3388 | } | |
20c85207 JZ |
3389 | } |
3390 | ||
6b6b9c68 | 3391 | void |
29331e72 | 3392 | pre_vsetvl::remove_unused_dest_operand () |
20c85207 | 3393 | { |
6b6b9c68 | 3394 | df_analyze (); |
20c85207 JZ |
3395 | basic_block cfg_bb; |
3396 | rtx_insn *rinsn; | |
3397 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
29331e72 LD |
3398 | FOR_BB_INSNS (cfg_bb, rinsn) |
3399 | if (NONDEBUG_INSN_P (rinsn) && vsetvl_insn_p (rinsn)) | |
6b6b9c68 | 3400 | { |
29331e72 LD |
3401 | rtx vl = get_vl (rinsn); |
3402 | vsetvl_info info = vsetvl_info (rinsn); | |
3403 | if (has_no_uses (cfg_bb, rinsn, REGNO (vl))) | |
3404 | if (!info.has_vlmax_avl ()) | |
3405 | { | |
3406 | rtx new_pat = info.get_vsetvl_pat (true); | |
3407 | if (dump_file) | |
3408 | { | |
3409 | fprintf (dump_file, | |
3410 | " Remove vsetvl insn %u's dest(vl) operand since " | |
3411 | "it unused:\n", | |
3412 | INSN_UID (rinsn)); | |
3413 | print_rtl_single (dump_file, rinsn); | |
3414 | } | |
3415 | validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, | |
3416 | false); | |
3417 | } | |
6b6b9c68 | 3418 | } |
6b6b9c68 JZZ |
3419 | } |
3420 | ||
29331e72 LD |
3421 | const pass_data pass_data_vsetvl = { |
3422 | RTL_PASS, /* type */ | |
3423 | "vsetvl", /* name */ | |
3424 | OPTGROUP_NONE, /* optinfo_flags */ | |
3425 | TV_NONE, /* tv_id */ | |
3426 | 0, /* properties_required */ | |
3427 | 0, /* properties_provided */ | |
3428 | 0, /* properties_destroyed */ | |
3429 | 0, /* todo_flags_start */ | |
3430 | 0, /* todo_flags_finish */ | |
3431 | }; | |
9243c3d1 | 3432 | |
29331e72 LD |
3433 | class pass_vsetvl : public rtl_opt_pass |
3434 | { | |
3435 | private: | |
3436 | void simple_vsetvl (); | |
3437 | void lazy_vsetvl (); | |
9243c3d1 | 3438 | |
29331e72 LD |
3439 | public: |
3440 | pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {} | |
9243c3d1 | 3441 | |
29331e72 LD |
3442 | /* opt_pass methods: */ |
3443 | virtual bool gate (function *) final override { return TARGET_VECTOR; } | |
3444 | virtual unsigned int execute (function *) final override; | |
3445 | }; // class pass_vsetvl | |
9243c3d1 | 3446 | |
acc10c79 | 3447 | void |
29331e72 | 3448 | pass_vsetvl::simple_vsetvl () |
acc10c79 | 3449 | { |
29331e72 LD |
3450 | if (dump_file) |
3451 | fprintf (dump_file, "\nEntering Simple VSETVL PASS\n"); | |
acc10c79 | 3452 | |
29331e72 LD |
3453 | basic_block cfg_bb; |
3454 | rtx_insn *rinsn; | |
3455 | FOR_ALL_BB_FN (cfg_bb, cfun) | |
acc10c79 | 3456 | { |
29331e72 | 3457 | FOR_BB_INSNS (cfg_bb, rinsn) |
acc10c79 | 3458 | { |
29331e72 | 3459 | if (!NONDEBUG_INSN_P (rinsn)) |
acc10c79 | 3460 | continue; |
29331e72 LD |
3461 | if (has_vtype_op (rinsn)) |
3462 | { | |
3463 | const auto &info = vsetvl_info (rinsn); | |
3464 | rtx pat = info.get_vsetvl_pat (); | |
3465 | emit_insn_before (pat, rinsn); | |
3466 | if (dump_file) | |
3467 | { | |
3468 | fprintf (dump_file, " Insert vsetvl insn before insn %d:\n", | |
3469 | INSN_UID (rinsn)); | |
3470 | print_rtl_single (dump_file, PREV_INSN (rinsn)); | |
3471 | } | |
3472 | } | |
acc10c79 JZZ |
3473 | } |
3474 | } | |
acc10c79 JZZ |
3475 | } |
3476 | ||
9243c3d1 JZZ |
3477 | /* Lazy vsetvl insertion for optimize > 0. */ |
3478 | void | |
29331e72 | 3479 | pass_vsetvl::lazy_vsetvl () |
9243c3d1 JZZ |
3480 | { |
3481 | if (dump_file) | |
29331e72 LD |
3482 | fprintf (dump_file, "\nEntering Lazy VSETVL PASS\n\n"); |
3483 | ||
3484 | pre_vsetvl pre = pre_vsetvl (); | |
9243c3d1 | 3485 | |
9243c3d1 | 3486 | if (dump_file) |
29331e72 LD |
3487 | fprintf (dump_file, "\nPhase 1: Fuse local vsetvl infos.\n\n"); |
3488 | pre.fuse_local_vsetvl_info (); | |
0d50facd | 3489 | if (dump_file && (dump_flags & TDF_DETAILS)) |
29331e72 | 3490 | pre.dump (dump_file, "phase 1"); |
9243c3d1 | 3491 | |
29331e72 | 3492 | /* Phase 2: Fuse header and footer vsetvl infos between basic blocks. */ |
9243c3d1 | 3493 | if (dump_file) |
29331e72 LD |
3494 | fprintf (dump_file, "\nPhase 2: Lift up vsetvl info.\n\n"); |
3495 | bool changed; | |
3496 | int fused_count = 0; | |
3497 | do | |
3498 | { | |
3499 | if (dump_file) | |
3500 | fprintf (dump_file, " Try lift up %d.\n\n", fused_count); | |
3501 | changed = pre.earliest_fuse_vsetvl_info (); | |
3502 | fused_count += 1; | |
3503 | } while (changed); | |
3504 | ||
0d50facd | 3505 | if (dump_file && (dump_flags & TDF_DETAILS)) |
29331e72 | 3506 | pre.dump (dump_file, "phase 2"); |
9243c3d1 | 3507 | |
29331e72 | 3508 | /* Phase 3: Reducing redundant vsetvl infos using LCM. */ |
9243c3d1 | 3509 | if (dump_file) |
29331e72 LD |
3510 | fprintf (dump_file, "\nPhase 3: Reduce global vsetvl infos.\n\n"); |
3511 | pre.pre_global_vsetvl_info (); | |
3512 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
3513 | pre.dump (dump_file, "phase 3"); | |
9243c3d1 | 3514 | |
29331e72 | 3515 | /* Phase 4: Insert, modify and remove vsetvl insns. */ |
9243c3d1 | 3516 | if (dump_file) |
29331e72 LD |
3517 | fprintf (dump_file, |
3518 | "\nPhase 4: Insert, modify and remove vsetvl insns.\n\n"); | |
3519 | pre.emit_vsetvl (); | |
9243c3d1 | 3520 | |
29331e72 | 3521 | /* Phase 5: Cleaup */ |
9243c3d1 | 3522 | if (dump_file) |
29331e72 LD |
3523 | fprintf (dump_file, "\nPhase 5: Cleaup\n\n"); |
3524 | pre.cleaup (); | |
6b6b9c68 | 3525 | |
29331e72 | 3526 | pre.finish (); |
9243c3d1 JZZ |
3527 | } |
3528 | ||
3529 | /* Main entry point for this pass. */ | |
3530 | unsigned int | |
3531 | pass_vsetvl::execute (function *) | |
3532 | { | |
3533 | if (n_basic_blocks_for_fn (cfun) <= 0) | |
3534 | return 0; | |
3535 | ||
ca8fb009 JZZ |
3536 | /* The RVV instruction may change after split which is not a stable |
3537 | instruction. We need to split it here to avoid potential issue | |
3538 | since the VSETVL PASS is insert before split PASS. */ | |
3539 | split_all_insns (); | |
9243c3d1 JZZ |
3540 | |
3541 | /* Early return for there is no vector instructions. */ | |
3542 | if (!has_vector_insn (cfun)) | |
3543 | return 0; | |
3544 | ||
9243c3d1 JZZ |
3545 | if (!optimize) |
3546 | simple_vsetvl (); | |
3547 | else | |
3548 | lazy_vsetvl (); | |
3549 | ||
9243c3d1 JZZ |
3550 | return 0; |
3551 | } | |
3552 | ||
3553 | rtl_opt_pass * | |
3554 | make_pass_vsetvl (gcc::context *ctxt) | |
3555 | { | |
3556 | return new pass_vsetvl (ctxt); | |
3557 | } |