/* VSETVL pass for RISC-V 'V' Extension for GNU compiler.
   Copyright (C) 2022-2024 Free Software Foundation, Inc.
   Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not, see
<http://www.gnu.org/licenses/>.  */
/* The values of the vl and vtype registers affect the behavior of RVV
   insns.  That is, before executing an RVV instruction we need to set the
   correct vl and vtype values with a vsetvl instruction.  Executing the
   fewest vsetvl instructions while keeping the behavior the same is the
   problem this pass tries to solve.  This vsetvl pass is divided into 5
   phases:

     - Phase 1 (fuse local vsetvl infos): traverses each basic block, parses
       each instruction in it that affects vl and vtype state, and generates
       an array of vsetvl_info objects.  It then traverses the vsetvl_info
       array from front to back and performs fusion according to the fusion
       rules.  The fused vsetvl infos are stored in the vsetvl_block_info
       object's `infos` field.

     - Phase 2 (earliest fuse global vsetvl infos): the header_info and
       footer_info of vsetvl_block_info are used as expressions, and the
       earliest of each expression is computed.  Based on the earliest
       information, we try to lift the corresponding vsetvl info to the src
       basic block of the edge (mainly to reduce the total number of vsetvl
       instructions; this lifting may cause some execution paths to execute
       vsetvl instructions that would not otherwise be needed).

     - Phase 3 (pre global vsetvl info): the header_info and footer_info of
       vsetvl_block_info are used as expressions, and the LCM algorithm is
       used to compute the header_info that needs to be deleted and the one
       that needs to be inserted on some edges.

     - Phase 4 (emit vsetvl insns): based on the fusion result of Phase 1
       and the deletion and insertion information of Phase 3, the necessary
       vsetvl instruction insertions, modifications and deletions are
       performed.

     - Phase 5 (cleanup): clean up the avl operand in the RVV operator
       instructions and the unused dest operand of the vsetvl insns.

   After Phase 1, a virtual CFG of vsetvl_info is generated.  The virtual
   basic block is represented by vsetvl_block_info, and the virtual vsetvl
   statements inside are represented by vsetvl_info.  Phases 2 and 3
   constantly modify and adjust this virtual CFG.  Phase 4 performs
   insertion, modification and deletion of vsetvl instructions based on the
   optimized virtual CFG.  Phases 1, 2 and 3 do not modify the RTL.  */
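
/* A hand-written illustration (not taken from a real compiler dump) of
   what the pass achieves.  Two RVV instructions whose vl/vtype demands
   agree need only one vsetvl:

       vsetvli a5,a0,e32,m1,ta,ma
       vadd.vv v1,v2,v3
       vsetvli a5,a0,e32,m1,ta,ma   <- redundant: same vl/vtype demand
       vsub.vv v4,v1,v2

   becomes:

       vsetvli a5,a0,e32,m1,ta,ma
       vadd.vv v1,v2,v3
       vsub.vv v4,v1,v2  */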

#define IN_TARGET_CODE 1
#define INCLUDE_ALGORITHM
#define INCLUDE_FUNCTIONAL

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "backend.h"
#include "rtl.h"
#include "target.h"
#include "tree-pass.h"
#include "df.h"
#include "rtl-ssa.h"
#include "cfgcleanup.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "insn-opinit.h"
#include "tm-constrs.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "predict.h"
#include "profile-count.h"
#include "gcse.h"

using namespace rtl_ssa;
using namespace riscv_vector;

/* Set the bitmap DST to the union of SRC of predecessors of
   basic block B.
   It's a bit different from bitmap_union_of_preds in cfganal.cc.  This
   function takes into account the case where a pred is the ENTRY basic
   block.  The main reason for this difference is to make it easier to
   insert some special value into the ENTRY basic block, for example a
   vsetvl_info with a status of UNKNOWN.  */
static void
bitmap_union_of_preds_with_entry (sbitmap dst, sbitmap *src, basic_block b)
{
  unsigned int set_size = dst->size;
  edge e;
  unsigned ix;

  for (ix = 0; ix < EDGE_COUNT (b->preds); ix++)
    {
      e = EDGE_PRED (b, ix);
      bitmap_copy (dst, src[e->src->index]);
      break;
    }

  if (ix == EDGE_COUNT (b->preds))
    bitmap_clear (dst);
  else
    for (ix++; ix < EDGE_COUNT (b->preds); ix++)
      {
        unsigned int i;
        SBITMAP_ELT_TYPE *p, *r;

        e = EDGE_PRED (b, ix);
        p = src[e->src->index]->elms;
        r = dst->elms;
        for (i = 0; i < set_size; i++)
          *r++ |= *p++;
      }
}

/* Compute the reaching definition in and out based on the GEN and KILL
   information in each basic block.
   This function references the compute_available implementation in
   lcm.cc.  */
static void
compute_reaching_defintion (sbitmap *gen, sbitmap *kill, sbitmap *in,
                            sbitmap *out)
{
  edge e;
  basic_block *worklist, *qin, *qout, *qend, bb;
  unsigned int qlen;
  edge_iterator ei;

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Put every block on the worklist.  Use reverse postorder to make this
     forward dataflow problem converge in fewer iterations.  */
  int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
  int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false);
  for (int i = 0; i < n; ++i)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]);
      *qin++ = bb;
      bb->aux = bb;
    }
  free (rpo);

  qin = worklist;
  qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS];
  qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  /* Mark blocks which are successors of the entry block so that we
     can easily identify them below.  */
  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
    e->dest->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun);

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      qlen--;

      if (qout >= qend)
        qout = worklist;

      /* Do not clear the aux field for blocks which are successors of the
         ENTRY block.  That way we never add them to the worklist again.  */
      if (bb->aux != ENTRY_BLOCK_PTR_FOR_FN (cfun))
        bb->aux = NULL;

      bitmap_union_of_preds_with_entry (in[bb->index], out, bb);

      if (bitmap_ior_and_compl (out[bb->index], gen[bb->index], in[bb->index],
                                kill[bb->index]))
        /* If the out state of this block changed, then we need
           to add the successors of this block to the worklist
           if they are not already on the worklist.  */
        FOR_EACH_EDGE (e, ei, bb->succs)
          if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
            {
              *qin++ = e->dest;
              e->dest->aux = e;
              qlen++;

              if (qin >= qend)
                qin = worklist;
            }
    }

  clear_aux_for_edges ();
  clear_aux_for_blocks ();
  free (worklist);
}

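/* For reference, the worklist loop above solves the classic forward
   dataflow equations for reaching definitions:

     IN[b]  = union of OUT[p] over every predecessor p of b
     OUT[b] = GEN[b] | (IN[b] & ~KILL[b])

   where bitmap_ior_and_compl computes the OUT update and returns whether
   anything changed.  */
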
/* Classification of vsetvl instruction.  */
enum vsetvl_type
{
  VSETVL_NORMAL,
  VSETVL_VTYPE_CHANGE_ONLY,
  VSETVL_DISCARD_RESULT,
  NUM_VSETVL_TYPE
};

enum emit_type
{
  /* emit_insn directly.  */
  EMIT_DIRECT,
  EMIT_BEFORE,
  EMIT_AFTER,
};

/* Dump helper functions.  */
static const char *
vlmul_to_str (vlmul_type vlmul)
{
  switch (vlmul)
    {
    case LMUL_1:
      return "m1";
    case LMUL_2:
      return "m2";
    case LMUL_4:
      return "m4";
    case LMUL_8:
      return "m8";
    case LMUL_RESERVED:
      return "INVALID LMUL";
    case LMUL_F8:
      return "mf8";
    case LMUL_F4:
      return "mf4";
    case LMUL_F2:
      return "mf2";

    default:
      gcc_unreachable ();
    }
}

static const char *
policy_to_str (bool agnostic_p)
{
  return agnostic_p ? "agnostic" : "undisturbed";
}

/* Return true if it is an RVV instruction that depends on the VTYPE global
   status register.  */
static bool
has_vtype_op (rtx_insn *rinsn)
{
  return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn);
}

/* Return true if the instruction ignores the VLMUL field of VTYPE.  */
static bool
ignore_vlmul_insn_p (rtx_insn *rinsn)
{
  return get_attr_type (rinsn) == TYPE_VIMOVVX
         || get_attr_type (rinsn) == TYPE_VFMOVVF
         || get_attr_type (rinsn) == TYPE_VIMOVXV
         || get_attr_type (rinsn) == TYPE_VFMOVFV;
}

/* Return true if the instruction is a scalar move instruction.  */
static bool
scalar_move_insn_p (rtx_insn *rinsn)
{
  return get_attr_type (rinsn) == TYPE_VIMOVXV
         || get_attr_type (rinsn) == TYPE_VFMOVFV;
}

/* Return true if the instruction is a fault-first load instruction.  */
static bool
fault_first_load_p (rtx_insn *rinsn)
{
  return recog_memoized (rinsn) >= 0
         && (get_attr_type (rinsn) == TYPE_VLDFF
             || get_attr_type (rinsn) == TYPE_VLSEGDFF);
}

/* Return true if the instruction is a read vl instruction.  */
static bool
read_vl_insn_p (rtx_insn *rinsn)
{
  return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_RDVL;
}

/* Return true if it is a vsetvl instruction.  */
static bool
vector_config_insn_p (rtx_insn *rinsn)
{
  return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL;
}

/* Return true if it is vsetvldi or vsetvlsi.  */
static bool
vsetvl_insn_p (rtx_insn *rinsn)
{
  if (!rinsn || !vector_config_insn_p (rinsn))
    return false;
  return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi
          || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi);
}

/* Return true if it is vsetvl zero,rs1.  */
static bool
vsetvl_discard_result_insn_p (rtx_insn *rinsn)
{
  if (!vector_config_insn_p (rinsn))
    return false;
  return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi
          || INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi);
}

static bool
real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb)
{
  return insn != nullptr && insn->is_real () && insn->bb () == bb;
}

/* Helper function to get the VL operand for a VLMAX insn.  */
static rtx
get_vl (rtx_insn *rinsn)
{
  if (has_vl_op (rinsn))
    {
      extract_insn_cached (rinsn);
      return recog_data.operand[get_attr_vl_op_idx (rinsn)];
    }
  return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0));
}

/* Helper function to get the AVL operand.  */
static rtx
get_avl (rtx_insn *rinsn)
{
  if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn))
    return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0);

  if (!has_vl_op (rinsn))
    return NULL_RTX;
  if (vlmax_avl_type_p (rinsn))
    return RVV_VLMAX;
  extract_insn_cached (rinsn);
  return recog_data.operand[get_attr_vl_op_idx (rinsn)];
}

/* Get the default mask policy.  */
static bool
get_default_ma ()
{
  /* For an instruction that doesn't require MA, we still need a default
     value to emit vsetvl.  We pick the default value according to the
     preferred policy.  */
  return (bool) (get_prefer_mask_policy () & 0x1
                 || (get_prefer_mask_policy () >> 1 & 0x1));
}

/* Helper function to get the MA operand.  */
static bool
mask_agnostic_p (rtx_insn *rinsn)
{
  /* If it doesn't have MA, we return agnostic by default.  */
  extract_insn_cached (rinsn);
  int ma = get_attr_ma (rinsn);
  return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma);
}

/* Return true if FN has a vector instruction that uses VL/VTYPE.  */
static bool
has_vector_insn (function *fn)
{
  basic_block cfg_bb;
  rtx_insn *rinsn;
  FOR_ALL_BB_FN (cfg_bb, fn)
    FOR_BB_INSNS (cfg_bb, rinsn)
      if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn))
        return true;
  return false;
}

static vlmul_type
calculate_vlmul (unsigned int sew, unsigned int ratio)
{
  const vlmul_type ALL_LMUL[]
    = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2};
  for (const vlmul_type vlmul : ALL_LMUL)
    if (calculate_ratio (sew, vlmul) == ratio)
      return vlmul;
  return LMUL_RESERVED;
}

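/* A worked example (assuming calculate_ratio computes SEW divided by the
   LMUL value, with fractional LMULs yielding ratios larger than SEW):
   calculate_vlmul (32, 16) scans ALL_LMUL and returns LMUL_2, because
   32 / 2 == 16 matches the requested ratio and no other LMUL does for
   SEW == 32.  */
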
/* Get the currently supported maximum sew used in the int rvv
   instructions.  */
static uint8_t
get_max_int_sew ()
{
  if (TARGET_VECTOR_ELEN_64)
    return 64;
  else if (TARGET_VECTOR_ELEN_32)
    return 32;
  gcc_unreachable ();
}

/* Get the currently supported maximum sew used in the float rvv
   instructions.  */
static uint8_t
get_max_float_sew ()
{
  if (TARGET_VECTOR_ELEN_FP_64)
    return 64;
  else if (TARGET_VECTOR_ELEN_FP_32)
    return 32;
  else if (TARGET_VECTOR_ELEN_FP_16)
    return 16;
  gcc_unreachable ();
}

enum def_type
{
  REAL_SET = 1 << 0,
  PHI_SET = 1 << 1,
  BB_HEAD_SET = 1 << 2,
  BB_END_SET = 1 << 3,
  /* ??? TODO: In the RTL_SSA framework, we have REAL_SET,
     PHI_SET, BB_HEAD_SET, BB_END_SET and
     CLOBBER_DEF def_info types.  Currently,
     we conservatively do not optimize clobber
     defs since we haven't seen a case where
     we would need to optimize them.  */
  CLOBBER_DEF = 1 << 4
};

static bool
insn_should_be_added_p (const insn_info *insn, unsigned int types)
{
  if (insn->is_real () && (types & REAL_SET))
    return true;
  if (insn->is_phi () && (types & PHI_SET))
    return true;
  if (insn->is_bb_head () && (types & BB_HEAD_SET))
    return true;
  if (insn->is_bb_end () && (types & BB_END_SET))
    return true;
  return false;
}

static const hash_set<use_info *>
get_all_real_uses (insn_info *insn, unsigned regno)
{
  gcc_assert (insn->is_real ());

  hash_set<use_info *> uses;
  auto_vec<phi_info *> work_list;
  hash_set<phi_info *> visited_list;

  for (def_info *def : insn->defs ())
    {
      if (!def->is_reg () || def->regno () != regno)
        continue;
      set_info *set = safe_dyn_cast<set_info *> (def);
      if (!set)
        continue;
      for (use_info *use : set->nondebug_insn_uses ())
        if (use->insn ()->is_real ())
          uses.add (use);
      for (use_info *use : set->phi_uses ())
        work_list.safe_push (use->phi ());
    }

  while (!work_list.is_empty ())
    {
      phi_info *phi = work_list.pop ();
      visited_list.add (phi);

      for (use_info *use : phi->nondebug_insn_uses ())
        if (use->insn ()->is_real ())
          uses.add (use);
      for (use_info *use : phi->phi_uses ())
        if (!visited_list.contains (use->phi ()))
          work_list.safe_push (use->phi ());
    }
  return uses;
}

/* Recursively find all defining instructions.  The kinds of instructions
   are specified by TYPES.  */
static hash_set<set_info *>
get_all_sets (phi_info *phi, unsigned int types)
{
  hash_set<set_info *> insns;
  auto_vec<phi_info *> work_list;
  hash_set<phi_info *> visited_list;
  if (!phi)
    return hash_set<set_info *> ();
  work_list.safe_push (phi);

  while (!work_list.is_empty ())
    {
      phi_info *phi = work_list.pop ();
      visited_list.add (phi);
      for (use_info *use : phi->inputs ())
        {
          def_info *def = use->def ();
          set_info *set = safe_dyn_cast<set_info *> (def);
          if (!set)
            return hash_set<set_info *> ();

          gcc_assert (!set->insn ()->is_debug_insn ());

          if (insn_should_be_added_p (set->insn (), types))
            insns.add (set);
          if (set->insn ()->is_phi ())
            {
              phi_info *new_phi = as_a<phi_info *> (set);
              if (!visited_list.contains (new_phi))
                work_list.safe_push (new_phi);
            }
        }
    }
  return insns;
}

static hash_set<set_info *>
get_all_sets (set_info *set, bool /* get_real_inst */ real_p,
              bool /* get_phi */ phi_p, bool /* get_function_parameter */ param_p)
{
  if (real_p && phi_p && param_p)
    return get_all_sets (safe_dyn_cast<phi_info *> (set),
                         REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET);

  else if (real_p && param_p)
    return get_all_sets (safe_dyn_cast<phi_info *> (set),
                         REAL_SET | BB_HEAD_SET | BB_END_SET);

  else if (real_p)
    return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET);
  return hash_set<set_info *> ();
}

static bool
source_equal_p (insn_info *insn1, insn_info *insn2)
{
  if (!insn1 || !insn2)
    return false;
  rtx_insn *rinsn1 = insn1->rtl ();
  rtx_insn *rinsn2 = insn2->rtl ();
  if (!rinsn1 || !rinsn2)
    return false;

  rtx note1 = find_reg_equal_equiv_note (rinsn1);
  rtx note2 = find_reg_equal_equiv_note (rinsn2);
  /* We could handle the case of similar-looking REG_EQUALs as well but
     would need to verify that no insn in between modifies any of the
     source operands.  */
  if (note1 && note2 && rtx_equal_p (note1, note2)
      && REG_NOTE_KIND (note1) == REG_EQUIV)
    return true;
  return false;
}

static insn_info *
extract_single_source (set_info *set)
{
  if (!set)
    return nullptr;
  if (set->insn ()->is_real ())
    return set->insn ();
  if (!set->insn ()->is_phi ())
    return nullptr;
  hash_set<set_info *> sets = get_all_sets (set, true, false, true);
  if (sets.is_empty ())
    return nullptr;

  insn_info *first_insn = (*sets.begin ())->insn ();
  if (first_insn->is_artificial ())
    return nullptr;
  for (const set_info *set : sets)
    {
      /* If there is a head or end insn, we conservatively return
         NULL so that the VSETVL PASS will insert a vsetvl directly.  */
      if (set->insn ()->is_artificial ())
        return nullptr;
      if (set != *sets.begin () && !source_equal_p (set->insn (), first_insn))
        return nullptr;
    }

  return first_insn;
}

static insn_info *
extract_single_source (def_info *def)
{
  if (!def)
    return nullptr;
  return extract_single_source (dyn_cast<set_info *> (def));
}

static bool
same_equiv_note_p (set_info *set1, set_info *set2)
{
  insn_info *insn1 = extract_single_source (set1);
  insn_info *insn2 = extract_single_source (set2);
  if (!insn1 || !insn2)
    return false;
  return source_equal_p (insn1, insn2);
}

static unsigned
get_expr_id (unsigned bb_index, unsigned regno, unsigned num_bbs)
{
  return regno * num_bbs + bb_index;
}
static unsigned
get_regno (unsigned expr_id, unsigned num_bb)
{
  return expr_id / num_bb;
}
static unsigned
get_bb_index (unsigned expr_id, unsigned num_bb)
{
  return expr_id % num_bb;
}

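/* For example, with num_bbs == 10, expression id 23 encodes regno 2 in
   basic block 3: get_expr_id (3, 2, 10) == 2 * 10 + 3 == 23, and the
   round trip gives get_regno (23, 10) == 2 and
   get_bb_index (23, 10) == 3.  */
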
/* Return true if the SET result is not used by any instructions.  */
static bool
has_no_uses (basic_block cfg_bb, rtx_insn *rinsn, int regno)
{
  if (bitmap_bit_p (df_get_live_out (cfg_bb), regno))
    return false;

  rtx_insn *iter;
  for (iter = NEXT_INSN (rinsn); iter && iter != NEXT_INSN (BB_END (cfg_bb));
       iter = NEXT_INSN (iter))
    if (df_find_use (iter, regno_reg_rtx[regno]))
      return false;

  return true;
}

/* These flags indicate the minimum demand of the vl and vtype values by an
   RVV instruction.  For example, DEMAND_RATIO_P indicates that this RVV
   instruction only needs the SEW/LMUL ratio to remain the same, and does
   not require SEW and LMUL to be fixed.
   Therefore, if the former RVV instruction needs DEMAND_RATIO_P and the
   latter instruction needs DEMAND_SEW_LMUL_P and its SEW/LMUL is the same
   as that of the former instruction, then we can strengthen the minimum
   demand of the former instruction to DEMAND_SEW_LMUL_P, with the required
   SEW and LMUL being the SEW and LMUL of the latter instruction.  The
   vsetvl instruction generated according to the new demand can then also
   be used for the latter instruction, so there is no need to insert a
   separate vsetvl instruction for it.  */
enum demand_flags : unsigned
{
  DEMAND_EMPTY_P = 0,
  DEMAND_SEW_P = 1 << 0,
  DEMAND_LMUL_P = 1 << 1,
  DEMAND_RATIO_P = 1 << 2,
  DEMAND_GE_SEW_P = 1 << 3,
  DEMAND_TAIL_POLICY_P = 1 << 4,
  DEMAND_MASK_POLICY_P = 1 << 5,
  DEMAND_AVL_P = 1 << 6,
  DEMAND_NON_ZERO_AVL_P = 1 << 7,
};

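/* A hand-written illustration of the strengthening described above: an
   insn that demands only the ratio, with SEW/LMUL = 32/m2 (ratio 16),
   followed by an insn that demands the exact SEW/LMUL = 16/m1 (also
   ratio 16), can both be served by a single vsetvli for e16,m1, since
   that keeps the ratio the first insn cares about.  */
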
/* We split the demand information into three parts.  They are sew and lmul
   related (sew_lmul_demand_type), tail and mask policy related
   (policy_demand_type) and avl related (avl_demand_type).  Then we define
   three interfaces: available_p, compatible_p and merge.  available_p is
   used to determine whether the two vsetvl infos prev_info and next_info
   are available or not.  If prev_info is available for next_info, it means
   that the RVV insn corresponding to next_info on the path from prev_info
   to next_info can be used without inserting a separate vsetvl
   instruction.  compatible_p is used to determine whether prev_info is
   compatible with next_info, and if so, merge can be used to merge the
   stricter demand information from next_info into prev_info so that
   prev_info becomes available to next_info.  */

enum class sew_lmul_demand_type : unsigned
{
  sew_lmul = demand_flags::DEMAND_SEW_P | demand_flags::DEMAND_LMUL_P,
  ratio_only = demand_flags::DEMAND_RATIO_P,
  sew_only = demand_flags::DEMAND_SEW_P,
  ge_sew = demand_flags::DEMAND_GE_SEW_P,
  ratio_and_ge_sew
    = demand_flags::DEMAND_RATIO_P | demand_flags::DEMAND_GE_SEW_P,
};

enum class policy_demand_type : unsigned
{
  tail_mask_policy
    = demand_flags::DEMAND_TAIL_POLICY_P | demand_flags::DEMAND_MASK_POLICY_P,
  tail_policy_only = demand_flags::DEMAND_TAIL_POLICY_P,
  mask_policy_only = demand_flags::DEMAND_MASK_POLICY_P,
  ignore_policy = demand_flags::DEMAND_EMPTY_P,
};

enum class avl_demand_type : unsigned
{
  avl = demand_flags::DEMAND_AVL_P,
  non_zero_avl = demand_flags::DEMAND_NON_ZERO_AVL_P,
  ignore_avl = demand_flags::DEMAND_EMPTY_P,
};

class vsetvl_info
{
private:
  insn_info *m_insn;
  bb_info *m_bb;
  rtx m_avl;
  rtx m_vl;
  set_info *m_avl_def;
  uint8_t m_sew;
  uint8_t m_max_sew;
  vlmul_type m_vlmul;
  uint8_t m_ratio;
  bool m_ta;
  bool m_ma;

  sew_lmul_demand_type m_sew_lmul_demand;
  policy_demand_type m_policy_demand;
  avl_demand_type m_avl_demand;

  enum class state_type
  {
    UNINITIALIZED,
    VALID,
    UNKNOWN,
    EMPTY,
  };
  state_type m_state;

  bool m_delete;
  bool m_change_vtype_only;
  insn_info *m_read_vl_insn;
  bool m_vl_used_by_non_rvv_insn;

public:
  vsetvl_info ()
    : m_insn (nullptr), m_bb (nullptr), m_avl (NULL_RTX), m_vl (NULL_RTX),
      m_avl_def (nullptr), m_sew (0), m_max_sew (0), m_vlmul (LMUL_RESERVED),
      m_ratio (0), m_ta (false), m_ma (false),
      m_sew_lmul_demand (sew_lmul_demand_type::sew_lmul),
      m_policy_demand (policy_demand_type::tail_mask_policy),
      m_avl_demand (avl_demand_type::avl), m_state (state_type::UNINITIALIZED),
      m_delete (false), m_change_vtype_only (false), m_read_vl_insn (nullptr),
      m_vl_used_by_non_rvv_insn (false)
  {}

  vsetvl_info (insn_info *insn) : vsetvl_info () { parse_insn (insn); }

  vsetvl_info (rtx_insn *insn) : vsetvl_info () { parse_insn (insn); }

  void set_avl (rtx avl) { m_avl = avl; }
  void set_vl (rtx vl) { m_vl = vl; }
  void set_avl_def (set_info *avl_def) { m_avl_def = avl_def; }
  void set_sew (uint8_t sew) { m_sew = sew; }
  void set_vlmul (vlmul_type vlmul) { m_vlmul = vlmul; }
  void set_ratio (uint8_t ratio) { m_ratio = ratio; }
  void set_ta (bool ta) { m_ta = ta; }
  void set_ma (bool ma) { m_ma = ma; }
  void set_delete () { m_delete = true; }
  void set_bb (bb_info *bb) { m_bb = bb; }
  void set_max_sew (uint8_t max_sew) { m_max_sew = max_sew; }
  void set_change_vtype_only () { m_change_vtype_only = true; }
  void set_read_vl_insn (insn_info *insn) { m_read_vl_insn = insn; }

  rtx get_avl () const { return m_avl; }
  rtx get_vl () const { return m_vl; }
  set_info *get_avl_def () const { return m_avl_def; }
  uint8_t get_sew () const { return m_sew; }
  vlmul_type get_vlmul () const { return m_vlmul; }
  uint8_t get_ratio () const { return m_ratio; }
  bool get_ta () const { return m_ta; }
  bool get_ma () const { return m_ma; }
  insn_info *get_insn () const { return m_insn; }
  bool delete_p () const { return m_delete; }
  bb_info *get_bb () const { return m_bb; }
  uint8_t get_max_sew () const { return m_max_sew; }
  insn_info *get_read_vl_insn () const { return m_read_vl_insn; }
  bool vl_used_by_non_rvv_insn_p () const { return m_vl_used_by_non_rvv_insn; }

  bool has_imm_avl () const { return m_avl && CONST_INT_P (m_avl); }
  bool has_vlmax_avl () const { return vlmax_avl_p (m_avl); }
  bool has_nonvlmax_reg_avl () const
  {
    return m_avl && REG_P (m_avl) && !has_vlmax_avl ();
  }
  bool has_non_zero_avl () const
  {
    if (has_imm_avl ())
      return INTVAL (m_avl) > 0;
    return has_vlmax_avl ();
  }
  bool has_vl () const
  {
    /* The VL operand can only be either a NULL_RTX or a register.  */
    gcc_assert (!m_vl || REG_P (m_vl));
    return m_vl != NULL_RTX;
  }
  bool has_same_ratio (const vsetvl_info &other) const
  {
    return get_ratio () == other.get_ratio ();
  }

  /* The block of INSN isn't always the same as the block of the
     VSETVL_INFO, meaning we may have 'get_insn ()->bb () != get_bb ()'.

       E.g.  BB 2 (Empty) ---> BB 3 (VALID, has rvv insn 1)

     BB 2 has an empty VSETVL_INFO, whereas BB 3 has a VSETVL_INFO that
     satisfies get_insn ()->bb () == get_bb ().  In earliest fusion, we may
     fuse bb 3 and bb 2 so that the 'get_bb ()' of BB 2's VSETVL_INFO will
     be BB 2 whereas the 'get_insn ()' of BB 2's VSETVL_INFO will be rvv
     insn 1 (which is located in BB 3).  */
  bool insn_inside_bb_p () const { return get_insn ()->bb () == get_bb (); }
  void update_avl (const vsetvl_info &other)
  {
    m_avl = other.get_avl ();
    m_vl = other.get_vl ();
    m_avl_def = other.get_avl_def ();
  }

  bool uninit_p () const { return m_state == state_type::UNINITIALIZED; }
  bool valid_p () const { return m_state == state_type::VALID; }
  bool unknown_p () const { return m_state == state_type::UNKNOWN; }
  bool empty_p () const { return m_state == state_type::EMPTY; }
  bool change_vtype_only_p () const { return m_change_vtype_only; }

  void set_valid () { m_state = state_type::VALID; }
  void set_unknown () { m_state = state_type::UNKNOWN; }
  void set_empty () { m_state = state_type::EMPTY; }

  void set_sew_lmul_demand (sew_lmul_demand_type demand)
  {
    m_sew_lmul_demand = demand;
  }
  void set_policy_demand (policy_demand_type demand)
  {
    m_policy_demand = demand;
  }
  void set_avl_demand (avl_demand_type demand) { m_avl_demand = demand; }

  sew_lmul_demand_type get_sew_lmul_demand () const
  {
    return m_sew_lmul_demand;
  }
  policy_demand_type get_policy_demand () const { return m_policy_demand; }
  avl_demand_type get_avl_demand () const { return m_avl_demand; }

  void normalize_demand (unsigned demand_flags)
  {
    switch (demand_flags
            & (DEMAND_SEW_P | DEMAND_LMUL_P | DEMAND_RATIO_P
               | DEMAND_GE_SEW_P))
      {
      case (unsigned) sew_lmul_demand_type::sew_lmul:
        m_sew_lmul_demand = sew_lmul_demand_type::sew_lmul;
        break;
      case (unsigned) sew_lmul_demand_type::ratio_only:
        m_sew_lmul_demand = sew_lmul_demand_type::ratio_only;
        break;
      case (unsigned) sew_lmul_demand_type::sew_only:
        m_sew_lmul_demand = sew_lmul_demand_type::sew_only;
        break;
      case (unsigned) sew_lmul_demand_type::ge_sew:
        m_sew_lmul_demand = sew_lmul_demand_type::ge_sew;
        break;
      case (unsigned) sew_lmul_demand_type::ratio_and_ge_sew:
        m_sew_lmul_demand = sew_lmul_demand_type::ratio_and_ge_sew;
        break;
      default:
        gcc_unreachable ();
      }

    switch (demand_flags & (DEMAND_TAIL_POLICY_P | DEMAND_MASK_POLICY_P))
      {
      case (unsigned) policy_demand_type::tail_mask_policy:
        m_policy_demand = policy_demand_type::tail_mask_policy;
        break;
      case (unsigned) policy_demand_type::tail_policy_only:
        m_policy_demand = policy_demand_type::tail_policy_only;
        break;
      case (unsigned) policy_demand_type::mask_policy_only:
        m_policy_demand = policy_demand_type::mask_policy_only;
        break;
      case (unsigned) policy_demand_type::ignore_policy:
        m_policy_demand = policy_demand_type::ignore_policy;
        break;
      default:
        gcc_unreachable ();
      }

    switch (demand_flags & (DEMAND_AVL_P | DEMAND_NON_ZERO_AVL_P))
      {
      case (unsigned) avl_demand_type::avl:
        m_avl_demand = avl_demand_type::avl;
        break;
      case (unsigned) avl_demand_type::non_zero_avl:
        m_avl_demand = avl_demand_type::non_zero_avl;
        break;
      case (unsigned) avl_demand_type::ignore_avl:
        m_avl_demand = avl_demand_type::ignore_avl;
        break;
      default:
        gcc_unreachable ();
      }
  }

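  /* For instance (a hand-picked illustration), a plain RVV arithmetic insn
     with ta,ma demands DEMAND_SEW_P | DEMAND_LMUL_P | DEMAND_AVL_P;
     normalize_demand maps those flags to sew_lmul_demand_type::sew_lmul,
     policy_demand_type::ignore_policy and avl_demand_type::avl.  */
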
  void parse_insn (rtx_insn *rinsn)
  {
    if (!NONDEBUG_INSN_P (rinsn))
      return;
    if (optimize == 0 && !has_vtype_op (rinsn))
      return;
    gcc_assert (!vsetvl_discard_result_insn_p (rinsn));
    set_valid ();
    extract_insn_cached (rinsn);
    m_avl = ::get_avl (rinsn);
    if (has_vlmax_avl () || vsetvl_insn_p (rinsn))
      m_vl = ::get_vl (rinsn);
    m_sew = ::get_sew (rinsn);
    m_vlmul = ::get_vlmul (rinsn);
    m_ta = tail_agnostic_p (rinsn);
    m_ma = mask_agnostic_p (rinsn);
  }

  void parse_insn (insn_info *insn)
  {
    m_insn = insn;
    m_bb = insn->bb ();
    /* Return if it is a debug insn, for consistency with optimize == 0.  */
    if (insn->is_debug_insn ())
      return;

    /* We set it to unknown since we don't know what will happen in a CALL
       or ASM.  */
    if (insn->is_call () || insn->is_asm ())
      {
        set_unknown ();
        return;
      }

    /* If this is something that updates VL/VTYPE that we don't know about,
       set the state to unknown.  */
    if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())
        && (find_access (insn->defs (), VL_REGNUM)
            || find_access (insn->defs (), VTYPE_REGNUM)))
      {
        set_unknown ();
        return;
      }

    if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()))
      /* uninitialized */
      return;

    set_valid ();

    m_avl = ::get_avl (insn->rtl ());
    if (m_avl)
      {
        if (vsetvl_insn_p (insn->rtl ()) || has_vlmax_avl ())
          m_vl = ::get_vl (insn->rtl ());

        if (has_nonvlmax_reg_avl ())
          m_avl_def = find_access (insn->uses (), REGNO (m_avl))->def ();
      }

    m_sew = ::get_sew (insn->rtl ());
    m_vlmul = ::get_vlmul (insn->rtl ());
    m_ratio = get_attr_ratio (insn->rtl ());
    /* When get_attr_ratio is invalid, this kind of instruction doesn't
       care about the ratio.  However, we still need this value in the
       demand info backward analysis.  */
    if (m_ratio == INVALID_ATTRIBUTE)
      m_ratio = calculate_ratio (m_sew, m_vlmul);
    m_ta = tail_agnostic_p (insn->rtl ());
    m_ma = mask_agnostic_p (insn->rtl ());

    /* If the merge operand is an undef value, we prefer agnostic.  */
    int merge_op_idx = get_attr_merge_op_idx (insn->rtl ());
    if (merge_op_idx != INVALID_ATTRIBUTE
        && satisfies_constraint_vu (recog_data.operand[merge_op_idx]))
      {
        m_ta = true;
        m_ma = true;
      }

    /* Determine the demand info of the RVV insn.  */
    m_max_sew = get_max_int_sew ();
    unsigned dflags = 0;
    if (vector_config_insn_p (insn->rtl ()))
      {
        dflags |= demand_flags::DEMAND_AVL_P;
        dflags |= demand_flags::DEMAND_RATIO_P;
      }
    else
      {
        if (has_vl_op (insn->rtl ()))
          {
            if (scalar_move_insn_p (insn->rtl ()))
              {
                /* If the avl for vmv.s.x comes from the vsetvl instruction,
                   we don't know if the avl is non-zero, so it is set to
                   DEMAND_AVL_P for now.  It may be corrected to
                   DEMAND_NON_ZERO_AVL_P later when more information is
                   available.  */
                if (has_non_zero_avl ())
                  dflags |= demand_flags::DEMAND_NON_ZERO_AVL_P;
                else
                  dflags |= demand_flags::DEMAND_AVL_P;
              }
            else
              dflags |= demand_flags::DEMAND_AVL_P;
          }

        if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE)
          dflags |= demand_flags::DEMAND_RATIO_P;
        else
          {
            if (scalar_move_insn_p (insn->rtl ()) && m_ta)
              {
                dflags |= demand_flags::DEMAND_GE_SEW_P;
                m_max_sew = get_attr_type (insn->rtl ()) == TYPE_VFMOVFV
                              ? get_max_float_sew ()
                              : get_max_int_sew ();
              }
            else
              dflags |= demand_flags::DEMAND_SEW_P;

            if (!ignore_vlmul_insn_p (insn->rtl ()))
              dflags |= demand_flags::DEMAND_LMUL_P;
          }

        if (!m_ta)
          dflags |= demand_flags::DEMAND_TAIL_POLICY_P;
        if (!m_ma)
          dflags |= demand_flags::DEMAND_MASK_POLICY_P;
      }

    normalize_demand (dflags);

    /* Optimize AVL from the vsetvl instruction.  */
    insn_info *def_insn = extract_single_source (get_avl_def ());
    if (def_insn && vsetvl_insn_p (def_insn->rtl ()))
      {
        vsetvl_info def_info = vsetvl_info (def_insn);
        if ((scalar_move_insn_p (insn->rtl ())
             || def_info.get_ratio () == get_ratio ())
            && (def_info.has_vlmax_avl () || def_info.has_imm_avl ()))
          {
            update_avl (def_info);
            if (scalar_move_insn_p (insn->rtl ()) && has_non_zero_avl ())
              m_avl_demand = avl_demand_type::non_zero_avl;
          }
      }

    /* Determine if the dest operand (vl) has been used by non-RVV
       instructions.  */
    if (has_vl ())
      {
        const hash_set<use_info *> vl_uses
          = get_all_real_uses (get_insn (), REGNO (get_vl ()));
        for (use_info *use : vl_uses)
          {
            gcc_assert (use->insn ()->is_real ());
            rtx_insn *rinsn = use->insn ()->rtl ();
            if (!has_vl_op (rinsn)
                || count_regno_occurrences (rinsn, REGNO (get_vl ())) != 1)
              {
                m_vl_used_by_non_rvv_insn = true;
                break;
              }
            rtx avl = ::get_avl (rinsn);
            if (!avl || !REG_P (avl) || REGNO (get_vl ()) != REGNO (avl))
              {
                m_vl_used_by_non_rvv_insn = true;
                break;
              }
          }
      }

    /* Collect the read vl insn for the fault-only-first rvv loads.  */
    if (fault_first_load_p (insn->rtl ()))
      {
        for (insn_info *i = insn->next_nondebug_insn ();
             i->bb () == insn->bb (); i = i->next_nondebug_insn ())
          {
            if (find_access (i->defs (), VL_REGNUM))
              break;
            if (i->rtl () && read_vl_insn_p (i->rtl ()))
              {
                m_read_vl_insn = i;
                break;
              }
          }
      }
  }

  /* Returns the corresponding vsetvl rtx pattern.  */
  rtx get_vsetvl_pat (bool ignore_vl = false) const
  {
    rtx avl = get_avl ();
    /* If optimize == 0 and the instruction is vmv.x.s/vfmv.f.s, set the
       value of avl to (const_int 0) so that the VSETVL PASS will insert
       vsetvl correctly.  */
    if (!get_avl ())
      avl = GEN_INT (0);
    rtx sew = gen_int_mode (get_sew (), Pmode);
    rtx vlmul = gen_int_mode (get_vlmul (), Pmode);
    rtx ta = gen_int_mode (get_ta (), Pmode);
    rtx ma = gen_int_mode (get_ma (), Pmode);

    if (change_vtype_only_p ())
      return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma);
    else if (has_vl () && !ignore_vl)
      return gen_vsetvl (Pmode, get_vl (), avl, sew, vlmul, ta, ma);
    else
      return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma);
  }

  /* Return true if the non-AVL operands of THIS would be modified
     if we fused the VL modification from OTHER into THIS.  */
  bool vl_modify_non_avl_op_p (const vsetvl_info &other) const
  {
    /* We don't need to worry about any operands of THIS being modified
       by OTHER's vsetvl, since OTHER's vsetvl doesn't modify any
       operand.  */
    if (!other.has_vl ())
      return false;

    /* THIS's VL operand always preempts OTHER's VL operand.  */
    if (this->has_vl ())
      return false;

    /* If THIS has a non-IMM AVL and THIS is AVL-compatible with OTHER,
       the AVL value of THIS is the same as the VL value of OTHER.  */
    if (!this->has_imm_avl ())
      return false;
    return find_access (this->get_insn ()->uses (), REGNO (other.get_vl ()));
  }

  bool operator== (const vsetvl_info &other) const
  {
    gcc_assert (!uninit_p () && !other.uninit_p ()
                && "Uninitialization should not happen");

    if (empty_p ())
      return other.empty_p ();
    if (unknown_p ())
      return other.unknown_p ();

    return get_insn () == other.get_insn () && get_bb () == other.get_bb ()
           && get_avl () == other.get_avl () && get_vl () == other.get_vl ()
           && get_avl_def () == other.get_avl_def ()
           && get_sew () == other.get_sew ()
           && get_vlmul () == other.get_vlmul () && get_ta () == other.get_ta ()
           && get_ma () == other.get_ma ()
           && get_avl_demand () == other.get_avl_demand ()
           && get_sew_lmul_demand () == other.get_sew_lmul_demand ()
           && get_policy_demand () == other.get_policy_demand ();
  }

  void dump (FILE *file, const char *indent = "") const
  {
    if (uninit_p ())
      {
        fprintf (file, "UNINITIALIZED.\n");
        return;
      }
    else if (unknown_p ())
      {
        fprintf (file, "UNKNOWN.\n");
        return;
      }
    else if (empty_p ())
      {
        fprintf (file, "EMPTY.\n");
        return;
      }
    else if (valid_p ())
      fprintf (file, "VALID (insn %u, bb %u)%s\n", get_insn ()->uid (),
               get_bb ()->index (), delete_p () ? " (deleted)" : "");
    else
      gcc_unreachable ();

    fprintf (file, "%sDemand fields:", indent);
    if (m_sew_lmul_demand == sew_lmul_demand_type::sew_lmul)
      fprintf (file, " demand_sew_lmul");
    else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_only)
      fprintf (file, " demand_ratio_only");
    else if (m_sew_lmul_demand == sew_lmul_demand_type::sew_only)
      fprintf (file, " demand_sew_only");
    else if (m_sew_lmul_demand == sew_lmul_demand_type::ge_sew)
      fprintf (file, " demand_ge_sew");
    else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_and_ge_sew)
      fprintf (file, " demand_ratio_and_ge_sew");

    if (m_policy_demand == policy_demand_type::tail_mask_policy)
      fprintf (file, " demand_tail_mask_policy");
    else if (m_policy_demand == policy_demand_type::tail_policy_only)
      fprintf (file, " demand_tail_policy_only");
    else if (m_policy_demand == policy_demand_type::mask_policy_only)
      fprintf (file, " demand_mask_policy_only");

    if (m_avl_demand == avl_demand_type::avl)
      fprintf (file, " demand_avl");
    else if (m_avl_demand == avl_demand_type::non_zero_avl)
      fprintf (file, " demand_non_zero_avl");
    fprintf (file, "\n");

    fprintf (file, "%sSEW=%d, ", indent, get_sew ());
    fprintf (file, "VLMUL=%s, ", vlmul_to_str (get_vlmul ()));
    fprintf (file, "RATIO=%d, ", get_ratio ());
    fprintf (file, "MAX_SEW=%d\n", get_max_sew ());

    fprintf (file, "%sTAIL_POLICY=%s, ", indent, policy_to_str (get_ta ()));
    fprintf (file, "MASK_POLICY=%s\n", policy_to_str (get_ma ()));

    fprintf (file, "%sAVL=", indent);
    print_rtl_single (file, get_avl ());
    fprintf (file, "%sVL=", indent);
    print_rtl_single (file, get_vl ());
    if (change_vtype_only_p ())
      fprintf (file, "%schange vtype only\n", indent);
    if (get_read_vl_insn ())
      fprintf (file, "%sread_vl_insn: insn %u\n", indent,
               get_read_vl_insn ()->uid ());
    if (vl_used_by_non_rvv_insn_p ())
      fprintf (file, "%suse_by_non_rvv_insn=true\n", indent);
  }
};

class vsetvl_block_info
{
public:
  /* The static execution probability of the demand info.  */
  profile_probability probability;

  auto_vec<vsetvl_info> local_infos;
  vsetvl_info global_info;
  bb_info *bb;

  bool full_available;

  vsetvl_block_info () : bb (nullptr), full_available (false)
  {
    local_infos.safe_grow_cleared (0);
    global_info.set_empty ();
  }
  vsetvl_block_info (const vsetvl_block_info &other)
    : probability (other.probability), local_infos (other.local_infos.copy ()),
      global_info (other.global_info), bb (other.bb)
  {}

  vsetvl_info &get_entry_info ()
  {
    gcc_assert (!empty_p ());
    return local_infos.is_empty () ? global_info : local_infos[0];
  }
  vsetvl_info &get_exit_info ()
  {
    gcc_assert (!empty_p ());
    return local_infos.is_empty () ? global_info
                                   : local_infos[local_infos.length () - 1];
  }
  const vsetvl_info &get_entry_info () const
  {
    gcc_assert (!empty_p ());
    return local_infos.is_empty () ? global_info : local_infos[0];
  }
  const vsetvl_info &get_exit_info () const
  {
    gcc_assert (!empty_p ());
    return local_infos.is_empty () ? global_info
                                   : local_infos[local_infos.length () - 1];
  }

  bool empty_p () const { return local_infos.is_empty () && !has_info (); }
  bool has_info () const { return !global_info.empty_p (); }
  void set_info (const vsetvl_info &info)
  {
    gcc_assert (local_infos.is_empty ());
    global_info = info;
    global_info.set_bb (bb);
  }
  void set_empty_info () { global_info.set_empty (); }
};

/* The demand system is the wrapper of the RVV-based VSETVL info analysis
   tools.  It defines compatibility rules for SEW/LMUL, POLICY and AVL.
   Also, it provides 3 interfaces available_p, compatible_p and
   merge for the VSETVL PASS analysis and optimization.

     - available_p: Determine whether the next info can get the
       available VSETVL status from the previous info.
       e.g. bb 2 (demand SEW = 32, LMUL = M2) -> bb 3 (demand RATIO = 16).
       Since the bb 2 demand info (SEW/LMUL = 32/2 = 16) satisfies the bb 3
       demand, the VSETVL instruction in bb 3 can be elided.
       available_p (previous, next) is true in such a situation.
     - compatible_p: Determine whether prev_info is compatible with
       next_info, so that we can have a new merged info that is available
       to both of them.
     - merge: Merge the stricter demand information from next_info into
       prev_info so that prev_info becomes available to next_info.  */
class demand_system
{
private:
  sbitmap *m_avl_def_in;
  sbitmap *m_avl_def_out;

  /* Predicates.  */

  inline bool always_true (const vsetvl_info &prev ATTRIBUTE_UNUSED,
                           const vsetvl_info &next ATTRIBUTE_UNUSED)
  {
    return true;
  }
  inline bool always_false (const vsetvl_info &prev ATTRIBUTE_UNUSED,
                            const vsetvl_info &next ATTRIBUTE_UNUSED)
  {
    return false;
  }

  /* Predicates for sew and lmul.  */

  inline bool lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    return prev.get_vlmul () == next.get_vlmul ();
  }
  inline bool sew_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    return prev.get_sew () == next.get_sew ();
  }
  inline bool sew_lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    return lmul_eq_p (prev, next) && sew_eq_p (prev, next);
  }
  inline bool sew_ge_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    return prev.get_sew () == next.get_sew ()
           || (next.get_ta () && prev.get_sew () > next.get_sew ());
  }
  inline bool sew_le_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    return prev.get_sew () == next.get_sew ()
           || (prev.get_ta () && prev.get_sew () < next.get_sew ());
  }
  inline bool prev_sew_le_next_max_sew_p (const vsetvl_info &prev,
                                          const vsetvl_info &next)
  {
    return prev.get_sew () <= next.get_max_sew ();
  }
  inline bool next_sew_le_prev_max_sew_p (const vsetvl_info &prev,
                                          const vsetvl_info &next)
  {
    return next.get_sew () <= prev.get_max_sew ();
  }
  inline bool max_sew_overlap_p (const vsetvl_info &prev,
                                 const vsetvl_info &next)
  {
    return !(prev.get_sew () > next.get_max_sew ()
             || next.get_sew () > prev.get_max_sew ());
  }
  inline bool ratio_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    return prev.has_same_ratio (next);
  }
  inline bool prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev,
                                               const vsetvl_info &next)
  {
    return prev.get_ratio () >= (next.get_sew () / 8);
  }
  inline bool next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev,
                                               const vsetvl_info &next)
  {
    return next.get_ratio () >= (prev.get_sew () / 8);
  }

  inline bool sew_ge_and_ratio_eq_p (const vsetvl_info &prev,
                                     const vsetvl_info &next)
  {
    return sew_ge_p (prev, next) && ratio_eq_p (prev, next);
  }
  inline bool sew_ge_and_prev_sew_le_next_max_sew_p (const vsetvl_info &prev,
                                                     const vsetvl_info &next)
  {
    return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next);
  }
  inline bool
  sew_ge_and_prev_sew_le_next_max_sew_and_next_ratio_valid_for_prev_sew_p (
    const vsetvl_info &prev, const vsetvl_info &next)
  {
    return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next)
           && next_ratio_valid_for_prev_sew_p (prev, next);
  }
  inline bool sew_le_and_next_sew_le_prev_max_sew_p (const vsetvl_info &prev,
                                                     const vsetvl_info &next)
  {
    return sew_le_p (prev, next) && next_sew_le_prev_max_sew_p (prev, next);
  }
  inline bool
  max_sew_overlap_and_next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev,
                                                       const vsetvl_info &next)
  {
    return next_ratio_valid_for_prev_sew_p (prev, next)
           && max_sew_overlap_p (prev, next);
  }
  inline bool
  sew_le_and_next_sew_le_prev_max_sew_and_ratio_eq_p (const vsetvl_info &prev,
                                                      const vsetvl_info &next)
  {
    return sew_le_p (prev, next) && ratio_eq_p (prev, next)
           && next_sew_le_prev_max_sew_p (prev, next);
  }
  inline bool
  max_sew_overlap_and_prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev,
                                                       const vsetvl_info &next)
  {
    return prev_ratio_valid_for_next_sew_p (prev, next)
           && max_sew_overlap_p (prev, next);
  }
  inline bool
  sew_le_and_next_sew_le_prev_max_sew_and_prev_ratio_valid_for_next_sew_p (
    const vsetvl_info &prev, const vsetvl_info &next)
  {
    return sew_le_p (prev, next) && prev_ratio_valid_for_next_sew_p (prev, next)
           && next_sew_le_prev_max_sew_p (prev, next);
  }
  inline bool max_sew_overlap_and_ratio_eq_p (const vsetvl_info &prev,
                                              const vsetvl_info &next)
  {
    return ratio_eq_p (prev, next) && max_sew_overlap_p (prev, next);
  }

  /* Predicates for tail and mask policy.  */

  inline bool tail_policy_eq_p (const vsetvl_info &prev,
                                const vsetvl_info &next)
  {
    return prev.get_ta () == next.get_ta ();
  }
  inline bool mask_policy_eq_p (const vsetvl_info &prev,
                                const vsetvl_info &next)
  {
    return prev.get_ma () == next.get_ma ();
  }
  inline bool tail_mask_policy_eq_p (const vsetvl_info &prev,
                                     const vsetvl_info &next)
  {
    return tail_policy_eq_p (prev, next) && mask_policy_eq_p (prev, next);
  }

  /* Predicates for avl.  */

  inline bool modify_or_use_vl_p (insn_info *i, const vsetvl_info &info)
  {
    if (info.has_vl ())
      {
        if (find_access (i->defs (), REGNO (info.get_vl ())))
          return true;
        if (find_access (i->uses (), REGNO (info.get_vl ())))
          {
            resource_info resource = full_register (REGNO (info.get_vl ()));
            def_lookup dl1 = crtl->ssa->find_def (resource, i);
            def_lookup dl2 = crtl->ssa->find_def (resource, info.get_insn ());
            if (dl1.matching_set () || dl2.matching_set ())
              return true;
            /* If their VLs come from the same def, we still want to fuse
               their VSETVL demand info to gain better performance.  */
            return dl1.prev_def (i) != dl2.prev_def (i);
          }
      }
    return false;
  }
  inline bool modify_avl_p (insn_info *i, const vsetvl_info &info)
  {
    return info.has_nonvlmax_reg_avl ()
           && find_access (i->defs (), REGNO (info.get_avl ()));
  }

  inline bool modify_reg_between_p (insn_info *prev_insn, insn_info *curr_insn,
                                    unsigned regno)
  {
    gcc_assert (prev_insn->compare_with (curr_insn) < 0);
    for (insn_info *i = curr_insn->prev_nondebug_insn (); i != prev_insn;
         i = i->prev_nondebug_insn ())
      {
        // no def of regno
        if (find_access (i->defs (), regno))
          return true;
      }
    return false;
  }

  inline bool reg_avl_equal_p (const vsetvl_info &prev,
                               const vsetvl_info &next)
  {
    if (!prev.has_nonvlmax_reg_avl () || !next.has_nonvlmax_reg_avl ())
      return false;

    if (same_equiv_note_p (prev.get_avl_def (), next.get_avl_def ()))
      return true;

    if (REGNO (prev.get_avl ()) != REGNO (next.get_avl ()))
      return false;

    insn_info *prev_insn = prev.get_insn ();
    if (prev.get_bb () != prev_insn->bb ())
      prev_insn = prev.get_bb ()->end_insn ();

    insn_info *next_insn = next.get_insn ();
    if (next.get_bb () != next_insn->bb ())
      next_insn = next.get_bb ()->end_insn ();

    return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false);
  }

  inline bool avl_equal_p (const vsetvl_info &prev, const vsetvl_info &next)
  {
    gcc_assert (prev.valid_p () && next.valid_p ());

    if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
      return false;

    if (vector_config_insn_p (prev.get_insn ()->rtl ()) && next.get_avl_def ()
        && next.get_avl_def ()->insn () == prev.get_insn ())
      return true;

    if (prev.get_read_vl_insn ())
      {
        if (!next.has_nonvlmax_reg_avl () || !next.get_avl_def ())
          return false;
        insn_info *avl_def_insn = extract_single_source (next.get_avl_def ());
        return avl_def_insn == prev.get_read_vl_insn ();
      }

    if (prev == next && prev.has_nonvlmax_reg_avl ())
      {
        insn_info *insn = prev.get_insn ();
        bb_info *bb = insn->bb ();
        for (insn_info *i = insn; real_insn_and_same_bb_p (i, bb);
             i = i->next_nondebug_insn ())
          if (find_access (i->defs (), REGNO (prev.get_avl ())))
            return false;
      }

    if (prev.has_vlmax_avl () && next.has_vlmax_avl ())
      return true;
    else if (prev.has_imm_avl () && next.has_imm_avl ())
      return INTVAL (prev.get_avl ()) == INTVAL (next.get_avl ());
    else if (prev.has_vl () && next.has_nonvlmax_reg_avl ()
             && REGNO (prev.get_vl ()) == REGNO (next.get_avl ()))
      {
        insn_info *prev_insn = prev.insn_inside_bb_p ()
                                 ? prev.get_insn ()
                                 : prev.get_bb ()->end_insn ();

        insn_info *next_insn = next.insn_inside_bb_p ()
                                 ? next.get_insn ()
                                 : next.get_bb ()->end_insn ();
        return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false);
      }
    else if (prev.has_nonvlmax_reg_avl () && next.has_nonvlmax_reg_avl ())
      return reg_avl_equal_p (prev, next);

    return false;
  }
  inline bool avl_equal_or_prev_avl_non_zero_p (const vsetvl_info &prev,
                                                const vsetvl_info &next)
  {
    return avl_equal_p (prev, next) || prev.has_non_zero_avl ();
  }

  inline bool can_use_next_avl_p (const vsetvl_info &prev,
                                  const vsetvl_info &next)
  {
    /* Forbid the AVL/VL propagation if the VL of NEXT is used
       by non-RVV instructions.  This is because:

         bb 2:
           PREV: scalar move (no AVL)
         bb 3:
           NEXT: vsetvl a5(VL), a4(AVL) ...
           branch a5,zero

       Since a user vsetvl instruction has no side effects and should
       have been placed in the correct and optimal location of the
       program by earlier passes, it is unreasonable for the VSETVL PASS
       to move it elsewhere if its VL is used by non-RVV instructions.

       Note: we only forbid the cases where VL is used by the following
       non-RVV instructions, which would cause issues.  We don't forbid
       other cases since they won't cause correctness issues and we still
       want more demand info to be fused backward.  The later LCM
       algorithm should know the optimal location of the vsetvl.  */
    if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
      return false;

    if (!next.has_nonvlmax_reg_avl () && !next.has_vl ())
      return true;

    insn_info *prev_insn = prev.get_insn ();
    if (prev.get_bb () != prev_insn->bb ())
      prev_insn = prev.get_bb ()->end_insn ();

    insn_info *next_insn = next.get_insn ();
    if (next.get_bb () != next_insn->bb ())
      next_insn = next.get_bb ()->end_insn ();

    return avl_vl_unmodified_between_p (prev_insn, next_insn, next);
  }

  inline bool avl_equal_or_next_avl_non_zero_and_can_use_next_avl_p (
    const vsetvl_info &prev, const vsetvl_info &next)
  {
    return avl_equal_p (prev, next)
           || (next.has_non_zero_avl () && can_use_next_avl_p (prev, next));
  }

  /* Modifiers.  */

  inline void nop (const vsetvl_info &prev ATTRIBUTE_UNUSED,
                   const vsetvl_info &next ATTRIBUTE_UNUSED)
  {}

  /* Modifiers for sew and lmul.  */

  inline void use_min_of_max_sew (vsetvl_info &prev, const vsetvl_info &next)
  {
    prev.set_max_sew (MIN (prev.get_max_sew (), next.get_max_sew ()));
  }
  inline void use_next_sew (vsetvl_info &prev, const vsetvl_info &next)
  {
    prev.set_sew (next.get_sew ());
    use_min_of_max_sew (prev, next);
  }
  inline void use_max_sew (vsetvl_info &prev, const vsetvl_info &next)
  {
    auto max_sew = std::max (prev.get_sew (), next.get_sew ());
    prev.set_sew (max_sew);
    use_min_of_max_sew (prev, next);
  }
  inline void use_next_sew_lmul (vsetvl_info &prev, const vsetvl_info &next)
  {
    use_next_sew (prev, next);
    prev.set_vlmul (next.get_vlmul ());
    prev.set_ratio (next.get_ratio ());
  }
  inline void use_next_sew_with_prev_ratio (vsetvl_info &prev,
                                            const vsetvl_info &next)
  {
    use_next_sew (prev, next);
    prev.set_vlmul (calculate_vlmul (next.get_sew (), prev.get_ratio ()));
  }
  inline void modify_lmul_with_next_ratio (vsetvl_info &prev,
                                           const vsetvl_info &next)
  {
    prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ()));
    prev.set_ratio (next.get_ratio ());
  }

  inline void use_max_sew_and_lmul_with_next_ratio (vsetvl_info &prev,
                                                    const vsetvl_info &next)
  {
    prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ()));
    use_max_sew (prev, next);
    prev.set_ratio (next.get_ratio ());
  }

  inline void use_max_sew_and_lmul_with_prev_ratio (vsetvl_info &prev,
                                                    const vsetvl_info &next)
  {
    auto max_sew = std::max (prev.get_sew (), next.get_sew ());
    prev.set_vlmul (calculate_vlmul (max_sew, prev.get_ratio ()));
    prev.set_sew (max_sew);
  }

  /* Modifiers for tail and mask policy.  */

  inline void use_tail_policy (vsetvl_info &prev, const vsetvl_info &next)
  {
    if (!next.get_ta ())
      prev.set_ta (next.get_ta ());
  }
  inline void use_mask_policy (vsetvl_info &prev, const vsetvl_info &next)
  {
    if (!next.get_ma ())
      prev.set_ma (next.get_ma ());
  }
  inline void use_tail_mask_policy (vsetvl_info &prev, const vsetvl_info &next)
  {
    use_tail_policy (prev, next);
    use_mask_policy (prev, next);
  }

  /* Modifiers for avl.  */

  inline void use_next_avl (vsetvl_info &prev, const vsetvl_info &next)
  {
    gcc_assert (can_use_next_avl_p (prev, next));
    prev.update_avl (next);
  }

  inline void use_next_avl_when_not_equal (vsetvl_info &prev,
                                           const vsetvl_info &next)
  {
    if (avl_equal_p (prev, next))
      return;
    gcc_assert (next.has_non_zero_avl ());
    use_next_avl (prev, next);
  }

1723 public:
1724 demand_system () : m_avl_def_in (nullptr), m_avl_def_out (nullptr) {}
1725
1726 void set_avl_in_out_data (sbitmap *avl_def_in, sbitmap *avl_def_out)
1727 {
1728 m_avl_def_in = avl_def_in;
1729 m_avl_def_out = avl_def_out;
1730 }
1731
1732 /* Can we safely move the vsetvl info between prev_insn and next_insn? */
1733 bool avl_vl_unmodified_between_p (insn_info *prev_insn, insn_info *next_insn,
1734 const vsetvl_info &info,
1735 bool ignore_vl = false)
1736 {
1737 gcc_assert ((ignore_vl && info.has_nonvlmax_reg_avl ())
1738 || (info.has_nonvlmax_reg_avl () || info.has_vl ()));
1739
1740 gcc_assert (!prev_insn->is_debug_insn () && !next_insn->is_debug_insn ());
1741 if (prev_insn->bb () == next_insn->bb ()
1742 && prev_insn->compare_with (next_insn) < 0)
1743 {
1744 for (insn_info *i = next_insn->prev_nondebug_insn (); i != prev_insn;
1745 i = i->prev_nondebug_insn ())
1746 {
1747 // no def and use of vl
1748 if (!ignore_vl && modify_or_use_vl_p (i, info))
1749 return false;
1750
1751 // no def of avl
1752 if (modify_avl_p (i, info))
1753 return false;
1754 }
1755 return true;
1756 }
1757 else
1758 {
1759 if (!ignore_vl && info.has_vl ())
1760 {
1761 bitmap live_out = df_get_live_out (prev_insn->bb ()->cfg_bb ());
1762 if (bitmap_bit_p (live_out, REGNO (info.get_vl ())))
1763 return false;
1764 }
1765
1766 if (info.has_nonvlmax_reg_avl () && m_avl_def_in && m_avl_def_out)
1767 {
1768 bool has_avl_out = false;
1769 unsigned regno = REGNO (info.get_avl ());
1770 unsigned expr_id;
1771 sbitmap_iterator sbi;
1772 EXECUTE_IF_SET_IN_BITMAP (m_avl_def_out[prev_insn->bb ()->index ()],
1773 0, expr_id, sbi)
1774 {
1775 if (get_regno (expr_id, last_basic_block_for_fn (cfun))
1776 != regno)
1777 continue;
1778 has_avl_out = true;
1779 if (!bitmap_bit_p (m_avl_def_in[next_insn->bb ()->index ()],
1780 expr_id))
1781 return false;
1782 }
1783 if (!has_avl_out)
1784 return false;
1785 }
1786
1787 for (insn_info *i = next_insn; i != next_insn->bb ()->head_insn ();
1788 i = i->prev_nondebug_insn ())
1789 {
1790 // no def and use of vl
1791 if (!ignore_vl && modify_or_use_vl_p (i, info))
1792 return false;
1793
1794 // no def of avl
1795 if (modify_avl_p (i, info))
1796 return false;
1797 }
1798
1799 for (insn_info *i = prev_insn->bb ()->end_insn (); i != prev_insn;
1800 i = i->prev_nondebug_insn ())
1801 {
1802 // no def and use of vl
1803 if (!ignore_vl && modify_or_use_vl_p (i, info))
1804 return false;
1805
1806 // no def of avl
1807 if (modify_avl_p (i, info))
1808 return false;
1809 }
1810 }
1811 return true;
1812 }
1813
1814 bool sew_lmul_compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
1815 {
1816 gcc_assert (prev.valid_p () && next.valid_p ());
1817 sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
1818 sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
1819 #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1820 AVAILABLE_P, FUSE) \
1821 if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
1822 && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
1823 return COMPATIBLE_P (prev, next);
1824
1825 #include "riscv-vsetvl.def"
1826
1827 gcc_unreachable ();
1828 }
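/* To illustrate the X-macro dispatch used here: sew_lmul_compatible_p,
   sew_lmul_available_p and merge_sew_lmul below share one rule table,
   riscv-vsetvl.def, which is included three times with DEF_SEW_LMUL_RULE
   expanded differently each time.  As a sketch (the flag names here are
   illustrative, not necessarily entries that appear verbatim in
   riscv-vsetvl.def), a rule line such as

     DEF_SEW_LMUL_RULE (sew_only, ratio_only, sew_only,
			COMPATIBLE_P, AVAILABLE_P, FUSE)

   expands in sew_lmul_compatible_p to an if-statement matching the
   (prev, next) demand pair and returning COMPATIBLE_P (prev, next),
   while merge_sew_lmul additionally runs FUSE and installs the new
   demand flags.  */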
1829
1830 bool sew_lmul_available_p (const vsetvl_info &prev, const vsetvl_info &next)
1831 {
1832 gcc_assert (prev.valid_p () && next.valid_p ());
1833 sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
1834 sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
1835 #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1836 AVAILABLE_P, FUSE) \
1837 if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
1838 && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
1839 return AVAILABLE_P (prev, next);
1840
1841 #include "riscv-vsetvl.def"
1842
1843 gcc_unreachable ();
1844 }
1845
1846 void merge_sew_lmul (vsetvl_info &prev, const vsetvl_info &next)
1847 {
1848 gcc_assert (prev.valid_p () && next.valid_p ());
1849 sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
1850 sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
1851 #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1852 AVAILABLE_P, FUSE) \
1853 if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
1854 && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
1855 { \
1856 gcc_assert (COMPATIBLE_P (prev, next)); \
1857 FUSE (prev, next); \
1858 prev.set_sew_lmul_demand (sew_lmul_demand_type::NEW_FLAGS); \
1859 return; \
1860 }
1861
1862 #include "riscv-vsetvl.def"
1863
1864 gcc_unreachable ();
1865 }
1866
1867 bool policy_compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
1868 {
1869 gcc_assert (prev.valid_p () && next.valid_p ());
1870 policy_demand_type prev_flags = prev.get_policy_demand ();
1871 policy_demand_type next_flags = next.get_policy_demand ();
1872 #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1873 AVAILABLE_P, FUSE) \
1874 if (prev_flags == policy_demand_type::PREV_FLAGS \
1875 && next_flags == policy_demand_type::NEXT_FLAGS) \
1876 return COMPATIBLE_P (prev, next);
1877
1878 #include "riscv-vsetvl.def"
1879
1880 gcc_unreachable ();
1881 }
1882
1883 bool policy_available_p (const vsetvl_info &prev, const vsetvl_info &next)
1884 {
1885 gcc_assert (prev.valid_p () && next.valid_p ());
1886 policy_demand_type prev_flags = prev.get_policy_demand ();
1887 policy_demand_type next_flags = next.get_policy_demand ();
1888 #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1889 AVAILABLE_P, FUSE) \
1890 if (prev_flags == policy_demand_type::PREV_FLAGS \
1891 && next_flags == policy_demand_type::NEXT_FLAGS) \
1892 return AVAILABLE_P (prev, next);
1893
1894 #include "riscv-vsetvl.def"
1895
1896 gcc_unreachable ();
1897 }
1898
1899 void merge_policy (vsetvl_info &prev, const vsetvl_info &next)
1900 {
1901 gcc_assert (prev.valid_p () && next.valid_p ());
1902 policy_demand_type prev_flags = prev.get_policy_demand ();
1903 policy_demand_type next_flags = next.get_policy_demand ();
1904 #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1905 AVAILABLE_P, FUSE) \
1906 if (prev_flags == policy_demand_type::PREV_FLAGS \
1907 && next_flags == policy_demand_type::NEXT_FLAGS) \
1908 { \
1909 gcc_assert (COMPATIBLE_P (prev, next)); \
1910 FUSE (prev, next); \
1911 prev.set_policy_demand (policy_demand_type::NEW_FLAGS); \
1912 return; \
1913 }
1914
1915 #include "riscv-vsetvl.def"
1916
1917 gcc_unreachable ();
1918 }
1919
1920 bool vl_not_in_conflict_p (const vsetvl_info &prev, const vsetvl_info &next)
1921 {
1922 /* We don't fuse the following case:
1923
1924 li a5, -1
1925 vmv.s.x v0, a5 -- PREV
1926 vsetvli a5, ... -- NEXT
1927
1928 Don't fuse NEXT into PREV.
1929 */
1930 return !prev.vl_modify_non_avl_op_p (next)
1931 && !next.vl_modify_non_avl_op_p (prev);
1932 }
1933
1934 bool avl_compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
1935 {
1936 gcc_assert (prev.valid_p () && next.valid_p ());
1937 avl_demand_type prev_flags = prev.get_avl_demand ();
1938 avl_demand_type next_flags = next.get_avl_demand ();
1939 #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1940 AVAILABLE_P, FUSE) \
1941 if (prev_flags == avl_demand_type::PREV_FLAGS \
1942 && next_flags == avl_demand_type::NEXT_FLAGS) \
1943 return COMPATIBLE_P (prev, next);
1944
1945 #include "riscv-vsetvl.def"
1946
1947 gcc_unreachable ();
1948 }
1949
1950 bool avl_available_p (const vsetvl_info &prev, const vsetvl_info &next)
1951 {
1952 gcc_assert (prev.valid_p () && next.valid_p ());
1953 avl_demand_type prev_flags = prev.get_avl_demand ();
1954 avl_demand_type next_flags = next.get_avl_demand ();
1955 #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1956 AVAILABLE_P, FUSE) \
1957 if (prev_flags == avl_demand_type::PREV_FLAGS \
1958 && next_flags == avl_demand_type::NEXT_FLAGS) \
1959 return AVAILABLE_P (prev, next);
1960
1961 #include "riscv-vsetvl.def"
1962
1963 gcc_unreachable ();
1964 }
1965
1966 void merge_avl (vsetvl_info &prev, const vsetvl_info &next)
1967 {
1968 gcc_assert (prev.valid_p () && next.valid_p ());
1969 avl_demand_type prev_flags = prev.get_avl_demand ();
1970 avl_demand_type next_flags = next.get_avl_demand ();
1971 #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1972 AVAILABLE_P, FUSE) \
1973 if (prev_flags == avl_demand_type::PREV_FLAGS \
1974 && next_flags == avl_demand_type::NEXT_FLAGS) \
1975 { \
1976 gcc_assert (COMPATIBLE_P (prev, next)); \
1977 FUSE (prev, next); \
1978 prev.set_avl_demand (avl_demand_type::NEW_FLAGS); \
1979 return; \
1980 }
1981
1982 #include "riscv-vsetvl.def"
1983
1984 gcc_unreachable ();
1985 }
1986
1987 bool compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
1988 {
1989 bool compatible_p = sew_lmul_compatible_p (prev, next)
1990 && policy_compatible_p (prev, next)
1991 && avl_compatible_p (prev, next)
1992 && vl_not_in_conflict_p (prev, next);
1993 return compatible_p;
1994 }
1995
1996 bool available_p (const vsetvl_info &prev, const vsetvl_info &next)
1997 {
1998 bool available_p = sew_lmul_available_p (prev, next)
1999 && policy_available_p (prev, next)
2000 && avl_available_p (prev, next)
2001 && vl_not_in_conflict_p (prev, next);
2002 gcc_assert (!available_p || compatible_p (prev, next));
2003 return available_p;
2004 }
2005
2006 void merge (vsetvl_info &prev, const vsetvl_info &next)
2007 {
2008 gcc_assert (compatible_p (prev, next));
2009 merge_sew_lmul (prev, next);
2010 merge_policy (prev, next);
2011 merge_avl (prev, next);
2012 gcc_assert (available_p (prev, next));
2013 }
2014 };
2015
2016
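/* The pre_vsetvl class drives the global part of the pass: Phase 1 fuses
   vsetvl infos inside each block (fuse_local_vsetvl_info), Phase 2 lifts
   them across edges using LCM "earliest" data (earliest_fuse_vsetvl_info),
   Phase 3 computes the LCM insert/delete sets (pre_global_vsetvl_info),
   and Phases 4 and 5 rewrite the RTL (emit_vsetvl, cleanup).  */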
2017 class pre_vsetvl
2018 {
2019 private:
2020 demand_system m_dem;
2021 auto_vec<vsetvl_block_info> m_vector_block_infos;
2022
2023 /* Data for AVL reaching definition. */
2024 sbitmap m_avl_regs;
2025 sbitmap *m_avl_def_in;
2026 sbitmap *m_avl_def_out;
2027 sbitmap *m_reg_def_loc;
2028
2029 /* Data for vsetvl info reaching definition. */
2030 vsetvl_info m_unknown_info;
2031 auto_vec<vsetvl_info *> m_vsetvl_def_exprs;
2032 sbitmap *m_vsetvl_def_in;
2033 sbitmap *m_vsetvl_def_out;
2034
2035 /* Data for LCM. */
2036 auto_vec<vsetvl_info *> m_exprs;
2037 sbitmap *m_avloc;
2038 sbitmap *m_avin;
2039 sbitmap *m_avout;
2040 sbitmap *m_kill;
2041 sbitmap *m_antloc;
2042 sbitmap *m_transp;
2043 sbitmap *m_insert;
2044 sbitmap *m_del;
2045 struct edge_list *m_edges;
2046
2047 auto_vec<vsetvl_info> m_delete_list;
2048
2049 vsetvl_block_info &get_block_info (const bb_info *bb)
2050 {
2051 return m_vector_block_infos[bb->index ()];
2052 }
2053 const vsetvl_block_info &get_block_info (const basic_block bb) const
2054 {
2055 return m_vector_block_infos[bb->index];
2056 }
2057
2058 vsetvl_block_info &get_block_info (const basic_block bb)
2059 {
2060 return m_vector_block_infos[bb->index];
2061 }
2062
2063 void add_expr (auto_vec<vsetvl_info *> &exprs, vsetvl_info &info)
2064 {
2065 for (vsetvl_info *item : exprs)
2066 {
2067 if (*item == info)
2068 return;
2069 }
2070 exprs.safe_push (&info);
2071 }
2072
2073 unsigned get_expr_index (auto_vec<vsetvl_info *> &exprs,
2074 const vsetvl_info &info)
2075 {
2076 for (size_t i = 0; i < exprs.length (); i += 1)
2077 {
2078 if (*exprs[i] == info)
2079 return i;
2080 }
2081 gcc_unreachable ();
2082 }
2083
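/* Return true if HEADER_INFO is locally anticipatable, i.e. neither its
   AVL nor its VL register is touched between the head of its block and
   the point where the info takes effect.  */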
2084 bool anticipated_exp_p (const vsetvl_info &header_info)
2085 {
2086 if (!header_info.has_nonvlmax_reg_avl () && !header_info.has_vl ())
2087 return true;
2088
2089 bb_info *bb = header_info.get_bb ();
2090 insn_info *prev_insn = bb->head_insn ();
2091 insn_info *next_insn = header_info.insn_inside_bb_p ()
2092 ? header_info.get_insn ()
2093 : header_info.get_bb ()->end_insn ();
2094
2095 return m_dem.avl_vl_unmodified_between_p (prev_insn, next_insn,
2096 header_info);
2097 }
2098
2099 bool available_exp_p (const vsetvl_info &prev_info,
2100 const vsetvl_info &next_info)
2101 {
2102 return m_dem.available_p (prev_info, next_info);
2103 }
2104
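/* Propagate execution probabilities forward through the CFG: a block's
   probability is accumulated as the sum, over its incoming edges, of the
   predecessor's probability times the edge probability.  The result is
   used later (Phase 2) to arbitrate between incompatible vsetvl infos.  */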
2105 void compute_probabilities ()
2106 {
2107 edge e;
2108 edge_iterator ei;
2109
2110 for (const bb_info *bb : crtl->ssa->bbs ())
2111 {
2112 basic_block cfg_bb = bb->cfg_bb ();
2113 auto &curr_prob = get_block_info (cfg_bb).probability;
2114
2115 /* GCC assumes the entry block (bb 0) is always executed,
2116 so set its probability to "always". */
2117 if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
2118 curr_prob = profile_probability::always ();
2119 /* Exit block (bb 1) is the block we don't need to process. */
2120 if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
2121 continue;
2122
2123 gcc_assert (curr_prob.initialized_p ());
2124 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
2125 {
2126 auto &new_prob = get_block_info (e->dest).probability;
2127 /* Normally, the edge probability should be initialized.
2128 However, some special testing code written in the
2129 GIMPLE IR style leaves the edge probability
2130 uninitialized; we conservatively set it to "never" so
2131 that it will not affect PRE (Phase 3 and Phase 4). */
2132 if (!e->probability.initialized_p ())
2133 new_prob = profile_probability::never ();
2134 else if (!new_prob.initialized_p ())
2135 new_prob = curr_prob * e->probability;
2136 else if (new_prob == profile_probability::always ())
2137 continue;
2138 else
2139 new_prob += curr_prob * e->probability;
2140 }
2141 }
2142 }
2143
2144 void insert_vsetvl_insn (enum emit_type emit_type, const vsetvl_info &info)
2145 {
2146 rtx pat = info.get_vsetvl_pat ();
2147 rtx_insn *rinsn = info.get_insn ()->rtl ();
2148
2149 if (emit_type == EMIT_DIRECT)
2150 {
2151 emit_insn (pat);
2152 if (dump_file)
2153 {
2154 fprintf (dump_file, " Insert vsetvl insn %d:\n",
2155 INSN_UID (get_last_insn ()));
2156 print_rtl_single (dump_file, get_last_insn ());
2157 }
2158 }
2159 else if (emit_type == EMIT_BEFORE)
2160 {
2161 emit_insn_before (pat, rinsn);
2162 if (dump_file)
2163 {
2164 fprintf (dump_file, " Insert vsetvl insn before insn %d:\n",
2165 INSN_UID (rinsn));
2166 print_rtl_single (dump_file, PREV_INSN (rinsn));
2167 }
2168 }
2169 else
2170 {
2171 emit_insn_after (pat, rinsn);
2172 if (dump_file)
2173 {
2174 fprintf (dump_file, " Insert vsetvl insn after insn %d:\n",
2175 INSN_UID (rinsn));
2176 print_rtl_single (dump_file, NEXT_INSN (rinsn));
2177 }
2178 }
2179 }
2180
2181 void change_vsetvl_insn (const vsetvl_info &info)
2182 {
2183 rtx_insn *rinsn = info.get_insn ()->rtl ();
2184 rtx new_pat = info.get_vsetvl_pat ();
2185
2186 if (dump_file)
2187 {
2188 fprintf (dump_file, " Change insn %d from:\n", INSN_UID (rinsn));
2189 print_rtl_single (dump_file, rinsn);
2190 }
2191
2192 validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false);
2193
2194 if (dump_file)
2195 {
2196 fprintf (dump_file, "\n to:\n");
2197 print_rtl_single (dump_file, rinsn);
2198 }
2199 }
2200
2201 void remove_vsetvl_insn (const vsetvl_info &info)
2202 {
2203 rtx_insn *rinsn = info.get_insn ()->rtl ();
2204 if (dump_file)
2205 {
2206 fprintf (dump_file, " Eliminate insn %d:\n", INSN_UID (rinsn));
2207 print_rtl_single (dump_file, rinsn);
2208 }
2209 if (in_sequence_p ())
2210 remove_insn (rinsn);
2211 else
2212 delete_insn (rinsn);
2213 }
2214
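/* Return true if all successors of CFG_BB were assigned the same block
   probability by compute_probabilities.  */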
2215 bool successors_probability_equal_p (const basic_block cfg_bb) const
2216 {
2217 edge e;
2218 edge_iterator ei;
2219 profile_probability prob = profile_probability::uninitialized ();
2220 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
2221 {
2222 if (prob == profile_probability::uninitialized ())
2223 prob = m_vector_block_infos[e->dest->index].probability;
2224 else if (prob == m_vector_block_infos[e->dest->index].probability)
2225 continue;
2226 else
2227 /* We pick the highest probability among those incompatible VSETVL
2228 infos. When all incompatible VSETVL infos have the same probability,
2229 we don't pick any of them. */
2230 return false;
2231 }
2232 return true;
2233 }
2234
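/* Return true if every vsetvl info reaching CURR_INFO's block provides a
   usable AVL and the same SEW/LMUL ratio, in which case only the vtype
   part of CURR_INFO needs to be changed.  */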
2235 bool preds_all_same_avl_and_ratio_p (const vsetvl_info &curr_info)
2236 {
2237 gcc_assert (
2238 !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
2239
2240 unsigned expr_index;
2241 sbitmap_iterator sbi;
2242 EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[curr_info.get_bb ()->index ()], 0,
2243 expr_index, sbi)
2244 {
2245 const vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
2246 if (!prev_info.valid_p ()
2247 || !m_dem.avl_available_p (prev_info, curr_info)
2248 || prev_info.get_ratio () != curr_info.get_ratio ())
2249 return false;
2250 }
2251
2252 return true;
2253 }
2254
2255 public:
2256 pre_vsetvl ()
2257 : m_avl_def_in (nullptr), m_avl_def_out (nullptr),
2258 m_vsetvl_def_in (nullptr), m_vsetvl_def_out (nullptr), m_avloc (nullptr),
2259 m_avin (nullptr), m_avout (nullptr), m_kill (nullptr), m_antloc (nullptr),
2260 m_transp (nullptr), m_insert (nullptr), m_del (nullptr), m_edges (nullptr)
2261 {
2262 /* Initialization of RTL_SSA. */
2263 calculate_dominance_info (CDI_DOMINATORS);
2264 df_analyze ();
2265 crtl->ssa = new function_info (cfun);
2266 m_vector_block_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
2267 compute_probabilities ();
2268 m_unknown_info.set_unknown ();
2269 }
2270
2271 void finish ()
2272 {
2273 free_dominance_info (CDI_DOMINATORS);
2274 if (crtl->ssa->perform_pending_updates ())
2275 cleanup_cfg (0);
2276 delete crtl->ssa;
2277 crtl->ssa = nullptr;
2278
2279 if (m_avl_regs)
2280 sbitmap_free (m_avl_regs);
2281 if (m_reg_def_loc)
2282 sbitmap_vector_free (m_reg_def_loc);
2283
2284 if (m_avl_def_in)
2285 sbitmap_vector_free (m_avl_def_in);
2286 if (m_avl_def_out)
2287 sbitmap_vector_free (m_avl_def_out);
2288
2289 if (m_vsetvl_def_in)
2290 sbitmap_vector_free (m_vsetvl_def_in);
2291 if (m_vsetvl_def_out)
2292 sbitmap_vector_free (m_vsetvl_def_out);
2293
2294 if (m_avloc)
2295 sbitmap_vector_free (m_avloc);
2296 if (m_kill)
2297 sbitmap_vector_free (m_kill);
2298 if (m_antloc)
2299 sbitmap_vector_free (m_antloc);
2300 if (m_transp)
2301 sbitmap_vector_free (m_transp);
2302 if (m_insert)
2303 sbitmap_vector_free (m_insert);
2304 if (m_del)
2305 sbitmap_vector_free (m_del);
2306 if (m_avin)
2307 sbitmap_vector_free (m_avin);
2308 if (m_avout)
2309 sbitmap_vector_free (m_avout);
2310
2311 if (m_edges)
2312 free_edge_list (m_edges);
2313 }
2314
2315 void compute_avl_def_data ();
2316 void compute_vsetvl_def_data ();
2317 void compute_lcm_local_properties ();
2318
2319 void fuse_local_vsetvl_info ();
2320 bool earliest_fuse_vsetvl_info ();
2321 void pre_global_vsetvl_info ();
2322 void emit_vsetvl ();
2323 void cleanup ();
2324 void remove_avl_operand ();
2325 void remove_unused_dest_operand ();
2326
2327 void dump (FILE *file, const char *title) const
2328 {
2329 fprintf (file, "\nVSETVL infos after %s\n\n", title);
2330 for (const bb_info *bb : crtl->ssa->bbs ())
2331 {
2332 const auto &block_info = m_vector_block_infos[bb->index ()];
2333 fprintf (file, " bb %d:\n", bb->index ());
2334 fprintf (file, " probability: ");
2335 block_info.probability.dump (file);
2336 fprintf (file, "\n");
2337 if (!block_info.empty_p ())
2338 {
2339 fprintf (file, " Header vsetvl info:");
2340 block_info.get_entry_info ().dump (file, " ");
2341 fprintf (file, " Footer vsetvl info:");
2342 block_info.get_exit_info ().dump (file, " ");
2343 for (const auto &info : block_info.local_infos)
2344 {
2345 fprintf (file,
2346 " insn %d vsetvl info:", info.get_insn ()->uid ());
2347 info.dump (file, " ");
2348 }
2349 }
2350 }
2351 }
2352 };
2353
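/* Compute reaching definitions for the AVL registers collected in
   m_avl_regs.  Each expression is a (register, defining block) pair; the
   resulting m_avl_def_in/m_avl_def_out bitmaps are handed to the demand
   system so avl_vl_unmodified_between_p can check that the same AVL
   definition reaches both ends of a cross-block move.  */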
2354 void
2355 pre_vsetvl::compute_avl_def_data ()
2356 {
2357 if (bitmap_empty_p (m_avl_regs))
2358 return;
2359
2360 unsigned num_regs = GP_REG_LAST + 1;
2361 unsigned num_bbs = last_basic_block_for_fn (cfun);
2362
2363 sbitmap *avl_def_loc_temp = sbitmap_vector_alloc (num_bbs, num_regs);
2364 for (const bb_info *bb : crtl->ssa->bbs ())
2365 {
2366 bitmap_and (avl_def_loc_temp[bb->index ()], m_avl_regs,
2367 m_reg_def_loc[bb->index ()]);
2368
2369 vsetvl_block_info &block_info = get_block_info (bb);
2370 if (block_info.has_info ())
2371 {
2372 vsetvl_info &footer_info = block_info.get_exit_info ();
2373 gcc_assert (footer_info.valid_p ());
2374 if (footer_info.has_vl ())
2375 bitmap_set_bit (avl_def_loc_temp[bb->index ()],
2376 REGNO (footer_info.get_vl ()));
2377 }
2378 }
2379
2380 if (m_avl_def_in)
2381 sbitmap_vector_free (m_avl_def_in);
2382 if (m_avl_def_out)
2383 sbitmap_vector_free (m_avl_def_out);
2384
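/* One expression per (register, defining block) pair.  Judging from the
   helpers used here (get_expr_id, get_regno, get_bb_index) and the
   bitmap_set_range call below, expression ids appear to be laid out
   register-major, i.e. expr_id == regno * num_bbs + bb_index, so a
   definition of REGNO kills the whole num_bbs-wide range for it.  */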
2385 unsigned num_exprs = num_bbs * num_regs;
2386 sbitmap *avl_def_loc = sbitmap_vector_alloc (num_bbs, num_exprs);
2387 sbitmap *m_kill = sbitmap_vector_alloc (num_bbs, num_exprs);
2388 m_avl_def_in = sbitmap_vector_alloc (num_bbs, num_exprs);
2389 m_avl_def_out = sbitmap_vector_alloc (num_bbs, num_exprs);
2390
2391 bitmap_vector_clear (avl_def_loc, num_bbs);
2392 bitmap_vector_clear (m_kill, num_bbs);
2393 bitmap_vector_clear (m_avl_def_out, num_bbs);
2394
2395 unsigned regno;
2396 sbitmap_iterator sbi;
2397 for (const bb_info *bb : crtl->ssa->bbs ())
2398 EXECUTE_IF_SET_IN_BITMAP (avl_def_loc_temp[bb->index ()], 0, regno, sbi)
2399 {
2400 bitmap_set_bit (avl_def_loc[bb->index ()],
2401 get_expr_id (bb->index (), regno, num_bbs));
2402 bitmap_set_range (m_kill[bb->index ()], regno * num_bbs, num_bbs);
2403 }
2404
2405 basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
2406 EXECUTE_IF_SET_IN_BITMAP (m_avl_regs, 0, regno, sbi)
2407 bitmap_set_bit (m_avl_def_out[entry->index],
2408 get_expr_id (entry->index, regno, num_bbs));
2409
2410 compute_reaching_defintion (avl_def_loc, m_kill, m_avl_def_in, m_avl_def_out);
2411
2412 if (dump_file && (dump_flags & TDF_DETAILS))
2413 {
2414 fprintf (dump_file,
2415 " Compute avl reaching defition data (num_bbs %d, num_regs "
2416 "%d):\n\n",
2417 num_bbs, num_regs);
2418 fprintf (dump_file, " avl_regs: ");
2419 dump_bitmap_file (dump_file, m_avl_regs);
2420 fprintf (dump_file, "\n bitmap data:\n");
2421 for (const bb_info *bb : crtl->ssa->bbs ())
2422 {
2423 unsigned int i = bb->index ();
2424 fprintf (dump_file, " BB %u:\n", i);
2425 fprintf (dump_file, " avl_def_loc:");
2426 unsigned expr_id;
2427 sbitmap_iterator sbi;
2428 EXECUTE_IF_SET_IN_BITMAP (avl_def_loc[i], 0, expr_id, sbi)
2429 {
2430 fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs),
2431 get_bb_index (expr_id, num_bbs));
2432 }
2433 fprintf (dump_file, "\n kill:");
2434 EXECUTE_IF_SET_IN_BITMAP (m_kill[i], 0, expr_id, sbi)
2435 {
2436 fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs),
2437 get_bb_index (expr_id, num_bbs));
2438 }
2439 fprintf (dump_file, "\n avl_def_in:");
2440 EXECUTE_IF_SET_IN_BITMAP (m_avl_def_in[i], 0, expr_id, sbi)
2441 {
2442 fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs),
2443 get_bb_index (expr_id, num_bbs));
2444 }
2445 fprintf (dump_file, "\n avl_def_out:");
2446 EXECUTE_IF_SET_IN_BITMAP (m_avl_def_out[i], 0, expr_id, sbi)
2447 {
2448 fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs),
2449 get_bb_index (expr_id, num_bbs));
2450 }
2451 fprintf (dump_file, "\n");
2452 }
2453 }
2454
2455 sbitmap_vector_free (avl_def_loc);
2456 sbitmap_vector_free (m_kill);
2457 sbitmap_vector_free (avl_def_loc_temp);
2458
2459 m_dem.set_avl_in_out_data (m_avl_def_in, m_avl_def_out);
2460 }
2461
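/* Compute, for each basic block, which block footer vsetvl infos (plus
   the special "unknown" info) reach it.  The result is stored in
   m_vsetvl_def_in/m_vsetvl_def_out and used below to mark blocks whose
   entry info is fully available from every reaching definition.  */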
2462 void
2463 pre_vsetvl::compute_vsetvl_def_data ()
2464 {
2465 m_vsetvl_def_exprs.truncate (0);
2466 add_expr (m_vsetvl_def_exprs, m_unknown_info);
2467 for (const bb_info *bb : crtl->ssa->bbs ())
2468 {
2469 vsetvl_block_info &block_info = get_block_info (bb);
2470 if (block_info.empty_p ())
2471 continue;
2472 vsetvl_info &footer_info = block_info.get_exit_info ();
2473 gcc_assert (footer_info.valid_p () || footer_info.unknown_p ());
2474 add_expr (m_vsetvl_def_exprs, footer_info);
2475 }
2476
2477 if (m_vsetvl_def_in)
2478 sbitmap_vector_free (m_vsetvl_def_in);
2479 if (m_vsetvl_def_out)
2480 sbitmap_vector_free (m_vsetvl_def_out);
2481
2482 sbitmap *def_loc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2483 m_vsetvl_def_exprs.length ());
2484 sbitmap *m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2485 m_vsetvl_def_exprs.length ());
2486
2487 m_vsetvl_def_in = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2488 m_vsetvl_def_exprs.length ());
2489 m_vsetvl_def_out = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2490 m_vsetvl_def_exprs.length ());
2491
2492 bitmap_vector_clear (def_loc, last_basic_block_for_fn (cfun));
2493 bitmap_vector_clear (m_kill, last_basic_block_for_fn (cfun));
2494 bitmap_vector_clear (m_vsetvl_def_out, last_basic_block_for_fn (cfun));
2495
2496 for (const bb_info *bb : crtl->ssa->bbs ())
2497 {
2498 vsetvl_block_info &block_info = get_block_info (bb);
2499 if (block_info.empty_p ())
2500 {
2501 for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i += 1)
2502 {
2503 const vsetvl_info &info = *m_vsetvl_def_exprs[i];
2504 if (!info.has_nonvlmax_reg_avl ())
2505 continue;
2506 unsigned int regno;
2507 sbitmap_iterator sbi;
2508 EXECUTE_IF_SET_IN_BITMAP (m_reg_def_loc[bb->index ()], 0, regno,
2509 sbi)
2510 if (regno == REGNO (info.get_avl ()))
2511 {
2512 bitmap_set_bit (m_kill[bb->index ()], i);
2513 bitmap_set_bit (def_loc[bb->index ()],
2514 get_expr_index (m_vsetvl_def_exprs,
2515 m_unknown_info));
2516 }
2517 }
2518 continue;
2519 }
2520
2521 vsetvl_info &footer_info = block_info.get_exit_info ();
2522 bitmap_ones (m_kill[bb->index ()]);
2523 bitmap_set_bit (def_loc[bb->index ()],
2524 get_expr_index (m_vsetvl_def_exprs, footer_info));
2525 }
2526
2527 /* Set the def_out of the ENTRY basic block to the m_unknown_info expr. */
2528 basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
2529 bitmap_set_bit (m_vsetvl_def_out[entry->index],
2530 get_expr_index (m_vsetvl_def_exprs, m_unknown_info));
2531
2532 compute_reaching_defintion (def_loc, m_kill, m_vsetvl_def_in,
2533 m_vsetvl_def_out);
2534
2535 if (dump_file && (dump_flags & TDF_DETAILS))
2536 {
2537 fprintf (dump_file,
2538 "\n Compute vsetvl info reaching defition data:\n\n");
2539 fprintf (dump_file, " Expression List (%d):\n",
2540 m_vsetvl_def_exprs.length ());
2541 for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i++)
2542 {
2543 const auto &info = *m_vsetvl_def_exprs[i];
2544 fprintf (dump_file, " Expr[%u]: ", i);
2545 info.dump (dump_file, " ");
2546 }
2547 fprintf (dump_file, "\n bitmap data:\n");
2548 for (const bb_info *bb : crtl->ssa->bbs ())
2549 {
2550 unsigned int i = bb->index ();
2551 fprintf (dump_file, " BB %u:\n", i);
2552 fprintf (dump_file, " def_loc: ");
2553 dump_bitmap_file (dump_file, def_loc[i]);
2554 fprintf (dump_file, " kill: ");
2555 dump_bitmap_file (dump_file, m_kill[i]);
2556 fprintf (dump_file, " vsetvl_def_in: ");
2557 dump_bitmap_file (dump_file, m_vsetvl_def_in[i]);
2558 fprintf (dump_file, " vsetvl_def_out: ");
2559 dump_bitmap_file (dump_file, m_vsetvl_def_out[i]);
2560 }
2561 }
2562
2563 for (const bb_info *bb : crtl->ssa->bbs ())
2564 {
2565 vsetvl_block_info &block_info = get_block_info (bb);
2566 if (block_info.empty_p ())
2567 continue;
2568 vsetvl_info &curr_info = block_info.get_entry_info ();
2569 if (!curr_info.valid_p ())
2570 continue;
2571
2572 unsigned int expr_index;
2573 sbitmap_iterator sbi;
2574 gcc_assert (
2575 !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
2576 bool full_available = true;
2577 EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[bb->index ()], 0, expr_index,
2578 sbi)
2579 {
2580 vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
2581 if (!prev_info.valid_p ()
2582 || !m_dem.available_p (prev_info, curr_info))
2583 {
2584 full_available = false;
2585 break;
2586 }
2587 }
2588 block_info.full_available = full_available;
2589 }
2590
2591 sbitmap_vector_free (def_loc);
2592 sbitmap_vector_free (m_kill);
2593 }
2594
2595 /* Compute the local properties of each recorded expression.
2596
2597 Local properties are those that are defined by the block, irrespective of
2598 other blocks.
2599
2600 An expression is transparent in a block if its operands are not modified
2601 in the block.
2602
2603 An expression is computed (locally available) in a block if it is
2604 computed at least once and the expression would contain the same value
2605 if the computation were moved to the end of the block.
2606
2607 An expression is locally anticipatable in a block if it is computed at
2608 least once and the expression would contain the same value if the
2609 computation were moved to the beginning of the block. */
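/* Note: the KILL set fed to LCM is derived below from these two, as
   KILL(B) = ~(TRANSP(B) | AVLOC(B)); an expression is killed by a block
   that neither preserves it nor recomputes it at the block end.  */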
2610 void
2611 pre_vsetvl::compute_lcm_local_properties ()
2612 {
2613 m_exprs.truncate (0);
2614 for (const bb_info *bb : crtl->ssa->bbs ())
2615 {
2616 vsetvl_block_info &block_info = get_block_info (bb);
2617 if (block_info.empty_p ())
2618 continue;
2619 vsetvl_info &header_info = block_info.get_entry_info ();
2620 vsetvl_info &footer_info = block_info.get_exit_info ();
2621 gcc_assert (footer_info.valid_p () || footer_info.unknown_p ());
2622 add_expr (m_exprs, header_info);
2623 add_expr (m_exprs, footer_info);
2624 }
2625
2626 int num_exprs = m_exprs.length ();
2627 if (m_avloc)
2628 sbitmap_vector_free (m_avloc);
2629 if (m_kill)
2630 sbitmap_vector_free (m_kill);
2631 if (m_antloc)
2632 sbitmap_vector_free (m_antloc);
2633 if (m_transp)
2634 sbitmap_vector_free (m_transp);
2635 if (m_avin)
2636 sbitmap_vector_free (m_avin);
2637 if (m_avout)
2638 sbitmap_vector_free (m_avout);
2639
2640 m_avloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2641 m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2642 m_antloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2643 m_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2644 m_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2645 m_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2646
2647 bitmap_vector_clear (m_avloc, last_basic_block_for_fn (cfun));
2648 bitmap_vector_clear (m_antloc, last_basic_block_for_fn (cfun));
2649 bitmap_vector_clear (m_transp, last_basic_block_for_fn (cfun));
2650
2651 /* - If T is locally available at the end of a block, then T' must be
2652 available at the end of the same block. Since some optimization has
2653 occurred earlier, T' might not be locally available, however, it must
2654 have been previously computed on all paths. As a formula, T at AVLOC(B)
2655 implies that T' at AVOUT(B).
2656 An "available occurrence" is one that is the last occurrence in the
2657 basic block and the operands are not modified by following statements in
2658 the basic block [including this insn].
2659
2660 - If T is locally anticipated at the beginning of a block, then either
2661 T' is locally anticipated or it is already available from previous
2662 blocks. As a formula, this means that T at ANTLOC(B) implies that T' is
2663 at ANTLOC(B) or at AVIN(B).
2664 An "anticipatable occurrence" is one that is the first occurrence in the
2665 basic block, the operands are not modified in the basic block prior
2666 to the occurrence and the output is not used between the start of
2667 the block and the occurrence. */
2668 for (const bb_info *bb : crtl->ssa->bbs ())
2669 {
2670 unsigned bb_index = bb->index ();
2671 vsetvl_block_info &block_info = get_block_info (bb);
2672
2673 /* Compute m_transp */
2674 if (block_info.empty_p ())
2675 {
2676 bitmap_ones (m_transp[bb_index]);
2677 for (int i = 0; i < num_exprs; i += 1)
2678 {
2679 const vsetvl_info &info = *m_exprs[i];
2680 if (!info.has_nonvlmax_reg_avl () && !info.has_vl ())
2681 continue;
2682
2683 if (info.has_nonvlmax_reg_avl ())
2684 {
2685 unsigned int regno;
2686 sbitmap_iterator sbi;
2687 EXECUTE_IF_SET_IN_BITMAP (m_reg_def_loc[bb->index ()], 0,
2688 regno, sbi)
2689 {
2690 if (regno == REGNO (info.get_avl ()))
2691 bitmap_clear_bit (m_transp[bb->index ()], i);
2692 }
2693 }
2694
2695 for (insn_info *insn : bb->real_nondebug_insns ())
2696 {
2697 if (info.has_nonvlmax_reg_avl ()
2698 && find_access (insn->defs (), REGNO (info.get_avl ())))
2699 {
2700 bitmap_clear_bit (m_transp[bb_index], i);
2701 break;
2702 }
2703
2704 if (info.has_vl ()
2705 && reg_mentioned_p (info.get_vl (), insn->rtl ()))
2706 {
2707 if (find_access (insn->defs (), REGNO (info.get_vl ())))
2708 /* We can't fuse vsetvl into the blocks that modify the
2709 VL operand, since the successors of such blocks will
2710 need the value those blocks are defining.
2711
2712 bb 4: def a5
2713 / \
2714 bb 5:use a5 bb 6:vsetvl a5, 5
2715
2716 The example above shows that we can't fuse vsetvl
2717 from bb 6 into bb 4 since the successor bb 5 is using
2718 the value defined in bb 4. */
2719 ;
2720 else
2721 {
2722 /* We can't fuse vsetvl into the blocks that use the
2723 VL operand with a value different from that of the
2724 vsetvl info.
2725
2726 bb 4: def a5
2727 |
2728 bb 5: use a5
2729 |
2730 bb 6: def a5
2731 |
2732 bb 7: use a5
2733
2734 The example above shows that we can't fuse vsetvl
2735 from bb 6 into bb 5 since their value is different.
2736 */
2737 resource_info resource
2738 = full_register (REGNO (info.get_vl ()));
2739 def_lookup dl = crtl->ssa->find_def (resource, insn);
2740 def_info *def
2741 = dl.matching_set_or_last_def_of_prev_group ();
2742 insn_info *def_insn = extract_single_source (def);
2743 if (def_insn && vsetvl_insn_p (def_insn->rtl ()))
2744 {
2745 vsetvl_info def_info = vsetvl_info (def_insn);
2746 if (m_dem.compatible_p (def_info, info))
2747 continue;
2748 }
2749 }
2750
2751 bitmap_clear_bit (m_transp[bb_index], i);
2752 break;
2753 }
2754 }
2755 }
2756
2757 continue;
2758 }
2759
2760 vsetvl_info &header_info = block_info.get_entry_info ();
2761 vsetvl_info &footer_info = block_info.get_exit_info ();
2762
2763 if (header_info.valid_p () && anticipated_exp_p (header_info))
2764 bitmap_set_bit (m_antloc[bb_index],
2765 get_expr_index (m_exprs, header_info));
2766
2767 if (footer_info.valid_p ())
2768 for (int i = 0; i < num_exprs; i += 1)
2769 {
2770 const vsetvl_info &info = *m_exprs[i];
2771 if (!info.valid_p ())
2772 continue;
2773 if (available_exp_p (footer_info, info))
2774 bitmap_set_bit (m_avloc[bb_index], i);
2775 }
2776 }
2777
2778 for (const bb_info *bb : crtl->ssa->bbs ())
2779 {
2780 unsigned bb_index = bb->index ();
2781 bitmap_ior (m_kill[bb_index], m_transp[bb_index], m_avloc[bb_index]);
2782 bitmap_not (m_kill[bb_index], m_kill[bb_index]);
2783 }
2784
2785 for (const bb_info *bb : crtl->ssa->bbs ())
2786 {
2787 unsigned bb_index = bb->index ();
2788 edge e;
2789 edge_iterator ei;
2790 FOR_EACH_EDGE (e, ei, bb->cfg_bb ()->preds)
2791 if (e->flags & EDGE_COMPLEX)
2792 {
2793 bitmap_clear (m_antloc[bb_index]);
2794 bitmap_clear (m_transp[bb_index]);
2795 }
2796 }
2797 }
2798
2799 void
2800 pre_vsetvl::fuse_local_vsetvl_info ()
2801 {
2802 m_reg_def_loc
2803 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), GP_REG_LAST + 1);
2804 bitmap_vector_clear (m_reg_def_loc, last_basic_block_for_fn (cfun));
2805 bitmap_ones (m_reg_def_loc[ENTRY_BLOCK_PTR_FOR_FN (cfun)->index]);
2806
2807 for (bb_info *bb : crtl->ssa->bbs ())
2808 {
2809 auto &block_info = get_block_info (bb);
2810 block_info.bb = bb;
2811 if (dump_file && (dump_flags & TDF_DETAILS))
2812 {
2813 fprintf (dump_file, " Try fuse basic block %d\n", bb->index ());
2814 }
2815 auto_vec<vsetvl_info> infos;
2816 for (insn_info *insn : bb->real_nondebug_insns ())
2817 {
2818 vsetvl_info curr_info = vsetvl_info (insn);
2819 if (curr_info.valid_p () || curr_info.unknown_p ())
2820 infos.safe_push (curr_info);
2821
2822 /* Collecting GP registers modified by the current bb. */
2823 if (insn->is_real ())
2824 for (def_info *def : insn->defs ())
2825 if (def->is_reg () && GP_REG_P (def->regno ()))
2826 bitmap_set_bit (m_reg_def_loc[bb->index ()], def->regno ());
2827 }
2828
2829 vsetvl_info prev_info = vsetvl_info ();
2830 prev_info.set_empty ();
2831 for (auto &curr_info : infos)
2832 {
2833 if (prev_info.empty_p ())
2834 prev_info = curr_info;
2835 else if ((curr_info.unknown_p () && prev_info.valid_p ())
2836 || (curr_info.valid_p () && prev_info.unknown_p ()))
2837 {
2838 block_info.local_infos.safe_push (prev_info);
2839 prev_info = curr_info;
2840 }
2841 else if (curr_info.valid_p () && prev_info.valid_p ())
2842 {
2843 if (m_dem.available_p (prev_info, curr_info))
2844 {
2845 if (dump_file && (dump_flags & TDF_DETAILS))
2846 {
2847 fprintf (dump_file,
2848 " Ignore curr info since prev info "
2849 "available with it:\n");
2850 fprintf (dump_file, " prev_info: ");
2851 prev_info.dump (dump_file, " ");
2852 fprintf (dump_file, " curr_info: ");
2853 curr_info.dump (dump_file, " ");
2854 fprintf (dump_file, "\n");
2855 }
2856 if (!curr_info.vl_used_by_non_rvv_insn_p ()
2857 && vsetvl_insn_p (curr_info.get_insn ()->rtl ()))
2858 m_delete_list.safe_push (curr_info);
2859
2860 if (curr_info.get_read_vl_insn ())
2861 prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ());
2862 }
2863 else if (m_dem.compatible_p (prev_info, curr_info))
2864 {
2865 if (dump_file && (dump_flags & TDF_DETAILS))
2866 {
2867 fprintf (dump_file, " Fuse curr info since prev info "
2868 "compatible with it:\n");
2869 fprintf (dump_file, " prev_info: ");
2870 prev_info.dump (dump_file, " ");
2871 fprintf (dump_file, " curr_info: ");
2872 curr_info.dump (dump_file, " ");
2873 }
2874 m_dem.merge (prev_info, curr_info);
2875 if (curr_info.get_read_vl_insn ())
2876 prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ());
2877 if (dump_file && (dump_flags & TDF_DETAILS))
2878 {
2879 fprintf (dump_file, " prev_info after fused: ");
2880 prev_info.dump (dump_file, " ");
2881 fprintf (dump_file, "\n");
2882 }
2883 }
2884 else
2885 {
2886 if (dump_file && (dump_flags & TDF_DETAILS))
2887 {
2888 fprintf (dump_file,
2889 " Cannot fuse uncompatible infos:\n");
2890 fprintf (dump_file, " prev_info: ");
2891 prev_info.dump (dump_file, " ");
2892 fprintf (dump_file, " curr_info: ");
2893 curr_info.dump (dump_file, " ");
2894 }
2895 block_info.local_infos.safe_push (prev_info);
2896 prev_info = curr_info;
2897 }
2898 }
2899 }
2900
2901 if (prev_info.valid_p () || prev_info.unknown_p ())
2902 block_info.local_infos.safe_push (prev_info);
2903 }
2904
2905 m_avl_regs = sbitmap_alloc (GP_REG_LAST + 1);
2906 bitmap_clear (m_avl_regs);
2907 for (const bb_info *bb : crtl->ssa->bbs ())
2908 {
2909 vsetvl_block_info &block_info = get_block_info (bb);
2910 if (block_info.empty_p ())
2911 continue;
2912
2913 vsetvl_info &header_info = block_info.get_entry_info ();
2914 if (header_info.valid_p () && header_info.has_nonvlmax_reg_avl ())
2915 {
2916 gcc_assert (GP_REG_P (REGNO (header_info.get_avl ())));
2917 bitmap_set_bit (m_avl_regs, REGNO (header_info.get_avl ()));
2918 }
2919 }
2920 }
2921
2922
2923 bool
2924 pre_vsetvl::earliest_fuse_vsetvl_info ()
2925 {
2926 compute_avl_def_data ();
2927 compute_vsetvl_def_data ();
2928 compute_lcm_local_properties ();
2929
2930 unsigned num_exprs = m_exprs.length ();
2931 struct edge_list *m_edges = create_edge_list ();
2932 unsigned num_edges = NUM_EDGES (m_edges);
2933 sbitmap *antin
2934 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2935 sbitmap *antout
2936 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2937
2938 sbitmap *earliest = sbitmap_vector_alloc (num_edges, num_exprs);
2939
2940 compute_available (m_avloc, m_kill, m_avout, m_avin);
2941 compute_antinout_edge (m_antloc, m_transp, antin, antout);
2942 compute_earliest (m_edges, num_exprs, antin, antout, m_avout, m_kill,
2943 earliest);
2944
2945 if (dump_file && (dump_flags & TDF_DETAILS))
2946 {
2947 fprintf (dump_file, "\n Compute LCM earliest insert data:\n\n");
2948 fprintf (dump_file, " Expression List (%u):\n", num_exprs);
2949 for (unsigned i = 0; i < num_exprs; i++)
2950 {
2951 const auto &info = *m_exprs[i];
2952 fprintf (dump_file, " Expr[%u]: ", i);
2953 info.dump (dump_file, " ");
2954 }
2955 fprintf (dump_file, "\n bitmap data:\n");
2956 for (const bb_info *bb : crtl->ssa->bbs ())
2957 {
2958 unsigned int i = bb->index ();
2959 fprintf (dump_file, " BB %u:\n", i);
2960 fprintf (dump_file, " avloc: ");
2961 dump_bitmap_file (dump_file, m_avloc[i]);
2962 fprintf (dump_file, " kill: ");
2963 dump_bitmap_file (dump_file, m_kill[i]);
2964 fprintf (dump_file, " antloc: ");
2965 dump_bitmap_file (dump_file, m_antloc[i]);
2966 fprintf (dump_file, " transp: ");
2967 dump_bitmap_file (dump_file, m_transp[i]);
2968
2969 fprintf (dump_file, " avin: ");
2970 dump_bitmap_file (dump_file, m_avin[i]);
2971 fprintf (dump_file, " avout: ");
2972 dump_bitmap_file (dump_file, m_avout[i]);
2973 fprintf (dump_file, " antin: ");
2974 dump_bitmap_file (dump_file, antin[i]);
2975 fprintf (dump_file, " antout: ");
2976 dump_bitmap_file (dump_file, antout[i]);
2977 }
2978 fprintf (dump_file, "\n");
2979 fprintf (dump_file, " earliest:\n");
2980 for (unsigned ed = 0; ed < num_edges; ed++)
2981 {
2982 edge eg = INDEX_EDGE (m_edges, ed);
2983
2984 if (bitmap_empty_p (earliest[ed]))
2985 continue;
2986 fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index,
2987 eg->dest->index);
2988 dump_bitmap_file (dump_file, earliest[ed]);
2989 }
2990 fprintf (dump_file, "\n");
2991 }
2992
2993 if (dump_file && (dump_flags & TDF_DETAILS))
2994 {
2995 fprintf (dump_file, " Fused global info result:\n");
2996 }
2997
2998 bool changed = false;
2999 for (unsigned ed = 0; ed < num_edges; ed++)
3000 {
3001 sbitmap e = earliest[ed];
3002 if (bitmap_empty_p (e))
3003 continue;
3004
3005 unsigned int expr_index;
3006 sbitmap_iterator sbi;
3007 EXECUTE_IF_SET_IN_BITMAP (e, 0, expr_index, sbi)
3008 {
3009 vsetvl_info &curr_info = *m_exprs[expr_index];
3010 if (!curr_info.valid_p ())
3011 continue;
3012
3013 edge eg = INDEX_EDGE (m_edges, ed);
3014 if (eg->probability == profile_probability::never ())
3015 continue;
3016 if (eg->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
3017 || eg->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
3018 continue;
3019
3020 /* When an earliest edge has multiple set bits, the edge may
3021 be part of an infinite loop in its preds or succs, or carry
3022 multiple conflicting vsetvl expressions, which makes the
3023 edge unsuitable. We don't perform fusion in that situation. */
3024 if (bitmap_count_bits (e) != 1)
3025 continue;
3026
3027 vsetvl_block_info &src_block_info = get_block_info (eg->src);
3028 vsetvl_block_info &dest_block_info = get_block_info (eg->dest);
3029
3030 if (src_block_info.probability
3031 == profile_probability::uninitialized ())
3032 continue;
3033
3034 if (src_block_info.empty_p ())
3035 {
3036 vsetvl_info new_curr_info = curr_info;
3037 new_curr_info.set_bb (crtl->ssa->bb (eg->dest));
3038 bool has_compatible_p = false;
3039 unsigned int def_expr_index;
3040 sbitmap_iterator sbi2;
3041 EXECUTE_IF_SET_IN_BITMAP (
3042 m_vsetvl_def_in[new_curr_info.get_bb ()->index ()], 0,
3043 def_expr_index, sbi2)
3044 {
3045 vsetvl_info &prev_info = *m_vsetvl_def_exprs[def_expr_index];
3046 if (!prev_info.valid_p ())
3047 continue;
3048 if (m_dem.compatible_p (prev_info, new_curr_info))
3049 {
3050 has_compatible_p = true;
3051 break;
3052 }
3053 }
3054 if (!has_compatible_p)
3055 {
3056 if (dump_file && (dump_flags & TDF_DETAILS))
3057 {
3058 fprintf (dump_file,
3059 " Forbidden lift up vsetvl info into bb %u "
3060 "since there is no vsetvl info that reaching in "
3061 "is compatible with it:",
3062 eg->src->index);
3063 curr_info.dump (dump_file, " ");
3064 }
3065 continue;
3066 }
3067
3068 if (dump_file && (dump_flags & TDF_DETAILS))
3069 {
3070 fprintf (dump_file,
3071 " Set empty bb %u to info:", eg->src->index);
3072 curr_info.dump (dump_file, " ");
3073 }
3074 src_block_info.set_info (curr_info);
3075 src_block_info.probability = dest_block_info.probability;
3076 changed = true;
3077 }
3078 else if (src_block_info.has_info ())
3079 {
3080 vsetvl_info &prev_info = src_block_info.get_exit_info ();
3081 gcc_assert (prev_info.valid_p ());
3082
3083 if (m_dem.compatible_p (prev_info, curr_info))
3084 {
3085 if (dump_file && (dump_flags & TDF_DETAILS))
3086 {
3087 fprintf (dump_file, " Fuse curr info since prev info "
3088 "compatible with it:\n");
3089 fprintf (dump_file, " prev_info: ");
3090 prev_info.dump (dump_file, " ");
3091 fprintf (dump_file, " curr_info: ");
3092 curr_info.dump (dump_file, " ");
3093 }
3094 m_dem.merge (prev_info, curr_info);
3095 if (dump_file && (dump_flags & TDF_DETAILS))
3096 {
3097 fprintf (dump_file, " prev_info after fused: ");
3098 prev_info.dump (dump_file, " ");
3099 fprintf (dump_file, "\n");
3100 }
3101 changed = true;
3102 if (src_block_info.has_info ())
3103 src_block_info.probability += dest_block_info.probability;
3104 }
3105 else if (src_block_info.has_info ()
3106 && !m_dem.compatible_p (prev_info, curr_info))
3107 {
3108 /* Cancel lift up if probabilities are equal. */
3109 if (successors_probability_equal_p (eg->src))
3110 {
3111 if (dump_file && (dump_flags & TDF_DETAILS))
3112 {
3113 fprintf (dump_file,
3114 " Change empty bb %u to from:",
3115 eg->src->index);
3116 prev_info.dump (dump_file, " ");
3117 fprintf (dump_file,
3118 " to (higher probability):");
3119 curr_info.dump (dump_file, " ");
3120 }
3121 src_block_info.set_empty_info ();
3122 src_block_info.probability
3123 = profile_probability::uninitialized ();
3124 changed = true;
3125 }
3126 /* Choose the one with higher probability. */
3127 else if (dest_block_info.probability
3128 > src_block_info.probability)
3129 {
3130 if (dump_file && (dump_flags & TDF_DETAILS))
3131 {
3132 fprintf (dump_file,
3133 " Change empty bb %u to from:",
3134 eg->src->index);
3135 prev_info.dump (dump_file, " ");
3136 fprintf (dump_file,
3137 " to (higher probability):");
3138 curr_info.dump (dump_file, " ");
3139 }
3140 src_block_info.set_info (curr_info);
3141 src_block_info.probability = dest_block_info.probability;
3142 changed = true;
3143 }
3144 }
3145 }
3146 else
3147 {
3148 vsetvl_info &prev_info = src_block_info.get_exit_info ();
3149 if (!prev_info.valid_p ()
3150 || m_dem.available_p (prev_info, curr_info))
3151 continue;
3152
3153 if (m_dem.compatible_p (prev_info, curr_info))
3154 {
3155 if (dump_file && (dump_flags & TDF_DETAILS))
3156 {
3157 fprintf (dump_file, " Fuse curr info since prev info "
3158 "compatible with it:\n");
3159 fprintf (dump_file, " prev_info: ");
3160 prev_info.dump (dump_file, " ");
3161 fprintf (dump_file, " curr_info: ");
3162 curr_info.dump (dump_file, " ");
3163 }
3164 m_dem.merge (prev_info, curr_info);
3165 if (dump_file && (dump_flags & TDF_DETAILS))
3166 {
3167 fprintf (dump_file, " prev_info after fused: ");
3168 prev_info.dump (dump_file, " ");
3169 fprintf (dump_file, "\n");
3170 }
3171 changed = true;
3172 }
3173 }
3174 }
3175 }
3176
3177 if (dump_file && (dump_flags & TDF_DETAILS))
3178 {
3179 fprintf (dump_file, "\n");
3180 }
3181
3182 sbitmap_vector_free (antin);
3183 sbitmap_vector_free (antout);
3184 sbitmap_vector_free (earliest);
3185 free_edge_list (m_edges);
3186
3187 return changed;
3188 }
3189
3190 void
3191 pre_vsetvl::pre_global_vsetvl_info ()
3192 {
3193 compute_avl_def_data ();
3194 compute_vsetvl_def_data ();
3195 compute_lcm_local_properties ();
3196
3197 unsigned num_exprs = m_exprs.length ();
3198 m_edges = pre_edge_lcm_avs (num_exprs, m_transp, m_avloc, m_antloc, m_kill,
3199 m_avin, m_avout, &m_insert, &m_del);
3200 unsigned num_edges = NUM_EDGES (m_edges);
3201
3202 if (dump_file && (dump_flags & TDF_DETAILS))
3203 {
3204 fprintf (dump_file, "\n Compute LCM insert and delete data:\n\n");
3205 fprintf (dump_file, " Expression List (%u):\n", num_exprs);
3206 for (unsigned i = 0; i < num_exprs; i++)
3207 {
3208 const auto &info = *m_exprs[i];
3209 fprintf (dump_file, " Expr[%u]: ", i);
3210 info.dump (dump_file, " ");
3211 }
3212 fprintf (dump_file, "\n bitmap data:\n");
3213 for (const bb_info *bb : crtl->ssa->bbs ())
3214 {
3215 unsigned i = bb->index ();
3216 fprintf (dump_file, " BB %u:\n", i);
3217 fprintf (dump_file, " avloc: ");
3218 dump_bitmap_file (dump_file, m_avloc[i]);
3219 fprintf (dump_file, " kill: ");
3220 dump_bitmap_file (dump_file, m_kill[i]);
3221 fprintf (dump_file, " antloc: ");
3222 dump_bitmap_file (dump_file, m_antloc[i]);
3223 fprintf (dump_file, " transp: ");
3224 dump_bitmap_file (dump_file, m_transp[i]);
3225
3226 fprintf (dump_file, " avin: ");
3227 dump_bitmap_file (dump_file, m_avin[i]);
3228 fprintf (dump_file, " avout: ");
3229 dump_bitmap_file (dump_file, m_avout[i]);
3230 fprintf (dump_file, " del: ");
3231 dump_bitmap_file (dump_file, m_del[i]);
3232 }
3233 fprintf (dump_file, "\n");
3234 fprintf (dump_file, " insert:\n");
3235 for (unsigned ed = 0; ed < num_edges; ed++)
3236 {
3237 edge eg = INDEX_EDGE (m_edges, ed);
3238
3239 if (bitmap_empty_p (m_insert[ed]))
3240 continue;
3241 fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index,
3242 eg->dest->index);
3243 dump_bitmap_file (dump_file, m_insert[ed]);
3244 }
3245 }
3246
3247 /* Remove vsetvl infos as LCM suggests. */
3248 for (const bb_info *bb : crtl->ssa->bbs ())
3249 {
3250 sbitmap d = m_del[bb->index ()];
3251 if (bitmap_count_bits (d) == 0)
3252 continue;
3253 gcc_assert (bitmap_count_bits (d) == 1);
3254 unsigned expr_index = bitmap_first_set_bit (d);
3255 vsetvl_info &info = *m_exprs[expr_index];
3256 gcc_assert (info.valid_p ());
3257 gcc_assert (info.get_bb () == bb);
3258 const vsetvl_block_info &block_info = get_block_info (info.get_bb ());
3259 gcc_assert (block_info.get_entry_info () == info);
3260 info.set_delete ();
3261 }
3262
3263 /* Remove vsetvl infos that are available from all predecessors of the block. */
3264 for (const bb_info *bb : crtl->ssa->bbs ())
3265 {
3266 vsetvl_block_info &block_info = get_block_info (bb);
3267 if (block_info.empty_p () || !block_info.full_available)
3268 continue;
3269
3270 vsetvl_info &info = block_info.get_entry_info ();
3271 info.set_delete ();
3272 }
3273
3274 for (const bb_info *bb : crtl->ssa->bbs ())
3275 {
3276 vsetvl_block_info &block_info = get_block_info (bb);
3277 if (block_info.empty_p ())
3278 continue;
3279 vsetvl_info &curr_info = block_info.get_entry_info ();
3280 if (curr_info.delete_p ())
3281 {
3282 if (block_info.local_infos.is_empty ())
3283 continue;
3284 curr_info = block_info.local_infos[0];
3285 }
3286 if (curr_info.valid_p () && !curr_info.vl_used_by_non_rvv_insn_p ()
3287 && preds_all_same_avl_and_ratio_p (curr_info))
3288 curr_info.set_change_vtype_only ();
3289
3290 vsetvl_info prev_info = vsetvl_info ();
3291 prev_info.set_empty ();
3292 for (auto &curr_info : block_info.local_infos)
3293 {
3294 if (prev_info.valid_p () && curr_info.valid_p ()
3295 && m_dem.avl_available_p (prev_info, curr_info)
3296 && prev_info.get_ratio () == curr_info.get_ratio ())
3297 curr_info.set_change_vtype_only ();
3298 prev_info = curr_info;
3299 }
3300 }
3301 }
3302
3303 void
3304 pre_vsetvl::emit_vsetvl ()
3305 {
3306 bool need_commit = false;
3307
3308 for (const bb_info *bb : crtl->ssa->bbs ())
3309 {
3310 for (const auto &curr_info : get_block_info (bb).local_infos)
3311 {
3312 insn_info *insn = curr_info.get_insn ();
3313 if (curr_info.delete_p ())
3314 {
3315 if (vsetvl_insn_p (insn->rtl ()))
3316 remove_vsetvl_insn (curr_info);
3317 continue;
3318 }
3319 else if (curr_info.valid_p ())
3320 {
3321 if (vsetvl_insn_p (insn->rtl ()))
3322 {
3323 const vsetvl_info temp = vsetvl_info (insn);
3324 if (!(curr_info == temp))
3325 {
3326 if (dump_file)
3327 {
3328 fprintf (dump_file, "\n Change vsetvl info from: ");
3329 temp.dump (dump_file, " ");
3330 fprintf (dump_file, " to: ");
3331 curr_info.dump (dump_file, " ");
3332 }
3333 change_vsetvl_insn (curr_info);
3334 }
3335 }
3336 else
3337 {
3338 if (dump_file)
3339 {
3340 fprintf (dump_file,
3341 "\n Insert vsetvl info before insn %d: ",
3342 insn->uid ());
3343 curr_info.dump (dump_file, " ");
3344 }
3345 insert_vsetvl_insn (EMIT_BEFORE, curr_info);
3346 }
3347 }
3348 }
3349 }
3350
3351 for (const vsetvl_info &item : m_delete_list)
3352 {
3353 gcc_assert (vsetvl_insn_p (item.get_insn ()->rtl ()));
3354 remove_vsetvl_insn (item);
3355 }
3356
3357 /* Insert the vsetvl infos that were not deleted after lift up. */
3358 for (const bb_info *bb : crtl->ssa->bbs ())
3359 {
3360 const vsetvl_block_info &block_info = get_block_info (bb);
3361 if (!block_info.has_info ())
3362 continue;
3363
3364 const vsetvl_info &footer_info = block_info.get_exit_info ();
3365
3366 if (footer_info.delete_p ())
3367 continue;
3368
3369 edge eg;
3370 edge_iterator eg_iterator;
3371 FOR_EACH_EDGE (eg, eg_iterator, bb->cfg_bb ()->succs)
3372 {
3373 gcc_assert (!(eg->flags & EDGE_ABNORMAL));
3374 if (dump_file)
3375 {
3376 fprintf (
3377 dump_file,
3378 "\n Insert missed vsetvl info at edge(bb %u -> bb %u): ",
3379 eg->src->index, eg->dest->index);
3380 footer_info.dump (dump_file, " ");
3381 }
3382 start_sequence ();
3383 insert_vsetvl_insn (EMIT_DIRECT, footer_info);
3384 rtx_insn *rinsn = get_insns ();
3385 end_sequence ();
3386 default_rtl_profile ();
3387 insert_insn_on_edge (rinsn, eg);
3388 need_commit = true;
3389 }
3390 }
3391
3392 /* Insert vsetvl insns on edges as the LCM m_insert data suggests. */
3393 for (int ed = 0; ed < NUM_EDGES (m_edges); ed++)
3394 {
3395 edge eg = INDEX_EDGE (m_edges, ed);
3396 sbitmap i = m_insert[ed];
3397 if (bitmap_count_bits (i) < 1)
3398 continue;
3399
3400 if (bitmap_count_bits (i) > 1)
3401 /* For code with an infinite loop (e.g. pr61634.c), the data flow is
3402 completely wrong. */
3403 continue;
3404
3405 gcc_assert (bitmap_count_bits (i) == 1);
3406 unsigned expr_index = bitmap_first_set_bit (i);
3407 const vsetvl_info &info = *m_exprs[expr_index];
3408 gcc_assert (info.valid_p ());
3409 if (dump_file)
3410 {
3411 fprintf (dump_file,
3412 "\n Insert vsetvl info at edge(bb %u -> bb %u): ",
3413 eg->src->index, eg->dest->index);
3414 info.dump (dump_file, " ");
3415 }
3416 rtl_profile_for_edge (eg);
3417 start_sequence ();
3418
3419 insert_vsetvl_insn (EMIT_DIRECT, info);
3420 rtx_insn *rinsn = get_insns ();
3421 end_sequence ();
3422 default_rtl_profile ();
3423
3424 /* We should not get an abnormal edge here. */
3425 gcc_assert (!(eg->flags & EDGE_ABNORMAL));
3426 need_commit = true;
3427 insert_insn_on_edge (rinsn, eg);
3428 }
3429
3430 if (need_commit)
3431 commit_edge_insertions ();
3432 }
3433
3434 void
3435 pre_vsetvl::cleanup ()
3436 {
3437 remove_avl_operand ();
3438 remove_unused_dest_operand ();
3439 }
3440
3441 void
3442 pre_vsetvl::remove_avl_operand ()
3443 {
3444 basic_block cfg_bb;
3445 rtx_insn *rinsn;
3446 FOR_ALL_BB_FN (cfg_bb, cfun)
3447 FOR_BB_INSNS (cfg_bb, rinsn)
3448 if (NONDEBUG_INSN_P (rinsn) && has_vl_op (rinsn)
3449 && REG_P (get_vl (rinsn)))
3450 {
3451 rtx avl = get_vl (rinsn);
3452 if (count_regno_occurrences (rinsn, REGNO (avl)) == 1)
3453 {
3454 rtx new_pat;
3455 if (fault_first_load_p (rinsn))
3456 new_pat
3457 = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx);
3458 else
3459 {
3460 rtx set = single_set (rinsn);
3461 rtx src
3462 = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx);
3463 new_pat = gen_rtx_SET (SET_DEST (set), src);
3464 }
3465 if (dump_file)
3466 {
3467 fprintf (dump_file, " Cleanup insn %u's avl operand:\n",
3468 INSN_UID (rinsn));
3469 print_rtl_single (dump_file, rinsn);
3470 }
3471 validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false);
3472 }
3473 }
3474 }
3475
3476 void
3477 pre_vsetvl::remove_unused_dest_operand ()
3478 {
3479 df_analyze ();
3480 basic_block cfg_bb;
3481 rtx_insn *rinsn;
3482 FOR_ALL_BB_FN (cfg_bb, cfun)
3483 FOR_BB_INSNS (cfg_bb, rinsn)
3484 if (NONDEBUG_INSN_P (rinsn) && vsetvl_insn_p (rinsn))
3485 {
3486 rtx vl = get_vl (rinsn);
3487 vsetvl_info info = vsetvl_info (rinsn);
3488 if (has_no_uses (cfg_bb, rinsn, REGNO (vl)))
3489 if (!info.has_vlmax_avl ())
3490 {
3491 rtx new_pat = info.get_vsetvl_pat (true);
3492 if (dump_file)
3493 {
3494 fprintf (dump_file,
3495 " Remove vsetvl insn %u's dest(vl) operand since "
3496 "it unused:\n",
3497 INSN_UID (rinsn));
3498 print_rtl_single (dump_file, rinsn);
3499 }
3500 validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat,
3501 false);
3502 }
3503 }
3504 }
3505
3506 const pass_data pass_data_vsetvl = {
3507 RTL_PASS, /* type */
3508 "vsetvl", /* name */
3509 OPTGROUP_NONE, /* optinfo_flags */
3510 TV_NONE, /* tv_id */
3511 0, /* properties_required */
3512 0, /* properties_provided */
3513 0, /* properties_destroyed */
3514 0, /* todo_flags_start */
3515 0, /* todo_flags_finish */
3516 };
3517
3518 class pass_vsetvl : public rtl_opt_pass
3519 {
3520 private:
3521 void simple_vsetvl ();
3522 void lazy_vsetvl ();
3523
3524 public:
3525 pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {}
3526
3527 /* opt_pass methods: */
3528 virtual bool gate (function *) final override { return TARGET_VECTOR; }
3529 virtual unsigned int execute (function *) final override;
3530 }; // class pass_vsetvl
3531
3532 void
3533 pass_vsetvl::simple_vsetvl ()
3534 {
3535 if (dump_file)
3536 fprintf (dump_file, "\nEntering Simple VSETVL PASS\n");
3537
3538 basic_block cfg_bb;
3539 rtx_insn *rinsn;
3540 FOR_ALL_BB_FN (cfg_bb, cfun)
3541 {
3542 FOR_BB_INSNS (cfg_bb, rinsn)
3543 {
3544 if (!NONDEBUG_INSN_P (rinsn))
3545 continue;
3546 if (has_vtype_op (rinsn))
3547 {
3548 const auto &info = vsetvl_info (rinsn);
3549 rtx pat = info.get_vsetvl_pat ();
3550 emit_insn_before (pat, rinsn);
3551 if (dump_file)
3552 {
3553 fprintf (dump_file, " Insert vsetvl insn before insn %d:\n",
3554 INSN_UID (rinsn));
3555 print_rtl_single (dump_file, PREV_INSN (rinsn));
3556 }
3557 }
3558 }
3559 }
3560 }
3561
3562 /* Lazy vsetvl insertion for optimize > 0. */
3563 void
3564 pass_vsetvl::lazy_vsetvl ()
3565 {
3566 if (dump_file)
3567 fprintf (dump_file, "\nEntering Lazy VSETVL PASS\n\n");
3568
3569 pre_vsetvl pre = pre_vsetvl ();
3570
3571 if (dump_file)
3572 fprintf (dump_file, "\nPhase 1: Fuse local vsetvl infos.\n\n");
3573 pre.fuse_local_vsetvl_info ();
3574 if (dump_file && (dump_flags & TDF_DETAILS))
3575 pre.dump (dump_file, "phase 1");
3576
3577 /* Phase 2: Fuse header and footer vsetvl infos between basic blocks. */
3578 if (dump_file)
3579 fprintf (dump_file, "\nPhase 2: Lift up vsetvl info.\n\n");
3580 bool changed;
3581 int fused_count = 0;
3582 do
3583 {
3584 if (dump_file)
3585 fprintf (dump_file, " Try lift up %d.\n\n", fused_count);
3586 changed = pre.earliest_fuse_vsetvl_info ();
3587 fused_count += 1;
3588 } while (changed);
3589
3590 if (dump_file && (dump_flags & TDF_DETAILS))
3591 pre.dump (dump_file, "phase 2");
3592
3593 /* Phase 3: Reducing redundant vsetvl infos using LCM. */
3594 if (dump_file)
3595 fprintf (dump_file, "\nPhase 3: Reduce global vsetvl infos.\n\n");
3596 pre.pre_global_vsetvl_info ();
3597 if (dump_file && (dump_flags & TDF_DETAILS))
3598 pre.dump (dump_file, "phase 3");
3599
3600 /* Phase 4: Insert, modify and remove vsetvl insns. */
3601 if (dump_file)
3602 fprintf (dump_file,
3603 "\nPhase 4: Insert, modify and remove vsetvl insns.\n\n");
3604 pre.emit_vsetvl ();
3605
3606 /* Phase 5: Cleanup. */
3607 if (dump_file)
3608 fprintf (dump_file, "\nPhase 5: Cleanup\n\n");
3609 pre.cleanup ();
3610
3611 pre.finish ();
3612 }
3613
3614 /* Main entry point for this pass. */
3615 unsigned int
3616 pass_vsetvl::execute (function *)
3617 {
3618 if (n_basic_blocks_for_fn (cfun) <= 0)
3619 return 0;
3620
3621 /* An RVV instruction is not stable: it may change after being split.
3622 We need to split the insns here to avoid potential issues, since
3623 the VSETVL pass runs before the split pass. */
3624 split_all_insns ();
3625
3626 /* Return early if there are no vector instructions. */
3627 if (!has_vector_insn (cfun))
3628 return 0;
3629
3630 if (!optimize)
3631 simple_vsetvl ();
3632 else
3633 lazy_vsetvl ();
3634
3635 return 0;
3636 }
3637
3638 rtl_opt_pass *
3639 make_pass_vsetvl (gcc::context *ctxt)
3640 {
3641 return new pass_vsetvl (ctxt);
3642 }