1 /* VSETVL pass for RISC-V 'V' Extension for GNU compiler.
2 Copyright (C) 2022-2024 Free Software Foundation, Inc.
3 Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or(at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* The values of the vl and vtype registers will affect the behavior of RVV
22 insns. That is, when we need to execute an RVV instruction, we need to set
23 the correct vl and vtype values by executing the vsetvl instruction before.
24 Executing the fewest number of vsetvl instructions while keeping the behavior
25 the same is the problem this pass is trying to solve. This vsetvl pass is
26 divided into 5 phases:
28 - Phase 1 (fuse local vsetvl infos): traverses each Basic Block, parses
29 each instruction in it that affects vl and vtype state and generates an
30 array of vsetvl_info objects. Then traverse the vsetvl_info array from
31 front to back and perform fusion according to the fusion rules. The fused
32 vsetvl infos are stored in the vsetvl_block_info object's `infos` field.
34 - Phase 2 (earliest fuse global vsetvl infos): The header_info and
35 footer_info of vsetvl_block_info are used as expressions, and the
36 earliest of each expression is computed. Based on the earliest
37 information, try to lift up the corresponding vsetvl info to the src
38 basic block of the edge (mainly to reduce the total number of vsetvl
39 instructions, this uplift will cause some execution paths to execute
40 vsetvl instructions that shouldn't be there).
42 - Phase 3 (pre global vsetvl info): The header_info and footer_info of
43 vsetvl_block_info are used as expressions, and the LCM algorithm is used
44 to compute the header_info that needs to be deleted and the one that
45 needs to be inserted in some edges.
47 - Phase 4 (emit vsetvl insns) : Based on the fusion result of Phase 1 and
48 the deletion and insertion information of Phase 3, the mandatory vsetvl
49 instruction insertion, modification and deletion are performed.
51 - Phase 5 (cleanup): Clean up the avl operand in the RVV operator
52 instruction and cleanup the unused dest operand of the vsetvl insn.
54 After the Phase 1 a virtual CFG of vsetvl_info is generated. The virtual
55 basic block is represented by vsetvl_block_info, and the virtual vsetvl
56 statements inside are represented by vsetvl_info. The later phases 2 and 3
57 are constantly modifying and adjusting this virtual CFG. Phase 4 performs
58 insertion, modification and deletion of vsetvl instructions based on the
   optimized virtual CFG.  The Phase 1, 2 and 3 do not involve modifications to
   the RTL.  */
63 #define IN_TARGET_CODE 1
64 #define INCLUDE_ALGORITHM
65 #define INCLUDE_FUNCTIONAL
69 #include "coretypes.h"
74 #include "tree-pass.h"
77 #include "cfgcleanup.h"
78 #include "insn-config.h"
79 #include "insn-attr.h"
80 #include "insn-opinit.h"
81 #include "tm-constrs.h"
86 #include "profile-count.h"
89 using namespace rtl_ssa
;
90 using namespace riscv_vector
;
/* Set the bitmap DST to the union of SRC of predecessors of
   basic block B.
   It's a bit different from bitmap_union_of_preds in cfganal.cc.  This function
   takes into account the case where pred is ENTRY basic block.  The main reason
   for this difference is to make it easier to insert some special value into
   the ENTRY base block.  For example, vsetvl_info with a status of UNKNOWN.  */
99 bitmap_union_of_preds_with_entry (sbitmap dst
, sbitmap
*src
, basic_block b
)
101 unsigned int set_size
= dst
->size
;
105 for (ix
= 0; ix
< EDGE_COUNT (b
->preds
); ix
++)
107 e
= EDGE_PRED (b
, ix
);
108 bitmap_copy (dst
, src
[e
->src
->index
]);
112 if (ix
== EDGE_COUNT (b
->preds
))
115 for (ix
++; ix
< EDGE_COUNT (b
->preds
); ix
++)
118 SBITMAP_ELT_TYPE
*p
, *r
;
120 e
= EDGE_PRED (b
, ix
);
121 p
= src
[e
->src
->index
]->elms
;
123 for (i
= 0; i
< set_size
; i
++)
/* Compute the reaching definition in and out based on the GEN and KILL
   information in each basic block.
   This function references the compute_available implementation in lcm.cc.  */
132 compute_reaching_defintion (sbitmap
*gen
, sbitmap
*kill
, sbitmap
*in
,
136 basic_block
*worklist
, *qin
, *qout
, *qend
, bb
;
140 /* Allocate a worklist array/queue. Entries are only added to the
141 list if they were not already on the list. So the size is
142 bounded by the number of basic blocks. */
143 qin
= qout
= worklist
144 = XNEWVEC (basic_block
, n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
);
146 /* Put every block on the worklist; this is necessary because of the
147 optimistic initialization of AVOUT above. Use reverse postorder
148 to make the forward dataflow problem require less iterations. */
149 int *rpo
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
);
150 int n
= pre_and_rev_post_order_compute_fn (cfun
, NULL
, rpo
, false);
151 for (int i
= 0; i
< n
; ++i
)
153 bb
= BASIC_BLOCK_FOR_FN (cfun
, rpo
[i
]);
160 qend
= &worklist
[n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
];
161 qlen
= n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
;
163 /* Mark blocks which are successors of the entry block so that we
164 can easily identify them below. */
165 FOR_EACH_EDGE (e
, ei
, ENTRY_BLOCK_PTR_FOR_FN (cfun
)->succs
)
166 e
->dest
->aux
= ENTRY_BLOCK_PTR_FOR_FN (cfun
);
168 /* Iterate until the worklist is empty. */
171 /* Take the first entry off the worklist. */
178 /* Do not clear the aux field for blocks which are successors of the
179 ENTRY block. That way we never add then to the worklist again. */
180 if (bb
->aux
!= ENTRY_BLOCK_PTR_FOR_FN (cfun
))
183 bitmap_union_of_preds_with_entry (in
[bb
->index
], out
, bb
);
185 if (bitmap_ior_and_compl (out
[bb
->index
], gen
[bb
->index
], in
[bb
->index
],
187 /* If the out state of this block changed, then we need
188 to add the successors of this block to the worklist
189 if they are not already on the worklist. */
190 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
191 if (!e
->dest
->aux
&& e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
))
202 clear_aux_for_edges ();
203 clear_aux_for_blocks ();
/* Classification of vsetvl instruction.  */
enum vsetvl_type
{
  VSETVL_NORMAL,
  VSETVL_VTYPE_CHANGE_ONLY,
  VSETVL_DISCARD_RESULT,
  NUM_VSETVL_TYPE
};

/* How a newly created vsetvl insn should be emitted.  */
enum emit_type
{
  /* emit_insn directly.  */
  EMIT_DIRECT,
  EMIT_BEFORE,
  EMIT_AFTER,
};
224 /* dump helper functions */
226 vlmul_to_str (vlmul_type vlmul
)
239 return "INVALID LMUL";
/* Dump helper: render a tail/mask policy flag as text.  */
static const char *
policy_to_str (bool agnostic_p)
{
  if (agnostic_p)
    return "agnostic";
  return "undisturbed";
}
258 /* Return true if it is an RVV instruction depends on VTYPE global
261 has_vtype_op (rtx_insn
*rinsn
)
263 return recog_memoized (rinsn
) >= 0 && get_attr_has_vtype_op (rinsn
);
266 /* Return true if the instruction ignores VLMUL field of VTYPE. */
268 ignore_vlmul_insn_p (rtx_insn
*rinsn
)
270 return get_attr_type (rinsn
) == TYPE_VIMOVVX
271 || get_attr_type (rinsn
) == TYPE_VFMOVVF
272 || get_attr_type (rinsn
) == TYPE_VIMOVXV
273 || get_attr_type (rinsn
) == TYPE_VFMOVFV
;
276 /* Return true if the instruction is scalar move instruction. */
278 scalar_move_insn_p (rtx_insn
*rinsn
)
280 return get_attr_type (rinsn
) == TYPE_VIMOVXV
281 || get_attr_type (rinsn
) == TYPE_VFMOVFV
;
284 /* Return true if the instruction is fault first load instruction. */
286 fault_first_load_p (rtx_insn
*rinsn
)
288 return recog_memoized (rinsn
) >= 0
289 && (get_attr_type (rinsn
) == TYPE_VLDFF
290 || get_attr_type (rinsn
) == TYPE_VLSEGDFF
);
293 /* Return true if the instruction is read vl instruction. */
295 read_vl_insn_p (rtx_insn
*rinsn
)
297 return recog_memoized (rinsn
) >= 0 && get_attr_type (rinsn
) == TYPE_RDVL
;
300 /* Return true if it is a vsetvl instruction. */
302 vector_config_insn_p (rtx_insn
*rinsn
)
304 return recog_memoized (rinsn
) >= 0 && get_attr_type (rinsn
) == TYPE_VSETVL
;
307 /* Return true if it is vsetvldi or vsetvlsi. */
309 vsetvl_insn_p (rtx_insn
*rinsn
)
311 if (!rinsn
|| !vector_config_insn_p (rinsn
))
313 return (INSN_CODE (rinsn
) == CODE_FOR_vsetvldi
314 || INSN_CODE (rinsn
) == CODE_FOR_vsetvlsi
);
317 /* Return true if it is vsetvl zero, rs1. */
319 vsetvl_discard_result_insn_p (rtx_insn
*rinsn
)
321 if (!vector_config_insn_p (rinsn
))
323 return (INSN_CODE (rinsn
) == CODE_FOR_vsetvl_discard_resultdi
324 || INSN_CODE (rinsn
) == CODE_FOR_vsetvl_discard_resultsi
);
328 real_insn_and_same_bb_p (const insn_info
*insn
, const bb_info
*bb
)
330 return insn
!= nullptr && insn
->is_real () && insn
->bb () == bb
;
333 /* Helper function to get VL operand for VLMAX insn. */
335 get_vl (rtx_insn
*rinsn
)
337 if (has_vl_op (rinsn
))
339 extract_insn_cached (rinsn
);
340 return recog_data
.operand
[get_attr_vl_op_idx (rinsn
)];
342 return SET_DEST (XVECEXP (PATTERN (rinsn
), 0, 0));
345 /* Helper function to get AVL operand. */
347 get_avl (rtx_insn
*rinsn
)
349 if (vsetvl_insn_p (rinsn
) || vsetvl_discard_result_insn_p (rinsn
))
350 return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn
), 0, 0)), 0, 0);
352 if (!has_vl_op (rinsn
))
354 if (vlmax_avl_type_p (rinsn
))
356 extract_insn_cached (rinsn
);
357 return recog_data
.operand
[get_attr_vl_op_idx (rinsn
)];
360 /* Get default mask policy. */
364 /* For the instruction that doesn't require MA, we still need a default value
365 to emit vsetvl. We pick up the default value according to prefer policy. */
366 return (bool) (get_prefer_mask_policy () & 0x1
367 || (get_prefer_mask_policy () >> 1 & 0x1));
370 /* Helper function to get MA operand. */
372 mask_agnostic_p (rtx_insn
*rinsn
)
374 /* If it doesn't have MA, we return agnostic by default. */
375 extract_insn_cached (rinsn
);
376 int ma
= get_attr_ma (rinsn
);
377 return ma
== INVALID_ATTRIBUTE
? get_default_ma () : IS_AGNOSTIC (ma
);
380 /* Return true if FN has a vector instruction that use VL/VTYPE. */
382 has_vector_insn (function
*fn
)
386 FOR_ALL_BB_FN (cfg_bb
, fn
)
387 FOR_BB_INSNS (cfg_bb
, rinsn
)
388 if (NONDEBUG_INSN_P (rinsn
) && has_vtype_op (rinsn
))
394 calculate_vlmul (unsigned int sew
, unsigned int ratio
)
396 const vlmul_type ALL_LMUL
[]
397 = {LMUL_1
, LMUL_2
, LMUL_4
, LMUL_8
, LMUL_F8
, LMUL_F4
, LMUL_F2
};
398 for (const vlmul_type vlmul
: ALL_LMUL
)
399 if (calculate_ratio (sew
, vlmul
) == ratio
)
401 return LMUL_RESERVED
;
404 /* Get the currently supported maximum sew used in the int rvv instructions. */
408 if (TARGET_VECTOR_ELEN_64
)
410 else if (TARGET_VECTOR_ELEN_32
)
415 /* Get the currently supported maximum sew used in the float rvv instructions.
420 if (TARGET_VECTOR_ELEN_FP_64
)
422 else if (TARGET_VECTOR_ELEN_FP_32
)
424 else if (TARGET_VECTOR_ELEN_FP_16
)
/* Bitmask of the RTL-SSA def_info kinds a walk is allowed to follow.  */
enum def_type
{
  REAL_SET = 1 << 0,
  PHI_SET = 1 << 1,
  BB_HEAD_SET = 1 << 2,
  BB_END_SET = 1 << 3,
  /* ??? TODO: In RTL_SSA framework, we have REAL_SET,
     PHI_SET, BB_HEAD_SET, BB_END_SET and
     CLOBBER_DEF def_info types. Currently,
     we conservatively do not optimize clobber
     def since we don't see the case that we
     need to optimize it.  */
  CLOBBER_DEF = 1 << 4,
};
445 insn_should_be_added_p (const insn_info
*insn
, unsigned int types
)
447 if (insn
->is_real () && (types
& REAL_SET
))
449 if (insn
->is_phi () && (types
& PHI_SET
))
451 if (insn
->is_bb_head () && (types
& BB_HEAD_SET
))
453 if (insn
->is_bb_end () && (types
& BB_END_SET
))
458 static const hash_set
<use_info
*>
459 get_all_real_uses (insn_info
*insn
, unsigned regno
)
461 gcc_assert (insn
->is_real ());
463 hash_set
<use_info
*> uses
;
464 auto_vec
<phi_info
*> work_list
;
465 hash_set
<phi_info
*> visited_list
;
467 for (def_info
*def
: insn
->defs ())
469 if (!def
->is_reg () || def
->regno () != regno
)
471 set_info
*set
= safe_dyn_cast
<set_info
*> (def
);
474 for (use_info
*use
: set
->nondebug_insn_uses ())
475 if (use
->insn ()->is_real ())
477 for (use_info
*use
: set
->phi_uses ())
478 work_list
.safe_push (use
->phi ());
481 while (!work_list
.is_empty ())
483 phi_info
*phi
= work_list
.pop ();
484 visited_list
.add (phi
);
486 for (use_info
*use
: phi
->nondebug_insn_uses ())
487 if (use
->insn ()->is_real ())
489 for (use_info
*use
: phi
->phi_uses ())
490 if (!visited_list
.contains (use
->phi ()))
491 work_list
.safe_push (use
->phi ());
496 /* Recursively find all define instructions. The kind of instruction is
497 specified by the DEF_TYPE. */
498 static hash_set
<set_info
*>
499 get_all_sets (phi_info
*phi
, unsigned int types
)
501 hash_set
<set_info
*> insns
;
502 auto_vec
<phi_info
*> work_list
;
503 hash_set
<phi_info
*> visited_list
;
505 return hash_set
<set_info
*> ();
506 work_list
.safe_push (phi
);
508 while (!work_list
.is_empty ())
510 phi_info
*phi
= work_list
.pop ();
511 visited_list
.add (phi
);
512 for (use_info
*use
: phi
->inputs ())
514 def_info
*def
= use
->def ();
515 set_info
*set
= safe_dyn_cast
<set_info
*> (def
);
517 return hash_set
<set_info
*> ();
519 gcc_assert (!set
->insn ()->is_debug_insn ());
521 if (insn_should_be_added_p (set
->insn (), types
))
523 if (set
->insn ()->is_phi ())
525 phi_info
*new_phi
= as_a
<phi_info
*> (set
);
526 if (!visited_list
.contains (new_phi
))
527 work_list
.safe_push (new_phi
);
534 static hash_set
<set_info
*>
535 get_all_sets (set_info
*set
, bool /* get_real_inst */ real_p
,
536 bool /*get_phi*/ phi_p
, bool /* get_function_parameter*/ param_p
)
538 if (real_p
&& phi_p
&& param_p
)
539 return get_all_sets (safe_dyn_cast
<phi_info
*> (set
),
540 REAL_SET
| PHI_SET
| BB_HEAD_SET
| BB_END_SET
);
542 else if (real_p
&& param_p
)
543 return get_all_sets (safe_dyn_cast
<phi_info
*> (set
),
544 REAL_SET
| BB_HEAD_SET
| BB_END_SET
);
547 return get_all_sets (safe_dyn_cast
<phi_info
*> (set
), REAL_SET
);
548 return hash_set
<set_info
*> ();
552 source_equal_p (insn_info
*insn1
, insn_info
*insn2
)
554 if (!insn1
|| !insn2
)
556 rtx_insn
*rinsn1
= insn1
->rtl ();
557 rtx_insn
*rinsn2
= insn2
->rtl ();
558 if (!rinsn1
|| !rinsn2
)
561 rtx note1
= find_reg_equal_equiv_note (rinsn1
);
562 rtx note2
= find_reg_equal_equiv_note (rinsn2
);
563 /* We could handle the case of similar-looking REG_EQUALs as well but
564 would need to verify that no insn in between modifies any of the source
566 if (note1
&& note2
&& rtx_equal_p (note1
, note2
)
567 && REG_NOTE_KIND (note1
) == REG_EQUIV
)
573 extract_single_source (set_info
*set
)
577 if (set
->insn ()->is_real ())
579 if (!set
->insn ()->is_phi ())
581 hash_set
<set_info
*> sets
= get_all_sets (set
, true, false, true);
582 if (sets
.is_empty ())
585 insn_info
*first_insn
= (*sets
.begin ())->insn ();
586 if (first_insn
->is_artificial ())
588 for (const set_info
*set
: sets
)
590 /* If there is a head or end insn, we conservative return
591 NULL so that VSETVL PASS will insert vsetvl directly. */
592 if (set
->insn ()->is_artificial ())
594 if (set
!= *sets
.begin () && !source_equal_p (set
->insn (), first_insn
))
602 extract_single_source (def_info
*def
)
606 return extract_single_source (dyn_cast
<set_info
*> (def
));
610 same_equiv_note_p (set_info
*set1
, set_info
*set2
)
612 insn_info
*insn1
= extract_single_source (set1
);
613 insn_info
*insn2
= extract_single_source (set2
);
614 if (!insn1
|| !insn2
)
616 return source_equal_p (insn1
, insn2
);
/* Map a (BB_INDEX, REGNO) pair to a unique LCM expression id; regno is
   the major index, block index the minor one.  */
static unsigned
get_expr_id (unsigned bb_index, unsigned regno, unsigned num_bbs)
{
  return regno * num_bbs + bb_index;
}
/* Inverse of get_expr_id: recover the register number component.  */
static unsigned
get_regno (unsigned expr_id, unsigned num_bb)
{
  return expr_id / num_bb;
}
/* Inverse of get_expr_id: recover the basic-block index component.  */
static unsigned
get_bb_index (unsigned expr_id, unsigned num_bb)
{
  return expr_id % num_bb;
}
635 /* Return true if the SET result is not used by any instructions. */
637 has_no_uses (basic_block cfg_bb
, rtx_insn
*rinsn
, int regno
)
639 if (bitmap_bit_p (df_get_live_out (cfg_bb
), regno
))
643 for (iter
= NEXT_INSN (rinsn
); iter
&& iter
!= NEXT_INSN (BB_END (cfg_bb
));
644 iter
= NEXT_INSN (iter
))
645 if (df_find_use (iter
, regno_reg_rtx
[regno
]))
/* This flags indicates the minimum demand of the vl and vtype values by the
   RVV instruction. For example, DEMAND_RATIO_P indicates that this RVV
   instruction only needs the SEW/LMUL ratio to remain the same, and does not
   require SEW and LMUL to be fixed.
   Therefore, if the former RVV instruction needs DEMAND_RATIO_P and the latter
   instruction needs DEMAND_SEW_LMUL_P and its SEW/LMUL is the same as that of
   the former instruction, then we can make the minimum demand of the former
   instruction strict to DEMAND_SEW_LMUL_P, and its required SEW and LMUL are
   the SEW and LMUL of the latter instruction, and the vsetvl instruction
   generated according to the new demand can also be used for the latter
   instruction, so there is no need to insert a separate vsetvl instruction for
   the latter instruction.  */
enum demand_flags : unsigned
{
  DEMAND_EMPTY_P = 0,
  DEMAND_SEW_P = 1 << 0,
  DEMAND_LMUL_P = 1 << 1,
  DEMAND_RATIO_P = 1 << 2,
  DEMAND_GE_SEW_P = 1 << 3,
  DEMAND_TAIL_POLICY_P = 1 << 4,
  DEMAND_MASK_POLICY_P = 1 << 5,
  DEMAND_AVL_P = 1 << 6,
  DEMAND_NON_ZERO_AVL_P = 1 << 7,
};
/* We split the demand information into three parts. They are sew and lmul
   related (sew_lmul_demand_type), tail and mask policy related
   (policy_demand_type) and avl related (avl_demand_type). Then we define three
   interfaces: available_with, compatible_p and merge. available_with is
   used to determine whether the two vsetvl infos prev_info and next_info are
   available or not. If prev_info is available for next_info, it means that the
   RVV insn corresponding to next_info on the path from prev_info to next_info
   can be used without inserting a separate vsetvl instruction. compatible_p
   is used to determine whether prev_info is compatible with next_info, and if
   so, merge can be used to merge the stricter demand information from
   next_info into prev_info so that prev_info becomes available to next_info.  */
689 enum class sew_lmul_demand_type
: unsigned
691 sew_lmul
= demand_flags::DEMAND_SEW_P
| demand_flags::DEMAND_LMUL_P
,
692 ratio_only
= demand_flags::DEMAND_RATIO_P
,
693 sew_only
= demand_flags::DEMAND_SEW_P
,
694 ge_sew
= demand_flags::DEMAND_GE_SEW_P
,
696 = demand_flags::DEMAND_RATIO_P
| demand_flags::DEMAND_GE_SEW_P
,
699 enum class policy_demand_type
: unsigned
702 = demand_flags::DEMAND_TAIL_POLICY_P
| demand_flags::DEMAND_MASK_POLICY_P
,
703 tail_policy_only
= demand_flags::DEMAND_TAIL_POLICY_P
,
704 mask_policy_only
= demand_flags::DEMAND_MASK_POLICY_P
,
705 ignore_policy
= demand_flags::DEMAND_EMPTY_P
,
708 enum class avl_demand_type
: unsigned
710 avl
= demand_flags::DEMAND_AVL_P
,
711 non_zero_avl
= demand_flags::DEMAND_NON_ZERO_AVL_P
,
712 ignore_avl
= demand_flags::DEMAND_EMPTY_P
,
730 sew_lmul_demand_type m_sew_lmul_demand
;
731 policy_demand_type m_policy_demand
;
732 avl_demand_type m_avl_demand
;
734 enum class state_type
744 bool m_change_vtype_only
;
745 insn_info
*m_read_vl_insn
;
746 bool m_vl_used_by_non_rvv_insn
;
750 : m_insn (nullptr), m_bb (nullptr), m_avl (NULL_RTX
), m_vl (NULL_RTX
),
751 m_avl_def (nullptr), m_sew (0), m_max_sew (0), m_vlmul (LMUL_RESERVED
),
752 m_ratio (0), m_ta (false), m_ma (false),
753 m_sew_lmul_demand (sew_lmul_demand_type::sew_lmul
),
754 m_policy_demand (policy_demand_type::tail_mask_policy
),
755 m_avl_demand (avl_demand_type::avl
), m_state (state_type::UNINITIALIZED
),
756 m_delete (false), m_change_vtype_only (false), m_read_vl_insn (nullptr),
757 m_vl_used_by_non_rvv_insn (false)
760 vsetvl_info (insn_info
*insn
) : vsetvl_info () { parse_insn (insn
); }
762 vsetvl_info (rtx_insn
*insn
) : vsetvl_info () { parse_insn (insn
); }
764 void set_avl (rtx avl
) { m_avl
= avl
; }
765 void set_vl (rtx vl
) { m_vl
= vl
; }
766 void set_avl_def (set_info
*avl_def
) { m_avl_def
= avl_def
; }
767 void set_sew (uint8_t sew
) { m_sew
= sew
; }
768 void set_vlmul (vlmul_type vlmul
) { m_vlmul
= vlmul
; }
769 void set_ratio (uint8_t ratio
) { m_ratio
= ratio
; }
770 void set_ta (bool ta
) { m_ta
= ta
; }
771 void set_ma (bool ma
) { m_ma
= ma
; }
772 void set_delete () { m_delete
= true; }
773 void set_bb (bb_info
*bb
) { m_bb
= bb
; }
774 void set_max_sew (uint8_t max_sew
) { m_max_sew
= max_sew
; }
775 void set_change_vtype_only () { m_change_vtype_only
= true; }
776 void set_read_vl_insn (insn_info
*insn
) { m_read_vl_insn
= insn
; }
778 rtx
get_avl () const { return m_avl
; }
779 rtx
get_vl () const { return m_vl
; }
780 set_info
*get_avl_def () const { return m_avl_def
; }
781 uint8_t get_sew () const { return m_sew
; }
782 vlmul_type
get_vlmul () const { return m_vlmul
; }
783 uint8_t get_ratio () const { return m_ratio
; }
784 bool get_ta () const { return m_ta
; }
785 bool get_ma () const { return m_ma
; }
786 insn_info
*get_insn () const { return m_insn
; }
787 bool delete_p () const { return m_delete
; }
788 bb_info
*get_bb () const { return m_bb
; }
789 uint8_t get_max_sew () const { return m_max_sew
; }
790 insn_info
*get_read_vl_insn () const { return m_read_vl_insn
; }
791 bool vl_used_by_non_rvv_insn_p () const { return m_vl_used_by_non_rvv_insn
; }
793 bool has_imm_avl () const { return m_avl
&& CONST_INT_P (m_avl
); }
794 bool has_vlmax_avl () const { return vlmax_avl_p (m_avl
); }
795 bool has_nonvlmax_reg_avl () const
797 return m_avl
&& REG_P (m_avl
) && !has_vlmax_avl ();
799 bool has_non_zero_avl () const
802 return INTVAL (m_avl
) > 0;
803 return has_vlmax_avl ();
807 /* The VL operand can only be either a NULL_RTX or a register. */
808 gcc_assert (!m_vl
|| REG_P (m_vl
));
809 return m_vl
!= NULL_RTX
;
811 bool has_same_ratio (const vsetvl_info
&other
) const
813 return get_ratio () == other
.get_ratio ();
816 /* The block of INSN isn't always same as the block of the VSETVL_INFO,
817 meaning we may have 'get_insn ()->bb () != get_bb ()'.
819 E.g. BB 2 (Empty) ---> BB 3 (VALID, has rvv insn 1)
821 BB 2 has empty VSETVL_INFO, wheras BB 3 has VSETVL_INFO that satisfies
822 get_insn ()->bb () == get_bb (). In earliest fusion, we may fuse bb 3 and
823 bb 2 so that the 'get_bb ()' of BB2 VSETVL_INFO will be BB2 wheras the
824 'get_insn ()' of BB2 VSETVL INFO will be the rvv insn 1 (which is located
826 bool insn_inside_bb_p () const { return get_insn ()->bb () == get_bb (); }
827 void update_avl (const vsetvl_info
&other
)
829 m_avl
= other
.get_avl ();
830 m_vl
= other
.get_vl ();
831 m_avl_def
= other
.get_avl_def ();
834 bool uninit_p () const { return m_state
== state_type::UNINITIALIZED
; }
835 bool valid_p () const { return m_state
== state_type::VALID
; }
836 bool unknown_p () const { return m_state
== state_type::UNKNOWN
; }
837 bool empty_p () const { return m_state
== state_type::EMPTY
; }
838 bool change_vtype_only_p () const { return m_change_vtype_only
; }
840 void set_valid () { m_state
= state_type::VALID
; }
841 void set_unknown () { m_state
= state_type::UNKNOWN
; }
842 void set_empty () { m_state
= state_type::EMPTY
; }
844 void set_sew_lmul_demand (sew_lmul_demand_type demand
)
846 m_sew_lmul_demand
= demand
;
848 void set_policy_demand (policy_demand_type demand
)
850 m_policy_demand
= demand
;
852 void set_avl_demand (avl_demand_type demand
) { m_avl_demand
= demand
; }
854 sew_lmul_demand_type
get_sew_lmul_demand () const
856 return m_sew_lmul_demand
;
858 policy_demand_type
get_policy_demand () const { return m_policy_demand
; }
859 avl_demand_type
get_avl_demand () const { return m_avl_demand
; }
861 void normalize_demand (unsigned demand_flags
)
864 & (DEMAND_SEW_P
| DEMAND_LMUL_P
| DEMAND_RATIO_P
| DEMAND_GE_SEW_P
))
866 case (unsigned) sew_lmul_demand_type::sew_lmul
:
867 m_sew_lmul_demand
= sew_lmul_demand_type::sew_lmul
;
869 case (unsigned) sew_lmul_demand_type::ratio_only
:
870 m_sew_lmul_demand
= sew_lmul_demand_type::ratio_only
;
872 case (unsigned) sew_lmul_demand_type::sew_only
:
873 m_sew_lmul_demand
= sew_lmul_demand_type::sew_only
;
875 case (unsigned) sew_lmul_demand_type::ge_sew
:
876 m_sew_lmul_demand
= sew_lmul_demand_type::ge_sew
;
878 case (unsigned) sew_lmul_demand_type::ratio_and_ge_sew
:
879 m_sew_lmul_demand
= sew_lmul_demand_type::ratio_and_ge_sew
;
885 switch (demand_flags
& (DEMAND_TAIL_POLICY_P
| DEMAND_MASK_POLICY_P
))
887 case (unsigned) policy_demand_type::tail_mask_policy
:
888 m_policy_demand
= policy_demand_type::tail_mask_policy
;
890 case (unsigned) policy_demand_type::tail_policy_only
:
891 m_policy_demand
= policy_demand_type::tail_policy_only
;
893 case (unsigned) policy_demand_type::mask_policy_only
:
894 m_policy_demand
= policy_demand_type::mask_policy_only
;
896 case (unsigned) policy_demand_type::ignore_policy
:
897 m_policy_demand
= policy_demand_type::ignore_policy
;
903 switch (demand_flags
& (DEMAND_AVL_P
| DEMAND_NON_ZERO_AVL_P
))
905 case (unsigned) avl_demand_type::avl
:
906 m_avl_demand
= avl_demand_type::avl
;
908 case (unsigned) avl_demand_type::non_zero_avl
:
909 m_avl_demand
= avl_demand_type::non_zero_avl
;
911 case (unsigned) avl_demand_type::ignore_avl
:
912 m_avl_demand
= avl_demand_type::ignore_avl
;
919 void parse_insn (rtx_insn
*rinsn
)
921 if (!NONDEBUG_INSN_P (rinsn
))
923 if (optimize
== 0 && !has_vtype_op (rinsn
))
925 gcc_assert (!vsetvl_discard_result_insn_p (rinsn
));
927 extract_insn_cached (rinsn
);
928 m_avl
= ::get_avl (rinsn
);
929 if (has_vlmax_avl () || vsetvl_insn_p (rinsn
))
930 m_vl
= ::get_vl (rinsn
);
931 m_sew
= ::get_sew (rinsn
);
932 m_vlmul
= ::get_vlmul (rinsn
);
933 m_ta
= tail_agnostic_p (rinsn
);
934 m_ma
= mask_agnostic_p (rinsn
);
937 void parse_insn (insn_info
*insn
)
941 /* Return if it is debug insn for the consistency with optimize == 0. */
942 if (insn
->is_debug_insn ())
945 /* We set it as unknown since we don't what will happen in CALL or ASM. */
946 if (insn
->is_call () || insn
->is_asm ())
952 /* If this is something that updates VL/VTYPE that we don't know about, set
953 the state to unknown. */
954 if (!vector_config_insn_p (insn
->rtl ()) && !has_vtype_op (insn
->rtl ())
955 && (find_access (insn
->defs (), VL_REGNUM
)
956 || find_access (insn
->defs (), VTYPE_REGNUM
)))
962 if (!vector_config_insn_p (insn
->rtl ()) && !has_vtype_op (insn
->rtl ()))
968 m_avl
= ::get_avl (insn
->rtl ());
971 if (vsetvl_insn_p (insn
->rtl ()) || has_vlmax_avl ())
972 m_vl
= ::get_vl (insn
->rtl ());
974 if (has_nonvlmax_reg_avl ())
975 m_avl_def
= find_access (insn
->uses (), REGNO (m_avl
))->def ();
978 m_sew
= ::get_sew (insn
->rtl ());
979 m_vlmul
= ::get_vlmul (insn
->rtl ());
980 m_ratio
= get_attr_ratio (insn
->rtl ());
981 /* when get_attr_ratio is invalid, this kind of instructions
982 doesn't care about ratio. However, we still need this value
983 in demand info backward analysis. */
984 if (m_ratio
== INVALID_ATTRIBUTE
)
985 m_ratio
= calculate_ratio (m_sew
, m_vlmul
);
986 m_ta
= tail_agnostic_p (insn
->rtl ());
987 m_ma
= mask_agnostic_p (insn
->rtl ());
989 /* If merge operand is undef value, we prefer agnostic. */
990 int merge_op_idx
= get_attr_merge_op_idx (insn
->rtl ());
991 if (merge_op_idx
!= INVALID_ATTRIBUTE
992 && satisfies_constraint_vu (recog_data
.operand
[merge_op_idx
]))
998 /* Determine the demand info of the RVV insn. */
999 m_max_sew
= get_max_int_sew ();
1000 unsigned dflags
= 0;
1001 if (vector_config_insn_p (insn
->rtl ()))
1003 dflags
|= demand_flags::DEMAND_AVL_P
;
1004 dflags
|= demand_flags::DEMAND_RATIO_P
;
1008 if (has_vl_op (insn
->rtl ()))
1010 if (scalar_move_insn_p (insn
->rtl ()))
1012 /* If the avl for vmv.s.x comes from the vsetvl instruction, we
1013 don't know if the avl is non-zero, so it is set to
1014 DEMAND_AVL_P for now. it may be corrected to
1015 DEMAND_NON_ZERO_AVL_P later when more information is
1018 if (has_non_zero_avl ())
1019 dflags
|= demand_flags::DEMAND_NON_ZERO_AVL_P
;
1021 dflags
|= demand_flags::DEMAND_AVL_P
;
1024 dflags
|= demand_flags::DEMAND_AVL_P
;
1027 if (get_attr_ratio (insn
->rtl ()) != INVALID_ATTRIBUTE
)
1028 dflags
|= demand_flags::DEMAND_RATIO_P
;
1031 if (scalar_move_insn_p (insn
->rtl ()) && m_ta
)
1033 dflags
|= demand_flags::DEMAND_GE_SEW_P
;
1034 m_max_sew
= get_attr_type (insn
->rtl ()) == TYPE_VFMOVFV
1035 ? get_max_float_sew ()
1036 : get_max_int_sew ();
1039 dflags
|= demand_flags::DEMAND_SEW_P
;
1041 if (!ignore_vlmul_insn_p (insn
->rtl ()))
1042 dflags
|= demand_flags::DEMAND_LMUL_P
;
1046 dflags
|= demand_flags::DEMAND_TAIL_POLICY_P
;
1048 dflags
|= demand_flags::DEMAND_MASK_POLICY_P
;
1051 normalize_demand (dflags
);
1053 /* Optimize AVL from the vsetvl instruction. */
1054 insn_info
*def_insn
= extract_single_source (get_avl_def ());
1055 if (def_insn
&& vsetvl_insn_p (def_insn
->rtl ()))
1057 vsetvl_info def_info
= vsetvl_info (def_insn
);
1058 if ((scalar_move_insn_p (insn
->rtl ())
1059 || def_info
.get_ratio () == get_ratio ())
1060 && (def_info
.has_vlmax_avl () || def_info
.has_imm_avl ()))
1062 update_avl (def_info
);
1063 if (scalar_move_insn_p (insn
->rtl ()) && has_non_zero_avl ())
1064 m_avl_demand
= avl_demand_type::non_zero_avl
;
1068 /* Determine if dest operand(vl) has been used by non-RVV instructions. */
1071 const hash_set
<use_info
*> vl_uses
1072 = get_all_real_uses (get_insn (), REGNO (get_vl ()));
1073 for (use_info
*use
: vl_uses
)
1075 gcc_assert (use
->insn ()->is_real ());
1076 rtx_insn
*rinsn
= use
->insn ()->rtl ();
1077 if (!has_vl_op (rinsn
)
1078 || count_regno_occurrences (rinsn
, REGNO (get_vl ())) != 1)
1080 m_vl_used_by_non_rvv_insn
= true;
1083 rtx avl
= ::get_avl (rinsn
);
1084 if (!avl
|| !REG_P (avl
) || REGNO (get_vl ()) != REGNO (avl
))
1086 m_vl_used_by_non_rvv_insn
= true;
1092 /* Collect the read vl insn for the fault-only-first rvv loads. */
1093 if (fault_first_load_p (insn
->rtl ()))
1095 for (insn_info
*i
= insn
->next_nondebug_insn ();
1096 i
->bb () == insn
->bb (); i
= i
->next_nondebug_insn ())
1098 if (find_access (i
->defs (), VL_REGNUM
))
1100 if (i
->rtl () && read_vl_insn_p (i
->rtl ()))
1109 /* Returns the corresponding vsetvl rtx pat. */
1110 rtx
get_vsetvl_pat (bool ignore_vl
= false) const
1112 rtx avl
= get_avl ();
1113 /* if optimization == 0 and the instruction is vmv.x.s/vfmv.f.s,
1114 set the value of avl to (const_int 0) so that VSETVL PASS will
1115 insert vsetvl correctly.*/
1118 rtx sew
= gen_int_mode (get_sew (), Pmode
);
1119 rtx vlmul
= gen_int_mode (get_vlmul (), Pmode
);
1120 rtx ta
= gen_int_mode (get_ta (), Pmode
);
1121 rtx ma
= gen_int_mode (get_ma (), Pmode
);
1123 if (change_vtype_only_p ())
1124 return gen_vsetvl_vtype_change_only (sew
, vlmul
, ta
, ma
);
1125 else if (has_vl () && !ignore_vl
)
1126 return gen_vsetvl (Pmode
, get_vl (), avl
, sew
, vlmul
, ta
, ma
);
1128 return gen_vsetvl_discard_result (Pmode
, avl
, sew
, vlmul
, ta
, ma
);
1131 /* Return true that the non-AVL operands of THIS will be modified
1132 if we fuse the VL modification from OTHER into THIS. */
1133 bool vl_modify_non_avl_op_p (const vsetvl_info
&other
) const
1135 /* We don't need to worry about any operands from THIS be
1136 modified by OTHER vsetvl since we OTHER vsetvl doesn't
1137 modify any operand. */
1138 if (!other
.has_vl ())
1141 /* THIS VL operand always preempt OTHER VL operand. */
1142 if (this->has_vl ())
1145 /* If THIS has non IMM AVL and THIS is AVL compatible with
1146 OTHER, the AVL value of THIS is same as VL value of OTHER. */
1147 if (!this->has_imm_avl ())
1149 return find_access (this->get_insn ()->uses (), REGNO (other
.get_vl ()));
1152 bool operator== (const vsetvl_info
&other
) const
1154 gcc_assert (!uninit_p () && !other
.uninit_p ()
1155 && "Uninitialization should not happen");
1158 return other
.empty_p ();
1160 return other
.unknown_p ();
1162 return get_insn () == other
.get_insn () && get_bb () == other
.get_bb ()
1163 && get_avl () == other
.get_avl () && get_vl () == other
.get_vl ()
1164 && get_avl_def () == other
.get_avl_def ()
1165 && get_sew () == other
.get_sew ()
1166 && get_vlmul () == other
.get_vlmul () && get_ta () == other
.get_ta ()
1167 && get_ma () == other
.get_ma ()
1168 && get_avl_demand () == other
.get_avl_demand ()
1169 && get_sew_lmul_demand () == other
.get_sew_lmul_demand ()
1170 && get_policy_demand () == other
.get_policy_demand ();
1173 void dump (FILE *file
, const char *indent
= "") const
1177 fprintf (file
, "UNINITIALIZED.\n");
1180 else if (unknown_p ())
1182 fprintf (file
, "UNKNOWN.\n");
1185 else if (empty_p ())
1187 fprintf (file
, "EMPTY.\n");
1190 else if (valid_p ())
1191 fprintf (file
, "VALID (insn %u, bb %u)%s\n", get_insn ()->uid (),
1192 get_bb ()->index (), delete_p () ? " (deleted)" : "");
1196 fprintf (file
, "%sDemand fields:", indent
);
1197 if (m_sew_lmul_demand
== sew_lmul_demand_type::sew_lmul
)
1198 fprintf (file
, " demand_sew_lmul");
1199 else if (m_sew_lmul_demand
== sew_lmul_demand_type::ratio_only
)
1200 fprintf (file
, " demand_ratio_only");
1201 else if (m_sew_lmul_demand
== sew_lmul_demand_type::sew_only
)
1202 fprintf (file
, " demand_sew_only");
1203 else if (m_sew_lmul_demand
== sew_lmul_demand_type::ge_sew
)
1204 fprintf (file
, " demand_ge_sew");
1205 else if (m_sew_lmul_demand
== sew_lmul_demand_type::ratio_and_ge_sew
)
1206 fprintf (file
, " demand_ratio_and_ge_sew");
1208 if (m_policy_demand
== policy_demand_type::tail_mask_policy
)
1209 fprintf (file
, " demand_tail_mask_policy");
1210 else if (m_policy_demand
== policy_demand_type::tail_policy_only
)
1211 fprintf (file
, " demand_tail_policy_only");
1212 else if (m_policy_demand
== policy_demand_type::mask_policy_only
)
1213 fprintf (file
, " demand_mask_policy_only");
1215 if (m_avl_demand
== avl_demand_type::avl
)
1216 fprintf (file
, " demand_avl");
1217 else if (m_avl_demand
== avl_demand_type::non_zero_avl
)
1218 fprintf (file
, " demand_non_zero_avl");
1219 fprintf (file
, "\n");
1221 fprintf (file
, "%sSEW=%d, ", indent
, get_sew ());
1222 fprintf (file
, "VLMUL=%s, ", vlmul_to_str (get_vlmul ()));
1223 fprintf (file
, "RATIO=%d, ", get_ratio ());
1224 fprintf (file
, "MAX_SEW=%d\n", get_max_sew ());
1226 fprintf (file
, "%sTAIL_POLICY=%s, ", indent
, policy_to_str (get_ta ()));
1227 fprintf (file
, "MASK_POLICY=%s\n", policy_to_str (get_ma ()));
1229 fprintf (file
, "%sAVL=", indent
);
1230 print_rtl_single (file
, get_avl ());
1231 fprintf (file
, "%sVL=", indent
);
1232 print_rtl_single (file
, get_vl ());
1233 if (change_vtype_only_p ())
1234 fprintf (file
, "%schange vtype only\n", indent
);
1235 if (get_read_vl_insn ())
1236 fprintf (file
, "%sread_vl_insn: insn %u\n", indent
,
1237 get_read_vl_insn ()->uid ());
1238 if (vl_used_by_non_rvv_insn_p ())
1239 fprintf (file
, "%suse_by_non_rvv_insn=true\n", indent
);
1243 class vsetvl_block_info
1246 /* The static execute probability of the demand info. */
1247 profile_probability probability
;
1249 auto_vec
<vsetvl_info
> local_infos
;
1250 vsetvl_info global_info
;
1253 bool full_available
;
1255 vsetvl_block_info () : bb (nullptr), full_available (false)
1257 local_infos
.safe_grow_cleared (0);
1258 global_info
.set_empty ();
1260 vsetvl_block_info (const vsetvl_block_info
&other
)
1261 : probability (other
.probability
), local_infos (other
.local_infos
.copy ()),
1262 global_info (other
.global_info
), bb (other
.bb
)
1265 vsetvl_info
&get_entry_info ()
1267 gcc_assert (!empty_p ());
1268 return local_infos
.is_empty () ? global_info
: local_infos
[0];
1270 vsetvl_info
&get_exit_info ()
1272 gcc_assert (!empty_p ());
1273 return local_infos
.is_empty () ? global_info
1274 : local_infos
[local_infos
.length () - 1];
1276 const vsetvl_info
&get_entry_info () const
1278 gcc_assert (!empty_p ());
1279 return local_infos
.is_empty () ? global_info
: local_infos
[0];
1281 const vsetvl_info
&get_exit_info () const
1283 gcc_assert (!empty_p ());
1284 return local_infos
.is_empty () ? global_info
1285 : local_infos
[local_infos
.length () - 1];
1288 bool empty_p () const { return local_infos
.is_empty () && !has_info (); }
1289 bool has_info () const { return !global_info
.empty_p (); }
1290 void set_info (const vsetvl_info
&info
)
1292 gcc_assert (local_infos
.is_empty ());
1294 global_info
.set_bb (bb
);
1296 void set_empty_info () { global_info
.set_empty (); }
/* Demand system is the RVV-based VSETVL info analysis tools wrapper.
   It defines compatible rules for SEW/LMUL, POLICY and AVL.
   Also, it provides 3 interfaces (available_p, compatible_p and
   merge) for the VSETVL PASS analysis and optimization.

   - available_p: Determine whether the next info can get the
     available VSETVL status from previous info.
     e.g. bb 2 (demand SEW = 32, LMUL = M2) -> bb 3 (demand RATIO = 16).
     Since bb 2 demand info (SEW/LMUL = 32/2 = 16) satisfies the bb 3
     demand, the VSETVL instruction in bb 3 can be elided.
     available_p (previous, next) is true in such a situation.
   - compatible_p: Determine whether prev_info is compatible with next_info
     so that we can have a new merged info that is available to both of them.
   - merge: Merge the stricter demand information from
     next_info into prev_info so that prev_info becomes available to
     both.  */
/* AVL reaching-definition bitmaps, one sbitmap per basic block,
   installed via set_avl_in_out_data; may be null when the AVL
   data-flow information has not been computed yet.  */
1318 sbitmap
*m_avl_def_in
;
1319 sbitmap
*m_avl_def_out
;
1323 inline bool always_true (const vsetvl_info
&prev ATTRIBUTE_UNUSED
,
1324 const vsetvl_info
&next ATTRIBUTE_UNUSED
)
1328 inline bool always_false (const vsetvl_info
&prev ATTRIBUTE_UNUSED
,
1329 const vsetvl_info
&next ATTRIBUTE_UNUSED
)
1334 /* predictors for sew and lmul */
1336 inline bool lmul_eq_p (const vsetvl_info
&prev
, const vsetvl_info
&next
)
1338 return prev
.get_vlmul () == next
.get_vlmul ();
1340 inline bool sew_eq_p (const vsetvl_info
&prev
, const vsetvl_info
&next
)
1342 return prev
.get_sew () == next
.get_sew ();
1344 inline bool sew_lmul_eq_p (const vsetvl_info
&prev
, const vsetvl_info
&next
)
1346 return lmul_eq_p (prev
, next
) && sew_eq_p (prev
, next
);
1348 inline bool sew_ge_p (const vsetvl_info
&prev
, const vsetvl_info
&next
)
1350 return prev
.get_sew () == next
.get_sew ()
1351 || (next
.get_ta () && prev
.get_sew () > next
.get_sew ());
1353 inline bool sew_le_p (const vsetvl_info
&prev
, const vsetvl_info
&next
)
1355 return prev
.get_sew () == next
.get_sew ()
1356 || (prev
.get_ta () && prev
.get_sew () < next
.get_sew ());
1358 inline bool prev_sew_le_next_max_sew_p (const vsetvl_info
&prev
,
1359 const vsetvl_info
&next
)
1361 return prev
.get_sew () <= next
.get_max_sew ();
1363 inline bool next_sew_le_prev_max_sew_p (const vsetvl_info
&prev
,
1364 const vsetvl_info
&next
)
1366 return next
.get_sew () <= prev
.get_max_sew ();
1368 inline bool max_sew_overlap_p (const vsetvl_info
&prev
,
1369 const vsetvl_info
&next
)
1371 return !(prev
.get_sew () > next
.get_max_sew ()
1372 || next
.get_sew () > prev
.get_max_sew ());
1374 inline bool ratio_eq_p (const vsetvl_info
&prev
, const vsetvl_info
&next
)
1376 return prev
.has_same_ratio (next
);
1378 inline bool prev_ratio_valid_for_next_sew_p (const vsetvl_info
&prev
,
1379 const vsetvl_info
&next
)
1381 return prev
.get_ratio () >= (next
.get_sew () / 8);
1383 inline bool next_ratio_valid_for_prev_sew_p (const vsetvl_info
&prev
,
1384 const vsetvl_info
&next
)
1386 return next
.get_ratio () >= (prev
.get_sew () / 8);
1389 inline bool sew_ge_and_ratio_eq_p (const vsetvl_info
&prev
,
1390 const vsetvl_info
&next
)
1392 return sew_ge_p (prev
, next
) && ratio_eq_p (prev
, next
);
1394 inline bool sew_ge_and_prev_sew_le_next_max_sew_p (const vsetvl_info
&prev
,
1395 const vsetvl_info
&next
)
1397 return sew_ge_p (prev
, next
) && prev_sew_le_next_max_sew_p (prev
, next
);
1400 sew_ge_and_prev_sew_le_next_max_sew_and_next_ratio_valid_for_prev_sew_p (
1401 const vsetvl_info
&prev
, const vsetvl_info
&next
)
1403 return sew_ge_p (prev
, next
) && prev_sew_le_next_max_sew_p (prev
, next
)
1404 && next_ratio_valid_for_prev_sew_p (prev
, next
);
1406 inline bool sew_le_and_next_sew_le_prev_max_sew_p (const vsetvl_info
&prev
,
1407 const vsetvl_info
&next
)
1409 return sew_le_p (prev
, next
) && next_sew_le_prev_max_sew_p (prev
, next
);
1412 max_sew_overlap_and_next_ratio_valid_for_prev_sew_p (const vsetvl_info
&prev
,
1413 const vsetvl_info
&next
)
1415 return next_ratio_valid_for_prev_sew_p (prev
, next
)
1416 && max_sew_overlap_p (prev
, next
);
1419 sew_le_and_next_sew_le_prev_max_sew_and_ratio_eq_p (const vsetvl_info
&prev
,
1420 const vsetvl_info
&next
)
1422 return sew_le_p (prev
, next
) && ratio_eq_p (prev
, next
)
1423 && next_sew_le_prev_max_sew_p (prev
, next
);
1426 max_sew_overlap_and_prev_ratio_valid_for_next_sew_p (const vsetvl_info
&prev
,
1427 const vsetvl_info
&next
)
1429 return prev_ratio_valid_for_next_sew_p (prev
, next
)
1430 && max_sew_overlap_p (prev
, next
);
1433 sew_le_and_next_sew_le_prev_max_sew_and_prev_ratio_valid_for_next_sew_p (
1434 const vsetvl_info
&prev
, const vsetvl_info
&next
)
1436 return sew_le_p (prev
, next
) && prev_ratio_valid_for_next_sew_p (prev
, next
)
1437 && next_sew_le_prev_max_sew_p (prev
, next
);
1439 inline bool max_sew_overlap_and_ratio_eq_p (const vsetvl_info
&prev
,
1440 const vsetvl_info
&next
)
1442 return ratio_eq_p (prev
, next
) && max_sew_overlap_p (prev
, next
);
1445 /* predictors for tail and mask policy */
1447 inline bool tail_policy_eq_p (const vsetvl_info
&prev
,
1448 const vsetvl_info
&next
)
1450 return prev
.get_ta () == next
.get_ta ();
1452 inline bool mask_policy_eq_p (const vsetvl_info
&prev
,
1453 const vsetvl_info
&next
)
1455 return prev
.get_ma () == next
.get_ma ();
1457 inline bool tail_mask_policy_eq_p (const vsetvl_info
&prev
,
1458 const vsetvl_info
&next
)
1460 return tail_policy_eq_p (prev
, next
) && mask_policy_eq_p (prev
, next
);
1463 /* predictors for avl */
1465 inline bool modify_or_use_vl_p (insn_info
*i
, const vsetvl_info
&info
)
1469 if (find_access (i
->defs (), REGNO (info
.get_vl ())))
1471 if (find_access (i
->uses (), REGNO (info
.get_vl ())))
1473 resource_info resource
= full_register (REGNO (info
.get_vl ()));
1474 def_lookup dl1
= crtl
->ssa
->find_def (resource
, i
);
1475 def_lookup dl2
= crtl
->ssa
->find_def (resource
, info
.get_insn ());
1476 if (dl1
.matching_set () || dl2
.matching_set ())
1478 /* If their VLs are coming from same def, we still want to fuse
1479 their VSETVL demand info to gain better performance. */
1480 return dl1
.prev_def (i
) != dl2
.prev_def (i
);
1485 inline bool modify_avl_p (insn_info
*i
, const vsetvl_info
&info
)
1487 return info
.has_nonvlmax_reg_avl ()
1488 && find_access (i
->defs (), REGNO (info
.get_avl ()));
1491 inline bool modify_reg_between_p (insn_info
*prev_insn
, insn_info
*curr_insn
,
1494 gcc_assert (prev_insn
->compare_with (curr_insn
) < 0);
1495 for (insn_info
*i
= curr_insn
->prev_nondebug_insn (); i
!= prev_insn
;
1496 i
= i
->prev_nondebug_insn ())
1499 if (find_access (i
->defs (), regno
))
1505 inline bool reg_avl_equal_p (const vsetvl_info
&prev
, const vsetvl_info
&next
)
1507 if (!prev
.has_nonvlmax_reg_avl () || !next
.has_nonvlmax_reg_avl ())
1510 if (same_equiv_note_p (prev
.get_avl_def (), next
.get_avl_def ()))
1513 if (REGNO (prev
.get_avl ()) != REGNO (next
.get_avl ()))
1516 insn_info
*prev_insn
= prev
.get_insn ();
1517 if (prev
.get_bb () != prev_insn
->bb ())
1518 prev_insn
= prev
.get_bb ()->end_insn ();
1520 insn_info
*next_insn
= next
.get_insn ();
1521 if (next
.get_bb () != next_insn
->bb ())
1522 next_insn
= next
.get_bb ()->end_insn ();
1524 return avl_vl_unmodified_between_p (prev_insn
, next_insn
, next
, false);
1527 inline bool avl_equal_p (const vsetvl_info
&prev
, const vsetvl_info
&next
)
1529 gcc_assert (prev
.valid_p () && next
.valid_p ());
1531 if (next
.has_vl () && next
.vl_used_by_non_rvv_insn_p ())
1534 if (vector_config_insn_p (prev
.get_insn ()->rtl ()) && next
.get_avl_def ()
1535 && next
.get_avl_def ()->insn () == prev
.get_insn ())
1538 if (prev
.get_read_vl_insn ())
1540 if (!next
.has_nonvlmax_reg_avl () || !next
.get_avl_def ())
1542 insn_info
*avl_def_insn
= extract_single_source (next
.get_avl_def ());
1543 return avl_def_insn
== prev
.get_read_vl_insn ();
1546 if (prev
== next
&& prev
.has_nonvlmax_reg_avl ())
1548 insn_info
*insn
= prev
.get_insn ();
1549 bb_info
*bb
= insn
->bb ();
1550 for (insn_info
*i
= insn
; real_insn_and_same_bb_p (i
, bb
);
1551 i
= i
->next_nondebug_insn ())
1552 if (find_access (i
->defs (), REGNO (prev
.get_avl ())))
1556 if (prev
.has_vlmax_avl () && next
.has_vlmax_avl ())
1558 else if (prev
.has_imm_avl () && next
.has_imm_avl ())
1559 return INTVAL (prev
.get_avl ()) == INTVAL (next
.get_avl ());
1560 else if (prev
.has_vl () && next
.has_nonvlmax_reg_avl ()
1561 && REGNO (prev
.get_vl ()) == REGNO (next
.get_avl ()))
1563 insn_info
*prev_insn
= prev
.insn_inside_bb_p ()
1565 : prev
.get_bb ()->end_insn ();
1567 insn_info
*next_insn
= next
.insn_inside_bb_p ()
1569 : next
.get_bb ()->end_insn ();
1570 return avl_vl_unmodified_between_p (prev_insn
, next_insn
, next
, false);
1572 else if (prev
.has_nonvlmax_reg_avl () && next
.has_nonvlmax_reg_avl ())
1573 return reg_avl_equal_p (prev
, next
);
1577 inline bool avl_equal_or_prev_avl_non_zero_p (const vsetvl_info
&prev
,
1578 const vsetvl_info
&next
)
1580 return avl_equal_p (prev
, next
) || prev
.has_non_zero_avl ();
1583 inline bool can_use_next_avl_p (const vsetvl_info
&prev
,
1584 const vsetvl_info
&next
)
1586 /* Forbid the AVL/VL propagation if VL of NEXT is used
1587 by non-RVV instructions. This is because:
1590 PREV: scalar move (no AVL)
1592 NEXT: vsetvl a5(VL), a4(AVL) ...
1595 Since user vsetvl instruction is no side effect instruction
1596 which should be placed in the correct and optimal location
1597 of the program by the previous PASS, it is unreasonable that
1598 VSETVL PASS tries to move it to another places if it used by
1599 non-RVV instructions.
1601 Note: We only forbid the cases that VL is used by the following
1602 non-RVV instructions which will cause issues. We don't forbid
1603 other cases since it won't cause correctness issues and we still
1604 more demand info are fused backward. The later LCM algorithm
1605 should know the optimal location of the vsetvl. */
1606 if (next
.has_vl () && next
.vl_used_by_non_rvv_insn_p ())
1609 if (!next
.has_nonvlmax_reg_avl () && !next
.has_vl ())
1612 insn_info
*prev_insn
= prev
.get_insn ();
1613 if (prev
.get_bb () != prev_insn
->bb ())
1614 prev_insn
= prev
.get_bb ()->end_insn ();
1616 insn_info
*next_insn
= next
.get_insn ();
1617 if (next
.get_bb () != next_insn
->bb ())
1618 next_insn
= next
.get_bb ()->end_insn ();
1620 return avl_vl_unmodified_between_p (prev_insn
, next_insn
, next
);
1623 inline bool avl_equal_or_next_avl_non_zero_and_can_use_next_avl_p (
1624 const vsetvl_info
&prev
, const vsetvl_info
&next
)
1626 return avl_equal_p (prev
, next
)
1627 || (next
.has_non_zero_avl () && can_use_next_avl_p (prev
, next
));
1632 inline void nop (const vsetvl_info
&prev ATTRIBUTE_UNUSED
,
1633 const vsetvl_info
&next ATTRIBUTE_UNUSED
)
1636 /* modifiers for sew and lmul */
1638 inline void use_min_of_max_sew (vsetvl_info
&prev
, const vsetvl_info
&next
)
1640 prev
.set_max_sew (MIN (prev
.get_max_sew (), next
.get_max_sew ()));
1642 inline void use_next_sew (vsetvl_info
&prev
, const vsetvl_info
&next
)
1644 prev
.set_sew (next
.get_sew ());
1645 use_min_of_max_sew (prev
, next
);
1647 inline void use_max_sew (vsetvl_info
&prev
, const vsetvl_info
&next
)
1649 auto max_sew
= std::max (prev
.get_sew (), next
.get_sew ());
1650 prev
.set_sew (max_sew
);
1651 use_min_of_max_sew (prev
, next
);
1653 inline void use_next_sew_lmul (vsetvl_info
&prev
, const vsetvl_info
&next
)
1655 use_next_sew (prev
, next
);
1656 prev
.set_vlmul (next
.get_vlmul ());
1657 prev
.set_ratio (next
.get_ratio ());
1659 inline void use_next_sew_with_prev_ratio (vsetvl_info
&prev
,
1660 const vsetvl_info
&next
)
1662 use_next_sew (prev
, next
);
1663 prev
.set_vlmul (calculate_vlmul (next
.get_sew (), prev
.get_ratio ()));
1665 inline void modify_lmul_with_next_ratio (vsetvl_info
&prev
,
1666 const vsetvl_info
&next
)
1668 prev
.set_vlmul (calculate_vlmul (prev
.get_sew (), next
.get_ratio ()));
1669 prev
.set_ratio (next
.get_ratio ());
1672 inline void use_max_sew_and_lmul_with_next_ratio (vsetvl_info
&prev
,
1673 const vsetvl_info
&next
)
1675 prev
.set_vlmul (calculate_vlmul (prev
.get_sew (), next
.get_ratio ()));
1676 use_max_sew (prev
, next
);
1677 prev
.set_ratio (next
.get_ratio ());
1680 inline void use_max_sew_and_lmul_with_prev_ratio (vsetvl_info
&prev
,
1681 const vsetvl_info
&next
)
1683 auto max_sew
= std::max (prev
.get_sew (), next
.get_sew ());
1684 prev
.set_vlmul (calculate_vlmul (max_sew
, prev
.get_ratio ()));
1685 prev
.set_sew (max_sew
);
1688 /* modifiers for tail and mask policy */
1690 inline void use_tail_policy (vsetvl_info
&prev
, const vsetvl_info
&next
)
1692 if (!next
.get_ta ())
1693 prev
.set_ta (next
.get_ta ());
1695 inline void use_mask_policy (vsetvl_info
&prev
, const vsetvl_info
&next
)
1697 if (!next
.get_ma ())
1698 prev
.set_ma (next
.get_ma ());
1700 inline void use_tail_mask_policy (vsetvl_info
&prev
, const vsetvl_info
&next
)
1702 use_tail_policy (prev
, next
);
1703 use_mask_policy (prev
, next
);
1706 /* modifiers for avl */
1708 inline void use_next_avl (vsetvl_info
&prev
, const vsetvl_info
&next
)
1710 gcc_assert (can_use_next_avl_p (prev
, next
));
1711 prev
.update_avl (next
);
1714 inline void use_next_avl_when_not_equal (vsetvl_info
&prev
,
1715 const vsetvl_info
&next
)
1717 if (avl_equal_p (prev
, next
))
1719 gcc_assert (next
.has_non_zero_avl ());
1720 use_next_avl (prev
, next
);
1724 demand_system () : m_avl_def_in (nullptr), m_avl_def_out (nullptr) {}
1726 void set_avl_in_out_data (sbitmap
*m_avl_def_in
, sbitmap
*m_avl_def_out
)
1728 m_avl_def_in
= m_avl_def_in
;
1729 m_avl_def_out
= m_avl_def_out
;
1732 /* Can we move vsetvl info between prev_insn and next_insn safe? */
1733 bool avl_vl_unmodified_between_p (insn_info
*prev_insn
, insn_info
*next_insn
,
1734 const vsetvl_info
&info
,
1735 bool ignore_vl
= false)
1737 gcc_assert ((ignore_vl
&& info
.has_nonvlmax_reg_avl ())
1738 || (info
.has_nonvlmax_reg_avl () || info
.has_vl ()));
1740 gcc_assert (!prev_insn
->is_debug_insn () && !next_insn
->is_debug_insn ());
1741 if (prev_insn
->bb () == next_insn
->bb ()
1742 && prev_insn
->compare_with (next_insn
) < 0)
1744 for (insn_info
*i
= next_insn
->prev_nondebug_insn (); i
!= prev_insn
;
1745 i
= i
->prev_nondebug_insn ())
1747 // no def and use of vl
1748 if (!ignore_vl
&& modify_or_use_vl_p (i
, info
))
1752 if (modify_avl_p (i
, info
))
1759 if (!ignore_vl
&& info
.has_vl ())
1761 bitmap live_out
= df_get_live_out (prev_insn
->bb ()->cfg_bb ());
1762 if (bitmap_bit_p (live_out
, REGNO (info
.get_vl ())))
1766 if (info
.has_nonvlmax_reg_avl () && m_avl_def_in
&& m_avl_def_out
)
1768 bool has_avl_out
= false;
1769 unsigned regno
= REGNO (info
.get_avl ());
1771 sbitmap_iterator sbi
;
1772 EXECUTE_IF_SET_IN_BITMAP (m_avl_def_out
[prev_insn
->bb ()->index ()],
1775 if (get_regno (expr_id
, last_basic_block_for_fn (cfun
))
1779 if (!bitmap_bit_p (m_avl_def_in
[next_insn
->bb ()->index ()],
1787 for (insn_info
*i
= next_insn
; i
!= next_insn
->bb ()->head_insn ();
1788 i
= i
->prev_nondebug_insn ())
1790 // no def amd use of vl
1791 if (!ignore_vl
&& modify_or_use_vl_p (i
, info
))
1795 if (modify_avl_p (i
, info
))
1799 for (insn_info
*i
= prev_insn
->bb ()->end_insn (); i
!= prev_insn
;
1800 i
= i
->prev_nondebug_insn ())
1802 // no def amd use of vl
1803 if (!ignore_vl
&& modify_or_use_vl_p (i
, info
))
1807 if (modify_avl_p (i
, info
))
1814 bool sew_lmul_compatible_p (const vsetvl_info
&prev
, const vsetvl_info
&next
)
1816 gcc_assert (prev
.valid_p () && next
.valid_p ());
1817 sew_lmul_demand_type prev_flags
= prev
.get_sew_lmul_demand ();
1818 sew_lmul_demand_type next_flags
= next
.get_sew_lmul_demand ();
1819 #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1820 AVAILABLE_P, FUSE) \
1821 if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
1822 && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
1823 return COMPATIBLE_P (prev, next);
1825 #include "riscv-vsetvl.def"
1830 bool sew_lmul_available_p (const vsetvl_info
&prev
, const vsetvl_info
&next
)
1832 gcc_assert (prev
.valid_p () && next
.valid_p ());
1833 sew_lmul_demand_type prev_flags
= prev
.get_sew_lmul_demand ();
1834 sew_lmul_demand_type next_flags
= next
.get_sew_lmul_demand ();
1835 #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1836 AVAILABLE_P, FUSE) \
1837 if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
1838 && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
1839 return AVAILABLE_P (prev, next);
1841 #include "riscv-vsetvl.def"
1846 void merge_sew_lmul (vsetvl_info
&prev
, const vsetvl_info
&next
)
1848 gcc_assert (prev
.valid_p () && next
.valid_p ());
1849 sew_lmul_demand_type prev_flags
= prev
.get_sew_lmul_demand ();
1850 sew_lmul_demand_type next_flags
= next
.get_sew_lmul_demand ();
1851 #define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1852 AVAILABLE_P, FUSE) \
1853 if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
1854 && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
1856 gcc_assert (COMPATIBLE_P (prev, next)); \
1857 FUSE (prev, next); \
1858 prev.set_sew_lmul_demand (sew_lmul_demand_type::NEW_FLAGS); \
1862 #include "riscv-vsetvl.def"
1867 bool policy_compatible_p (const vsetvl_info
&prev
, const vsetvl_info
&next
)
1869 gcc_assert (prev
.valid_p () && next
.valid_p ());
1870 policy_demand_type prev_flags
= prev
.get_policy_demand ();
1871 policy_demand_type next_flags
= next
.get_policy_demand ();
1872 #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1873 AVAILABLE_P, FUSE) \
1874 if (prev_flags == policy_demand_type::PREV_FLAGS \
1875 && next_flags == policy_demand_type::NEXT_FLAGS) \
1876 return COMPATIBLE_P (prev, next);
1878 #include "riscv-vsetvl.def"
1883 bool policy_available_p (const vsetvl_info
&prev
, const vsetvl_info
&next
)
1885 gcc_assert (prev
.valid_p () && next
.valid_p ());
1886 policy_demand_type prev_flags
= prev
.get_policy_demand ();
1887 policy_demand_type next_flags
= next
.get_policy_demand ();
1888 #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1889 AVAILABLE_P, FUSE) \
1890 if (prev_flags == policy_demand_type::PREV_FLAGS \
1891 && next_flags == policy_demand_type::NEXT_FLAGS) \
1892 return AVAILABLE_P (prev, next);
1894 #include "riscv-vsetvl.def"
1899 void merge_policy (vsetvl_info
&prev
, const vsetvl_info
&next
)
1901 gcc_assert (prev
.valid_p () && next
.valid_p ());
1902 policy_demand_type prev_flags
= prev
.get_policy_demand ();
1903 policy_demand_type next_flags
= next
.get_policy_demand ();
1904 #define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1905 AVAILABLE_P, FUSE) \
1906 if (prev_flags == policy_demand_type::PREV_FLAGS \
1907 && next_flags == policy_demand_type::NEXT_FLAGS) \
1909 gcc_assert (COMPATIBLE_P (prev, next)); \
1910 FUSE (prev, next); \
1911 prev.set_policy_demand (policy_demand_type::NEW_FLAGS); \
1915 #include "riscv-vsetvl.def"
1920 bool vl_not_in_conflict_p (const vsetvl_info
&prev
, const vsetvl_info
&next
)
1922 /* We don't fuse this following case:
1925 vmv.s.x v0, a5 -- PREV
1926 vsetvli a5, ... -- NEXT
1928 Don't fuse NEXT into PREV.
1930 return !prev
.vl_modify_non_avl_op_p (next
)
1931 && !next
.vl_modify_non_avl_op_p (prev
);
1934 bool avl_compatible_p (const vsetvl_info
&prev
, const vsetvl_info
&next
)
1936 gcc_assert (prev
.valid_p () && next
.valid_p ());
1937 avl_demand_type prev_flags
= prev
.get_avl_demand ();
1938 avl_demand_type next_flags
= next
.get_avl_demand ();
1939 #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1940 AVAILABLE_P, FUSE) \
1941 if (prev_flags == avl_demand_type::PREV_FLAGS \
1942 && next_flags == avl_demand_type::NEXT_FLAGS) \
1943 return COMPATIBLE_P (prev, next);
1945 #include "riscv-vsetvl.def"
1950 bool avl_available_p (const vsetvl_info
&prev
, const vsetvl_info
&next
)
1952 gcc_assert (prev
.valid_p () && next
.valid_p ());
1953 avl_demand_type prev_flags
= prev
.get_avl_demand ();
1954 avl_demand_type next_flags
= next
.get_avl_demand ();
1955 #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1956 AVAILABLE_P, FUSE) \
1957 if (prev_flags == avl_demand_type::PREV_FLAGS \
1958 && next_flags == avl_demand_type::NEXT_FLAGS) \
1959 return AVAILABLE_P (prev, next);
1961 #include "riscv-vsetvl.def"
1966 void merge_avl (vsetvl_info
&prev
, const vsetvl_info
&next
)
1968 gcc_assert (prev
.valid_p () && next
.valid_p ());
1969 avl_demand_type prev_flags
= prev
.get_avl_demand ();
1970 avl_demand_type next_flags
= next
.get_avl_demand ();
1971 #define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1972 AVAILABLE_P, FUSE) \
1973 if (prev_flags == avl_demand_type::PREV_FLAGS \
1974 && next_flags == avl_demand_type::NEXT_FLAGS) \
1976 gcc_assert (COMPATIBLE_P (prev, next)); \
1977 FUSE (prev, next); \
1978 prev.set_avl_demand (avl_demand_type::NEW_FLAGS); \
1982 #include "riscv-vsetvl.def"
1987 bool compatible_p (const vsetvl_info
&prev
, const vsetvl_info
&next
)
1989 bool compatible_p
= sew_lmul_compatible_p (prev
, next
)
1990 && policy_compatible_p (prev
, next
)
1991 && avl_compatible_p (prev
, next
)
1992 && vl_not_in_conflict_p (prev
, next
);
1993 return compatible_p
;
1996 bool available_p (const vsetvl_info
&prev
, const vsetvl_info
&next
)
1998 bool available_p
= sew_lmul_available_p (prev
, next
)
1999 && policy_available_p (prev
, next
)
2000 && avl_available_p (prev
, next
)
2001 && vl_not_in_conflict_p (prev
, next
);
2002 gcc_assert (!available_p
|| compatible_p (prev
, next
));
2006 void merge (vsetvl_info
&prev
, const vsetvl_info
&next
)
2008 gcc_assert (compatible_p (prev
, next
));
2009 merge_sew_lmul (prev
, next
);
2010 merge_policy (prev
, next
);
2011 merge_avl (prev
, next
);
2012 gcc_assert (available_p (prev
, next
));
/* Demand-system helper and per-block vsetvl info (indexed by bb index).  */
2020 demand_system m_dem
;
2021 auto_vec
<vsetvl_block_info
> m_vector_block_infos
;
2023 /* data for avl reaching definition. */
2025 sbitmap
*m_avl_def_in
;
2026 sbitmap
*m_avl_def_out
;
2027 sbitmap
*m_reg_def_loc
;
2029 /* data for vsetvl info reaching definition. */
2030 vsetvl_info m_unknow_info
;
2031 auto_vec
<vsetvl_info
*> m_vsetvl_def_exprs
;
2032 sbitmap
*m_vsetvl_def_in
;
2033 sbitmap
*m_vsetvl_def_out
;
2036 auto_vec
<vsetvl_info
*> m_exprs
;
2045 struct edge_list
*m_edges
;
2047 auto_vec
<vsetvl_info
> m_delete_list
;
2049 vsetvl_block_info
&get_block_info (const bb_info
*bb
)
2051 return m_vector_block_infos
[bb
->index ()];
2053 const vsetvl_block_info
&get_block_info (const basic_block bb
) const
2055 return m_vector_block_infos
[bb
->index
];
2058 vsetvl_block_info
&get_block_info (const basic_block bb
)
2060 return m_vector_block_infos
[bb
->index
];
2063 void add_expr (auto_vec
<vsetvl_info
*> &m_exprs
, vsetvl_info
&info
)
2065 for (vsetvl_info
*item
: m_exprs
)
2070 m_exprs
.safe_push (&info
);
2073 unsigned get_expr_index (auto_vec
<vsetvl_info
*> &m_exprs
,
2074 const vsetvl_info
&info
)
2076 for (size_t i
= 0; i
< m_exprs
.length (); i
+= 1)
2078 if (*m_exprs
[i
] == info
)
2084 bool anticipated_exp_p (const vsetvl_info
&header_info
)
2086 if (!header_info
.has_nonvlmax_reg_avl () && !header_info
.has_vl ())
2089 bb_info
*bb
= header_info
.get_bb ();
2090 insn_info
*prev_insn
= bb
->head_insn ();
2091 insn_info
*next_insn
= header_info
.insn_inside_bb_p ()
2092 ? header_info
.get_insn ()
2093 : header_info
.get_bb ()->end_insn ();
2095 return m_dem
.avl_vl_unmodified_between_p (prev_insn
, next_insn
,
2099 bool available_exp_p (const vsetvl_info
&prev_info
,
2100 const vsetvl_info
&next_info
)
2102 return m_dem
.available_p (prev_info
, next_info
);
2105 void compute_probabilities ()
2110 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
2112 basic_block cfg_bb
= bb
->cfg_bb ();
2113 auto &curr_prob
= get_block_info (cfg_bb
).probability
;
2115 /* GCC assume entry block (bb 0) are always so
2116 executed so set its probability as "always". */
2117 if (ENTRY_BLOCK_PTR_FOR_FN (cfun
) == cfg_bb
)
2118 curr_prob
= profile_probability::always ();
2119 /* Exit block (bb 1) is the block we don't need to process. */
2120 if (EXIT_BLOCK_PTR_FOR_FN (cfun
) == cfg_bb
)
2123 gcc_assert (curr_prob
.initialized_p ());
2124 FOR_EACH_EDGE (e
, ei
, cfg_bb
->succs
)
2126 auto &new_prob
= get_block_info (e
->dest
).probability
;
2127 /* Normally, the edge probability should be initialized.
2128 However, some special testing code which is written in
2129 GIMPLE IR style force the edge probility uninitialized,
2130 we conservatively set it as never so that it will not
2131 affect PRE (Phase 3 && Phse 4). */
2132 if (!e
->probability
.initialized_p ())
2133 new_prob
= profile_probability::never ();
2134 else if (!new_prob
.initialized_p ())
2135 new_prob
= curr_prob
* e
->probability
;
2136 else if (new_prob
== profile_probability::always ())
2139 new_prob
+= curr_prob
* e
->probability
;
2144 void insert_vsetvl_insn (enum emit_type emit_type
, const vsetvl_info
&info
)
2146 rtx pat
= info
.get_vsetvl_pat ();
2147 rtx_insn
*rinsn
= info
.get_insn ()->rtl ();
2149 if (emit_type
== EMIT_DIRECT
)
2154 fprintf (dump_file
, " Insert vsetvl insn %d:\n",
2155 INSN_UID (get_last_insn ()));
2156 print_rtl_single (dump_file
, get_last_insn ());
2159 else if (emit_type
== EMIT_BEFORE
)
2161 emit_insn_before (pat
, rinsn
);
2164 fprintf (dump_file
, " Insert vsetvl insn before insn %d:\n",
2166 print_rtl_single (dump_file
, PREV_INSN (rinsn
));
2171 emit_insn_after (pat
, rinsn
);
2174 fprintf (dump_file
, " Insert vsetvl insn after insn %d:\n",
2176 print_rtl_single (dump_file
, NEXT_INSN (rinsn
));
2181 void change_vsetvl_insn (const vsetvl_info
&info
)
2183 rtx_insn
*rinsn
= info
.get_insn ()->rtl ();
2184 rtx new_pat
= info
.get_vsetvl_pat ();
2188 fprintf (dump_file
, " Change insn %d from:\n", INSN_UID (rinsn
));
2189 print_rtl_single (dump_file
, rinsn
);
2192 validate_change_or_fail (rinsn
, &PATTERN (rinsn
), new_pat
, false);
2196 fprintf (dump_file
, "\n to:\n");
2197 print_rtl_single (dump_file
, rinsn
);
2201 void remove_vsetvl_insn (const vsetvl_info
&info
)
2203 rtx_insn
*rinsn
= info
.get_insn ()->rtl ();
2206 fprintf (dump_file
, " Eliminate insn %d:\n", INSN_UID (rinsn
));
2207 print_rtl_single (dump_file
, rinsn
);
2209 if (in_sequence_p ())
2210 remove_insn (rinsn
);
2212 delete_insn (rinsn
);
2215 bool successors_probability_equal_p (const basic_block cfg_bb
) const
2219 profile_probability prob
= profile_probability::uninitialized ();
2220 FOR_EACH_EDGE (e
, ei
, cfg_bb
->succs
)
2222 if (prob
== profile_probability::uninitialized ())
2223 prob
= m_vector_block_infos
[e
->dest
->index
].probability
;
2224 else if (prob
== m_vector_block_infos
[e
->dest
->index
].probability
)
2227 /* We pick the highest probability among those incompatible VSETVL
2228 infos. When all incompatible VSTEVL infos have same probability, we
2229 don't pick any of them. */
2235 bool preds_all_same_avl_and_ratio_p (const vsetvl_info
&curr_info
)
2238 !bitmap_empty_p (m_vsetvl_def_in
[curr_info
.get_bb ()->index ()]));
2240 unsigned expr_index
;
2241 sbitmap_iterator sbi
;
2242 EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in
[curr_info
.get_bb ()->index ()], 0,
2245 const vsetvl_info
&prev_info
= *m_vsetvl_def_exprs
[expr_index
];
2246 if (!prev_info
.valid_p ()
2247 || !m_dem
.avl_available_p (prev_info
, curr_info
)
2248 || prev_info
.get_ratio () != curr_info
.get_ratio ())
2257 : m_avl_def_in (nullptr), m_avl_def_out (nullptr),
2258 m_vsetvl_def_in (nullptr), m_vsetvl_def_out (nullptr), m_avloc (nullptr),
2259 m_avin (nullptr), m_avout (nullptr), m_kill (nullptr), m_antloc (nullptr),
2260 m_transp (nullptr), m_insert (nullptr), m_del (nullptr), m_edges (nullptr)
2262 /* Initialization of RTL_SSA. */
2263 calculate_dominance_info (CDI_DOMINATORS
);
2265 crtl
->ssa
= new function_info (cfun
);
2266 m_vector_block_infos
.safe_grow_cleared (last_basic_block_for_fn (cfun
));
2267 compute_probabilities ();
2268 m_unknow_info
.set_unknown ();
2273 free_dominance_info (CDI_DOMINATORS
);
2274 if (crtl
->ssa
->perform_pending_updates ())
2277 crtl
->ssa
= nullptr;
2280 sbitmap_free (m_avl_regs
);
2282 sbitmap_vector_free (m_reg_def_loc
);
2285 sbitmap_vector_free (m_avl_def_in
);
2287 sbitmap_vector_free (m_avl_def_out
);
2289 if (m_vsetvl_def_in
)
2290 sbitmap_vector_free (m_vsetvl_def_in
);
2291 if (m_vsetvl_def_out
)
2292 sbitmap_vector_free (m_vsetvl_def_out
);
2295 sbitmap_vector_free (m_avloc
);
2297 sbitmap_vector_free (m_kill
);
2299 sbitmap_vector_free (m_antloc
);
2301 sbitmap_vector_free (m_transp
);
2303 sbitmap_vector_free (m_insert
);
2305 sbitmap_vector_free (m_del
);
2307 sbitmap_vector_free (m_avin
);
2309 sbitmap_vector_free (m_avout
);
2312 free_edge_list (m_edges
);
/* Dataflow helpers: reaching definitions for AVL registers, reaching
   vsetvl definitions, and the LCM local properties (avloc/antloc/...).  */
2315 void compute_avl_def_data ();
2316 void compute_vsetvl_def_data ();
2317 void compute_lcm_local_properties ();
/* The pass phases: local fusion, earliest-based global fusion,
   LCM-driven global placement, and emission of the final vsetvls.  */
2319 void fuse_local_vsetvl_info ();
2320 bool earliest_fuse_vsetvl_info ();
2321 void pre_global_vsetvl_info ();
2322 void emit_vsetvl ();
/* Post-emission cleanups of now-dead operands.  */
2324 void remove_avl_operand ();
2325 void remove_unused_dest_operand ();
2327 void dump (FILE *file
, const char *title
) const
2329 fprintf (file
, "\nVSETVL infos after %s\n\n", title
);
2330 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
2332 const auto &block_info
= m_vector_block_infos
[bb
->index ()];
2333 fprintf (file
, " bb %d:\n", bb
->index ());
2334 fprintf (file
, " probability: ");
2335 block_info
.probability
.dump (file
);
2336 fprintf (file
, "\n");
2337 if (!block_info
.empty_p ())
2339 fprintf (file
, " Header vsetvl info:");
2340 block_info
.get_entry_info ().dump (file
, " ");
2341 fprintf (file
, " Footer vsetvl info:");
2342 block_info
.get_exit_info ().dump (file
, " ");
2343 for (const auto &info
: block_info
.local_infos
)
2346 " insn %d vsetvl info:", info
.get_insn ()->uid ());
2347 info
.dump (file
, " ");
2355 pre_vsetvl::compute_avl_def_data ()
2357 if (bitmap_empty_p (m_avl_regs
))
2360 unsigned num_regs
= GP_REG_LAST
+ 1;
2361 unsigned num_bbs
= last_basic_block_for_fn (cfun
);
2363 sbitmap
*avl_def_loc_temp
= sbitmap_vector_alloc (num_bbs
, num_regs
);
2364 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
2366 bitmap_and (avl_def_loc_temp
[bb
->index ()], m_avl_regs
,
2367 m_reg_def_loc
[bb
->index ()]);
2369 vsetvl_block_info
&block_info
= get_block_info (bb
);
2370 if (block_info
.has_info ())
2372 vsetvl_info
&footer_info
= block_info
.get_exit_info ();
2373 gcc_assert (footer_info
.valid_p ());
2374 if (footer_info
.has_vl ())
2375 bitmap_set_bit (avl_def_loc_temp
[bb
->index ()],
2376 REGNO (footer_info
.get_vl ()));
2381 sbitmap_vector_free (m_avl_def_in
);
2383 sbitmap_vector_free (m_avl_def_out
);
2385 unsigned num_exprs
= num_bbs
* num_regs
;
2386 sbitmap
*avl_def_loc
= sbitmap_vector_alloc (num_bbs
, num_exprs
);
2387 sbitmap
*m_kill
= sbitmap_vector_alloc (num_bbs
, num_exprs
);
2388 m_avl_def_in
= sbitmap_vector_alloc (num_bbs
, num_exprs
);
2389 m_avl_def_out
= sbitmap_vector_alloc (num_bbs
, num_exprs
);
2391 bitmap_vector_clear (avl_def_loc
, num_bbs
);
2392 bitmap_vector_clear (m_kill
, num_bbs
);
2393 bitmap_vector_clear (m_avl_def_out
, num_bbs
);
2396 sbitmap_iterator sbi
;
2397 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
2398 EXECUTE_IF_SET_IN_BITMAP (avl_def_loc_temp
[bb
->index ()], 0, regno
, sbi
)
2400 bitmap_set_bit (avl_def_loc
[bb
->index ()],
2401 get_expr_id (bb
->index (), regno
, num_bbs
));
2402 bitmap_set_range (m_kill
[bb
->index ()], regno
* num_bbs
, num_bbs
);
2405 basic_block entry
= ENTRY_BLOCK_PTR_FOR_FN (cfun
);
2406 EXECUTE_IF_SET_IN_BITMAP (m_avl_regs
, 0, regno
, sbi
)
2407 bitmap_set_bit (m_avl_def_out
[entry
->index
],
2408 get_expr_id (entry
->index
, regno
, num_bbs
));
2410 compute_reaching_defintion (avl_def_loc
, m_kill
, m_avl_def_in
, m_avl_def_out
);
2412 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2415 " Compute avl reaching defition data (num_bbs %d, num_regs "
2418 fprintf (dump_file
, " avl_regs: ");
2419 dump_bitmap_file (dump_file
, m_avl_regs
);
2420 fprintf (dump_file
, "\n bitmap data:\n");
2421 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
2423 unsigned int i
= bb
->index ();
2424 fprintf (dump_file
, " BB %u:\n", i
);
2425 fprintf (dump_file
, " avl_def_loc:");
2427 sbitmap_iterator sbi
;
2428 EXECUTE_IF_SET_IN_BITMAP (avl_def_loc
[i
], 0, expr_id
, sbi
)
2430 fprintf (dump_file
, " (r%u,bb%u)", get_regno (expr_id
, num_bbs
),
2431 get_bb_index (expr_id
, num_bbs
));
2433 fprintf (dump_file
, "\n kill:");
2434 EXECUTE_IF_SET_IN_BITMAP (m_kill
[i
], 0, expr_id
, sbi
)
2436 fprintf (dump_file
, " (r%u,bb%u)", get_regno (expr_id
, num_bbs
),
2437 get_bb_index (expr_id
, num_bbs
));
2439 fprintf (dump_file
, "\n avl_def_in:");
2440 EXECUTE_IF_SET_IN_BITMAP (m_avl_def_in
[i
], 0, expr_id
, sbi
)
2442 fprintf (dump_file
, " (r%u,bb%u)", get_regno (expr_id
, num_bbs
),
2443 get_bb_index (expr_id
, num_bbs
));
2445 fprintf (dump_file
, "\n avl_def_out:");
2446 EXECUTE_IF_SET_IN_BITMAP (m_avl_def_out
[i
], 0, expr_id
, sbi
)
2448 fprintf (dump_file
, " (r%u,bb%u)", get_regno (expr_id
, num_bbs
),
2449 get_bb_index (expr_id
, num_bbs
));
2451 fprintf (dump_file
, "\n");
2455 sbitmap_vector_free (avl_def_loc
);
2456 sbitmap_vector_free (m_kill
);
2457 sbitmap_vector_free (avl_def_loc_temp
);
2459 m_dem
.set_avl_in_out_data (m_avl_def_in
, m_avl_def_out
);
2463 pre_vsetvl::compute_vsetvl_def_data ()
2465 m_vsetvl_def_exprs
.truncate (0);
2466 add_expr (m_vsetvl_def_exprs
, m_unknow_info
);
2467 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
2469 vsetvl_block_info
&block_info
= get_block_info (bb
);
2470 if (block_info
.empty_p ())
2472 vsetvl_info
&footer_info
= block_info
.get_exit_info ();
2473 gcc_assert (footer_info
.valid_p () || footer_info
.unknown_p ());
2474 add_expr (m_vsetvl_def_exprs
, footer_info
);
2477 if (m_vsetvl_def_in
)
2478 sbitmap_vector_free (m_vsetvl_def_in
);
2479 if (m_vsetvl_def_out
)
2480 sbitmap_vector_free (m_vsetvl_def_out
);
2482 sbitmap
*def_loc
= sbitmap_vector_alloc (last_basic_block_for_fn (cfun
),
2483 m_vsetvl_def_exprs
.length ());
2484 sbitmap
*m_kill
= sbitmap_vector_alloc (last_basic_block_for_fn (cfun
),
2485 m_vsetvl_def_exprs
.length ());
2487 m_vsetvl_def_in
= sbitmap_vector_alloc (last_basic_block_for_fn (cfun
),
2488 m_vsetvl_def_exprs
.length ());
2489 m_vsetvl_def_out
= sbitmap_vector_alloc (last_basic_block_for_fn (cfun
),
2490 m_vsetvl_def_exprs
.length ());
2492 bitmap_vector_clear (def_loc
, last_basic_block_for_fn (cfun
));
2493 bitmap_vector_clear (m_kill
, last_basic_block_for_fn (cfun
));
2494 bitmap_vector_clear (m_vsetvl_def_out
, last_basic_block_for_fn (cfun
));
2496 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
2498 vsetvl_block_info
&block_info
= get_block_info (bb
);
2499 if (block_info
.empty_p ())
2501 for (unsigned i
= 0; i
< m_vsetvl_def_exprs
.length (); i
+= 1)
2503 const vsetvl_info
&info
= *m_vsetvl_def_exprs
[i
];
2504 if (!info
.has_nonvlmax_reg_avl ())
2507 sbitmap_iterator sbi
;
2508 EXECUTE_IF_SET_IN_BITMAP (m_reg_def_loc
[bb
->index ()], 0, regno
,
2510 if (regno
== REGNO (info
.get_avl ()))
2512 bitmap_set_bit (m_kill
[bb
->index ()], i
);
2513 bitmap_set_bit (def_loc
[bb
->index ()],
2514 get_expr_index (m_vsetvl_def_exprs
,
2521 vsetvl_info
&footer_info
= block_info
.get_exit_info ();
2522 bitmap_ones (m_kill
[bb
->index ()]);
2523 bitmap_set_bit (def_loc
[bb
->index ()],
2524 get_expr_index (m_vsetvl_def_exprs
, footer_info
));
2527 /* Set the def_out of the ENTRY basic block to m_unknow_info expr. */
2528 basic_block entry
= ENTRY_BLOCK_PTR_FOR_FN (cfun
);
2529 bitmap_set_bit (m_vsetvl_def_out
[entry
->index
],
2530 get_expr_index (m_vsetvl_def_exprs
, m_unknow_info
));
2532 compute_reaching_defintion (def_loc
, m_kill
, m_vsetvl_def_in
,
2535 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2538 "\n Compute vsetvl info reaching defition data:\n\n");
2539 fprintf (dump_file
, " Expression List (%d):\n",
2540 m_vsetvl_def_exprs
.length ());
2541 for (unsigned i
= 0; i
< m_vsetvl_def_exprs
.length (); i
++)
2543 const auto &info
= *m_vsetvl_def_exprs
[i
];
2544 fprintf (dump_file
, " Expr[%u]: ", i
);
2545 info
.dump (dump_file
, " ");
2547 fprintf (dump_file
, "\n bitmap data:\n");
2548 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
2550 unsigned int i
= bb
->index ();
2551 fprintf (dump_file
, " BB %u:\n", i
);
2552 fprintf (dump_file
, " def_loc: ");
2553 dump_bitmap_file (dump_file
, def_loc
[i
]);
2554 fprintf (dump_file
, " kill: ");
2555 dump_bitmap_file (dump_file
, m_kill
[i
]);
2556 fprintf (dump_file
, " vsetvl_def_in: ");
2557 dump_bitmap_file (dump_file
, m_vsetvl_def_in
[i
]);
2558 fprintf (dump_file
, " vsetvl_def_out: ");
2559 dump_bitmap_file (dump_file
, m_vsetvl_def_out
[i
]);
2563 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
2565 vsetvl_block_info
&block_info
= get_block_info (bb
);
2566 if (block_info
.empty_p ())
2568 vsetvl_info
&curr_info
= block_info
.get_entry_info ();
2569 if (!curr_info
.valid_p ())
2572 unsigned int expr_index
;
2573 sbitmap_iterator sbi
;
2575 !bitmap_empty_p (m_vsetvl_def_in
[curr_info
.get_bb ()->index ()]));
2576 bool full_available
= true;
2577 EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in
[bb
->index ()], 0, expr_index
,
2580 vsetvl_info
&prev_info
= *m_vsetvl_def_exprs
[expr_index
];
2581 if (!prev_info
.valid_p ()
2582 || !m_dem
.available_p (prev_info
, curr_info
))
2584 full_available
= false;
2588 block_info
.full_available
= full_available
;
2591 sbitmap_vector_free (def_loc
);
2592 sbitmap_vector_free (m_kill
);
2595 /* Compute the local properties of each recorded expression.
2597 Local properties are those that are defined by the block, irrespective of
2600 An expression is transparent in a block if its operands are not modified
2603 An expression is computed (locally available) in a block if it is computed
2604 at least once and expression would contain the same value if the
2605 computation was moved to the end of the block.
2607 An expression is locally anticipatable in a block if it is computed at
2608 least once and expression would contain the same value if the computation
2609 was moved to the beginning of the block. */
2611 pre_vsetvl::compute_lcm_local_properties ()
2613 m_exprs
.truncate (0);
2614 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
2616 vsetvl_block_info
&block_info
= get_block_info (bb
);
2617 if (block_info
.empty_p ())
2619 vsetvl_info
&header_info
= block_info
.get_entry_info ();
2620 vsetvl_info
&footer_info
= block_info
.get_exit_info ();
2621 gcc_assert (footer_info
.valid_p () || footer_info
.unknown_p ());
2622 add_expr (m_exprs
, header_info
);
2623 add_expr (m_exprs
, footer_info
);
2626 int num_exprs
= m_exprs
.length ();
2628 sbitmap_vector_free (m_avloc
);
2630 sbitmap_vector_free (m_kill
);
2632 sbitmap_vector_free (m_antloc
);
2634 sbitmap_vector_free (m_transp
);
2636 sbitmap_vector_free (m_avin
);
2638 sbitmap_vector_free (m_avout
);
2640 m_avloc
= sbitmap_vector_alloc (last_basic_block_for_fn (cfun
), num_exprs
);
2641 m_kill
= sbitmap_vector_alloc (last_basic_block_for_fn (cfun
), num_exprs
);
2642 m_antloc
= sbitmap_vector_alloc (last_basic_block_for_fn (cfun
), num_exprs
);
2643 m_transp
= sbitmap_vector_alloc (last_basic_block_for_fn (cfun
), num_exprs
);
2644 m_avin
= sbitmap_vector_alloc (last_basic_block_for_fn (cfun
), num_exprs
);
2645 m_avout
= sbitmap_vector_alloc (last_basic_block_for_fn (cfun
), num_exprs
);
2647 bitmap_vector_clear (m_avloc
, last_basic_block_for_fn (cfun
));
2648 bitmap_vector_clear (m_antloc
, last_basic_block_for_fn (cfun
));
2649 bitmap_vector_clear (m_transp
, last_basic_block_for_fn (cfun
));
2651 /* - If T is locally available at the end of a block, then T' must be
2652 available at the end of the same block. Since some optimization has
2653 occurred earlier, T' might not be locally available, however, it must
2654 have been previously computed on all paths. As a formula, T at AVLOC(B)
2655 implies that T' at AVOUT(B).
2656 An "available occurrence" is one that is the last occurrence in the
2657 basic block and the operands are not modified by following statements in
2658 the basic block [including this insn].
2660 - If T is locally anticipated at the beginning of a block, then either
2661 T', is locally anticipated or it is already available from previous
2662 blocks. As a formula, this means that T at ANTLOC(B) implies that T' at
2663 ANTLOC(B) at AVIN(B).
2664 An "anticipatable occurrence" is one that is the first occurrence in the
2665 basic block, the operands are not modified in the basic block prior
2666 to the occurrence and the output is not used between the start of
2667 the block and the occurrence. */
2668 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
2670 unsigned bb_index
= bb
->index ();
2671 vsetvl_block_info
&block_info
= get_block_info (bb
);
2673 /* Compute m_transp */
2674 if (block_info
.empty_p ())
2676 bitmap_ones (m_transp
[bb_index
]);
2677 for (int i
= 0; i
< num_exprs
; i
+= 1)
2679 const vsetvl_info
&info
= *m_exprs
[i
];
2680 if (!info
.has_nonvlmax_reg_avl () && !info
.has_vl ())
2683 if (info
.has_nonvlmax_reg_avl ())
2686 sbitmap_iterator sbi
;
2687 EXECUTE_IF_SET_IN_BITMAP (m_reg_def_loc
[bb
->index ()], 0,
2690 if (regno
== REGNO (info
.get_avl ()))
2691 bitmap_clear_bit (m_transp
[bb
->index ()], i
);
2695 for (insn_info
*insn
: bb
->real_nondebug_insns ())
2697 if (info
.has_nonvlmax_reg_avl ()
2698 && find_access (insn
->defs (), REGNO (info
.get_avl ())))
2700 bitmap_clear_bit (m_transp
[bb_index
], i
);
2705 && reg_mentioned_p (info
.get_vl (), insn
->rtl ()))
2707 if (find_access (insn
->defs (), REGNO (info
.get_vl ())))
2708 /* We can't fuse vsetvl into the blocks that modify the
2709 VL operand since successors of such blocks will need
2710 the value of those blocks are defining.
2714 bb 5:use a5 bb 6:vsetvl a5, 5
2716 The example above shows that we can't fuse vsetvl
2717 from bb 6 into bb 4 since the successor bb 5 is using
2718 the value defined in bb 4. */
2722 /* We can't fuse vsetvl into the blocks that use the
2723 VL operand which has different value from the
2734 The example above shows that we can't fuse vsetvl
2735 from bb 6 into bb 5 since their value is different.
2737 resource_info resource
2738 = full_register (REGNO (info
.get_vl ()));
2739 def_lookup dl
= crtl
->ssa
->find_def (resource
, insn
);
2741 = dl
.matching_set_or_last_def_of_prev_group ();
2742 insn_info
*def_insn
= extract_single_source (def
);
2743 if (def_insn
&& vsetvl_insn_p (def_insn
->rtl ()))
2745 vsetvl_info def_info
= vsetvl_info (def_insn
);
2746 if (m_dem
.compatible_p (def_info
, info
))
2751 bitmap_clear_bit (m_transp
[bb_index
], i
);
2760 vsetvl_info
&header_info
= block_info
.get_entry_info ();
2761 vsetvl_info
&footer_info
= block_info
.get_exit_info ();
2763 if (header_info
.valid_p () && anticipated_exp_p (header_info
))
2764 bitmap_set_bit (m_antloc
[bb_index
],
2765 get_expr_index (m_exprs
, header_info
));
2767 if (footer_info
.valid_p ())
2768 for (int i
= 0; i
< num_exprs
; i
+= 1)
2770 const vsetvl_info
&info
= *m_exprs
[i
];
2771 if (!info
.valid_p ())
2773 if (available_exp_p (footer_info
, info
))
2774 bitmap_set_bit (m_avloc
[bb_index
], i
);
2778 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
2780 unsigned bb_index
= bb
->index ();
2781 bitmap_ior (m_kill
[bb_index
], m_transp
[bb_index
], m_avloc
[bb_index
]);
2782 bitmap_not (m_kill
[bb_index
], m_kill
[bb_index
]);
2785 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
2787 unsigned bb_index
= bb
->index ();
2790 FOR_EACH_EDGE (e
, ei
, bb
->cfg_bb ()->preds
)
2791 if (e
->flags
& EDGE_COMPLEX
)
2793 bitmap_clear (m_antloc
[bb_index
]);
2794 bitmap_clear (m_transp
[bb_index
]);
2800 pre_vsetvl::fuse_local_vsetvl_info ()
2803 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun
), GP_REG_LAST
+ 1);
2804 bitmap_vector_clear (m_reg_def_loc
, last_basic_block_for_fn (cfun
));
2805 bitmap_ones (m_reg_def_loc
[ENTRY_BLOCK_PTR_FOR_FN (cfun
)->index
]);
2807 for (bb_info
*bb
: crtl
->ssa
->bbs ())
2809 auto &block_info
= get_block_info (bb
);
2811 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2813 fprintf (dump_file
, " Try fuse basic block %d\n", bb
->index ());
2815 auto_vec
<vsetvl_info
> infos
;
2816 for (insn_info
*insn
: bb
->real_nondebug_insns ())
2818 vsetvl_info curr_info
= vsetvl_info (insn
);
2819 if (curr_info
.valid_p () || curr_info
.unknown_p ())
2820 infos
.safe_push (curr_info
);
2822 /* Collecting GP registers modified by the current bb. */
2823 if (insn
->is_real ())
2824 for (def_info
*def
: insn
->defs ())
2825 if (def
->is_reg () && GP_REG_P (def
->regno ()))
2826 bitmap_set_bit (m_reg_def_loc
[bb
->index ()], def
->regno ());
2829 vsetvl_info prev_info
= vsetvl_info ();
2830 prev_info
.set_empty ();
2831 for (auto &curr_info
: infos
)
2833 if (prev_info
.empty_p ())
2834 prev_info
= curr_info
;
2835 else if ((curr_info
.unknown_p () && prev_info
.valid_p ())
2836 || (curr_info
.valid_p () && prev_info
.unknown_p ()))
2838 block_info
.local_infos
.safe_push (prev_info
);
2839 prev_info
= curr_info
;
2841 else if (curr_info
.valid_p () && prev_info
.valid_p ())
2843 if (m_dem
.available_p (prev_info
, curr_info
))
2845 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2848 " Ignore curr info since prev info "
2849 "available with it:\n");
2850 fprintf (dump_file
, " prev_info: ");
2851 prev_info
.dump (dump_file
, " ");
2852 fprintf (dump_file
, " curr_info: ");
2853 curr_info
.dump (dump_file
, " ");
2854 fprintf (dump_file
, "\n");
2856 if (!curr_info
.vl_used_by_non_rvv_insn_p ()
2857 && vsetvl_insn_p (curr_info
.get_insn ()->rtl ()))
2858 m_delete_list
.safe_push (curr_info
);
2860 if (curr_info
.get_read_vl_insn ())
2861 prev_info
.set_read_vl_insn (curr_info
.get_read_vl_insn ());
2863 else if (m_dem
.compatible_p (prev_info
, curr_info
))
2865 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2867 fprintf (dump_file
, " Fuse curr info since prev info "
2868 "compatible with it:\n");
2869 fprintf (dump_file
, " prev_info: ");
2870 prev_info
.dump (dump_file
, " ");
2871 fprintf (dump_file
, " curr_info: ");
2872 curr_info
.dump (dump_file
, " ");
2874 m_dem
.merge (prev_info
, curr_info
);
2875 if (curr_info
.get_read_vl_insn ())
2876 prev_info
.set_read_vl_insn (curr_info
.get_read_vl_insn ());
2877 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2879 fprintf (dump_file
, " prev_info after fused: ");
2880 prev_info
.dump (dump_file
, " ");
2881 fprintf (dump_file
, "\n");
2886 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2889 " Cannot fuse uncompatible infos:\n");
2890 fprintf (dump_file
, " prev_info: ");
2891 prev_info
.dump (dump_file
, " ");
2892 fprintf (dump_file
, " curr_info: ");
2893 curr_info
.dump (dump_file
, " ");
2895 block_info
.local_infos
.safe_push (prev_info
);
2896 prev_info
= curr_info
;
2901 if (prev_info
.valid_p () || prev_info
.unknown_p ())
2902 block_info
.local_infos
.safe_push (prev_info
);
2905 m_avl_regs
= sbitmap_alloc (GP_REG_LAST
+ 1);
2906 bitmap_clear (m_avl_regs
);
2907 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
2909 vsetvl_block_info
&block_info
= get_block_info (bb
);
2910 if (block_info
.empty_p ())
2913 vsetvl_info
&header_info
= block_info
.get_entry_info ();
2914 if (header_info
.valid_p () && header_info
.has_nonvlmax_reg_avl ())
2916 gcc_assert (GP_REG_P (REGNO (header_info
.get_avl ())));
2917 bitmap_set_bit (m_avl_regs
, REGNO (header_info
.get_avl ()));
2924 pre_vsetvl::earliest_fuse_vsetvl_info ()
2926 compute_avl_def_data ();
2927 compute_vsetvl_def_data ();
2928 compute_lcm_local_properties ();
2930 unsigned num_exprs
= m_exprs
.length ();
2931 struct edge_list
*m_edges
= create_edge_list ();
2932 unsigned num_edges
= NUM_EDGES (m_edges
);
2934 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun
), num_exprs
);
2936 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun
), num_exprs
);
2938 sbitmap
*earliest
= sbitmap_vector_alloc (num_edges
, num_exprs
);
2940 compute_available (m_avloc
, m_kill
, m_avout
, m_avin
);
2941 compute_antinout_edge (m_antloc
, m_transp
, antin
, antout
);
2942 compute_earliest (m_edges
, num_exprs
, antin
, antout
, m_avout
, m_kill
,
2945 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2947 fprintf (dump_file
, "\n Compute LCM earliest insert data:\n\n");
2948 fprintf (dump_file
, " Expression List (%u):\n", num_exprs
);
2949 for (unsigned i
= 0; i
< num_exprs
; i
++)
2951 const auto &info
= *m_exprs
[i
];
2952 fprintf (dump_file
, " Expr[%u]: ", i
);
2953 info
.dump (dump_file
, " ");
2955 fprintf (dump_file
, "\n bitmap data:\n");
2956 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
2958 unsigned int i
= bb
->index ();
2959 fprintf (dump_file
, " BB %u:\n", i
);
2960 fprintf (dump_file
, " avloc: ");
2961 dump_bitmap_file (dump_file
, m_avloc
[i
]);
2962 fprintf (dump_file
, " kill: ");
2963 dump_bitmap_file (dump_file
, m_kill
[i
]);
2964 fprintf (dump_file
, " antloc: ");
2965 dump_bitmap_file (dump_file
, m_antloc
[i
]);
2966 fprintf (dump_file
, " transp: ");
2967 dump_bitmap_file (dump_file
, m_transp
[i
]);
2969 fprintf (dump_file
, " avin: ");
2970 dump_bitmap_file (dump_file
, m_avin
[i
]);
2971 fprintf (dump_file
, " avout: ");
2972 dump_bitmap_file (dump_file
, m_avout
[i
]);
2973 fprintf (dump_file
, " antin: ");
2974 dump_bitmap_file (dump_file
, antin
[i
]);
2975 fprintf (dump_file
, " antout: ");
2976 dump_bitmap_file (dump_file
, antout
[i
]);
2978 fprintf (dump_file
, "\n");
2979 fprintf (dump_file
, " earliest:\n");
2980 for (unsigned ed
= 0; ed
< num_edges
; ed
++)
2982 edge eg
= INDEX_EDGE (m_edges
, ed
);
2984 if (bitmap_empty_p (earliest
[ed
]))
2986 fprintf (dump_file
, " Edge(bb %u -> bb %u): ", eg
->src
->index
,
2988 dump_bitmap_file (dump_file
, earliest
[ed
]);
2990 fprintf (dump_file
, "\n");
2993 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2995 fprintf (dump_file
, " Fused global info result:\n");
2998 bool changed
= false;
2999 for (unsigned ed
= 0; ed
< num_edges
; ed
++)
3001 sbitmap e
= earliest
[ed
];
3002 if (bitmap_empty_p (e
))
3005 unsigned int expr_index
;
3006 sbitmap_iterator sbi
;
3007 EXECUTE_IF_SET_IN_BITMAP (e
, 0, expr_index
, sbi
)
3009 vsetvl_info
&curr_info
= *m_exprs
[expr_index
];
3010 if (!curr_info
.valid_p ())
3013 edge eg
= INDEX_EDGE (m_edges
, ed
);
3014 if (eg
->probability
== profile_probability::never ())
3016 if (eg
->src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
)
3017 || eg
->dest
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
3020 /* When multiple set bits in earliest edge, such edge may
3021 have infinite loop in preds or succs or multiple conflict
3022 vsetvl expression which make such edge is unrelated. We
3023 don't perform fusion for such situation. */
3024 if (bitmap_count_bits (e
) != 1)
3027 vsetvl_block_info
&src_block_info
= get_block_info (eg
->src
);
3028 vsetvl_block_info
&dest_block_info
= get_block_info (eg
->dest
);
3030 if (src_block_info
.probability
3031 == profile_probability::uninitialized ())
3034 if (src_block_info
.empty_p ())
3036 vsetvl_info new_curr_info
= curr_info
;
3037 new_curr_info
.set_bb (crtl
->ssa
->bb (eg
->dest
));
3038 bool has_compatible_p
= false;
3039 unsigned int def_expr_index
;
3040 sbitmap_iterator sbi2
;
3041 EXECUTE_IF_SET_IN_BITMAP (
3042 m_vsetvl_def_in
[new_curr_info
.get_bb ()->index ()], 0,
3043 def_expr_index
, sbi2
)
3045 vsetvl_info
&prev_info
= *m_vsetvl_def_exprs
[def_expr_index
];
3046 if (!prev_info
.valid_p ())
3048 if (m_dem
.compatible_p (prev_info
, new_curr_info
))
3050 has_compatible_p
= true;
3054 if (!has_compatible_p
)
3056 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3059 " Forbidden lift up vsetvl info into bb %u "
3060 "since there is no vsetvl info that reaching in "
3061 "is compatible with it:",
3063 curr_info
.dump (dump_file
, " ");
3068 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3071 " Set empty bb %u to info:", eg
->src
->index
);
3072 curr_info
.dump (dump_file
, " ");
3074 src_block_info
.set_info (curr_info
);
3075 src_block_info
.probability
= dest_block_info
.probability
;
3078 else if (src_block_info
.has_info ())
3080 vsetvl_info
&prev_info
= src_block_info
.get_exit_info ();
3081 gcc_assert (prev_info
.valid_p ());
3083 if (m_dem
.compatible_p (prev_info
, curr_info
))
3085 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3087 fprintf (dump_file
, " Fuse curr info since prev info "
3088 "compatible with it:\n");
3089 fprintf (dump_file
, " prev_info: ");
3090 prev_info
.dump (dump_file
, " ");
3091 fprintf (dump_file
, " curr_info: ");
3092 curr_info
.dump (dump_file
, " ");
3094 m_dem
.merge (prev_info
, curr_info
);
3095 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3097 fprintf (dump_file
, " prev_info after fused: ");
3098 prev_info
.dump (dump_file
, " ");
3099 fprintf (dump_file
, "\n");
3102 if (src_block_info
.has_info ())
3103 src_block_info
.probability
+= dest_block_info
.probability
;
3105 else if (src_block_info
.has_info ()
3106 && !m_dem
.compatible_p (prev_info
, curr_info
))
3108 /* Cancel lift up if probabilities are equal. */
3109 if (successors_probability_equal_p (eg
->src
))
3111 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3114 " Change empty bb %u to from:",
3116 prev_info
.dump (dump_file
, " ");
3118 " to (higher probability):");
3119 curr_info
.dump (dump_file
, " ");
3121 src_block_info
.set_empty_info ();
3122 src_block_info
.probability
3123 = profile_probability::uninitialized ();
3126 /* Choose the one with higher probability. */
3127 else if (dest_block_info
.probability
3128 > src_block_info
.probability
)
3130 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3133 " Change empty bb %u to from:",
3135 prev_info
.dump (dump_file
, " ");
3137 " to (higher probability):");
3138 curr_info
.dump (dump_file
, " ");
3140 src_block_info
.set_info (curr_info
);
3141 src_block_info
.probability
= dest_block_info
.probability
;
3148 vsetvl_info
&prev_info
= src_block_info
.get_exit_info ();
3149 if (!prev_info
.valid_p ()
3150 || m_dem
.available_p (prev_info
, curr_info
))
3153 if (m_dem
.compatible_p (prev_info
, curr_info
))
3155 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3157 fprintf (dump_file
, " Fuse curr info since prev info "
3158 "compatible with it:\n");
3159 fprintf (dump_file
, " prev_info: ");
3160 prev_info
.dump (dump_file
, " ");
3161 fprintf (dump_file
, " curr_info: ");
3162 curr_info
.dump (dump_file
, " ");
3164 m_dem
.merge (prev_info
, curr_info
);
3165 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3167 fprintf (dump_file
, " prev_info after fused: ");
3168 prev_info
.dump (dump_file
, " ");
3169 fprintf (dump_file
, "\n");
3177 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3179 fprintf (dump_file
, "\n");
3182 sbitmap_vector_free (antin
);
3183 sbitmap_vector_free (antout
);
3184 sbitmap_vector_free (earliest
);
3185 free_edge_list (m_edges
);
3191 pre_vsetvl::pre_global_vsetvl_info ()
3193 compute_avl_def_data ();
3194 compute_vsetvl_def_data ();
3195 compute_lcm_local_properties ();
3197 unsigned num_exprs
= m_exprs
.length ();
3198 m_edges
= pre_edge_lcm_avs (num_exprs
, m_transp
, m_avloc
, m_antloc
, m_kill
,
3199 m_avin
, m_avout
, &m_insert
, &m_del
);
3200 unsigned num_edges
= NUM_EDGES (m_edges
);
3202 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3204 fprintf (dump_file
, "\n Compute LCM insert and delete data:\n\n");
3205 fprintf (dump_file
, " Expression List (%u):\n", num_exprs
);
3206 for (unsigned i
= 0; i
< num_exprs
; i
++)
3208 const auto &info
= *m_exprs
[i
];
3209 fprintf (dump_file
, " Expr[%u]: ", i
);
3210 info
.dump (dump_file
, " ");
3212 fprintf (dump_file
, "\n bitmap data:\n");
3213 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
3215 unsigned i
= bb
->index ();
3216 fprintf (dump_file
, " BB %u:\n", i
);
3217 fprintf (dump_file
, " avloc: ");
3218 dump_bitmap_file (dump_file
, m_avloc
[i
]);
3219 fprintf (dump_file
, " kill: ");
3220 dump_bitmap_file (dump_file
, m_kill
[i
]);
3221 fprintf (dump_file
, " antloc: ");
3222 dump_bitmap_file (dump_file
, m_antloc
[i
]);
3223 fprintf (dump_file
, " transp: ");
3224 dump_bitmap_file (dump_file
, m_transp
[i
]);
3226 fprintf (dump_file
, " avin: ");
3227 dump_bitmap_file (dump_file
, m_avin
[i
]);
3228 fprintf (dump_file
, " avout: ");
3229 dump_bitmap_file (dump_file
, m_avout
[i
]);
3230 fprintf (dump_file
, " del: ");
3231 dump_bitmap_file (dump_file
, m_del
[i
]);
3233 fprintf (dump_file
, "\n");
3234 fprintf (dump_file
, " insert:\n");
3235 for (unsigned ed
= 0; ed
< num_edges
; ed
++)
3237 edge eg
= INDEX_EDGE (m_edges
, ed
);
3239 if (bitmap_empty_p (m_insert
[ed
]))
3241 fprintf (dump_file
, " Edge(bb %u -> bb %u): ", eg
->src
->index
,
3243 dump_bitmap_file (dump_file
, m_insert
[ed
]);
3247 /* Remove vsetvl infos as LCM suggest */
3248 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
3250 sbitmap d
= m_del
[bb
->index ()];
3251 if (bitmap_count_bits (d
) == 0)
3253 gcc_assert (bitmap_count_bits (d
) == 1);
3254 unsigned expr_index
= bitmap_first_set_bit (d
);
3255 vsetvl_info
&info
= *m_exprs
[expr_index
];
3256 gcc_assert (info
.valid_p ());
3257 gcc_assert (info
.get_bb () == bb
);
3258 const vsetvl_block_info
&block_info
= get_block_info (info
.get_bb ());
3259 gcc_assert (block_info
.get_entry_info () == info
);
3263 /* Remove vsetvl infos if all precessors are available to the block. */
3264 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
3266 vsetvl_block_info
&block_info
= get_block_info (bb
);
3267 if (block_info
.empty_p () || !block_info
.full_available
)
3270 vsetvl_info
&info
= block_info
.get_entry_info ();
3274 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
3276 vsetvl_block_info
&block_info
= get_block_info (bb
);
3277 if (block_info
.empty_p ())
3279 vsetvl_info
&curr_info
= block_info
.get_entry_info ();
3280 if (curr_info
.delete_p ())
3282 if (block_info
.local_infos
.is_empty ())
3284 curr_info
= block_info
.local_infos
[0];
3286 if (curr_info
.valid_p () && !curr_info
.vl_used_by_non_rvv_insn_p ()
3287 && preds_all_same_avl_and_ratio_p (curr_info
))
3288 curr_info
.set_change_vtype_only ();
3290 vsetvl_info prev_info
= vsetvl_info ();
3291 prev_info
.set_empty ();
3292 for (auto &curr_info
: block_info
.local_infos
)
3294 if (prev_info
.valid_p () && curr_info
.valid_p ()
3295 && m_dem
.avl_available_p (prev_info
, curr_info
)
3296 && prev_info
.get_ratio () == curr_info
.get_ratio ())
3297 curr_info
.set_change_vtype_only ();
3298 prev_info
= curr_info
;
/* Phase 4 (emit): materialize the fused/global analysis results by
   deleting, changing, or inserting real vsetvl instructions.
   NOTE(review): this chunk is a mangled extraction -- statements are split
   across physical lines, several original lines (dump_file guards, braces,
   declarations) are elided, and the leading numeric tokens are extraction
   artifacts.  The comments below describe only the logic that is visible.  */
3304 pre_vsetvl::emit_vsetvl ()
/* Presumably set when an insn is queued on an edge so that
   commit_edge_insertions () runs at the end; the guard itself is not
   visible in this extraction -- TODO confirm against the full source.  */
3306 bool need_commit
= false;
/* Pass 1: per-insn fixups -- walk every local vsetvl info of every basic
   block in RTL-SSA order.  */
3308 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
3310 for (const auto &curr_info
: get_block_info (bb
).local_infos
)
3312 insn_info
*insn
= curr_info
.get_insn ();
/* Info marked for deletion: drop the insn when it is a standalone
   vsetvl instruction.  */
3313 if (curr_info
.delete_p ())
3315 if (vsetvl_insn_p (insn
->rtl ()))
3316 remove_vsetvl_insn (curr_info
);
/* A valid info either rewrites an existing vsetvl insn in place (when
   the fused info no longer matches what the insn encodes) ...  */
3319 else if (curr_info
.valid_p ())
3321 if (vsetvl_insn_p (insn
->rtl ()))
3323 const vsetvl_info temp
= vsetvl_info (insn
);
3324 if (!(curr_info
== temp
))
/* Dump the before/after infos (the dump_file guard around these calls
   is elided in this extraction).  */
3328 fprintf (dump_file
, "\n Change vsetvl info from: ");
3329 temp
.dump (dump_file
, " ");
3330 fprintf (dump_file
, " to: ");
3331 curr_info
.dump (dump_file
, " ");
3333 change_vsetvl_insn (curr_info
);
/* ... or inserts a fresh vsetvl immediately before the RVV insn that
   requires it.  */
3341 "\n Insert vsetvl info before insn %d: ",
3343 curr_info
.dump (dump_file
, " ");
3345 insert_vsetvl_insn (EMIT_BEFORE
, curr_info
);
/* Pass 2: remove the vsetvl insns the global phases queued for deletion;
   every entry must still be a real vsetvl instruction.  */
3351 for (const vsetvl_info
&item
: m_delete_list
)
3353 gcc_assert (vsetvl_insn_p (item
.get_insn ()->rtl ()));
3354 remove_vsetvl_insn (item
);
3357 /* Insert vsetvl info that was not deleted after lift up. */
/* Pass 3: for a block whose exit (footer) info was marked deleted after
   being lifted up, re-emit that info on each successor edge.  */
3358 for (const bb_info
*bb
: crtl
->ssa
->bbs ())
3360 const vsetvl_block_info
&block_info
= get_block_info (bb
);
3361 if (!block_info
.has_info ())
3364 const vsetvl_info
&footer_info
= block_info
.get_exit_info ();
3366 if (footer_info
.delete_p ())
3370 edge_iterator eg_iterator
;
3371 FOR_EACH_EDGE (eg
, eg_iterator
, bb
->cfg_bb ()->succs
)
/* Inserting insns on an abnormal edge is not supported.  */
3373 gcc_assert (!(eg
->flags
& EDGE_ABNORMAL
));
3378 "\n Insert missed vsetvl info at edge(bb %u -> bb %u): ",
3379 eg
->src
->index
, eg
->dest
->index
);
3380 footer_info
.dump (dump_file
, " ");
/* Generate the vsetvl sequence into a detached insn list and queue it
   on the edge; the queue is committed at the end of this function.  */
3383 insert_vsetvl_insn (EMIT_DIRECT
, footer_info
);
3384 rtx_insn
*rinsn
= get_insns ();
3386 default_rtl_profile ();
3387 insert_insn_on_edge (rinsn
, eg
);
3392 /* Pass 4: insert vsetvl insns on edges where the LCM m_insert bitmap
   says one is needed. */
3393 for (int ed
= 0; ed
< NUM_EDGES (m_edges
); ed
++)
3395 edge eg
= INDEX_EDGE (m_edges
, ed
);
3396 sbitmap i
= m_insert
[ed
];
/* No expression to insert on this edge.  */
3397 if (bitmap_count_bits (i
) < 1)
3400 if (bitmap_count_bits (i
) > 1)
3401 /* For code with an infinite loop (e.g. pr61634.c), the data flow is
completely wrong. */
/* Past the bail-outs above, exactly one expression must remain.  */
3405 gcc_assert (bitmap_count_bits (i
) == 1);
3406 unsigned expr_index
= bitmap_first_set_bit (i
);
3407 const vsetvl_info
&info
= *m_exprs
[expr_index
];
3408 gcc_assert (info
.valid_p ());
3412 "\n Insert vsetvl info at edge(bb %u -> bb %u): ",
3413 eg
->src
->index
, eg
->dest
->index
);
3414 info
.dump (dump_file
, " ");
/* Emit under the edge's profile so the new insns get sensible counts,
   then restore the default RTL profile afterwards.  */
3416 rtl_profile_for_edge (eg
);
3419 insert_vsetvl_insn (EMIT_DIRECT
, info
);
3420 rtx_insn
*rinsn
= get_insns ();
3422 default_rtl_profile ();
3424 /* We should not get an abnormal edge here. */
3425 gcc_assert (!(eg
->flags
& EDGE_ABNORMAL
));
3427 insert_insn_on_edge (rinsn
, eg
);
/* Flush everything queued on edges above into the insn stream.  */
3431 commit_edge_insertions ();
/* Phase 5 driver (the name "cleaup" is a typo carried from the original
   source): post-emission cleanup of operands made redundant by the pass.  */
3435 pre_vsetvl::cleaup ()
/* Drop AVL register operands whose register is referenced only as the
   AVL use ...  */
3437 remove_avl_operand ();
/* ... and vl destination operands that nothing reads.  */
3438 remove_unused_dest_operand ();
/* Cleanup helper: rewrite insns whose register AVL operand is no longer
   needed, replacing it with const0_rtx.
   NOTE(review): mangled extraction -- loop braces and some declarations
   (e.g. of `rinsn', `src' and `new_pat') are elided here; comments cover
   only the visible logic.  */
3442 pre_vsetvl::remove_avl_operand ()
/* Scan every insn of every basic block in the function.  */
3446 FOR_ALL_BB_FN (cfg_bb
, cfun
)
3447 FOR_BB_INSNS (cfg_bb
, rinsn
)
/* Only real insns that carry a vl operand held in a register qualify.  */
3448 if (NONDEBUG_INSN_P (rinsn
) && has_vl_op (rinsn
)
3449 && REG_P (get_vl (rinsn
)))
3451 rtx avl
= get_vl (rinsn
);
/* If the register occurs exactly once in the insn, its only occurrence
   is the AVL operand itself, so it can be substituted away.  */
3452 if (count_regno_occurrences (rinsn
, REGNO (avl
)) == 1)
/* Fault-only-first loads: substitute throughout the whole PATTERN ...  */
3455 if (fault_first_load_p (rinsn
))
3457 = simplify_replace_rtx (PATTERN (rinsn
), avl
, const0_rtx
);
/* ... otherwise rebuild the insn's single SET with the AVL replaced
   only inside its source expression.  */
3460 rtx set
= single_set (rinsn
);
3462 = simplify_replace_rtx (SET_SRC (set
), avl
, const0_rtx
);
3463 new_pat
= gen_rtx_SET (SET_DEST (set
), src
);
/* Dump the insn being cleaned up (dump_file guard elided in this
   extraction).  */
3467 fprintf (dump_file
, " Cleanup insn %u's avl operand:\n",
3469 print_rtl_single (dump_file
, rinsn
);
/* Install the new pattern through the validating change machinery.  */
3471 validate_change_or_fail (rinsn
, &PATTERN (rinsn
), new_pat
, false);
/* Cleanup helper: for vsetvl insns whose vl destination register is never
   read, regenerate the insn without that destination operand.
   NOTE(review): mangled extraction -- braces and the tail of the final
   validate_change_or_fail call are elided; comments cover visible logic
   only.  */
3477 pre_vsetvl::remove_unused_dest_operand ()
/* Scan every insn of every basic block.  */
3482 FOR_ALL_BB_FN (cfg_bb
, cfun
)
3483 FOR_BB_INSNS (cfg_bb
, rinsn
)
/* Only standalone vsetvl instructions are candidates.  */
3484 if (NONDEBUG_INSN_P (rinsn
) && vsetvl_insn_p (rinsn
))
3486 rtx vl
= get_vl (rinsn
);
3487 vsetvl_info info
= vsetvl_info (rinsn
);
/* The vl result must be unused from this insn onward in the block.  */
3488 if (has_no_uses (cfg_bb
, rinsn
, REGNO (vl
)))
/* VLMAX-AVL vsetvls are excluded -- presumably they need the vl
   destination to define the vector length; TODO confirm.  */
3489 if (!info
.has_vlmax_avl ())
/* Regenerate the vsetvl pattern without the vl destination.  */
3491 rtx new_pat
= info
.get_vsetvl_pat (true);
3495 " Remove vsetvl insn %u's dest(vl) operand since "
3498 print_rtl_single (dump_file
, rinsn
);
/* Swap in the destination-less pattern via the validating change
   machinery.  */
3500 validate_change_or_fail (rinsn
, &PATTERN (rinsn
), new_pat
,
/* Static metadata describing the VSETVL RTL pass to the pass manager
   (the closing brace of the initializer is elided in this extraction).  */
3506 const pass_data pass_data_vsetvl
= {
3507 RTL_PASS
, /* type */
3508 "vsetvl", /* name */
3509 OPTGROUP_NONE
, /* optinfo_flags */
3510 TV_NONE
, /* tv_id */
3511 0, /* properties_required */
3512 0, /* properties_provided */
3513 0, /* properties_destroyed */
3514 0, /* todo_flags_start */
3515 0, /* todo_flags_finish */
/* The VSETVL pass object.  Runs only when the target has the 'V' vector
   extension enabled (see gate ()); execute () dispatches to either the
   simple or the lazy insertion strategy.  */
3518 class pass_vsetvl
: public rtl_opt_pass
/* Strategy 1: insert a vsetvl before every RVV insn (no optimization).  */
3521 void simple_vsetvl ();
/* Strategy 2: the 5-phase fuse/LCM-based lazy insertion.  */
3522 void lazy_vsetvl ();
3525 pass_vsetvl (gcc::context
*ctxt
) : rtl_opt_pass (pass_data_vsetvl
, ctxt
) {}
3527 /* opt_pass methods: */
/* Run the pass only for targets with the vector extension.  */
3528 virtual bool gate (function
*) final override
{ return TARGET_VECTOR
; }
3529 virtual unsigned int execute (function
*) final override
;
3530 }; // class pass_vsetvl
/* Simple strategy: emit a vsetvl immediately before every insn that has a
   vtype operand, with no attempt to eliminate redundancy.
   NOTE(review): mangled extraction -- braces, `continue's and dump_file
   guards are elided.  */
3533 pass_vsetvl::simple_vsetvl ()
3536 fprintf (dump_file
, "\nEntering Simple VSETVL PASS\n");
/* Scan every insn of every basic block.  */
3540 FOR_ALL_BB_FN (cfg_bb
, cfun
)
3542 FOR_BB_INSNS (cfg_bb
, rinsn
)
/* Skip debug insns, notes, etc.  */
3544 if (!NONDEBUG_INSN_P (rinsn
))
/* Every insn that consumes vtype state gets its own vsetvl.  */
3546 if (has_vtype_op (rinsn
))
/* Derive the required vl/vtype configuration from the insn itself
   and emit the corresponding vsetvl pattern right before it.  */
3548 const auto &info
= vsetvl_info (rinsn
);
3549 rtx pat
= info
.get_vsetvl_pat ();
3550 emit_insn_before (pat
, rinsn
);
/* Dump the freshly inserted insn (now PREV_INSN of rinsn).  */
3553 fprintf (dump_file
, " Insert vsetvl insn before insn %d:\n",
3555 print_rtl_single (dump_file
, PREV_INSN (rinsn
));
3562 /* Lazy vsetvl insertion for optimize > 0. */
/* Drives the five phases described at the top of the file: local fusion,
   earliest-based lift-up (iterated to a fixed point), LCM-based global
   reduction, emission, and cleanup.
   NOTE(review): mangled extraction -- the loop around phase 2, the calls
   for phases 4/5 and several dump_file guards are elided.  */
3564 pass_vsetvl::lazy_vsetvl ()
3567 fprintf (dump_file
, "\nEntering Lazy VSETVL PASS\n\n");
3569 pre_vsetvl pre
= pre_vsetvl ();
/* Phase 1: fuse vsetvl infos within each basic block.  */
3572 fprintf (dump_file
, "\nPhase 1: Fuse local vsetvl infos.\n\n");
3573 pre
.fuse_local_vsetvl_info ();
3574 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3575 pre
.dump (dump_file
, "phase 1");
3577 /* Phase 2: Fuse header and footer vsetvl infos between basic blocks. */
3579 fprintf (dump_file
, "\nPhase 2: Lift up vsetvl info.\n\n");
/* Iteration counter for the lift-up loop; the enclosing loop itself is
   elided in this extraction.  */
3581 int fused_count
= 0;
3585 fprintf (dump_file
, " Try lift up %d.\n\n", fused_count
);
/* Repeat until earliest_fuse_vsetvl_info () reports no further change.  */
3586 changed
= pre
.earliest_fuse_vsetvl_info ();
3590 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3591 pre
.dump (dump_file
, "phase 2");
3593 /* Phase 3: Reducing redundant vsetvl infos using LCM. */
3595 fprintf (dump_file
, "\nPhase 3: Reduce global vsetvl infos.\n\n");
3596 pre
.pre_global_vsetvl_info ();
3597 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3598 pre
.dump (dump_file
, "phase 3");
3600 /* Phase 4: Insert, modify and remove vsetvl insns. */
3603 "\nPhase 4: Insert, modify and remove vsetvl insns.\n\n");
3606 /* Phase 5: Cleanup (the dump string below keeps the original's
   "Cleaup" typo; it is runtime output and must match the source). */
3608 fprintf (dump_file
, "\nPhase 5: Cleaup\n\n");
3614 /* Main entry point for this pass. */
/* NOTE(review): the function body is truncated by this extraction -- the
   early-return bodies, the split call, and the dispatch to
   simple_vsetvl ()/lazy_vsetvl () are elided.  */
3616 pass_vsetvl::execute (function
*)
/* Nothing to do for an empty function.  */
3618 if (n_basic_blocks_for_fn (cfun
) <= 0)
3621 /* An RVV instruction may change after splitting, i.e. it is not a
stable instruction.  Split here to avoid potential issues, since the
VSETVL pass runs before the split pass. */
3626 /* Early return when the function contains no vector instructions. */
3627 if (!has_vector_insn (cfun
))
/* Factory hook called by the pass manager to instantiate the VSETVL pass
   (the return type line is elided in this extraction).  */
3639 make_pass_vsetvl (gcc::context
*ctxt
)
3641 return new pass_vsetvl (ctxt
);