/* VSETVL pass for RISC-V 'V' Extension for GNU compiler.
   Copyright (C) 2022-2023 Free Software Foundation, Inc.
   Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* The values of the vl and vtype registers affect the behavior of RVV
   insns.  That is, before executing an RVV instruction we need to set the
   correct vl and vtype values by executing a vsetvl instruction first.
   The problem this pass tries to solve is to execute as few vsetvl
   instructions as possible while keeping the behavior the same.  The pass
   is divided into 5 phases:

   - Phase 1 (fuse local vsetvl infos): traverse each basic block, parse
     each instruction in it that affects the vl and vtype state, and
     generate an array of vsetvl_info objects.  Then traverse the
     vsetvl_info array from front to back and perform fusion according to
     the fusion rules.  The fused vsetvl infos are stored in the
     vsetvl_block_info object's `infos` field.

   - Phase 2 (earliest fuse global vsetvl infos): the header_info and
     footer_info of vsetvl_block_info are used as expressions, and the
     earliest of each expression is computed.  Based on the earliest
     information, try to lift the corresponding vsetvl info up to the src
     basic block of the edge (mainly to reduce the total number of vsetvl
     instructions; this lifting may cause some execution paths to execute
     vsetvl instructions that were not there before).

   - Phase 3 (pre global vsetvl info): the header_info and footer_info of
     vsetvl_block_info are used as expressions, and the LCM algorithm is
     used to compute the header_info that needs to be deleted and the one
     that needs to be inserted on some edges.

   - Phase 4 (emit vsetvl insns): based on the fusion result of Phase 1 and
     the deletion and insertion information of Phase 3, perform the
     necessary vsetvl instruction insertions, modifications and deletions.

   - Phase 5 (cleanup): clean up the avl operand in the RVV operator
     instructions and clean up the unused dest operand of the vsetvl insns.

   After Phase 1 a virtual CFG of vsetvl_info is generated.  The virtual
   basic blocks are represented by vsetvl_block_info, and the virtual vsetvl
   statements inside them are represented by vsetvl_info.  Phases 2 and 3
   keep modifying and adjusting this virtual CFG.  Phase 4 performs the
   insertion, modification and deletion of vsetvl instructions based on the
   optimized virtual CFG.  Phases 1, 2 and 3 do not modify the RTL.  */

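/* As a deliberately simplified illustration (not taken from a real dump),
   consider two RVV instructions in one block whose vl/vtype demands agree.
   A naive lowering would emit a vsetvli before each of them:

	vsetvli	zero,a2,e32,m1,ta,ma
	vle32.v	v1,(a0)
	vsetvli	zero,a2,e32,m1,ta,ma	# identical demand
	vadd.vv	v1,v1,v1

   This pass instead fuses the two identical demands (Phase 1) and emits a
   single vsetvli for the fused demand (Phase 4):

	vsetvli	zero,a2,e32,m1,ta,ma
	vle32.v	v1,(a0)
	vadd.vv	v1,v1,v1  */
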
#define IN_TARGET_CODE 1
#define INCLUDE_ALGORITHM
#define INCLUDE_FUNCTIONAL

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "backend.h"
#include "rtl.h"
#include "target.h"
#include "tree-pass.h"
#include "df.h"
#include "rtl-ssa.h"
#include "cfgcleanup.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "insn-opinit.h"
#include "tm-constrs.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "predict.h"
#include "profile-count.h"
#include "gcse.h"

using namespace rtl_ssa;
using namespace riscv_vector;

/* Set the bitmap DST to the union of SRC of predecessors of
   basic block B.
   It's a bit different from bitmap_union_of_preds in cfganal.cc.  This
   function takes into account the case where a predecessor is the ENTRY
   basic block.  The main reason for this difference is to make it easier
   to insert special values into the ENTRY basic block, for example a
   vsetvl_info with an UNKNOWN status.  */
static void
bitmap_union_of_preds_with_entry (sbitmap dst, sbitmap *src, basic_block b)
{
  unsigned int set_size = dst->size;
  edge e;
  unsigned ix;

  for (ix = 0; ix < EDGE_COUNT (b->preds); ix++)
    {
      e = EDGE_PRED (b, ix);
      bitmap_copy (dst, src[e->src->index]);
      break;
    }

  if (ix == EDGE_COUNT (b->preds))
    bitmap_clear (dst);
  else
    for (ix++; ix < EDGE_COUNT (b->preds); ix++)
      {
	unsigned int i;
	SBITMAP_ELT_TYPE *p, *r;

	e = EDGE_PRED (b, ix);
	p = src[e->src->index]->elms;
	r = dst->elms;
	for (i = 0; i < set_size; i++)
	  *r++ |= *p++;
      }
}

/* Compute the reaching definition in and out based on the GEN and KILL
   information in each basic block.
   This function references the compute_available implementation in
   lcm.cc.  */
static void
compute_reaching_defintion (sbitmap *gen, sbitmap *kill, sbitmap *in,
			    sbitmap *out)
{
  edge e;
  basic_block *worklist, *qin, *qout, *qend, bb;
  unsigned int qlen;
  edge_iterator ei;

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Put every block on the worklist so that each block's state is computed
     at least once.  Use reverse postorder to make the forward dataflow
     problem require fewer iterations.  */
  int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
  int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false);
  for (int i = 0; i < n; ++i)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]);
      *qin++ = bb;
      bb->aux = bb;
    }
  free (rpo);

  qin = worklist;
  qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS];
  qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  /* Mark blocks which are successors of the entry block so that we
     can easily identify them below.  */
  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
    e->dest->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun);

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      qlen--;

      if (qout >= qend)
	qout = worklist;

      /* Do not clear the aux field for blocks which are successors of the
	 ENTRY block.  That way we never add them to the worklist again.  */
      if (bb->aux != ENTRY_BLOCK_PTR_FOR_FN (cfun))
	bb->aux = NULL;

      bitmap_union_of_preds_with_entry (in[bb->index], out, bb);

      if (bitmap_ior_and_compl (out[bb->index], gen[bb->index], in[bb->index],
				kill[bb->index]))
	/* If the out state of this block changed, then we need
	   to add the successors of this block to the worklist
	   if they are not already on the worklist.  */
	FOR_EACH_EDGE (e, ei, bb->succs)
	  if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
	    {
	      *qin++ = e->dest;
	      e->dest->aux = e;
	      qlen++;

	      if (qin >= qend)
		qin = worklist;
	    }
    }

  clear_aux_for_edges ();
  clear_aux_for_blocks ();
  free (worklist);
}

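/* In dataflow terms, the worklist loop above computes, for every basic
   block B,

     IN[B]  = union of OUT[P] over all predecessors P (ENTRY included)
     OUT[B] = GEN[B] | (IN[B] & ~KILL[B])

   iterating in reverse post order until no OUT set changes.  */
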
/* Classification of vsetvl instructions.  */
enum vsetvl_type
{
  VSETVL_NORMAL,
  VSETVL_VTYPE_CHANGE_ONLY,
  VSETVL_DISCARD_RESULT,
  NUM_VSETVL_TYPE
};

enum emit_type
{
  /* emit_insn directly.  */
  EMIT_DIRECT,
  EMIT_BEFORE,
  EMIT_AFTER,
};

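/* Roughly, the three flavours correspond to the following assembly forms
   (e32,m1,ta,ma is just an arbitrary vtype example):

     VSETVL_NORMAL:		vsetvli	rd, rs1, e32,m1,ta,ma
     VSETVL_VTYPE_CHANGE_ONLY:	vsetvli	zero, zero, e32,m1,ta,ma
     VSETVL_DISCARD_RESULT:	vsetvli	zero, rs1, e32,m1,ta,ma  */
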
/* dump helper functions */
static const char *
vlmul_to_str (vlmul_type vlmul)
{
  switch (vlmul)
    {
    case LMUL_1:
      return "m1";
    case LMUL_2:
      return "m2";
    case LMUL_4:
      return "m4";
    case LMUL_8:
      return "m8";
    case LMUL_RESERVED:
      return "INVALID LMUL";
    case LMUL_F8:
      return "mf8";
    case LMUL_F4:
      return "mf4";
    case LMUL_F2:
      return "mf2";

    default:
      gcc_unreachable ();
    }
}

static const char *
policy_to_str (bool agnostic_p)
{
  return agnostic_p ? "agnostic" : "undisturbed";
}

/* Return true if it is an RVV instruction that depends on the VTYPE global
   status register.  */
static bool
has_vtype_op (rtx_insn *rinsn)
{
  return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn);
}

/* Return true if the instruction ignores the VLMUL field of VTYPE.  */
static bool
ignore_vlmul_insn_p (rtx_insn *rinsn)
{
  return get_attr_type (rinsn) == TYPE_VIMOVVX
	 || get_attr_type (rinsn) == TYPE_VFMOVVF
	 || get_attr_type (rinsn) == TYPE_VIMOVXV
	 || get_attr_type (rinsn) == TYPE_VFMOVFV;
}

/* Return true if the instruction is a scalar move instruction.  */
static bool
scalar_move_insn_p (rtx_insn *rinsn)
{
  return get_attr_type (rinsn) == TYPE_VIMOVXV
	 || get_attr_type (rinsn) == TYPE_VFMOVFV;
}

/* Return true if the instruction is a fault-only-first load instruction.  */
static bool
fault_first_load_p (rtx_insn *rinsn)
{
  return recog_memoized (rinsn) >= 0
	 && (get_attr_type (rinsn) == TYPE_VLDFF
	     || get_attr_type (rinsn) == TYPE_VLSEGDFF);
}

/* Return true if the instruction is a read vl instruction.  */
static bool
read_vl_insn_p (rtx_insn *rinsn)
{
  return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_RDVL;
}

/* Return true if it is a vsetvl instruction.  */
static bool
vector_config_insn_p (rtx_insn *rinsn)
{
  return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL;
}

/* Return true if it is vsetvldi or vsetvlsi.  */
static bool
vsetvl_insn_p (rtx_insn *rinsn)
{
  if (!rinsn || !vector_config_insn_p (rinsn))
    return false;
  return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi
	  || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi);
}

/* Return true if it is vsetvl zero, rs1.  */
static bool
vsetvl_discard_result_insn_p (rtx_insn *rinsn)
{
  if (!vector_config_insn_p (rinsn))
    return false;
  return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi
	  || INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi);
}

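/* Return true if INSN is a non-null real insn that belongs to basic block
   BB.  */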
static bool
real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb)
{
  return insn != nullptr && insn->is_real () && insn->bb () == bb;
}

/* Helper function to get the VL operand of a VLMAX insn.  */
static rtx
get_vl (rtx_insn *rinsn)
{
  if (has_vl_op (rinsn))
    {
      extract_insn_cached (rinsn);
      return recog_data.operand[get_attr_vl_op_idx (rinsn)];
    }
  return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0));
}

/* Helper function to get the AVL operand.  */
static rtx
get_avl (rtx_insn *rinsn)
{
  if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn))
    return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0);

  if (!has_vl_op (rinsn))
    return NULL_RTX;
  if (vlmax_avl_type_p (rinsn))
    return RVV_VLMAX;
  extract_insn_cached (rinsn);
  return recog_data.operand[get_attr_vl_op_idx (rinsn)];
}

/* Get the default mask policy.  */
static bool
get_default_ma ()
{
  /* For instructions that don't require MA, we still need a default value
     to emit vsetvl.  We pick the default value according to the preferred
     policy.  */
  return (bool) (get_prefer_mask_policy () & 0x1
		 || (get_prefer_mask_policy () >> 1 & 0x1));
}

/* Helper function to get the MA operand.  */
static bool
mask_agnostic_p (rtx_insn *rinsn)
{
  /* If it doesn't have MA, we return agnostic by default.  */
  extract_insn_cached (rinsn);
  int ma = get_attr_ma (rinsn);
  return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma);
}

/* Return true if FN has a vector instruction that uses VL/VTYPE.  */
static bool
has_vector_insn (function *fn)
{
  basic_block cfg_bb;
  rtx_insn *rinsn;
  FOR_ALL_BB_FN (cfg_bb, fn)
    FOR_BB_INSNS (cfg_bb, rinsn)
      if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn))
	return true;
  return false;
}

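/* Return the vlmul_type that together with SEW gives the requested SEW/LMUL
   RATIO, or LMUL_RESERVED if no LMUL setting matches.  */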
static vlmul_type
calculate_vlmul (unsigned int sew, unsigned int ratio)
{
  const vlmul_type ALL_LMUL[]
    = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2};
  for (const vlmul_type vlmul : ALL_LMUL)
    if (calculate_ratio (sew, vlmul) == ratio)
      return vlmul;
  return LMUL_RESERVED;
}

/* Get the currently supported maximum sew used in the int rvv
   instructions.  */
static uint8_t
get_max_int_sew ()
{
  if (TARGET_VECTOR_ELEN_64)
    return 64;
  else if (TARGET_VECTOR_ELEN_32)
    return 32;
  gcc_unreachable ();
}

/* Get the currently supported maximum sew used in the float rvv
   instructions.  */
static uint8_t
get_max_float_sew ()
{
  if (TARGET_VECTOR_ELEN_FP_64)
    return 64;
  else if (TARGET_VECTOR_ELEN_FP_32)
    return 32;
  else if (TARGET_VECTOR_ELEN_FP_16)
    return 16;
  gcc_unreachable ();
}

29331e72 429enum def_type
9243c3d1 430{
431 REAL_SET = 1 << 0,
432 PHI_SET = 1 << 1,
433 BB_HEAD_SET = 1 << 2,
434 BB_END_SET = 1 << 3,
435 /* ??? TODO: In RTL_SSA framework, we have REAL_SET,
436 PHI_SET, BB_HEAD_SET, BB_END_SET and
437 CLOBBER_DEF def_info types. Currently,
438 we conservatively do not optimize clobber
439 def since we don't see the case that we
440 need to optimize it. */
441 CLOBBER_DEF = 1 << 4
442};
9243c3d1 443
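/* Return true if INSN is one of the definition kinds selected by the TYPES
   mask (a combination of the def_type flags above).  */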
444static bool
445insn_should_be_added_p (const insn_info *insn, unsigned int types)
da93c41c 446{
447 if (insn->is_real () && (types & REAL_SET))
448 return true;
449 if (insn->is_phi () && (types & PHI_SET))
450 return true;
451 if (insn->is_bb_head () && (types & BB_HEAD_SET))
452 return true;
453 if (insn->is_bb_end () && (types & BB_END_SET))
454 return true;
455 return false;
456}
457
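/* Collect all real (non-debug) insn uses of register REGNO that are reached
   from the definitions made by INSN, following phi nodes transitively.  */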
458static const hash_set<use_info *>
459get_all_real_uses (insn_info *insn, unsigned regno)
9243c3d1 460{
29331e72 461 gcc_assert (insn->is_real ());
9243c3d1 462
463 hash_set<use_info *> uses;
464 auto_vec<phi_info *> work_list;
465 hash_set<phi_info *> visited_list;
9243c3d1 466
29331e72 467 for (def_info *def : insn->defs ())
9243c3d1 468 {
469 if (!def->is_reg () || def->regno () != regno)
470 continue;
471 set_info *set = safe_dyn_cast<set_info *> (def);
472 if (!set)
473 continue;
474 for (use_info *use : set->nondebug_insn_uses ())
475 if (use->insn ()->is_real ())
476 uses.add (use);
477 for (use_info *use : set->phi_uses ())
478 work_list.safe_push (use->phi ());
9243c3d1 479 }
9243c3d1 480
29331e72 481 while (!work_list.is_empty ())
60bd33bc 482 {
483 phi_info *phi = work_list.pop ();
484 visited_list.add (phi);
60bd33bc 485
486 for (use_info *use : phi->nondebug_insn_uses ())
487 if (use->insn ()->is_real ())
488 uses.add (use);
489 for (use_info *use : phi->phi_uses ())
490 if (!visited_list.contains (use->phi ()))
491 work_list.safe_push (use->phi ());
60bd33bc 492 }
29331e72 493 return uses;
494}
495
/* Recursively find all the defining instructions reaching PHI.  The kinds
   of definitions to collect are specified by TYPES (a mask of def_type
   flags).  */
498static hash_set<set_info *>
499get_all_sets (phi_info *phi, unsigned int types)
9243c3d1 500{
501 hash_set<set_info *> insns;
502 auto_vec<phi_info *> work_list;
503 hash_set<phi_info *> visited_list;
504 if (!phi)
505 return hash_set<set_info *> ();
506 work_list.safe_push (phi);
9243c3d1 507
29331e72 508 while (!work_list.is_empty ())
9243c3d1 509 {
510 phi_info *phi = work_list.pop ();
511 visited_list.add (phi);
512 for (use_info *use : phi->inputs ())
513 {
514 def_info *def = use->def ();
515 set_info *set = safe_dyn_cast<set_info *> (def);
516 if (!set)
517 return hash_set<set_info *> ();
a1e42094 518
29331e72 519 gcc_assert (!set->insn ()->is_debug_insn ());
9243c3d1 520
521 if (insn_should_be_added_p (set->insn (), types))
522 insns.add (set);
523 if (set->insn ()->is_phi ())
524 {
525 phi_info *new_phi = as_a<phi_info *> (set);
526 if (!visited_list.contains (new_phi))
527 work_list.safe_push (new_phi);
528 }
529 }
9243c3d1 530 }
29331e72 531 return insns;
532}
533
534static hash_set<set_info *>
535get_all_sets (set_info *set, bool /* get_real_inst */ real_p,
536 bool /*get_phi*/ phi_p, bool /* get_function_parameter*/ param_p)
aef20243 537{
538 if (real_p && phi_p && param_p)
539 return get_all_sets (safe_dyn_cast<phi_info *> (set),
540 REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET);
aef20243 541
542 else if (real_p && param_p)
543 return get_all_sets (safe_dyn_cast<phi_info *> (set),
544 REAL_SET | BB_HEAD_SET | BB_END_SET);
545
546 else if (real_p)
547 return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET);
548 return hash_set<set_info *> ();
549}
550
4f673c5e 551static bool
6b6b9c68 552source_equal_p (insn_info *insn1, insn_info *insn2)
4f673c5e 553{
554 if (!insn1 || !insn2)
555 return false;
556 rtx_insn *rinsn1 = insn1->rtl ();
557 rtx_insn *rinsn2 = insn2->rtl ();
558 if (!rinsn1 || !rinsn2)
559 return false;
29331e72 560
561 rtx note1 = find_reg_equal_equiv_note (rinsn1);
562 rtx note2 = find_reg_equal_equiv_note (rinsn2);
563 /* We could handle the case of similar-looking REG_EQUALs as well but
564 would need to verify that no insn in between modifies any of the source
565 operands. */
566 if (note1 && note2 && rtx_equal_p (note1, note2)
567 && REG_NOTE_KIND (note1) == REG_EQUIV)
4f673c5e 568 return true;
29331e72 569 return false;
570}
571
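/* If every definition reaching SET can be traced back to one and the same
   real insn, return that insn; otherwise return nullptr.  */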
6b6b9c68 572static insn_info *
573extract_single_source (set_info *set)
574{
575 if (!set)
576 return nullptr;
577 if (set->insn ()->is_real ())
6b6b9c68 578 return set->insn ();
579 if (!set->insn ()->is_phi ())
580 return nullptr;
6b6b9c68 581 hash_set<set_info *> sets = get_all_sets (set, true, false, true);
4f673c5e 582
6b6b9c68 583 insn_info *first_insn = (*sets.begin ())->insn ();
584 if (first_insn->is_artificial ())
585 return nullptr;
6b6b9c68 586 for (const set_info *set : sets)
    {
      /* If there is a head or end insn, we conservatively return
	 NULL so that the VSETVL PASS will insert a vsetvl directly.  */
6b6b9c68 590 if (set->insn ()->is_artificial ())
4f673c5e 591 return nullptr;
29331e72 592 if (set != *sets.begin () && !source_equal_p (set->insn (), first_insn))
593 return nullptr;
594 }
595
6b6b9c68 596 return first_insn;
597}
598
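/* Return true if SET1 and SET2 can each be traced back to a single source
   insn and those insns are known, via their REG_EQUIV notes, to compute the
   same value.  */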
599static bool
600same_equiv_note_p (set_info *set1, set_info *set2)
ec99ffab 601{
602 insn_info *insn1 = extract_single_source (set1);
603 insn_info *insn2 = extract_single_source (set2);
604 if (!insn1 || !insn2)
605 return false;
606 return source_equal_p (insn1, insn2);
607}
608
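/* An expression id encodes a (register, basic block) pair as
   REGNO * NUM_BBS + BB_INDEX; get_regno and get_bb_index invert this
   encoding.  */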
609static unsigned
610get_expr_id (unsigned bb_index, unsigned regno, unsigned num_bbs)
ec99ffab 611{
29331e72 612 return regno * num_bbs + bb_index;
ec99ffab 613}
614static unsigned
615get_regno (unsigned expr_id, unsigned num_bb)
ec99ffab 616{
29331e72 617 return expr_id / num_bb;
ec99ffab 618}
619static unsigned
620get_bb_index (unsigned expr_id, unsigned num_bb)
ec99ffab 621{
29331e72 622 return expr_id % num_bb;
623}
624
/* Return true if the SET result is not used by any instruction.  */
ec99ffab 626static bool
29331e72 627has_no_uses (basic_block cfg_bb, rtx_insn *rinsn, int regno)
ec99ffab 628{
629 if (bitmap_bit_p (df_get_live_out (cfg_bb), regno))
630 return false;
ec99ffab 631
632 rtx_insn *iter;
633 for (iter = NEXT_INSN (rinsn); iter && iter != NEXT_INSN (BB_END (cfg_bb));
634 iter = NEXT_INSN (iter))
635 if (df_find_use (iter, regno_reg_rtx[regno]))
636 return false;
ec99ffab 637
29331e72 638 return true;
639}
640
/* These flags indicate the minimum demand of the vl and vtype values by an
   RVV instruction.  For example, DEMAND_RATIO_P indicates that the RVV
   instruction only needs the SEW/LMUL ratio to remain the same, and does
   not require SEW and LMUL to be fixed.
   Therefore, if the former RVV instruction only needs DEMAND_RATIO_P and
   the latter instruction needs DEMAND_SEW_LMUL_P with the same SEW/LMUL
   ratio as the former one, we can strengthen the minimum demand of the
   former instruction to DEMAND_SEW_LMUL_P, using the SEW and LMUL of the
   latter instruction.  The vsetvl instruction generated for the new demand
   then also covers the latter instruction, so no separate vsetvl
   instruction needs to be inserted for it.  */
653enum demand_flags : unsigned
654{
655 DEMAND_EMPTY_P = 0,
656 DEMAND_SEW_P = 1 << 0,
657 DEMAND_LMUL_P = 1 << 1,
658 DEMAND_RATIO_P = 1 << 2,
659 DEMAND_GE_SEW_P = 1 << 3,
660 DEMAND_TAIL_POLICY_P = 1 << 4,
661 DEMAND_MASK_POLICY_P = 1 << 5,
662 DEMAND_AVL_P = 1 << 6,
663 DEMAND_NON_ZERO_AVL_P = 1 << 7,
664};
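/* For instance, when an insn's ratio attribute is known, the SEW/LMUL part
   of its demand collapses to DEMAND_RATIO_P only, and an insn that ignores
   VLMUL (see ignore_vlmul_insn_p) demands SEW but not LMUL.  Both demands
   are weaker than a full DEMAND_SEW_P | DEMAND_LMUL_P and therefore easier
   to fuse.  */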
ec99ffab 665
/* We split the demand information into three parts.  They are sew and lmul
   related (sew_lmul_demand_type), tail and mask policy related
   (policy_demand_type) and avl related (avl_demand_type).  Then we define
   three interfaces: available_p, compatible_p and merge.  available_p is
   used to determine whether the two vsetvl infos prev_info and next_info
   are available or not.  If prev_info is available for next_info, it means
   that the RVV insn corresponding to next_info on the path from prev_info
   to next_info can be used without inserting a separate vsetvl instruction.
   compatible_p is used to determine whether prev_info is compatible with
   next_info, and if so, merge can be used to merge the stricter demand
   information from next_info into prev_info so that prev_info becomes
   available to next_info.  */
ec99ffab 678
29331e72 679enum class sew_lmul_demand_type : unsigned
ec99ffab 680{
681 sew_lmul = demand_flags::DEMAND_SEW_P | demand_flags::DEMAND_LMUL_P,
682 ratio_only = demand_flags::DEMAND_RATIO_P,
683 sew_only = demand_flags::DEMAND_SEW_P,
684 ge_sew = demand_flags::DEMAND_GE_SEW_P,
685 ratio_and_ge_sew
686 = demand_flags::DEMAND_RATIO_P | demand_flags::DEMAND_GE_SEW_P,
687};
ec99ffab 688
29331e72 689enum class policy_demand_type : unsigned
29547511 690{
691 tail_mask_policy
692 = demand_flags::DEMAND_TAIL_POLICY_P | demand_flags::DEMAND_MASK_POLICY_P,
693 tail_policy_only = demand_flags::DEMAND_TAIL_POLICY_P,
694 mask_policy_only = demand_flags::DEMAND_MASK_POLICY_P,
695 ignore_policy = demand_flags::DEMAND_EMPTY_P,
696};
29547511 697
29331e72 698enum class avl_demand_type : unsigned
ec99ffab 699{
700 avl = demand_flags::DEMAND_AVL_P,
701 non_zero_avl = demand_flags::DEMAND_NON_ZERO_AVL_P,
702 ignore_avl = demand_flags::DEMAND_EMPTY_P,
703};
ec99ffab 704
29331e72 705class vsetvl_info
ec99ffab 706{
707private:
708 insn_info *m_insn;
709 bb_info *m_bb;
710 rtx m_avl;
711 rtx m_vl;
712 set_info *m_avl_def;
713 uint8_t m_sew;
714 uint8_t m_max_sew;
715 vlmul_type m_vlmul;
716 uint8_t m_ratio;
717 bool m_ta;
718 bool m_ma;
719
720 sew_lmul_demand_type m_sew_lmul_demand;
721 policy_demand_type m_policy_demand;
722 avl_demand_type m_avl_demand;
723
724 enum class state_type
725 {
726 UNINITIALIZED,
727 VALID,
728 UNKNOWN,
729 EMPTY,
730 };
731 state_type m_state;
732
733 bool m_delete;
734 bool m_change_vtype_only;
735 insn_info *m_read_vl_insn;
736 bool m_vl_used_by_non_rvv_insn;
ec99ffab 737
738public:
739 vsetvl_info ()
740 : m_insn (nullptr), m_bb (nullptr), m_avl (NULL_RTX), m_vl (NULL_RTX),
741 m_avl_def (nullptr), m_sew (0), m_max_sew (0), m_vlmul (LMUL_RESERVED),
742 m_ratio (0), m_ta (false), m_ma (false),
743 m_sew_lmul_demand (sew_lmul_demand_type::sew_lmul),
744 m_policy_demand (policy_demand_type::tail_mask_policy),
745 m_avl_demand (avl_demand_type::avl), m_state (state_type::UNINITIALIZED),
746 m_delete (false), m_change_vtype_only (false), m_read_vl_insn (nullptr),
747 m_vl_used_by_non_rvv_insn (false)
748 {}
749
750 vsetvl_info (insn_info *insn) : vsetvl_info () { parse_insn (insn); }
751
752 vsetvl_info (rtx_insn *insn) : vsetvl_info () { parse_insn (insn); }
753
754 void set_avl (rtx avl) { m_avl = avl; }
755 void set_vl (rtx vl) { m_vl = vl; }
756 void set_avl_def (set_info *avl_def) { m_avl_def = avl_def; }
757 void set_sew (uint8_t sew) { m_sew = sew; }
758 void set_vlmul (vlmul_type vlmul) { m_vlmul = vlmul; }
759 void set_ratio (uint8_t ratio) { m_ratio = ratio; }
760 void set_ta (bool ta) { m_ta = ta; }
761 void set_ma (bool ma) { m_ma = ma; }
762 void set_delete () { m_delete = true; }
763 void set_bb (bb_info *bb) { m_bb = bb; }
764 void set_max_sew (uint8_t max_sew) { m_max_sew = max_sew; }
765 void set_change_vtype_only () { m_change_vtype_only = true; }
766 void set_read_vl_insn (insn_info *insn) { m_read_vl_insn = insn; }
767
768 rtx get_avl () const { return m_avl; }
769 rtx get_vl () const { return m_vl; }
770 set_info *get_avl_def () const { return m_avl_def; }
771 uint8_t get_sew () const { return m_sew; }
772 vlmul_type get_vlmul () const { return m_vlmul; }
773 uint8_t get_ratio () const { return m_ratio; }
774 bool get_ta () const { return m_ta; }
775 bool get_ma () const { return m_ma; }
776 insn_info *get_insn () const { return m_insn; }
777 bool delete_p () const { return m_delete; }
778 bb_info *get_bb () const { return m_bb; }
779 uint8_t get_max_sew () const { return m_max_sew; }
780 insn_info *get_read_vl_insn () const { return m_read_vl_insn; }
4cd4c34a 781 bool vl_used_by_non_rvv_insn_p () const { return m_vl_used_by_non_rvv_insn; }
782
783 bool has_imm_avl () const { return m_avl && CONST_INT_P (m_avl); }
784 bool has_vlmax_avl () const { return vlmax_avl_p (m_avl); }
785 bool has_nonvlmax_reg_avl () const
786 {
787 return m_avl && REG_P (m_avl) && !has_vlmax_avl ();
788 }
789 bool has_non_zero_avl () const
790 {
791 if (has_imm_avl ())
792 return INTVAL (m_avl) > 0;
793 return has_vlmax_avl ();
794 }
795 bool has_vl () const
796 {
797 /* The VL operand can only be either a NULL_RTX or a register. */
798 gcc_assert (!m_vl || REG_P (m_vl));
799 return m_vl != NULL_RTX;
800 }
801 bool has_same_ratio (const vsetvl_info &other) const
802 {
803 return get_ratio () == other.get_ratio ();
804 }
805
  /* The block of INSN isn't always the same as the block of the VSETVL_INFO,
     meaning we may have 'get_insn ()->bb () != get_bb ()'.

     E.g.  BB 2 (Empty) ---> BB 3 (VALID, has rvv insn 1)

     BB 2 has an empty VSETVL_INFO, whereas BB 3 has a VSETVL_INFO that
     satisfies get_insn ()->bb () == get_bb ().  In earliest fusion, we may
     fuse bb 3 and bb 2 so that the 'get_bb ()' of BB2's VSETVL_INFO will be
     BB2 whereas the 'get_insn ()' of BB2's VSETVL_INFO will be rvv insn 1
     (which is located in BB3).  */
816 bool insn_inside_bb_p () const { return get_insn ()->bb () == get_bb (); }
817 void update_avl (const vsetvl_info &other)
818 {
819 m_avl = other.get_avl ();
820 m_vl = other.get_vl ();
821 m_avl_def = other.get_avl_def ();
822 }
823
824 bool uninit_p () const { return m_state == state_type::UNINITIALIZED; }
825 bool valid_p () const { return m_state == state_type::VALID; }
826 bool unknown_p () const { return m_state == state_type::UNKNOWN; }
827 bool empty_p () const { return m_state == state_type::EMPTY; }
828 bool change_vtype_only_p () const { return m_change_vtype_only; }
829
830 void set_valid () { m_state = state_type::VALID; }
831 void set_unknown () { m_state = state_type::UNKNOWN; }
832 void set_empty () { m_state = state_type::EMPTY; }
833
834 void set_sew_lmul_demand (sew_lmul_demand_type demand)
835 {
836 m_sew_lmul_demand = demand;
837 }
838 void set_policy_demand (policy_demand_type demand)
839 {
840 m_policy_demand = demand;
841 }
842 void set_avl_demand (avl_demand_type demand) { m_avl_demand = demand; }
843
844 sew_lmul_demand_type get_sew_lmul_demand () const
845 {
846 return m_sew_lmul_demand;
847 }
848 policy_demand_type get_policy_demand () const { return m_policy_demand; }
849 avl_demand_type get_avl_demand () const { return m_avl_demand; }
850
851 void normalize_demand (unsigned demand_flags)
852 {
853 switch (demand_flags
854 & (DEMAND_SEW_P | DEMAND_LMUL_P | DEMAND_RATIO_P | DEMAND_GE_SEW_P))
855 {
856 case (unsigned) sew_lmul_demand_type::sew_lmul:
857 m_sew_lmul_demand = sew_lmul_demand_type::sew_lmul;
858 break;
859 case (unsigned) sew_lmul_demand_type::ratio_only:
860 m_sew_lmul_demand = sew_lmul_demand_type::ratio_only;
861 break;
862 case (unsigned) sew_lmul_demand_type::sew_only:
863 m_sew_lmul_demand = sew_lmul_demand_type::sew_only;
864 break;
865 case (unsigned) sew_lmul_demand_type::ge_sew:
866 m_sew_lmul_demand = sew_lmul_demand_type::ge_sew;
867 break;
868 case (unsigned) sew_lmul_demand_type::ratio_and_ge_sew:
869 m_sew_lmul_demand = sew_lmul_demand_type::ratio_and_ge_sew;
870 break;
871 default:
872 gcc_unreachable ();
873 }
874
875 switch (demand_flags & (DEMAND_TAIL_POLICY_P | DEMAND_MASK_POLICY_P))
876 {
877 case (unsigned) policy_demand_type::tail_mask_policy:
878 m_policy_demand = policy_demand_type::tail_mask_policy;
879 break;
880 case (unsigned) policy_demand_type::tail_policy_only:
881 m_policy_demand = policy_demand_type::tail_policy_only;
882 break;
883 case (unsigned) policy_demand_type::mask_policy_only:
884 m_policy_demand = policy_demand_type::mask_policy_only;
885 break;
886 case (unsigned) policy_demand_type::ignore_policy:
887 m_policy_demand = policy_demand_type::ignore_policy;
888 break;
889 default:
890 gcc_unreachable ();
891 }
892
893 switch (demand_flags & (DEMAND_AVL_P | DEMAND_NON_ZERO_AVL_P))
894 {
895 case (unsigned) avl_demand_type::avl:
896 m_avl_demand = avl_demand_type::avl;
897 break;
898 case (unsigned) avl_demand_type::non_zero_avl:
899 m_avl_demand = avl_demand_type::non_zero_avl;
900 break;
901 case (unsigned) avl_demand_type::ignore_avl:
902 m_avl_demand = avl_demand_type::ignore_avl;
903 break;
904 default:
905 gcc_unreachable ();
906 }
907 }
908
909 void parse_insn (rtx_insn *rinsn)
910 {
911 if (!NONDEBUG_INSN_P (rinsn))
912 return;
913 if (optimize == 0 && !has_vtype_op (rinsn))
914 return;
915 gcc_assert (!vsetvl_discard_result_insn_p (rinsn));
916 set_valid ();
917 extract_insn_cached (rinsn);
918 m_avl = ::get_avl (rinsn);
919 if (has_vlmax_avl () || vsetvl_insn_p (rinsn))
920 m_vl = ::get_vl (rinsn);
921 m_sew = ::get_sew (rinsn);
922 m_vlmul = ::get_vlmul (rinsn);
923 m_ta = tail_agnostic_p (rinsn);
924 m_ma = mask_agnostic_p (rinsn);
925 }
926
927 void parse_insn (insn_info *insn)
928 {
929 m_insn = insn;
930 m_bb = insn->bb ();
931 /* Return if it is debug insn for the consistency with optimize == 0. */
932 if (insn->is_debug_insn ())
933 return;
ec99ffab 934
    /* We set it as unknown since we don't know what will happen in CALL or
       ASM.  */
936 if (insn->is_call () || insn->is_asm ())
937 {
938 set_unknown ();
939 return;
940 }
941
942 /* If this is something that updates VL/VTYPE that we don't know about, set
943 the state to unknown. */
944 if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())
945 && (find_access (insn->defs (), VL_REGNUM)
946 || find_access (insn->defs (), VTYPE_REGNUM)))
947 {
948 set_unknown ();
949 return;
950 }
951
952 if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()))
953 /* uninitialized */
954 return;
ec99ffab 955
956 set_valid ();
957
958 m_avl = ::get_avl (insn->rtl ());
959 if (m_avl)
960 {
961 if (vsetvl_insn_p (insn->rtl ()) || has_vlmax_avl ())
962 m_vl = ::get_vl (insn->rtl ());
963
964 if (has_nonvlmax_reg_avl ())
965 m_avl_def = find_access (insn->uses (), REGNO (m_avl))->def ();
966 }
967
968 m_sew = ::get_sew (insn->rtl ());
969 m_vlmul = ::get_vlmul (insn->rtl ());
970 m_ratio = get_attr_ratio (insn->rtl ());
    /* When get_attr_ratio is invalid, this kind of instruction doesn't care
       about the ratio.  However, we still need this value in the backward
       demand info analysis.  */
974 if (m_ratio == INVALID_ATTRIBUTE)
975 m_ratio = calculate_ratio (m_sew, m_vlmul);
976 m_ta = tail_agnostic_p (insn->rtl ());
977 m_ma = mask_agnostic_p (insn->rtl ());
978
979 /* If merge operand is undef value, we prefer agnostic. */
980 int merge_op_idx = get_attr_merge_op_idx (insn->rtl ());
981 if (merge_op_idx != INVALID_ATTRIBUTE
982 && satisfies_constraint_vu (recog_data.operand[merge_op_idx]))
983 {
984 m_ta = true;
985 m_ma = true;
986 }
987
988 /* Determine the demand info of the RVV insn. */
989 m_max_sew = get_max_int_sew ();
193ef02a 990 unsigned dflags = 0;
991 if (vector_config_insn_p (insn->rtl ()))
992 {
993 dflags |= demand_flags::DEMAND_AVL_P;
994 dflags |= demand_flags::DEMAND_RATIO_P;
995 }
996 else
997 {
998 if (has_vl_op (insn->rtl ()))
999 {
1000 if (scalar_move_insn_p (insn->rtl ()))
1001 {
1002 /* If the avl for vmv.s.x comes from the vsetvl instruction, we
1003 don't know if the avl is non-zero, so it is set to
1004 DEMAND_AVL_P for now. it may be corrected to
1005 DEMAND_NON_ZERO_AVL_P later when more information is
1006 available.
1007 */
1008 if (has_non_zero_avl ())
193ef02a 1009 dflags |= demand_flags::DEMAND_NON_ZERO_AVL_P;
29331e72 1010 else
193ef02a 1011 dflags |= demand_flags::DEMAND_AVL_P;
1012 }
1013 else
193ef02a 1014 dflags |= demand_flags::DEMAND_AVL_P;
29331e72 1015 }
ec99ffab 1016
29331e72 1017 if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE)
193ef02a 1018 dflags |= demand_flags::DEMAND_RATIO_P;
1019 else
1020 {
1021 if (scalar_move_insn_p (insn->rtl ()) && m_ta)
1022 {
193ef02a 1023 dflags |= demand_flags::DEMAND_GE_SEW_P;
1024 m_max_sew = get_attr_type (insn->rtl ()) == TYPE_VFMOVFV
1025 ? get_max_float_sew ()
1026 : get_max_int_sew ();
1027 }
1028 else
193ef02a 1029 dflags |= demand_flags::DEMAND_SEW_P;
1030
1031 if (!ignore_vlmul_insn_p (insn->rtl ()))
193ef02a 1032 dflags |= demand_flags::DEMAND_LMUL_P;
29331e72 1033 }
ec99ffab 1034
29331e72 1035 if (!m_ta)
193ef02a 1036 dflags |= demand_flags::DEMAND_TAIL_POLICY_P;
29331e72 1037 if (!m_ma)
193ef02a 1038 dflags |= demand_flags::DEMAND_MASK_POLICY_P;
1039 }
1040
193ef02a 1041 normalize_demand (dflags);
1042
1043 /* Optimize AVL from the vsetvl instruction. */
1044 insn_info *def_insn = extract_single_source (get_avl_def ());
1045 if (def_insn && vsetvl_insn_p (def_insn->rtl ()))
1046 {
1047 vsetvl_info def_info = vsetvl_info (def_insn);
1048 if ((scalar_move_insn_p (insn->rtl ())
1049 || def_info.get_ratio () == get_ratio ())
1050 && (def_info.has_vlmax_avl () || def_info.has_imm_avl ()))
1051 {
1052 update_avl (def_info);
1053 if (scalar_move_insn_p (insn->rtl ()) && has_non_zero_avl ())
1054 m_avl_demand = avl_demand_type::non_zero_avl;
1055 }
1056 }
1057
1058 /* Determine if dest operand(vl) has been used by non-RVV instructions. */
1059 if (has_vl ())
1060 {
1061 const hash_set<use_info *> vl_uses
1062 = get_all_real_uses (get_insn (), REGNO (get_vl ()));
1063 for (use_info *use : vl_uses)
1064 {
1065 gcc_assert (use->insn ()->is_real ());
1066 rtx_insn *rinsn = use->insn ()->rtl ();
1067 if (!has_vl_op (rinsn)
1068 || count_regno_occurrences (rinsn, REGNO (get_vl ())) != 1)
1069 {
1070 m_vl_used_by_non_rvv_insn = true;
1071 break;
1072 }
1073 rtx avl = ::get_avl (rinsn);
c2f23514 1074 if (!avl || !REG_P (avl) || REGNO (get_vl ()) != REGNO (avl))
1075 {
1076 m_vl_used_by_non_rvv_insn = true;
1077 break;
1078 }
1079 }
1080 }
ec99ffab 1081
1082 /* Collect the read vl insn for the fault-only-first rvv loads. */
1083 if (fault_first_load_p (insn->rtl ()))
1084 {
1085 for (insn_info *i = insn->next_nondebug_insn ();
1086 i->bb () == insn->bb (); i = i->next_nondebug_insn ())
1087 {
1088 if (find_access (i->defs (), VL_REGNUM))
1089 break;
1090 if (i->rtl () && read_vl_insn_p (i->rtl ()))
1091 {
1092 m_read_vl_insn = i;
1093 break;
1094 }
1095 }
1096 }
1097 }
1098
1099 /* Returns the corresponding vsetvl rtx pat. */
1100 rtx get_vsetvl_pat (bool ignore_vl = false) const
1101 {
1102 rtx avl = get_avl ();
    /* If optimize == 0 and the instruction is vmv.x.s/vfmv.f.s, set the
       value of avl to (const_int 0) so that the VSETVL PASS will insert
       vsetvl correctly.  */
1106 if (!get_avl ())
1107 avl = GEN_INT (0);
1108 rtx sew = gen_int_mode (get_sew (), Pmode);
1109 rtx vlmul = gen_int_mode (get_vlmul (), Pmode);
1110 rtx ta = gen_int_mode (get_ta (), Pmode);
1111 rtx ma = gen_int_mode (get_ma (), Pmode);
1112
1113 if (change_vtype_only_p ())
1114 return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma);
1115 else if (has_vl () && !ignore_vl)
1116 return gen_vsetvl (Pmode, get_vl (), avl, sew, vlmul, ta, ma);
1117 else
1118 return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma);
1119 }
1120
1121 bool operator== (const vsetvl_info &other) const
1122 {
1123 gcc_assert (!uninit_p () && !other.uninit_p ()
1124 && "Uninitialization should not happen");
1125
1126 if (empty_p ())
1127 return other.empty_p ();
1128 if (unknown_p ())
1129 return other.unknown_p ();
1130
1131 return get_insn () == other.get_insn () && get_bb () == other.get_bb ()
1132 && get_avl () == other.get_avl () && get_vl () == other.get_vl ()
1133 && get_avl_def () == other.get_avl_def ()
1134 && get_sew () == other.get_sew ()
1135 && get_vlmul () == other.get_vlmul () && get_ta () == other.get_ta ()
1136 && get_ma () == other.get_ma ()
1137 && get_avl_demand () == other.get_avl_demand ()
1138 && get_sew_lmul_demand () == other.get_sew_lmul_demand ()
1139 && get_policy_demand () == other.get_policy_demand ();
1140 }
1141
1142 void dump (FILE *file, const char *indent = "") const
1143 {
1144 if (uninit_p ())
1145 {
1146 fprintf (file, "UNINITIALIZED.\n");
1147 return;
1148 }
1149 else if (unknown_p ())
1150 {
1151 fprintf (file, "UNKNOWN.\n");
1152 return;
1153 }
1154 else if (empty_p ())
1155 {
1156 fprintf (file, "EMPTY.\n");
1157 return;
1158 }
1159 else if (valid_p ())
1160 fprintf (file, "VALID (insn %u, bb %u)%s\n", get_insn ()->uid (),
1161 get_bb ()->index (), delete_p () ? " (deleted)" : "");
1162 else
1163 gcc_unreachable ();
ec99ffab 1164
1165 fprintf (file, "%sDemand fields:", indent);
1166 if (m_sew_lmul_demand == sew_lmul_demand_type::sew_lmul)
1167 fprintf (file, " demand_sew_lmul");
1168 else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_only)
1169 fprintf (file, " demand_ratio_only");
1170 else if (m_sew_lmul_demand == sew_lmul_demand_type::sew_only)
1171 fprintf (file, " demand_sew_only");
1172 else if (m_sew_lmul_demand == sew_lmul_demand_type::ge_sew)
1173 fprintf (file, " demand_ge_sew");
1174 else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_and_ge_sew)
1175 fprintf (file, " demand_ratio_and_ge_sew");
1176
1177 if (m_policy_demand == policy_demand_type::tail_mask_policy)
1178 fprintf (file, " demand_tail_mask_policy");
1179 else if (m_policy_demand == policy_demand_type::tail_policy_only)
1180 fprintf (file, " demand_tail_policy_only");
1181 else if (m_policy_demand == policy_demand_type::mask_policy_only)
1182 fprintf (file, " demand_mask_policy_only");
1183
1184 if (m_avl_demand == avl_demand_type::avl)
1185 fprintf (file, " demand_avl");
1186 else if (m_avl_demand == avl_demand_type::non_zero_avl)
1187 fprintf (file, " demand_non_zero_avl");
1188 fprintf (file, "\n");
1189
1190 fprintf (file, "%sSEW=%d, ", indent, get_sew ());
1191 fprintf (file, "VLMUL=%s, ", vlmul_to_str (get_vlmul ()));
1192 fprintf (file, "RATIO=%d, ", get_ratio ());
1193 fprintf (file, "MAX_SEW=%d\n", get_max_sew ());
1194
1195 fprintf (file, "%sTAIL_POLICY=%s, ", indent, policy_to_str (get_ta ()));
1196 fprintf (file, "MASK_POLICY=%s\n", policy_to_str (get_ma ()));
1197
1198 fprintf (file, "%sAVL=", indent);
1199 print_rtl_single (file, get_avl ());
1200 fprintf (file, "%sVL=", indent);
1201 print_rtl_single (file, get_vl ());
1202 if (change_vtype_only_p ())
1203 fprintf (file, "%schange vtype only\n", indent);
1204 if (get_read_vl_insn ())
1205 fprintf (file, "%sread_vl_insn: insn %u\n", indent,
1206 get_read_vl_insn ()->uid ());
4cd4c34a 1207 if (vl_used_by_non_rvv_insn_p ())
1208 fprintf (file, "%suse_by_non_rvv_insn=true\n", indent);
1209 }
1210};
8fbc0871 1211
29331e72 1212class vsetvl_block_info
ec99ffab 1213{
1214public:
  /* The static execution probability of the demand info.  */
1216 profile_probability probability;
1217
1218 auto_vec<vsetvl_info> local_infos;
1219 vsetvl_info global_info;
1220 bb_info *bb;
1221
1222 bool full_available;
1223
4fd09aed 1224 vsetvl_block_info () : bb (nullptr), full_available (false)
29331e72 1225 {
1226 local_infos.safe_grow_cleared (0);
1227 global_info.set_empty ();
1228 }
1229 vsetvl_block_info (const vsetvl_block_info &other)
1230 : probability (other.probability), local_infos (other.local_infos.copy ()),
1231 global_info (other.global_info), bb (other.bb)
1232 {}
1233
1234 vsetvl_info &get_entry_info ()
1235 {
1236 gcc_assert (!empty_p ());
4fd09aed 1237 return local_infos.is_empty () ? global_info : local_infos[0];
1238 }
1239 vsetvl_info &get_exit_info ()
1240 {
1241 gcc_assert (!empty_p ());
1242 return local_infos.is_empty () ? global_info
1243 : local_infos[local_infos.length () - 1];
1244 }
1245 const vsetvl_info &get_entry_info () const
1246 {
1247 gcc_assert (!empty_p ());
4fd09aed 1248 return local_infos.is_empty () ? global_info : local_infos[0];
1249 }
1250 const vsetvl_info &get_exit_info () const
1251 {
1252 gcc_assert (!empty_p ());
1253 return local_infos.is_empty () ? global_info
1254 : local_infos[local_infos.length () - 1];
1255 }
1256
1257 bool empty_p () const { return local_infos.is_empty () && !has_info (); }
1258 bool has_info () const { return !global_info.empty_p (); }
1259 void set_info (const vsetvl_info &info)
1260 {
1261 gcc_assert (local_infos.is_empty ());
1262 global_info = info;
1263 global_info.set_bb (bb);
29331e72 1264 }
4fd09aed 1265 void set_empty_info () { global_info.set_empty (); }
1266};
1267
/* The demand system is the wrapper around the RVV-based VSETVL info
   analysis tools.  It defines the compatibility rules for SEW/LMUL, POLICY
   and AVL.  It also provides 3 interfaces, available_p, compatible_p and
   merge, for the VSETVL PASS analysis and optimization.

   - available_p: Determine whether the next info can reuse the available
     VSETVL status from the previous info.
     E.g. bb 2 (demand SEW = 32, LMUL = M2) -> bb 3 (demand RATIO = 16).
     Since the bb 2 demand info (SEW/LMUL = 32/2 = 16) satisfies the bb 3
     demand, the VSETVL instruction in bb 3 can be elided.
     available_p (previous, next) is true in such a situation.
   - compatible_p: Determine whether prev_info is compatible with next_info
     so that we can have a new merged info that is available to both of
     them.
   - merge: Merge the stricter demand information from next_info into
     prev_info so that prev_info becomes available to next_info.  */
1284class demand_system
ec99ffab 1285{
1286private:
1287 sbitmap *m_avl_def_in;
1288 sbitmap *m_avl_def_out;
ec99ffab 1289
29331e72 1290 /* predictors. */
ec99ffab 1291
1292 inline bool always_true (const vsetvl_info &prev ATTRIBUTE_UNUSED,
1293 const vsetvl_info &next ATTRIBUTE_UNUSED)
1294 {
1295 return true;
1296 }
1297 inline bool always_false (const vsetvl_info &prev ATTRIBUTE_UNUSED,
1298 const vsetvl_info &next ATTRIBUTE_UNUSED)
1299 {
ec99ffab 1300 return false;
1301 }
1302
1303 /* predictors for sew and lmul */
1304
1305 inline bool lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
1306 {
1307 return prev.get_vlmul () == next.get_vlmul ();
1308 }
1309 inline bool sew_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
1310 {
1311 return prev.get_sew () == next.get_sew ();
1312 }
1313 inline bool sew_lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
1314 {
1315 return lmul_eq_p (prev, next) && sew_eq_p (prev, next);
1316 }
1317 inline bool sew_ge_p (const vsetvl_info &prev, const vsetvl_info &next)
1318 {
1319 return prev.get_sew () == next.get_sew ()
1320 || (next.get_ta () && prev.get_sew () > next.get_sew ());
1321 }
1322 inline bool sew_le_p (const vsetvl_info &prev, const vsetvl_info &next)
1323 {
1324 return prev.get_sew () == next.get_sew ()
1325 || (prev.get_ta () && prev.get_sew () < next.get_sew ());
1326 }
1327 inline bool prev_sew_le_next_max_sew_p (const vsetvl_info &prev,
1328 const vsetvl_info &next)
1329 {
1330 return prev.get_sew () <= next.get_max_sew ();
1331 }
1332 inline bool next_sew_le_prev_max_sew_p (const vsetvl_info &prev,
1333 const vsetvl_info &next)
1334 {
1335 return next.get_sew () <= prev.get_max_sew ();
1336 }
1337 inline bool max_sew_overlap_p (const vsetvl_info &prev,
1338 const vsetvl_info &next)
1339 {
1340 return !(prev.get_sew () > next.get_max_sew ()
1341 || next.get_sew () > prev.get_max_sew ());
1342 }
1343 inline bool ratio_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
1344 {
1345 return prev.has_same_ratio (next);
1346 }
1347 inline bool prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev,
1348 const vsetvl_info &next)
1349 {
1350 return prev.get_ratio () >= (next.get_sew () / 8);
1351 }
1352 inline bool next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev,
1353 const vsetvl_info &next)
1354 {
1355 return next.get_ratio () >= (prev.get_sew () / 8);
1356 }
1357
1358 inline bool sew_ge_and_ratio_eq_p (const vsetvl_info &prev,
1359 const vsetvl_info &next)
1360 {
1361 return sew_ge_p (prev, next) && ratio_eq_p (prev, next);
1362 }
1363 inline bool sew_ge_and_prev_sew_le_next_max_sew_p (const vsetvl_info &prev,
1364 const vsetvl_info &next)
1365 {
1366 return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next);
1367 }
1368 inline bool
1369 sew_ge_and_prev_sew_le_next_max_sew_and_next_ratio_valid_for_prev_sew_p (
1370 const vsetvl_info &prev, const vsetvl_info &next)
1371 {
1372 return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next)
1373 && next_ratio_valid_for_prev_sew_p (prev, next);
1374 }
1375 inline bool sew_le_and_next_sew_le_prev_max_sew_p (const vsetvl_info &prev,
1376 const vsetvl_info &next)
1377 {
1378 return sew_le_p (prev, next) && next_sew_le_prev_max_sew_p (prev, next);
1379 }
1380 inline bool
1381 max_sew_overlap_and_next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev,
1382 const vsetvl_info &next)
1383 {
1384 return next_ratio_valid_for_prev_sew_p (prev, next)
1385 && max_sew_overlap_p (prev, next);
1386 }
1387 inline bool
1388 sew_le_and_next_sew_le_prev_max_sew_and_ratio_eq_p (const vsetvl_info &prev,
1389 const vsetvl_info &next)
1390 {
1391 return sew_le_p (prev, next) && ratio_eq_p (prev, next)
1392 && next_sew_le_prev_max_sew_p (prev, next);
1393 }
1394 inline bool
1395 max_sew_overlap_and_prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev,
1396 const vsetvl_info &next)
1397 {
1398 return prev_ratio_valid_for_next_sew_p (prev, next)
1399 && max_sew_overlap_p (prev, next);
1400 }
1401 inline bool
1402 sew_le_and_next_sew_le_prev_max_sew_and_prev_ratio_valid_for_next_sew_p (
1403 const vsetvl_info &prev, const vsetvl_info &next)
1404 {
1405 return sew_le_p (prev, next) && prev_ratio_valid_for_next_sew_p (prev, next)
1406 && next_sew_le_prev_max_sew_p (prev, next);
1407 }
1408 inline bool max_sew_overlap_and_ratio_eq_p (const vsetvl_info &prev,
1409 const vsetvl_info &next)
1410 {
1411 return ratio_eq_p (prev, next) && max_sew_overlap_p (prev, next);
1412 }
1413
1414 /* predictors for tail and mask policy */
1415
1416 inline bool tail_policy_eq_p (const vsetvl_info &prev,
1417 const vsetvl_info &next)
1418 {
1419 return prev.get_ta () == next.get_ta ();
1420 }
1421 inline bool mask_policy_eq_p (const vsetvl_info &prev,
1422 const vsetvl_info &next)
1423 {
1424 return prev.get_ma () == next.get_ma ();
1425 }
1426 inline bool tail_mask_policy_eq_p (const vsetvl_info &prev,
1427 const vsetvl_info &next)
1428 {
1429 return tail_policy_eq_p (prev, next) && mask_policy_eq_p (prev, next);
1430 }
1431
1432 /* predictors for avl */
1433
1434 inline bool modify_or_use_vl_p (insn_info *i, const vsetvl_info &info)
1435 {
1436 if (info.has_vl ())
1437 {
1438 if (find_access (i->defs (), REGNO (info.get_vl ())))
1439 return true;
1440 if (find_access (i->uses (), REGNO (info.get_vl ())))
1441 {
1442 resource_info resource = full_register (REGNO (info.get_vl ()));
1443 def_lookup dl1 = crtl->ssa->find_def (resource, i);
1444 def_lookup dl2 = crtl->ssa->find_def (resource, info.get_insn ());
1445 if (dl1.matching_set () || dl2.matching_set ())
1446 return true;
	    /* If their VLs come from the same def, we still want to fuse
	       their VSETVL demand info to gain better performance.  */
1449 return dl1.prev_def (i) != dl2.prev_def (i);
1450 }
1451 }
1452 return false;
29331e72
LD
1453 }
1454 inline bool modify_avl_p (insn_info *i, const vsetvl_info &info)
1455 {
1456 return info.has_nonvlmax_reg_avl ()
1457 && find_access (i->defs (), REGNO (info.get_avl ()));
1458 }
1459
1460 inline bool modify_reg_between_p (insn_info *prev_insn, insn_info *curr_insn,
1461 unsigned regno)
1462 {
1463 gcc_assert (prev_insn->compare_with (curr_insn) < 0);
1464 for (insn_info *i = curr_insn->prev_nondebug_insn (); i != prev_insn;
1465 i = i->prev_nondebug_insn ())
1466 {
1467 // no def of regno
1468 if (find_access (i->defs (), regno))
1469 return true;
1470 }
1471 return false;
1472 }
ec99ffab 1473
1474 inline bool reg_avl_equal_p (const vsetvl_info &prev, const vsetvl_info &next)
1475 {
1476 if (!prev.has_nonvlmax_reg_avl () || !next.has_nonvlmax_reg_avl ())
1477 return false;
ec99ffab 1478
1479 if (same_equiv_note_p (prev.get_avl_def (), next.get_avl_def ()))
1480 return true;
ec99ffab 1481
1482 if (REGNO (prev.get_avl ()) != REGNO (next.get_avl ()))
1483 return false;
ec99ffab 1484
1485 insn_info *prev_insn = prev.get_insn ();
1486 if (prev.get_bb () != prev_insn->bb ())
1487 prev_insn = prev.get_bb ()->end_insn ();
ec99ffab 1488
1489 insn_info *next_insn = next.get_insn ();
1490 if (next.get_bb () != next_insn->bb ())
1491 next_insn = next.get_bb ()->end_insn ();
ec99ffab 1492
1493 return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false);
1494 }
ec99ffab 1495
1496 inline bool avl_equal_p (const vsetvl_info &prev, const vsetvl_info &next)
1497 {
1498 gcc_assert (prev.valid_p () && next.valid_p ());
ec99ffab 1499
4cd4c34a 1500 if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
29331e72 1501 return false;
e030af3e 1502
1503 if (vector_config_insn_p (prev.get_insn ()->rtl ()) && next.get_avl_def ()
1504 && next.get_avl_def ()->insn () == prev.get_insn ())
1505 return true;
e030af3e 1506
1507 if (prev.get_read_vl_insn ())
1508 {
1509 if (!next.has_nonvlmax_reg_avl () || !next.get_avl_def ())
1510 return false;
1511 insn_info *avl_def_insn = extract_single_source (next.get_avl_def ());
1512 return avl_def_insn == prev.get_read_vl_insn ();
1513 }
1514
1515 if (prev == next && prev.has_nonvlmax_reg_avl ())
1516 {
1517 insn_info *insn = prev.get_insn ();
1518 bb_info *bb = insn->bb ();
1519 for (insn_info *i = insn; real_insn_and_same_bb_p (i, bb);
1520 i = i->next_nondebug_insn ())
1521 if (find_access (i->defs (), REGNO (prev.get_avl ())))
e030af3e 1522 return false;
29331e72 1523 }
60bd33bc 1524
1525 if (prev.has_vlmax_avl () && next.has_vlmax_avl ())
1526 return true;
1527 else if (prev.has_imm_avl () && next.has_imm_avl ())
1528 return INTVAL (prev.get_avl ()) == INTVAL (next.get_avl ());
1529 else if (prev.has_vl () && next.has_nonvlmax_reg_avl ()
1530 && REGNO (prev.get_vl ()) == REGNO (next.get_avl ()))
1531 {
1532 insn_info *prev_insn = prev.insn_inside_bb_p ()
1533 ? prev.get_insn ()
1534 : prev.get_bb ()->end_insn ();
1535
1536 insn_info *next_insn = next.insn_inside_bb_p ()
1537 ? next.get_insn ()
1538 : next.get_bb ()->end_insn ();
1539 return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false);
1540 }
1541 else if (prev.has_nonvlmax_reg_avl () && next.has_nonvlmax_reg_avl ())
1542 return reg_avl_equal_p (prev, next);
e030af3e 1543
e030af3e 1544 return false;
1545 }
1546 inline bool avl_equal_or_prev_avl_non_zero_p (const vsetvl_info &prev,
1547 const vsetvl_info &next)
1548 {
1549 return avl_equal_p (prev, next) || prev.has_non_zero_avl ();
1550 }
1551
1552 inline bool can_use_next_avl_p (const vsetvl_info &prev,
1553 const vsetvl_info &next)
1554 {
    /* Forbid the AVL/VL propagation if the VL of NEXT is used
       by non-RVV instructions.  This is because:

	 bb 2:
	   PREV: scalar move (no AVL)
	 bb 3:
	   NEXT: vsetvl a5(VL), a4(AVL) ...
	   branch a5,zero

       Since a user vsetvl instruction has no side effects and should have
       been placed in the correct and optimal location of the program by
       earlier passes, it is unreasonable for the VSETVL PASS to try to move
       it somewhere else if its VL is used by non-RVV instructions.

       Note: we only forbid the cases where VL is used by following non-RVV
       instructions, which would cause issues.  We don't forbid other cases
       since they won't cause correctness issues and we still want more
       demand info to be fused backward.  The later LCM algorithm should
       find the optimal location of the vsetvl.  */
1575 if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
1576 return false;
1577
1578 if (!next.has_nonvlmax_reg_avl () && !next.has_vl ())
1579 return true;
e030af3e 1580
1581 insn_info *prev_insn = prev.get_insn ();
1582 if (prev.get_bb () != prev_insn->bb ())
1583 prev_insn = prev.get_bb ()->end_insn ();
1584
1585 insn_info *next_insn = next.get_insn ();
1586 if (next.get_bb () != next_insn->bb ())
1587 next_insn = next.get_bb ()->end_insn ();
1588
1589 return avl_vl_unmodified_between_p (prev_insn, next_insn, next);
1590 }
1591
1592 inline bool avl_equal_or_next_avl_non_zero_and_can_use_next_avl_p (
1593 const vsetvl_info &prev, const vsetvl_info &next)
1594 {
1595 return avl_equal_p (prev, next)
1596 || (next.has_non_zero_avl () && can_use_next_avl_p (prev, next));
1597 }
1598
1599 /* modifiers */
1600
1601 inline void nop (const vsetvl_info &prev ATTRIBUTE_UNUSED,
1602 const vsetvl_info &next ATTRIBUTE_UNUSED)
1603 {}
1604
1605 /* modifiers for sew and lmul */
1606
1607 inline void use_min_of_max_sew (vsetvl_info &prev, const vsetvl_info &next)
1608 {
1609 prev.set_max_sew (MIN (prev.get_max_sew (), next.get_max_sew ()));
1610 }
1611 inline void use_next_sew (vsetvl_info &prev, const vsetvl_info &next)
1612 {
1613 prev.set_sew (next.get_sew ());
1614 use_min_of_max_sew (prev, next);
1615 }
1616 inline void use_max_sew (vsetvl_info &prev, const vsetvl_info &next)
1617 {
1618 auto max_sew = std::max (prev.get_sew (), next.get_sew ());
1619 prev.set_sew (max_sew);
1620 use_min_of_max_sew (prev, next);
1621 }
1622 inline void use_next_sew_lmul (vsetvl_info &prev, const vsetvl_info &next)
1623 {
1624 use_next_sew (prev, next);
1625 prev.set_vlmul (next.get_vlmul ());
1626 prev.set_ratio (next.get_ratio ());
1627 }
1628 inline void use_next_sew_with_prev_ratio (vsetvl_info &prev,
1629 const vsetvl_info &next)
1630 {
1631 use_next_sew (prev, next);
1632 prev.set_vlmul (calculate_vlmul (next.get_sew (), prev.get_ratio ()));
1633 }
1634 inline void modify_lmul_with_next_ratio (vsetvl_info &prev,
1635 const vsetvl_info &next)
1636 {
1637 prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ()));
1638 prev.set_ratio (next.get_ratio ());
1639 }
1640
1641 inline void use_max_sew_and_lmul_with_next_ratio (vsetvl_info &prev,
1642 const vsetvl_info &next)
1643 {
1644 prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ()));
1645 use_max_sew (prev, next);
1646 prev.set_ratio (next.get_ratio ());
1647 }
1648
1649 inline void use_max_sew_and_lmul_with_prev_ratio (vsetvl_info &prev,
1650 const vsetvl_info &next)
1651 {
1652 auto max_sew = std::max (prev.get_sew (), next.get_sew ());
1653 prev.set_vlmul (calculate_vlmul (max_sew, prev.get_ratio ()));
1654 prev.set_sew (max_sew);
1655 }
1656
1657 /* modifiers for tail and mask policy */
1658
1659 inline void use_tail_policy (vsetvl_info &prev, const vsetvl_info &next)
1660 {
1661 if (!next.get_ta ())
1662 prev.set_ta (next.get_ta ());
1663 }
1664 inline void use_mask_policy (vsetvl_info &prev, const vsetvl_info &next)
1665 {
1666 if (!next.get_ma ())
1667 prev.set_ma (next.get_ma ());
1668 }
1669 inline void use_tail_mask_policy (vsetvl_info &prev, const vsetvl_info &next)
1670 {
1671 use_tail_policy (prev, next);
1672 use_mask_policy (prev, next);
1673 }
1674
1675 /* modifiers for avl */
1676
1677 inline void use_next_avl (vsetvl_info &prev, const vsetvl_info &next)
1678 {
1679 gcc_assert (can_use_next_avl_p (prev, next));
1680 prev.update_avl (next);
1681 }
1682
1683 inline void use_next_avl_when_not_equal (vsetvl_info &prev,
1684 const vsetvl_info &next)
1685 {
1686 if (avl_equal_p (prev, next))
1687 return;
1688 gcc_assert (next.has_non_zero_avl ());
1689 use_next_avl (prev, next);
1690 }
e030af3e 1691
29331e72
LD
1692public:
1693 demand_system () : m_avl_def_in (nullptr), m_avl_def_out (nullptr) {}
1694
1695 void set_avl_in_out_data (sbitmap *avl_def_in, sbitmap *avl_def_out)
1696 {
1697 m_avl_def_in = avl_def_in;
1698 m_avl_def_out = avl_def_out;
1699 }
1700
1701 /* Can we move the vsetvl info between prev_insn and next_insn safely? */
1702 bool avl_vl_unmodified_between_p (insn_info *prev_insn, insn_info *next_insn,
1703 const vsetvl_info &info,
1704 bool ignore_vl = false)
1705 {
1706 gcc_assert ((ignore_vl && info.has_nonvlmax_reg_avl ())
1707 || (info.has_nonvlmax_reg_avl () || info.has_vl ()));
1708
1709 gcc_assert (!prev_insn->is_debug_insn () && !next_insn->is_debug_insn ());
1710 if (prev_insn->bb () == next_insn->bb ()
1711 && prev_insn->compare_with (next_insn) < 0)
1712 {
1713 for (insn_info *i = next_insn->prev_nondebug_insn (); i != prev_insn;
1714 i = i->prev_nondebug_insn ())
1715 {
9c16ca93 1716 // no def and use of vl
29331e72
LD
1717 if (!ignore_vl && modify_or_use_vl_p (i, info))
1718 return false;
e030af3e 1719
29331e72
LD
1720 // no def of avl
1721 if (modify_avl_p (i, info))
1722 return false;
1723 }
1724 return true;
1725 }
1726 else
1727 {
1728 if (!ignore_vl && info.has_vl ())
1729 {
1730 bitmap live_out = df_get_live_out (prev_insn->bb ()->cfg_bb ());
1731 if (bitmap_bit_p (live_out, REGNO (info.get_vl ())))
1732 return false;
1733 }
a2d12abe 1734
29331e72
LD
1735 if (info.has_nonvlmax_reg_avl () && m_avl_def_in && m_avl_def_out)
1736 {
1737 bool has_avl_out = false;
1738 unsigned regno = REGNO (info.get_avl ());
1739 unsigned expr_id;
1740 sbitmap_iterator sbi;
1741 EXECUTE_IF_SET_IN_BITMAP (m_avl_def_out[prev_insn->bb ()->index ()],
1742 0, expr_id, sbi)
1743 {
1744 if (get_regno (expr_id, last_basic_block_for_fn (cfun))
1745 != regno)
1746 continue;
1747 has_avl_out = true;
1748 if (!bitmap_bit_p (m_avl_def_in[next_insn->bb ()->index ()],
1749 expr_id))
1750 return false;
1751 }
1752 if (!has_avl_out)
1753 return false;
1754 }
12b23c71 1755
29331e72
LD
1756 for (insn_info *i = next_insn; i != next_insn->bb ()->head_insn ();
1757 i = i->prev_nondebug_insn ())
1758 {
1759 // no def and use of vl
1760 if (!ignore_vl && modify_or_use_vl_p (i, info))
1761 return false;
9243c3d1 1762
29331e72
LD
1763 // no def of avl
1764 if (modify_avl_p (i, info))
1765 return false;
1766 }
6b6b9c68 1767
29331e72
LD
1768 for (insn_info *i = prev_insn->bb ()->end_insn (); i != prev_insn;
1769 i = i->prev_nondebug_insn ())
1770 {
1771 // no def and use of vl
1772 if (!ignore_vl && modify_or_use_vl_p (i, info))
1773 return false;
1774
1775 // no def of avl
1776 if (modify_avl_p (i, info))
1777 return false;
1778 }
1779 }
d875d756 1780 return true;
29331e72
LD
1781 }
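/* Illustrative sketch only; the register numbers and assembly below are
   made up rather than taken from any test case.  For an INFO whose AVL
   lives in a3 and whose VL is a5, the cross-block case above rejects
   situations such as:

     bb 4 (prev_insn's block):        bb 9 (next_insn's block):
       vsetvli a5, a3, e32, m1          ...
       ...                              vsetvli a5, a3, e32, m1  <- next_insn
       bnez a5, .L2      ; a5 (VL) is live out of bb 4 -> return false

   It also rejects the case where the definition of a3 (the AVL) that
   reaches the end of prev_insn's block does not reach the start of
   next_insn's block because a3 is rewritten on some path in between; that
   is caught by the m_avl_def_out/m_avl_def_in reaching-definition bitmaps.
   Otherwise only the instructions after prev_insn in its block and before
   next_insn in its block are scanned for defs/uses of the VL and defs of
   the AVL.  */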
1782
1783 bool sew_lmul_compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
1784 {
1785 gcc_assert (prev.valid_p () && next.valid_p ());
1786 sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
1787 sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
1788#define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1789 AVAILABLE_P, FUSE) \
1790 if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
1791 && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
1792 return COMPATIBLE_P (prev, next);
6b6b9c68 1793
29331e72 1794#include "riscv-vsetvl.def"
6b6b9c68 1795
29331e72
LD
1796 gcc_unreachable ();
1797 }
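/* Illustrative sketch of the table-driven dispatch above.  The rule and
   predicate names in this example are hypothetical; they are not claimed
   to be actual entries of riscv-vsetvl.def.  A hypothetical line such as

     DEF_SEW_LMUL_RULE (sew_only, ratio_only, sew_only,
                        sew_compatible_example_p, sew_available_example_p,
                        use_next_sew)

   would expand inside sew_lmul_compatible_p to

     if (prev_flags == sew_lmul_demand_type::sew_only
         && next_flags == sew_lmul_demand_type::ratio_only)
       return sew_compatible_example_p (prev, next);

   and the very same .def line expands to the AVAILABLE_P check in
   sew_lmul_available_p below and to the FUSE call plus the NEW_FLAGS
   update in merge_sew_lmul, so a single entry describes all three
   operations for one (prev, next) demand combination.  */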
6b6b9c68 1798
29331e72
LD
1799 bool sew_lmul_available_p (const vsetvl_info &prev, const vsetvl_info &next)
1800 {
1801 gcc_assert (prev.valid_p () && next.valid_p ());
1802 sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
1803 sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
1804#define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1805 AVAILABLE_P, FUSE) \
1806 if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
1807 && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
1808 return AVAILABLE_P (prev, next);
d875d756 1809
29331e72 1810#include "riscv-vsetvl.def"
4f673c5e 1811
29331e72
LD
1812 gcc_unreachable ();
1813 }
1814
1815 void merge_sew_lmul (vsetvl_info &prev, const vsetvl_info &next)
1816 {
1817 gcc_assert (prev.valid_p () && next.valid_p ());
1818 sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
1819 sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
1820#define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1821 AVAILABLE_P, FUSE) \
1822 if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
1823 && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
1824 { \
1825 gcc_assert (COMPATIBLE_P (prev, next)); \
1826 FUSE (prev, next); \
1827 prev.set_sew_lmul_demand (sew_lmul_demand_type::NEW_FLAGS); \
1828 return; \
1829 }
9243c3d1 1830
29331e72 1831#include "riscv-vsetvl.def"
9243c3d1 1832
29331e72
LD
1833 gcc_unreachable ();
1834 }
9243c3d1 1835
29331e72
LD
1836 bool policy_compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
1837 {
1838 gcc_assert (prev.valid_p () && next.valid_p ());
1839 policy_demand_type prev_flags = prev.get_policy_demand ();
1840 policy_demand_type next_flags = next.get_policy_demand ();
1841#define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1842 AVAILABLE_P, FUSE) \
1843 if (prev_flags == policy_demand_type::PREV_FLAGS \
1844 && next_flags == policy_demand_type::NEXT_FLAGS) \
1845 return COMPATIBLE_P (prev, next);
9243c3d1 1846
29331e72 1847#include "riscv-vsetvl.def"
9243c3d1 1848
29331e72
LD
1849 gcc_unreachable ();
1850 }
4f673c5e 1851
29331e72
LD
1852 bool policy_available_p (const vsetvl_info &prev, const vsetvl_info &next)
1853 {
1854 gcc_assert (prev.valid_p () && next.valid_p ());
1855 policy_demand_type prev_flags = prev.get_policy_demand ();
1856 policy_demand_type next_flags = next.get_policy_demand ();
1857#define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1858 AVAILABLE_P, FUSE) \
1859 if (prev_flags == policy_demand_type::PREV_FLAGS \
1860 && next_flags == policy_demand_type::NEXT_FLAGS) \
1861 return AVAILABLE_P (prev, next);
4f673c5e 1862
29331e72 1863#include "riscv-vsetvl.def"
9243c3d1 1864
29331e72
LD
1865 gcc_unreachable ();
1866 }
1867
1868 void merge_policy (vsetvl_info &prev, const vsetvl_info &next)
1869 {
1870 gcc_assert (prev.valid_p () && next.valid_p ());
1871 policy_demand_type prev_flags = prev.get_policy_demand ();
1872 policy_demand_type next_flags = next.get_policy_demand ();
1873#define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1874 AVAILABLE_P, FUSE) \
1875 if (prev_flags == policy_demand_type::PREV_FLAGS \
1876 && next_flags == policy_demand_type::NEXT_FLAGS) \
1877 { \
1878 gcc_assert (COMPATIBLE_P (prev, next)); \
1879 FUSE (prev, next); \
1880 prev.set_policy_demand (policy_demand_type::NEW_FLAGS); \
1881 return; \
1882 }
9243c3d1 1883
29331e72 1884#include "riscv-vsetvl.def"
ec99ffab 1885
29331e72
LD
1886 gcc_unreachable ();
1887 }
9243c3d1 1888
29331e72
LD
1889 bool avl_compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
1890 {
1891 gcc_assert (prev.valid_p () && next.valid_p ());
1892 avl_demand_type prev_flags = prev.get_avl_demand ();
1893 avl_demand_type next_flags = next.get_avl_demand ();
1894#define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1895 AVAILABLE_P, FUSE) \
1896 if (prev_flags == avl_demand_type::PREV_FLAGS \
1897 && next_flags == avl_demand_type::NEXT_FLAGS) \
1898 return COMPATIBLE_P (prev, next);
9243c3d1 1899
29331e72 1900#include "riscv-vsetvl.def"
9243c3d1 1901
29331e72
LD
1902 gcc_unreachable ();
1903 }
9243c3d1 1904
29331e72
LD
1905 bool avl_available_p (const vsetvl_info &prev, const vsetvl_info &next)
1906 {
1907 gcc_assert (prev.valid_p () && next.valid_p ());
1908 avl_demand_type prev_flags = prev.get_avl_demand ();
1909 avl_demand_type next_flags = next.get_avl_demand ();
1910#define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1911 AVAILABLE_P, FUSE) \
1912 if (prev_flags == avl_demand_type::PREV_FLAGS \
1913 && next_flags == avl_demand_type::NEXT_FLAGS) \
1914 return AVAILABLE_P (prev, next);
9243c3d1 1915
29331e72 1916#include "riscv-vsetvl.def"
9243c3d1 1917
29331e72
LD
1918 gcc_unreachable ();
1919 }
1920
1921 void merge_avl (vsetvl_info &prev, const vsetvl_info &next)
1922 {
1923 gcc_assert (prev.valid_p () && next.valid_p ());
1924 avl_demand_type prev_flags = prev.get_avl_demand ();
1925 avl_demand_type next_flags = next.get_avl_demand ();
1926#define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1927 AVAILABLE_P, FUSE) \
1928 if (prev_flags == avl_demand_type::PREV_FLAGS \
1929 && next_flags == avl_demand_type::NEXT_FLAGS) \
1930 { \
1931 gcc_assert (COMPATIBLE_P (prev, next)); \
1932 FUSE (prev, next); \
1933 prev.set_avl_demand (avl_demand_type::NEW_FLAGS); \
1934 return; \
60bd33bc
JZZ
1935 }
1936
29331e72 1937#include "riscv-vsetvl.def"
9243c3d1 1938
29331e72
LD
1939 gcc_unreachable ();
1940 }
1941
1942 bool compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
1943 {
1944 bool compatible_p = sew_lmul_compatible_p (prev, next)
1945 && policy_compatible_p (prev, next)
1946 && avl_compatible_p (prev, next);
1947 return compatible_p;
1948 }
1949
1950 bool available_p (const vsetvl_info &prev, const vsetvl_info &next)
1951 {
1952 bool available_p = sew_lmul_available_p (prev, next)
1953 && policy_available_p (prev, next)
1954 && avl_available_p (prev, next);
1955 gcc_assert (!available_p || compatible_p (prev, next));
1956 return available_p;
1957 }
1958
1959 void merge (vsetvl_info &prev, const vsetvl_info &next)
1960 {
1961 gcc_assert (compatible_p (prev, next));
1962 merge_sew_lmul (prev, next);
1963 merge_policy (prev, next);
1964 merge_avl (prev, next);
1965 gcc_assert (available_p (prev, next));
1966 }
1967};
9243c3d1 1968
9243c3d1 1969
29331e72 1970class pre_vsetvl
9243c3d1 1971{
29331e72
LD
1972private:
1973 demand_system m_dem;
1974 auto_vec<vsetvl_block_info> m_vector_block_infos;
1975
1976 /* data for avl reaching definition. */
1977 sbitmap m_avl_regs;
1978 sbitmap *m_avl_def_in;
1979 sbitmap *m_avl_def_out;
1980 sbitmap *m_reg_def_loc;
1981
1982 /* data for vsetvl info reaching definition. */
1983 vsetvl_info m_unknow_info;
1984 auto_vec<vsetvl_info *> m_vsetvl_def_exprs;
1985 sbitmap *m_vsetvl_def_in;
1986 sbitmap *m_vsetvl_def_out;
1987
1988 /* data for lcm */
1989 auto_vec<vsetvl_info *> m_exprs;
1990 sbitmap *m_avloc;
1991 sbitmap *m_avin;
1992 sbitmap *m_avout;
1993 sbitmap *m_kill;
1994 sbitmap *m_antloc;
1995 sbitmap *m_transp;
1996 sbitmap *m_insert;
1997 sbitmap *m_del;
1998 struct edge_list *m_edges;
1999
2000 auto_vec<vsetvl_info> m_delete_list;
2001
2002 vsetvl_block_info &get_block_info (const bb_info *bb)
2003 {
2004 return m_vector_block_infos[bb->index ()];
2005 }
2006 const vsetvl_block_info &get_block_info (const basic_block bb) const
2007 {
2008 return m_vector_block_infos[bb->index];
2009 }
2010
2011 vsetvl_block_info &get_block_info (const basic_block bb)
2012 {
2013 return m_vector_block_infos[bb->index];
2014 }
2015
2016 void add_expr (auto_vec<vsetvl_info *> &m_exprs, vsetvl_info &info)
2017 {
2018 for (vsetvl_info *item : m_exprs)
2019 {
2020 if (*item == info)
2021 return;
2022 }
2023 m_exprs.safe_push (&info);
2024 }
2025
2026 unsigned get_expr_index (auto_vec<vsetvl_info *> &m_exprs,
2027 const vsetvl_info &info)
2028 {
2029 for (size_t i = 0; i < m_exprs.length (); i += 1)
2030 {
2031 if (*m_exprs[i] == info)
2032 return i;
2033 }
2034 gcc_unreachable ();
2035 }
2036
2037 bool anticpatable_exp_p (const vsetvl_info &header_info)
2038 {
2039 if (!header_info.has_nonvlmax_reg_avl () && !header_info.has_vl ())
2040 return true;
9243c3d1 2041
29331e72
LD
2042 bb_info *bb = header_info.get_bb ();
2043 insn_info *prev_insn = bb->head_insn ();
2044 insn_info *next_insn = header_info.insn_inside_bb_p ()
2045 ? header_info.get_insn ()
2046 : header_info.get_bb ()->end_insn ();
2047
2048 return m_dem.avl_vl_unmodified_between_p (prev_insn, next_insn,
2049 header_info);
2050 }
2051
2052 bool available_exp_p (const vsetvl_info &prev_info,
2053 const vsetvl_info &next_info)
2054 {
2055 return m_dem.available_p (prev_info, next_info);
2056 }
2057
2058 void compute_probabilities ()
2059 {
2060 edge e;
2061 edge_iterator ei;
2062
2063 for (const bb_info *bb : crtl->ssa->bbs ())
2064 {
2065 basic_block cfg_bb = bb->cfg_bb ();
2066 auto &curr_prob = get_block_info (cfg_bb).probability;
2067
2068 /* GCC assumes the entry block (bb 0) is always
2069 executed, so set its probability to "always". */
2070 if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
2071 curr_prob = profile_probability::always ();
2072 /* Exit block (bb 1) is the block we don't need to process. */
2073 if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
2074 continue;
9243c3d1 2075
29331e72
LD
2076 gcc_assert (curr_prob.initialized_p ());
2077 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
2078 {
2079 auto &new_prob = get_block_info (e->dest).probability;
2080 /* Normally, the edge probability should be initialized.
2081 However, some special testing code written in GIMPLE IR
2082 style leaves the edge probability uninitialized; we
2083 conservatively set it to "never" so that it will not
2084 affect PRE (Phase 3 && Phase 4). */
2085 if (!e->probability.initialized_p ())
2086 new_prob = profile_probability::never ();
2087 else if (!new_prob.initialized_p ())
2088 new_prob = curr_prob * e->probability;
2089 else if (new_prob == profile_probability::always ())
2090 continue;
2091 else
2092 new_prob += curr_prob * e->probability;
2093 }
2094 }
2095 }
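/* A minimal made-up example of the accumulation above: if bb 2 has
   probability "always" and branches to bb 4 with 60% and to bb 5 with 40%,
   then bb 4 receives 60% and bb 5 receives 40%; if both then fall through
   to bb 6, their contributions are summed and bb 6 becomes "always" again.
   These per-block probabilities are later used when deciding which of two
   incompatible vsetvl infos may be lifted into a shared predecessor.  */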
2096
2097 void insert_vsetvl_insn (enum emit_type emit_type, const vsetvl_info &info)
2098 {
2099 rtx pat = info.get_vsetvl_pat ();
2100 rtx_insn *rinsn = info.get_insn ()->rtl ();
2101
2102 if (emit_type == EMIT_DIRECT)
2103 {
2104 emit_insn (pat);
2105 if (dump_file)
2106 {
2107 fprintf (dump_file, " Insert vsetvl insn %d:\n",
2108 INSN_UID (get_last_insn ()));
2109 print_rtl_single (dump_file, get_last_insn ());
2110 }
2111 }
2112 else if (emit_type == EMIT_BEFORE)
2113 {
2114 emit_insn_before (pat, rinsn);
2115 if (dump_file)
2116 {
2117 fprintf (dump_file, " Insert vsetvl insn before insn %d:\n",
2118 INSN_UID (rinsn));
2119 print_rtl_single (dump_file, PREV_INSN (rinsn));
2120 }
2121 }
2122 else
2123 {
2124 emit_insn_after (pat, rinsn);
2125 if (dump_file)
2126 {
2127 fprintf (dump_file, " Insert vsetvl insn after insn %d:\n",
2128 INSN_UID (rinsn));
2129 print_rtl_single (dump_file, NEXT_INSN (rinsn));
2130 }
2131 }
2132 }
2133
2134 void change_vsetvl_insn (const vsetvl_info &info)
2135 {
2136 rtx_insn *rinsn = info.get_insn ()->rtl ();
2137 rtx new_pat = info.get_vsetvl_pat ();
2138
2139 if (dump_file)
2140 {
2141 fprintf (dump_file, " Change insn %d from:\n", INSN_UID (rinsn));
2142 print_rtl_single (dump_file, rinsn);
2143 }
2144
2145 validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false);
2146
2147 if (dump_file)
2148 {
2149 fprintf (dump_file, "\n to:\n");
2150 print_rtl_single (dump_file, rinsn);
2151 }
2152 }
2153
2154 void remove_vsetvl_insn (const vsetvl_info &info)
2155 {
2156 rtx_insn *rinsn = info.get_insn ()->rtl ();
2157 if (dump_file)
2158 {
2159 fprintf (dump_file, " Eliminate insn %d:\n", INSN_UID (rinsn));
2160 print_rtl_single (dump_file, rinsn);
2161 }
2162 if (in_sequence_p ())
2163 remove_insn (rinsn);
2164 else
2165 delete_insn (rinsn);
2166 }
2167
2168 bool successors_probability_equal_p (const basic_block cfg_bb) const
2169 {
2170 edge e;
2171 edge_iterator ei;
2172 profile_probability prob = profile_probability::uninitialized ();
2173 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
2174 {
2175 if (prob == profile_probability::uninitialized ())
2176 prob = m_vector_block_infos[e->dest->index].probability;
2177 else if (prob == m_vector_block_infos[e->dest->index].probability)
2178 continue;
2179 else
2180 /* We pick the highest probability among those incompatible VSETVL
2181 infos. When all incompatible VSETVL infos have the same probability,
2182 we don't pick any of them. */
2183 return false;
2184 }
ec99ffab 2185 return true;
29331e72
LD
2186 }
2187
923a67f1 2188 bool preds_all_same_avl_and_ratio_p (const vsetvl_info &curr_info)
29331e72
LD
2189 {
2190 gcc_assert (
2191 !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
2192
2193 unsigned expr_index;
2194 sbitmap_iterator sbi;
2195 EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[curr_info.get_bb ()->index ()], 0,
2196 expr_index, sbi)
2197 {
2198 const vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
2199 if (!prev_info.valid_p ()
923a67f1
JZ
2200 || !m_dem.avl_available_p (prev_info, curr_info)
2201 || prev_info.get_ratio () != curr_info.get_ratio ())
29331e72
LD
2202 return false;
2203 }
005fad9d 2204
005fad9d 2205 return true;
29331e72 2206 }
005fad9d 2207
29331e72
LD
2208public:
2209 pre_vsetvl ()
2210 : m_avl_def_in (nullptr), m_avl_def_out (nullptr),
2211 m_vsetvl_def_in (nullptr), m_vsetvl_def_out (nullptr), m_avloc (nullptr),
2212 m_avin (nullptr), m_avout (nullptr), m_kill (nullptr), m_antloc (nullptr),
2213 m_transp (nullptr), m_insert (nullptr), m_del (nullptr), m_edges (nullptr)
2214 {
2215 /* Initialization of RTL_SSA. */
2216 calculate_dominance_info (CDI_DOMINATORS);
2217 df_analyze ();
2218 crtl->ssa = new function_info (cfun);
2219 m_vector_block_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
2220 compute_probabilities ();
2221 m_unknow_info.set_unknown ();
2222 }
2223
2224 void finish ()
2225 {
2226 free_dominance_info (CDI_DOMINATORS);
2227 if (crtl->ssa->perform_pending_updates ())
2228 cleanup_cfg (0);
2229 delete crtl->ssa;
2230 crtl->ssa = nullptr;
2231
2232 if (m_avl_regs)
2233 sbitmap_free (m_avl_regs);
2234 if (m_reg_def_loc)
2235 sbitmap_vector_free (m_reg_def_loc);
2236
2237 if (m_avl_def_in)
2238 sbitmap_vector_free (m_avl_def_in);
2239 if (m_avl_def_out)
2240 sbitmap_vector_free (m_avl_def_out);
2241
2242 if (m_vsetvl_def_in)
2243 sbitmap_vector_free (m_vsetvl_def_in);
2244 if (m_vsetvl_def_out)
2245 sbitmap_vector_free (m_vsetvl_def_out);
2246
2247 if (m_avloc)
2248 sbitmap_vector_free (m_avloc);
2249 if (m_kill)
2250 sbitmap_vector_free (m_kill);
2251 if (m_antloc)
2252 sbitmap_vector_free (m_antloc);
2253 if (m_transp)
2254 sbitmap_vector_free (m_transp);
2255 if (m_insert)
2256 sbitmap_vector_free (m_insert);
2257 if (m_del)
2258 sbitmap_vector_free (m_del);
2259 if (m_avin)
2260 sbitmap_vector_free (m_avin);
2261 if (m_avout)
2262 sbitmap_vector_free (m_avout);
2263
2264 if (m_edges)
2265 free_edge_list (m_edges);
2266 }
2267
2268 void compute_avl_def_data ();
2269 void compute_vsetvl_def_data ();
2270 void compute_lcm_local_properties ();
2271
2272 void fuse_local_vsetvl_info ();
2273 bool earliest_fuse_vsetvl_info ();
2274 void pre_global_vsetvl_info ();
2275 void emit_vsetvl ();
2276 void cleaup ();
2277 void remove_avl_operand ();
2278 void remove_unused_dest_operand ();
2279
2280 void dump (FILE *file, const char *title) const
2281 {
2282 fprintf (file, "\nVSETVL infos after %s\n\n", title);
2283 for (const bb_info *bb : crtl->ssa->bbs ())
2284 {
2285 const auto &block_info = m_vector_block_infos[bb->index ()];
2286 fprintf (file, " bb %d:\n", bb->index ());
2287 fprintf (file, " probability: ");
2288 block_info.probability.dump (file);
2289 fprintf (file, "\n");
2290 if (!block_info.empty_p ())
2291 {
2292 fprintf (file, " Header vsetvl info:");
2293 block_info.get_entry_info ().dump (file, " ");
2294 fprintf (file, " Footer vsetvl info:");
2295 block_info.get_exit_info ().dump (file, " ");
4fd09aed 2296 for (const auto &info : block_info.local_infos)
29331e72
LD
2297 {
2298 fprintf (file,
2299 " insn %d vsetvl info:", info.get_insn ()->uid ());
2300 info.dump (file, " ");
2301 }
2302 }
2303 }
2304 }
2305};
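/* A minimal usage sketch of the class above, an assumption based on its
   public interface and on the phase comments in this file; the actual
   driver is the pass's lazy_vsetvl method and may differ in details such
   as iteration limits or dumping:

     pre_vsetvl pre;
     pre.fuse_local_vsetvl_info ();            // Phase 1
     while (pre.earliest_fuse_vsetvl_info ())  // Phase 2, repeat while
       ;                                       // something changed
     pre.pre_global_vsetvl_info ();            // Phase 3 (LCM)
     pre.emit_vsetvl ();                       // Phase 4
     pre.cleaup ();                            // Phase 5
     pre.finish ();
*/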
c139f5e1 2306
e030af3e 2307void
29331e72 2308pre_vsetvl::compute_avl_def_data ()
e030af3e 2309{
29331e72
LD
2310 if (bitmap_empty_p (m_avl_regs))
2311 return;
e030af3e 2312
29331e72
LD
2313 unsigned num_regs = GP_REG_LAST + 1;
2314 unsigned num_bbs = last_basic_block_for_fn (cfun);
9243c3d1 2315
29331e72
LD
2316 sbitmap *avl_def_loc_temp = sbitmap_vector_alloc (num_bbs, num_regs);
2317 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 2318 {
29331e72
LD
2319 bitmap_and (avl_def_loc_temp[bb->index ()], m_avl_regs,
2320 m_reg_def_loc[bb->index ()]);
2321
2322 vsetvl_block_info &block_info = get_block_info (bb);
2323 if (block_info.has_info ())
9243c3d1 2324 {
29331e72
LD
2325 vsetvl_info &footer_info = block_info.get_exit_info ();
2326 gcc_assert (footer_info.valid_p ());
2327 if (footer_info.has_vl ())
2328 bitmap_set_bit (avl_def_loc_temp[bb->index ()],
2329 REGNO (footer_info.get_vl ()));
9243c3d1
JZZ
2330 }
2331 }
9243c3d1 2332
29331e72
LD
2333 if (m_avl_def_in)
2334 sbitmap_vector_free (m_avl_def_in);
2335 if (m_avl_def_out)
2336 sbitmap_vector_free (m_avl_def_out);
9243c3d1 2337
29331e72
LD
2338 unsigned num_exprs = num_bbs * num_regs;
2339 sbitmap *avl_def_loc = sbitmap_vector_alloc (num_bbs, num_exprs);
2340 sbitmap *m_kill = sbitmap_vector_alloc (num_bbs, num_exprs);
2341 m_avl_def_in = sbitmap_vector_alloc (num_bbs, num_exprs);
2342 m_avl_def_out = sbitmap_vector_alloc (num_bbs, num_exprs);
9243c3d1 2343
29331e72
LD
2344 bitmap_vector_clear (avl_def_loc, num_bbs);
2345 bitmap_vector_clear (m_kill, num_bbs);
2346 bitmap_vector_clear (m_avl_def_out, num_bbs);
2347
2348 unsigned regno;
2349 sbitmap_iterator sbi;
2350 for (const bb_info *bb : crtl->ssa->bbs ())
2351 EXECUTE_IF_SET_IN_BITMAP (avl_def_loc_temp[bb->index ()], 0, regno, sbi)
2352 {
2353 bitmap_set_bit (avl_def_loc[bb->index ()],
2354 get_expr_id (bb->index (), regno, num_bbs));
2355 bitmap_set_range (m_kill[bb->index ()], regno * num_bbs, num_bbs);
2356 }
2357
2358 basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
2359 EXECUTE_IF_SET_IN_BITMAP (m_avl_regs, 0, regno, sbi)
2360 bitmap_set_bit (m_avl_def_out[entry->index],
2361 get_expr_id (entry->index, regno, num_bbs));
2362
2363 compute_reaching_defintion (avl_def_loc, m_kill, m_avl_def_in, m_avl_def_out);
2364
2365 if (dump_file && (dump_flags & TDF_DETAILS))
9243c3d1 2366 {
29331e72
LD
2367 fprintf (dump_file,
2368 " Compute avl reaching defition data (num_bbs %d, num_regs "
2369 "%d):\n\n",
2370 num_bbs, num_regs);
2371 fprintf (dump_file, " avl_regs: ");
2372 dump_bitmap_file (dump_file, m_avl_regs);
2373 fprintf (dump_file, "\n bitmap data:\n");
2374 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 2375 {
29331e72
LD
2376 unsigned int i = bb->index ();
2377 fprintf (dump_file, " BB %u:\n", i);
2378 fprintf (dump_file, " avl_def_loc:");
2379 unsigned expr_id;
2380 sbitmap_iterator sbi;
2381 EXECUTE_IF_SET_IN_BITMAP (avl_def_loc[i], 0, expr_id, sbi)
ec99ffab 2382 {
29331e72
LD
2383 fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs),
2384 get_bb_index (expr_id, num_bbs));
2385 }
2386 fprintf (dump_file, "\n kill:");
2387 EXECUTE_IF_SET_IN_BITMAP (m_kill[i], 0, expr_id, sbi)
2388 {
2389 fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs),
2390 get_bb_index (expr_id, num_bbs));
2391 }
2392 fprintf (dump_file, "\n avl_def_in:");
2393 EXECUTE_IF_SET_IN_BITMAP (m_avl_def_in[i], 0, expr_id, sbi)
2394 {
2395 fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs),
2396 get_bb_index (expr_id, num_bbs));
2397 }
2398 fprintf (dump_file, "\n avl_def_out:");
2399 EXECUTE_IF_SET_IN_BITMAP (m_avl_def_out[i], 0, expr_id, sbi)
2400 {
2401 fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs),
2402 get_bb_index (expr_id, num_bbs));
ec99ffab 2403 }
29331e72 2404 fprintf (dump_file, "\n");
9243c3d1
JZZ
2405 }
2406 }
2407
29331e72
LD
2408 sbitmap_vector_free (avl_def_loc);
2409 sbitmap_vector_free (m_kill);
2410 sbitmap_vector_free (avl_def_loc_temp);
9243c3d1 2411
29331e72 2412 m_dem.set_avl_in_out_data (m_avl_def_in, m_avl_def_out);
9243c3d1
JZZ
2413}
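/* Worked example of the expression-id encoding used above.  This is an
   inference from the bitmap_set_range call, not a statement about the
   get_expr_id implementation (which lives elsewhere): killing the range
   [regno * num_bbs, regno * num_bbs + num_bbs) is consistent with
   get_expr_id (bb, regno, num_bbs) == regno * num_bbs + bb.  Under that
   encoding, with num_bbs == 8, a definition of a0 (GP regno 10) in bb 3 is
   expression id 10 * 8 + 3 == 83, and a block that writes a0 first kills
   the whole range [80, 88) -- every (a0, bb) pair -- before its own
   definition is added back through avl_def_loc.  */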
2414
9243c3d1 2415void
29331e72 2416pre_vsetvl::compute_vsetvl_def_data ()
9243c3d1 2417{
29331e72
LD
2418 m_vsetvl_def_exprs.truncate (0);
2419 add_expr (m_vsetvl_def_exprs, m_unknow_info);
2420 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 2421 {
29331e72
LD
2422 vsetvl_block_info &block_info = get_block_info (bb);
2423 if (block_info.empty_p ())
2424 continue;
2425 vsetvl_info &footer_info = block_info.get_exit_info ();
2426 gcc_assert (footer_info.valid_p () || footer_info.unknown_p ());
2427 add_expr (m_vsetvl_def_exprs, footer_info);
9243c3d1
JZZ
2428 }
2429
29331e72
LD
2430 if (m_vsetvl_def_in)
2431 sbitmap_vector_free (m_vsetvl_def_in);
2432 if (m_vsetvl_def_out)
2433 sbitmap_vector_free (m_vsetvl_def_out);
9243c3d1 2434
29331e72
LD
2435 sbitmap *def_loc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2436 m_vsetvl_def_exprs.length ());
2437 sbitmap *m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2438 m_vsetvl_def_exprs.length ());
9243c3d1 2439
29331e72
LD
2440 m_vsetvl_def_in = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2441 m_vsetvl_def_exprs.length ());
2442 m_vsetvl_def_out = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2443 m_vsetvl_def_exprs.length ());
9243c3d1 2444
29331e72
LD
2445 bitmap_vector_clear (def_loc, last_basic_block_for_fn (cfun));
2446 bitmap_vector_clear (m_kill, last_basic_block_for_fn (cfun));
2447 bitmap_vector_clear (m_vsetvl_def_out, last_basic_block_for_fn (cfun));
9243c3d1 2448
29331e72
LD
2449 for (const bb_info *bb : crtl->ssa->bbs ())
2450 {
2451 vsetvl_block_info &block_info = get_block_info (bb);
2452 if (block_info.empty_p ())
9243c3d1 2453 {
29331e72 2454 for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i += 1)
9243c3d1 2455 {
29331e72
LD
2456 const vsetvl_info &info = *m_vsetvl_def_exprs[i];
2457 if (!info.has_nonvlmax_reg_avl ())
2458 continue;
2459 unsigned int regno;
2460 sbitmap_iterator sbi;
2461 EXECUTE_IF_SET_IN_BITMAP (m_reg_def_loc[bb->index ()], 0, regno,
2462 sbi)
2463 if (regno == REGNO (info.get_avl ()))
2464 {
2465 bitmap_set_bit (m_kill[bb->index ()], i);
2466 bitmap_set_bit (def_loc[bb->index ()],
2467 get_expr_index (m_vsetvl_def_exprs,
2468 m_unknow_info));
2469 }
9243c3d1 2470 }
29331e72 2471 continue;
9243c3d1
JZZ
2472 }
2473
29331e72
LD
2474 vsetvl_info &footer_info = block_info.get_exit_info ();
2475 bitmap_ones (m_kill[bb->index ()]);
2476 bitmap_set_bit (def_loc[bb->index ()],
2477 get_expr_index (m_vsetvl_def_exprs, footer_info));
9243c3d1
JZZ
2478 }
2479
29331e72
LD
2480 /* Set the def_out of the ENTRY basic block to m_unknow_info expr. */
2481 basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
2482 bitmap_set_bit (m_vsetvl_def_out[entry->index],
2483 get_expr_index (m_vsetvl_def_exprs, m_unknow_info));
9243c3d1 2484
29331e72
LD
2485 compute_reaching_defintion (def_loc, m_kill, m_vsetvl_def_in,
2486 m_vsetvl_def_out);
2487
2488 if (dump_file && (dump_flags & TDF_DETAILS))
e030af3e 2489 {
29331e72
LD
2490 fprintf (dump_file,
2491 "\n Compute vsetvl info reaching defition data:\n\n");
2492 fprintf (dump_file, " Expression List (%d):\n",
2493 m_vsetvl_def_exprs.length ());
2494 for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i++)
2495 {
2496 const auto &info = *m_vsetvl_def_exprs[i];
2497 fprintf (dump_file, " Expr[%u]: ", i);
2498 info.dump (dump_file, " ");
2499 }
2500 fprintf (dump_file, "\n bitmap data:\n");
2501 for (const bb_info *bb : crtl->ssa->bbs ())
2502 {
2503 unsigned int i = bb->index ();
2504 fprintf (dump_file, " BB %u:\n", i);
2505 fprintf (dump_file, " def_loc: ");
2506 dump_bitmap_file (dump_file, def_loc[i]);
2507 fprintf (dump_file, " kill: ");
2508 dump_bitmap_file (dump_file, m_kill[i]);
2509 fprintf (dump_file, " vsetvl_def_in: ");
2510 dump_bitmap_file (dump_file, m_vsetvl_def_in[i]);
2511 fprintf (dump_file, " vsetvl_def_out: ");
2512 dump_bitmap_file (dump_file, m_vsetvl_def_out[i]);
2513 }
e030af3e 2514 }
4f673c5e 2515
29331e72 2516 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 2517 {
29331e72
LD
2518 vsetvl_block_info &block_info = get_block_info (bb);
2519 if (block_info.empty_p ())
2520 continue;
2521 vsetvl_info &curr_info = block_info.get_entry_info ();
2522 if (!curr_info.valid_p ())
2523 continue;
2524
2525 unsigned int expr_index;
2526 sbitmap_iterator sbi;
2527 gcc_assert (
2528 !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
2529 bool full_available = true;
2530 EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[bb->index ()], 0, expr_index,
2531 sbi)
4f673c5e 2532 {
29331e72
LD
2533 vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
2534 if (!prev_info.valid_p ()
2535 || !m_dem.available_p (prev_info, curr_info))
2536 {
2537 full_available = false;
2538 break;
2539 }
4f673c5e 2540 }
29331e72 2541 block_info.full_available = full_available;
4f673c5e 2542 }
29331e72
LD
2543
2544 sbitmap_vector_free (def_loc);
2545 sbitmap_vector_free (m_kill);
e030af3e 2546}
9243c3d1 2547
e030af3e 2548/* Compute the local properties of each recorded expression.
6b6b9c68 2549
e030af3e
JZ
2550 Local properties are those that are defined by the block, irrespective of
2551 other blocks.
6b6b9c68 2552
e030af3e
JZ
2553 An expression is transparent in a block if its operands are not modified
2554 in the block.
6b6b9c68 2555
e030af3e
JZ
2556 An expression is computed (locally available) in a block if it is computed
2557 at least once and the expression would contain the same value if the
2558 computation was moved to the end of the block.
2559
2560 An expression is locally anticipatable in a block if it is computed at
2561 least once and the expression would contain the same value if the computation
2562 was moved to the beginning of the block. */
2563void
29331e72 2564pre_vsetvl::compute_lcm_local_properties ()
6b6b9c68 2565{
29331e72
LD
2566 m_exprs.truncate (0);
2567 for (const bb_info *bb : crtl->ssa->bbs ())
2568 {
2569 vsetvl_block_info &block_info = get_block_info (bb);
2570 if (block_info.empty_p ())
2571 continue;
2572 vsetvl_info &header_info = block_info.get_entry_info ();
2573 vsetvl_info &footer_info = block_info.get_exit_info ();
2574 gcc_assert (footer_info.valid_p () || footer_info.unknown_p ());
2575 add_expr (m_exprs, header_info);
2576 add_expr (m_exprs, footer_info);
2577 }
2578
2579 int num_exprs = m_exprs.length ();
2580 if (m_avloc)
2581 sbitmap_vector_free (m_avloc);
2582 if (m_kill)
2583 sbitmap_vector_free (m_kill);
2584 if (m_antloc)
2585 sbitmap_vector_free (m_antloc);
2586 if (m_transp)
2587 sbitmap_vector_free (m_transp);
2588 if (m_avin)
2589 sbitmap_vector_free (m_avin);
2590 if (m_avout)
2591 sbitmap_vector_free (m_avout);
2592
2593 m_avloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2594 m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2595 m_antloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2596 m_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2597 m_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2598 m_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2599
2600 bitmap_vector_clear (m_avloc, last_basic_block_for_fn (cfun));
2601 bitmap_vector_clear (m_antloc, last_basic_block_for_fn (cfun));
2602 bitmap_vector_clear (m_transp, last_basic_block_for_fn (cfun));
2603
e030af3e
JZ
2604 /* - If T is locally available at the end of a block, then T' must be
2605 available at the end of the same block. Since some optimization has
2606 occurred earlier, T' might not be locally available, however, it must
2607 have been previously computed on all paths. As a formula, T at AVLOC(B)
2608 implies that T' at AVOUT(B).
2609 An "available occurrence" is one that is the last occurrence in the
2610 basic block and the operands are not modified by following statements in
2611 the basic block [including this insn].
6b6b9c68 2612
e030af3e
JZ
2613 - If T is locally anticipated at the beginning of a block, then either
2614 T', is locally anticipated or it is already available from previous
2615 blocks. As a formula, this means that T at ANTLOC(B) implies that T' at
2616 ANTLOC(B) at AVIN(B).
2617 An "anticipatable occurrence" is one that is the first occurrence in the
2618 basic block, the operands are not modified in the basic block prior
2619 to the occurrence and the output is not used between the start of
2620 the block and the occurrence. */
e030af3e 2621 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 2622 {
29331e72
LD
2623 unsigned bb_index = bb->index ();
2624 vsetvl_block_info &block_info = get_block_info (bb);
9243c3d1 2625
29331e72
LD
2626 /* Compute m_transp */
2627 if (block_info.empty_p ())
9243c3d1 2628 {
29331e72
LD
2629 bitmap_ones (m_transp[bb_index]);
2630 for (int i = 0; i < num_exprs; i += 1)
4f673c5e 2631 {
29331e72
LD
2632 const vsetvl_info &info = *m_exprs[i];
2633 if (!info.has_nonvlmax_reg_avl () && !info.has_vl ())
2634 continue;
2635
7b2984ad 2636 if (info.has_nonvlmax_reg_avl ())
29331e72 2637 {
7b2984ad
JZ
2638 unsigned int regno;
2639 sbitmap_iterator sbi;
2640 EXECUTE_IF_SET_IN_BITMAP (m_reg_def_loc[bb->index ()], 0,
2641 regno, sbi)
2642 {
2643 if (regno == REGNO (info.get_avl ()))
2644 bitmap_clear_bit (m_transp[bb->index ()], i);
2645 }
29331e72
LD
2646 }
2647
e030af3e
JZ
2648 for (const insn_info *insn : bb->real_nondebug_insns ())
2649 {
9c16ca93
JZ
2650 if (info.has_nonvlmax_reg_avl ()
2651 && find_access (insn->defs (), REGNO (info.get_avl ())))
e030af3e 2652 {
29331e72 2653 bitmap_clear_bit (m_transp[bb_index], i);
e030af3e
JZ
2654 break;
2655 }
2656 }
4f673c5e 2657 }
9243c3d1 2658
29331e72 2659 continue;
9243c3d1 2660 }
e030af3e 2661
29331e72
LD
2662 vsetvl_info &header_info = block_info.get_entry_info ();
2663 vsetvl_info &footer_info = block_info.get_exit_info ();
9243c3d1 2664
29331e72
LD
2665 if (header_info.valid_p ()
2666 && (anticpatable_exp_p (header_info) || block_info.full_available))
2667 bitmap_set_bit (m_antloc[bb_index],
2668 get_expr_index (m_exprs, header_info));
9243c3d1 2669
29331e72
LD
2670 if (footer_info.valid_p ())
2671 for (int i = 0; i < num_exprs; i += 1)
2672 {
2673 const vsetvl_info &info = *m_exprs[i];
2674 if (!info.valid_p ())
2675 continue;
2676 if (available_exp_p (footer_info, info))
2677 bitmap_set_bit (m_avloc[bb_index], i);
2678 }
2679 }
9243c3d1 2680
29331e72 2681 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 2682 {
29331e72
LD
2683 unsigned bb_index = bb->index ();
2684 bitmap_ior (m_kill[bb_index], m_transp[bb_index], m_avloc[bb_index]);
2685 bitmap_not (m_kill[bb_index], m_kill[bb_index]);
9243c3d1
JZZ
2686 }
2687
29331e72 2688 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 2689 {
29331e72 2690 unsigned bb_index = bb->index ();
9243c3d1
JZZ
2691 edge e;
2692 edge_iterator ei;
29331e72 2693 FOR_EACH_EDGE (e, ei, bb->cfg_bb ()->preds)
9243c3d1
JZZ
2694 if (e->flags & EDGE_COMPLEX)
2695 {
29331e72
LD
2696 bitmap_clear (m_antloc[bb_index]);
2697 bitmap_clear (m_transp[bb_index]);
9243c3d1
JZZ
2698 }
2699 }
2700}
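/* Illustrative sketch (the blocks and registers are made up) of how the
   local properties above come out for an expression E = "vsetvl whose AVL
   is in a3":

     bb 6 has no local vsetvl info and contains "add a3, a3, a4":
       E is not transparent in bb 6 (its AVL is redefined there), so LCM
       will not move E across bb 6.
     bb 7 has no local vsetvl info and never touches a3 or E's VL:
       E stays transparent in bb 7.
     bb 8's fused header/footer info is E itself:
       E is locally anticipatable in bb 8 when nothing before the header
       clobbers a3 or the VL (see anticpatable_exp_p), and m_avloc records
       every expression that the footer info makes available at the end of
       bb 8.  */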
2701
29331e72
LD
2702void
2703pre_vsetvl::fuse_local_vsetvl_info ()
e030af3e 2704{
29331e72
LD
2705 m_reg_def_loc
2706 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), GP_REG_LAST + 1);
2707 bitmap_vector_clear (m_reg_def_loc, last_basic_block_for_fn (cfun));
2708 bitmap_ones (m_reg_def_loc[ENTRY_BLOCK_PTR_FOR_FN (cfun)->index]);
2709
2710 for (bb_info *bb : crtl->ssa->bbs ())
e030af3e 2711 {
29331e72 2712 auto &block_info = get_block_info (bb);
4fd09aed 2713 block_info.bb = bb;
29331e72 2714 if (dump_file && (dump_flags & TDF_DETAILS))
e030af3e 2715 {
29331e72
LD
2716 fprintf (dump_file, " Try fuse basic block %d\n", bb->index ());
2717 }
2718 auto_vec<vsetvl_info> infos;
2719 for (insn_info *insn : bb->real_nondebug_insns ())
2720 {
2721 vsetvl_info curr_info = vsetvl_info (insn);
2722 if (curr_info.valid_p () || curr_info.unknown_p ())
2723 infos.safe_push (curr_info);
2724
2725 /* Collecting GP registers modified by the current bb. */
2726 if (insn->is_real ())
2727 for (def_info *def : insn->defs ())
2728 if (def->is_reg () && GP_REG_P (def->regno ()))
2729 bitmap_set_bit (m_reg_def_loc[bb->index ()], def->regno ());
2730 }
e030af3e 2731
29331e72
LD
2732 vsetvl_info prev_info = vsetvl_info ();
2733 prev_info.set_empty ();
2734 for (auto &curr_info : infos)
2735 {
2736 if (prev_info.empty_p ())
2737 prev_info = curr_info;
2738 else if ((curr_info.unknown_p () && prev_info.valid_p ())
2739 || (curr_info.valid_p () && prev_info.unknown_p ()))
2740 {
4fd09aed 2741 block_info.local_infos.safe_push (prev_info);
29331e72
LD
2742 prev_info = curr_info;
2743 }
2744 else if (curr_info.valid_p () && prev_info.valid_p ())
2745 {
2746 if (m_dem.available_p (prev_info, curr_info))
e7b585a4 2747 {
29331e72 2748 if (dump_file && (dump_flags & TDF_DETAILS))
e7b585a4 2749 {
29331e72
LD
2750 fprintf (dump_file,
2751 " Ignore curr info since prev info "
2752 "available with it:\n");
2753 fprintf (dump_file, " prev_info: ");
2754 prev_info.dump (dump_file, " ");
2755 fprintf (dump_file, " curr_info: ");
2756 curr_info.dump (dump_file, " ");
2757 fprintf (dump_file, "\n");
e7b585a4 2758 }
4cd4c34a 2759 if (!curr_info.vl_used_by_non_rvv_insn_p ()
29331e72
LD
2760 && vsetvl_insn_p (curr_info.get_insn ()->rtl ()))
2761 m_delete_list.safe_push (curr_info);
e030af3e 2762
29331e72
LD
2763 if (curr_info.get_read_vl_insn ())
2764 prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ());
e030af3e 2765 }
29331e72 2766 else if (m_dem.compatible_p (prev_info, curr_info))
e030af3e 2767 {
29331e72 2768 if (dump_file && (dump_flags & TDF_DETAILS))
e030af3e 2769 {
29331e72
LD
2770 fprintf (dump_file, " Fuse curr info since prev info "
2771 "compatible with it:\n");
2772 fprintf (dump_file, " prev_info: ");
2773 prev_info.dump (dump_file, " ");
2774 fprintf (dump_file, " curr_info: ");
2775 curr_info.dump (dump_file, " ");
e030af3e 2776 }
29331e72
LD
2777 m_dem.merge (prev_info, curr_info);
2778 if (curr_info.get_read_vl_insn ())
2779 prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ());
2780 if (dump_file && (dump_flags & TDF_DETAILS))
e030af3e 2781 {
29331e72
LD
2782 fprintf (dump_file, " prev_info after fused: ");
2783 prev_info.dump (dump_file, " ");
2784 fprintf (dump_file, "\n");
e030af3e 2785 }
e030af3e
JZ
2786 }
2787 else
2788 {
29331e72
LD
2789 if (dump_file && (dump_flags & TDF_DETAILS))
2790 {
2791 fprintf (dump_file,
2792 " Cannot fuse uncompatible infos:\n");
2793 fprintf (dump_file, " prev_info: ");
2794 prev_info.dump (dump_file, " ");
2795 fprintf (dump_file, " curr_info: ");
2796 curr_info.dump (dump_file, " ");
2797 }
4fd09aed 2798 block_info.local_infos.safe_push (prev_info);
29331e72 2799 prev_info = curr_info;
e030af3e
JZ
2800 }
2801 }
2802 }
29331e72
LD
2803
2804 if (prev_info.valid_p () || prev_info.unknown_p ())
4fd09aed 2805 block_info.local_infos.safe_push (prev_info);
e030af3e 2806 }
e030af3e 2807
29331e72
LD
2808 m_avl_regs = sbitmap_alloc (GP_REG_LAST + 1);
2809 bitmap_clear (m_avl_regs);
2810 for (const bb_info *bb : crtl->ssa->bbs ())
e030af3e 2811 {
29331e72
LD
2812 vsetvl_block_info &block_info = get_block_info (bb);
2813 if (block_info.empty_p ())
2814 continue;
2815
2816 vsetvl_info &header_info = block_info.get_entry_info ();
2817 if (header_info.valid_p () && header_info.has_nonvlmax_reg_avl ())
e030af3e 2818 {
29331e72
LD
2819 gcc_assert (GP_REG_P (REGNO (header_info.get_avl ())));
2820 bitmap_set_bit (m_avl_regs, REGNO (header_info.get_avl ()));
e030af3e 2821 }
e030af3e
JZ
2822 }
2823}
2824
29331e72 2825
9243c3d1 2826bool
29331e72 2827pre_vsetvl::earliest_fuse_vsetvl_info ()
9243c3d1 2828{
29331e72
LD
2829 compute_avl_def_data ();
2830 compute_vsetvl_def_data ();
2831 compute_lcm_local_properties ();
9243c3d1 2832
29331e72
LD
2833 unsigned num_exprs = m_exprs.length ();
2834 struct edge_list *m_edges = create_edge_list ();
2835 unsigned num_edges = NUM_EDGES (m_edges);
2836 sbitmap *antin
2837 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2838 sbitmap *antout
2839 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
005fad9d 2840
29331e72 2841 sbitmap *earliest = sbitmap_vector_alloc (num_edges, num_exprs);
9243c3d1 2842
29331e72
LD
2843 compute_available (m_avloc, m_kill, m_avout, m_avin);
2844 compute_antinout_edge (m_antloc, m_transp, antin, antout);
2845 compute_earliest (m_edges, num_exprs, antin, antout, m_avout, m_kill,
2846 earliest);
2847
2848 if (dump_file && (dump_flags & TDF_DETAILS))
9243c3d1 2849 {
29331e72
LD
2850 fprintf (dump_file, "\n Compute LCM earliest insert data:\n\n");
2851 fprintf (dump_file, " Expression List (%u):\n", num_exprs);
2852 for (unsigned i = 0; i < num_exprs; i++)
9243c3d1 2853 {
29331e72
LD
2854 const auto &info = *m_exprs[i];
2855 fprintf (dump_file, " Expr[%u]: ", i);
2856 info.dump (dump_file, " ");
9243c3d1 2857 }
29331e72
LD
2858 fprintf (dump_file, "\n bitmap data:\n");
2859 for (const bb_info *bb : crtl->ssa->bbs ())
2860 {
2861 unsigned int i = bb->index ();
2862 fprintf (dump_file, " BB %u:\n", i);
2863 fprintf (dump_file, " avloc: ");
2864 dump_bitmap_file (dump_file, m_avloc[i]);
2865 fprintf (dump_file, " kill: ");
2866 dump_bitmap_file (dump_file, m_kill[i]);
2867 fprintf (dump_file, " antloc: ");
2868 dump_bitmap_file (dump_file, m_antloc[i]);
2869 fprintf (dump_file, " transp: ");
2870 dump_bitmap_file (dump_file, m_transp[i]);
2871
2872 fprintf (dump_file, " avin: ");
2873 dump_bitmap_file (dump_file, m_avin[i]);
2874 fprintf (dump_file, " avout: ");
2875 dump_bitmap_file (dump_file, m_avout[i]);
2876 fprintf (dump_file, " antin: ");
2877 dump_bitmap_file (dump_file, antin[i]);
2878 fprintf (dump_file, " antout: ");
2879 dump_bitmap_file (dump_file, antout[i]);
2880 }
2881 fprintf (dump_file, "\n");
2882 fprintf (dump_file, " earliest:\n");
2883 for (unsigned ed = 0; ed < num_edges; ed++)
2884 {
2885 edge eg = INDEX_EDGE (m_edges, ed);
9243c3d1 2886
29331e72
LD
2887 if (bitmap_empty_p (earliest[ed]))
2888 continue;
2889 fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index,
2890 eg->dest->index);
2891 dump_bitmap_file (dump_file, earliest[ed]);
2892 }
2893 fprintf (dump_file, "\n");
2894 }
9243c3d1 2895
29331e72 2896 if (dump_file && (dump_flags & TDF_DETAILS))
9243c3d1 2897 {
29331e72
LD
2898 fprintf (dump_file, " Fused global info result:\n");
2899 }
9243c3d1 2900
29331e72
LD
2901 bool changed = false;
2902 for (unsigned ed = 0; ed < num_edges; ed++)
2903 {
2904 sbitmap e = earliest[ed];
2905 if (bitmap_empty_p (e))
9243c3d1
JZZ
2906 continue;
2907
29331e72
LD
2908 unsigned int expr_index;
2909 sbitmap_iterator sbi;
2910 EXECUTE_IF_SET_IN_BITMAP (e, 0, expr_index, sbi)
ec99ffab 2911 {
29331e72
LD
2912 vsetvl_info &curr_info = *m_exprs[expr_index];
2913 if (!curr_info.valid_p ())
2914 continue;
2915
2916 edge eg = INDEX_EDGE (m_edges, ed);
2917 if (eg->probability == profile_probability::never ())
2918 continue;
2919 if (eg->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
2920 || eg->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
2921 continue;
ff8f9544 2922
29331e72
LD
2923 vsetvl_block_info &src_block_info = get_block_info (eg->src);
2924 vsetvl_block_info &dest_block_info = get_block_info (eg->dest);
ff8f9544 2925
29331e72
LD
2926 if (src_block_info.probability
2927 == profile_probability::uninitialized ())
ff8f9544 2928 continue;
9243c3d1 2929
29331e72 2930 if (src_block_info.empty_p ())
9243c3d1 2931 {
29331e72
LD
2932 vsetvl_info new_curr_info = curr_info;
2933 new_curr_info.set_bb (crtl->ssa->bb (eg->dest));
2934 bool has_compatible_p = false;
2935 unsigned int def_expr_index;
2936 sbitmap_iterator sbi2;
2937 EXECUTE_IF_SET_IN_BITMAP (
2938 m_vsetvl_def_in[new_curr_info.get_bb ()->index ()], 0,
2939 def_expr_index, sbi2)
9243c3d1 2940 {
29331e72
LD
2941 vsetvl_info &prev_info = *m_vsetvl_def_exprs[def_expr_index];
2942 if (!prev_info.valid_p ())
2943 continue;
2944 if (m_dem.compatible_p (prev_info, new_curr_info))
9243c3d1 2945 {
29331e72
LD
2946 has_compatible_p = true;
2947 break;
9243c3d1 2948 }
9243c3d1 2949 }
29331e72 2950 if (!has_compatible_p)
9243c3d1 2951 {
29331e72
LD
2952 if (dump_file && (dump_flags & TDF_DETAILS))
2953 {
2954 fprintf (dump_file,
2955 " Forbidden lift up vsetvl info into bb %u "
2956 "since there is no vsetvl info that reaching in "
2957 "is compatible with it:",
2958 eg->src->index);
2959 curr_info.dump (dump_file, " ");
2960 }
2961 continue;
9243c3d1
JZZ
2962 }
2963
29331e72 2964 if (dump_file && (dump_flags & TDF_DETAILS))
e030af3e
JZ
2965 {
2966 fprintf (dump_file,
29331e72
LD
2967 " Set empty bb %u to info:", eg->src->index);
2968 curr_info.dump (dump_file, " ");
e030af3e 2969 }
29331e72
LD
2970 src_block_info.set_info (curr_info);
2971 src_block_info.probability = dest_block_info.probability;
2972 changed = true;
9243c3d1 2973 }
29331e72
LD
2974 else if (src_block_info.has_info ())
2975 {
2976 vsetvl_info &prev_info = src_block_info.get_exit_info ();
2977 gcc_assert (prev_info.valid_p ());
2978
2979 if (m_dem.compatible_p (prev_info, curr_info))
2980 {
2981 if (dump_file && (dump_flags & TDF_DETAILS))
2982 {
2983 fprintf (dump_file, " Fuse curr info since prev info "
2984 "compatible with it:\n");
2985 fprintf (dump_file, " prev_info: ");
2986 prev_info.dump (dump_file, " ");
2987 fprintf (dump_file, " curr_info: ");
2988 curr_info.dump (dump_file, " ");
2989 }
2990 m_dem.merge (prev_info, curr_info);
2991 if (dump_file && (dump_flags & TDF_DETAILS))
2992 {
2993 fprintf (dump_file, " prev_info after fused: ");
2994 prev_info.dump (dump_file, " ");
2995 fprintf (dump_file, "\n");
2996 }
2997 changed = true;
2998 if (src_block_info.has_info ())
2999 src_block_info.probability += dest_block_info.probability;
3000 }
3001 else if (src_block_info.has_info ()
3002 && !m_dem.compatible_p (prev_info, curr_info))
3003 {
3004 /* Cancel lift up if probabilities are equal. */
3005 if (successors_probability_equal_p (eg->src))
3006 {
3007 if (dump_file && (dump_flags & TDF_DETAILS))
3008 {
3009 fprintf (dump_file,
3010 " Change empty bb %u to from:",
3011 eg->src->index);
3012 prev_info.dump (dump_file, " ");
3013 fprintf (dump_file,
3014 " to (higher probability):");
3015 curr_info.dump (dump_file, " ");
3016 }
3017 src_block_info.set_empty_info ();
3018 src_block_info.probability
3019 = profile_probability::uninitialized ();
3020 changed = true;
3021 }
3022 /* Choose the one with higher probability. */
3023 else if (dest_block_info.probability
3024 > src_block_info.probability)
3025 {
3026 if (dump_file && (dump_flags & TDF_DETAILS))
3027 {
3028 fprintf (dump_file,
3029 " Change empty bb %u to from:",
3030 eg->src->index);
3031 prev_info.dump (dump_file, " ");
3032 fprintf (dump_file,
3033 " to (higher probability):");
3034 curr_info.dump (dump_file, " ");
3035 }
3036 src_block_info.set_info (curr_info);
3037 src_block_info.probability = dest_block_info.probability;
3038 changed = true;
3039 }
3040 }
3041 }
3042 else
e030af3e 3043 {
29331e72
LD
3044 vsetvl_info &prev_info = src_block_info.get_exit_info ();
3045 if (!prev_info.valid_p ()
3046 || m_dem.available_p (prev_info, curr_info))
3047 continue;
3048
3049 if (m_dem.compatible_p (prev_info, curr_info))
3050 {
3051 if (dump_file && (dump_flags & TDF_DETAILS))
3052 {
3053 fprintf (dump_file, " Fuse curr info since prev info "
3054 "compatible with it:\n");
3055 fprintf (dump_file, " prev_info: ");
3056 prev_info.dump (dump_file, " ");
3057 fprintf (dump_file, " curr_info: ");
3058 curr_info.dump (dump_file, " ");
3059 }
3060 m_dem.merge (prev_info, curr_info);
3061 if (dump_file && (dump_flags & TDF_DETAILS))
3062 {
3063 fprintf (dump_file, " prev_info after fused: ");
3064 prev_info.dump (dump_file, " ");
3065 fprintf (dump_file, "\n");
3066 }
3067 changed = true;
3068 }
e030af3e 3069 }
9243c3d1
JZZ
3070 }
3071 }
3072
0d50facd 3073 if (dump_file && (dump_flags & TDF_DETAILS))
c919d059 3074 {
29331e72 3075 fprintf (dump_file, "\n");
c919d059 3076 }
c919d059 3077
29331e72
LD
3078 sbitmap_vector_free (antin);
3079 sbitmap_vector_free (antout);
3080 sbitmap_vector_free (earliest);
3081 free_edge_list (m_edges);
c919d059 3082
29331e72 3083 return changed;
c919d059
KC
3084}
3085
8421f279 3086void
29331e72 3087pre_vsetvl::pre_global_vsetvl_info ()
c919d059 3088{
29331e72
LD
3089 compute_avl_def_data ();
3090 compute_vsetvl_def_data ();
3091 compute_lcm_local_properties ();
c919d059 3092
29331e72
LD
3093 unsigned num_exprs = m_exprs.length ();
3094 m_edges = pre_edge_lcm_avs (num_exprs, m_transp, m_avloc, m_antloc, m_kill,
3095 m_avin, m_avout, &m_insert, &m_del);
3096 unsigned num_edges = NUM_EDGES (m_edges);
c919d059 3097
29331e72
LD
3098 if (dump_file && (dump_flags & TDF_DETAILS))
3099 {
3100 fprintf (dump_file, "\n Compute LCM insert and delete data:\n\n");
3101 fprintf (dump_file, " Expression List (%u):\n", num_exprs);
3102 for (unsigned i = 0; i < num_exprs; i++)
c919d059 3103 {
29331e72
LD
3104 const auto &info = *m_exprs[i];
3105 fprintf (dump_file, " Expr[%u]: ", i);
3106 info.dump (dump_file, " ");
c919d059 3107 }
29331e72
LD
3108 fprintf (dump_file, "\n bitmap data:\n");
3109 for (const bb_info *bb : crtl->ssa->bbs ())
c919d059 3110 {
29331e72
LD
3111 unsigned i = bb->index ();
3112 fprintf (dump_file, " BB %u:\n", i);
3113 fprintf (dump_file, " avloc: ");
3114 dump_bitmap_file (dump_file, m_avloc[i]);
3115 fprintf (dump_file, " kill: ");
3116 dump_bitmap_file (dump_file, m_kill[i]);
3117 fprintf (dump_file, " antloc: ");
3118 dump_bitmap_file (dump_file, m_antloc[i]);
3119 fprintf (dump_file, " transp: ");
3120 dump_bitmap_file (dump_file, m_transp[i]);
3121
3122 fprintf (dump_file, " avin: ");
3123 dump_bitmap_file (dump_file, m_avin[i]);
3124 fprintf (dump_file, " avout: ");
3125 dump_bitmap_file (dump_file, m_avout[i]);
3126 fprintf (dump_file, " del: ");
3127 dump_bitmap_file (dump_file, m_del[i]);
c919d059 3128 }
29331e72
LD
3129 fprintf (dump_file, "\n");
3130 fprintf (dump_file, " insert:\n");
3131 for (unsigned ed = 0; ed < num_edges; ed++)
8421f279 3132 {
29331e72 3133 edge eg = INDEX_EDGE (m_edges, ed);
c919d059 3134
29331e72
LD
3135 if (bitmap_empty_p (m_insert[ed]))
3136 continue;
3137 fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index,
3138 eg->dest->index);
3139 dump_bitmap_file (dump_file, m_insert[ed]);
c919d059 3140 }
29331e72
LD
3141 }
3142
3143 /* Remove vsetvl infos as LCM suggests. */
3144 for (const bb_info *bb : crtl->ssa->bbs ())
3145 {
3146 sbitmap d = m_del[bb->index ()];
3147 if (bitmap_count_bits (d) == 0)
c919d059 3148 continue;
29331e72
LD
3149 gcc_assert (bitmap_count_bits (d) == 1);
3150 unsigned expr_index = bitmap_first_set_bit (d);
3151 vsetvl_info &info = *m_exprs[expr_index];
3152 gcc_assert (info.valid_p ());
3153 gcc_assert (info.get_bb () == bb);
3154 const vsetvl_block_info &block_info = get_block_info (info.get_bb ());
3155 gcc_assert (block_info.get_entry_info () == info);
3156 info.set_delete ();
3157 }
c919d059 3158
29331e72
LD
3159 for (const bb_info *bb : crtl->ssa->bbs ())
3160 {
3161 vsetvl_block_info &block_info = get_block_info (bb);
3162 if (block_info.empty_p ())
3163 continue;
3164 vsetvl_info &curr_info = block_info.get_entry_info ();
3165 if (curr_info.delete_p ())
c919d059 3166 {
4fd09aed 3167 if (block_info.local_infos.is_empty ())
29331e72 3168 continue;
4fd09aed 3169 curr_info = block_info.local_infos[0];
c919d059 3170 }
4cd4c34a 3171 if (curr_info.valid_p () && !curr_info.vl_used_by_non_rvv_insn_p ()
923a67f1 3172 && preds_all_same_avl_and_ratio_p (curr_info))
29331e72 3173 curr_info.set_change_vtype_only ();
c919d059 3174
29331e72
LD
3175 vsetvl_info prev_info = vsetvl_info ();
3176 prev_info.set_empty ();
4fd09aed 3177 for (auto &curr_info : block_info.local_infos)
c919d059 3178 {
29331e72 3179 if (prev_info.valid_p () && curr_info.valid_p ()
923a67f1
JZ
3180 && m_dem.avl_available_p (prev_info, curr_info)
3181 && prev_info.get_ratio () == curr_info.get_ratio ())
29331e72
LD
3182 curr_info.set_change_vtype_only ();
3183 prev_info = curr_info;
c919d059 3184 }
20c85207 3185 }
20c85207
JZ
3186}
3187
29331e72
LD
3188void
3189pre_vsetvl::emit_vsetvl ()
20c85207 3190{
29331e72 3191 bool need_commit = false;
20c85207 3192
29331e72 3193 for (const bb_info *bb : crtl->ssa->bbs ())
20c85207 3194 {
4fd09aed 3195 for (const auto &curr_info : get_block_info (bb).local_infos)
29331e72
LD
3196 {
3197 insn_info *insn = curr_info.get_insn ();
3198 if (curr_info.delete_p ())
3199 {
3200 if (vsetvl_insn_p (insn->rtl ()))
3201 remove_vsetvl_insn (curr_info);
3202 continue;
3203 }
3204 else if (curr_info.valid_p ())
3205 {
3206 if (vsetvl_insn_p (insn->rtl ()))
3207 {
3208 const vsetvl_info temp = vsetvl_info (insn);
3209 if (!(curr_info == temp))
3210 {
3211 if (dump_file)
3212 {
3213 fprintf (dump_file, "\n Change vsetvl info from: ");
3214 temp.dump (dump_file, " ");
3215 fprintf (dump_file, " to: ");
3216 curr_info.dump (dump_file, " ");
3217 }
3218 change_vsetvl_insn (curr_info);
3219 }
3220 }
3221 else
3222 {
3223 if (dump_file)
3224 {
3225 fprintf (dump_file,
3226 "\n Insert vsetvl info before insn %d: ",
3227 insn->uid ());
3228 curr_info.dump (dump_file, " ");
3229 }
3230 insert_vsetvl_insn (EMIT_BEFORE, curr_info);
3231 }
3232 }
3233 }
20c85207 3234 }
20c85207 3235
29331e72 3236 for (const vsetvl_info &item : m_delete_list)
20c85207 3237 {
29331e72
LD
3238 gcc_assert (vsetvl_insn_p (item.get_insn ()->rtl ()));
3239 remove_vsetvl_insn (item);
20c85207
JZ
3240 }
3241
d1189cee
JZ
3242 /* Insert the vsetvl infos that were not deleted after being lifted up. */
3243 for (const bb_info *bb : crtl->ssa->bbs ())
3244 {
3245 const vsetvl_block_info &block_info = get_block_info (bb);
3246 if (!block_info.has_info ())
3247 continue;
3248
3249 const vsetvl_info &footer_info = block_info.get_exit_info ();
3250
3251 if (footer_info.delete_p ())
3252 continue;
3253
3254 edge eg;
3255 edge_iterator eg_iterator;
3256 FOR_EACH_EDGE (eg, eg_iterator, bb->cfg_bb ()->succs)
3257 {
3258 gcc_assert (!(eg->flags & EDGE_ABNORMAL));
3259 if (dump_file)
3260 {
3261 fprintf (
3262 dump_file,
3263 "\n Insert missed vsetvl info at edge(bb %u -> bb %u): ",
3264 eg->src->index, eg->dest->index);
3265 footer_info.dump (dump_file, " ");
3266 }
3267 start_sequence ();
3268 insert_vsetvl_insn (EMIT_DIRECT, footer_info);
3269 rtx_insn *rinsn = get_insns ();
3270 end_sequence ();
3271 default_rtl_profile ();
3272 insert_insn_on_edge (rinsn, eg);
3273 need_commit = true;
3274 }
3275 }
3276
29331e72
LD
3277 /* Insert vsetvl insns as LCM suggests. */
3278 for (int ed = 0; ed < NUM_EDGES (m_edges); ed++)
20c85207 3279 {
29331e72
LD
3280 edge eg = INDEX_EDGE (m_edges, ed);
3281 sbitmap i = m_insert[ed];
3282 if (bitmap_count_bits (i) < 1)
3283 continue;
3284
3285 if (bitmap_count_bits (i) > 1)
3286 /* For code with an infinite loop (e.g. pr61634.c), the data flow is
3287 completely wrong. */
3288 continue;
3289
3290 gcc_assert (bitmap_count_bits (i) == 1);
3291 unsigned expr_index = bitmap_first_set_bit (i);
3292 const vsetvl_info &info = *m_exprs[expr_index];
3293 gcc_assert (info.valid_p ());
3294 if (dump_file)
20c85207 3295 {
29331e72
LD
3296 fprintf (dump_file,
3297 "\n Insert vsetvl info at edge(bb %u -> bb %u): ",
3298 eg->src->index, eg->dest->index);
3299 info.dump (dump_file, " ");
20c85207 3300 }
29331e72
LD
3301 rtl_profile_for_edge (eg);
3302 start_sequence ();
3303
3304 insert_vsetvl_insn (EMIT_DIRECT, info);
3305 rtx_insn *rinsn = get_insns ();
3306 end_sequence ();
3307 default_rtl_profile ();
3308
3309 /* We should not get an abnormal edge here. */
3310 gcc_assert (!(eg->flags & EDGE_ABNORMAL));
3311 need_commit = true;
3312 insert_insn_on_edge (rinsn, eg);
20c85207
JZ
3313 }
3314
29331e72
LD
3315 if (need_commit)
3316 commit_edge_insertions ();
20c85207
JZ
3317}
3318
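/* Phase 5: remove the avl operands that the RVV insns no longer need and the
   unused vl destination operands of vsetvl insns.  */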
void
pre_vsetvl::cleaup ()
{
  remove_avl_operand ();
  remove_unused_dest_operand ();
}

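/* Replace an RVV insn's avl (vl) register operand with (const_int 0) when the
   register is referenced nowhere else in the insn, since the vl value is now
   supplied by the preceding vsetvl.  */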
void
pre_vsetvl::remove_avl_operand ()
{
  basic_block cfg_bb;
  rtx_insn *rinsn;
  FOR_ALL_BB_FN (cfg_bb, cfun)
    FOR_BB_INSNS (cfg_bb, rinsn)
      if (NONDEBUG_INSN_P (rinsn) && has_vl_op (rinsn)
          && REG_P (get_vl (rinsn)))
        {
          rtx avl = get_vl (rinsn);
          if (count_regno_occurrences (rinsn, REGNO (avl)) == 1)
            {
              rtx new_pat;
              if (fault_first_load_p (rinsn))
                new_pat
                  = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx);
              else
                {
                  rtx set = single_set (rinsn);
                  rtx src
                    = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx);
                  new_pat = gen_rtx_SET (SET_DEST (set), src);
                }
              if (dump_file)
                {
                  fprintf (dump_file, " Cleanup insn %u's avl operand:\n",
                           INSN_UID (rinsn));
                  print_rtl_single (dump_file, rinsn);
                }
              validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false);
            }
        }
}

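/* Rewrite a vsetvl insn so that it discards its vl result when that result
   register has no remaining uses and the AVL is not VLMAX, e.g. the
   x0-destination "vsetvli zero, a0, e32, m1, ta, ma" form (illustrative
   operands).  */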
6b6b9c68 3361void
29331e72 3362pre_vsetvl::remove_unused_dest_operand ()
20c85207 3363{
6b6b9c68 3364 df_analyze ();
20c85207
JZ
3365 basic_block cfg_bb;
3366 rtx_insn *rinsn;
3367 FOR_ALL_BB_FN (cfg_bb, cfun)
29331e72
LD
3368 FOR_BB_INSNS (cfg_bb, rinsn)
3369 if (NONDEBUG_INSN_P (rinsn) && vsetvl_insn_p (rinsn))
6b6b9c68 3370 {
29331e72
LD
3371 rtx vl = get_vl (rinsn);
3372 vsetvl_info info = vsetvl_info (rinsn);
3373 if (has_no_uses (cfg_bb, rinsn, REGNO (vl)))
3374 if (!info.has_vlmax_avl ())
3375 {
3376 rtx new_pat = info.get_vsetvl_pat (true);
3377 if (dump_file)
3378 {
3379 fprintf (dump_file,
3380 " Remove vsetvl insn %u's dest(vl) operand since "
3381 "it unused:\n",
3382 INSN_UID (rinsn));
3383 print_rtl_single (dump_file, rinsn);
3384 }
3385 validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat,
3386 false);
3387 }
6b6b9c68 3388 }
6b6b9c68
JZZ
3389}
3390
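/* Pass metadata; the pass itself is gated on TARGET_VECTOR below.  */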
const pass_data pass_data_vsetvl = {
  RTL_PASS,      /* type */
  "vsetvl",      /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE,       /* tv_id */
  0,             /* properties_required */
  0,             /* properties_provided */
  0,             /* properties_destroyed */
  0,             /* todo_flags_start */
  0,             /* todo_flags_finish */
};

class pass_vsetvl : public rtl_opt_pass
{
private:
  void simple_vsetvl ();
  void lazy_vsetvl ();

public:
  pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {}

  /* opt_pass methods: */
  virtual bool gate (function *) final override { return TARGET_VECTOR; }
  virtual unsigned int execute (function *) final override;
}; // class pass_vsetvl

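/* When not optimizing, simply emit a vsetvl insn computed from each RVV
   insn's own info directly before that insn, with no global analysis.  */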
acc10c79 3417void
29331e72 3418pass_vsetvl::simple_vsetvl ()
acc10c79 3419{
29331e72
LD
3420 if (dump_file)
3421 fprintf (dump_file, "\nEntering Simple VSETVL PASS\n");
acc10c79 3422
29331e72
LD
3423 basic_block cfg_bb;
3424 rtx_insn *rinsn;
3425 FOR_ALL_BB_FN (cfg_bb, cfun)
acc10c79 3426 {
29331e72 3427 FOR_BB_INSNS (cfg_bb, rinsn)
acc10c79 3428 {
29331e72 3429 if (!NONDEBUG_INSN_P (rinsn))
acc10c79 3430 continue;
29331e72
LD
3431 if (has_vtype_op (rinsn))
3432 {
3433 const auto &info = vsetvl_info (rinsn);
3434 rtx pat = info.get_vsetvl_pat ();
3435 emit_insn_before (pat, rinsn);
3436 if (dump_file)
3437 {
3438 fprintf (dump_file, " Insert vsetvl insn before insn %d:\n",
3439 INSN_UID (rinsn));
3440 print_rtl_single (dump_file, PREV_INSN (rinsn));
3441 }
3442 }
acc10c79
JZZ
3443 }
3444 }
acc10c79
JZZ
3445}
3446
/* Lazy vsetvl insertion for optimize > 0.  */
void
pass_vsetvl::lazy_vsetvl ()
{
  if (dump_file)
    fprintf (dump_file, "\nEntering Lazy VSETVL PASS\n\n");

  pre_vsetvl pre = pre_vsetvl ();

  if (dump_file)
    fprintf (dump_file, "\nPhase 1: Fuse local vsetvl infos.\n\n");
  pre.fuse_local_vsetvl_info ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    pre.dump (dump_file, "phase 1");

  /* Phase 2: Fuse header and footer vsetvl infos between basic blocks.  */
  if (dump_file)
    fprintf (dump_file, "\nPhase 2: Lift up vsetvl info.\n\n");
  bool changed;
  int fused_count = 0;
  do
    {
      if (dump_file)
        fprintf (dump_file, " Try lift up %d.\n\n", fused_count);
      changed = pre.earliest_fuse_vsetvl_info ();
      fused_count += 1;
    } while (changed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    pre.dump (dump_file, "phase 2");

  /* Phase 3: Reduce redundant vsetvl infos using LCM.  */
  if (dump_file)
    fprintf (dump_file, "\nPhase 3: Reduce global vsetvl infos.\n\n");
  pre.pre_global_vsetvl_info ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    pre.dump (dump_file, "phase 3");

  /* Phase 4: Insert, modify and remove vsetvl insns.  */
  if (dump_file)
    fprintf (dump_file,
             "\nPhase 4: Insert, modify and remove vsetvl insns.\n\n");
  pre.emit_vsetvl ();

  /* Phase 5: Cleanup.  */
  if (dump_file)
    fprintf (dump_file, "\nPhase 5: Cleanup\n\n");
  pre.cleaup ();

  pre.finish ();
}

/* Main entry point for this pass.  */
unsigned int
pass_vsetvl::execute (function *)
{
  if (n_basic_blocks_for_fn (cfun) <= 0)
    return 0;

  /* RVV instructions are not stable before splitting: they may change once
     they are split.  Since the VSETVL pass runs before the split pass, split
     all insns here to avoid potential issues.  */
  split_all_insns ();

  /* Return early if there are no vector instructions.  */
  if (!has_vector_insn (cfun))
    return 0;

  if (!optimize)
    simple_vsetvl ();
  else
    lazy_vsetvl ();

  return 0;
}

rtl_opt_pass *
make_pass_vsetvl (gcc::context *ctxt)
{
  return new pass_vsetvl (ctxt);
}