]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/riscv/riscv-vsetvl.cc
Update copyright years.
[thirdparty/gcc.git] / gcc / config / riscv / riscv-vsetvl.cc
CommitLineData
9243c3d1 1/* VSETVL pass for RISC-V 'V' Extension for GNU compiler.
a945c346 2 Copyright (C) 2022-2024 Free Software Foundation, Inc.
9243c3d1
JZZ
3 Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3, or(at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
29331e72
LD
21/* The values of the vl and vtype registers will affect the behavior of RVV
22 insns. That is, when we need to execute an RVV instruction, we need to set
23 the correct vl and vtype values by executing the vsetvl instruction before.
24 Executing the fewest number of vsetvl instructions while keeping the behavior
25 the same is the problem this pass is trying to solve. This vsetvl pass is
26 divided into 5 phases:
27
28 - Phase 1 (fuse local vsetvl infos): traverses each Basic Block, parses
29 each instruction in it that affects vl and vtype state and generates an
30 array of vsetvl_info objects. Then traverse the vsetvl_info array from
31 front to back and perform fusion according to the fusion rules. The fused
32 vsetvl infos are stored in the vsetvl_block_info object's `infos` field.
33
34 - Phase 2 (earliest fuse global vsetvl infos): The header_info and
35 footer_info of vsetvl_block_info are used as expressions, and the
36 earliest of each expression is computed. Based on the earliest
37 information, try to lift up the corresponding vsetvl info to the src
38 basic block of the edge (mainly to reduce the total number of vsetvl
39 instructions, this uplift will cause some execution paths to execute
40 vsetvl instructions that shouldn't be there).
41
42 - Phase 3 (pre global vsetvl info): The header_info and footer_info of
43 vsetvl_block_info are used as expressions, and the LCM algorithm is used
44 to compute the header_info that needs to be deleted and the one that
45 needs to be inserted in some edges.
46
47 - Phase 4 (emit vsetvl insns) : Based on the fusion result of Phase 1 and
48 the deletion and insertion information of Phase 3, the mandatory vsetvl
49 instruction insertion, modification and deletion are performed.
50
51 - Phase 5 (cleanup): Clean up the avl operand in the RVV operator
52 instruction and cleanup the unused dest operand of the vsetvl insn.
53
54 After the Phase 1 a virtual CFG of vsetvl_info is generated. The virtual
55 basic block is represented by vsetvl_block_info, and the virtual vsetvl
56 statements inside are represented by vsetvl_info. The later phases 2 and 3
57 are constantly modifying and adjusting this virtual CFG. Phase 4 performs
58 insertion, modification and deletion of vsetvl instructions based on the
59 optimized virtual CFG. The Phase 1, 2 and 3 do not involve modifications to
60 the RTL.
61*/
9243c3d1
JZZ
62
63#define IN_TARGET_CODE 1
64#define INCLUDE_ALGORITHM
65#define INCLUDE_FUNCTIONAL
66
67#include "config.h"
68#include "system.h"
69#include "coretypes.h"
70#include "tm.h"
71#include "backend.h"
72#include "rtl.h"
73#include "target.h"
74#include "tree-pass.h"
75#include "df.h"
76#include "rtl-ssa.h"
77#include "cfgcleanup.h"
78#include "insn-config.h"
79#include "insn-attr.h"
80#include "insn-opinit.h"
81#include "tm-constrs.h"
82#include "cfgrtl.h"
83#include "cfganal.h"
84#include "lcm.h"
85#include "predict.h"
86#include "profile-count.h"
a3ad2301 87#include "gcse.h"
9243c3d1
JZZ
88
89using namespace rtl_ssa;
90using namespace riscv_vector;
91
29331e72
LD
92/* Set the bitmap DST to the union of SRC of predecessors of
93 basic block B.
94 It's a bit different from bitmap_union_of_preds in cfganal.cc. This function
95 takes into account the case where pred is ENTRY basic block. The main reason
96 for this difference is to make it easier to insert some special value into
97 the ENTRY base block. For example, vsetvl_info with a status of UNKNOW. */
98static void
99bitmap_union_of_preds_with_entry (sbitmap dst, sbitmap *src, basic_block b)
100{
101 unsigned int set_size = dst->size;
102 edge e;
103 unsigned ix;
104
105 for (ix = 0; ix < EDGE_COUNT (b->preds); ix++)
106 {
107 e = EDGE_PRED (b, ix);
108 bitmap_copy (dst, src[e->src->index]);
109 break;
110 }
ec99ffab 111
29331e72
LD
112 if (ix == EDGE_COUNT (b->preds))
113 bitmap_clear (dst);
114 else
115 for (ix++; ix < EDGE_COUNT (b->preds); ix++)
116 {
117 unsigned int i;
118 SBITMAP_ELT_TYPE *p, *r;
119
120 e = EDGE_PRED (b, ix);
121 p = src[e->src->index]->elms;
122 r = dst->elms;
123 for (i = 0; i < set_size; i++)
124 *r++ |= *p++;
125 }
126}
127
128/* Compute the reaching defintion in and out based on the gen and KILL
129 informations in each Base Blocks.
130 This function references the compute_avaiable implementation in lcm.cc */
131static void
132compute_reaching_defintion (sbitmap *gen, sbitmap *kill, sbitmap *in,
133 sbitmap *out)
9243c3d1 134{
29331e72
LD
135 edge e;
136 basic_block *worklist, *qin, *qout, *qend, bb;
137 unsigned int qlen;
138 edge_iterator ei;
139
140 /* Allocate a worklist array/queue. Entries are only added to the
141 list if they were not already on the list. So the size is
142 bounded by the number of basic blocks. */
143 qin = qout = worklist
144 = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
145
146 /* Put every block on the worklist; this is necessary because of the
147 optimistic initialization of AVOUT above. Use reverse postorder
148 to make the forward dataflow problem require less iterations. */
149 int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
150 int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false);
151 for (int i = 0; i < n; ++i)
152 {
153 bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]);
154 *qin++ = bb;
155 bb->aux = bb;
156 }
157 free (rpo);
158
159 qin = worklist;
160 qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS];
161 qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;
162
163 /* Mark blocks which are successors of the entry block so that we
164 can easily identify them below. */
165 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
166 e->dest->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun);
167
168 /* Iterate until the worklist is empty. */
169 while (qlen)
170 {
171 /* Take the first entry off the worklist. */
172 bb = *qout++;
173 qlen--;
174
175 if (qout >= qend)
176 qout = worklist;
177
178 /* Do not clear the aux field for blocks which are successors of the
179 ENTRY block. That way we never add then to the worklist again. */
180 if (bb->aux != ENTRY_BLOCK_PTR_FOR_FN (cfun))
181 bb->aux = NULL;
182
183 bitmap_union_of_preds_with_entry (in[bb->index], out, bb);
184
185 if (bitmap_ior_and_compl (out[bb->index], gen[bb->index], in[bb->index],
186 kill[bb->index]))
187 /* If the out state of this block changed, then we need
188 to add the successors of this block to the worklist
189 if they are not already on the worklist. */
190 FOR_EACH_EDGE (e, ei, bb->succs)
191 if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
192 {
193 *qin++ = e->dest;
194 e->dest->aux = e;
195 qlen++;
196
197 if (qin >= qend)
198 qin = worklist;
199 }
200 }
201
202 clear_aux_for_edges ();
203 clear_aux_for_blocks ();
204 free (worklist);
9243c3d1
JZZ
205}
206
29331e72
LD
207/* Classification of vsetvl instruction. */
208enum vsetvl_type
9243c3d1 209{
29331e72
LD
210 VSETVL_NORMAL,
211 VSETVL_VTYPE_CHANGE_ONLY,
212 VSETVL_DISCARD_RESULT,
213 NUM_VSETVL_TYPE
214};
9243c3d1 215
29331e72 216enum emit_type
9243c3d1 217{
29331e72
LD
218 /* emit_insn directly. */
219 EMIT_DIRECT,
220 EMIT_BEFORE,
221 EMIT_AFTER,
222};
223
224/* dump helper functions */
225static const char *
226vlmul_to_str (vlmul_type vlmul)
227{
228 switch (vlmul)
229 {
230 case LMUL_1:
231 return "m1";
232 case LMUL_2:
233 return "m2";
234 case LMUL_4:
235 return "m4";
236 case LMUL_8:
237 return "m8";
238 case LMUL_RESERVED:
239 return "INVALID LMUL";
240 case LMUL_F8:
241 return "mf8";
242 case LMUL_F4:
243 return "mf4";
244 case LMUL_F2:
245 return "mf2";
246
247 default:
248 gcc_unreachable ();
249 }
9243c3d1
JZZ
250}
251
29331e72
LD
252static const char *
253policy_to_str (bool agnostic_p)
9243c3d1 254{
29331e72 255 return agnostic_p ? "agnostic" : "undisturbed";
9243c3d1
JZZ
256}
257
9243c3d1
JZZ
258/* Return true if it is an RVV instruction depends on VTYPE global
259 status register. */
260static bool
261has_vtype_op (rtx_insn *rinsn)
262{
263 return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn);
264}
265
ec99ffab
JZZ
266/* Return true if the instruction ignores VLMUL field of VTYPE. */
267static bool
268ignore_vlmul_insn_p (rtx_insn *rinsn)
269{
270 return get_attr_type (rinsn) == TYPE_VIMOVVX
271 || get_attr_type (rinsn) == TYPE_VFMOVVF
272 || get_attr_type (rinsn) == TYPE_VIMOVXV
273 || get_attr_type (rinsn) == TYPE_VFMOVFV;
274}
275
276/* Return true if the instruction is scalar move instruction. */
277static bool
278scalar_move_insn_p (rtx_insn *rinsn)
279{
280 return get_attr_type (rinsn) == TYPE_VIMOVXV
281 || get_attr_type (rinsn) == TYPE_VFMOVFV;
282}
283
60bd33bc
JZZ
284/* Return true if the instruction is fault first load instruction. */
285static bool
286fault_first_load_p (rtx_insn *rinsn)
287{
6313b045
JZZ
288 return recog_memoized (rinsn) >= 0
289 && (get_attr_type (rinsn) == TYPE_VLDFF
290 || get_attr_type (rinsn) == TYPE_VLSEGDFF);
60bd33bc
JZZ
291}
292
293/* Return true if the instruction is read vl instruction. */
294static bool
295read_vl_insn_p (rtx_insn *rinsn)
296{
297 return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_RDVL;
298}
299
9243c3d1
JZZ
300/* Return true if it is a vsetvl instruction. */
301static bool
302vector_config_insn_p (rtx_insn *rinsn)
303{
304 return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL;
305}
306
307/* Return true if it is vsetvldi or vsetvlsi. */
308static bool
309vsetvl_insn_p (rtx_insn *rinsn)
310{
29331e72 311 if (!rinsn || !vector_config_insn_p (rinsn))
6b6b9c68 312 return false;
85112fbb 313 return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi
6b6b9c68
JZZ
314 || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi);
315}
316
317/* Return true if it is vsetvl zero, rs1. */
318static bool
319vsetvl_discard_result_insn_p (rtx_insn *rinsn)
320{
321 if (!vector_config_insn_p (rinsn))
322 return false;
323 return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi
324 || INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi);
9243c3d1
JZZ
325}
326
9243c3d1 327static bool
4f673c5e 328real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb)
9243c3d1 329{
4f673c5e 330 return insn != nullptr && insn->is_real () && insn->bb () == bb;
9243c3d1
JZZ
331}
332
29331e72 333/* Helper function to get VL operand for VLMAX insn. */
6b6b9c68
JZZ
334static rtx
335get_vl (rtx_insn *rinsn)
336{
337 if (has_vl_op (rinsn))
338 {
339 extract_insn_cached (rinsn);
340 return recog_data.operand[get_attr_vl_op_idx (rinsn)];
341 }
342 return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0));
4f673c5e
JZZ
343}
344
6b6b9c68
JZZ
345/* Helper function to get AVL operand. */
346static rtx
347get_avl (rtx_insn *rinsn)
348{
349 if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn))
350 return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0);
351
352 if (!has_vl_op (rinsn))
353 return NULL_RTX;
5e714992 354 if (vlmax_avl_type_p (rinsn))
6b6b9c68
JZZ
355 return RVV_VLMAX;
356 extract_insn_cached (rinsn);
357 return recog_data.operand[get_attr_vl_op_idx (rinsn)];
358}
359
9243c3d1
JZZ
360/* Get default mask policy. */
361static bool
362get_default_ma ()
363{
364 /* For the instruction that doesn't require MA, we still need a default value
365 to emit vsetvl. We pick up the default value according to prefer policy. */
366 return (bool) (get_prefer_mask_policy () & 0x1
367 || (get_prefer_mask_policy () >> 1 & 0x1));
368}
369
9243c3d1
JZZ
370/* Helper function to get MA operand. */
371static bool
372mask_agnostic_p (rtx_insn *rinsn)
373{
374 /* If it doesn't have MA, we return agnostic by default. */
375 extract_insn_cached (rinsn);
376 int ma = get_attr_ma (rinsn);
377 return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma);
378}
379
380/* Return true if FN has a vector instruction that use VL/VTYPE. */
381static bool
382has_vector_insn (function *fn)
383{
384 basic_block cfg_bb;
385 rtx_insn *rinsn;
386 FOR_ALL_BB_FN (cfg_bb, fn)
387 FOR_BB_INSNS (cfg_bb, rinsn)
388 if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn))
389 return true;
390 return false;
391}
392
29331e72
LD
393static vlmul_type
394calculate_vlmul (unsigned int sew, unsigned int ratio)
9243c3d1 395{
29331e72
LD
396 const vlmul_type ALL_LMUL[]
397 = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2};
398 for (const vlmul_type vlmul : ALL_LMUL)
399 if (calculate_ratio (sew, vlmul) == ratio)
400 return vlmul;
401 return LMUL_RESERVED;
9243c3d1
JZZ
402}
403
29331e72
LD
404/* Get the currently supported maximum sew used in the int rvv instructions. */
405static uint8_t
406get_max_int_sew ()
9243c3d1 407{
29331e72
LD
408 if (TARGET_VECTOR_ELEN_64)
409 return 64;
410 else if (TARGET_VECTOR_ELEN_32)
411 return 32;
412 gcc_unreachable ();
9243c3d1
JZZ
413}
414
29331e72
LD
415/* Get the currently supported maximum sew used in the float rvv instructions.
416 */
417static uint8_t
418get_max_float_sew ()
419{
420 if (TARGET_VECTOR_ELEN_FP_64)
421 return 64;
422 else if (TARGET_VECTOR_ELEN_FP_32)
423 return 32;
424 else if (TARGET_VECTOR_ELEN_FP_16)
425 return 16;
426 gcc_unreachable ();
9243c3d1
JZZ
427}
428
29331e72 429enum def_type
9243c3d1 430{
29331e72
LD
431 REAL_SET = 1 << 0,
432 PHI_SET = 1 << 1,
433 BB_HEAD_SET = 1 << 2,
434 BB_END_SET = 1 << 3,
435 /* ??? TODO: In RTL_SSA framework, we have REAL_SET,
436 PHI_SET, BB_HEAD_SET, BB_END_SET and
437 CLOBBER_DEF def_info types. Currently,
438 we conservatively do not optimize clobber
439 def since we don't see the case that we
440 need to optimize it. */
441 CLOBBER_DEF = 1 << 4
442};
9243c3d1 443
29331e72
LD
444static bool
445insn_should_be_added_p (const insn_info *insn, unsigned int types)
da93c41c 446{
29331e72
LD
447 if (insn->is_real () && (types & REAL_SET))
448 return true;
449 if (insn->is_phi () && (types & PHI_SET))
450 return true;
451 if (insn->is_bb_head () && (types & BB_HEAD_SET))
452 return true;
453 if (insn->is_bb_end () && (types & BB_END_SET))
454 return true;
455 return false;
da93c41c
JZ
456}
457
29331e72
LD
458static const hash_set<use_info *>
459get_all_real_uses (insn_info *insn, unsigned regno)
9243c3d1 460{
29331e72 461 gcc_assert (insn->is_real ());
9243c3d1 462
29331e72
LD
463 hash_set<use_info *> uses;
464 auto_vec<phi_info *> work_list;
465 hash_set<phi_info *> visited_list;
9243c3d1 466
29331e72 467 for (def_info *def : insn->defs ())
9243c3d1 468 {
29331e72
LD
469 if (!def->is_reg () || def->regno () != regno)
470 continue;
471 set_info *set = safe_dyn_cast<set_info *> (def);
472 if (!set)
473 continue;
474 for (use_info *use : set->nondebug_insn_uses ())
475 if (use->insn ()->is_real ())
476 uses.add (use);
477 for (use_info *use : set->phi_uses ())
478 work_list.safe_push (use->phi ());
9243c3d1 479 }
9243c3d1 480
29331e72 481 while (!work_list.is_empty ())
60bd33bc 482 {
29331e72
LD
483 phi_info *phi = work_list.pop ();
484 visited_list.add (phi);
60bd33bc 485
29331e72
LD
486 for (use_info *use : phi->nondebug_insn_uses ())
487 if (use->insn ()->is_real ())
488 uses.add (use);
489 for (use_info *use : phi->phi_uses ())
490 if (!visited_list.contains (use->phi ()))
491 work_list.safe_push (use->phi ());
60bd33bc 492 }
29331e72 493 return uses;
60bd33bc
JZZ
494}
495
29331e72
LD
496/* Recursively find all define instructions. The kind of instruction is
497 specified by the DEF_TYPE. */
498static hash_set<set_info *>
499get_all_sets (phi_info *phi, unsigned int types)
9243c3d1 500{
29331e72
LD
501 hash_set<set_info *> insns;
502 auto_vec<phi_info *> work_list;
503 hash_set<phi_info *> visited_list;
504 if (!phi)
505 return hash_set<set_info *> ();
506 work_list.safe_push (phi);
9243c3d1 507
29331e72 508 while (!work_list.is_empty ())
9243c3d1 509 {
29331e72
LD
510 phi_info *phi = work_list.pop ();
511 visited_list.add (phi);
512 for (use_info *use : phi->inputs ())
513 {
514 def_info *def = use->def ();
515 set_info *set = safe_dyn_cast<set_info *> (def);
516 if (!set)
517 return hash_set<set_info *> ();
a1e42094 518
29331e72 519 gcc_assert (!set->insn ()->is_debug_insn ());
9243c3d1 520
29331e72
LD
521 if (insn_should_be_added_p (set->insn (), types))
522 insns.add (set);
523 if (set->insn ()->is_phi ())
524 {
525 phi_info *new_phi = as_a<phi_info *> (set);
526 if (!visited_list.contains (new_phi))
527 work_list.safe_push (new_phi);
528 }
529 }
9243c3d1 530 }
29331e72 531 return insns;
9243c3d1
JZZ
532}
533
29331e72
LD
534static hash_set<set_info *>
535get_all_sets (set_info *set, bool /* get_real_inst */ real_p,
536 bool /*get_phi*/ phi_p, bool /* get_function_parameter*/ param_p)
aef20243 537{
29331e72
LD
538 if (real_p && phi_p && param_p)
539 return get_all_sets (safe_dyn_cast<phi_info *> (set),
540 REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET);
aef20243 541
29331e72
LD
542 else if (real_p && param_p)
543 return get_all_sets (safe_dyn_cast<phi_info *> (set),
544 REAL_SET | BB_HEAD_SET | BB_END_SET);
545
546 else if (real_p)
547 return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET);
548 return hash_set<set_info *> ();
69f39144
JZ
549}
550
4f673c5e 551static bool
6b6b9c68 552source_equal_p (insn_info *insn1, insn_info *insn2)
4f673c5e 553{
6b6b9c68
JZZ
554 if (!insn1 || !insn2)
555 return false;
556 rtx_insn *rinsn1 = insn1->rtl ();
557 rtx_insn *rinsn2 = insn2->rtl ();
4f673c5e
JZZ
558 if (!rinsn1 || !rinsn2)
559 return false;
29331e72 560
4f673c5e
JZZ
561 rtx note1 = find_reg_equal_equiv_note (rinsn1);
562 rtx note2 = find_reg_equal_equiv_note (rinsn2);
2020bce3
RD
563 /* We could handle the case of similar-looking REG_EQUALs as well but
564 would need to verify that no insn in between modifies any of the source
565 operands. */
566 if (note1 && note2 && rtx_equal_p (note1, note2)
567 && REG_NOTE_KIND (note1) == REG_EQUIV)
4f673c5e 568 return true;
29331e72 569 return false;
4f673c5e
JZZ
570}
571
6b6b9c68 572static insn_info *
4f673c5e
JZZ
573extract_single_source (set_info *set)
574{
575 if (!set)
576 return nullptr;
577 if (set->insn ()->is_real ())
6b6b9c68 578 return set->insn ();
4f673c5e
JZZ
579 if (!set->insn ()->is_phi ())
580 return nullptr;
6b6b9c68 581 hash_set<set_info *> sets = get_all_sets (set, true, false, true);
330bb064
JZ
582 if (sets.is_empty ())
583 return nullptr;
4f673c5e 584
6b6b9c68 585 insn_info *first_insn = (*sets.begin ())->insn ();
4f673c5e
JZZ
586 if (first_insn->is_artificial ())
587 return nullptr;
6b6b9c68 588 for (const set_info *set : sets)
4f673c5e
JZZ
589 {
590 /* If there is a head or end insn, we conservative return
591 NULL so that VSETVL PASS will insert vsetvl directly. */
6b6b9c68 592 if (set->insn ()->is_artificial ())
4f673c5e 593 return nullptr;
29331e72 594 if (set != *sets.begin () && !source_equal_p (set->insn (), first_insn))
4f673c5e
JZZ
595 return nullptr;
596 }
597
6b6b9c68 598 return first_insn;
4f673c5e
JZZ
599}
600
db642d60
JZ
601static insn_info *
602extract_single_source (def_info *def)
603{
604 if (!def)
605 return nullptr;
606 return extract_single_source (dyn_cast<set_info *> (def));
607}
608
29331e72
LD
609static bool
610same_equiv_note_p (set_info *set1, set_info *set2)
ec99ffab 611{
29331e72
LD
612 insn_info *insn1 = extract_single_source (set1);
613 insn_info *insn2 = extract_single_source (set2);
614 if (!insn1 || !insn2)
615 return false;
616 return source_equal_p (insn1, insn2);
ec99ffab
JZZ
617}
618
29331e72
LD
619static unsigned
620get_expr_id (unsigned bb_index, unsigned regno, unsigned num_bbs)
ec99ffab 621{
29331e72 622 return regno * num_bbs + bb_index;
ec99ffab 623}
29331e72
LD
624static unsigned
625get_regno (unsigned expr_id, unsigned num_bb)
ec99ffab 626{
29331e72 627 return expr_id / num_bb;
ec99ffab 628}
29331e72
LD
629static unsigned
630get_bb_index (unsigned expr_id, unsigned num_bb)
ec99ffab 631{
29331e72 632 return expr_id % num_bb;
ec99ffab
JZZ
633}
634
29331e72 635/* Return true if the SET result is not used by any instructions. */
ec99ffab 636static bool
29331e72 637has_no_uses (basic_block cfg_bb, rtx_insn *rinsn, int regno)
ec99ffab 638{
29331e72
LD
639 if (bitmap_bit_p (df_get_live_out (cfg_bb), regno))
640 return false;
ec99ffab 641
29331e72
LD
642 rtx_insn *iter;
643 for (iter = NEXT_INSN (rinsn); iter && iter != NEXT_INSN (BB_END (cfg_bb));
644 iter = NEXT_INSN (iter))
645 if (df_find_use (iter, regno_reg_rtx[regno]))
646 return false;
ec99ffab 647
29331e72 648 return true;
ec99ffab
JZZ
649}
650
29331e72
LD
651/* This flags indicates the minimum demand of the vl and vtype values by the
652 RVV instruction. For example, DEMAND_RATIO_P indicates that this RVV
653 instruction only needs the SEW/LMUL ratio to remain the same, and does not
654 require SEW and LMUL to be fixed.
655 Therefore, if the former RVV instruction needs DEMAND_RATIO_P and the latter
656 instruction needs DEMAND_SEW_LMUL_P and its SEW/LMUL is the same as that of
657 the former instruction, then we can make the minimu demand of the former
658 instruction strict to DEMAND_SEW_LMUL_P, and its required SEW and LMUL are
659 the SEW and LMUL of the latter instruction, and the vsetvl instruction
660 generated according to the new demand can also be used for the latter
661 instruction, so there is no need to insert a separate vsetvl instruction for
662 the latter instruction. */
663enum demand_flags : unsigned
664{
665 DEMAND_EMPTY_P = 0,
666 DEMAND_SEW_P = 1 << 0,
667 DEMAND_LMUL_P = 1 << 1,
668 DEMAND_RATIO_P = 1 << 2,
669 DEMAND_GE_SEW_P = 1 << 3,
670 DEMAND_TAIL_POLICY_P = 1 << 4,
671 DEMAND_MASK_POLICY_P = 1 << 5,
672 DEMAND_AVL_P = 1 << 6,
673 DEMAND_NON_ZERO_AVL_P = 1 << 7,
674};
ec99ffab 675
29331e72
LD
676/* We split the demand information into three parts. They are sew and lmul
677 related (sew_lmul_demand_type), tail and mask policy related
678 (policy_demand_type) and avl related (avl_demand_type). Then we define three
679 interfaces avaiable_with, compatible_p and merge. avaiable_with is
680 used to determine whether the two vsetvl infos prev_info and next_info are
681 available or not. If prev_info is available for next_info, it means that the
682 RVV insn corresponding to next_info on the path from prev_info to next_info
683 can be used without inserting a separate vsetvl instruction. compatible_p
684 is used to determine whether prev_info is compatible with next_info, and if
685 so, merge can be used to merge the stricter demand information from
686 next_info into prev_info so that prev_info becomes available to next_info.
687 */
ec99ffab 688
29331e72 689enum class sew_lmul_demand_type : unsigned
ec99ffab 690{
29331e72
LD
691 sew_lmul = demand_flags::DEMAND_SEW_P | demand_flags::DEMAND_LMUL_P,
692 ratio_only = demand_flags::DEMAND_RATIO_P,
693 sew_only = demand_flags::DEMAND_SEW_P,
694 ge_sew = demand_flags::DEMAND_GE_SEW_P,
695 ratio_and_ge_sew
696 = demand_flags::DEMAND_RATIO_P | demand_flags::DEMAND_GE_SEW_P,
697};
ec99ffab 698
29331e72 699enum class policy_demand_type : unsigned
29547511 700{
29331e72
LD
701 tail_mask_policy
702 = demand_flags::DEMAND_TAIL_POLICY_P | demand_flags::DEMAND_MASK_POLICY_P,
703 tail_policy_only = demand_flags::DEMAND_TAIL_POLICY_P,
704 mask_policy_only = demand_flags::DEMAND_MASK_POLICY_P,
705 ignore_policy = demand_flags::DEMAND_EMPTY_P,
706};
29547511 707
29331e72 708enum class avl_demand_type : unsigned
ec99ffab 709{
29331e72
LD
710 avl = demand_flags::DEMAND_AVL_P,
711 non_zero_avl = demand_flags::DEMAND_NON_ZERO_AVL_P,
712 ignore_avl = demand_flags::DEMAND_EMPTY_P,
713};
ec99ffab 714
29331e72 715class vsetvl_info
ec99ffab 716{
29331e72
LD
717private:
718 insn_info *m_insn;
719 bb_info *m_bb;
720 rtx m_avl;
721 rtx m_vl;
722 set_info *m_avl_def;
723 uint8_t m_sew;
724 uint8_t m_max_sew;
725 vlmul_type m_vlmul;
726 uint8_t m_ratio;
727 bool m_ta;
728 bool m_ma;
729
730 sew_lmul_demand_type m_sew_lmul_demand;
731 policy_demand_type m_policy_demand;
732 avl_demand_type m_avl_demand;
733
734 enum class state_type
735 {
736 UNINITIALIZED,
737 VALID,
738 UNKNOWN,
739 EMPTY,
740 };
741 state_type m_state;
742
743 bool m_delete;
744 bool m_change_vtype_only;
745 insn_info *m_read_vl_insn;
746 bool m_vl_used_by_non_rvv_insn;
ec99ffab 747
29331e72
LD
748public:
749 vsetvl_info ()
750 : m_insn (nullptr), m_bb (nullptr), m_avl (NULL_RTX), m_vl (NULL_RTX),
751 m_avl_def (nullptr), m_sew (0), m_max_sew (0), m_vlmul (LMUL_RESERVED),
752 m_ratio (0), m_ta (false), m_ma (false),
753 m_sew_lmul_demand (sew_lmul_demand_type::sew_lmul),
754 m_policy_demand (policy_demand_type::tail_mask_policy),
755 m_avl_demand (avl_demand_type::avl), m_state (state_type::UNINITIALIZED),
756 m_delete (false), m_change_vtype_only (false), m_read_vl_insn (nullptr),
757 m_vl_used_by_non_rvv_insn (false)
758 {}
759
760 vsetvl_info (insn_info *insn) : vsetvl_info () { parse_insn (insn); }
761
762 vsetvl_info (rtx_insn *insn) : vsetvl_info () { parse_insn (insn); }
763
764 void set_avl (rtx avl) { m_avl = avl; }
765 void set_vl (rtx vl) { m_vl = vl; }
766 void set_avl_def (set_info *avl_def) { m_avl_def = avl_def; }
767 void set_sew (uint8_t sew) { m_sew = sew; }
768 void set_vlmul (vlmul_type vlmul) { m_vlmul = vlmul; }
769 void set_ratio (uint8_t ratio) { m_ratio = ratio; }
770 void set_ta (bool ta) { m_ta = ta; }
771 void set_ma (bool ma) { m_ma = ma; }
772 void set_delete () { m_delete = true; }
773 void set_bb (bb_info *bb) { m_bb = bb; }
774 void set_max_sew (uint8_t max_sew) { m_max_sew = max_sew; }
775 void set_change_vtype_only () { m_change_vtype_only = true; }
776 void set_read_vl_insn (insn_info *insn) { m_read_vl_insn = insn; }
777
778 rtx get_avl () const { return m_avl; }
779 rtx get_vl () const { return m_vl; }
780 set_info *get_avl_def () const { return m_avl_def; }
781 uint8_t get_sew () const { return m_sew; }
782 vlmul_type get_vlmul () const { return m_vlmul; }
783 uint8_t get_ratio () const { return m_ratio; }
784 bool get_ta () const { return m_ta; }
785 bool get_ma () const { return m_ma; }
786 insn_info *get_insn () const { return m_insn; }
787 bool delete_p () const { return m_delete; }
788 bb_info *get_bb () const { return m_bb; }
789 uint8_t get_max_sew () const { return m_max_sew; }
790 insn_info *get_read_vl_insn () const { return m_read_vl_insn; }
4cd4c34a 791 bool vl_used_by_non_rvv_insn_p () const { return m_vl_used_by_non_rvv_insn; }
29331e72
LD
792
793 bool has_imm_avl () const { return m_avl && CONST_INT_P (m_avl); }
794 bool has_vlmax_avl () const { return vlmax_avl_p (m_avl); }
795 bool has_nonvlmax_reg_avl () const
796 {
797 return m_avl && REG_P (m_avl) && !has_vlmax_avl ();
798 }
799 bool has_non_zero_avl () const
800 {
801 if (has_imm_avl ())
802 return INTVAL (m_avl) > 0;
803 return has_vlmax_avl ();
804 }
805 bool has_vl () const
806 {
807 /* The VL operand can only be either a NULL_RTX or a register. */
808 gcc_assert (!m_vl || REG_P (m_vl));
809 return m_vl != NULL_RTX;
810 }
811 bool has_same_ratio (const vsetvl_info &other) const
812 {
813 return get_ratio () == other.get_ratio ();
814 }
815
816 /* The block of INSN isn't always same as the block of the VSETVL_INFO,
817 meaning we may have 'get_insn ()->bb () != get_bb ()'.
818
819 E.g. BB 2 (Empty) ---> BB 3 (VALID, has rvv insn 1)
820
821 BB 2 has empty VSETVL_INFO, wheras BB 3 has VSETVL_INFO that satisfies
822 get_insn ()->bb () == get_bb (). In earliest fusion, we may fuse bb 3 and
823 bb 2 so that the 'get_bb ()' of BB2 VSETVL_INFO will be BB2 wheras the
824 'get_insn ()' of BB2 VSETVL INFO will be the rvv insn 1 (which is located
825 at BB3). */
826 bool insn_inside_bb_p () const { return get_insn ()->bb () == get_bb (); }
827 void update_avl (const vsetvl_info &other)
828 {
829 m_avl = other.get_avl ();
830 m_vl = other.get_vl ();
831 m_avl_def = other.get_avl_def ();
832 }
833
834 bool uninit_p () const { return m_state == state_type::UNINITIALIZED; }
835 bool valid_p () const { return m_state == state_type::VALID; }
836 bool unknown_p () const { return m_state == state_type::UNKNOWN; }
837 bool empty_p () const { return m_state == state_type::EMPTY; }
838 bool change_vtype_only_p () const { return m_change_vtype_only; }
839
840 void set_valid () { m_state = state_type::VALID; }
841 void set_unknown () { m_state = state_type::UNKNOWN; }
842 void set_empty () { m_state = state_type::EMPTY; }
843
844 void set_sew_lmul_demand (sew_lmul_demand_type demand)
845 {
846 m_sew_lmul_demand = demand;
847 }
848 void set_policy_demand (policy_demand_type demand)
849 {
850 m_policy_demand = demand;
851 }
852 void set_avl_demand (avl_demand_type demand) { m_avl_demand = demand; }
853
854 sew_lmul_demand_type get_sew_lmul_demand () const
855 {
856 return m_sew_lmul_demand;
857 }
858 policy_demand_type get_policy_demand () const { return m_policy_demand; }
859 avl_demand_type get_avl_demand () const { return m_avl_demand; }
860
861 void normalize_demand (unsigned demand_flags)
862 {
863 switch (demand_flags
864 & (DEMAND_SEW_P | DEMAND_LMUL_P | DEMAND_RATIO_P | DEMAND_GE_SEW_P))
865 {
866 case (unsigned) sew_lmul_demand_type::sew_lmul:
867 m_sew_lmul_demand = sew_lmul_demand_type::sew_lmul;
868 break;
869 case (unsigned) sew_lmul_demand_type::ratio_only:
870 m_sew_lmul_demand = sew_lmul_demand_type::ratio_only;
871 break;
872 case (unsigned) sew_lmul_demand_type::sew_only:
873 m_sew_lmul_demand = sew_lmul_demand_type::sew_only;
874 break;
875 case (unsigned) sew_lmul_demand_type::ge_sew:
876 m_sew_lmul_demand = sew_lmul_demand_type::ge_sew;
877 break;
878 case (unsigned) sew_lmul_demand_type::ratio_and_ge_sew:
879 m_sew_lmul_demand = sew_lmul_demand_type::ratio_and_ge_sew;
880 break;
881 default:
882 gcc_unreachable ();
883 }
884
885 switch (demand_flags & (DEMAND_TAIL_POLICY_P | DEMAND_MASK_POLICY_P))
886 {
887 case (unsigned) policy_demand_type::tail_mask_policy:
888 m_policy_demand = policy_demand_type::tail_mask_policy;
889 break;
890 case (unsigned) policy_demand_type::tail_policy_only:
891 m_policy_demand = policy_demand_type::tail_policy_only;
892 break;
893 case (unsigned) policy_demand_type::mask_policy_only:
894 m_policy_demand = policy_demand_type::mask_policy_only;
895 break;
896 case (unsigned) policy_demand_type::ignore_policy:
897 m_policy_demand = policy_demand_type::ignore_policy;
898 break;
899 default:
900 gcc_unreachable ();
901 }
902
903 switch (demand_flags & (DEMAND_AVL_P | DEMAND_NON_ZERO_AVL_P))
904 {
905 case (unsigned) avl_demand_type::avl:
906 m_avl_demand = avl_demand_type::avl;
907 break;
908 case (unsigned) avl_demand_type::non_zero_avl:
909 m_avl_demand = avl_demand_type::non_zero_avl;
910 break;
911 case (unsigned) avl_demand_type::ignore_avl:
912 m_avl_demand = avl_demand_type::ignore_avl;
913 break;
914 default:
915 gcc_unreachable ();
916 }
917 }
918
919 void parse_insn (rtx_insn *rinsn)
920 {
921 if (!NONDEBUG_INSN_P (rinsn))
922 return;
923 if (optimize == 0 && !has_vtype_op (rinsn))
924 return;
925 gcc_assert (!vsetvl_discard_result_insn_p (rinsn));
926 set_valid ();
927 extract_insn_cached (rinsn);
928 m_avl = ::get_avl (rinsn);
929 if (has_vlmax_avl () || vsetvl_insn_p (rinsn))
930 m_vl = ::get_vl (rinsn);
931 m_sew = ::get_sew (rinsn);
932 m_vlmul = ::get_vlmul (rinsn);
933 m_ta = tail_agnostic_p (rinsn);
934 m_ma = mask_agnostic_p (rinsn);
935 }
936
937 void parse_insn (insn_info *insn)
938 {
939 m_insn = insn;
940 m_bb = insn->bb ();
941 /* Return if it is debug insn for the consistency with optimize == 0. */
942 if (insn->is_debug_insn ())
943 return;
ec99ffab 944
29331e72
LD
945 /* We set it as unknown since we don't what will happen in CALL or ASM. */
946 if (insn->is_call () || insn->is_asm ())
947 {
948 set_unknown ();
949 return;
950 }
951
952 /* If this is something that updates VL/VTYPE that we don't know about, set
953 the state to unknown. */
954 if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())
955 && (find_access (insn->defs (), VL_REGNUM)
956 || find_access (insn->defs (), VTYPE_REGNUM)))
957 {
958 set_unknown ();
959 return;
960 }
961
962 if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()))
963 /* uninitialized */
964 return;
ec99ffab 965
29331e72
LD
966 set_valid ();
967
968 m_avl = ::get_avl (insn->rtl ());
969 if (m_avl)
970 {
971 if (vsetvl_insn_p (insn->rtl ()) || has_vlmax_avl ())
972 m_vl = ::get_vl (insn->rtl ());
973
974 if (has_nonvlmax_reg_avl ())
975 m_avl_def = find_access (insn->uses (), REGNO (m_avl))->def ();
976 }
977
978 m_sew = ::get_sew (insn->rtl ());
979 m_vlmul = ::get_vlmul (insn->rtl ());
980 m_ratio = get_attr_ratio (insn->rtl ());
981 /* when get_attr_ratio is invalid, this kind of instructions
982 doesn't care about ratio. However, we still need this value
983 in demand info backward analysis. */
984 if (m_ratio == INVALID_ATTRIBUTE)
985 m_ratio = calculate_ratio (m_sew, m_vlmul);
986 m_ta = tail_agnostic_p (insn->rtl ());
987 m_ma = mask_agnostic_p (insn->rtl ());
988
989 /* If merge operand is undef value, we prefer agnostic. */
990 int merge_op_idx = get_attr_merge_op_idx (insn->rtl ());
991 if (merge_op_idx != INVALID_ATTRIBUTE
992 && satisfies_constraint_vu (recog_data.operand[merge_op_idx]))
993 {
994 m_ta = true;
995 m_ma = true;
996 }
997
998 /* Determine the demand info of the RVV insn. */
999 m_max_sew = get_max_int_sew ();
193ef02a 1000 unsigned dflags = 0;
29331e72
LD
1001 if (vector_config_insn_p (insn->rtl ()))
1002 {
193ef02a
RS
1003 dflags |= demand_flags::DEMAND_AVL_P;
1004 dflags |= demand_flags::DEMAND_RATIO_P;
29331e72
LD
1005 }
1006 else
1007 {
1008 if (has_vl_op (insn->rtl ()))
1009 {
1010 if (scalar_move_insn_p (insn->rtl ()))
1011 {
1012 /* If the avl for vmv.s.x comes from the vsetvl instruction, we
1013 don't know if the avl is non-zero, so it is set to
1014 DEMAND_AVL_P for now. it may be corrected to
1015 DEMAND_NON_ZERO_AVL_P later when more information is
1016 available.
1017 */
1018 if (has_non_zero_avl ())
193ef02a 1019 dflags |= demand_flags::DEMAND_NON_ZERO_AVL_P;
29331e72 1020 else
193ef02a 1021 dflags |= demand_flags::DEMAND_AVL_P;
29331e72
LD
1022 }
1023 else
193ef02a 1024 dflags |= demand_flags::DEMAND_AVL_P;
29331e72 1025 }
ec99ffab 1026
29331e72 1027 if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE)
193ef02a 1028 dflags |= demand_flags::DEMAND_RATIO_P;
29331e72
LD
1029 else
1030 {
1031 if (scalar_move_insn_p (insn->rtl ()) && m_ta)
1032 {
193ef02a 1033 dflags |= demand_flags::DEMAND_GE_SEW_P;
29331e72
LD
1034 m_max_sew = get_attr_type (insn->rtl ()) == TYPE_VFMOVFV
1035 ? get_max_float_sew ()
1036 : get_max_int_sew ();
1037 }
1038 else
193ef02a 1039 dflags |= demand_flags::DEMAND_SEW_P;
29331e72
LD
1040
1041 if (!ignore_vlmul_insn_p (insn->rtl ()))
193ef02a 1042 dflags |= demand_flags::DEMAND_LMUL_P;
29331e72 1043 }
ec99ffab 1044
29331e72 1045 if (!m_ta)
193ef02a 1046 dflags |= demand_flags::DEMAND_TAIL_POLICY_P;
29331e72 1047 if (!m_ma)
193ef02a 1048 dflags |= demand_flags::DEMAND_MASK_POLICY_P;
29331e72
LD
1049 }
1050
193ef02a 1051 normalize_demand (dflags);
29331e72
LD
1052
1053 /* Optimize AVL from the vsetvl instruction. */
1054 insn_info *def_insn = extract_single_source (get_avl_def ());
1055 if (def_insn && vsetvl_insn_p (def_insn->rtl ()))
1056 {
1057 vsetvl_info def_info = vsetvl_info (def_insn);
1058 if ((scalar_move_insn_p (insn->rtl ())
1059 || def_info.get_ratio () == get_ratio ())
1060 && (def_info.has_vlmax_avl () || def_info.has_imm_avl ()))
1061 {
1062 update_avl (def_info);
1063 if (scalar_move_insn_p (insn->rtl ()) && has_non_zero_avl ())
1064 m_avl_demand = avl_demand_type::non_zero_avl;
1065 }
1066 }
1067
1068 /* Determine if dest operand(vl) has been used by non-RVV instructions. */
1069 if (has_vl ())
1070 {
1071 const hash_set<use_info *> vl_uses
1072 = get_all_real_uses (get_insn (), REGNO (get_vl ()));
1073 for (use_info *use : vl_uses)
1074 {
1075 gcc_assert (use->insn ()->is_real ());
1076 rtx_insn *rinsn = use->insn ()->rtl ();
1077 if (!has_vl_op (rinsn)
1078 || count_regno_occurrences (rinsn, REGNO (get_vl ())) != 1)
1079 {
1080 m_vl_used_by_non_rvv_insn = true;
1081 break;
1082 }
1083 rtx avl = ::get_avl (rinsn);
c2f23514 1084 if (!avl || !REG_P (avl) || REGNO (get_vl ()) != REGNO (avl))
29331e72
LD
1085 {
1086 m_vl_used_by_non_rvv_insn = true;
1087 break;
1088 }
1089 }
1090 }
ec99ffab 1091
29331e72
LD
1092 /* Collect the read vl insn for the fault-only-first rvv loads. */
1093 if (fault_first_load_p (insn->rtl ()))
1094 {
1095 for (insn_info *i = insn->next_nondebug_insn ();
1096 i->bb () == insn->bb (); i = i->next_nondebug_insn ())
1097 {
1098 if (find_access (i->defs (), VL_REGNUM))
1099 break;
1100 if (i->rtl () && read_vl_insn_p (i->rtl ()))
1101 {
1102 m_read_vl_insn = i;
1103 break;
1104 }
1105 }
1106 }
1107 }
1108
1109 /* Returns the corresponding vsetvl rtx pat. */
1110 rtx get_vsetvl_pat (bool ignore_vl = false) const
1111 {
1112 rtx avl = get_avl ();
1113 /* if optimization == 0 and the instruction is vmv.x.s/vfmv.f.s,
1114 set the value of avl to (const_int 0) so that VSETVL PASS will
1115 insert vsetvl correctly.*/
1116 if (!get_avl ())
1117 avl = GEN_INT (0);
1118 rtx sew = gen_int_mode (get_sew (), Pmode);
1119 rtx vlmul = gen_int_mode (get_vlmul (), Pmode);
1120 rtx ta = gen_int_mode (get_ta (), Pmode);
1121 rtx ma = gen_int_mode (get_ma (), Pmode);
1122
1123 if (change_vtype_only_p ())
1124 return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma);
1125 else if (has_vl () && !ignore_vl)
1126 return gen_vsetvl (Pmode, get_vl (), avl, sew, vlmul, ta, ma);
1127 else
1128 return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma);
1129 }
1130
d82bb518
JZ
1131 /* Return true that the non-AVL operands of THIS will be modified
1132 if we fuse the VL modification from OTHER into THIS. */
1133 bool vl_modify_non_avl_op_p (const vsetvl_info &other) const
1134 {
1135 /* We don't need to worry about any operands from THIS be
1136 modified by OTHER vsetvl since we OTHER vsetvl doesn't
1137 modify any operand. */
1138 if (!other.has_vl ())
1139 return false;
1140
1141 /* THIS VL operand always preempt OTHER VL operand. */
1142 if (this->has_vl ())
1143 return false;
1144
1145 /* If THIS has non IMM AVL and THIS is AVL compatible with
1146 OTHER, the AVL value of THIS is same as VL value of OTHER. */
1147 if (!this->has_imm_avl ())
1148 return false;
1149 return find_access (this->get_insn ()->uses (), REGNO (other.get_vl ()));
1150 }
1151
29331e72
LD
1152 bool operator== (const vsetvl_info &other) const
1153 {
1154 gcc_assert (!uninit_p () && !other.uninit_p ()
1155 && "Uninitialization should not happen");
1156
1157 if (empty_p ())
1158 return other.empty_p ();
1159 if (unknown_p ())
1160 return other.unknown_p ();
1161
1162 return get_insn () == other.get_insn () && get_bb () == other.get_bb ()
1163 && get_avl () == other.get_avl () && get_vl () == other.get_vl ()
1164 && get_avl_def () == other.get_avl_def ()
1165 && get_sew () == other.get_sew ()
1166 && get_vlmul () == other.get_vlmul () && get_ta () == other.get_ta ()
1167 && get_ma () == other.get_ma ()
1168 && get_avl_demand () == other.get_avl_demand ()
1169 && get_sew_lmul_demand () == other.get_sew_lmul_demand ()
1170 && get_policy_demand () == other.get_policy_demand ();
1171 }
1172
1173 void dump (FILE *file, const char *indent = "") const
1174 {
1175 if (uninit_p ())
1176 {
1177 fprintf (file, "UNINITIALIZED.\n");
1178 return;
1179 }
1180 else if (unknown_p ())
1181 {
1182 fprintf (file, "UNKNOWN.\n");
1183 return;
1184 }
1185 else if (empty_p ())
1186 {
1187 fprintf (file, "EMPTY.\n");
1188 return;
1189 }
1190 else if (valid_p ())
1191 fprintf (file, "VALID (insn %u, bb %u)%s\n", get_insn ()->uid (),
1192 get_bb ()->index (), delete_p () ? " (deleted)" : "");
1193 else
1194 gcc_unreachable ();
ec99ffab 1195
29331e72
LD
1196 fprintf (file, "%sDemand fields:", indent);
1197 if (m_sew_lmul_demand == sew_lmul_demand_type::sew_lmul)
1198 fprintf (file, " demand_sew_lmul");
1199 else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_only)
1200 fprintf (file, " demand_ratio_only");
1201 else if (m_sew_lmul_demand == sew_lmul_demand_type::sew_only)
1202 fprintf (file, " demand_sew_only");
1203 else if (m_sew_lmul_demand == sew_lmul_demand_type::ge_sew)
1204 fprintf (file, " demand_ge_sew");
1205 else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_and_ge_sew)
1206 fprintf (file, " demand_ratio_and_ge_sew");
1207
1208 if (m_policy_demand == policy_demand_type::tail_mask_policy)
1209 fprintf (file, " demand_tail_mask_policy");
1210 else if (m_policy_demand == policy_demand_type::tail_policy_only)
1211 fprintf (file, " demand_tail_policy_only");
1212 else if (m_policy_demand == policy_demand_type::mask_policy_only)
1213 fprintf (file, " demand_mask_policy_only");
1214
1215 if (m_avl_demand == avl_demand_type::avl)
1216 fprintf (file, " demand_avl");
1217 else if (m_avl_demand == avl_demand_type::non_zero_avl)
1218 fprintf (file, " demand_non_zero_avl");
1219 fprintf (file, "\n");
1220
1221 fprintf (file, "%sSEW=%d, ", indent, get_sew ());
1222 fprintf (file, "VLMUL=%s, ", vlmul_to_str (get_vlmul ()));
1223 fprintf (file, "RATIO=%d, ", get_ratio ());
1224 fprintf (file, "MAX_SEW=%d\n", get_max_sew ());
1225
1226 fprintf (file, "%sTAIL_POLICY=%s, ", indent, policy_to_str (get_ta ()));
1227 fprintf (file, "MASK_POLICY=%s\n", policy_to_str (get_ma ()));
1228
1229 fprintf (file, "%sAVL=", indent);
1230 print_rtl_single (file, get_avl ());
1231 fprintf (file, "%sVL=", indent);
1232 print_rtl_single (file, get_vl ());
1233 if (change_vtype_only_p ())
1234 fprintf (file, "%schange vtype only\n", indent);
1235 if (get_read_vl_insn ())
1236 fprintf (file, "%sread_vl_insn: insn %u\n", indent,
1237 get_read_vl_insn ()->uid ());
4cd4c34a 1238 if (vl_used_by_non_rvv_insn_p ())
29331e72
LD
1239 fprintf (file, "%suse_by_non_rvv_insn=true\n", indent);
1240 }
1241};
8fbc0871 1242
29331e72 1243class vsetvl_block_info
ec99ffab 1244{
29331e72
LD
1245public:
1246 /* The static execute probability of the demand info. */
1247 profile_probability probability;
1248
4fd09aed
JZ
1249 auto_vec<vsetvl_info> local_infos;
1250 vsetvl_info global_info;
1251 bb_info *bb;
29331e72
LD
1252
1253 bool full_available;
1254
4fd09aed 1255 vsetvl_block_info () : bb (nullptr), full_available (false)
29331e72 1256 {
4fd09aed
JZ
1257 local_infos.safe_grow_cleared (0);
1258 global_info.set_empty ();
29331e72
LD
1259 }
1260 vsetvl_block_info (const vsetvl_block_info &other)
4fd09aed
JZ
1261 : probability (other.probability), local_infos (other.local_infos.copy ()),
1262 global_info (other.global_info), bb (other.bb)
29331e72
LD
1263 {}
1264
1265 vsetvl_info &get_entry_info ()
1266 {
1267 gcc_assert (!empty_p ());
4fd09aed 1268 return local_infos.is_empty () ? global_info : local_infos[0];
29331e72
LD
1269 }
1270 vsetvl_info &get_exit_info ()
1271 {
1272 gcc_assert (!empty_p ());
4fd09aed
JZ
1273 return local_infos.is_empty () ? global_info
1274 : local_infos[local_infos.length () - 1];
29331e72
LD
1275 }
1276 const vsetvl_info &get_entry_info () const
1277 {
1278 gcc_assert (!empty_p ());
4fd09aed 1279 return local_infos.is_empty () ? global_info : local_infos[0];
29331e72
LD
1280 }
1281 const vsetvl_info &get_exit_info () const
1282 {
1283 gcc_assert (!empty_p ());
4fd09aed
JZ
1284 return local_infos.is_empty () ? global_info
1285 : local_infos[local_infos.length () - 1];
29331e72
LD
1286 }
1287
4fd09aed
JZ
1288 bool empty_p () const { return local_infos.is_empty () && !has_info (); }
1289 bool has_info () const { return !global_info.empty_p (); }
29331e72
LD
1290 void set_info (const vsetvl_info &info)
1291 {
4fd09aed
JZ
1292 gcc_assert (local_infos.is_empty ());
1293 global_info = info;
1294 global_info.set_bb (bb);
29331e72 1295 }
4fd09aed 1296 void set_empty_info () { global_info.set_empty (); }
ec99ffab
JZZ
1297};
1298
29331e72
LD
1299/* Demand system is the RVV-based VSETVL info analysis tools wrapper.
1300 It defines compatible rules for SEW/LMUL, POLICY and AVL.
1301 Also, it provides 3 iterfaces avaiable_p, compatible_p and
1302 merge for the VSETVL PASS analysis and optimization.
1303
1304 - avaiable_p: Determine whether the next info can get the
1305 avaiable VSETVL status from previous info.
1306 e.g. bb 2 (demand SEW = 32, LMUL = M2) -> bb 3 (demand RATIO = 16).
1307 Since bb 2 demand info (SEW/LMUL = 32/2 = 16) satisfies the bb 3
1308 demand, the VSETVL instruction in bb 3 can be elided.
1309 avaiable_p (previous, next) is true in such situation.
1310 - compatible_p: Determine whether prev_info is compatible with next_info
1311 so that we can have a new merged info that is avaiable to both of them.
1312 - merge: Merge the stricter demand information from
1313 next_info into prev_info so that prev_info becomes available to
1314 next_info. */
1315class demand_system
ec99ffab 1316{
29331e72
LD
1317private:
1318 sbitmap *m_avl_def_in;
1319 sbitmap *m_avl_def_out;
ec99ffab 1320
29331e72 1321 /* predictors. */
ec99ffab 1322
29331e72
LD
1323 inline bool always_true (const vsetvl_info &prev ATTRIBUTE_UNUSED,
1324 const vsetvl_info &next ATTRIBUTE_UNUSED)
1325 {
1326 return true;
1327 }
1328 inline bool always_false (const vsetvl_info &prev ATTRIBUTE_UNUSED,
1329 const vsetvl_info &next ATTRIBUTE_UNUSED)
1330 {
ec99ffab 1331 return false;
29331e72
LD
1332 }
1333
1334 /* predictors for sew and lmul */
1335
1336 inline bool lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
1337 {
1338 return prev.get_vlmul () == next.get_vlmul ();
1339 }
1340 inline bool sew_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
1341 {
1342 return prev.get_sew () == next.get_sew ();
1343 }
1344 inline bool sew_lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
1345 {
1346 return lmul_eq_p (prev, next) && sew_eq_p (prev, next);
1347 }
1348 inline bool sew_ge_p (const vsetvl_info &prev, const vsetvl_info &next)
1349 {
1350 return prev.get_sew () == next.get_sew ()
1351 || (next.get_ta () && prev.get_sew () > next.get_sew ());
1352 }
1353 inline bool sew_le_p (const vsetvl_info &prev, const vsetvl_info &next)
1354 {
1355 return prev.get_sew () == next.get_sew ()
1356 || (prev.get_ta () && prev.get_sew () < next.get_sew ());
1357 }
1358 inline bool prev_sew_le_next_max_sew_p (const vsetvl_info &prev,
1359 const vsetvl_info &next)
1360 {
1361 return prev.get_sew () <= next.get_max_sew ();
1362 }
1363 inline bool next_sew_le_prev_max_sew_p (const vsetvl_info &prev,
1364 const vsetvl_info &next)
1365 {
1366 return next.get_sew () <= prev.get_max_sew ();
1367 }
1368 inline bool max_sew_overlap_p (const vsetvl_info &prev,
1369 const vsetvl_info &next)
1370 {
1371 return !(prev.get_sew () > next.get_max_sew ()
1372 || next.get_sew () > prev.get_max_sew ());
1373 }
1374 inline bool ratio_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
1375 {
1376 return prev.has_same_ratio (next);
1377 }
1378 inline bool prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev,
1379 const vsetvl_info &next)
1380 {
1381 return prev.get_ratio () >= (next.get_sew () / 8);
1382 }
1383 inline bool next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev,
1384 const vsetvl_info &next)
1385 {
1386 return next.get_ratio () >= (prev.get_sew () / 8);
1387 }
1388
1389 inline bool sew_ge_and_ratio_eq_p (const vsetvl_info &prev,
1390 const vsetvl_info &next)
1391 {
1392 return sew_ge_p (prev, next) && ratio_eq_p (prev, next);
1393 }
1394 inline bool sew_ge_and_prev_sew_le_next_max_sew_p (const vsetvl_info &prev,
1395 const vsetvl_info &next)
1396 {
1397 return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next);
1398 }
1399 inline bool
1400 sew_ge_and_prev_sew_le_next_max_sew_and_next_ratio_valid_for_prev_sew_p (
1401 const vsetvl_info &prev, const vsetvl_info &next)
1402 {
1403 return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next)
1404 && next_ratio_valid_for_prev_sew_p (prev, next);
1405 }
1406 inline bool sew_le_and_next_sew_le_prev_max_sew_p (const vsetvl_info &prev,
1407 const vsetvl_info &next)
1408 {
1409 return sew_le_p (prev, next) && next_sew_le_prev_max_sew_p (prev, next);
1410 }
1411 inline bool
1412 max_sew_overlap_and_next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev,
1413 const vsetvl_info &next)
1414 {
1415 return next_ratio_valid_for_prev_sew_p (prev, next)
1416 && max_sew_overlap_p (prev, next);
1417 }
1418 inline bool
1419 sew_le_and_next_sew_le_prev_max_sew_and_ratio_eq_p (const vsetvl_info &prev,
1420 const vsetvl_info &next)
1421 {
1422 return sew_le_p (prev, next) && ratio_eq_p (prev, next)
1423 && next_sew_le_prev_max_sew_p (prev, next);
1424 }
1425 inline bool
1426 max_sew_overlap_and_prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev,
1427 const vsetvl_info &next)
1428 {
1429 return prev_ratio_valid_for_next_sew_p (prev, next)
1430 && max_sew_overlap_p (prev, next);
1431 }
1432 inline bool
1433 sew_le_and_next_sew_le_prev_max_sew_and_prev_ratio_valid_for_next_sew_p (
1434 const vsetvl_info &prev, const vsetvl_info &next)
1435 {
1436 return sew_le_p (prev, next) && prev_ratio_valid_for_next_sew_p (prev, next)
1437 && next_sew_le_prev_max_sew_p (prev, next);
1438 }
1439 inline bool max_sew_overlap_and_ratio_eq_p (const vsetvl_info &prev,
1440 const vsetvl_info &next)
1441 {
1442 return ratio_eq_p (prev, next) && max_sew_overlap_p (prev, next);
1443 }
1444
1445 /* predictors for tail and mask policy */
1446
1447 inline bool tail_policy_eq_p (const vsetvl_info &prev,
1448 const vsetvl_info &next)
1449 {
1450 return prev.get_ta () == next.get_ta ();
1451 }
1452 inline bool mask_policy_eq_p (const vsetvl_info &prev,
1453 const vsetvl_info &next)
1454 {
1455 return prev.get_ma () == next.get_ma ();
1456 }
1457 inline bool tail_mask_policy_eq_p (const vsetvl_info &prev,
1458 const vsetvl_info &next)
1459 {
1460 return tail_policy_eq_p (prev, next) && mask_policy_eq_p (prev, next);
1461 }
1462
1463 /* predictors for avl */
1464
1465 inline bool modify_or_use_vl_p (insn_info *i, const vsetvl_info &info)
1466 {
9c16ca93
JZ
1467 if (info.has_vl ())
1468 {
1469 if (find_access (i->defs (), REGNO (info.get_vl ())))
1470 return true;
1471 if (find_access (i->uses (), REGNO (info.get_vl ())))
1472 {
1473 resource_info resource = full_register (REGNO (info.get_vl ()));
1474 def_lookup dl1 = crtl->ssa->find_def (resource, i);
1475 def_lookup dl2 = crtl->ssa->find_def (resource, info.get_insn ());
1476 if (dl1.matching_set () || dl2.matching_set ())
1477 return true;
1478 /* If their VLs are coming from same def, we still want to fuse
1479 their VSETVL demand info to gain better performance. */
1480 return dl1.prev_def (i) != dl2.prev_def (i);
1481 }
1482 }
1483 return false;
29331e72
LD
1484 }
1485 inline bool modify_avl_p (insn_info *i, const vsetvl_info &info)
1486 {
1487 return info.has_nonvlmax_reg_avl ()
1488 && find_access (i->defs (), REGNO (info.get_avl ()));
1489 }
1490
1491 inline bool modify_reg_between_p (insn_info *prev_insn, insn_info *curr_insn,
1492 unsigned regno)
1493 {
1494 gcc_assert (prev_insn->compare_with (curr_insn) < 0);
1495 for (insn_info *i = curr_insn->prev_nondebug_insn (); i != prev_insn;
1496 i = i->prev_nondebug_insn ())
1497 {
1498 // no def of regno
1499 if (find_access (i->defs (), regno))
1500 return true;
1501 }
1502 return false;
1503 }
ec99ffab 1504
29331e72
LD
1505 inline bool reg_avl_equal_p (const vsetvl_info &prev, const vsetvl_info &next)
1506 {
1507 if (!prev.has_nonvlmax_reg_avl () || !next.has_nonvlmax_reg_avl ())
1508 return false;
ec99ffab 1509
29331e72
LD
1510 if (same_equiv_note_p (prev.get_avl_def (), next.get_avl_def ()))
1511 return true;
ec99ffab 1512
29331e72
LD
1513 if (REGNO (prev.get_avl ()) != REGNO (next.get_avl ()))
1514 return false;
ec99ffab 1515
29331e72
LD
1516 insn_info *prev_insn = prev.get_insn ();
1517 if (prev.get_bb () != prev_insn->bb ())
1518 prev_insn = prev.get_bb ()->end_insn ();
ec99ffab 1519
29331e72
LD
1520 insn_info *next_insn = next.get_insn ();
1521 if (next.get_bb () != next_insn->bb ())
1522 next_insn = next.get_bb ()->end_insn ();
ec99ffab 1523
29331e72
LD
1524 return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false);
1525 }
ec99ffab 1526
29331e72
LD
1527 inline bool avl_equal_p (const vsetvl_info &prev, const vsetvl_info &next)
1528 {
1529 gcc_assert (prev.valid_p () && next.valid_p ());
ec99ffab 1530
4cd4c34a 1531 if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
29331e72 1532 return false;
e030af3e 1533
29331e72
LD
1534 if (vector_config_insn_p (prev.get_insn ()->rtl ()) && next.get_avl_def ()
1535 && next.get_avl_def ()->insn () == prev.get_insn ())
1536 return true;
e030af3e 1537
29331e72
LD
1538 if (prev.get_read_vl_insn ())
1539 {
1540 if (!next.has_nonvlmax_reg_avl () || !next.get_avl_def ())
1541 return false;
1542 insn_info *avl_def_insn = extract_single_source (next.get_avl_def ());
1543 return avl_def_insn == prev.get_read_vl_insn ();
1544 }
1545
1546 if (prev == next && prev.has_nonvlmax_reg_avl ())
1547 {
1548 insn_info *insn = prev.get_insn ();
1549 bb_info *bb = insn->bb ();
1550 for (insn_info *i = insn; real_insn_and_same_bb_p (i, bb);
1551 i = i->next_nondebug_insn ())
1552 if (find_access (i->defs (), REGNO (prev.get_avl ())))
e030af3e 1553 return false;
29331e72 1554 }
60bd33bc 1555
29331e72
LD
1556 if (prev.has_vlmax_avl () && next.has_vlmax_avl ())
1557 return true;
1558 else if (prev.has_imm_avl () && next.has_imm_avl ())
1559 return INTVAL (prev.get_avl ()) == INTVAL (next.get_avl ());
1560 else if (prev.has_vl () && next.has_nonvlmax_reg_avl ()
1561 && REGNO (prev.get_vl ()) == REGNO (next.get_avl ()))
1562 {
1563 insn_info *prev_insn = prev.insn_inside_bb_p ()
1564 ? prev.get_insn ()
1565 : prev.get_bb ()->end_insn ();
1566
1567 insn_info *next_insn = next.insn_inside_bb_p ()
1568 ? next.get_insn ()
1569 : next.get_bb ()->end_insn ();
1570 return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false);
1571 }
1572 else if (prev.has_nonvlmax_reg_avl () && next.has_nonvlmax_reg_avl ())
1573 return reg_avl_equal_p (prev, next);
e030af3e 1574
e030af3e 1575 return false;
29331e72
LD
1576 }
1577 inline bool avl_equal_or_prev_avl_non_zero_p (const vsetvl_info &prev,
1578 const vsetvl_info &next)
1579 {
1580 return avl_equal_p (prev, next) || prev.has_non_zero_avl ();
1581 }
1582
1583 inline bool can_use_next_avl_p (const vsetvl_info &prev,
1584 const vsetvl_info &next)
1585 {
0c4bd132
JZ
1586 /* Forbid the AVL/VL propagation if VL of NEXT is used
1587 by non-RVV instructions. This is because:
1588
1589 bb 2:
1590 PREV: scalar move (no AVL)
1591 bb 3:
1592 NEXT: vsetvl a5(VL), a4(AVL) ...
1593 branch a5,zero
1594
1595 Since user vsetvl instruction is no side effect instruction
1596 which should be placed in the correct and optimal location
1597 of the program by the previous PASS, it is unreasonable that
1598 VSETVL PASS tries to move it to another places if it used by
1599 non-RVV instructions.
1600
1601 Note: We only forbid the cases that VL is used by the following
1602 non-RVV instructions which will cause issues. We don't forbid
1603 other cases since it won't cause correctness issues and we still
1604 more demand info are fused backward. The later LCM algorithm
1605 should know the optimal location of the vsetvl. */
1606 if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
1607 return false;
1608
29331e72
LD
1609 if (!next.has_nonvlmax_reg_avl () && !next.has_vl ())
1610 return true;
e030af3e 1611
29331e72
LD
1612 insn_info *prev_insn = prev.get_insn ();
1613 if (prev.get_bb () != prev_insn->bb ())
1614 prev_insn = prev.get_bb ()->end_insn ();
1615
1616 insn_info *next_insn = next.get_insn ();
1617 if (next.get_bb () != next_insn->bb ())
1618 next_insn = next.get_bb ()->end_insn ();
1619
1620 return avl_vl_unmodified_between_p (prev_insn, next_insn, next);
1621 }
1622
1623 inline bool avl_equal_or_next_avl_non_zero_and_can_use_next_avl_p (
1624 const vsetvl_info &prev, const vsetvl_info &next)
1625 {
1626 return avl_equal_p (prev, next)
1627 || (next.has_non_zero_avl () && can_use_next_avl_p (prev, next));
1628 }
1629
1630 /* modifiers */
1631
1632 inline void nop (const vsetvl_info &prev ATTRIBUTE_UNUSED,
1633 const vsetvl_info &next ATTRIBUTE_UNUSED)
1634 {}
1635
1636 /* modifiers for sew and lmul */
1637
1638 inline void use_min_of_max_sew (vsetvl_info &prev, const vsetvl_info &next)
1639 {
1640 prev.set_max_sew (MIN (prev.get_max_sew (), next.get_max_sew ()));
1641 }
1642 inline void use_next_sew (vsetvl_info &prev, const vsetvl_info &next)
1643 {
1644 prev.set_sew (next.get_sew ());
1645 use_min_of_max_sew (prev, next);
1646 }
1647 inline void use_max_sew (vsetvl_info &prev, const vsetvl_info &next)
1648 {
1649 auto max_sew = std::max (prev.get_sew (), next.get_sew ());
1650 prev.set_sew (max_sew);
1651 use_min_of_max_sew (prev, next);
1652 }
1653 inline void use_next_sew_lmul (vsetvl_info &prev, const vsetvl_info &next)
1654 {
1655 use_next_sew (prev, next);
1656 prev.set_vlmul (next.get_vlmul ());
1657 prev.set_ratio (next.get_ratio ());
1658 }
1659 inline void use_next_sew_with_prev_ratio (vsetvl_info &prev,
1660 const vsetvl_info &next)
1661 {
1662 use_next_sew (prev, next);
1663 prev.set_vlmul (calculate_vlmul (next.get_sew (), prev.get_ratio ()));
1664 }
1665 inline void modify_lmul_with_next_ratio (vsetvl_info &prev,
1666 const vsetvl_info &next)
1667 {
1668 prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ()));
1669 prev.set_ratio (next.get_ratio ());
1670 }
1671
1672 inline void use_max_sew_and_lmul_with_next_ratio (vsetvl_info &prev,
1673 const vsetvl_info &next)
1674 {
1675 prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ()));
1676 use_max_sew (prev, next);
1677 prev.set_ratio (next.get_ratio ());
1678 }
1679
1680 inline void use_max_sew_and_lmul_with_prev_ratio (vsetvl_info &prev,
1681 const vsetvl_info &next)
1682 {
1683 auto max_sew = std::max (prev.get_sew (), next.get_sew ());
1684 prev.set_vlmul (calculate_vlmul (max_sew, prev.get_ratio ()));
1685 prev.set_sew (max_sew);
1686 }
1687
1688 /* modifiers for tail and mask policy */
1689
1690 inline void use_tail_policy (vsetvl_info &prev, const vsetvl_info &next)
1691 {
1692 if (!next.get_ta ())
1693 prev.set_ta (next.get_ta ());
1694 }
1695 inline void use_mask_policy (vsetvl_info &prev, const vsetvl_info &next)
1696 {
1697 if (!next.get_ma ())
1698 prev.set_ma (next.get_ma ());
1699 }
1700 inline void use_tail_mask_policy (vsetvl_info &prev, const vsetvl_info &next)
1701 {
1702 use_tail_policy (prev, next);
1703 use_mask_policy (prev, next);
1704 }
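  /* Illustration (assumed demands): the policy modifiers above only ever
     tighten PREV.  If PREV demands "ta, ma" and NEXT demands "tu, ma",
     the fused info becomes "tu, ma", because a demanded undisturbed
     policy (get_ta ()/get_ma () == false) overrides an agnostic one,
     while an agnostic NEXT leaves PREV untouched.  */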
1705
1706 /* modifiers for avl */
1707
1708 inline void use_next_avl (vsetvl_info &prev, const vsetvl_info &next)
1709 {
1710 gcc_assert (can_use_next_avl_p (prev, next));
1711 prev.update_avl (next);
1712 }
1713
1714 inline void use_next_avl_when_not_equal (vsetvl_info &prev,
1715 const vsetvl_info &next)
1716 {
1717 if (avl_equal_p (prev, next))
1718 return;
1719 gcc_assert (next.has_non_zero_avl ());
1720 use_next_avl (prev, next);
1721 }
e030af3e 1722
29331e72
LD
1723public:
1724 demand_system () : m_avl_def_in (nullptr), m_avl_def_out (nullptr) {}
1725
 1726  void set_avl_in_out_data (sbitmap *avl_def_in, sbitmap *avl_def_out)
 1727  {
 1728    m_avl_def_in = avl_def_in;
 1729    m_avl_def_out = avl_def_out;
 1730  }
1731
 1732  /* Can we move the vsetvl info between prev_insn and next_insn safely?  */
1733 bool avl_vl_unmodified_between_p (insn_info *prev_insn, insn_info *next_insn,
1734 const vsetvl_info &info,
1735 bool ignore_vl = false)
1736 {
1737 gcc_assert ((ignore_vl && info.has_nonvlmax_reg_avl ())
1738 || (info.has_nonvlmax_reg_avl () || info.has_vl ()));
1739
1740 gcc_assert (!prev_insn->is_debug_insn () && !next_insn->is_debug_insn ());
1741 if (prev_insn->bb () == next_insn->bb ()
1742 && prev_insn->compare_with (next_insn) < 0)
1743 {
1744 for (insn_info *i = next_insn->prev_nondebug_insn (); i != prev_insn;
1745 i = i->prev_nondebug_insn ())
1746 {
9c16ca93 1747 // no def and use of vl
29331e72
LD
1748 if (!ignore_vl && modify_or_use_vl_p (i, info))
1749 return false;
e030af3e 1750
29331e72
LD
1751 // no def of avl
1752 if (modify_avl_p (i, info))
1753 return false;
1754 }
1755 return true;
1756 }
1757 else
1758 {
1759 if (!ignore_vl && info.has_vl ())
1760 {
1761 bitmap live_out = df_get_live_out (prev_insn->bb ()->cfg_bb ());
1762 if (bitmap_bit_p (live_out, REGNO (info.get_vl ())))
1763 return false;
1764 }
a2d12abe 1765
29331e72
LD
1766 if (info.has_nonvlmax_reg_avl () && m_avl_def_in && m_avl_def_out)
1767 {
1768 bool has_avl_out = false;
1769 unsigned regno = REGNO (info.get_avl ());
1770 unsigned expr_id;
1771 sbitmap_iterator sbi;
1772 EXECUTE_IF_SET_IN_BITMAP (m_avl_def_out[prev_insn->bb ()->index ()],
1773 0, expr_id, sbi)
1774 {
1775 if (get_regno (expr_id, last_basic_block_for_fn (cfun))
1776 != regno)
1777 continue;
1778 has_avl_out = true;
1779 if (!bitmap_bit_p (m_avl_def_in[next_insn->bb ()->index ()],
1780 expr_id))
1781 return false;
1782 }
1783 if (!has_avl_out)
1784 return false;
1785 }
12b23c71 1786
29331e72
LD
1787 for (insn_info *i = next_insn; i != next_insn->bb ()->head_insn ();
1788 i = i->prev_nondebug_insn ())
1789 {
 1790	      // no def and use of vl
1791 if (!ignore_vl && modify_or_use_vl_p (i, info))
1792 return false;
9243c3d1 1793
29331e72
LD
1794 // no def of avl
1795 if (modify_avl_p (i, info))
1796 return false;
1797 }
6b6b9c68 1798
29331e72
LD
1799 for (insn_info *i = prev_insn->bb ()->end_insn (); i != prev_insn;
1800 i = i->prev_nondebug_insn ())
1801 {
 1802	      // no def and use of vl
1803 if (!ignore_vl && modify_or_use_vl_p (i, info))
1804 return false;
1805
1806 // no def of avl
1807 if (modify_avl_p (i, info))
1808 return false;
1809 }
1810 }
d875d756 1811 return true;
29331e72
LD
1812 }
1813
1814 bool sew_lmul_compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
1815 {
1816 gcc_assert (prev.valid_p () && next.valid_p ());
1817 sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
1818 sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
1819#define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1820 AVAILABLE_P, FUSE) \
1821 if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
1822 && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
1823 return COMPATIBLE_P (prev, next);
6b6b9c68 1824
29331e72 1825#include "riscv-vsetvl.def"
6b6b9c68 1826
29331e72
LD
1827 gcc_unreachable ();
1828 }
6b6b9c68 1829
29331e72
LD
1830 bool sew_lmul_available_p (const vsetvl_info &prev, const vsetvl_info &next)
1831 {
1832 gcc_assert (prev.valid_p () && next.valid_p ());
1833 sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
1834 sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
1835#define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1836 AVAILABLE_P, FUSE) \
1837 if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
1838 && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
1839 return AVAILABLE_P (prev, next);
d875d756 1840
29331e72 1841#include "riscv-vsetvl.def"
4f673c5e 1842
29331e72
LD
1843 gcc_unreachable ();
1844 }
1845
1846 void merge_sew_lmul (vsetvl_info &prev, const vsetvl_info &next)
1847 {
1848 gcc_assert (prev.valid_p () && next.valid_p ());
1849 sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
1850 sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
1851#define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1852 AVAILABLE_P, FUSE) \
1853 if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
1854 && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
1855 { \
1856 gcc_assert (COMPATIBLE_P (prev, next)); \
1857 FUSE (prev, next); \
1858 prev.set_sew_lmul_demand (sew_lmul_demand_type::NEW_FLAGS); \
1859 return; \
1860 }
9243c3d1 1861
29331e72 1862#include "riscv-vsetvl.def"
9243c3d1 1863
29331e72
LD
1864 gcc_unreachable ();
1865 }
9243c3d1 1866
29331e72
LD
1867 bool policy_compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
1868 {
1869 gcc_assert (prev.valid_p () && next.valid_p ());
1870 policy_demand_type prev_flags = prev.get_policy_demand ();
1871 policy_demand_type next_flags = next.get_policy_demand ();
1872#define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1873 AVAILABLE_P, FUSE) \
1874 if (prev_flags == policy_demand_type::PREV_FLAGS \
1875 && next_flags == policy_demand_type::NEXT_FLAGS) \
1876 return COMPATIBLE_P (prev, next);
9243c3d1 1877
29331e72 1878#include "riscv-vsetvl.def"
9243c3d1 1879
29331e72
LD
1880 gcc_unreachable ();
1881 }
4f673c5e 1882
29331e72
LD
1883 bool policy_available_p (const vsetvl_info &prev, const vsetvl_info &next)
1884 {
1885 gcc_assert (prev.valid_p () && next.valid_p ());
1886 policy_demand_type prev_flags = prev.get_policy_demand ();
1887 policy_demand_type next_flags = next.get_policy_demand ();
1888#define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1889 AVAILABLE_P, FUSE) \
1890 if (prev_flags == policy_demand_type::PREV_FLAGS \
1891 && next_flags == policy_demand_type::NEXT_FLAGS) \
1892 return AVAILABLE_P (prev, next);
4f673c5e 1893
29331e72 1894#include "riscv-vsetvl.def"
9243c3d1 1895
29331e72
LD
1896 gcc_unreachable ();
1897 }
1898
1899 void merge_policy (vsetvl_info &prev, const vsetvl_info &next)
1900 {
1901 gcc_assert (prev.valid_p () && next.valid_p ());
1902 policy_demand_type prev_flags = prev.get_policy_demand ();
1903 policy_demand_type next_flags = next.get_policy_demand ();
1904#define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1905 AVAILABLE_P, FUSE) \
1906 if (prev_flags == policy_demand_type::PREV_FLAGS \
1907 && next_flags == policy_demand_type::NEXT_FLAGS) \
1908 { \
1909 gcc_assert (COMPATIBLE_P (prev, next)); \
1910 FUSE (prev, next); \
1911 prev.set_policy_demand (policy_demand_type::NEW_FLAGS); \
1912 return; \
1913 }
9243c3d1 1914
29331e72 1915#include "riscv-vsetvl.def"
ec99ffab 1916
29331e72
LD
1917 gcc_unreachable ();
1918 }
9243c3d1 1919
d82bb518
JZ
1920 bool vl_not_in_conflict_p (const vsetvl_info &prev, const vsetvl_info &next)
1921 {
 1922    /* We don't fuse the following case:
1923
1924 li a5, -1
1925 vmv.s.x v0, a5 -- PREV
1926 vsetvli a5, ... -- NEXT
1927
1928 Don't fuse NEXT into PREV.
1929 */
1930 return !prev.vl_modify_non_avl_op_p (next)
1931 && !next.vl_modify_non_avl_op_p (prev);
1932 }
1933
29331e72
LD
1934 bool avl_compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
1935 {
1936 gcc_assert (prev.valid_p () && next.valid_p ());
1937 avl_demand_type prev_flags = prev.get_avl_demand ();
1938 avl_demand_type next_flags = next.get_avl_demand ();
1939#define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1940 AVAILABLE_P, FUSE) \
1941 if (prev_flags == avl_demand_type::PREV_FLAGS \
1942 && next_flags == avl_demand_type::NEXT_FLAGS) \
1943 return COMPATIBLE_P (prev, next);
9243c3d1 1944
29331e72 1945#include "riscv-vsetvl.def"
9243c3d1 1946
29331e72
LD
1947 gcc_unreachable ();
1948 }
9243c3d1 1949
29331e72
LD
1950 bool avl_available_p (const vsetvl_info &prev, const vsetvl_info &next)
1951 {
1952 gcc_assert (prev.valid_p () && next.valid_p ());
1953 avl_demand_type prev_flags = prev.get_avl_demand ();
1954 avl_demand_type next_flags = next.get_avl_demand ();
1955#define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1956 AVAILABLE_P, FUSE) \
1957 if (prev_flags == avl_demand_type::PREV_FLAGS \
1958 && next_flags == avl_demand_type::NEXT_FLAGS) \
1959 return AVAILABLE_P (prev, next);
9243c3d1 1960
29331e72 1961#include "riscv-vsetvl.def"
9243c3d1 1962
29331e72
LD
1963 gcc_unreachable ();
1964 }
1965
1966 void merge_avl (vsetvl_info &prev, const vsetvl_info &next)
1967 {
1968 gcc_assert (prev.valid_p () && next.valid_p ());
1969 avl_demand_type prev_flags = prev.get_avl_demand ();
1970 avl_demand_type next_flags = next.get_avl_demand ();
1971#define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1972 AVAILABLE_P, FUSE) \
1973 if (prev_flags == avl_demand_type::PREV_FLAGS \
1974 && next_flags == avl_demand_type::NEXT_FLAGS) \
1975 { \
1976 gcc_assert (COMPATIBLE_P (prev, next)); \
1977 FUSE (prev, next); \
1978 prev.set_avl_demand (avl_demand_type::NEW_FLAGS); \
1979 return; \
60bd33bc
JZZ
1980 }
1981
29331e72 1982#include "riscv-vsetvl.def"
9243c3d1 1983
29331e72
LD
1984 gcc_unreachable ();
1985 }
1986
1987 bool compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
1988 {
1989 bool compatible_p = sew_lmul_compatible_p (prev, next)
1990 && policy_compatible_p (prev, next)
d82bb518
JZ
1991 && avl_compatible_p (prev, next)
1992 && vl_not_in_conflict_p (prev, next);
29331e72
LD
1993 return compatible_p;
1994 }
1995
1996 bool available_p (const vsetvl_info &prev, const vsetvl_info &next)
1997 {
1998 bool available_p = sew_lmul_available_p (prev, next)
1999 && policy_available_p (prev, next)
d82bb518
JZ
2000 && avl_available_p (prev, next)
2001 && vl_not_in_conflict_p (prev, next);
29331e72
LD
2002 gcc_assert (!available_p || compatible_p (prev, next));
2003 return available_p;
2004 }
2005
2006 void merge (vsetvl_info &prev, const vsetvl_info &next)
2007 {
2008 gcc_assert (compatible_p (prev, next));
2009 merge_sew_lmul (prev, next);
2010 merge_policy (prev, next);
2011 merge_avl (prev, next);
2012 gcc_assert (available_p (prev, next));
2013 }
2014};
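/* Usage sketch (a hypothetical caller, mirroring how fuse_local_vsetvl_info
   below drives the demand system): fusion is only attempted when two infos
   are compatible, and after merge () PREV satisfies the demands of both.

     demand_system dem;
     ...
     if (dem.available_p (prev, next))
       ;                          // NEXT needs no vsetvl of its own.
     else if (dem.compatible_p (prev, next))
       dem.merge (prev, next);    // PREV now also covers NEXT's demands.
     else
       ;                          // Keep both vsetvl infos.  */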
9243c3d1 2015
9243c3d1 2016
29331e72 2017class pre_vsetvl
9243c3d1 2018{
29331e72
LD
2019private:
2020 demand_system m_dem;
2021 auto_vec<vsetvl_block_info> m_vector_block_infos;
2022
 2023  /* data for avl reaching definition. */
2024 sbitmap m_avl_regs;
2025 sbitmap *m_avl_def_in;
2026 sbitmap *m_avl_def_out;
2027 sbitmap *m_reg_def_loc;
2028
 2029  /* data for vsetvl info reaching definition. */
2030 vsetvl_info m_unknow_info;
2031 auto_vec<vsetvl_info *> m_vsetvl_def_exprs;
2032 sbitmap *m_vsetvl_def_in;
2033 sbitmap *m_vsetvl_def_out;
2034
2035 /* data for lcm */
2036 auto_vec<vsetvl_info *> m_exprs;
2037 sbitmap *m_avloc;
2038 sbitmap *m_avin;
2039 sbitmap *m_avout;
2040 sbitmap *m_kill;
2041 sbitmap *m_antloc;
2042 sbitmap *m_transp;
2043 sbitmap *m_insert;
2044 sbitmap *m_del;
2045 struct edge_list *m_edges;
2046
2047 auto_vec<vsetvl_info> m_delete_list;
2048
2049 vsetvl_block_info &get_block_info (const bb_info *bb)
2050 {
2051 return m_vector_block_infos[bb->index ()];
2052 }
2053 const vsetvl_block_info &get_block_info (const basic_block bb) const
2054 {
2055 return m_vector_block_infos[bb->index];
2056 }
2057
2058 vsetvl_block_info &get_block_info (const basic_block bb)
2059 {
2060 return m_vector_block_infos[bb->index];
2061 }
2062
2063 void add_expr (auto_vec<vsetvl_info *> &m_exprs, vsetvl_info &info)
2064 {
2065 for (vsetvl_info *item : m_exprs)
2066 {
2067 if (*item == info)
2068 return;
2069 }
2070 m_exprs.safe_push (&info);
2071 }
2072
2073 unsigned get_expr_index (auto_vec<vsetvl_info *> &m_exprs,
2074 const vsetvl_info &info)
2075 {
2076 for (size_t i = 0; i < m_exprs.length (); i += 1)
2077 {
2078 if (*m_exprs[i] == info)
2079 return i;
2080 }
2081 gcc_unreachable ();
2082 }
2083
c9d5b46a 2084 bool anticipated_exp_p (const vsetvl_info &header_info)
29331e72
LD
2085 {
2086 if (!header_info.has_nonvlmax_reg_avl () && !header_info.has_vl ())
2087 return true;
9243c3d1 2088
29331e72
LD
2089 bb_info *bb = header_info.get_bb ();
2090 insn_info *prev_insn = bb->head_insn ();
2091 insn_info *next_insn = header_info.insn_inside_bb_p ()
2092 ? header_info.get_insn ()
2093 : header_info.get_bb ()->end_insn ();
2094
2095 return m_dem.avl_vl_unmodified_between_p (prev_insn, next_insn,
2096 header_info);
2097 }
2098
2099 bool available_exp_p (const vsetvl_info &prev_info,
2100 const vsetvl_info &next_info)
2101 {
2102 return m_dem.available_p (prev_info, next_info);
2103 }
2104
2105 void compute_probabilities ()
2106 {
2107 edge e;
2108 edge_iterator ei;
2109
2110 for (const bb_info *bb : crtl->ssa->bbs ())
2111 {
2112 basic_block cfg_bb = bb->cfg_bb ();
2113 auto &curr_prob = get_block_info (cfg_bb).probability;
2114
 2115	  /* GCC assumes the entry block (bb 0) is always executed,
 2116	     so set its probability to "always". */
2117 if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
2118 curr_prob = profile_probability::always ();
2119 /* Exit block (bb 1) is the block we don't need to process. */
2120 if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
2121 continue;
9243c3d1 2122
29331e72
LD
2123 gcc_assert (curr_prob.initialized_p ());
2124 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
2125 {
2126 auto &new_prob = get_block_info (e->dest).probability;
 2127	      /* Normally, the edge probability should be initialized.
 2128	         However, some special testing code written in GIMPLE IR
 2129	         style leaves the edge probability uninitialized; we
 2130	         conservatively set it to "never" so that it will not
 2131	         affect PRE (Phase 3 && Phase 4). */
2132 if (!e->probability.initialized_p ())
2133 new_prob = profile_probability::never ();
2134 else if (!new_prob.initialized_p ())
2135 new_prob = curr_prob * e->probability;
2136 else if (new_prob == profile_probability::always ())
2137 continue;
2138 else
2139 new_prob += curr_prob * e->probability;
2140 }
2141 }
2142 }
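  /* A worked example on a hypothetical diamond CFG: if bb 2 is "always"
     executed and branches to bb 3 and bb 4 with edge probabilities 0.3
     and 0.7, then bb 3 gets 0.3, bb 4 gets 0.7, and a join block bb 5
     reached from both accumulates 0.3 + 0.7 = "always" again, since each
     incoming edge contributes curr_prob * e->probability.  */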
2143
2144 void insert_vsetvl_insn (enum emit_type emit_type, const vsetvl_info &info)
2145 {
2146 rtx pat = info.get_vsetvl_pat ();
2147 rtx_insn *rinsn = info.get_insn ()->rtl ();
2148
2149 if (emit_type == EMIT_DIRECT)
2150 {
2151 emit_insn (pat);
2152 if (dump_file)
2153 {
2154 fprintf (dump_file, " Insert vsetvl insn %d:\n",
2155 INSN_UID (get_last_insn ()));
2156 print_rtl_single (dump_file, get_last_insn ());
2157 }
2158 }
2159 else if (emit_type == EMIT_BEFORE)
2160 {
2161 emit_insn_before (pat, rinsn);
2162 if (dump_file)
2163 {
2164 fprintf (dump_file, " Insert vsetvl insn before insn %d:\n",
2165 INSN_UID (rinsn));
2166 print_rtl_single (dump_file, PREV_INSN (rinsn));
2167 }
2168 }
2169 else
2170 {
2171 emit_insn_after (pat, rinsn);
2172 if (dump_file)
2173 {
2174 fprintf (dump_file, " Insert vsetvl insn after insn %d:\n",
2175 INSN_UID (rinsn));
2176 print_rtl_single (dump_file, NEXT_INSN (rinsn));
2177 }
2178 }
2179 }
2180
2181 void change_vsetvl_insn (const vsetvl_info &info)
2182 {
2183 rtx_insn *rinsn = info.get_insn ()->rtl ();
2184 rtx new_pat = info.get_vsetvl_pat ();
2185
2186 if (dump_file)
2187 {
2188 fprintf (dump_file, " Change insn %d from:\n", INSN_UID (rinsn));
2189 print_rtl_single (dump_file, rinsn);
2190 }
2191
2192 validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false);
2193
2194 if (dump_file)
2195 {
2196 fprintf (dump_file, "\n to:\n");
2197 print_rtl_single (dump_file, rinsn);
2198 }
2199 }
2200
2201 void remove_vsetvl_insn (const vsetvl_info &info)
2202 {
2203 rtx_insn *rinsn = info.get_insn ()->rtl ();
2204 if (dump_file)
2205 {
2206 fprintf (dump_file, " Eliminate insn %d:\n", INSN_UID (rinsn));
2207 print_rtl_single (dump_file, rinsn);
2208 }
2209 if (in_sequence_p ())
2210 remove_insn (rinsn);
2211 else
2212 delete_insn (rinsn);
2213 }
2214
2215 bool successors_probability_equal_p (const basic_block cfg_bb) const
2216 {
2217 edge e;
2218 edge_iterator ei;
2219 profile_probability prob = profile_probability::uninitialized ();
2220 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
2221 {
2222 if (prob == profile_probability::uninitialized ())
2223 prob = m_vector_block_infos[e->dest->index].probability;
2224 else if (prob == m_vector_block_infos[e->dest->index].probability)
2225 continue;
2226 else
 2227	    /* We pick the highest probability among those incompatible VSETVL
 2228	       infos.  When all incompatible VSETVL infos have the same
 2229	       probability, we don't pick any of them. */
2230 return false;
2231 }
ec99ffab 2232 return true;
29331e72
LD
2233 }
2234
923a67f1 2235 bool preds_all_same_avl_and_ratio_p (const vsetvl_info &curr_info)
29331e72
LD
2236 {
2237 gcc_assert (
2238 !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
2239
2240 unsigned expr_index;
2241 sbitmap_iterator sbi;
2242 EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[curr_info.get_bb ()->index ()], 0,
2243 expr_index, sbi)
2244 {
2245 const vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
2246 if (!prev_info.valid_p ()
923a67f1
JZ
2247 || !m_dem.avl_available_p (prev_info, curr_info)
2248 || prev_info.get_ratio () != curr_info.get_ratio ())
29331e72
LD
2249 return false;
2250 }
005fad9d 2251
005fad9d 2252 return true;
29331e72 2253 }
005fad9d 2254
29331e72
LD
2255public:
2256 pre_vsetvl ()
2257 : m_avl_def_in (nullptr), m_avl_def_out (nullptr),
2258 m_vsetvl_def_in (nullptr), m_vsetvl_def_out (nullptr), m_avloc (nullptr),
2259 m_avin (nullptr), m_avout (nullptr), m_kill (nullptr), m_antloc (nullptr),
2260 m_transp (nullptr), m_insert (nullptr), m_del (nullptr), m_edges (nullptr)
2261 {
2262 /* Initialization of RTL_SSA. */
2263 calculate_dominance_info (CDI_DOMINATORS);
2264 df_analyze ();
2265 crtl->ssa = new function_info (cfun);
2266 m_vector_block_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
2267 compute_probabilities ();
2268 m_unknow_info.set_unknown ();
2269 }
2270
2271 void finish ()
2272 {
2273 free_dominance_info (CDI_DOMINATORS);
2274 if (crtl->ssa->perform_pending_updates ())
2275 cleanup_cfg (0);
2276 delete crtl->ssa;
2277 crtl->ssa = nullptr;
2278
2279 if (m_avl_regs)
2280 sbitmap_free (m_avl_regs);
2281 if (m_reg_def_loc)
2282 sbitmap_vector_free (m_reg_def_loc);
2283
2284 if (m_avl_def_in)
2285 sbitmap_vector_free (m_avl_def_in);
2286 if (m_avl_def_out)
2287 sbitmap_vector_free (m_avl_def_out);
2288
2289 if (m_vsetvl_def_in)
2290 sbitmap_vector_free (m_vsetvl_def_in);
2291 if (m_vsetvl_def_out)
2292 sbitmap_vector_free (m_vsetvl_def_out);
2293
2294 if (m_avloc)
2295 sbitmap_vector_free (m_avloc);
2296 if (m_kill)
2297 sbitmap_vector_free (m_kill);
2298 if (m_antloc)
2299 sbitmap_vector_free (m_antloc);
2300 if (m_transp)
2301 sbitmap_vector_free (m_transp);
2302 if (m_insert)
2303 sbitmap_vector_free (m_insert);
2304 if (m_del)
2305 sbitmap_vector_free (m_del);
2306 if (m_avin)
2307 sbitmap_vector_free (m_avin);
2308 if (m_avout)
2309 sbitmap_vector_free (m_avout);
2310
2311 if (m_edges)
2312 free_edge_list (m_edges);
2313 }
2314
2315 void compute_avl_def_data ();
2316 void compute_vsetvl_def_data ();
2317 void compute_lcm_local_properties ();
2318
2319 void fuse_local_vsetvl_info ();
2320 bool earliest_fuse_vsetvl_info ();
2321 void pre_global_vsetvl_info ();
2322 void emit_vsetvl ();
2323 void cleaup ();
2324 void remove_avl_operand ();
2325 void remove_unused_dest_operand ();
2326
2327 void dump (FILE *file, const char *title) const
2328 {
2329 fprintf (file, "\nVSETVL infos after %s\n\n", title);
2330 for (const bb_info *bb : crtl->ssa->bbs ())
2331 {
2332 const auto &block_info = m_vector_block_infos[bb->index ()];
2333 fprintf (file, " bb %d:\n", bb->index ());
2334 fprintf (file, " probability: ");
2335 block_info.probability.dump (file);
2336 fprintf (file, "\n");
2337 if (!block_info.empty_p ())
2338 {
2339 fprintf (file, " Header vsetvl info:");
2340 block_info.get_entry_info ().dump (file, " ");
2341 fprintf (file, " Footer vsetvl info:");
2342 block_info.get_exit_info ().dump (file, " ");
4fd09aed 2343 for (const auto &info : block_info.local_infos)
29331e72
LD
2344 {
2345 fprintf (file,
2346 " insn %d vsetvl info:", info.get_insn ()->uid ());
2347 info.dump (file, " ");
2348 }
2349 }
2350 }
2351 }
2352};
c139f5e1 2353
e030af3e 2354void
29331e72 2355pre_vsetvl::compute_avl_def_data ()
e030af3e 2356{
29331e72
LD
2357 if (bitmap_empty_p (m_avl_regs))
2358 return;
e030af3e 2359
29331e72
LD
2360 unsigned num_regs = GP_REG_LAST + 1;
2361 unsigned num_bbs = last_basic_block_for_fn (cfun);
9243c3d1 2362
29331e72
LD
2363 sbitmap *avl_def_loc_temp = sbitmap_vector_alloc (num_bbs, num_regs);
2364 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 2365 {
29331e72
LD
2366 bitmap_and (avl_def_loc_temp[bb->index ()], m_avl_regs,
2367 m_reg_def_loc[bb->index ()]);
2368
2369 vsetvl_block_info &block_info = get_block_info (bb);
2370 if (block_info.has_info ())
9243c3d1 2371 {
29331e72
LD
2372 vsetvl_info &footer_info = block_info.get_exit_info ();
2373 gcc_assert (footer_info.valid_p ());
2374 if (footer_info.has_vl ())
2375 bitmap_set_bit (avl_def_loc_temp[bb->index ()],
2376 REGNO (footer_info.get_vl ()));
9243c3d1
JZZ
2377 }
2378 }
9243c3d1 2379
29331e72
LD
2380 if (m_avl_def_in)
2381 sbitmap_vector_free (m_avl_def_in);
2382 if (m_avl_def_out)
2383 sbitmap_vector_free (m_avl_def_out);
9243c3d1 2384
29331e72
LD
2385 unsigned num_exprs = num_bbs * num_regs;
2386 sbitmap *avl_def_loc = sbitmap_vector_alloc (num_bbs, num_exprs);
2387 sbitmap *m_kill = sbitmap_vector_alloc (num_bbs, num_exprs);
2388 m_avl_def_in = sbitmap_vector_alloc (num_bbs, num_exprs);
2389 m_avl_def_out = sbitmap_vector_alloc (num_bbs, num_exprs);
9243c3d1 2390
29331e72
LD
2391 bitmap_vector_clear (avl_def_loc, num_bbs);
2392 bitmap_vector_clear (m_kill, num_bbs);
2393 bitmap_vector_clear (m_avl_def_out, num_bbs);
2394
2395 unsigned regno;
2396 sbitmap_iterator sbi;
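  /* Each (definition block, register) pair gets its own expression id.
     The kill computation below assumes the ids of a given register occupy
     the contiguous range [regno * num_bbs, regno * num_bbs + num_bbs).
     For example (hypothetical numbers), with num_bbs == 8 a definition of
     a4 (regno 14) in bb 3 maps to expression id 14 * 8 + 3 == 115, and any
     definition of a4 kills ids 112 ... 119.  */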
2397 for (const bb_info *bb : crtl->ssa->bbs ())
2398 EXECUTE_IF_SET_IN_BITMAP (avl_def_loc_temp[bb->index ()], 0, regno, sbi)
2399 {
2400 bitmap_set_bit (avl_def_loc[bb->index ()],
2401 get_expr_id (bb->index (), regno, num_bbs));
2402 bitmap_set_range (m_kill[bb->index ()], regno * num_bbs, num_bbs);
2403 }
2404
2405 basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
2406 EXECUTE_IF_SET_IN_BITMAP (m_avl_regs, 0, regno, sbi)
2407 bitmap_set_bit (m_avl_def_out[entry->index],
2408 get_expr_id (entry->index, regno, num_bbs));
2409
2410 compute_reaching_defintion (avl_def_loc, m_kill, m_avl_def_in, m_avl_def_out);
2411
2412 if (dump_file && (dump_flags & TDF_DETAILS))
9243c3d1 2413 {
29331e72
LD
2414 fprintf (dump_file,
 2415	       " Compute avl reaching definition data (num_bbs %d, num_regs "
2416 "%d):\n\n",
2417 num_bbs, num_regs);
2418 fprintf (dump_file, " avl_regs: ");
2419 dump_bitmap_file (dump_file, m_avl_regs);
2420 fprintf (dump_file, "\n bitmap data:\n");
2421 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 2422 {
29331e72
LD
2423 unsigned int i = bb->index ();
2424 fprintf (dump_file, " BB %u:\n", i);
2425 fprintf (dump_file, " avl_def_loc:");
2426 unsigned expr_id;
2427 sbitmap_iterator sbi;
2428 EXECUTE_IF_SET_IN_BITMAP (avl_def_loc[i], 0, expr_id, sbi)
ec99ffab 2429 {
29331e72
LD
2430 fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs),
2431 get_bb_index (expr_id, num_bbs));
2432 }
2433 fprintf (dump_file, "\n kill:");
2434 EXECUTE_IF_SET_IN_BITMAP (m_kill[i], 0, expr_id, sbi)
2435 {
2436 fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs),
2437 get_bb_index (expr_id, num_bbs));
2438 }
2439 fprintf (dump_file, "\n avl_def_in:");
2440 EXECUTE_IF_SET_IN_BITMAP (m_avl_def_in[i], 0, expr_id, sbi)
2441 {
2442 fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs),
2443 get_bb_index (expr_id, num_bbs));
2444 }
2445 fprintf (dump_file, "\n avl_def_out:");
2446 EXECUTE_IF_SET_IN_BITMAP (m_avl_def_out[i], 0, expr_id, sbi)
2447 {
2448 fprintf (dump_file, " (r%u,bb%u)", get_regno (expr_id, num_bbs),
2449 get_bb_index (expr_id, num_bbs));
ec99ffab 2450 }
29331e72 2451 fprintf (dump_file, "\n");
9243c3d1
JZZ
2452 }
2453 }
2454
29331e72
LD
2455 sbitmap_vector_free (avl_def_loc);
2456 sbitmap_vector_free (m_kill);
2457 sbitmap_vector_free (avl_def_loc_temp);
9243c3d1 2458
29331e72 2459 m_dem.set_avl_in_out_data (m_avl_def_in, m_avl_def_out);
9243c3d1
JZZ
2460}
2461
9243c3d1 2462void
29331e72 2463pre_vsetvl::compute_vsetvl_def_data ()
9243c3d1 2464{
29331e72
LD
2465 m_vsetvl_def_exprs.truncate (0);
2466 add_expr (m_vsetvl_def_exprs, m_unknow_info);
2467 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 2468 {
29331e72
LD
2469 vsetvl_block_info &block_info = get_block_info (bb);
2470 if (block_info.empty_p ())
2471 continue;
2472 vsetvl_info &footer_info = block_info.get_exit_info ();
2473 gcc_assert (footer_info.valid_p () || footer_info.unknown_p ());
2474 add_expr (m_vsetvl_def_exprs, footer_info);
9243c3d1
JZZ
2475 }
2476
29331e72
LD
2477 if (m_vsetvl_def_in)
2478 sbitmap_vector_free (m_vsetvl_def_in);
2479 if (m_vsetvl_def_out)
2480 sbitmap_vector_free (m_vsetvl_def_out);
9243c3d1 2481
29331e72
LD
2482 sbitmap *def_loc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2483 m_vsetvl_def_exprs.length ());
2484 sbitmap *m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2485 m_vsetvl_def_exprs.length ());
9243c3d1 2486
29331e72
LD
2487 m_vsetvl_def_in = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2488 m_vsetvl_def_exprs.length ());
2489 m_vsetvl_def_out = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2490 m_vsetvl_def_exprs.length ());
9243c3d1 2491
29331e72
LD
2492 bitmap_vector_clear (def_loc, last_basic_block_for_fn (cfun));
2493 bitmap_vector_clear (m_kill, last_basic_block_for_fn (cfun));
2494 bitmap_vector_clear (m_vsetvl_def_out, last_basic_block_for_fn (cfun));
9243c3d1 2495
29331e72
LD
2496 for (const bb_info *bb : crtl->ssa->bbs ())
2497 {
2498 vsetvl_block_info &block_info = get_block_info (bb);
2499 if (block_info.empty_p ())
9243c3d1 2500 {
29331e72 2501 for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i += 1)
9243c3d1 2502 {
29331e72
LD
2503 const vsetvl_info &info = *m_vsetvl_def_exprs[i];
2504 if (!info.has_nonvlmax_reg_avl ())
2505 continue;
2506 unsigned int regno;
2507 sbitmap_iterator sbi;
2508 EXECUTE_IF_SET_IN_BITMAP (m_reg_def_loc[bb->index ()], 0, regno,
2509 sbi)
2510 if (regno == REGNO (info.get_avl ()))
2511 {
2512 bitmap_set_bit (m_kill[bb->index ()], i);
2513 bitmap_set_bit (def_loc[bb->index ()],
2514 get_expr_index (m_vsetvl_def_exprs,
2515 m_unknow_info));
2516 }
9243c3d1 2517 }
29331e72 2518 continue;
9243c3d1
JZZ
2519 }
2520
29331e72
LD
2521 vsetvl_info &footer_info = block_info.get_exit_info ();
2522 bitmap_ones (m_kill[bb->index ()]);
2523 bitmap_set_bit (def_loc[bb->index ()],
2524 get_expr_index (m_vsetvl_def_exprs, footer_info));
9243c3d1
JZZ
2525 }
2526
29331e72
LD
2527 /* Set the def_out of the ENTRY basic block to m_unknow_info expr. */
2528 basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
2529 bitmap_set_bit (m_vsetvl_def_out[entry->index],
2530 get_expr_index (m_vsetvl_def_exprs, m_unknow_info));
9243c3d1 2531
29331e72
LD
2532 compute_reaching_defintion (def_loc, m_kill, m_vsetvl_def_in,
2533 m_vsetvl_def_out);
2534
2535 if (dump_file && (dump_flags & TDF_DETAILS))
e030af3e 2536 {
29331e72
LD
2537 fprintf (dump_file,
 2538	       "\n Compute vsetvl info reaching definition data:\n\n");
2539 fprintf (dump_file, " Expression List (%d):\n",
2540 m_vsetvl_def_exprs.length ());
2541 for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i++)
2542 {
2543 const auto &info = *m_vsetvl_def_exprs[i];
2544 fprintf (dump_file, " Expr[%u]: ", i);
2545 info.dump (dump_file, " ");
2546 }
2547 fprintf (dump_file, "\n bitmap data:\n");
2548 for (const bb_info *bb : crtl->ssa->bbs ())
2549 {
2550 unsigned int i = bb->index ();
2551 fprintf (dump_file, " BB %u:\n", i);
2552 fprintf (dump_file, " def_loc: ");
2553 dump_bitmap_file (dump_file, def_loc[i]);
2554 fprintf (dump_file, " kill: ");
2555 dump_bitmap_file (dump_file, m_kill[i]);
2556 fprintf (dump_file, " vsetvl_def_in: ");
2557 dump_bitmap_file (dump_file, m_vsetvl_def_in[i]);
2558 fprintf (dump_file, " vsetvl_def_out: ");
2559 dump_bitmap_file (dump_file, m_vsetvl_def_out[i]);
2560 }
e030af3e 2561 }
4f673c5e 2562
29331e72 2563 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 2564 {
29331e72
LD
2565 vsetvl_block_info &block_info = get_block_info (bb);
2566 if (block_info.empty_p ())
2567 continue;
2568 vsetvl_info &curr_info = block_info.get_entry_info ();
2569 if (!curr_info.valid_p ())
2570 continue;
2571
2572 unsigned int expr_index;
2573 sbitmap_iterator sbi;
2574 gcc_assert (
2575 !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
2576 bool full_available = true;
2577 EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[bb->index ()], 0, expr_index,
2578 sbi)
4f673c5e 2579 {
29331e72
LD
2580 vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
2581 if (!prev_info.valid_p ()
2582 || !m_dem.available_p (prev_info, curr_info))
2583 {
2584 full_available = false;
2585 break;
2586 }
4f673c5e 2587 }
29331e72 2588 block_info.full_available = full_available;
4f673c5e 2589 }
29331e72
LD
2590
2591 sbitmap_vector_free (def_loc);
2592 sbitmap_vector_free (m_kill);
e030af3e 2593}
9243c3d1 2594
e030af3e 2595/* Compute the local properties of each recorded expression.
6b6b9c68 2596
e030af3e
JZ
2597 Local properties are those that are defined by the block, irrespective of
2598 other blocks.
6b6b9c68 2599
e030af3e
JZ
2600 An expression is transparent in a block if its operands are not modified
2601 in the block.
6b6b9c68 2602
e030af3e
JZ
2603 An expression is computed (locally available) in a block if it is computed
 2604   at least once and the expression would contain the same value if the
2605 computation was moved to the end of the block.
2606
2607 An expression is locally anticipatable in a block if it is computed at
 2608   least once and the expression would contain the same value if the computation
2609 was moved to the beginning of the block. */
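/* For illustration, consider a hypothetical block:

     bb 5:
       vsetvli zero, a4, e32, m1, ta, ma   ; only vsetvl; a4 unmodified
       vle32.v v1, (a0)                    ; before this point
       ...
       addi a4, a4, -16                    ; a4 clobbered afterwards

   The vsetvl expression is locally anticipatable in bb 5 (its operands are
   unchanged before the occurrence), but it is neither transparent nor
   locally available there, because its AVL register a4 is modified later
   in the block.  */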
2610void
29331e72 2611pre_vsetvl::compute_lcm_local_properties ()
6b6b9c68 2612{
29331e72
LD
2613 m_exprs.truncate (0);
2614 for (const bb_info *bb : crtl->ssa->bbs ())
2615 {
2616 vsetvl_block_info &block_info = get_block_info (bb);
2617 if (block_info.empty_p ())
2618 continue;
2619 vsetvl_info &header_info = block_info.get_entry_info ();
2620 vsetvl_info &footer_info = block_info.get_exit_info ();
2621 gcc_assert (footer_info.valid_p () || footer_info.unknown_p ());
2622 add_expr (m_exprs, header_info);
2623 add_expr (m_exprs, footer_info);
2624 }
2625
2626 int num_exprs = m_exprs.length ();
2627 if (m_avloc)
2628 sbitmap_vector_free (m_avloc);
2629 if (m_kill)
2630 sbitmap_vector_free (m_kill);
2631 if (m_antloc)
2632 sbitmap_vector_free (m_antloc);
2633 if (m_transp)
2634 sbitmap_vector_free (m_transp);
2635 if (m_avin)
2636 sbitmap_vector_free (m_avin);
2637 if (m_avout)
2638 sbitmap_vector_free (m_avout);
2639
2640 m_avloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2641 m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2642 m_antloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2643 m_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2644 m_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2645 m_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2646
2647 bitmap_vector_clear (m_avloc, last_basic_block_for_fn (cfun));
2648 bitmap_vector_clear (m_antloc, last_basic_block_for_fn (cfun));
2649 bitmap_vector_clear (m_transp, last_basic_block_for_fn (cfun));
2650
e030af3e
JZ
2651 /* - If T is locally available at the end of a block, then T' must be
2652 available at the end of the same block. Since some optimization has
 2653     occurred earlier, T' might not be locally available; however, it must
2654 have been previously computed on all paths. As a formula, T at AVLOC(B)
2655 implies that T' at AVOUT(B).
2656 An "available occurrence" is one that is the last occurrence in the
2657 basic block and the operands are not modified by following statements in
2658 the basic block [including this insn].
6b6b9c68 2659
e030af3e
JZ
 2660     - If T is locally anticipated at the beginning of a block, then either
 2661       T' is locally anticipated or it is already available from previous
 2662       blocks.  As a formula, this means that T at ANTLOC(B) implies T' at
 2663       ANTLOC(B) or at AVIN(B).
2664 An "anticipatable occurrence" is one that is the first occurrence in the
2665 basic block, the operands are not modified in the basic block prior
2666 to the occurrence and the output is not used between the start of
2667 the block and the occurrence. */
e030af3e 2668 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 2669 {
29331e72
LD
2670 unsigned bb_index = bb->index ();
2671 vsetvl_block_info &block_info = get_block_info (bb);
9243c3d1 2672
29331e72
LD
2673 /* Compute m_transp */
2674 if (block_info.empty_p ())
9243c3d1 2675 {
29331e72
LD
2676 bitmap_ones (m_transp[bb_index]);
2677 for (int i = 0; i < num_exprs; i += 1)
4f673c5e 2678 {
29331e72
LD
2679 const vsetvl_info &info = *m_exprs[i];
2680 if (!info.has_nonvlmax_reg_avl () && !info.has_vl ())
2681 continue;
2682
7b2984ad 2683 if (info.has_nonvlmax_reg_avl ())
29331e72 2684 {
7b2984ad
JZ
2685 unsigned int regno;
2686 sbitmap_iterator sbi;
2687 EXECUTE_IF_SET_IN_BITMAP (m_reg_def_loc[bb->index ()], 0,
2688 regno, sbi)
2689 {
2690 if (regno == REGNO (info.get_avl ()))
2691 bitmap_clear_bit (m_transp[bb->index ()], i);
2692 }
29331e72
LD
2693 }
2694
c9d5b46a 2695 for (insn_info *insn : bb->real_nondebug_insns ())
e030af3e 2696 {
9c16ca93
JZ
2697 if (info.has_nonvlmax_reg_avl ()
2698 && find_access (insn->defs (), REGNO (info.get_avl ())))
e030af3e 2699 {
29331e72 2700 bitmap_clear_bit (m_transp[bb_index], i);
e030af3e
JZ
2701 break;
2702 }
c9d5b46a
JZ
2703
2704 if (info.has_vl ()
2705 && reg_mentioned_p (info.get_vl (), insn->rtl ()))
2706 {
2707 if (find_access (insn->defs (), REGNO (info.get_vl ())))
2708 /* We can't fuse vsetvl into the blocks that modify the
2709 VL operand since successors of such blocks will need
 2710		     the value those blocks are defining.
2711
2712 bb 4: def a5
2713 / \
2714 bb 5:use a5 bb 6:vsetvl a5, 5
2715
2716 The example above shows that we can't fuse vsetvl
2717 from bb 6 into bb 4 since the successor bb 5 is using
2718 the value defined in bb 4. */
2719 ;
2720 else
2721 {
2722 /* We can't fuse vsetvl into the blocks that use the
 2723		       VL operand which has a different value from the
2724 vsetvl info.
2725
2726 bb 4: def a5
2727 |
2728 bb 5: use a5
2729 |
2730 bb 6: def a5
2731 |
2732 bb 7: use a5
2733
2734 The example above shows that we can't fuse vsetvl
2735 from bb 6 into bb 5 since their value is different.
2736 */
2737 resource_info resource
2738 = full_register (REGNO (info.get_vl ()));
2739 def_lookup dl = crtl->ssa->find_def (resource, insn);
2740 def_info *def
2741 = dl.matching_set_or_last_def_of_prev_group ();
db642d60 2742 insn_info *def_insn = extract_single_source (def);
c9d5b46a
JZ
2743 if (def_insn && vsetvl_insn_p (def_insn->rtl ()))
2744 {
2745 vsetvl_info def_info = vsetvl_info (def_insn);
2746 if (m_dem.compatible_p (def_info, info))
2747 continue;
2748 }
2749 }
2750
2751 bitmap_clear_bit (m_transp[bb_index], i);
2752 break;
2753 }
e030af3e 2754 }
4f673c5e 2755 }
9243c3d1 2756
29331e72 2757 continue;
9243c3d1 2758 }
e030af3e 2759
29331e72
LD
2760 vsetvl_info &header_info = block_info.get_entry_info ();
2761 vsetvl_info &footer_info = block_info.get_exit_info ();
9243c3d1 2762
ef21ae5c 2763 if (header_info.valid_p () && anticipated_exp_p (header_info))
29331e72
LD
2764 bitmap_set_bit (m_antloc[bb_index],
2765 get_expr_index (m_exprs, header_info));
9243c3d1 2766
29331e72
LD
2767 if (footer_info.valid_p ())
2768 for (int i = 0; i < num_exprs; i += 1)
2769 {
2770 const vsetvl_info &info = *m_exprs[i];
2771 if (!info.valid_p ())
2772 continue;
2773 if (available_exp_p (footer_info, info))
2774 bitmap_set_bit (m_avloc[bb_index], i);
2775 }
2776 }
9243c3d1 2777
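  /* An expression is killed in a block when the block neither keeps it
     transparent nor recomputes it at its end: KILL = ~(TRANSP | AVLOC).  */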
29331e72 2778 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 2779 {
29331e72
LD
2780 unsigned bb_index = bb->index ();
2781 bitmap_ior (m_kill[bb_index], m_transp[bb_index], m_avloc[bb_index]);
2782 bitmap_not (m_kill[bb_index], m_kill[bb_index]);
9243c3d1
JZZ
2783 }
2784
29331e72 2785 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 2786 {
29331e72 2787 unsigned bb_index = bb->index ();
9243c3d1
JZZ
2788 edge e;
2789 edge_iterator ei;
29331e72 2790 FOR_EACH_EDGE (e, ei, bb->cfg_bb ()->preds)
9243c3d1
JZZ
2791 if (e->flags & EDGE_COMPLEX)
2792 {
29331e72
LD
2793 bitmap_clear (m_antloc[bb_index]);
2794 bitmap_clear (m_transp[bb_index]);
9243c3d1
JZZ
2795 }
2796 }
2797}
2798
29331e72
LD
2799void
2800pre_vsetvl::fuse_local_vsetvl_info ()
e030af3e 2801{
29331e72
LD
2802 m_reg_def_loc
2803 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), GP_REG_LAST + 1);
2804 bitmap_vector_clear (m_reg_def_loc, last_basic_block_for_fn (cfun));
2805 bitmap_ones (m_reg_def_loc[ENTRY_BLOCK_PTR_FOR_FN (cfun)->index]);
2806
2807 for (bb_info *bb : crtl->ssa->bbs ())
e030af3e 2808 {
29331e72 2809 auto &block_info = get_block_info (bb);
4fd09aed 2810 block_info.bb = bb;
29331e72 2811 if (dump_file && (dump_flags & TDF_DETAILS))
e030af3e 2812 {
29331e72
LD
2813 fprintf (dump_file, " Try fuse basic block %d\n", bb->index ());
2814 }
2815 auto_vec<vsetvl_info> infos;
2816 for (insn_info *insn : bb->real_nondebug_insns ())
2817 {
2818 vsetvl_info curr_info = vsetvl_info (insn);
2819 if (curr_info.valid_p () || curr_info.unknown_p ())
2820 infos.safe_push (curr_info);
2821
2822 /* Collecting GP registers modified by the current bb. */
2823 if (insn->is_real ())
2824 for (def_info *def : insn->defs ())
2825 if (def->is_reg () && GP_REG_P (def->regno ()))
2826 bitmap_set_bit (m_reg_def_loc[bb->index ()], def->regno ());
2827 }
e030af3e 2828
29331e72
LD
2829 vsetvl_info prev_info = vsetvl_info ();
2830 prev_info.set_empty ();
2831 for (auto &curr_info : infos)
2832 {
2833 if (prev_info.empty_p ())
2834 prev_info = curr_info;
2835 else if ((curr_info.unknown_p () && prev_info.valid_p ())
2836 || (curr_info.valid_p () && prev_info.unknown_p ()))
2837 {
4fd09aed 2838 block_info.local_infos.safe_push (prev_info);
29331e72
LD
2839 prev_info = curr_info;
2840 }
2841 else if (curr_info.valid_p () && prev_info.valid_p ())
2842 {
2843 if (m_dem.available_p (prev_info, curr_info))
e7b585a4 2844 {
29331e72 2845 if (dump_file && (dump_flags & TDF_DETAILS))
e7b585a4 2846 {
29331e72
LD
2847 fprintf (dump_file,
2848 " Ignore curr info since prev info "
2849 "available with it:\n");
2850 fprintf (dump_file, " prev_info: ");
2851 prev_info.dump (dump_file, " ");
2852 fprintf (dump_file, " curr_info: ");
2853 curr_info.dump (dump_file, " ");
2854 fprintf (dump_file, "\n");
e7b585a4 2855 }
4cd4c34a 2856 if (!curr_info.vl_used_by_non_rvv_insn_p ()
29331e72
LD
2857 && vsetvl_insn_p (curr_info.get_insn ()->rtl ()))
2858 m_delete_list.safe_push (curr_info);
e030af3e 2859
29331e72
LD
2860 if (curr_info.get_read_vl_insn ())
2861 prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ());
e030af3e 2862 }
29331e72 2863 else if (m_dem.compatible_p (prev_info, curr_info))
e030af3e 2864 {
29331e72 2865 if (dump_file && (dump_flags & TDF_DETAILS))
e030af3e 2866 {
29331e72
LD
2867 fprintf (dump_file, " Fuse curr info since prev info "
2868 "compatible with it:\n");
2869 fprintf (dump_file, " prev_info: ");
2870 prev_info.dump (dump_file, " ");
2871 fprintf (dump_file, " curr_info: ");
2872 curr_info.dump (dump_file, " ");
e030af3e 2873 }
29331e72
LD
2874 m_dem.merge (prev_info, curr_info);
2875 if (curr_info.get_read_vl_insn ())
2876 prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ());
2877 if (dump_file && (dump_flags & TDF_DETAILS))
e030af3e 2878 {
29331e72
LD
2879 fprintf (dump_file, " prev_info after fused: ");
2880 prev_info.dump (dump_file, " ");
2881 fprintf (dump_file, "\n");
e030af3e 2882 }
e030af3e
JZ
2883 }
2884 else
2885 {
29331e72
LD
2886 if (dump_file && (dump_flags & TDF_DETAILS))
2887 {
2888 fprintf (dump_file,
 2889		       " Cannot fuse incompatible infos:\n");
2890 fprintf (dump_file, " prev_info: ");
2891 prev_info.dump (dump_file, " ");
2892 fprintf (dump_file, " curr_info: ");
2893 curr_info.dump (dump_file, " ");
2894 }
4fd09aed 2895 block_info.local_infos.safe_push (prev_info);
29331e72 2896 prev_info = curr_info;
e030af3e
JZ
2897 }
2898 }
2899 }
29331e72
LD
2900
2901 if (prev_info.valid_p () || prev_info.unknown_p ())
4fd09aed 2902 block_info.local_infos.safe_push (prev_info);
e030af3e 2903 }
e030af3e 2904
29331e72
LD
2905 m_avl_regs = sbitmap_alloc (GP_REG_LAST + 1);
2906 bitmap_clear (m_avl_regs);
2907 for (const bb_info *bb : crtl->ssa->bbs ())
e030af3e 2908 {
29331e72
LD
2909 vsetvl_block_info &block_info = get_block_info (bb);
2910 if (block_info.empty_p ())
2911 continue;
2912
2913 vsetvl_info &header_info = block_info.get_entry_info ();
2914 if (header_info.valid_p () && header_info.has_nonvlmax_reg_avl ())
e030af3e 2915 {
29331e72
LD
2916 gcc_assert (GP_REG_P (REGNO (header_info.get_avl ())));
2917 bitmap_set_bit (m_avl_regs, REGNO (header_info.get_avl ()));
e030af3e 2918 }
e030af3e
JZ
2919 }
2920}
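/* A hedged local-fusion example (instruction sequence made up): within one
   block

     vsetvli zero, a4, e32, m1, ta, ma
     vadd.vv v1, v2, v3
     vmul.vv v4, v1, v2

   the info parsed from vmul.vv adds no demand beyond what has already been
   accumulated from the preceding instructions, so it is ignored as
   "available" and a single local vsetvl info survives for the block.  */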
2921
29331e72 2922
9243c3d1 2923bool
29331e72 2924pre_vsetvl::earliest_fuse_vsetvl_info ()
9243c3d1 2925{
29331e72
LD
2926 compute_avl_def_data ();
2927 compute_vsetvl_def_data ();
2928 compute_lcm_local_properties ();
9243c3d1 2929
29331e72
LD
2930 unsigned num_exprs = m_exprs.length ();
2931 struct edge_list *m_edges = create_edge_list ();
2932 unsigned num_edges = NUM_EDGES (m_edges);
2933 sbitmap *antin
2934 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2935 sbitmap *antout
2936 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
005fad9d 2937
29331e72 2938 sbitmap *earliest = sbitmap_vector_alloc (num_edges, num_exprs);
9243c3d1 2939
29331e72
LD
2940 compute_available (m_avloc, m_kill, m_avout, m_avin);
2941 compute_antinout_edge (m_antloc, m_transp, antin, antout);
2942 compute_earliest (m_edges, num_exprs, antin, antout, m_avout, m_kill,
2943 earliest);
2944
2945 if (dump_file && (dump_flags & TDF_DETAILS))
9243c3d1 2946 {
29331e72
LD
2947 fprintf (dump_file, "\n Compute LCM earliest insert data:\n\n");
2948 fprintf (dump_file, " Expression List (%u):\n", num_exprs);
2949 for (unsigned i = 0; i < num_exprs; i++)
9243c3d1 2950 {
29331e72
LD
2951 const auto &info = *m_exprs[i];
2952 fprintf (dump_file, " Expr[%u]: ", i);
2953 info.dump (dump_file, " ");
9243c3d1 2954 }
29331e72
LD
2955 fprintf (dump_file, "\n bitmap data:\n");
2956 for (const bb_info *bb : crtl->ssa->bbs ())
2957 {
2958 unsigned int i = bb->index ();
2959 fprintf (dump_file, " BB %u:\n", i);
2960 fprintf (dump_file, " avloc: ");
2961 dump_bitmap_file (dump_file, m_avloc[i]);
2962 fprintf (dump_file, " kill: ");
2963 dump_bitmap_file (dump_file, m_kill[i]);
2964 fprintf (dump_file, " antloc: ");
2965 dump_bitmap_file (dump_file, m_antloc[i]);
2966 fprintf (dump_file, " transp: ");
2967 dump_bitmap_file (dump_file, m_transp[i]);
2968
2969 fprintf (dump_file, " avin: ");
2970 dump_bitmap_file (dump_file, m_avin[i]);
2971 fprintf (dump_file, " avout: ");
2972 dump_bitmap_file (dump_file, m_avout[i]);
2973 fprintf (dump_file, " antin: ");
2974 dump_bitmap_file (dump_file, antin[i]);
2975 fprintf (dump_file, " antout: ");
2976 dump_bitmap_file (dump_file, antout[i]);
2977 }
2978 fprintf (dump_file, "\n");
2979 fprintf (dump_file, " earliest:\n");
2980 for (unsigned ed = 0; ed < num_edges; ed++)
2981 {
2982 edge eg = INDEX_EDGE (m_edges, ed);
9243c3d1 2983
29331e72
LD
2984 if (bitmap_empty_p (earliest[ed]))
2985 continue;
2986 fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index,
2987 eg->dest->index);
2988 dump_bitmap_file (dump_file, earliest[ed]);
2989 }
2990 fprintf (dump_file, "\n");
2991 }
9243c3d1 2992
29331e72 2993 if (dump_file && (dump_flags & TDF_DETAILS))
9243c3d1 2994 {
29331e72
LD
2995 fprintf (dump_file, " Fused global info result:\n");
2996 }
9243c3d1 2997
29331e72
LD
2998 bool changed = false;
2999 for (unsigned ed = 0; ed < num_edges; ed++)
3000 {
3001 sbitmap e = earliest[ed];
3002 if (bitmap_empty_p (e))
9243c3d1
JZZ
3003 continue;
3004
29331e72
LD
3005 unsigned int expr_index;
3006 sbitmap_iterator sbi;
3007 EXECUTE_IF_SET_IN_BITMAP (e, 0, expr_index, sbi)
ec99ffab 3008 {
29331e72
LD
3009 vsetvl_info &curr_info = *m_exprs[expr_index];
3010 if (!curr_info.valid_p ())
3011 continue;
3012
3013 edge eg = INDEX_EDGE (m_edges, ed);
3014 if (eg->probability == profile_probability::never ())
3015 continue;
3016 if (eg->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
3017 || eg->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
3018 continue;
ff8f9544 3019
c9d5b46a
JZ
 3020	  /* When the earliest edge has multiple set bits, such an edge may
 3021	     have an infinite loop in its preds or succs, or carry multiple
 3022	     conflicting vsetvl expressions, which makes the edge unrelated.
 3023	     We don't perform fusion in such a situation. */
3024 if (bitmap_count_bits (e) != 1)
3025 continue;
3026
29331e72
LD
3027 vsetvl_block_info &src_block_info = get_block_info (eg->src);
3028 vsetvl_block_info &dest_block_info = get_block_info (eg->dest);
ff8f9544 3029
29331e72
LD
3030 if (src_block_info.probability
3031 == profile_probability::uninitialized ())
ff8f9544 3032 continue;
9243c3d1 3033
29331e72 3034 if (src_block_info.empty_p ())
9243c3d1 3035 {
29331e72
LD
3036 vsetvl_info new_curr_info = curr_info;
3037 new_curr_info.set_bb (crtl->ssa->bb (eg->dest));
3038 bool has_compatible_p = false;
3039 unsigned int def_expr_index;
3040 sbitmap_iterator sbi2;
3041 EXECUTE_IF_SET_IN_BITMAP (
3042 m_vsetvl_def_in[new_curr_info.get_bb ()->index ()], 0,
3043 def_expr_index, sbi2)
9243c3d1 3044 {
29331e72
LD
3045 vsetvl_info &prev_info = *m_vsetvl_def_exprs[def_expr_index];
3046 if (!prev_info.valid_p ())
3047 continue;
3048 if (m_dem.compatible_p (prev_info, new_curr_info))
9243c3d1 3049 {
29331e72
LD
3050 has_compatible_p = true;
3051 break;
9243c3d1 3052 }
9243c3d1 3053 }
29331e72 3054 if (!has_compatible_p)
9243c3d1 3055 {
29331e72
LD
3056 if (dump_file && (dump_flags & TDF_DETAILS))
3057 {
3058 fprintf (dump_file,
 3059		     " Forbid lifting up vsetvl info into bb %u "
 3060		     "since no vsetvl info reaching in "
 3061		     "is compatible with it:",
3062 eg->src->index);
3063 curr_info.dump (dump_file, " ");
3064 }
3065 continue;
9243c3d1
JZZ
3066 }
3067
29331e72 3068 if (dump_file && (dump_flags & TDF_DETAILS))
e030af3e
JZ
3069 {
3070 fprintf (dump_file,
29331e72
LD
3071 " Set empty bb %u to info:", eg->src->index);
3072 curr_info.dump (dump_file, " ");
e030af3e 3073 }
29331e72
LD
3074 src_block_info.set_info (curr_info);
3075 src_block_info.probability = dest_block_info.probability;
3076 changed = true;
9243c3d1 3077 }
29331e72
LD
3078 else if (src_block_info.has_info ())
3079 {
3080 vsetvl_info &prev_info = src_block_info.get_exit_info ();
3081 gcc_assert (prev_info.valid_p ());
3082
3083 if (m_dem.compatible_p (prev_info, curr_info))
3084 {
3085 if (dump_file && (dump_flags & TDF_DETAILS))
3086 {
3087 fprintf (dump_file, " Fuse curr info since prev info "
3088 "compatible with it:\n");
3089 fprintf (dump_file, " prev_info: ");
3090 prev_info.dump (dump_file, " ");
3091 fprintf (dump_file, " curr_info: ");
3092 curr_info.dump (dump_file, " ");
3093 }
3094 m_dem.merge (prev_info, curr_info);
3095 if (dump_file && (dump_flags & TDF_DETAILS))
3096 {
3097 fprintf (dump_file, " prev_info after fused: ");
3098 prev_info.dump (dump_file, " ");
3099 fprintf (dump_file, "\n");
3100 }
3101 changed = true;
3102 if (src_block_info.has_info ())
3103 src_block_info.probability += dest_block_info.probability;
3104 }
3105 else if (src_block_info.has_info ()
3106 && !m_dem.compatible_p (prev_info, curr_info))
3107 {
3108 /* Cancel lift up if probabilities are equal. */
3109 if (successors_probability_equal_p (eg->src))
3110 {
3111 if (dump_file && (dump_flags & TDF_DETAILS))
3112 {
3113 fprintf (dump_file,
 3114			   " Change empty bb %u from:",
3115 eg->src->index);
3116 prev_info.dump (dump_file, " ");
3117 fprintf (dump_file,
3118 " to (higher probability):");
3119 curr_info.dump (dump_file, " ");
3120 }
3121 src_block_info.set_empty_info ();
3122 src_block_info.probability
3123 = profile_probability::uninitialized ();
3124 changed = true;
3125 }
3126 /* Choose the one with higher probability. */
3127 else if (dest_block_info.probability
3128 > src_block_info.probability)
3129 {
3130 if (dump_file && (dump_flags & TDF_DETAILS))
3131 {
3132 fprintf (dump_file,
 3133			   " Change empty bb %u from:",
3134 eg->src->index);
3135 prev_info.dump (dump_file, " ");
3136 fprintf (dump_file,
3137 " to (higher probability):");
3138 curr_info.dump (dump_file, " ");
3139 }
3140 src_block_info.set_info (curr_info);
3141 src_block_info.probability = dest_block_info.probability;
3142 changed = true;
3143 }
3144 }
3145 }
3146 else
e030af3e 3147 {
29331e72
LD
3148 vsetvl_info &prev_info = src_block_info.get_exit_info ();
3149 if (!prev_info.valid_p ()
3150 || m_dem.available_p (prev_info, curr_info))
3151 continue;
3152
3153 if (m_dem.compatible_p (prev_info, curr_info))
3154 {
3155 if (dump_file && (dump_flags & TDF_DETAILS))
3156 {
3157 fprintf (dump_file, " Fuse curr info since prev info "
3158 "compatible with it:\n");
3159 fprintf (dump_file, " prev_info: ");
3160 prev_info.dump (dump_file, " ");
3161 fprintf (dump_file, " curr_info: ");
3162 curr_info.dump (dump_file, " ");
3163 }
3164 m_dem.merge (prev_info, curr_info);
3165 if (dump_file && (dump_flags & TDF_DETAILS))
3166 {
3167 fprintf (dump_file, " prev_info after fused: ");
3168 prev_info.dump (dump_file, " ");
3169 fprintf (dump_file, "\n");
3170 }
3171 changed = true;
3172 }
e030af3e 3173 }
9243c3d1
JZZ
3174 }
3175 }
3176
0d50facd 3177 if (dump_file && (dump_flags & TDF_DETAILS))
c919d059 3178 {
29331e72 3179 fprintf (dump_file, "\n");
c919d059 3180 }
c919d059 3181
29331e72
LD
3182 sbitmap_vector_free (antin);
3183 sbitmap_vector_free (antout);
3184 sbitmap_vector_free (earliest);
3185 free_edge_list (m_edges);
c919d059 3186
29331e72 3187 return changed;
c919d059
KC
3188}
3189
8421f279 3190void
29331e72 3191pre_vsetvl::pre_global_vsetvl_info ()
c919d059 3192{
29331e72
LD
3193 compute_avl_def_data ();
3194 compute_vsetvl_def_data ();
3195 compute_lcm_local_properties ();
c919d059 3196
29331e72
LD
3197 unsigned num_exprs = m_exprs.length ();
3198 m_edges = pre_edge_lcm_avs (num_exprs, m_transp, m_avloc, m_antloc, m_kill,
3199 m_avin, m_avout, &m_insert, &m_del);
3200 unsigned num_edges = NUM_EDGES (m_edges);
c919d059 3201
29331e72
LD
3202 if (dump_file && (dump_flags & TDF_DETAILS))
3203 {
3204 fprintf (dump_file, "\n Compute LCM insert and delete data:\n\n");
3205 fprintf (dump_file, " Expression List (%u):\n", num_exprs);
3206 for (unsigned i = 0; i < num_exprs; i++)
c919d059 3207 {
29331e72
LD
3208 const auto &info = *m_exprs[i];
3209 fprintf (dump_file, " Expr[%u]: ", i);
3210 info.dump (dump_file, " ");
c919d059 3211 }
29331e72
LD
3212 fprintf (dump_file, "\n bitmap data:\n");
3213 for (const bb_info *bb : crtl->ssa->bbs ())
c919d059 3214 {
29331e72
LD
3215 unsigned i = bb->index ();
3216 fprintf (dump_file, " BB %u:\n", i);
3217 fprintf (dump_file, " avloc: ");
3218 dump_bitmap_file (dump_file, m_avloc[i]);
3219 fprintf (dump_file, " kill: ");
3220 dump_bitmap_file (dump_file, m_kill[i]);
3221 fprintf (dump_file, " antloc: ");
3222 dump_bitmap_file (dump_file, m_antloc[i]);
3223 fprintf (dump_file, " transp: ");
3224 dump_bitmap_file (dump_file, m_transp[i]);
3225
3226 fprintf (dump_file, " avin: ");
3227 dump_bitmap_file (dump_file, m_avin[i]);
3228 fprintf (dump_file, " avout: ");
3229 dump_bitmap_file (dump_file, m_avout[i]);
3230 fprintf (dump_file, " del: ");
3231 dump_bitmap_file (dump_file, m_del[i]);
c919d059 3232 }
29331e72
LD
3233 fprintf (dump_file, "\n");
3234 fprintf (dump_file, " insert:\n");
3235 for (unsigned ed = 0; ed < num_edges; ed++)
8421f279 3236 {
29331e72 3237 edge eg = INDEX_EDGE (m_edges, ed);
c919d059 3238
29331e72
LD
3239 if (bitmap_empty_p (m_insert[ed]))
3240 continue;
3241 fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index,
3242 eg->dest->index);
3243 dump_bitmap_file (dump_file, m_insert[ed]);
c919d059 3244 }
29331e72
LD
3245 }
3246
 3247  /* Remove vsetvl infos as LCM suggests. */
3248 for (const bb_info *bb : crtl->ssa->bbs ())
3249 {
3250 sbitmap d = m_del[bb->index ()];
3251 if (bitmap_count_bits (d) == 0)
c919d059 3252 continue;
29331e72
LD
3253 gcc_assert (bitmap_count_bits (d) == 1);
3254 unsigned expr_index = bitmap_first_set_bit (d);
3255 vsetvl_info &info = *m_exprs[expr_index];
3256 gcc_assert (info.valid_p ());
3257 gcc_assert (info.get_bb () == bb);
3258 const vsetvl_block_info &block_info = get_block_info (info.get_bb ());
3259 gcc_assert (block_info.get_entry_info () == info);
3260 info.set_delete ();
3261 }
c919d059 3262
ef21ae5c
JZ
 3263  /* Remove vsetvl infos if all predecessors are available to the block. */
3264 for (const bb_info *bb : crtl->ssa->bbs ())
3265 {
3266 vsetvl_block_info &block_info = get_block_info (bb);
3267 if (block_info.empty_p () || !block_info.full_available)
3268 continue;
3269
3270 vsetvl_info &info = block_info.get_entry_info ();
3271 info.set_delete ();
3272 }
3273
  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      vsetvl_block_info &block_info = get_block_info (bb);
      if (block_info.empty_p ())
        continue;
      vsetvl_info &curr_info = block_info.get_entry_info ();
      if (curr_info.delete_p ())
        {
          if (block_info.local_infos.is_empty ())
            continue;
          curr_info = block_info.local_infos[0];
        }
      if (curr_info.valid_p () && !curr_info.vl_used_by_non_rvv_insn_p ()
          && preds_all_same_avl_and_ratio_p (curr_info))
        curr_info.set_change_vtype_only ();

      vsetvl_info prev_info = vsetvl_info ();
      prev_info.set_empty ();
      for (auto &curr_info : block_info.local_infos)
        {
          if (prev_info.valid_p () && curr_info.valid_p ()
              && m_dem.avl_available_p (prev_info, curr_info)
              && prev_info.get_ratio () == curr_info.get_ratio ())
            curr_info.set_change_vtype_only ();
          prev_info = curr_info;
        }
    }
}

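/* Phase 4: materialize the optimized virtual CFG.  Walk every block's fused
   local vsetvl infos and remove the vsetvl insns that became redundant,
   rewrite the ones whose required info changed, and insert new vsetvl insns
   before RVV insns that still need one.  Finally insert the vsetvl insns
   that LCM requested on edges and commit the edge insertions.  */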
void
pre_vsetvl::emit_vsetvl ()
{
  bool need_commit = false;

  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      for (const auto &curr_info : get_block_info (bb).local_infos)
        {
          insn_info *insn = curr_info.get_insn ();
          if (curr_info.delete_p ())
            {
              if (vsetvl_insn_p (insn->rtl ()))
                remove_vsetvl_insn (curr_info);
              continue;
            }
          else if (curr_info.valid_p ())
            {
              if (vsetvl_insn_p (insn->rtl ()))
                {
                  const vsetvl_info temp = vsetvl_info (insn);
                  if (!(curr_info == temp))
                    {
                      if (dump_file)
                        {
                          fprintf (dump_file, "\n Change vsetvl info from: ");
                          temp.dump (dump_file, " ");
                          fprintf (dump_file, " to: ");
                          curr_info.dump (dump_file, " ");
                        }
                      change_vsetvl_insn (curr_info);
                    }
                }
              else
                {
                  if (dump_file)
                    {
                      fprintf (dump_file,
                               "\n Insert vsetvl info before insn %d: ",
                               insn->uid ());
                      curr_info.dump (dump_file, " ");
                    }
                  insert_vsetvl_insn (EMIT_BEFORE, curr_info);
                }
            }
        }
    }

  for (const vsetvl_info &item : m_delete_list)
    {
      gcc_assert (vsetvl_insn_p (item.get_insn ()->rtl ()));
      remove_vsetvl_insn (item);
    }

  /* Insert the exit (footer) vsetvl infos that were not deleted after the
     lift-up phase.  */
  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      const vsetvl_block_info &block_info = get_block_info (bb);
      if (!block_info.has_info ())
        continue;

      const vsetvl_info &footer_info = block_info.get_exit_info ();

      if (footer_info.delete_p ())
        continue;

      edge eg;
      edge_iterator eg_iterator;
      FOR_EACH_EDGE (eg, eg_iterator, bb->cfg_bb ()->succs)
        {
          gcc_assert (!(eg->flags & EDGE_ABNORMAL));
          if (dump_file)
            {
              fprintf (dump_file,
                       "\n Insert missed vsetvl info at edge(bb %u -> bb %u): ",
                       eg->src->index, eg->dest->index);
              footer_info.dump (dump_file, " ");
            }
          start_sequence ();
          insert_vsetvl_insn (EMIT_DIRECT, footer_info);
          rtx_insn *rinsn = get_insns ();
          end_sequence ();
          default_rtl_profile ();
          insert_insn_on_edge (rinsn, eg);
          need_commit = true;
        }
    }

  /* Insert vsetvl insns on edges as LCM suggests.  */
  for (int ed = 0; ed < NUM_EDGES (m_edges); ed++)
    {
      edge eg = INDEX_EDGE (m_edges, ed);
      sbitmap i = m_insert[ed];
      if (bitmap_count_bits (i) < 1)
        continue;

      if (bitmap_count_bits (i) > 1)
        /* For code with an infinite loop (e.g. pr61634.c), the data flow is
           completely wrong.  */
        continue;

      gcc_assert (bitmap_count_bits (i) == 1);
      unsigned expr_index = bitmap_first_set_bit (i);
      const vsetvl_info &info = *m_exprs[expr_index];
      gcc_assert (info.valid_p ());
      if (dump_file)
        {
          fprintf (dump_file,
                   "\n Insert vsetvl info at edge(bb %u -> bb %u): ",
                   eg->src->index, eg->dest->index);
          info.dump (dump_file, " ");
        }
      rtl_profile_for_edge (eg);
      start_sequence ();

      insert_vsetvl_insn (EMIT_DIRECT, info);
      rtx_insn *rinsn = get_insns ();
      end_sequence ();
      default_rtl_profile ();

      /* We should not get an abnormal edge here.  */
      gcc_assert (!(eg->flags & EDGE_ABNORMAL));
      need_commit = true;
      insert_insn_on_edge (rinsn, eg);
    }

  if (need_commit)
    commit_edge_insertions ();
}

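/* Phase 5: clean up the operands that are no longer needed once the vsetvl
   insns are in place: the AVL operand of RVV insns and the unused vl
   destination of vsetvl insns.  */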
void
pre_vsetvl::cleaup ()
{
  remove_avl_operand ();
  remove_unused_dest_operand ();
}

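/* Drop the AVL operand from RVV insns once the vsetvl insns carry that
   information.  If the AVL register occurs exactly once in the insn it is
   replaced by const0, which shortens the register's live range.  Fault only
   first loads are handled by rewriting the whole pattern because they are
   not a plain single set.  */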
void
pre_vsetvl::remove_avl_operand ()
{
  basic_block cfg_bb;
  rtx_insn *rinsn;
  FOR_ALL_BB_FN (cfg_bb, cfun)
    FOR_BB_INSNS (cfg_bb, rinsn)
      if (NONDEBUG_INSN_P (rinsn) && has_vl_op (rinsn)
          && REG_P (get_vl (rinsn)))
        {
          rtx avl = get_vl (rinsn);
          if (count_regno_occurrences (rinsn, REGNO (avl)) == 1)
            {
              rtx new_pat;
              if (fault_first_load_p (rinsn))
                new_pat
                  = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx);
              else
                {
                  rtx set = single_set (rinsn);
                  rtx src
                    = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx);
                  new_pat = gen_rtx_SET (SET_DEST (set), src);
                }
              if (dump_file)
                {
                  fprintf (dump_file, " Cleanup insn %u's avl operand:\n",
                           INSN_UID (rinsn));
                  print_rtl_single (dump_file, rinsn);
                }
              validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false);
            }
        }
}

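/* Rewrite vsetvl insns whose vl result has no remaining uses so that the
   result is discarded (x0 destination).  Infos with a VLMAX AVL are skipped:
   the "vsetvli zero, zero" encoding preserves the previous vl rather than
   setting it to VLMAX, so the destination cannot simply be dropped there.  */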
void
pre_vsetvl::remove_unused_dest_operand ()
{
  df_analyze ();
  basic_block cfg_bb;
  rtx_insn *rinsn;
  FOR_ALL_BB_FN (cfg_bb, cfun)
    FOR_BB_INSNS (cfg_bb, rinsn)
      if (NONDEBUG_INSN_P (rinsn) && vsetvl_insn_p (rinsn))
        {
          rtx vl = get_vl (rinsn);
          vsetvl_info info = vsetvl_info (rinsn);
          if (has_no_uses (cfg_bb, rinsn, REGNO (vl)))
            if (!info.has_vlmax_avl ())
              {
                rtx new_pat = info.get_vsetvl_pat (true);
                if (dump_file)
                  {
                    fprintf (dump_file,
                             " Remove vsetvl insn %u's dest(vl) operand since "
                             "it is unused:\n",
                             INSN_UID (rinsn));
                    print_rtl_single (dump_file, rinsn);
                  }
                validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat,
                                         false);
              }
        }
}

const pass_data pass_data_vsetvl = {
  RTL_PASS,      /* type */
  "vsetvl",      /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE,       /* tv_id */
  0,             /* properties_required */
  0,             /* properties_provided */
  0,             /* properties_destroyed */
  0,             /* todo_flags_start */
  0,             /* todo_flags_finish */
};

class pass_vsetvl : public rtl_opt_pass
{
private:
  void simple_vsetvl ();
  void lazy_vsetvl ();

public:
  pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {}

  /* opt_pass methods: */
  virtual bool gate (function *) final override { return TARGET_VECTOR; }
  virtual unsigned int execute (function *) final override;
}; // class pass_vsetvl

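/* The -O0 strategy: no data-flow analysis is performed; simply emit a
   vsetvl insn directly before every insn that has a vtype operand.  */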
void
pass_vsetvl::simple_vsetvl ()
{
  if (dump_file)
    fprintf (dump_file, "\nEntering Simple VSETVL PASS\n");

  basic_block cfg_bb;
  rtx_insn *rinsn;
  FOR_ALL_BB_FN (cfg_bb, cfun)
    {
      FOR_BB_INSNS (cfg_bb, rinsn)
        {
          if (!NONDEBUG_INSN_P (rinsn))
            continue;
          if (has_vtype_op (rinsn))
            {
              const auto &info = vsetvl_info (rinsn);
              rtx pat = info.get_vsetvl_pat ();
              emit_insn_before (pat, rinsn);
              if (dump_file)
                {
                  fprintf (dump_file, " Insert vsetvl insn before insn %d:\n",
                           INSN_UID (rinsn));
                  print_rtl_single (dump_file, PREV_INSN (rinsn));
                }
            }
        }
    }
}

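/* A minimal illustrative sketch (not taken from a real test case) of what
   the lazy strategy buys over the simple one.  With one vsetvl per RVV insn,
   a loop body would look like:

       loop:
         vsetvli  zero, a2, e32, m1, ta, ma
         vle32.v  v8, (a0)
         vsetvli  zero, a2, e32, m1, ta, ma
         vadd.vv  v8, v8, v8
         vsetvli  zero, a2, e32, m1, ta, ma
         vse32.v  v8, (a1)
         ...

   After fusing compatible infos and running LCM, a single vsetvli placed
   before the loop is enough:

         vsetvli  zero, a2, e32, m1, ta, ma
       loop:
         vle32.v  v8, (a0)
         vadd.vv  v8, v8, v8
         vse32.v  v8, (a1)
         ...  */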
/* Lazy vsetvl insertion for optimize > 0.  */
void
pass_vsetvl::lazy_vsetvl ()
{
  if (dump_file)
    fprintf (dump_file, "\nEntering Lazy VSETVL PASS\n\n");

  pre_vsetvl pre = pre_vsetvl ();

  if (dump_file)
    fprintf (dump_file, "\nPhase 1: Fuse local vsetvl infos.\n\n");
  pre.fuse_local_vsetvl_info ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    pre.dump (dump_file, "phase 1");

  /* Phase 2: Fuse header and footer vsetvl infos between basic blocks.  */
  if (dump_file)
    fprintf (dump_file, "\nPhase 2: Lift up vsetvl info.\n\n");
  bool changed;
  int fused_count = 0;
  do
    {
      if (dump_file)
        fprintf (dump_file, " Try lift up %d.\n\n", fused_count);
      changed = pre.earliest_fuse_vsetvl_info ();
      fused_count += 1;
    } while (changed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    pre.dump (dump_file, "phase 2");

  /* Phase 3: Reduce redundant vsetvl infos using LCM.  */
  if (dump_file)
    fprintf (dump_file, "\nPhase 3: Reduce global vsetvl infos.\n\n");
  pre.pre_global_vsetvl_info ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    pre.dump (dump_file, "phase 3");

  /* Phase 4: Insert, modify and remove vsetvl insns.  */
  if (dump_file)
    fprintf (dump_file,
             "\nPhase 4: Insert, modify and remove vsetvl insns.\n\n");
  pre.emit_vsetvl ();

  /* Phase 5: Cleanup.  */
  if (dump_file)
    fprintf (dump_file, "\nPhase 5: Cleanup\n\n");
  pre.cleaup ();

  pre.finish ();
}

/* Main entry point for this pass.  */
unsigned int
pass_vsetvl::execute (function *)
{
  if (n_basic_blocks_for_fn (cfun) <= 0)
    return 0;

  /* RVV instructions may change when they are split, i.e. they are not yet
     stable.  Split all insns here to avoid potential issues, since the
     VSETVL pass runs before the split pass.  */
  split_all_insns ();

  /* Early return if there are no vector instructions.  */
  if (!has_vector_insn (cfun))
    return 0;

  if (!optimize)
    simple_vsetvl ();
  else
    lazy_vsetvl ();

  return 0;
}

rtl_opt_pass *
make_pass_vsetvl (gcc::context *ctxt)
{
  return new pass_vsetvl (ctxt);
}