]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/riscv/riscv-vsetvl.cc
libstdc++: Remove std::__is_pointer and std::__is_scalar [PR115497]
[thirdparty/gcc.git] / gcc / config / riscv / riscv-vsetvl.cc
CommitLineData
9243c3d1 1/* VSETVL pass for RISC-V 'V' Extension for GNU compiler.
a945c346 2 Copyright (C) 2022-2024 Free Software Foundation, Inc.
9243c3d1
JZZ
3 Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3, or(at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
29331e72
LD
21/* The values of the vl and vtype registers will affect the behavior of RVV
22 insns. That is, when we need to execute an RVV instruction, we need to set
23 the correct vl and vtype values by executing the vsetvl instruction before.
24 Executing the fewest number of vsetvl instructions while keeping the behavior
25 the same is the problem this pass is trying to solve. This vsetvl pass is
26 divided into 5 phases:
27
28 - Phase 1 (fuse local vsetvl infos): traverses each Basic Block, parses
29 each instruction in it that affects vl and vtype state and generates an
30 array of vsetvl_info objects. Then traverse the vsetvl_info array from
31 front to back and perform fusion according to the fusion rules. The fused
32 vsetvl infos are stored in the vsetvl_block_info object's `infos` field.
33
34 - Phase 2 (earliest fuse global vsetvl infos): The header_info and
35 footer_info of vsetvl_block_info are used as expressions, and the
36 earliest of each expression is computed. Based on the earliest
37 information, try to lift up the corresponding vsetvl info to the src
38 basic block of the edge (mainly to reduce the total number of vsetvl
39 instructions, this uplift will cause some execution paths to execute
40 vsetvl instructions that shouldn't be there).
41
42 - Phase 3 (pre global vsetvl info): The header_info and footer_info of
43 vsetvl_block_info are used as expressions, and the LCM algorithm is used
44 to compute the header_info that needs to be deleted and the one that
45 needs to be inserted in some edges.
46
47 - Phase 4 (emit vsetvl insns) : Based on the fusion result of Phase 1 and
48 the deletion and insertion information of Phase 3, the mandatory vsetvl
49 instruction insertion, modification and deletion are performed.
50
51 - Phase 5 (cleanup): Clean up the avl operand in the RVV operator
52 instruction and cleanup the unused dest operand of the vsetvl insn.
53
54 After the Phase 1 a virtual CFG of vsetvl_info is generated. The virtual
55 basic block is represented by vsetvl_block_info, and the virtual vsetvl
56 statements inside are represented by vsetvl_info. The later phases 2 and 3
57 are constantly modifying and adjusting this virtual CFG. Phase 4 performs
58 insertion, modification and deletion of vsetvl instructions based on the
59 optimized virtual CFG. The Phase 1, 2 and 3 do not involve modifications to
60 the RTL.
61*/
9243c3d1
JZZ
62
63#define IN_TARGET_CODE 1
64#define INCLUDE_ALGORITHM
65#define INCLUDE_FUNCTIONAL
66
67#include "config.h"
68#include "system.h"
69#include "coretypes.h"
70#include "tm.h"
71#include "backend.h"
72#include "rtl.h"
73#include "target.h"
74#include "tree-pass.h"
75#include "df.h"
76#include "rtl-ssa.h"
77#include "cfgcleanup.h"
78#include "insn-config.h"
79#include "insn-attr.h"
80#include "insn-opinit.h"
81#include "tm-constrs.h"
82#include "cfgrtl.h"
83#include "cfganal.h"
84#include "lcm.h"
85#include "predict.h"
86#include "profile-count.h"
a3ad2301 87#include "gcse.h"
4a0a8dc1 88#include "cfgloop.h"
9243c3d1
JZZ
89
90using namespace rtl_ssa;
91using namespace riscv_vector;
92
29331e72
LD
93/* Set the bitmap DST to the union of SRC of predecessors of
94 basic block B.
95 It's a bit different from bitmap_union_of_preds in cfganal.cc. This function
96 takes into account the case where pred is ENTRY basic block. The main reason
97 for this difference is to make it easier to insert some special value into
d83070ae 98 the ENTRY base block. For example, vsetvl_info with a status of UNKNOWN. */
29331e72
LD
99static void
100bitmap_union_of_preds_with_entry (sbitmap dst, sbitmap *src, basic_block b)
101{
102 unsigned int set_size = dst->size;
103 edge e;
104 unsigned ix;
105
106 for (ix = 0; ix < EDGE_COUNT (b->preds); ix++)
107 {
108 e = EDGE_PRED (b, ix);
109 bitmap_copy (dst, src[e->src->index]);
110 break;
111 }
ec99ffab 112
29331e72
LD
113 if (ix == EDGE_COUNT (b->preds))
114 bitmap_clear (dst);
115 else
116 for (ix++; ix < EDGE_COUNT (b->preds); ix++)
117 {
118 unsigned int i;
119 SBITMAP_ELT_TYPE *p, *r;
120
121 e = EDGE_PRED (b, ix);
122 p = src[e->src->index]->elms;
123 r = dst->elms;
124 for (i = 0; i < set_size; i++)
125 *r++ |= *p++;
126 }
127}
128
d83070ae
KC
129/* Compute the reaching definition in and out based on the gen and KILL
130 information's in each Base Blocks.
131 This function references the compute_available implementation in lcm.cc */
29331e72
LD
132static void
133compute_reaching_defintion (sbitmap *gen, sbitmap *kill, sbitmap *in,
134 sbitmap *out)
9243c3d1 135{
29331e72
LD
136 edge e;
137 basic_block *worklist, *qin, *qout, *qend, bb;
138 unsigned int qlen;
139 edge_iterator ei;
140
141 /* Allocate a worklist array/queue. Entries are only added to the
142 list if they were not already on the list. So the size is
143 bounded by the number of basic blocks. */
144 qin = qout = worklist
145 = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
146
147 /* Put every block on the worklist; this is necessary because of the
148 optimistic initialization of AVOUT above. Use reverse postorder
149 to make the forward dataflow problem require less iterations. */
150 int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
151 int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false);
152 for (int i = 0; i < n; ++i)
153 {
154 bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]);
155 *qin++ = bb;
156 bb->aux = bb;
157 }
158 free (rpo);
159
160 qin = worklist;
161 qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS];
162 qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;
163
164 /* Mark blocks which are successors of the entry block so that we
165 can easily identify them below. */
166 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
167 e->dest->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun);
168
169 /* Iterate until the worklist is empty. */
170 while (qlen)
171 {
172 /* Take the first entry off the worklist. */
173 bb = *qout++;
174 qlen--;
175
176 if (qout >= qend)
177 qout = worklist;
178
179 /* Do not clear the aux field for blocks which are successors of the
180 ENTRY block. That way we never add then to the worklist again. */
181 if (bb->aux != ENTRY_BLOCK_PTR_FOR_FN (cfun))
182 bb->aux = NULL;
183
184 bitmap_union_of_preds_with_entry (in[bb->index], out, bb);
185
186 if (bitmap_ior_and_compl (out[bb->index], gen[bb->index], in[bb->index],
187 kill[bb->index]))
188 /* If the out state of this block changed, then we need
189 to add the successors of this block to the worklist
190 if they are not already on the worklist. */
191 FOR_EACH_EDGE (e, ei, bb->succs)
192 if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
193 {
194 *qin++ = e->dest;
195 e->dest->aux = e;
196 qlen++;
197
198 if (qin >= qend)
199 qin = worklist;
200 }
201 }
202
203 clear_aux_for_edges ();
204 clear_aux_for_blocks ();
205 free (worklist);
9243c3d1
JZZ
206}
207
29331e72
LD
/* Classification of vsetvl instruction.  NUM_VSETVL_TYPE is not a real
   classification; it is the number of enumerators that precede it.  */
enum vsetvl_type
{
  VSETVL_NORMAL = 0,
  VSETVL_VTYPE_CHANGE_ONLY = 1,
  VSETVL_DISCARD_RESULT = 2,
  NUM_VSETVL_TYPE = 3
};
9243c3d1 216
/* How a new instruction is placed relative to an existing one.  */
enum emit_type
{
  /* emit_insn directly.  */
  EMIT_DIRECT = 0,
  /* Emit before a given instruction.  */
  EMIT_BEFORE = 1,
  /* Emit after a given instruction.  */
  EMIT_AFTER = 2,
};
224
225/* dump helper functions */
226static const char *
227vlmul_to_str (vlmul_type vlmul)
228{
229 switch (vlmul)
230 {
231 case LMUL_1:
232 return "m1";
233 case LMUL_2:
234 return "m2";
235 case LMUL_4:
236 return "m4";
237 case LMUL_8:
238 return "m8";
239 case LMUL_RESERVED:
240 return "INVALID LMUL";
241 case LMUL_F8:
242 return "mf8";
243 case LMUL_F4:
244 return "mf4";
245 case LMUL_F2:
246 return "mf2";
247
248 default:
249 gcc_unreachable ();
250 }
9243c3d1
JZZ
251}
252
29331e72
LD
/* Return the printable name of a tail/mask policy: "agnostic" when
   AGNOSTIC_P is true, "undisturbed" otherwise.  */
static const char *
policy_to_str (bool agnostic_p)
{
  if (agnostic_p)
    return "agnostic";
  return "undisturbed";
}
258
9243c3d1
JZZ
259/* Return true if it is an RVV instruction depends on VTYPE global
260 status register. */
261static bool
262has_vtype_op (rtx_insn *rinsn)
263{
264 return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn);
265}
266
ec99ffab
JZZ
267/* Return true if the instruction ignores VLMUL field of VTYPE. */
268static bool
269ignore_vlmul_insn_p (rtx_insn *rinsn)
270{
271 return get_attr_type (rinsn) == TYPE_VIMOVVX
272 || get_attr_type (rinsn) == TYPE_VFMOVVF
273 || get_attr_type (rinsn) == TYPE_VIMOVXV
274 || get_attr_type (rinsn) == TYPE_VFMOVFV;
275}
276
277/* Return true if the instruction is scalar move instruction. */
278static bool
279scalar_move_insn_p (rtx_insn *rinsn)
280{
281 return get_attr_type (rinsn) == TYPE_VIMOVXV
282 || get_attr_type (rinsn) == TYPE_VFMOVFV;
283}
284
60bd33bc
JZZ
285/* Return true if the instruction is fault first load instruction. */
286static bool
287fault_first_load_p (rtx_insn *rinsn)
288{
6313b045
JZZ
289 return recog_memoized (rinsn) >= 0
290 && (get_attr_type (rinsn) == TYPE_VLDFF
291 || get_attr_type (rinsn) == TYPE_VLSEGDFF);
60bd33bc
JZZ
292}
293
294/* Return true if the instruction is read vl instruction. */
295static bool
296read_vl_insn_p (rtx_insn *rinsn)
297{
298 return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_RDVL;
299}
300
9243c3d1
JZZ
301/* Return true if it is a vsetvl instruction. */
302static bool
303vector_config_insn_p (rtx_insn *rinsn)
304{
305 return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL;
306}
307
308/* Return true if it is vsetvldi or vsetvlsi. */
309static bool
310vsetvl_insn_p (rtx_insn *rinsn)
311{
29331e72 312 if (!rinsn || !vector_config_insn_p (rinsn))
6b6b9c68 313 return false;
85112fbb 314 return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi
6b6b9c68
JZZ
315 || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi);
316}
317
22622a5a
JZ
318/* Return true if it is the bogus vsetvl_pre instruction:
319
320 (define_insn "@vlmax_avl<mode>"
321 [(set (match_operand:P 0 "register_operand" "=r")
322 (unspec:P [(match_operand:P 1 "const_int_operand" "i")] UNSPEC_VLMAX))]
323 "TARGET_VECTOR"
324 ""
325 [(set_attr "type" "vsetvl_pre")])
326
327 As described above, it's a bogus instruction which doesn't emit any assembler
328 and should be removed eventually. It's used for occupying a scalar register
329 for VLMAX avl RVV instruction before register allocation.
330
331 Before RA:
332
333 ...
334 vsetvl_pre (set r136)
335 vadd.vv (use r136 with VLMAX avl)
336 ...
337
338 After RA:
339
340 ...
341 vsetvl_pre (set a5)
342 vadd.vv (use r136 with VLMAX avl)
343 ...
344
345 VSETVL PASS:
346
347 ...
348 vsetvl_pre (set a5) ---> removed.
349 vsetvl a5,zero,... ---> Inserted.
350 vadd.vv
351 ...
352*/
353static bool
354vsetvl_pre_insn_p (rtx_insn *rinsn)
355{
356 return recog_memoized (rinsn) >= 0
357 && get_attr_type (rinsn) == TYPE_VSETVL_PRE;
358}
359
6b6b9c68
JZZ
360/* Return true if it is vsetvl zero, rs1. */
361static bool
362vsetvl_discard_result_insn_p (rtx_insn *rinsn)
363{
364 if (!vector_config_insn_p (rinsn))
365 return false;
366 return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi
367 || INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi);
9243c3d1
JZZ
368}
369
9243c3d1 370static bool
4f673c5e 371real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb)
9243c3d1 372{
4f673c5e 373 return insn != nullptr && insn->is_real () && insn->bb () == bb;
9243c3d1
JZZ
374}
375
29331e72 376/* Helper function to get VL operand for VLMAX insn. */
6b6b9c68
JZZ
377static rtx
378get_vl (rtx_insn *rinsn)
379{
380 if (has_vl_op (rinsn))
381 {
382 extract_insn_cached (rinsn);
383 return recog_data.operand[get_attr_vl_op_idx (rinsn)];
384 }
385 return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0));
4f673c5e
JZZ
386}
387
6b6b9c68
JZZ
388/* Helper function to get AVL operand. */
389static rtx
390get_avl (rtx_insn *rinsn)
391{
392 if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn))
393 return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0);
394
395 if (!has_vl_op (rinsn))
396 return NULL_RTX;
5e714992 397 if (vlmax_avl_type_p (rinsn))
6b6b9c68
JZZ
398 return RVV_VLMAX;
399 extract_insn_cached (rinsn);
400 return recog_data.operand[get_attr_vl_op_idx (rinsn)];
401}
402
9243c3d1
JZZ
403/* Get default mask policy. */
404static bool
405get_default_ma ()
406{
407 /* For the instruction that doesn't require MA, we still need a default value
408 to emit vsetvl. We pick up the default value according to prefer policy. */
409 return (bool) (get_prefer_mask_policy () & 0x1
410 || (get_prefer_mask_policy () >> 1 & 0x1));
411}
412
9243c3d1
JZZ
413/* Helper function to get MA operand. */
414static bool
415mask_agnostic_p (rtx_insn *rinsn)
416{
417 /* If it doesn't have MA, we return agnostic by default. */
418 extract_insn_cached (rinsn);
419 int ma = get_attr_ma (rinsn);
420 return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma);
421}
422
423/* Return true if FN has a vector instruction that use VL/VTYPE. */
424static bool
425has_vector_insn (function *fn)
426{
427 basic_block cfg_bb;
428 rtx_insn *rinsn;
429 FOR_ALL_BB_FN (cfg_bb, fn)
430 FOR_BB_INSNS (cfg_bb, rinsn)
431 if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn))
432 return true;
433 return false;
434}
435
29331e72
LD
436static vlmul_type
437calculate_vlmul (unsigned int sew, unsigned int ratio)
9243c3d1 438{
29331e72
LD
439 const vlmul_type ALL_LMUL[]
440 = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2};
441 for (const vlmul_type vlmul : ALL_LMUL)
442 if (calculate_ratio (sew, vlmul) == ratio)
443 return vlmul;
444 return LMUL_RESERVED;
9243c3d1
JZZ
445}
446
29331e72
LD
447/* Get the currently supported maximum sew used in the int rvv instructions. */
448static uint8_t
449get_max_int_sew ()
9243c3d1 450{
29331e72
LD
451 if (TARGET_VECTOR_ELEN_64)
452 return 64;
453 else if (TARGET_VECTOR_ELEN_32)
454 return 32;
455 gcc_unreachable ();
9243c3d1
JZZ
456}
457
29331e72
LD
458/* Get the currently supported maximum sew used in the float rvv instructions.
459 */
460static uint8_t
461get_max_float_sew ()
462{
463 if (TARGET_VECTOR_ELEN_FP_64)
464 return 64;
465 else if (TARGET_VECTOR_ELEN_FP_32)
466 return 32;
467 else if (TARGET_VECTOR_ELEN_FP_16)
468 return 16;
469 gcc_unreachable ();
9243c3d1
JZZ
470}
471
/* Bit flags selecting which kinds of defining instruction to collect.  The
   values are distinct bits so that they can be ORed into a mask.  */
enum def_type
{
  REAL_SET = 1 << 0,
  PHI_SET = 1 << 1,
  BB_HEAD_SET = 1 << 2,
  BB_END_SET = 1 << 3,
  /* ??? TODO: In RTL_SSA framework, we have REAL_SET,
     PHI_SET, BB_HEAD_SET, BB_END_SET and
     CLOBBER_DEF def_info types.  Currently,
     we conservatively do not optimize clobber
     def since we don't see the case that we
     need to optimize it.  */
  CLOBBER_DEF = 1 << 4
};
9243c3d1 486
29331e72
LD
487static bool
488insn_should_be_added_p (const insn_info *insn, unsigned int types)
da93c41c 489{
29331e72
LD
490 if (insn->is_real () && (types & REAL_SET))
491 return true;
492 if (insn->is_phi () && (types & PHI_SET))
493 return true;
494 if (insn->is_bb_head () && (types & BB_HEAD_SET))
495 return true;
496 if (insn->is_bb_end () && (types & BB_END_SET))
497 return true;
498 return false;
da93c41c
JZ
499}
500
29331e72
LD
501static const hash_set<use_info *>
502get_all_real_uses (insn_info *insn, unsigned regno)
9243c3d1 503{
29331e72 504 gcc_assert (insn->is_real ());
9243c3d1 505
29331e72
LD
506 hash_set<use_info *> uses;
507 auto_vec<phi_info *> work_list;
508 hash_set<phi_info *> visited_list;
9243c3d1 509
29331e72 510 for (def_info *def : insn->defs ())
9243c3d1 511 {
29331e72
LD
512 if (!def->is_reg () || def->regno () != regno)
513 continue;
514 set_info *set = safe_dyn_cast<set_info *> (def);
515 if (!set)
516 continue;
517 for (use_info *use : set->nondebug_insn_uses ())
518 if (use->insn ()->is_real ())
519 uses.add (use);
520 for (use_info *use : set->phi_uses ())
521 work_list.safe_push (use->phi ());
9243c3d1 522 }
9243c3d1 523
29331e72 524 while (!work_list.is_empty ())
60bd33bc 525 {
29331e72
LD
526 phi_info *phi = work_list.pop ();
527 visited_list.add (phi);
60bd33bc 528
29331e72
LD
529 for (use_info *use : phi->nondebug_insn_uses ())
530 if (use->insn ()->is_real ())
531 uses.add (use);
532 for (use_info *use : phi->phi_uses ())
533 if (!visited_list.contains (use->phi ()))
534 work_list.safe_push (use->phi ());
60bd33bc 535 }
29331e72 536 return uses;
60bd33bc
JZZ
537}
538
29331e72
LD
539/* Recursively find all define instructions. The kind of instruction is
540 specified by the DEF_TYPE. */
541static hash_set<set_info *>
542get_all_sets (phi_info *phi, unsigned int types)
9243c3d1 543{
29331e72
LD
544 hash_set<set_info *> insns;
545 auto_vec<phi_info *> work_list;
546 hash_set<phi_info *> visited_list;
547 if (!phi)
548 return hash_set<set_info *> ();
549 work_list.safe_push (phi);
9243c3d1 550
29331e72 551 while (!work_list.is_empty ())
9243c3d1 552 {
29331e72
LD
553 phi_info *phi = work_list.pop ();
554 visited_list.add (phi);
555 for (use_info *use : phi->inputs ())
556 {
557 def_info *def = use->def ();
558 set_info *set = safe_dyn_cast<set_info *> (def);
559 if (!set)
560 return hash_set<set_info *> ();
a1e42094 561
29331e72 562 gcc_assert (!set->insn ()->is_debug_insn ());
9243c3d1 563
29331e72
LD
564 if (insn_should_be_added_p (set->insn (), types))
565 insns.add (set);
566 if (set->insn ()->is_phi ())
567 {
568 phi_info *new_phi = as_a<phi_info *> (set);
569 if (!visited_list.contains (new_phi))
570 work_list.safe_push (new_phi);
571 }
572 }
9243c3d1 573 }
29331e72 574 return insns;
9243c3d1
JZZ
575}
576
29331e72
LD
577static hash_set<set_info *>
578get_all_sets (set_info *set, bool /* get_real_inst */ real_p,
579 bool /*get_phi*/ phi_p, bool /* get_function_parameter*/ param_p)
aef20243 580{
29331e72
LD
581 if (real_p && phi_p && param_p)
582 return get_all_sets (safe_dyn_cast<phi_info *> (set),
583 REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET);
aef20243 584
29331e72
LD
585 else if (real_p && param_p)
586 return get_all_sets (safe_dyn_cast<phi_info *> (set),
587 REAL_SET | BB_HEAD_SET | BB_END_SET);
588
589 else if (real_p)
590 return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET);
591 return hash_set<set_info *> ();
69f39144
JZ
592}
593
4f673c5e 594static bool
6b6b9c68 595source_equal_p (insn_info *insn1, insn_info *insn2)
4f673c5e 596{
6b6b9c68
JZZ
597 if (!insn1 || !insn2)
598 return false;
599 rtx_insn *rinsn1 = insn1->rtl ();
600 rtx_insn *rinsn2 = insn2->rtl ();
4f673c5e
JZZ
601 if (!rinsn1 || !rinsn2)
602 return false;
29331e72 603
4f673c5e
JZZ
604 rtx note1 = find_reg_equal_equiv_note (rinsn1);
605 rtx note2 = find_reg_equal_equiv_note (rinsn2);
2020bce3
RD
606 /* We could handle the case of similar-looking REG_EQUALs as well but
607 would need to verify that no insn in between modifies any of the source
608 operands. */
609 if (note1 && note2 && rtx_equal_p (note1, note2)
610 && REG_NOTE_KIND (note1) == REG_EQUIV)
4f673c5e 611 return true;
29331e72 612 return false;
4f673c5e
JZZ
613}
614
6b6b9c68 615static insn_info *
4f673c5e
JZZ
616extract_single_source (set_info *set)
617{
618 if (!set)
619 return nullptr;
620 if (set->insn ()->is_real ())
6b6b9c68 621 return set->insn ();
4f673c5e
JZZ
622 if (!set->insn ()->is_phi ())
623 return nullptr;
6b6b9c68 624 hash_set<set_info *> sets = get_all_sets (set, true, false, true);
330bb064
JZ
625 if (sets.is_empty ())
626 return nullptr;
4f673c5e 627
6b6b9c68 628 insn_info *first_insn = (*sets.begin ())->insn ();
4f673c5e
JZZ
629 if (first_insn->is_artificial ())
630 return nullptr;
6b6b9c68 631 for (const set_info *set : sets)
4f673c5e
JZZ
632 {
633 /* If there is a head or end insn, we conservative return
634 NULL so that VSETVL PASS will insert vsetvl directly. */
6b6b9c68 635 if (set->insn ()->is_artificial ())
4f673c5e 636 return nullptr;
29331e72 637 if (set != *sets.begin () && !source_equal_p (set->insn (), first_insn))
4f673c5e
JZZ
638 return nullptr;
639 }
640
6b6b9c68 641 return first_insn;
4f673c5e
JZZ
642}
643
29331e72
LD
644static bool
645same_equiv_note_p (set_info *set1, set_info *set2)
ec99ffab 646{
29331e72
LD
647 insn_info *insn1 = extract_single_source (set1);
648 insn_info *insn2 = extract_single_source (set2);
649 if (!insn1 || !insn2)
650 return false;
651 return source_equal_p (insn1, insn2);
ec99ffab
JZZ
652}
653
29331e72 654/* Return true if the SET result is not used by any instructions. */
ec99ffab 655static bool
29331e72 656has_no_uses (basic_block cfg_bb, rtx_insn *rinsn, int regno)
ec99ffab 657{
29331e72
LD
658 if (bitmap_bit_p (df_get_live_out (cfg_bb), regno))
659 return false;
ec99ffab 660
29331e72
LD
661 rtx_insn *iter;
662 for (iter = NEXT_INSN (rinsn); iter && iter != NEXT_INSN (BB_END (cfg_bb));
663 iter = NEXT_INSN (iter))
664 if (df_find_use (iter, regno_reg_rtx[regno]))
665 return false;
ec99ffab 666
29331e72 667 return true;
ec99ffab
JZZ
668}
669
4a0a8dc1
JZ
670/* Return true for the special block that we can't apply LCM optimization. */
671static bool
672invalid_opt_bb_p (basic_block cfg_bb)
673{
674 edge e;
675 edge_iterator ei;
676
677 /* We don't do LCM optimizations on complex edges. */
678 FOR_EACH_EDGE (e, ei, cfg_bb->preds)
679 if (e->flags & EDGE_COMPLEX)
680 return true;
681
682 /* We only do LCM optimizations on blocks that are post dominated by
683 EXIT block, that is, we don't do LCM optimizations on infinite loop. */
684 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
685 if (e->flags & EDGE_FAKE)
686 return true;
687
688 return false;
689}
690
c6c2a1d7
JZ
691/* Get all predecessors of BB. */
692static hash_set<basic_block>
693get_all_predecessors (basic_block bb)
694{
695 hash_set<basic_block> blocks;
696 auto_vec<basic_block> work_list;
697 hash_set<basic_block> visited_list;
698 work_list.safe_push (bb);
699
700 while (!work_list.is_empty ())
701 {
702 basic_block new_bb = work_list.pop ();
703 visited_list.add (new_bb);
704 edge e;
705 edge_iterator ei;
706 FOR_EACH_EDGE (e, ei, new_bb->preds)
707 {
708 if (!visited_list.contains (e->src))
709 work_list.safe_push (e->src);
710 blocks.add (e->src);
711 }
712 }
713 return blocks;
714}
715
29331e72
LD
716/* These flags indicate the minimum demand of the vl and vtype values by the
717 RVV instruction. For example, DEMAND_RATIO_P indicates that this RVV
718 instruction only needs the SEW/LMUL ratio to remain the same, and does not
719 require SEW and LMUL to be fixed.
720 Therefore, if the former RVV instruction needs DEMAND_RATIO_P and the latter
721 instruction needs DEMAND_SEW_LMUL_P and its SEW/LMUL is the same as that of
d83070ae 722 the former instruction, then we can make the minimum demand of the former
29331e72
LD
723 instruction strict to DEMAND_SEW_LMUL_P, and its required SEW and LMUL are
724 the SEW and LMUL of the latter instruction, and the vsetvl instruction
725 generated according to the new demand can also be used for the latter
726 instruction, so there is no need to insert a separate vsetvl instruction for
727 the latter instruction. */
/* One bit per individual demand; the *_demand_type enumerations are built
   from combinations of these bits.  */
enum demand_flags : unsigned
{
  DEMAND_EMPTY_P = 0,
  DEMAND_SEW_P = 1 << 0,
  DEMAND_LMUL_P = 1 << 1,
  DEMAND_RATIO_P = 1 << 2,
  DEMAND_GE_SEW_P = 1 << 3,
  DEMAND_TAIL_POLICY_P = 1 << 4,
  DEMAND_MASK_POLICY_P = 1 << 5,
  DEMAND_AVL_P = 1 << 6,
  DEMAND_NON_ZERO_AVL_P = 1 << 7,
};
ec99ffab 740
29331e72
LD
741/* We split the demand information into three parts. They are sew and lmul
742 related (sew_lmul_demand_type), tail and mask policy related
743 (policy_demand_type) and avl related (avl_demand_type). Then we define three
d83070ae 744 interfaces available_p, compatible_p and merge. available_p is
29331e72
LD
745 used to determine whether the two vsetvl infos prev_info and next_info are
746 available or not. If prev_info is available for next_info, it means that the
747 RVV insn corresponding to next_info on the path from prev_info to next_info
748 can be used without inserting a separate vsetvl instruction. compatible_p
749 is used to determine whether prev_info is compatible with next_info, and if
750 so, merge can be used to merge the stricter demand information from
751 next_info into prev_info so that prev_info becomes available to next_info.
752 */
ec99ffab 753
29331e72 754enum class sew_lmul_demand_type : unsigned
ec99ffab 755{
29331e72
LD
756 sew_lmul = demand_flags::DEMAND_SEW_P | demand_flags::DEMAND_LMUL_P,
757 ratio_only = demand_flags::DEMAND_RATIO_P,
758 sew_only = demand_flags::DEMAND_SEW_P,
759 ge_sew = demand_flags::DEMAND_GE_SEW_P,
760 ratio_and_ge_sew
761 = demand_flags::DEMAND_RATIO_P | demand_flags::DEMAND_GE_SEW_P,
762};
ec99ffab 763
29331e72 764enum class policy_demand_type : unsigned
29547511 765{
29331e72
LD
766 tail_mask_policy
767 = demand_flags::DEMAND_TAIL_POLICY_P | demand_flags::DEMAND_MASK_POLICY_P,
768 tail_policy_only = demand_flags::DEMAND_TAIL_POLICY_P,
769 mask_policy_only = demand_flags::DEMAND_MASK_POLICY_P,
770 ignore_policy = demand_flags::DEMAND_EMPTY_P,
771};
29547511 772
29331e72 773enum class avl_demand_type : unsigned
ec99ffab 774{
29331e72
LD
775 avl = demand_flags::DEMAND_AVL_P,
776 non_zero_avl = demand_flags::DEMAND_NON_ZERO_AVL_P,
777 ignore_avl = demand_flags::DEMAND_EMPTY_P,
778};
ec99ffab 779
29331e72 780class vsetvl_info
ec99ffab 781{
29331e72
LD
782private:
783 insn_info *m_insn;
784 bb_info *m_bb;
785 rtx m_avl;
786 rtx m_vl;
787 set_info *m_avl_def;
788 uint8_t m_sew;
789 uint8_t m_max_sew;
790 vlmul_type m_vlmul;
791 uint8_t m_ratio;
792 bool m_ta;
793 bool m_ma;
794
795 sew_lmul_demand_type m_sew_lmul_demand;
796 policy_demand_type m_policy_demand;
797 avl_demand_type m_avl_demand;
798
799 enum class state_type
800 {
801 UNINITIALIZED,
802 VALID,
803 UNKNOWN,
804 EMPTY,
805 };
806 state_type m_state;
807
808 bool m_delete;
809 bool m_change_vtype_only;
810 insn_info *m_read_vl_insn;
811 bool m_vl_used_by_non_rvv_insn;
ec99ffab 812
29331e72
LD
813public:
814 vsetvl_info ()
815 : m_insn (nullptr), m_bb (nullptr), m_avl (NULL_RTX), m_vl (NULL_RTX),
816 m_avl_def (nullptr), m_sew (0), m_max_sew (0), m_vlmul (LMUL_RESERVED),
817 m_ratio (0), m_ta (false), m_ma (false),
818 m_sew_lmul_demand (sew_lmul_demand_type::sew_lmul),
819 m_policy_demand (policy_demand_type::tail_mask_policy),
820 m_avl_demand (avl_demand_type::avl), m_state (state_type::UNINITIALIZED),
821 m_delete (false), m_change_vtype_only (false), m_read_vl_insn (nullptr),
822 m_vl_used_by_non_rvv_insn (false)
823 {}
824
825 vsetvl_info (insn_info *insn) : vsetvl_info () { parse_insn (insn); }
826
827 vsetvl_info (rtx_insn *insn) : vsetvl_info () { parse_insn (insn); }
828
829 void set_avl (rtx avl) { m_avl = avl; }
830 void set_vl (rtx vl) { m_vl = vl; }
831 void set_avl_def (set_info *avl_def) { m_avl_def = avl_def; }
832 void set_sew (uint8_t sew) { m_sew = sew; }
833 void set_vlmul (vlmul_type vlmul) { m_vlmul = vlmul; }
834 void set_ratio (uint8_t ratio) { m_ratio = ratio; }
835 void set_ta (bool ta) { m_ta = ta; }
836 void set_ma (bool ma) { m_ma = ma; }
837 void set_delete () { m_delete = true; }
838 void set_bb (bb_info *bb) { m_bb = bb; }
839 void set_max_sew (uint8_t max_sew) { m_max_sew = max_sew; }
840 void set_change_vtype_only () { m_change_vtype_only = true; }
841 void set_read_vl_insn (insn_info *insn) { m_read_vl_insn = insn; }
842
843 rtx get_avl () const { return m_avl; }
844 rtx get_vl () const { return m_vl; }
845 set_info *get_avl_def () const { return m_avl_def; }
846 uint8_t get_sew () const { return m_sew; }
847 vlmul_type get_vlmul () const { return m_vlmul; }
848 uint8_t get_ratio () const { return m_ratio; }
849 bool get_ta () const { return m_ta; }
850 bool get_ma () const { return m_ma; }
851 insn_info *get_insn () const { return m_insn; }
852 bool delete_p () const { return m_delete; }
853 bb_info *get_bb () const { return m_bb; }
854 uint8_t get_max_sew () const { return m_max_sew; }
855 insn_info *get_read_vl_insn () const { return m_read_vl_insn; }
4cd4c34a 856 bool vl_used_by_non_rvv_insn_p () const { return m_vl_used_by_non_rvv_insn; }
29331e72
LD
857
858 bool has_imm_avl () const { return m_avl && CONST_INT_P (m_avl); }
859 bool has_vlmax_avl () const { return vlmax_avl_p (m_avl); }
860 bool has_nonvlmax_reg_avl () const
861 {
862 return m_avl && REG_P (m_avl) && !has_vlmax_avl ();
863 }
864 bool has_non_zero_avl () const
865 {
866 if (has_imm_avl ())
867 return INTVAL (m_avl) > 0;
868 return has_vlmax_avl ();
869 }
870 bool has_vl () const
871 {
872 /* The VL operand can only be either a NULL_RTX or a register. */
873 gcc_assert (!m_vl || REG_P (m_vl));
874 return m_vl != NULL_RTX;
875 }
876 bool has_same_ratio (const vsetvl_info &other) const
877 {
878 return get_ratio () == other.get_ratio ();
879 }
880
881 /* The block of INSN isn't always same as the block of the VSETVL_INFO,
882 meaning we may have 'get_insn ()->bb () != get_bb ()'.
883
884 E.g. BB 2 (Empty) ---> BB 3 (VALID, has rvv insn 1)
885
886 BB 2 has empty VSETVL_INFO, whereas BB 3 has VSETVL_INFO that satisfies
887 get_insn ()->bb () == get_bb (). In earliest fusion, we may fuse bb 3 and
888 bb 2 so that the 'get_bb ()' of BB2 VSETVL_INFO will be BB2 whereas the
889 'get_insn ()' of BB2 VSETVL INFO will be the rvv insn 1 (which is located
890 at BB3). */
891 bool insn_inside_bb_p () const { return get_insn ()->bb () == get_bb (); }
892 void update_avl (const vsetvl_info &other)
893 {
894 m_avl = other.get_avl ();
895 m_vl = other.get_vl ();
896 m_avl_def = other.get_avl_def ();
897 }
898
899 bool uninit_p () const { return m_state == state_type::UNINITIALIZED; }
900 bool valid_p () const { return m_state == state_type::VALID; }
901 bool unknown_p () const { return m_state == state_type::UNKNOWN; }
902 bool empty_p () const { return m_state == state_type::EMPTY; }
903 bool change_vtype_only_p () const { return m_change_vtype_only; }
904
905 void set_valid () { m_state = state_type::VALID; }
906 void set_unknown () { m_state = state_type::UNKNOWN; }
907 void set_empty () { m_state = state_type::EMPTY; }
908
909 void set_sew_lmul_demand (sew_lmul_demand_type demand)
910 {
911 m_sew_lmul_demand = demand;
912 }
913 void set_policy_demand (policy_demand_type demand)
914 {
915 m_policy_demand = demand;
916 }
917 void set_avl_demand (avl_demand_type demand) { m_avl_demand = demand; }
918
919 sew_lmul_demand_type get_sew_lmul_demand () const
920 {
921 return m_sew_lmul_demand;
922 }
923 policy_demand_type get_policy_demand () const { return m_policy_demand; }
924 avl_demand_type get_avl_demand () const { return m_avl_demand; }
925
926 void normalize_demand (unsigned demand_flags)
927 {
928 switch (demand_flags
929 & (DEMAND_SEW_P | DEMAND_LMUL_P | DEMAND_RATIO_P | DEMAND_GE_SEW_P))
930 {
931 case (unsigned) sew_lmul_demand_type::sew_lmul:
932 m_sew_lmul_demand = sew_lmul_demand_type::sew_lmul;
933 break;
934 case (unsigned) sew_lmul_demand_type::ratio_only:
935 m_sew_lmul_demand = sew_lmul_demand_type::ratio_only;
936 break;
937 case (unsigned) sew_lmul_demand_type::sew_only:
938 m_sew_lmul_demand = sew_lmul_demand_type::sew_only;
939 break;
940 case (unsigned) sew_lmul_demand_type::ge_sew:
941 m_sew_lmul_demand = sew_lmul_demand_type::ge_sew;
942 break;
943 case (unsigned) sew_lmul_demand_type::ratio_and_ge_sew:
944 m_sew_lmul_demand = sew_lmul_demand_type::ratio_and_ge_sew;
945 break;
946 default:
947 gcc_unreachable ();
948 }
949
950 switch (demand_flags & (DEMAND_TAIL_POLICY_P | DEMAND_MASK_POLICY_P))
951 {
952 case (unsigned) policy_demand_type::tail_mask_policy:
953 m_policy_demand = policy_demand_type::tail_mask_policy;
954 break;
955 case (unsigned) policy_demand_type::tail_policy_only:
956 m_policy_demand = policy_demand_type::tail_policy_only;
957 break;
958 case (unsigned) policy_demand_type::mask_policy_only:
959 m_policy_demand = policy_demand_type::mask_policy_only;
960 break;
961 case (unsigned) policy_demand_type::ignore_policy:
962 m_policy_demand = policy_demand_type::ignore_policy;
963 break;
964 default:
965 gcc_unreachable ();
966 }
967
968 switch (demand_flags & (DEMAND_AVL_P | DEMAND_NON_ZERO_AVL_P))
969 {
970 case (unsigned) avl_demand_type::avl:
971 m_avl_demand = avl_demand_type::avl;
972 break;
973 case (unsigned) avl_demand_type::non_zero_avl:
974 m_avl_demand = avl_demand_type::non_zero_avl;
975 break;
976 case (unsigned) avl_demand_type::ignore_avl:
977 m_avl_demand = avl_demand_type::ignore_avl;
978 break;
979 default:
980 gcc_unreachable ();
981 }
982 }
983
984 void parse_insn (rtx_insn *rinsn)
985 {
986 if (!NONDEBUG_INSN_P (rinsn))
987 return;
988 if (optimize == 0 && !has_vtype_op (rinsn))
989 return;
990 gcc_assert (!vsetvl_discard_result_insn_p (rinsn));
991 set_valid ();
992 extract_insn_cached (rinsn);
993 m_avl = ::get_avl (rinsn);
994 if (has_vlmax_avl () || vsetvl_insn_p (rinsn))
995 m_vl = ::get_vl (rinsn);
996 m_sew = ::get_sew (rinsn);
997 m_vlmul = ::get_vlmul (rinsn);
998 m_ta = tail_agnostic_p (rinsn);
999 m_ma = mask_agnostic_p (rinsn);
1000 }
1001
1002 void parse_insn (insn_info *insn)
1003 {
1004 m_insn = insn;
1005 m_bb = insn->bb ();
1006 /* Return if it is debug insn for the consistency with optimize == 0. */
1007 if (insn->is_debug_insn ())
1008 return;
ec99ffab 1009
29331e72
LD
1010 /* We set it as unknown since we don't what will happen in CALL or ASM. */
1011 if (insn->is_call () || insn->is_asm ())
1012 {
1013 set_unknown ();
1014 return;
1015 }
1016
1017 /* If this is something that updates VL/VTYPE that we don't know about, set
1018 the state to unknown. */
1019 if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())
1020 && (find_access (insn->defs (), VL_REGNUM)
1021 || find_access (insn->defs (), VTYPE_REGNUM)))
1022 {
1023 set_unknown ();
1024 return;
1025 }
1026
1027 if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()))
1028 /* uninitialized */
1029 return;
ec99ffab 1030
29331e72
LD
1031 set_valid ();
1032
1033 m_avl = ::get_avl (insn->rtl ());
1034 if (m_avl)
1035 {
1036 if (vsetvl_insn_p (insn->rtl ()) || has_vlmax_avl ())
1037 m_vl = ::get_vl (insn->rtl ());
1038
1039 if (has_nonvlmax_reg_avl ())
1040 m_avl_def = find_access (insn->uses (), REGNO (m_avl))->def ();
1041 }
1042
1043 m_sew = ::get_sew (insn->rtl ());
1044 m_vlmul = ::get_vlmul (insn->rtl ());
1045 m_ratio = get_attr_ratio (insn->rtl ());
1046 /* when get_attr_ratio is invalid, this kind of instructions
1047 doesn't care about ratio. However, we still need this value
1048 in demand info backward analysis. */
1049 if (m_ratio == INVALID_ATTRIBUTE)
1050 m_ratio = calculate_ratio (m_sew, m_vlmul);
1051 m_ta = tail_agnostic_p (insn->rtl ());
1052 m_ma = mask_agnostic_p (insn->rtl ());
1053
1054 /* If merge operand is undef value, we prefer agnostic. */
1055 int merge_op_idx = get_attr_merge_op_idx (insn->rtl ());
1056 if (merge_op_idx != INVALID_ATTRIBUTE
1057 && satisfies_constraint_vu (recog_data.operand[merge_op_idx]))
1058 {
1059 m_ta = true;
1060 m_ma = true;
1061 }
1062
1063 /* Determine the demand info of the RVV insn. */
1064 m_max_sew = get_max_int_sew ();
193ef02a 1065 unsigned dflags = 0;
29331e72
LD
1066 if (vector_config_insn_p (insn->rtl ()))
1067 {
193ef02a
RS
1068 dflags |= demand_flags::DEMAND_AVL_P;
1069 dflags |= demand_flags::DEMAND_RATIO_P;
29331e72
LD
1070 }
1071 else
1072 {
1073 if (has_vl_op (insn->rtl ()))
1074 {
1075 if (scalar_move_insn_p (insn->rtl ()))
1076 {
1077 /* If the avl for vmv.s.x comes from the vsetvl instruction, we
1078 don't know if the avl is non-zero, so it is set to
1079 DEMAND_AVL_P for now. it may be corrected to
1080 DEMAND_NON_ZERO_AVL_P later when more information is
1081 available.
1082 */
1083 if (has_non_zero_avl ())
193ef02a 1084 dflags |= demand_flags::DEMAND_NON_ZERO_AVL_P;
29331e72 1085 else
193ef02a 1086 dflags |= demand_flags::DEMAND_AVL_P;
29331e72
LD
1087 }
1088 else
193ef02a 1089 dflags |= demand_flags::DEMAND_AVL_P;
29331e72 1090 }
ec99ffab 1091
29331e72 1092 if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE)
193ef02a 1093 dflags |= demand_flags::DEMAND_RATIO_P;
29331e72
LD
1094 else
1095 {
1096 if (scalar_move_insn_p (insn->rtl ()) && m_ta)
1097 {
193ef02a 1098 dflags |= demand_flags::DEMAND_GE_SEW_P;
29331e72
LD
1099 m_max_sew = get_attr_type (insn->rtl ()) == TYPE_VFMOVFV
1100 ? get_max_float_sew ()
1101 : get_max_int_sew ();
1102 }
1103 else
193ef02a 1104 dflags |= demand_flags::DEMAND_SEW_P;
29331e72
LD
1105
1106 if (!ignore_vlmul_insn_p (insn->rtl ()))
193ef02a 1107 dflags |= demand_flags::DEMAND_LMUL_P;
29331e72 1108 }
ec99ffab 1109
29331e72 1110 if (!m_ta)
193ef02a 1111 dflags |= demand_flags::DEMAND_TAIL_POLICY_P;
29331e72 1112 if (!m_ma)
193ef02a 1113 dflags |= demand_flags::DEMAND_MASK_POLICY_P;
29331e72
LD
1114 }
1115
193ef02a 1116 normalize_demand (dflags);
29331e72
LD
1117
1118 /* Optimize AVL from the vsetvl instruction. */
1119 insn_info *def_insn = extract_single_source (get_avl_def ());
1120 if (def_insn && vsetvl_insn_p (def_insn->rtl ()))
1121 {
1122 vsetvl_info def_info = vsetvl_info (def_insn);
1123 if ((scalar_move_insn_p (insn->rtl ())
1124 || def_info.get_ratio () == get_ratio ())
1125 && (def_info.has_vlmax_avl () || def_info.has_imm_avl ()))
1126 {
1127 update_avl (def_info);
1128 if (scalar_move_insn_p (insn->rtl ()) && has_non_zero_avl ())
1129 m_avl_demand = avl_demand_type::non_zero_avl;
1130 }
1131 }
1132
1133 /* Determine if dest operand(vl) has been used by non-RVV instructions. */
1134 if (has_vl ())
1135 {
1136 const hash_set<use_info *> vl_uses
1137 = get_all_real_uses (get_insn (), REGNO (get_vl ()));
1138 for (use_info *use : vl_uses)
1139 {
1140 gcc_assert (use->insn ()->is_real ());
1141 rtx_insn *rinsn = use->insn ()->rtl ();
1142 if (!has_vl_op (rinsn)
1143 || count_regno_occurrences (rinsn, REGNO (get_vl ())) != 1)
1144 {
1145 m_vl_used_by_non_rvv_insn = true;
1146 break;
1147 }
1148 rtx avl = ::get_avl (rinsn);
c2f23514 1149 if (!avl || !REG_P (avl) || REGNO (get_vl ()) != REGNO (avl))
29331e72
LD
1150 {
1151 m_vl_used_by_non_rvv_insn = true;
1152 break;
1153 }
1154 }
1155 }
ec99ffab 1156
29331e72
LD
1157 /* Collect the read vl insn for the fault-only-first rvv loads. */
1158 if (fault_first_load_p (insn->rtl ()))
1159 {
1160 for (insn_info *i = insn->next_nondebug_insn ();
1161 i->bb () == insn->bb (); i = i->next_nondebug_insn ())
1162 {
1163 if (find_access (i->defs (), VL_REGNUM))
1164 break;
1165 if (i->rtl () && read_vl_insn_p (i->rtl ()))
1166 {
1167 m_read_vl_insn = i;
1168 break;
1169 }
1170 }
1171 }
1172 }
1173
1174 /* Returns the corresponding vsetvl rtx pat. */
1175 rtx get_vsetvl_pat (bool ignore_vl = false) const
1176 {
1177 rtx avl = get_avl ();
1178 /* if optimization == 0 and the instruction is vmv.x.s/vfmv.f.s,
1179 set the value of avl to (const_int 0) so that VSETVL PASS will
1180 insert vsetvl correctly.*/
1181 if (!get_avl ())
1182 avl = GEN_INT (0);
1183 rtx sew = gen_int_mode (get_sew (), Pmode);
1184 rtx vlmul = gen_int_mode (get_vlmul (), Pmode);
1185 rtx ta = gen_int_mode (get_ta (), Pmode);
1186 rtx ma = gen_int_mode (get_ma (), Pmode);
1187
1188 if (change_vtype_only_p ())
1189 return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma);
1190 else if (has_vl () && !ignore_vl)
1191 return gen_vsetvl (Pmode, get_vl (), avl, sew, vlmul, ta, ma);
1192 else
1193 return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma);
1194 }
1195
d82bb518
JZ
1196 /* Return true that the non-AVL operands of THIS will be modified
1197 if we fuse the VL modification from OTHER into THIS. */
1198 bool vl_modify_non_avl_op_p (const vsetvl_info &other) const
1199 {
1200 /* We don't need to worry about any operands from THIS be
1201 modified by OTHER vsetvl since we OTHER vsetvl doesn't
1202 modify any operand. */
1203 if (!other.has_vl ())
1204 return false;
1205
1206 /* THIS VL operand always preempt OTHER VL operand. */
1207 if (this->has_vl ())
1208 return false;
1209
1210 /* If THIS has non IMM AVL and THIS is AVL compatible with
1211 OTHER, the AVL value of THIS is same as VL value of OTHER. */
1212 if (!this->has_imm_avl ())
1213 return false;
1214 return find_access (this->get_insn ()->uses (), REGNO (other.get_vl ()));
1215 }
1216
29331e72
LD
1217 bool operator== (const vsetvl_info &other) const
1218 {
1219 gcc_assert (!uninit_p () && !other.uninit_p ()
1220 && "Uninitialization should not happen");
1221
1222 if (empty_p ())
1223 return other.empty_p ();
1224 if (unknown_p ())
1225 return other.unknown_p ();
1226
1227 return get_insn () == other.get_insn () && get_bb () == other.get_bb ()
1228 && get_avl () == other.get_avl () && get_vl () == other.get_vl ()
1229 && get_avl_def () == other.get_avl_def ()
1230 && get_sew () == other.get_sew ()
1231 && get_vlmul () == other.get_vlmul () && get_ta () == other.get_ta ()
1232 && get_ma () == other.get_ma ()
1233 && get_avl_demand () == other.get_avl_demand ()
1234 && get_sew_lmul_demand () == other.get_sew_lmul_demand ()
1235 && get_policy_demand () == other.get_policy_demand ();
1236 }
1237
1238 void dump (FILE *file, const char *indent = "") const
1239 {
1240 if (uninit_p ())
1241 {
1242 fprintf (file, "UNINITIALIZED.\n");
1243 return;
1244 }
1245 else if (unknown_p ())
1246 {
1247 fprintf (file, "UNKNOWN.\n");
1248 return;
1249 }
1250 else if (empty_p ())
1251 {
1252 fprintf (file, "EMPTY.\n");
1253 return;
1254 }
1255 else if (valid_p ())
1256 fprintf (file, "VALID (insn %u, bb %u)%s\n", get_insn ()->uid (),
1257 get_bb ()->index (), delete_p () ? " (deleted)" : "");
1258 else
1259 gcc_unreachable ();
ec99ffab 1260
29331e72
LD
1261 fprintf (file, "%sDemand fields:", indent);
1262 if (m_sew_lmul_demand == sew_lmul_demand_type::sew_lmul)
1263 fprintf (file, " demand_sew_lmul");
1264 else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_only)
1265 fprintf (file, " demand_ratio_only");
1266 else if (m_sew_lmul_demand == sew_lmul_demand_type::sew_only)
1267 fprintf (file, " demand_sew_only");
1268 else if (m_sew_lmul_demand == sew_lmul_demand_type::ge_sew)
1269 fprintf (file, " demand_ge_sew");
1270 else if (m_sew_lmul_demand == sew_lmul_demand_type::ratio_and_ge_sew)
1271 fprintf (file, " demand_ratio_and_ge_sew");
1272
1273 if (m_policy_demand == policy_demand_type::tail_mask_policy)
1274 fprintf (file, " demand_tail_mask_policy");
1275 else if (m_policy_demand == policy_demand_type::tail_policy_only)
1276 fprintf (file, " demand_tail_policy_only");
1277 else if (m_policy_demand == policy_demand_type::mask_policy_only)
1278 fprintf (file, " demand_mask_policy_only");
1279
1280 if (m_avl_demand == avl_demand_type::avl)
1281 fprintf (file, " demand_avl");
1282 else if (m_avl_demand == avl_demand_type::non_zero_avl)
1283 fprintf (file, " demand_non_zero_avl");
1284 fprintf (file, "\n");
1285
1286 fprintf (file, "%sSEW=%d, ", indent, get_sew ());
1287 fprintf (file, "VLMUL=%s, ", vlmul_to_str (get_vlmul ()));
1288 fprintf (file, "RATIO=%d, ", get_ratio ());
1289 fprintf (file, "MAX_SEW=%d\n", get_max_sew ());
1290
1291 fprintf (file, "%sTAIL_POLICY=%s, ", indent, policy_to_str (get_ta ()));
1292 fprintf (file, "MASK_POLICY=%s\n", policy_to_str (get_ma ()));
1293
1294 fprintf (file, "%sAVL=", indent);
1295 print_rtl_single (file, get_avl ());
1296 fprintf (file, "%sVL=", indent);
1297 print_rtl_single (file, get_vl ());
1298 if (change_vtype_only_p ())
1299 fprintf (file, "%schange vtype only\n", indent);
1300 if (get_read_vl_insn ())
1301 fprintf (file, "%sread_vl_insn: insn %u\n", indent,
1302 get_read_vl_insn ()->uid ());
4cd4c34a 1303 if (vl_used_by_non_rvv_insn_p ())
29331e72
LD
1304 fprintf (file, "%suse_by_non_rvv_insn=true\n", indent);
1305 }
1306};
8fbc0871 1307
29331e72 1308class vsetvl_block_info
ec99ffab 1309{
29331e72
LD
1310public:
1311 /* The static execute probability of the demand info. */
1312 profile_probability probability;
1313
4fd09aed
JZ
1314 auto_vec<vsetvl_info> local_infos;
1315 vsetvl_info global_info;
1316 bb_info *bb;
29331e72 1317
5ee45f5e 1318 vsetvl_block_info () : bb (nullptr)
29331e72 1319 {
4fd09aed
JZ
1320 local_infos.safe_grow_cleared (0);
1321 global_info.set_empty ();
29331e72
LD
1322 }
1323 vsetvl_block_info (const vsetvl_block_info &other)
4fd09aed
JZ
1324 : probability (other.probability), local_infos (other.local_infos.copy ()),
1325 global_info (other.global_info), bb (other.bb)
29331e72
LD
1326 {}
1327
1328 vsetvl_info &get_entry_info ()
1329 {
1330 gcc_assert (!empty_p ());
4fd09aed 1331 return local_infos.is_empty () ? global_info : local_infos[0];
29331e72
LD
1332 }
1333 vsetvl_info &get_exit_info ()
1334 {
1335 gcc_assert (!empty_p ());
4fd09aed
JZ
1336 return local_infos.is_empty () ? global_info
1337 : local_infos[local_infos.length () - 1];
29331e72
LD
1338 }
1339 const vsetvl_info &get_entry_info () const
1340 {
1341 gcc_assert (!empty_p ());
4fd09aed 1342 return local_infos.is_empty () ? global_info : local_infos[0];
29331e72
LD
1343 }
1344 const vsetvl_info &get_exit_info () const
1345 {
1346 gcc_assert (!empty_p ());
4fd09aed
JZ
1347 return local_infos.is_empty () ? global_info
1348 : local_infos[local_infos.length () - 1];
29331e72
LD
1349 }
1350
4fd09aed
JZ
1351 bool empty_p () const { return local_infos.is_empty () && !has_info (); }
1352 bool has_info () const { return !global_info.empty_p (); }
29331e72
LD
1353 void set_info (const vsetvl_info &info)
1354 {
4fd09aed
JZ
1355 gcc_assert (local_infos.is_empty ());
1356 global_info = info;
1357 global_info.set_bb (bb);
29331e72 1358 }
4fd09aed 1359 void set_empty_info () { global_info.set_empty (); }
ec99ffab
JZZ
1360};
1361
29331e72
LD
1362/* Demand system is the RVV-based VSETVL info analysis tools wrapper.
1363 It defines compatible rules for SEW/LMUL, POLICY and AVL.
d83070ae 1364 Also, it provides 3 interfaces available_p, compatible_p and
29331e72
LD
1365 merge for the VSETVL PASS analysis and optimization.
1366
d83070ae
KC
1367 - available_p: Determine whether the next info can get the
1368 available VSETVL status from previous info.
29331e72
LD
1369 e.g. bb 2 (demand SEW = 32, LMUL = M2) -> bb 3 (demand RATIO = 16).
1370 Since bb 2 demand info (SEW/LMUL = 32/2 = 16) satisfies the bb 3
1371 demand, the VSETVL instruction in bb 3 can be elided.
d83070ae 1372 available_p (previous, next) is true in such situation.
29331e72 1373 - compatible_p: Determine whether prev_info is compatible with next_info
d83070ae 1374 so that we can have a new merged info that is available to both of them.
29331e72
LD
1375 - merge: Merge the stricter demand information from
1376 next_info into prev_info so that prev_info becomes available to
1377 next_info. */
1378class demand_system
ec99ffab 1379{
29331e72 1380private:
29331e72 1381 /* predictors. */
ec99ffab 1382
29331e72
LD
1383 inline bool always_true (const vsetvl_info &prev ATTRIBUTE_UNUSED,
1384 const vsetvl_info &next ATTRIBUTE_UNUSED)
1385 {
1386 return true;
1387 }
1388 inline bool always_false (const vsetvl_info &prev ATTRIBUTE_UNUSED,
1389 const vsetvl_info &next ATTRIBUTE_UNUSED)
1390 {
ec99ffab 1391 return false;
29331e72
LD
1392 }
1393
1394 /* predictors for sew and lmul */
1395
1396 inline bool lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
1397 {
1398 return prev.get_vlmul () == next.get_vlmul ();
1399 }
1400 inline bool sew_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
1401 {
1402 return prev.get_sew () == next.get_sew ();
1403 }
1404 inline bool sew_lmul_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
1405 {
1406 return lmul_eq_p (prev, next) && sew_eq_p (prev, next);
1407 }
1408 inline bool sew_ge_p (const vsetvl_info &prev, const vsetvl_info &next)
1409 {
1410 return prev.get_sew () == next.get_sew ()
1411 || (next.get_ta () && prev.get_sew () > next.get_sew ());
1412 }
1413 inline bool sew_le_p (const vsetvl_info &prev, const vsetvl_info &next)
1414 {
1415 return prev.get_sew () == next.get_sew ()
1416 || (prev.get_ta () && prev.get_sew () < next.get_sew ());
1417 }
1418 inline bool prev_sew_le_next_max_sew_p (const vsetvl_info &prev,
1419 const vsetvl_info &next)
1420 {
1421 return prev.get_sew () <= next.get_max_sew ();
1422 }
1423 inline bool next_sew_le_prev_max_sew_p (const vsetvl_info &prev,
1424 const vsetvl_info &next)
1425 {
1426 return next.get_sew () <= prev.get_max_sew ();
1427 }
1428 inline bool max_sew_overlap_p (const vsetvl_info &prev,
1429 const vsetvl_info &next)
1430 {
1431 return !(prev.get_sew () > next.get_max_sew ()
1432 || next.get_sew () > prev.get_max_sew ());
1433 }
1434 inline bool ratio_eq_p (const vsetvl_info &prev, const vsetvl_info &next)
1435 {
1436 return prev.has_same_ratio (next);
1437 }
1438 inline bool prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev,
1439 const vsetvl_info &next)
1440 {
1441 return prev.get_ratio () >= (next.get_sew () / 8);
1442 }
1443 inline bool next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev,
1444 const vsetvl_info &next)
1445 {
1446 return next.get_ratio () >= (prev.get_sew () / 8);
1447 }
1448
1449 inline bool sew_ge_and_ratio_eq_p (const vsetvl_info &prev,
1450 const vsetvl_info &next)
1451 {
1452 return sew_ge_p (prev, next) && ratio_eq_p (prev, next);
1453 }
1454 inline bool sew_ge_and_prev_sew_le_next_max_sew_p (const vsetvl_info &prev,
1455 const vsetvl_info &next)
1456 {
1457 return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next);
1458 }
1459 inline bool
1460 sew_ge_and_prev_sew_le_next_max_sew_and_next_ratio_valid_for_prev_sew_p (
1461 const vsetvl_info &prev, const vsetvl_info &next)
1462 {
1463 return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next)
1464 && next_ratio_valid_for_prev_sew_p (prev, next);
1465 }
1466 inline bool sew_le_and_next_sew_le_prev_max_sew_p (const vsetvl_info &prev,
1467 const vsetvl_info &next)
1468 {
1469 return sew_le_p (prev, next) && next_sew_le_prev_max_sew_p (prev, next);
1470 }
1471 inline bool
1472 max_sew_overlap_and_next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev,
1473 const vsetvl_info &next)
1474 {
1475 return next_ratio_valid_for_prev_sew_p (prev, next)
1476 && max_sew_overlap_p (prev, next);
1477 }
1478 inline bool
1479 sew_le_and_next_sew_le_prev_max_sew_and_ratio_eq_p (const vsetvl_info &prev,
1480 const vsetvl_info &next)
1481 {
1482 return sew_le_p (prev, next) && ratio_eq_p (prev, next)
1483 && next_sew_le_prev_max_sew_p (prev, next);
1484 }
1485 inline bool
1486 max_sew_overlap_and_prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev,
1487 const vsetvl_info &next)
1488 {
1489 return prev_ratio_valid_for_next_sew_p (prev, next)
1490 && max_sew_overlap_p (prev, next);
1491 }
1492 inline bool
1493 sew_le_and_next_sew_le_prev_max_sew_and_prev_ratio_valid_for_next_sew_p (
1494 const vsetvl_info &prev, const vsetvl_info &next)
1495 {
1496 return sew_le_p (prev, next) && prev_ratio_valid_for_next_sew_p (prev, next)
1497 && next_sew_le_prev_max_sew_p (prev, next);
1498 }
1499 inline bool max_sew_overlap_and_ratio_eq_p (const vsetvl_info &prev,
1500 const vsetvl_info &next)
1501 {
1502 return ratio_eq_p (prev, next) && max_sew_overlap_p (prev, next);
1503 }
1504
1505 /* predictors for tail and mask policy */
1506
1507 inline bool tail_policy_eq_p (const vsetvl_info &prev,
1508 const vsetvl_info &next)
1509 {
1510 return prev.get_ta () == next.get_ta ();
1511 }
1512 inline bool mask_policy_eq_p (const vsetvl_info &prev,
1513 const vsetvl_info &next)
1514 {
1515 return prev.get_ma () == next.get_ma ();
1516 }
1517 inline bool tail_mask_policy_eq_p (const vsetvl_info &prev,
1518 const vsetvl_info &next)
1519 {
1520 return tail_policy_eq_p (prev, next) && mask_policy_eq_p (prev, next);
1521 }
1522
1523 /* predictors for avl */
1524
1525 inline bool modify_or_use_vl_p (insn_info *i, const vsetvl_info &info)
1526 {
9c16ca93
JZ
1527 if (info.has_vl ())
1528 {
1529 if (find_access (i->defs (), REGNO (info.get_vl ())))
1530 return true;
1531 if (find_access (i->uses (), REGNO (info.get_vl ())))
1532 {
1533 resource_info resource = full_register (REGNO (info.get_vl ()));
1534 def_lookup dl1 = crtl->ssa->find_def (resource, i);
1535 def_lookup dl2 = crtl->ssa->find_def (resource, info.get_insn ());
1536 if (dl1.matching_set () || dl2.matching_set ())
1537 return true;
1538 /* If their VLs are coming from same def, we still want to fuse
1539 their VSETVL demand info to gain better performance. */
1540 return dl1.prev_def (i) != dl2.prev_def (i);
1541 }
1542 }
1543 return false;
29331e72
LD
1544 }
1545 inline bool modify_avl_p (insn_info *i, const vsetvl_info &info)
1546 {
1547 return info.has_nonvlmax_reg_avl ()
1548 && find_access (i->defs (), REGNO (info.get_avl ()));
1549 }
1550
1551 inline bool modify_reg_between_p (insn_info *prev_insn, insn_info *curr_insn,
1552 unsigned regno)
1553 {
1554 gcc_assert (prev_insn->compare_with (curr_insn) < 0);
1555 for (insn_info *i = curr_insn->prev_nondebug_insn (); i != prev_insn;
1556 i = i->prev_nondebug_insn ())
1557 {
1558 // no def of regno
1559 if (find_access (i->defs (), regno))
1560 return true;
1561 }
1562 return false;
1563 }
ec99ffab 1564
29331e72
LD
1565 inline bool reg_avl_equal_p (const vsetvl_info &prev, const vsetvl_info &next)
1566 {
1567 if (!prev.has_nonvlmax_reg_avl () || !next.has_nonvlmax_reg_avl ())
1568 return false;
ec99ffab 1569
29331e72
LD
1570 if (same_equiv_note_p (prev.get_avl_def (), next.get_avl_def ()))
1571 return true;
ec99ffab 1572
29331e72
LD
1573 if (REGNO (prev.get_avl ()) != REGNO (next.get_avl ()))
1574 return false;
ec99ffab 1575
29331e72
LD
1576 insn_info *prev_insn = prev.get_insn ();
1577 if (prev.get_bb () != prev_insn->bb ())
1578 prev_insn = prev.get_bb ()->end_insn ();
ec99ffab 1579
29331e72
LD
1580 insn_info *next_insn = next.get_insn ();
1581 if (next.get_bb () != next_insn->bb ())
1582 next_insn = next.get_bb ()->end_insn ();
ec99ffab 1583
29331e72
LD
1584 return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false);
1585 }
ec99ffab 1586
29331e72
LD
1587 inline bool avl_equal_p (const vsetvl_info &prev, const vsetvl_info &next)
1588 {
1589 gcc_assert (prev.valid_p () && next.valid_p ());
ec99ffab 1590
4cd4c34a 1591 if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
29331e72 1592 return false;
e030af3e 1593
29331e72
LD
1594 if (vector_config_insn_p (prev.get_insn ()->rtl ()) && next.get_avl_def ()
1595 && next.get_avl_def ()->insn () == prev.get_insn ())
1596 return true;
e030af3e 1597
29331e72
LD
1598 if (prev.get_read_vl_insn ())
1599 {
1600 if (!next.has_nonvlmax_reg_avl () || !next.get_avl_def ())
1601 return false;
1602 insn_info *avl_def_insn = extract_single_source (next.get_avl_def ());
1603 return avl_def_insn == prev.get_read_vl_insn ();
1604 }
1605
1606 if (prev == next && prev.has_nonvlmax_reg_avl ())
1607 {
1608 insn_info *insn = prev.get_insn ();
1609 bb_info *bb = insn->bb ();
1610 for (insn_info *i = insn; real_insn_and_same_bb_p (i, bb);
1611 i = i->next_nondebug_insn ())
1612 if (find_access (i->defs (), REGNO (prev.get_avl ())))
e030af3e 1613 return false;
29331e72 1614 }
60bd33bc 1615
29331e72
LD
1616 if (prev.has_vlmax_avl () && next.has_vlmax_avl ())
1617 return true;
1618 else if (prev.has_imm_avl () && next.has_imm_avl ())
1619 return INTVAL (prev.get_avl ()) == INTVAL (next.get_avl ());
1620 else if (prev.has_vl () && next.has_nonvlmax_reg_avl ()
1621 && REGNO (prev.get_vl ()) == REGNO (next.get_avl ()))
1622 {
1623 insn_info *prev_insn = prev.insn_inside_bb_p ()
1624 ? prev.get_insn ()
1625 : prev.get_bb ()->end_insn ();
1626
1627 insn_info *next_insn = next.insn_inside_bb_p ()
1628 ? next.get_insn ()
1629 : next.get_bb ()->end_insn ();
1630 return avl_vl_unmodified_between_p (prev_insn, next_insn, next, false);
1631 }
1632 else if (prev.has_nonvlmax_reg_avl () && next.has_nonvlmax_reg_avl ())
1633 return reg_avl_equal_p (prev, next);
e030af3e 1634
e030af3e 1635 return false;
29331e72
LD
1636 }
1637 inline bool avl_equal_or_prev_avl_non_zero_p (const vsetvl_info &prev,
1638 const vsetvl_info &next)
1639 {
1640 return avl_equal_p (prev, next) || prev.has_non_zero_avl ();
1641 }
1642
1643 inline bool can_use_next_avl_p (const vsetvl_info &prev,
1644 const vsetvl_info &next)
1645 {
0c4bd132
JZ
1646 /* Forbid the AVL/VL propagation if VL of NEXT is used
1647 by non-RVV instructions. This is because:
1648
1649 bb 2:
1650 PREV: scalar move (no AVL)
1651 bb 3:
1652 NEXT: vsetvl a5(VL), a4(AVL) ...
1653 branch a5,zero
1654
1655 Since user vsetvl instruction is no side effect instruction
1656 which should be placed in the correct and optimal location
1657 of the program by the previous PASS, it is unreasonable that
1658 VSETVL PASS tries to move it to another places if it used by
1659 non-RVV instructions.
1660
1661 Note: We only forbid the cases that VL is used by the following
1662 non-RVV instructions which will cause issues. We don't forbid
1663 other cases since it won't cause correctness issues and we still
1664 more demand info are fused backward. The later LCM algorithm
1665 should know the optimal location of the vsetvl. */
1666 if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
1667 return false;
1668
29331e72
LD
1669 if (!next.has_nonvlmax_reg_avl () && !next.has_vl ())
1670 return true;
e030af3e 1671
29331e72
LD
1672 insn_info *prev_insn = prev.get_insn ();
1673 if (prev.get_bb () != prev_insn->bb ())
1674 prev_insn = prev.get_bb ()->end_insn ();
1675
1676 insn_info *next_insn = next.get_insn ();
1677 if (next.get_bb () != next_insn->bb ())
1678 next_insn = next.get_bb ()->end_insn ();
1679
1680 return avl_vl_unmodified_between_p (prev_insn, next_insn, next);
1681 }
1682
1683 inline bool avl_equal_or_next_avl_non_zero_and_can_use_next_avl_p (
1684 const vsetvl_info &prev, const vsetvl_info &next)
1685 {
1686 return avl_equal_p (prev, next)
1687 || (next.has_non_zero_avl () && can_use_next_avl_p (prev, next));
1688 }
1689
1690 /* modifiers */
1691
1692 inline void nop (const vsetvl_info &prev ATTRIBUTE_UNUSED,
1693 const vsetvl_info &next ATTRIBUTE_UNUSED)
1694 {}
1695
1696 /* modifiers for sew and lmul */
1697
1698 inline void use_min_of_max_sew (vsetvl_info &prev, const vsetvl_info &next)
1699 {
1700 prev.set_max_sew (MIN (prev.get_max_sew (), next.get_max_sew ()));
1701 }
1702 inline void use_next_sew (vsetvl_info &prev, const vsetvl_info &next)
1703 {
1704 prev.set_sew (next.get_sew ());
1705 use_min_of_max_sew (prev, next);
1706 }
1707 inline void use_max_sew (vsetvl_info &prev, const vsetvl_info &next)
1708 {
e74c37aa 1709 int max_sew = MAX (prev.get_sew (), next.get_sew ());
29331e72
LD
1710 prev.set_sew (max_sew);
1711 use_min_of_max_sew (prev, next);
1712 }
1713 inline void use_next_sew_lmul (vsetvl_info &prev, const vsetvl_info &next)
1714 {
1715 use_next_sew (prev, next);
1716 prev.set_vlmul (next.get_vlmul ());
1717 prev.set_ratio (next.get_ratio ());
1718 }
1719 inline void use_next_sew_with_prev_ratio (vsetvl_info &prev,
1720 const vsetvl_info &next)
1721 {
1722 use_next_sew (prev, next);
1723 prev.set_vlmul (calculate_vlmul (next.get_sew (), prev.get_ratio ()));
1724 }
1725 inline void modify_lmul_with_next_ratio (vsetvl_info &prev,
1726 const vsetvl_info &next)
1727 {
1728 prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ()));
1729 prev.set_ratio (next.get_ratio ());
1730 }
1731
1732 inline void use_max_sew_and_lmul_with_next_ratio (vsetvl_info &prev,
1733 const vsetvl_info &next)
1734 {
1735 prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ()));
1736 use_max_sew (prev, next);
1737 prev.set_ratio (next.get_ratio ());
1738 }
1739
1740 inline void use_max_sew_and_lmul_with_prev_ratio (vsetvl_info &prev,
1741 const vsetvl_info &next)
1742 {
e74c37aa 1743 int max_sew = MAX (prev.get_sew (), next.get_sew ());
29331e72
LD
1744 prev.set_vlmul (calculate_vlmul (max_sew, prev.get_ratio ()));
1745 prev.set_sew (max_sew);
1746 }
1747
1748 /* modifiers for tail and mask policy */
1749
1750 inline void use_tail_policy (vsetvl_info &prev, const vsetvl_info &next)
1751 {
1752 if (!next.get_ta ())
1753 prev.set_ta (next.get_ta ());
1754 }
1755 inline void use_mask_policy (vsetvl_info &prev, const vsetvl_info &next)
1756 {
1757 if (!next.get_ma ())
1758 prev.set_ma (next.get_ma ());
1759 }
1760 inline void use_tail_mask_policy (vsetvl_info &prev, const vsetvl_info &next)
1761 {
1762 use_tail_policy (prev, next);
1763 use_mask_policy (prev, next);
1764 }
1765
1766 /* modifiers for avl */
1767
1768 inline void use_next_avl (vsetvl_info &prev, const vsetvl_info &next)
1769 {
1770 gcc_assert (can_use_next_avl_p (prev, next));
1771 prev.update_avl (next);
1772 }
1773
1774 inline void use_next_avl_when_not_equal (vsetvl_info &prev,
1775 const vsetvl_info &next)
1776 {
1777 if (avl_equal_p (prev, next))
1778 return;
1779 gcc_assert (next.has_non_zero_avl ());
1780 use_next_avl (prev, next);
1781 }
e030af3e 1782
29331e72 1783public:
29331e72
LD
1784 /* Can we move vsetvl info between prev_insn and next_insn safe? */
1785 bool avl_vl_unmodified_between_p (insn_info *prev_insn, insn_info *next_insn,
1786 const vsetvl_info &info,
1787 bool ignore_vl = false)
1788 {
1789 gcc_assert ((ignore_vl && info.has_nonvlmax_reg_avl ())
1790 || (info.has_nonvlmax_reg_avl () || info.has_vl ()));
1791
1792 gcc_assert (!prev_insn->is_debug_insn () && !next_insn->is_debug_insn ());
1793 if (prev_insn->bb () == next_insn->bb ()
1794 && prev_insn->compare_with (next_insn) < 0)
1795 {
1796 for (insn_info *i = next_insn->prev_nondebug_insn (); i != prev_insn;
1797 i = i->prev_nondebug_insn ())
1798 {
9c16ca93 1799 // no def and use of vl
29331e72
LD
1800 if (!ignore_vl && modify_or_use_vl_p (i, info))
1801 return false;
e030af3e 1802
29331e72
LD
1803 // no def of avl
1804 if (modify_avl_p (i, info))
1805 return false;
1806 }
1807 return true;
1808 }
1809 else
1810 {
3132d2d3 1811 basic_block prev_cfg_bb = prev_insn->bb ()->cfg_bb ();
29331e72
LD
1812 if (!ignore_vl && info.has_vl ())
1813 {
3132d2d3 1814 bitmap live_out = df_get_live_out (prev_cfg_bb);
29331e72
LD
1815 if (bitmap_bit_p (live_out, REGNO (info.get_vl ())))
1816 return false;
1817 }
a2d12abe 1818
3132d2d3
JZ
1819 /* Find set_info at location of PREV_INSN and NEXT_INSN, Return
1820 false if those 2 set_info are different.
1821
1822 PREV_INSN --- multiple nested blocks --- NEXT_INSN.
1823
1824 Return false if there is any modifications of AVL inside those
1825 multiple nested blocks. */
1826 if (info.has_nonvlmax_reg_avl ())
29331e72 1827 {
3132d2d3
JZ
1828 resource_info resource = full_register (REGNO (info.get_avl ()));
1829 def_lookup dl1 = crtl->ssa->find_def (resource, prev_insn);
1830 def_lookup dl2 = crtl->ssa->find_def (resource, next_insn);
1831 if (dl2.matching_set ())
1832 return false;
1833
1834 auto is_phi_or_real
1835 = [&] (insn_info *h) { return h->is_real () || h->is_phi (); };
1836
1837 def_info *def1 = dl1.matching_set_or_last_def_of_prev_group ();
1838 def_info *def2 = dl2.prev_def (next_insn);
1839 set_info *set1 = safe_dyn_cast<set_info *> (def1);
1840 set_info *set2 = safe_dyn_cast<set_info *> (def2);
1841 if (!set1 || !set2)
1842 return false;
1843
1844 auto is_same_ultimate_def = [&] (set_info *s1, set_info *s2) {
1845 return s1->insn ()->is_phi () && s2->insn ()->is_phi ()
1846 && look_through_degenerate_phi (s1)
1847 == look_through_degenerate_phi (s2);
1848 };
1849
1850 if (set1 != set2 && !is_same_ultimate_def (set1, set2))
29331e72 1851 {
3132d2d3
JZ
1852 if (!is_phi_or_real (set1->insn ())
1853 || !is_phi_or_real (set2->insn ()))
29331e72 1854 return false;
3132d2d3
JZ
1855
1856 if (set1->insn ()->is_real () && set2->insn ()->is_phi ())
1857 {
1858 hash_set<set_info *> sets
1859 = get_all_sets (set2, true, false, true);
1860 if (!sets.contains (set1))
1861 return false;
1862 }
1863 else
1864 {
1865 insn_info *def_insn1 = extract_single_source (set1);
1866 insn_info *def_insn2 = extract_single_source (set2);
1867 if (!def_insn1 || !def_insn2 || def_insn1 != def_insn2)
1868 return false;
1869 }
29331e72 1870 }
29331e72 1871 }
12b23c71 1872
29331e72
LD
1873 for (insn_info *i = next_insn; i != next_insn->bb ()->head_insn ();
1874 i = i->prev_nondebug_insn ())
1875 {
d83070ae 1876 // no def and use of vl
29331e72
LD
1877 if (!ignore_vl && modify_or_use_vl_p (i, info))
1878 return false;
9243c3d1 1879
29331e72
LD
1880 // no def of avl
1881 if (modify_avl_p (i, info))
1882 return false;
1883 }
6b6b9c68 1884
29331e72
LD
1885 for (insn_info *i = prev_insn->bb ()->end_insn (); i != prev_insn;
1886 i = i->prev_nondebug_insn ())
1887 {
d83070ae 1888 // no def and use of vl
29331e72
LD
1889 if (!ignore_vl && modify_or_use_vl_p (i, info))
1890 return false;
1891
1892 // no def of avl
1893 if (modify_avl_p (i, info))
1894 return false;
1895 }
1896 }
d875d756 1897 return true;
29331e72
LD
1898 }
1899
1900 bool sew_lmul_compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
1901 {
1902 gcc_assert (prev.valid_p () && next.valid_p ());
1903 sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
1904 sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
1905#define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1906 AVAILABLE_P, FUSE) \
1907 if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
1908 && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
1909 return COMPATIBLE_P (prev, next);
6b6b9c68 1910
29331e72 1911#include "riscv-vsetvl.def"
6b6b9c68 1912
29331e72
LD
1913 gcc_unreachable ();
1914 }
6b6b9c68 1915
29331e72
LD
1916 bool sew_lmul_available_p (const vsetvl_info &prev, const vsetvl_info &next)
1917 {
1918 gcc_assert (prev.valid_p () && next.valid_p ());
1919 sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
1920 sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
1921#define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1922 AVAILABLE_P, FUSE) \
1923 if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
1924 && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
1925 return AVAILABLE_P (prev, next);
d875d756 1926
29331e72 1927#include "riscv-vsetvl.def"
4f673c5e 1928
29331e72
LD
1929 gcc_unreachable ();
1930 }
1931
1932 void merge_sew_lmul (vsetvl_info &prev, const vsetvl_info &next)
1933 {
1934 gcc_assert (prev.valid_p () && next.valid_p ());
1935 sew_lmul_demand_type prev_flags = prev.get_sew_lmul_demand ();
1936 sew_lmul_demand_type next_flags = next.get_sew_lmul_demand ();
1937#define DEF_SEW_LMUL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1938 AVAILABLE_P, FUSE) \
1939 if (prev_flags == sew_lmul_demand_type::PREV_FLAGS \
1940 && next_flags == sew_lmul_demand_type::NEXT_FLAGS) \
1941 { \
1942 gcc_assert (COMPATIBLE_P (prev, next)); \
1943 FUSE (prev, next); \
1944 prev.set_sew_lmul_demand (sew_lmul_demand_type::NEW_FLAGS); \
1945 return; \
1946 }
9243c3d1 1947
29331e72 1948#include "riscv-vsetvl.def"
9243c3d1 1949
29331e72
LD
1950 gcc_unreachable ();
1951 }
9243c3d1 1952
29331e72
LD
1953 bool policy_compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
1954 {
1955 gcc_assert (prev.valid_p () && next.valid_p ());
1956 policy_demand_type prev_flags = prev.get_policy_demand ();
1957 policy_demand_type next_flags = next.get_policy_demand ();
1958#define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1959 AVAILABLE_P, FUSE) \
1960 if (prev_flags == policy_demand_type::PREV_FLAGS \
1961 && next_flags == policy_demand_type::NEXT_FLAGS) \
1962 return COMPATIBLE_P (prev, next);
9243c3d1 1963
29331e72 1964#include "riscv-vsetvl.def"
9243c3d1 1965
29331e72
LD
1966 gcc_unreachable ();
1967 }
4f673c5e 1968
29331e72
LD
1969 bool policy_available_p (const vsetvl_info &prev, const vsetvl_info &next)
1970 {
1971 gcc_assert (prev.valid_p () && next.valid_p ());
1972 policy_demand_type prev_flags = prev.get_policy_demand ();
1973 policy_demand_type next_flags = next.get_policy_demand ();
1974#define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1975 AVAILABLE_P, FUSE) \
1976 if (prev_flags == policy_demand_type::PREV_FLAGS \
1977 && next_flags == policy_demand_type::NEXT_FLAGS) \
1978 return AVAILABLE_P (prev, next);
4f673c5e 1979
29331e72 1980#include "riscv-vsetvl.def"
9243c3d1 1981
29331e72
LD
1982 gcc_unreachable ();
1983 }
1984
1985 void merge_policy (vsetvl_info &prev, const vsetvl_info &next)
1986 {
1987 gcc_assert (prev.valid_p () && next.valid_p ());
1988 policy_demand_type prev_flags = prev.get_policy_demand ();
1989 policy_demand_type next_flags = next.get_policy_demand ();
1990#define DEF_POLICY_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
1991 AVAILABLE_P, FUSE) \
1992 if (prev_flags == policy_demand_type::PREV_FLAGS \
1993 && next_flags == policy_demand_type::NEXT_FLAGS) \
1994 { \
1995 gcc_assert (COMPATIBLE_P (prev, next)); \
1996 FUSE (prev, next); \
1997 prev.set_policy_demand (policy_demand_type::NEW_FLAGS); \
1998 return; \
1999 }
9243c3d1 2000
29331e72 2001#include "riscv-vsetvl.def"
ec99ffab 2002
29331e72
LD
2003 gcc_unreachable ();
2004 }
9243c3d1 2005
d82bb518
JZ
2006 bool vl_not_in_conflict_p (const vsetvl_info &prev, const vsetvl_info &next)
2007 {
2008 /* We don't fuse this following case:
2009
2010 li a5, -1
2011 vmv.s.x v0, a5 -- PREV
2012 vsetvli a5, ... -- NEXT
2013
2014 Don't fuse NEXT into PREV.
2015 */
2016 return !prev.vl_modify_non_avl_op_p (next)
2017 && !next.vl_modify_non_avl_op_p (prev);
2018 }
2019
29331e72
LD
2020 bool avl_compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
2021 {
2022 gcc_assert (prev.valid_p () && next.valid_p ());
2023 avl_demand_type prev_flags = prev.get_avl_demand ();
2024 avl_demand_type next_flags = next.get_avl_demand ();
2025#define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
2026 AVAILABLE_P, FUSE) \
2027 if (prev_flags == avl_demand_type::PREV_FLAGS \
2028 && next_flags == avl_demand_type::NEXT_FLAGS) \
2029 return COMPATIBLE_P (prev, next);
9243c3d1 2030
29331e72 2031#include "riscv-vsetvl.def"
9243c3d1 2032
29331e72
LD
2033 gcc_unreachable ();
2034 }
9243c3d1 2035
29331e72
LD
2036 bool avl_available_p (const vsetvl_info &prev, const vsetvl_info &next)
2037 {
2038 gcc_assert (prev.valid_p () && next.valid_p ());
2039 avl_demand_type prev_flags = prev.get_avl_demand ();
2040 avl_demand_type next_flags = next.get_avl_demand ();
2041#define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
2042 AVAILABLE_P, FUSE) \
2043 if (prev_flags == avl_demand_type::PREV_FLAGS \
2044 && next_flags == avl_demand_type::NEXT_FLAGS) \
2045 return AVAILABLE_P (prev, next);
9243c3d1 2046
29331e72 2047#include "riscv-vsetvl.def"
9243c3d1 2048
29331e72
LD
2049 gcc_unreachable ();
2050 }
2051
2052 void merge_avl (vsetvl_info &prev, const vsetvl_info &next)
2053 {
2054 gcc_assert (prev.valid_p () && next.valid_p ());
2055 avl_demand_type prev_flags = prev.get_avl_demand ();
2056 avl_demand_type next_flags = next.get_avl_demand ();
2057#define DEF_AVL_RULE(PREV_FLAGS, NEXT_FLAGS, NEW_FLAGS, COMPATIBLE_P, \
2058 AVAILABLE_P, FUSE) \
2059 if (prev_flags == avl_demand_type::PREV_FLAGS \
2060 && next_flags == avl_demand_type::NEXT_FLAGS) \
2061 { \
2062 gcc_assert (COMPATIBLE_P (prev, next)); \
2063 FUSE (prev, next); \
2064 prev.set_avl_demand (avl_demand_type::NEW_FLAGS); \
2065 return; \
60bd33bc
JZZ
2066 }
2067
29331e72 2068#include "riscv-vsetvl.def"
9243c3d1 2069
29331e72
LD
2070 gcc_unreachable ();
2071 }
2072
2073 bool compatible_p (const vsetvl_info &prev, const vsetvl_info &next)
2074 {
2075 bool compatible_p = sew_lmul_compatible_p (prev, next)
2076 && policy_compatible_p (prev, next)
d82bb518
JZ
2077 && avl_compatible_p (prev, next)
2078 && vl_not_in_conflict_p (prev, next);
29331e72
LD
2079 return compatible_p;
2080 }
2081
2082 bool available_p (const vsetvl_info &prev, const vsetvl_info &next)
2083 {
2084 bool available_p = sew_lmul_available_p (prev, next)
2085 && policy_available_p (prev, next)
d82bb518
JZ
2086 && avl_available_p (prev, next)
2087 && vl_not_in_conflict_p (prev, next);
29331e72
LD
2088 gcc_assert (!available_p || compatible_p (prev, next));
2089 return available_p;
2090 }
2091
2092 void merge (vsetvl_info &prev, const vsetvl_info &next)
2093 {
2094 gcc_assert (compatible_p (prev, next));
2095 merge_sew_lmul (prev, next);
2096 merge_policy (prev, next);
2097 merge_avl (prev, next);
2098 gcc_assert (available_p (prev, next));
2099 }
2100};
9243c3d1 2101
9243c3d1 2102
29331e72 2103class pre_vsetvl
9243c3d1 2104{
29331e72
LD
2105private:
2106 demand_system m_dem;
2107 auto_vec<vsetvl_block_info> m_vector_block_infos;
2108
d83070ae 2109 /* data for avl reaching definition. */
29331e72
LD
2110 sbitmap *m_reg_def_loc;
2111
d83070ae
KC
2112 /* data for vsetvl info reaching definition. */
2113 vsetvl_info m_unknown_info;
29331e72
LD
2114 auto_vec<vsetvl_info *> m_vsetvl_def_exprs;
2115 sbitmap *m_vsetvl_def_in;
2116 sbitmap *m_vsetvl_def_out;
2117
2118 /* data for lcm */
2119 auto_vec<vsetvl_info *> m_exprs;
2120 sbitmap *m_avloc;
2121 sbitmap *m_avin;
2122 sbitmap *m_avout;
2123 sbitmap *m_kill;
2124 sbitmap *m_antloc;
2125 sbitmap *m_transp;
2126 sbitmap *m_insert;
2127 sbitmap *m_del;
2128 struct edge_list *m_edges;
2129
2130 auto_vec<vsetvl_info> m_delete_list;
2131
2132 vsetvl_block_info &get_block_info (const bb_info *bb)
2133 {
2134 return m_vector_block_infos[bb->index ()];
2135 }
2136 const vsetvl_block_info &get_block_info (const basic_block bb) const
2137 {
2138 return m_vector_block_infos[bb->index];
2139 }
2140
2141 vsetvl_block_info &get_block_info (const basic_block bb)
2142 {
2143 return m_vector_block_infos[bb->index];
2144 }
2145
2146 void add_expr (auto_vec<vsetvl_info *> &m_exprs, vsetvl_info &info)
2147 {
2148 for (vsetvl_info *item : m_exprs)
2149 {
2150 if (*item == info)
2151 return;
2152 }
2153 m_exprs.safe_push (&info);
2154 }
2155
2156 unsigned get_expr_index (auto_vec<vsetvl_info *> &m_exprs,
2157 const vsetvl_info &info)
2158 {
2159 for (size_t i = 0; i < m_exprs.length (); i += 1)
2160 {
2161 if (*m_exprs[i] == info)
2162 return i;
2163 }
2164 gcc_unreachable ();
2165 }
2166
c9d5b46a 2167 bool anticipated_exp_p (const vsetvl_info &header_info)
29331e72
LD
2168 {
2169 if (!header_info.has_nonvlmax_reg_avl () && !header_info.has_vl ())
2170 return true;
9243c3d1 2171
29331e72
LD
2172 bb_info *bb = header_info.get_bb ();
2173 insn_info *prev_insn = bb->head_insn ();
2174 insn_info *next_insn = header_info.insn_inside_bb_p ()
2175 ? header_info.get_insn ()
2176 : header_info.get_bb ()->end_insn ();
2177
2178 return m_dem.avl_vl_unmodified_between_p (prev_insn, next_insn,
2179 header_info);
2180 }
2181
2182 bool available_exp_p (const vsetvl_info &prev_info,
2183 const vsetvl_info &next_info)
2184 {
2185 return m_dem.available_p (prev_info, next_info);
2186 }
2187
2188 void compute_probabilities ()
2189 {
2190 edge e;
2191 edge_iterator ei;
2192
2193 for (const bb_info *bb : crtl->ssa->bbs ())
2194 {
2195 basic_block cfg_bb = bb->cfg_bb ();
2196 auto &curr_prob = get_block_info (cfg_bb).probability;
2197
2198 /* GCC assume entry block (bb 0) are always so
2199 executed so set its probability as "always". */
2200 if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
2201 curr_prob = profile_probability::always ();
2202 /* Exit block (bb 1) is the block we don't need to process. */
2203 if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
2204 continue;
9243c3d1 2205
29331e72
LD
2206 gcc_assert (curr_prob.initialized_p ());
2207 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
2208 {
2209 auto &new_prob = get_block_info (e->dest).probability;
2210 /* Normally, the edge probability should be initialized.
2211 However, some special testing code which is written in
d83070ae 2212 GIMPLE IR style force the edge probability uninitialized,
29331e72 2213 we conservatively set it as never so that it will not
d83070ae 2214 affect PRE (Phase 3 && Phase 4). */
29331e72
LD
2215 if (!e->probability.initialized_p ())
2216 new_prob = profile_probability::never ();
2217 else if (!new_prob.initialized_p ())
2218 new_prob = curr_prob * e->probability;
2219 else if (new_prob == profile_probability::always ())
2220 continue;
2221 else
2222 new_prob += curr_prob * e->probability;
2223 }
2224 }
2225 }
2226
2227 void insert_vsetvl_insn (enum emit_type emit_type, const vsetvl_info &info)
2228 {
2229 rtx pat = info.get_vsetvl_pat ();
2230 rtx_insn *rinsn = info.get_insn ()->rtl ();
2231
2232 if (emit_type == EMIT_DIRECT)
2233 {
2234 emit_insn (pat);
2235 if (dump_file)
2236 {
2237 fprintf (dump_file, " Insert vsetvl insn %d:\n",
2238 INSN_UID (get_last_insn ()));
2239 print_rtl_single (dump_file, get_last_insn ());
2240 }
2241 }
2242 else if (emit_type == EMIT_BEFORE)
2243 {
2244 emit_insn_before (pat, rinsn);
2245 if (dump_file)
2246 {
2247 fprintf (dump_file, " Insert vsetvl insn before insn %d:\n",
2248 INSN_UID (rinsn));
2249 print_rtl_single (dump_file, PREV_INSN (rinsn));
2250 }
2251 }
2252 else
2253 {
2254 emit_insn_after (pat, rinsn);
2255 if (dump_file)
2256 {
2257 fprintf (dump_file, " Insert vsetvl insn after insn %d:\n",
2258 INSN_UID (rinsn));
2259 print_rtl_single (dump_file, NEXT_INSN (rinsn));
2260 }
2261 }
2262 }
2263
2264 void change_vsetvl_insn (const vsetvl_info &info)
2265 {
2266 rtx_insn *rinsn = info.get_insn ()->rtl ();
2267 rtx new_pat = info.get_vsetvl_pat ();
2268
2269 if (dump_file)
2270 {
2271 fprintf (dump_file, " Change insn %d from:\n", INSN_UID (rinsn));
2272 print_rtl_single (dump_file, rinsn);
2273 }
2274
2275 validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false);
2276
2277 if (dump_file)
2278 {
2279 fprintf (dump_file, "\n to:\n");
2280 print_rtl_single (dump_file, rinsn);
2281 }
2282 }
2283
d29136ad 2284 void remove_vsetvl_insn (rtx_insn *rinsn)
29331e72 2285 {
29331e72
LD
2286 if (dump_file)
2287 {
2288 fprintf (dump_file, " Eliminate insn %d:\n", INSN_UID (rinsn));
2289 print_rtl_single (dump_file, rinsn);
2290 }
2291 if (in_sequence_p ())
2292 remove_insn (rinsn);
2293 else
2294 delete_insn (rinsn);
2295 }
2296
2297 bool successors_probability_equal_p (const basic_block cfg_bb) const
2298 {
2299 edge e;
2300 edge_iterator ei;
2301 profile_probability prob = profile_probability::uninitialized ();
2302 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
2303 {
2304 if (prob == profile_probability::uninitialized ())
2305 prob = m_vector_block_infos[e->dest->index].probability;
2306 else if (prob == m_vector_block_infos[e->dest->index].probability)
2307 continue;
2308 else
2309 /* We pick the highest probability among those incompatible VSETVL
d83070ae 2310 infos. When all incompatible VSETVL infos have same probability, we
29331e72
LD
2311 don't pick any of them. */
2312 return false;
2313 }
ec99ffab 2314 return true;
29331e72
LD
2315 }
2316
e935c066
JZ
2317 bool has_compatible_reaching_vsetvl_p (vsetvl_info info)
2318 {
2319 unsigned int index;
2320 sbitmap_iterator sbi;
2321 EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[info.get_bb ()->index ()], 0,
2322 index, sbi)
2323 {
2324 const auto prev_info = *m_vsetvl_def_exprs[index];
2325 if (!prev_info.valid_p ())
2326 continue;
2327 if (m_dem.compatible_p (prev_info, info))
2328 return true;
2329 }
2330 return false;
2331 }
2332
923a67f1 2333 bool preds_all_same_avl_and_ratio_p (const vsetvl_info &curr_info)
29331e72
LD
2334 {
2335 gcc_assert (
2336 !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
2337
2338 unsigned expr_index;
2339 sbitmap_iterator sbi;
2340 EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[curr_info.get_bb ()->index ()], 0,
2341 expr_index, sbi)
2342 {
2343 const vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
2344 if (!prev_info.valid_p ()
923a67f1
JZ
2345 || !m_dem.avl_available_p (prev_info, curr_info)
2346 || prev_info.get_ratio () != curr_info.get_ratio ())
29331e72
LD
2347 return false;
2348 }
005fad9d 2349
005fad9d 2350 return true;
29331e72 2351 }
005fad9d 2352
29331e72
LD
2353public:
2354 pre_vsetvl ()
3132d2d3 2355 : m_vsetvl_def_in (nullptr), m_vsetvl_def_out (nullptr), m_avloc (nullptr),
29331e72
LD
2356 m_avin (nullptr), m_avout (nullptr), m_kill (nullptr), m_antloc (nullptr),
2357 m_transp (nullptr), m_insert (nullptr), m_del (nullptr), m_edges (nullptr)
2358 {
2359 /* Initialization of RTL_SSA. */
2360 calculate_dominance_info (CDI_DOMINATORS);
4a0a8dc1
JZ
2361 loop_optimizer_init (LOOPS_NORMAL);
2362 /* Create FAKE edges for infinite loops. */
2363 connect_infinite_loops_to_exit ();
29331e72
LD
2364 df_analyze ();
2365 crtl->ssa = new function_info (cfun);
2366 m_vector_block_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
2367 compute_probabilities ();
d83070ae 2368 m_unknown_info.set_unknown ();
29331e72
LD
2369 }
2370
2371 void finish ()
2372 {
2373 free_dominance_info (CDI_DOMINATORS);
4a0a8dc1 2374 loop_optimizer_finalize ();
29331e72
LD
2375 if (crtl->ssa->perform_pending_updates ())
2376 cleanup_cfg (0);
2377 delete crtl->ssa;
2378 crtl->ssa = nullptr;
2379
29331e72
LD
2380 if (m_reg_def_loc)
2381 sbitmap_vector_free (m_reg_def_loc);
2382
29331e72
LD
2383 if (m_vsetvl_def_in)
2384 sbitmap_vector_free (m_vsetvl_def_in);
2385 if (m_vsetvl_def_out)
2386 sbitmap_vector_free (m_vsetvl_def_out);
2387
2388 if (m_avloc)
2389 sbitmap_vector_free (m_avloc);
2390 if (m_kill)
2391 sbitmap_vector_free (m_kill);
2392 if (m_antloc)
2393 sbitmap_vector_free (m_antloc);
2394 if (m_transp)
2395 sbitmap_vector_free (m_transp);
2396 if (m_insert)
2397 sbitmap_vector_free (m_insert);
2398 if (m_del)
2399 sbitmap_vector_free (m_del);
2400 if (m_avin)
2401 sbitmap_vector_free (m_avin);
2402 if (m_avout)
2403 sbitmap_vector_free (m_avout);
2404
2405 if (m_edges)
2406 free_edge_list (m_edges);
2407 }
2408
29331e72 2409 void compute_vsetvl_def_data ();
9dd10de1 2410 void compute_transparent (const bb_info *);
29331e72
LD
2411 void compute_lcm_local_properties ();
2412
2413 void fuse_local_vsetvl_info ();
33408780 2414 bool earliest_fuse_vsetvl_info (int iter);
29331e72
LD
2415 void pre_global_vsetvl_info ();
2416 void emit_vsetvl ();
d83070ae 2417 void cleanup ();
29331e72
LD
2418 void remove_avl_operand ();
2419 void remove_unused_dest_operand ();
22622a5a 2420 void remove_vsetvl_pre_insns ();
29331e72
LD
2421
2422 void dump (FILE *file, const char *title) const
2423 {
2424 fprintf (file, "\nVSETVL infos after %s\n\n", title);
2425 for (const bb_info *bb : crtl->ssa->bbs ())
2426 {
2427 const auto &block_info = m_vector_block_infos[bb->index ()];
2428 fprintf (file, " bb %d:\n", bb->index ());
2429 fprintf (file, " probability: ");
2430 block_info.probability.dump (file);
2431 fprintf (file, "\n");
2432 if (!block_info.empty_p ())
2433 {
2434 fprintf (file, " Header vsetvl info:");
2435 block_info.get_entry_info ().dump (file, " ");
2436 fprintf (file, " Footer vsetvl info:");
2437 block_info.get_exit_info ().dump (file, " ");
4fd09aed 2438 for (const auto &info : block_info.local_infos)
29331e72
LD
2439 {
2440 fprintf (file,
2441 " insn %d vsetvl info:", info.get_insn ()->uid ());
2442 info.dump (file, " ");
2443 }
2444 }
2445 }
2446 }
2447};
c139f5e1 2448
9243c3d1 2449void
29331e72 2450pre_vsetvl::compute_vsetvl_def_data ()
9243c3d1 2451{
29331e72 2452 m_vsetvl_def_exprs.truncate (0);
d83070ae 2453 add_expr (m_vsetvl_def_exprs, m_unknown_info);
29331e72 2454 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 2455 {
29331e72
LD
2456 vsetvl_block_info &block_info = get_block_info (bb);
2457 if (block_info.empty_p ())
2458 continue;
2459 vsetvl_info &footer_info = block_info.get_exit_info ();
2460 gcc_assert (footer_info.valid_p () || footer_info.unknown_p ());
2461 add_expr (m_vsetvl_def_exprs, footer_info);
9243c3d1
JZZ
2462 }
2463
29331e72
LD
2464 if (m_vsetvl_def_in)
2465 sbitmap_vector_free (m_vsetvl_def_in);
2466 if (m_vsetvl_def_out)
2467 sbitmap_vector_free (m_vsetvl_def_out);
9243c3d1 2468
29331e72
LD
2469 sbitmap *def_loc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2470 m_vsetvl_def_exprs.length ());
2471 sbitmap *m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2472 m_vsetvl_def_exprs.length ());
9243c3d1 2473
29331e72
LD
2474 m_vsetvl_def_in = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2475 m_vsetvl_def_exprs.length ());
2476 m_vsetvl_def_out = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2477 m_vsetvl_def_exprs.length ());
9243c3d1 2478
29331e72
LD
2479 bitmap_vector_clear (def_loc, last_basic_block_for_fn (cfun));
2480 bitmap_vector_clear (m_kill, last_basic_block_for_fn (cfun));
2481 bitmap_vector_clear (m_vsetvl_def_out, last_basic_block_for_fn (cfun));
9243c3d1 2482
29331e72
LD
2483 for (const bb_info *bb : crtl->ssa->bbs ())
2484 {
2485 vsetvl_block_info &block_info = get_block_info (bb);
2486 if (block_info.empty_p ())
9243c3d1 2487 {
29331e72 2488 for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i += 1)
9243c3d1 2489 {
9dd10de1
JZ
2490 auto *info = m_vsetvl_def_exprs[i];
2491 if (info->has_nonvlmax_reg_avl ()
2492 && bitmap_bit_p (m_reg_def_loc[bb->index ()],
2493 REGNO (info->get_avl ())))
2494 {
2495 bitmap_set_bit (m_kill[bb->index ()], i);
2496 bitmap_set_bit (def_loc[bb->index ()],
2497 get_expr_index (m_vsetvl_def_exprs,
d83070ae 2498 m_unknown_info));
9dd10de1 2499 }
9243c3d1 2500 }
29331e72 2501 continue;
9243c3d1
JZZ
2502 }
2503
29331e72
LD
2504 vsetvl_info &footer_info = block_info.get_exit_info ();
2505 bitmap_ones (m_kill[bb->index ()]);
2506 bitmap_set_bit (def_loc[bb->index ()],
2507 get_expr_index (m_vsetvl_def_exprs, footer_info));
9243c3d1
JZZ
2508 }
2509
d83070ae 2510 /* Set the def_out of the ENTRY basic block to m_unknown_info expr. */
29331e72
LD
2511 basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
2512 bitmap_set_bit (m_vsetvl_def_out[entry->index],
d83070ae 2513 get_expr_index (m_vsetvl_def_exprs, m_unknown_info));
9243c3d1 2514
29331e72
LD
2515 compute_reaching_defintion (def_loc, m_kill, m_vsetvl_def_in,
2516 m_vsetvl_def_out);
2517
2518 if (dump_file && (dump_flags & TDF_DETAILS))
e030af3e 2519 {
29331e72 2520 fprintf (dump_file,
d83070ae 2521 "\n Compute vsetvl info reaching definition data:\n\n");
29331e72
LD
2522 fprintf (dump_file, " Expression List (%d):\n",
2523 m_vsetvl_def_exprs.length ());
2524 for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i++)
2525 {
2526 const auto &info = *m_vsetvl_def_exprs[i];
2527 fprintf (dump_file, " Expr[%u]: ", i);
2528 info.dump (dump_file, " ");
2529 }
2530 fprintf (dump_file, "\n bitmap data:\n");
2531 for (const bb_info *bb : crtl->ssa->bbs ())
2532 {
2533 unsigned int i = bb->index ();
2534 fprintf (dump_file, " BB %u:\n", i);
2535 fprintf (dump_file, " def_loc: ");
2536 dump_bitmap_file (dump_file, def_loc[i]);
2537 fprintf (dump_file, " kill: ");
2538 dump_bitmap_file (dump_file, m_kill[i]);
2539 fprintf (dump_file, " vsetvl_def_in: ");
2540 dump_bitmap_file (dump_file, m_vsetvl_def_in[i]);
2541 fprintf (dump_file, " vsetvl_def_out: ");
2542 dump_bitmap_file (dump_file, m_vsetvl_def_out[i]);
2543 }
e030af3e 2544 }
4f673c5e 2545
29331e72
LD
2546 sbitmap_vector_free (def_loc);
2547 sbitmap_vector_free (m_kill);
e030af3e 2548}
9243c3d1 2549
9dd10de1
JZ
2550/* Subroutine of compute_lcm_local_properties which Compute local transparent
2551 BB. Note that the compile time is very sensitive to compute_transparent and
2552 compute_lcm_local_properties, any change of these 2 functions should be
2553 aware of the compile time changing of the program which has a large number of
2554 blocks, e.g SPEC 2017 wrf.
2555
2556 Current compile time profile of SPEC 2017 wrf:
2557
2558 1. scheduling - 27%
2559 2. machine dep reorg (VSETVL PASS) - 18%
2560
2561 VSETVL pass should not spend more time than scheduling in compilation. */
2562void
2563pre_vsetvl::compute_transparent (const bb_info *bb)
2564{
2565 int num_exprs = m_exprs.length ();
2566 unsigned bb_index = bb->index ();
2567 for (int i = 0; i < num_exprs; i++)
2568 {
2569 auto *info = m_exprs[i];
2570 if (info->has_nonvlmax_reg_avl ()
2571 && bitmap_bit_p (m_reg_def_loc[bb_index], REGNO (info->get_avl ())))
2572 bitmap_clear_bit (m_transp[bb_index], i);
2573 else if (info->has_vl ()
2574 && bitmap_bit_p (m_reg_def_loc[bb_index],
2575 REGNO (info->get_vl ())))
2576 bitmap_clear_bit (m_transp[bb_index], i);
2577 }
2578}
2579
e030af3e 2580/* Compute the local properties of each recorded expression.
6b6b9c68 2581
e030af3e
JZ
2582 Local properties are those that are defined by the block, irrespective of
2583 other blocks.
6b6b9c68 2584
e030af3e
JZ
2585 An expression is transparent in a block if its operands are not modified
2586 in the block.
6b6b9c68 2587
e030af3e
JZ
2588 An expression is computed (locally available) in a block if it is computed
2589 at least once and expression would contain the same value if the
2590 computation was moved to the end of the block.
2591
2592 An expression is locally anticipatable in a block if it is computed at
2593 least once and expression would contain the same value if the computation
2594 was moved to the beginning of the block. */
2595void
29331e72 2596pre_vsetvl::compute_lcm_local_properties ()
6b6b9c68 2597{
29331e72
LD
2598 m_exprs.truncate (0);
2599 for (const bb_info *bb : crtl->ssa->bbs ())
2600 {
2601 vsetvl_block_info &block_info = get_block_info (bb);
2602 if (block_info.empty_p ())
2603 continue;
2604 vsetvl_info &header_info = block_info.get_entry_info ();
2605 vsetvl_info &footer_info = block_info.get_exit_info ();
2606 gcc_assert (footer_info.valid_p () || footer_info.unknown_p ());
d40b3c1e
JZ
2607 if (header_info.valid_p ())
2608 add_expr (m_exprs, header_info);
2609 if (footer_info.valid_p ())
2610 add_expr (m_exprs, footer_info);
29331e72
LD
2611 }
2612
2613 int num_exprs = m_exprs.length ();
2614 if (m_avloc)
2615 sbitmap_vector_free (m_avloc);
2616 if (m_kill)
2617 sbitmap_vector_free (m_kill);
2618 if (m_antloc)
2619 sbitmap_vector_free (m_antloc);
2620 if (m_transp)
2621 sbitmap_vector_free (m_transp);
2622 if (m_avin)
2623 sbitmap_vector_free (m_avin);
2624 if (m_avout)
2625 sbitmap_vector_free (m_avout);
2626
2627 m_avloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2628 m_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2629 m_antloc = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2630 m_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2631 m_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2632 m_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2633
2634 bitmap_vector_clear (m_avloc, last_basic_block_for_fn (cfun));
2635 bitmap_vector_clear (m_antloc, last_basic_block_for_fn (cfun));
9dd10de1 2636 bitmap_vector_ones (m_transp, last_basic_block_for_fn (cfun));
29331e72 2637
e030af3e
JZ
2638 /* - If T is locally available at the end of a block, then T' must be
2639 available at the end of the same block. Since some optimization has
2640 occurred earlier, T' might not be locally available, however, it must
2641 have been previously computed on all paths. As a formula, T at AVLOC(B)
2642 implies that T' at AVOUT(B).
2643 An "available occurrence" is one that is the last occurrence in the
2644 basic block and the operands are not modified by following statements in
2645 the basic block [including this insn].
6b6b9c68 2646
e030af3e
JZ
2647 - If T is locally anticipated at the beginning of a block, then either
2648 T', is locally anticipated or it is already available from previous
2649 blocks. As a formula, this means that T at ANTLOC(B) implies that T' at
2650 ANTLOC(B) at AVIN(B).
2651 An "anticipatable occurrence" is one that is the first occurrence in the
2652 basic block, the operands are not modified in the basic block prior
2653 to the occurrence and the output is not used between the start of
2654 the block and the occurrence. */
e030af3e 2655 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 2656 {
29331e72
LD
2657 unsigned bb_index = bb->index ();
2658 vsetvl_block_info &block_info = get_block_info (bb);
9243c3d1 2659
29331e72
LD
2660 /* Compute m_transp */
2661 if (block_info.empty_p ())
9dd10de1
JZ
2662 compute_transparent (bb);
2663 else
9243c3d1 2664 {
9dd10de1
JZ
2665 bitmap_clear (m_transp[bb_index]);
2666 vsetvl_info &header_info = block_info.get_entry_info ();
2667 vsetvl_info &footer_info = block_info.get_exit_info ();
29331e72 2668
9dd10de1
JZ
2669 if (header_info.valid_p () && anticipated_exp_p (header_info))
2670 bitmap_set_bit (m_antloc[bb_index],
2671 get_expr_index (m_exprs, header_info));
9243c3d1 2672
9dd10de1
JZ
2673 if (footer_info.valid_p ())
2674 for (int i = 0; i < num_exprs; i += 1)
2675 {
2676 const vsetvl_info &info = *m_exprs[i];
2677 if (!info.valid_p ())
2678 continue;
2679 if (available_exp_p (footer_info, info))
2680 bitmap_set_bit (m_avloc[bb_index], i);
2681 }
9243c3d1 2682 }
e030af3e 2683
4a0a8dc1
JZ
2684 if (invalid_opt_bb_p (bb->cfg_bb ()))
2685 {
2686 bitmap_clear (m_antloc[bb_index]);
2687 bitmap_clear (m_transp[bb_index]);
2688 }
9dd10de1 2689
d40b3c1e
JZ
2690 /* Compute ae_kill for each basic block using:
2691
2692 ~(TRANSP | COMP)
2693 */
2694 bitmap_ior (m_kill[bb_index], m_transp[bb_index], m_avloc[bb_index]);
2695 bitmap_not (m_kill[bb_index], m_kill[bb_index]);
9243c3d1
JZZ
2696 }
2697}
2698
29331e72
LD
2699void
2700pre_vsetvl::fuse_local_vsetvl_info ()
e030af3e 2701{
29331e72
LD
2702 m_reg_def_loc
2703 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), GP_REG_LAST + 1);
2704 bitmap_vector_clear (m_reg_def_loc, last_basic_block_for_fn (cfun));
2705 bitmap_ones (m_reg_def_loc[ENTRY_BLOCK_PTR_FOR_FN (cfun)->index]);
2706
2707 for (bb_info *bb : crtl->ssa->bbs ())
e030af3e 2708 {
29331e72 2709 auto &block_info = get_block_info (bb);
4fd09aed 2710 block_info.bb = bb;
29331e72 2711 if (dump_file && (dump_flags & TDF_DETAILS))
e030af3e 2712 {
29331e72
LD
2713 fprintf (dump_file, " Try fuse basic block %d\n", bb->index ());
2714 }
2715 auto_vec<vsetvl_info> infos;
2716 for (insn_info *insn : bb->real_nondebug_insns ())
2717 {
2718 vsetvl_info curr_info = vsetvl_info (insn);
2719 if (curr_info.valid_p () || curr_info.unknown_p ())
2720 infos.safe_push (curr_info);
2721
2722 /* Collecting GP registers modified by the current bb. */
2723 if (insn->is_real ())
2724 for (def_info *def : insn->defs ())
2725 if (def->is_reg () && GP_REG_P (def->regno ()))
2726 bitmap_set_bit (m_reg_def_loc[bb->index ()], def->regno ());
2727 }
e030af3e 2728
29331e72
LD
2729 vsetvl_info prev_info = vsetvl_info ();
2730 prev_info.set_empty ();
2731 for (auto &curr_info : infos)
2732 {
2733 if (prev_info.empty_p ())
2734 prev_info = curr_info;
2735 else if ((curr_info.unknown_p () && prev_info.valid_p ())
2736 || (curr_info.valid_p () && prev_info.unknown_p ()))
2737 {
4fd09aed 2738 block_info.local_infos.safe_push (prev_info);
29331e72
LD
2739 prev_info = curr_info;
2740 }
2741 else if (curr_info.valid_p () && prev_info.valid_p ())
2742 {
2743 if (m_dem.available_p (prev_info, curr_info))
e7b585a4 2744 {
29331e72 2745 if (dump_file && (dump_flags & TDF_DETAILS))
e7b585a4 2746 {
29331e72
LD
2747 fprintf (dump_file,
2748 " Ignore curr info since prev info "
2749 "available with it:\n");
2750 fprintf (dump_file, " prev_info: ");
2751 prev_info.dump (dump_file, " ");
2752 fprintf (dump_file, " curr_info: ");
2753 curr_info.dump (dump_file, " ");
2754 fprintf (dump_file, "\n");
e7b585a4 2755 }
6cf47447
JZ
2756 /* Even though prev_info is available with curr_info,
2757 we need to update the MAX_SEW of prev_info since
2758 we don't check MAX_SEW in available_p check.
2759
2760 prev_info:
2761 Demand fields: demand_ratio_and_ge_sew demand_avl
2762 SEW=16, VLMUL=mf4, RATIO=64, MAX_SEW=64
2763
2764 curr_info:
2765 Demand fields: demand_ge_sew demand_non_zero_avl
2766 SEW=16, VLMUL=m1, RATIO=16, MAX_SEW=32
2767
2768 In the example above, prev_info is available with
2769 curr_info, we need to update prev_info MAX_SEW from
2770 64 into 32. */
2771 prev_info.set_max_sew (
2772 MIN (prev_info.get_max_sew (), curr_info.get_max_sew ()));
4cd4c34a 2773 if (!curr_info.vl_used_by_non_rvv_insn_p ()
29331e72
LD
2774 && vsetvl_insn_p (curr_info.get_insn ()->rtl ()))
2775 m_delete_list.safe_push (curr_info);
e030af3e 2776
29331e72
LD
2777 if (curr_info.get_read_vl_insn ())
2778 prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ());
e030af3e 2779 }
29331e72 2780 else if (m_dem.compatible_p (prev_info, curr_info))
e030af3e 2781 {
29331e72 2782 if (dump_file && (dump_flags & TDF_DETAILS))
e030af3e 2783 {
29331e72
LD
2784 fprintf (dump_file, " Fuse curr info since prev info "
2785 "compatible with it:\n");
2786 fprintf (dump_file, " prev_info: ");
2787 prev_info.dump (dump_file, " ");
2788 fprintf (dump_file, " curr_info: ");
2789 curr_info.dump (dump_file, " ");
e030af3e 2790 }
29331e72
LD
2791 m_dem.merge (prev_info, curr_info);
2792 if (curr_info.get_read_vl_insn ())
2793 prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ());
2794 if (dump_file && (dump_flags & TDF_DETAILS))
e030af3e 2795 {
29331e72
LD
2796 fprintf (dump_file, " prev_info after fused: ");
2797 prev_info.dump (dump_file, " ");
2798 fprintf (dump_file, "\n");
e030af3e 2799 }
e030af3e
JZ
2800 }
2801 else
2802 {
29331e72
LD
2803 if (dump_file && (dump_flags & TDF_DETAILS))
2804 {
2805 fprintf (dump_file,
d83070ae 2806 " Cannot fuse incompatible infos:\n");
29331e72
LD
2807 fprintf (dump_file, " prev_info: ");
2808 prev_info.dump (dump_file, " ");
2809 fprintf (dump_file, " curr_info: ");
2810 curr_info.dump (dump_file, " ");
2811 }
4fd09aed 2812 block_info.local_infos.safe_push (prev_info);
29331e72 2813 prev_info = curr_info;
e030af3e
JZ
2814 }
2815 }
2816 }
29331e72
LD
2817
2818 if (prev_info.valid_p () || prev_info.unknown_p ())
4fd09aed 2819 block_info.local_infos.safe_push (prev_info);
e030af3e 2820 }
e030af3e
JZ
2821}
2822
29331e72 2823
9243c3d1 2824bool
33408780 2825pre_vsetvl::earliest_fuse_vsetvl_info (int iter)
9243c3d1 2826{
29331e72
LD
2827 compute_vsetvl_def_data ();
2828 compute_lcm_local_properties ();
9243c3d1 2829
29331e72
LD
2830 unsigned num_exprs = m_exprs.length ();
2831 struct edge_list *m_edges = create_edge_list ();
2832 unsigned num_edges = NUM_EDGES (m_edges);
2833 sbitmap *antin
2834 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
2835 sbitmap *antout
2836 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs);
005fad9d 2837
29331e72 2838 sbitmap *earliest = sbitmap_vector_alloc (num_edges, num_exprs);
9243c3d1 2839
29331e72
LD
2840 compute_available (m_avloc, m_kill, m_avout, m_avin);
2841 compute_antinout_edge (m_antloc, m_transp, antin, antout);
2842 compute_earliest (m_edges, num_exprs, antin, antout, m_avout, m_kill,
2843 earliest);
2844
2845 if (dump_file && (dump_flags & TDF_DETAILS))
9243c3d1 2846 {
33408780
VG
2847 fprintf (dump_file, "\n Compute LCM earliest insert data (lift %d):\n\n",
2848 iter);
29331e72
LD
2849 fprintf (dump_file, " Expression List (%u):\n", num_exprs);
2850 for (unsigned i = 0; i < num_exprs; i++)
9243c3d1 2851 {
29331e72
LD
2852 const auto &info = *m_exprs[i];
2853 fprintf (dump_file, " Expr[%u]: ", i);
2854 info.dump (dump_file, " ");
9243c3d1 2855 }
29331e72
LD
2856 fprintf (dump_file, "\n bitmap data:\n");
2857 for (const bb_info *bb : crtl->ssa->bbs ())
2858 {
2859 unsigned int i = bb->index ();
2860 fprintf (dump_file, " BB %u:\n", i);
2861 fprintf (dump_file, " avloc: ");
2862 dump_bitmap_file (dump_file, m_avloc[i]);
2863 fprintf (dump_file, " kill: ");
2864 dump_bitmap_file (dump_file, m_kill[i]);
2865 fprintf (dump_file, " antloc: ");
2866 dump_bitmap_file (dump_file, m_antloc[i]);
2867 fprintf (dump_file, " transp: ");
2868 dump_bitmap_file (dump_file, m_transp[i]);
2869
2870 fprintf (dump_file, " avin: ");
2871 dump_bitmap_file (dump_file, m_avin[i]);
2872 fprintf (dump_file, " avout: ");
2873 dump_bitmap_file (dump_file, m_avout[i]);
2874 fprintf (dump_file, " antin: ");
2875 dump_bitmap_file (dump_file, antin[i]);
2876 fprintf (dump_file, " antout: ");
2877 dump_bitmap_file (dump_file, antout[i]);
2878 }
2879 fprintf (dump_file, "\n");
2880 fprintf (dump_file, " earliest:\n");
2881 for (unsigned ed = 0; ed < num_edges; ed++)
2882 {
2883 edge eg = INDEX_EDGE (m_edges, ed);
9243c3d1 2884
29331e72
LD
2885 if (bitmap_empty_p (earliest[ed]))
2886 continue;
2887 fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index,
2888 eg->dest->index);
2889 dump_bitmap_file (dump_file, earliest[ed]);
2890 }
2891 fprintf (dump_file, "\n");
2892 }
9243c3d1 2893
29331e72 2894 if (dump_file && (dump_flags & TDF_DETAILS))
9243c3d1 2895 {
33408780 2896 fprintf (dump_file, " Fused global info result (lift %d):\n", iter);
29331e72 2897 }
9243c3d1 2898
29331e72
LD
2899 bool changed = false;
2900 for (unsigned ed = 0; ed < num_edges; ed++)
2901 {
2902 sbitmap e = earliest[ed];
2903 if (bitmap_empty_p (e))
9243c3d1
JZZ
2904 continue;
2905
29331e72
LD
2906 unsigned int expr_index;
2907 sbitmap_iterator sbi;
2908 EXECUTE_IF_SET_IN_BITMAP (e, 0, expr_index, sbi)
ec99ffab 2909 {
29331e72 2910 vsetvl_info &curr_info = *m_exprs[expr_index];
29331e72 2911 edge eg = INDEX_EDGE (m_edges, ed);
29331e72
LD
2912 vsetvl_block_info &src_block_info = get_block_info (eg->src);
2913 vsetvl_block_info &dest_block_info = get_block_info (eg->dest);
ff8f9544 2914
bf23a62e
JZ
2915 if (!curr_info.valid_p ()
2916 || eg->probability == profile_probability::never ()
2917 || src_block_info.probability
2918 == profile_probability::uninitialized ()
2919 /* When multiple set bits in earliest edge, such edge may
2920 have infinite loop in preds or succs or multiple conflict
2921 vsetvl expression which make such edge is unrelated. We
2922 don't perform fusion for such situation. */
2923 || bitmap_count_bits (e) != 1)
ff8f9544 2924 continue;
9243c3d1 2925
29331e72 2926 if (src_block_info.empty_p ())
9243c3d1 2927 {
29331e72
LD
2928 vsetvl_info new_curr_info = curr_info;
2929 new_curr_info.set_bb (crtl->ssa->bb (eg->dest));
e935c066
JZ
2930 bool has_compatible_p
2931 = has_compatible_reaching_vsetvl_p (new_curr_info);
29331e72 2932 if (!has_compatible_p)
9243c3d1 2933 {
29331e72
LD
2934 if (dump_file && (dump_flags & TDF_DETAILS))
2935 {
2936 fprintf (dump_file,
2937 " Forbidden lift up vsetvl info into bb %u "
2938 "since there is no vsetvl info that reaching in "
2939 "is compatible with it:",
2940 eg->src->index);
2941 curr_info.dump (dump_file, " ");
2942 }
2943 continue;
9243c3d1
JZZ
2944 }
2945
29331e72 2946 if (dump_file && (dump_flags & TDF_DETAILS))
e030af3e
JZ
2947 {
2948 fprintf (dump_file,
29331e72
LD
2949 " Set empty bb %u to info:", eg->src->index);
2950 curr_info.dump (dump_file, " ");
e030af3e 2951 }
29331e72
LD
2952 src_block_info.set_info (curr_info);
2953 src_block_info.probability = dest_block_info.probability;
2954 changed = true;
9243c3d1 2955 }
29331e72
LD
2956 else if (src_block_info.has_info ())
2957 {
2958 vsetvl_info &prev_info = src_block_info.get_exit_info ();
2959 gcc_assert (prev_info.valid_p ());
2960
2961 if (m_dem.compatible_p (prev_info, curr_info))
2962 {
2963 if (dump_file && (dump_flags & TDF_DETAILS))
2964 {
2965 fprintf (dump_file, " Fuse curr info since prev info "
2966 "compatible with it:\n");
2967 fprintf (dump_file, " prev_info: ");
2968 prev_info.dump (dump_file, " ");
2969 fprintf (dump_file, " curr_info: ");
2970 curr_info.dump (dump_file, " ");
2971 }
2972 m_dem.merge (prev_info, curr_info);
2973 if (dump_file && (dump_flags & TDF_DETAILS))
2974 {
2975 fprintf (dump_file, " prev_info after fused: ");
2976 prev_info.dump (dump_file, " ");
2977 fprintf (dump_file, "\n");
2978 }
2979 changed = true;
2980 if (src_block_info.has_info ())
2981 src_block_info.probability += dest_block_info.probability;
2982 }
33408780 2983 else
29331e72
LD
2984 {
2985 /* Cancel lift up if probabilities are equal. */
e935c066
JZ
2986 if (successors_probability_equal_p (eg->src)
2987 || (dest_block_info.probability
2988 > src_block_info.probability
2989 && !has_compatible_reaching_vsetvl_p (curr_info)))
29331e72
LD
2990 {
2991 if (dump_file && (dump_flags & TDF_DETAILS))
2992 {
2993 fprintf (dump_file,
33408780 2994 " Reset bb %u:",
29331e72
LD
2995 eg->src->index);
2996 prev_info.dump (dump_file, " ");
e935c066
JZ
2997 fprintf (dump_file, " due to (same probability or no "
2998 "compatible reaching):");
29331e72
LD
2999 curr_info.dump (dump_file, " ");
3000 }
3001 src_block_info.set_empty_info ();
3002 src_block_info.probability
3003 = profile_probability::uninitialized ();
60820248
JZ
3004 /* See PR113696, we should reset immediate dominator to
3005 empty since we may uplift ineffective vsetvl which
3006 locate at low probability block. */
3007 basic_block dom
3008 = get_immediate_dominator (CDI_DOMINATORS, eg->src);
3009 auto &dom_block_info = get_block_info (dom);
3010 if (dom_block_info.has_info ()
3011 && !m_dem.compatible_p (
3012 dom_block_info.get_exit_info (), curr_info))
3013 {
3014 dom_block_info.set_empty_info ();
3015 dom_block_info.probability
3016 = profile_probability::uninitialized ();
3017 if (dump_file && (dump_flags & TDF_DETAILS))
3018 {
3019 fprintf (dump_file,
3020 " Reset dominator bb %u:",
3021 dom->index);
3022 prev_info.dump (dump_file, " ");
3023 fprintf (dump_file,
3024 " due to (same probability or no "
3025 "compatible reaching):");
3026 curr_info.dump (dump_file, " ");
3027 }
3028 }
29331e72
LD
3029 changed = true;
3030 }
3031 /* Choose the one with higher probability. */
3032 else if (dest_block_info.probability
3033 > src_block_info.probability)
3034 {
3035 if (dump_file && (dump_flags & TDF_DETAILS))
3036 {
3037 fprintf (dump_file,
33408780 3038 " Change bb %u from:",
29331e72
LD
3039 eg->src->index);
3040 prev_info.dump (dump_file, " ");
3041 fprintf (dump_file,
3042 " to (higher probability):");
3043 curr_info.dump (dump_file, " ");
3044 }
3045 src_block_info.set_info (curr_info);
3046 src_block_info.probability = dest_block_info.probability;
3047 changed = true;
3048 }
3049 }
3050 }
3051 else
e030af3e 3052 {
29331e72
LD
3053 vsetvl_info &prev_info = src_block_info.get_exit_info ();
3054 if (!prev_info.valid_p ()
bf23a62e
JZ
3055 || m_dem.available_p (prev_info, curr_info)
3056 || !m_dem.compatible_p (prev_info, curr_info))
29331e72
LD
3057 continue;
3058
bf23a62e 3059 if (dump_file && (dump_flags & TDF_DETAILS))
29331e72 3060 {
bf23a62e
JZ
3061 fprintf (dump_file, " Fuse curr info since prev info "
3062 "compatible with it:\n");
3063 fprintf (dump_file, " prev_info: ");
3064 prev_info.dump (dump_file, " ");
3065 fprintf (dump_file, " curr_info: ");
3066 curr_info.dump (dump_file, " ");
3067 }
3068 m_dem.merge (prev_info, curr_info);
3069 if (dump_file && (dump_flags & TDF_DETAILS))
3070 {
3071 fprintf (dump_file, " prev_info after fused: ");
3072 prev_info.dump (dump_file, " ");
3073 fprintf (dump_file, "\n");
29331e72 3074 }
bf23a62e 3075 changed = true;
e030af3e 3076 }
9243c3d1
JZZ
3077 }
3078 }
3079
0d50facd 3080 if (dump_file && (dump_flags & TDF_DETAILS))
c919d059 3081 {
29331e72 3082 fprintf (dump_file, "\n");
c919d059 3083 }
c919d059 3084
29331e72
LD
3085 sbitmap_vector_free (antin);
3086 sbitmap_vector_free (antout);
3087 sbitmap_vector_free (earliest);
3088 free_edge_list (m_edges);
c919d059 3089
29331e72 3090 return changed;
c919d059
KC
3091}
3092
8421f279 3093void
29331e72 3094pre_vsetvl::pre_global_vsetvl_info ()
c919d059 3095{
29331e72
LD
3096 compute_vsetvl_def_data ();
3097 compute_lcm_local_properties ();
c919d059 3098
29331e72
LD
3099 unsigned num_exprs = m_exprs.length ();
3100 m_edges = pre_edge_lcm_avs (num_exprs, m_transp, m_avloc, m_antloc, m_kill,
3101 m_avin, m_avout, &m_insert, &m_del);
3102 unsigned num_edges = NUM_EDGES (m_edges);
c919d059 3103
29331e72
LD
3104 if (dump_file && (dump_flags & TDF_DETAILS))
3105 {
3106 fprintf (dump_file, "\n Compute LCM insert and delete data:\n\n");
3107 fprintf (dump_file, " Expression List (%u):\n", num_exprs);
3108 for (unsigned i = 0; i < num_exprs; i++)
c919d059 3109 {
29331e72
LD
3110 const auto &info = *m_exprs[i];
3111 fprintf (dump_file, " Expr[%u]: ", i);
3112 info.dump (dump_file, " ");
c919d059 3113 }
29331e72
LD
3114 fprintf (dump_file, "\n bitmap data:\n");
3115 for (const bb_info *bb : crtl->ssa->bbs ())
c919d059 3116 {
29331e72
LD
3117 unsigned i = bb->index ();
3118 fprintf (dump_file, " BB %u:\n", i);
3119 fprintf (dump_file, " avloc: ");
3120 dump_bitmap_file (dump_file, m_avloc[i]);
3121 fprintf (dump_file, " kill: ");
3122 dump_bitmap_file (dump_file, m_kill[i]);
3123 fprintf (dump_file, " antloc: ");
3124 dump_bitmap_file (dump_file, m_antloc[i]);
3125 fprintf (dump_file, " transp: ");
3126 dump_bitmap_file (dump_file, m_transp[i]);
3127
3128 fprintf (dump_file, " avin: ");
3129 dump_bitmap_file (dump_file, m_avin[i]);
3130 fprintf (dump_file, " avout: ");
3131 dump_bitmap_file (dump_file, m_avout[i]);
3132 fprintf (dump_file, " del: ");
3133 dump_bitmap_file (dump_file, m_del[i]);
c919d059 3134 }
29331e72
LD
3135 fprintf (dump_file, "\n");
3136 fprintf (dump_file, " insert:\n");
3137 for (unsigned ed = 0; ed < num_edges; ed++)
8421f279 3138 {
29331e72 3139 edge eg = INDEX_EDGE (m_edges, ed);
c919d059 3140
29331e72
LD
3141 if (bitmap_empty_p (m_insert[ed]))
3142 continue;
3143 fprintf (dump_file, " Edge(bb %u -> bb %u): ", eg->src->index,
3144 eg->dest->index);
3145 dump_bitmap_file (dump_file, m_insert[ed]);
c919d059 3146 }
29331e72
LD
3147 }
3148
3149 /* Remove vsetvl infos as LCM suggest */
3150 for (const bb_info *bb : crtl->ssa->bbs ())
3151 {
3152 sbitmap d = m_del[bb->index ()];
3153 if (bitmap_count_bits (d) == 0)
c919d059 3154 continue;
29331e72
LD
3155 gcc_assert (bitmap_count_bits (d) == 1);
3156 unsigned expr_index = bitmap_first_set_bit (d);
3157 vsetvl_info &info = *m_exprs[expr_index];
3158 gcc_assert (info.valid_p ());
3159 gcc_assert (info.get_bb () == bb);
3160 const vsetvl_block_info &block_info = get_block_info (info.get_bb ());
3161 gcc_assert (block_info.get_entry_info () == info);
3162 info.set_delete ();
c6c2a1d7
JZ
3163 if (dump_file && (dump_flags & TDF_DETAILS))
3164 {
3165 fprintf (dump_file,
3166 "\nLCM deleting vsetvl of block %d, it has predecessors: \n",
3167 bb->index ());
3168 hash_set<basic_block> all_preds
3169 = get_all_predecessors (bb->cfg_bb ());
3170 int i = 0;
3171 for (const auto pred : all_preds)
3172 {
3173 fprintf (dump_file, "%d ", pred->index);
3174 i++;
3175 if (i % 32 == 0)
3176 fprintf (dump_file, "\n");
3177 }
3178 fprintf (dump_file, "\n");
3179 }
29331e72 3180 }
c919d059 3181
d83070ae 3182 /* Remove vsetvl infos if all predecessors are available to the block. */
ef21ae5c
JZ
3183 for (const bb_info *bb : crtl->ssa->bbs ())
3184 {
3185 vsetvl_block_info &block_info = get_block_info (bb);
5ee45f5e
JZ
3186 if (block_info.empty_p ())
3187 continue;
3188 vsetvl_info &curr_info = block_info.get_entry_info ();
3189 if (!curr_info.valid_p ())
ef21ae5c
JZ
3190 continue;
3191
5ee45f5e
JZ
3192 unsigned int expr_index;
3193 sbitmap_iterator sbi;
3194 gcc_assert (
3195 !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
3196 bool full_available = true;
3197 EXECUTE_IF_SET_IN_BITMAP (m_vsetvl_def_in[bb->index ()], 0, expr_index,
3198 sbi)
3199 {
3200 vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
3201 if (!prev_info.valid_p ()
3202 || !m_dem.available_p (prev_info, curr_info))
3203 {
3204 full_available = false;
3205 break;
3206 }
3207 }
3208 if (full_available)
3209 curr_info.set_delete ();
ef21ae5c
JZ
3210 }
3211
29331e72
LD
3212 for (const bb_info *bb : crtl->ssa->bbs ())
3213 {
3214 vsetvl_block_info &block_info = get_block_info (bb);
3215 if (block_info.empty_p ())
3216 continue;
3217 vsetvl_info &curr_info = block_info.get_entry_info ();
3218 if (curr_info.delete_p ())
c919d059 3219 {
4fd09aed 3220 if (block_info.local_infos.is_empty ())
29331e72 3221 continue;
4fd09aed 3222 curr_info = block_info.local_infos[0];
c919d059 3223 }
4cd4c34a 3224 if (curr_info.valid_p () && !curr_info.vl_used_by_non_rvv_insn_p ()
923a67f1 3225 && preds_all_same_avl_and_ratio_p (curr_info))
29331e72 3226 curr_info.set_change_vtype_only ();
c919d059 3227
29331e72
LD
3228 vsetvl_info prev_info = vsetvl_info ();
3229 prev_info.set_empty ();
4fd09aed 3230 for (auto &curr_info : block_info.local_infos)
c919d059 3231 {
29331e72 3232 if (prev_info.valid_p () && curr_info.valid_p ()
923a67f1
JZ
3233 && m_dem.avl_available_p (prev_info, curr_info)
3234 && prev_info.get_ratio () == curr_info.get_ratio ())
29331e72
LD
3235 curr_info.set_change_vtype_only ();
3236 prev_info = curr_info;
c919d059 3237 }
20c85207 3238 }
20c85207
JZ
3239}
3240
29331e72
LD
3241void
3242pre_vsetvl::emit_vsetvl ()
20c85207 3243{
29331e72 3244 bool need_commit = false;
20c85207 3245
4a0a8dc1
JZ
3246 /* Fake edge is created by connect infinite loops to exit function.
3247 We should commit vsetvl edge after fake edges removes, otherwise,
3248 it will cause ICE. */
3249 remove_fake_exit_edges ();
29331e72 3250 for (const bb_info *bb : crtl->ssa->bbs ())
20c85207 3251 {
4fd09aed 3252 for (const auto &curr_info : get_block_info (bb).local_infos)
29331e72
LD
3253 {
3254 insn_info *insn = curr_info.get_insn ();
3255 if (curr_info.delete_p ())
3256 {
3257 if (vsetvl_insn_p (insn->rtl ()))
d29136ad 3258 remove_vsetvl_insn (curr_info.get_insn ()->rtl ());
29331e72
LD
3259 continue;
3260 }
3261 else if (curr_info.valid_p ())
3262 {
3263 if (vsetvl_insn_p (insn->rtl ()))
3264 {
3265 const vsetvl_info temp = vsetvl_info (insn);
3266 if (!(curr_info == temp))
3267 {
3268 if (dump_file)
3269 {
3270 fprintf (dump_file, "\n Change vsetvl info from: ");
3271 temp.dump (dump_file, " ");
3272 fprintf (dump_file, " to: ");
3273 curr_info.dump (dump_file, " ");
3274 }
3275 change_vsetvl_insn (curr_info);
3276 }
3277 }
3278 else
3279 {
3280 if (dump_file)
3281 {
3282 fprintf (dump_file,
3283 "\n Insert vsetvl info before insn %d: ",
3284 insn->uid ());
3285 curr_info.dump (dump_file, " ");
3286 }
3287 insert_vsetvl_insn (EMIT_BEFORE, curr_info);
3288 }
3289 }
3290 }
20c85207 3291 }
20c85207 3292
29331e72 3293 for (const vsetvl_info &item : m_delete_list)
20c85207 3294 {
29331e72 3295 gcc_assert (vsetvl_insn_p (item.get_insn ()->rtl ()));
d29136ad 3296 remove_vsetvl_insn (item.get_insn ()->rtl ());
20c85207
JZ
3297 }
3298
d1189cee
JZ
3299 /* Insert vsetvl info that was not deleted after lift up. */
3300 for (const bb_info *bb : crtl->ssa->bbs ())
3301 {
3302 const vsetvl_block_info &block_info = get_block_info (bb);
3303 if (!block_info.has_info ())
3304 continue;
3305
3306 const vsetvl_info &footer_info = block_info.get_exit_info ();
3307
3308 if (footer_info.delete_p ())
3309 continue;
3310
3311 edge eg;
3312 edge_iterator eg_iterator;
3313 FOR_EACH_EDGE (eg, eg_iterator, bb->cfg_bb ()->succs)
3314 {
3315 gcc_assert (!(eg->flags & EDGE_ABNORMAL));
3316 if (dump_file)
3317 {
3318 fprintf (
3319 dump_file,
3320 "\n Insert missed vsetvl info at edge(bb %u -> bb %u): ",
3321 eg->src->index, eg->dest->index);
3322 footer_info.dump (dump_file, " ");
3323 }
3324 start_sequence ();
3325 insert_vsetvl_insn (EMIT_DIRECT, footer_info);
3326 rtx_insn *rinsn = get_insns ();
3327 end_sequence ();
3328 default_rtl_profile ();
3329 insert_insn_on_edge (rinsn, eg);
3330 need_commit = true;
3331 }
3332 }
3333
29331e72
LD
3334 /* m_insert vsetvl as LCM suggest. */
3335 for (int ed = 0; ed < NUM_EDGES (m_edges); ed++)
20c85207 3336 {
29331e72
LD
3337 edge eg = INDEX_EDGE (m_edges, ed);
3338 sbitmap i = m_insert[ed];
bf23a62e 3339 if (bitmap_count_bits (i) != 1)
29331e72
LD
3340 /* For code with infinite loop (e.g. pr61634.c), The data flow is
3341 completely wrong. */
3342 continue;
3343
29331e72
LD
3344 unsigned expr_index = bitmap_first_set_bit (i);
3345 const vsetvl_info &info = *m_exprs[expr_index];
3346 gcc_assert (info.valid_p ());
3347 if (dump_file)
20c85207 3348 {
29331e72
LD
3349 fprintf (dump_file,
3350 "\n Insert vsetvl info at edge(bb %u -> bb %u): ",
3351 eg->src->index, eg->dest->index);
3352 info.dump (dump_file, " ");
20c85207 3353 }
29331e72
LD
3354 rtl_profile_for_edge (eg);
3355 start_sequence ();
3356
3357 insert_vsetvl_insn (EMIT_DIRECT, info);
3358 rtx_insn *rinsn = get_insns ();
3359 end_sequence ();
3360 default_rtl_profile ();
3361
3362 /* We should not get an abnormal edge here. */
3363 gcc_assert (!(eg->flags & EDGE_ABNORMAL));
3364 need_commit = true;
3365 insert_insn_on_edge (rinsn, eg);
20c85207
JZ
3366 }
3367
29331e72
LD
3368 if (need_commit)
3369 commit_edge_insertions ();
20c85207
JZ
3370}
3371
9243c3d1 3372void
d83070ae 3373pre_vsetvl::cleanup ()
9243c3d1 3374{
29331e72
LD
3375 remove_avl_operand ();
3376 remove_unused_dest_operand ();
22622a5a 3377 remove_vsetvl_pre_insns ();
29331e72 3378}
9243c3d1 3379
29331e72
LD
3380void
3381pre_vsetvl::remove_avl_operand ()
3382{
3383 basic_block cfg_bb;
3384 rtx_insn *rinsn;
3385 FOR_ALL_BB_FN (cfg_bb, cfun)
3386 FOR_BB_INSNS (cfg_bb, rinsn)
3387 if (NONDEBUG_INSN_P (rinsn) && has_vl_op (rinsn)
3388 && REG_P (get_vl (rinsn)))
3389 {
9243c3d1 3390 rtx avl = get_vl (rinsn);
a2d12abe 3391 if (count_regno_occurrences (rinsn, REGNO (avl)) == 1)
9243c3d1 3392 {
29331e72 3393 rtx new_pat;
60bd33bc 3394 if (fault_first_load_p (rinsn))
29331e72
LD
3395 new_pat
3396 = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx);
60bd33bc
JZZ
3397 else
3398 {
3399 rtx set = single_set (rinsn);
3400 rtx src
3401 = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx);
29331e72
LD
3402 new_pat = gen_rtx_SET (SET_DEST (set), src);
3403 }
3404 if (dump_file)
3405 {
3406 fprintf (dump_file, " Cleanup insn %u's avl operand:\n",
3407 INSN_UID (rinsn));
3408 print_rtl_single (dump_file, rinsn);
60bd33bc 3409 }
29331e72 3410 validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false);
9243c3d1
JZZ
3411 }
3412 }
20c85207
JZ
3413}
3414
6b6b9c68 3415void
29331e72 3416pre_vsetvl::remove_unused_dest_operand ()
20c85207 3417{
6b6b9c68 3418 df_analyze ();
20c85207
JZ
3419 basic_block cfg_bb;
3420 rtx_insn *rinsn;
3421 FOR_ALL_BB_FN (cfg_bb, cfun)
29331e72
LD
3422 FOR_BB_INSNS (cfg_bb, rinsn)
3423 if (NONDEBUG_INSN_P (rinsn) && vsetvl_insn_p (rinsn))
6b6b9c68 3424 {
29331e72
LD
3425 rtx vl = get_vl (rinsn);
3426 vsetvl_info info = vsetvl_info (rinsn);
3427 if (has_no_uses (cfg_bb, rinsn, REGNO (vl)))
3428 if (!info.has_vlmax_avl ())
3429 {
3430 rtx new_pat = info.get_vsetvl_pat (true);
3431 if (dump_file)
3432 {
3433 fprintf (dump_file,
3434 " Remove vsetvl insn %u's dest(vl) operand since "
3435 "it unused:\n",
3436 INSN_UID (rinsn));
3437 print_rtl_single (dump_file, rinsn);
3438 }
3439 validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat,
3440 false);
3441 }
6b6b9c68 3442 }
6b6b9c68
JZZ
3443}
3444
22622a5a
JZ
3445/* Remove all bogus vsetvl_pre instructions. */
3446void
3447pre_vsetvl::remove_vsetvl_pre_insns ()
3448{
3449 basic_block cfg_bb;
3450 rtx_insn *rinsn;
3451 FOR_ALL_BB_FN (cfg_bb, cfun)
3452 FOR_BB_INSNS (cfg_bb, rinsn)
3453 if (NONDEBUG_INSN_P (rinsn) && vsetvl_pre_insn_p (rinsn))
3454 {
3455 if (dump_file)
3456 {
3457 fprintf (dump_file, " Eliminate vsetvl_pre insn %d:\n",
3458 INSN_UID (rinsn));
3459 print_rtl_single (dump_file, rinsn);
3460 }
d29136ad 3461 remove_vsetvl_insn (rinsn);
22622a5a
JZ
3462 }
3463}
3464
29331e72
LD
3465const pass_data pass_data_vsetvl = {
3466 RTL_PASS, /* type */
3467 "vsetvl", /* name */
3468 OPTGROUP_NONE, /* optinfo_flags */
01260a82 3469 TV_MACH_DEP, /* tv_id */
29331e72
LD
3470 0, /* properties_required */
3471 0, /* properties_provided */
3472 0, /* properties_destroyed */
3473 0, /* todo_flags_start */
3474 0, /* todo_flags_finish */
3475};
9243c3d1 3476
29331e72
LD
3477class pass_vsetvl : public rtl_opt_pass
3478{
3479private:
3480 void simple_vsetvl ();
3481 void lazy_vsetvl ();
9243c3d1 3482
29331e72
LD
3483public:
3484 pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {}
9243c3d1 3485
29331e72
LD
3486 /* opt_pass methods: */
3487 virtual bool gate (function *) final override { return TARGET_VECTOR; }
3488 virtual unsigned int execute (function *) final override;
3489}; // class pass_vsetvl
9243c3d1 3490
acc10c79 3491void
29331e72 3492pass_vsetvl::simple_vsetvl ()
acc10c79 3493{
29331e72
LD
3494 if (dump_file)
3495 fprintf (dump_file, "\nEntering Simple VSETVL PASS\n");
acc10c79 3496
29331e72
LD
3497 basic_block cfg_bb;
3498 rtx_insn *rinsn;
3499 FOR_ALL_BB_FN (cfg_bb, cfun)
acc10c79 3500 {
29331e72 3501 FOR_BB_INSNS (cfg_bb, rinsn)
acc10c79 3502 {
29331e72 3503 if (!NONDEBUG_INSN_P (rinsn))
acc10c79 3504 continue;
29331e72
LD
3505 if (has_vtype_op (rinsn))
3506 {
3507 const auto &info = vsetvl_info (rinsn);
3508 rtx pat = info.get_vsetvl_pat ();
3509 emit_insn_before (pat, rinsn);
3510 if (dump_file)
3511 {
3512 fprintf (dump_file, " Insert vsetvl insn before insn %d:\n",
3513 INSN_UID (rinsn));
3514 print_rtl_single (dump_file, PREV_INSN (rinsn));
3515 }
3516 }
acc10c79
JZZ
3517 }
3518 }
acc10c79
JZZ
3519}
3520
9243c3d1
JZZ
3521/* Lazy vsetvl insertion for optimize > 0. */
3522void
29331e72 3523pass_vsetvl::lazy_vsetvl ()
9243c3d1
JZZ
3524{
3525 if (dump_file)
29331e72
LD
3526 fprintf (dump_file, "\nEntering Lazy VSETVL PASS\n\n");
3527
3528 pre_vsetvl pre = pre_vsetvl ();
9243c3d1 3529
9243c3d1 3530 if (dump_file)
29331e72
LD
3531 fprintf (dump_file, "\nPhase 1: Fuse local vsetvl infos.\n\n");
3532 pre.fuse_local_vsetvl_info ();
0d50facd 3533 if (dump_file && (dump_flags & TDF_DETAILS))
29331e72 3534 pre.dump (dump_file, "phase 1");
9243c3d1 3535
29331e72 3536 /* Phase 2: Fuse header and footer vsetvl infos between basic blocks. */
9243c3d1 3537 if (dump_file)
29331e72 3538 fprintf (dump_file, "\nPhase 2: Lift up vsetvl info.\n\n");
1a8bebb1 3539 if (vsetvl_strategy != VSETVL_OPT_NO_FUSION)
29331e72 3540 {
1a8bebb1
JZ
3541 bool changed = true;
3542 int fused_count = 0;
3543 do
3544 {
3545 if (dump_file)
3546 fprintf (dump_file, " Try lift up %d.\n\n", fused_count);
3547 changed = pre.earliest_fuse_vsetvl_info (fused_count);
3548 fused_count += 1;
3549 } while (changed);
3550 }
0d50facd 3551 if (dump_file && (dump_flags & TDF_DETAILS))
29331e72 3552 pre.dump (dump_file, "phase 2");
9243c3d1 3553
29331e72 3554 /* Phase 3: Reducing redundant vsetvl infos using LCM. */
9243c3d1 3555 if (dump_file)
29331e72
LD
3556 fprintf (dump_file, "\nPhase 3: Reduce global vsetvl infos.\n\n");
3557 pre.pre_global_vsetvl_info ();
3558 if (dump_file && (dump_flags & TDF_DETAILS))
3559 pre.dump (dump_file, "phase 3");
9243c3d1 3560
29331e72 3561 /* Phase 4: Insert, modify and remove vsetvl insns. */
9243c3d1 3562 if (dump_file)
29331e72
LD
3563 fprintf (dump_file,
3564 "\nPhase 4: Insert, modify and remove vsetvl insns.\n\n");
3565 pre.emit_vsetvl ();
9243c3d1 3566
d83070ae 3567 /* Phase 5: Cleanup */
9243c3d1 3568 if (dump_file)
d83070ae
KC
3569 fprintf (dump_file, "\nPhase 5: Cleanup\n\n");
3570 pre.cleanup ();
6b6b9c68 3571
29331e72 3572 pre.finish ();
9243c3d1
JZZ
3573}
3574
3575/* Main entry point for this pass. */
3576unsigned int
3577pass_vsetvl::execute (function *)
3578{
3579 if (n_basic_blocks_for_fn (cfun) <= 0)
3580 return 0;
3581
ca8fb009
JZZ
3582 /* The RVV instruction may change after split which is not a stable
3583 instruction. We need to split it here to avoid potential issue
3584 since the VSETVL PASS is insert before split PASS. */
3585 split_all_insns ();
9243c3d1
JZZ
3586
3587 /* Early return for there is no vector instructions. */
3588 if (!has_vector_insn (cfun))
3589 return 0;
3590
1a8bebb1 3591 if (!optimize || vsetvl_strategy == VSETVL_SIMPLE)
9243c3d1
JZZ
3592 simple_vsetvl ();
3593 else
3594 lazy_vsetvl ();
3595
9243c3d1
JZZ
3596 return 0;
3597}
3598
3599rtl_opt_pass *
3600make_pass_vsetvl (gcc::context *ctxt)
3601{
3602 return new pass_vsetvl (ctxt);
3603}