]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/riscv/riscv-vsetvl.cc
Canonicalize vec_merge when mask is constant.
[thirdparty/gcc.git] / gcc / config / riscv / riscv-vsetvl.cc
CommitLineData
9243c3d1 1/* VSETVL pass for RISC-V 'V' Extension for GNU compiler.
c841bde5 2 Copyright (C) 2022-2023 Free Software Foundation, Inc.
9243c3d1
JZZ
3 Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
 9the Free Software Foundation; either version 3, or (at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21/* This pass is to Set VL/VTYPE global status for RVV instructions
22 that depend on VL and VTYPE registers by Lazy code motion (LCM).
23
24 Strategy:
25
26 - Backward demanded info fusion within block.
27
28 - Lazy code motion (LCM) based demanded info backward propagation.
29
30 - RTL_SSA framework for def-use, PHI analysis.
31
32 - Lazy code motion (LCM) for global VL/VTYPE optimization.
33
34 Assumption:
35
36 - Each avl operand is either an immediate (must be in range 0 ~ 31) or reg.
37
38 This pass consists of 5 phases:
39
40 - Phase 1 - compute VL/VTYPE demanded information within each block
41 by backward data-flow analysis.
42
43 - Phase 2 - Emit vsetvl instructions within each basic block according to
44 demand, compute and save ANTLOC && AVLOC of each block.
45
387cd9d3
JZZ
46 - Phase 3 - Backward && forward demanded info propagation and fusion across
47 blocks.
9243c3d1
JZZ
48
49 - Phase 4 - Lazy code motion including: compute local properties,
50 pre_edge_lcm and vsetvl insertion && delete edges for LCM results.
51
52 - Phase 5 - Cleanup AVL operand of RVV instruction since it will not be
53 used any more and VL operand of VSETVL instruction if it is not used by
54 any non-debug instructions.
55
6b6b9c68
JZZ
56 - Phase 6 - Propagate AVL between vsetvl instructions.
57
9243c3d1
JZZ
58 Implementation:
59
60 - The subroutine of optimize == 0 is simple_vsetvl.
 61 This function simply inserts a vsetvl for each RVV
 62 instruction. No optimization.
63
64 - The subroutine of optimize > 0 is lazy_vsetvl.
 65 This function optimizes the vsetvl insertion process by
ec99ffab
JZZ
66 lazy code motion (LCM) layering on RTL_SSA.
67
68 - get_avl (), get_insn (), get_avl_source ():
69
70 1. get_insn () is the current instruction, find_access (get_insn
71 ())->def is the same as get_avl_source () if get_insn () demand VL.
72 2. If get_avl () is non-VLMAX REG, get_avl () == get_avl_source
73 ()->regno ().
74 3. get_avl_source ()->regno () is the REGNO that we backward propagate.
75 */
9243c3d1
JZZ
76
77#define IN_TARGET_CODE 1
78#define INCLUDE_ALGORITHM
79#define INCLUDE_FUNCTIONAL
80
81#include "config.h"
82#include "system.h"
83#include "coretypes.h"
84#include "tm.h"
85#include "backend.h"
86#include "rtl.h"
87#include "target.h"
88#include "tree-pass.h"
89#include "df.h"
90#include "rtl-ssa.h"
91#include "cfgcleanup.h"
92#include "insn-config.h"
93#include "insn-attr.h"
94#include "insn-opinit.h"
95#include "tm-constrs.h"
96#include "cfgrtl.h"
97#include "cfganal.h"
98#include "lcm.h"
99#include "predict.h"
100#include "profile-count.h"
101#include "riscv-vsetvl.h"
102
103using namespace rtl_ssa;
104using namespace riscv_vector;
105
ec99ffab
JZZ
106static CONSTEXPR const unsigned ALL_SEW[] = {8, 16, 32, 64};
107static CONSTEXPR const vlmul_type ALL_LMUL[]
108 = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2};
ec99ffab 109
9243c3d1
JZZ
110DEBUG_FUNCTION void
111debug (const vector_insn_info *info)
112{
113 info->dump (stderr);
114}
115
116DEBUG_FUNCTION void
117debug (const vector_infos_manager *info)
118{
119 info->dump (stderr);
120}
121
122static bool
123vlmax_avl_p (rtx x)
124{
125 return x && rtx_equal_p (x, RVV_VLMAX);
126}
127
128static bool
129vlmax_avl_insn_p (rtx_insn *rinsn)
130{
85112fbb
JZZ
131 return (INSN_CODE (rinsn) == CODE_FOR_vlmax_avlsi
132 || INSN_CODE (rinsn) == CODE_FOR_vlmax_avldi);
9243c3d1
JZZ
133}
134
8d8cc482
JZZ
135/* Return true if the block is a loop itself:
136 local_dem
137 __________
138 ____|____ |
139 | | |
140 |________| |
141 |_________|
142 reaching_out
143*/
9243c3d1
JZZ
144static bool
145loop_basic_block_p (const basic_block cfg_bb)
146{
8d8cc482
JZZ
147 if (JUMP_P (BB_END (cfg_bb)) && any_condjump_p (BB_END (cfg_bb)))
148 {
149 edge e;
150 edge_iterator ei;
151 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
152 if (e->dest->index == cfg_bb->index)
153 return true;
154 }
155 return false;
9243c3d1
JZZ
156}
157
158/* Return true if it is an RVV instruction depends on VTYPE global
159 status register. */
160static bool
161has_vtype_op (rtx_insn *rinsn)
162{
163 return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn);
164}
165
166/* Return true if it is an RVV instruction depends on VL global
167 status register. */
168static bool
169has_vl_op (rtx_insn *rinsn)
170{
171 return recog_memoized (rinsn) >= 0 && get_attr_has_vl_op (rinsn);
172}
173
/* Is this a SEW value that can be encoded into the VTYPE format?
   Only the exact powers of two 8, 16, 32 and 64 are encodable.

   Note: the previous `exact_log2 (sew) && sew >= 8 && sew <= 64` test
   was wrong for non-powers-of-two in [8, 64] (e.g. 24): exact_log2
   returns -1 for those, which is truthy, so they were accepted.  */
static bool
valid_sew_p (size_t sew)
{
  return sew == 8 || sew == 16 || sew == 32 || sew == 64;
}
180
ec99ffab
JZZ
181/* Return true if the instruction ignores VLMUL field of VTYPE. */
182static bool
183ignore_vlmul_insn_p (rtx_insn *rinsn)
184{
185 return get_attr_type (rinsn) == TYPE_VIMOVVX
186 || get_attr_type (rinsn) == TYPE_VFMOVVF
187 || get_attr_type (rinsn) == TYPE_VIMOVXV
188 || get_attr_type (rinsn) == TYPE_VFMOVFV;
189}
190
191/* Return true if the instruction is scalar move instruction. */
192static bool
193scalar_move_insn_p (rtx_insn *rinsn)
194{
195 return get_attr_type (rinsn) == TYPE_VIMOVXV
196 || get_attr_type (rinsn) == TYPE_VFMOVFV;
197}
198
60bd33bc
JZZ
199/* Return true if the instruction is fault first load instruction. */
200static bool
201fault_first_load_p (rtx_insn *rinsn)
202{
6313b045
JZZ
203 return recog_memoized (rinsn) >= 0
204 && (get_attr_type (rinsn) == TYPE_VLDFF
205 || get_attr_type (rinsn) == TYPE_VLSEGDFF);
60bd33bc
JZZ
206}
207
208/* Return true if the instruction is read vl instruction. */
209static bool
210read_vl_insn_p (rtx_insn *rinsn)
211{
212 return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_RDVL;
213}
214
9243c3d1
JZZ
215/* Return true if it is a vsetvl instruction. */
216static bool
217vector_config_insn_p (rtx_insn *rinsn)
218{
219 return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL;
220}
221
222/* Return true if it is vsetvldi or vsetvlsi. */
223static bool
224vsetvl_insn_p (rtx_insn *rinsn)
225{
6b6b9c68
JZZ
226 if (!vector_config_insn_p (rinsn))
227 return false;
85112fbb 228 return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi
6b6b9c68
JZZ
229 || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi);
230}
231
232/* Return true if it is vsetvl zero, rs1. */
233static bool
234vsetvl_discard_result_insn_p (rtx_insn *rinsn)
235{
236 if (!vector_config_insn_p (rinsn))
237 return false;
238 return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi
239 || INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi);
9243c3d1
JZZ
240}
241
9243c3d1 242static bool
4f673c5e 243real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb)
9243c3d1 244{
4f673c5e 245 return insn != nullptr && insn->is_real () && insn->bb () == bb;
9243c3d1
JZZ
246}
247
9243c3d1 248static bool
4f673c5e 249before_p (const insn_info *insn1, const insn_info *insn2)
9243c3d1 250{
9b9a1ac1 251 return insn1->compare_with (insn2) < 0;
4f673c5e
JZZ
252}
253
6b6b9c68
JZZ
254static insn_info *
255find_reg_killed_by (const bb_info *bb, rtx x)
4f673c5e 256{
6b6b9c68
JZZ
257 if (!x || vlmax_avl_p (x) || !REG_P (x))
258 return nullptr;
259 for (insn_info *insn : bb->reverse_real_nondebug_insns ())
4f673c5e 260 if (find_access (insn->defs (), REGNO (x)))
6b6b9c68
JZZ
261 return insn;
262 return nullptr;
263}
264
265/* Helper function to get VL operand. */
266static rtx
267get_vl (rtx_insn *rinsn)
268{
269 if (has_vl_op (rinsn))
270 {
271 extract_insn_cached (rinsn);
272 return recog_data.operand[get_attr_vl_op_idx (rinsn)];
273 }
274 return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0));
4f673c5e
JZZ
275}
276
277static bool
278has_vsetvl_killed_avl_p (const bb_info *bb, const vector_insn_info &info)
279{
280 if (info.dirty_with_killed_avl_p ())
281 {
282 rtx avl = info.get_avl ();
6b6b9c68 283 if (vlmax_avl_p (avl))
ec99ffab 284 return find_reg_killed_by (bb, info.get_avl_reg_rtx ()) != nullptr;
4f673c5e
JZZ
285 for (const insn_info *insn : bb->reverse_real_nondebug_insns ())
286 {
287 def_info *def = find_access (insn->defs (), REGNO (avl));
288 if (def)
289 {
290 set_info *set = safe_dyn_cast<set_info *> (def);
291 if (!set)
292 return false;
293
294 rtx new_avl = gen_rtx_REG (GET_MODE (avl), REGNO (avl));
295 gcc_assert (new_avl != avl);
296 if (!info.compatible_avl_p (avl_info (new_avl, set)))
297 return false;
298
299 return true;
300 }
301 }
302 }
303 return false;
304}
305
9243c3d1
JZZ
306/* An "anticipatable occurrence" is one that is the first occurrence in the
307 basic block, the operands are not modified in the basic block prior
308 to the occurrence and the output is not used between the start of
4f673c5e
JZZ
309 the block and the occurrence.
310
311 For VSETVL instruction, we have these following formats:
312 1. vsetvl zero, rs1.
313 2. vsetvl zero, imm.
314 3. vsetvl rd, rs1.
315
316 So base on these circumstances, a DEM is considered as a local anticipatable
317 occurrence should satisfy these following conditions:
318
319 1). rs1 (avl) are not modified in the basic block prior to the VSETVL.
320 2). rd (vl) are not modified in the basic block prior to the VSETVL.
321 3). rd (vl) is not used between the start of the block and the occurrence.
322
323 Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE
324 is modified prior to the occurrence. This case is already considered as
325 a non-local anticipatable occurrence.
326*/
9243c3d1 327static bool
4f673c5e 328anticipatable_occurrence_p (const bb_info *bb, const vector_insn_info dem)
9243c3d1 329{
4f673c5e 330 insn_info *insn = dem.get_insn ();
9243c3d1
JZZ
331 /* The only possible operand we care of VSETVL is AVL. */
332 if (dem.has_avl_reg ())
333 {
4f673c5e 334 /* rs1 (avl) are not modified in the basic block prior to the VSETVL. */
9243c3d1
JZZ
335 if (!vlmax_avl_p (dem.get_avl ()))
336 {
ec99ffab 337 set_info *set = dem.get_avl_source ();
9243c3d1
JZZ
338 /* If it's undefined, it's not anticipatable conservatively. */
339 if (!set)
340 return false;
4f673c5e
JZZ
341 if (real_insn_and_same_bb_p (set->insn (), bb)
342 && before_p (set->insn (), insn))
9243c3d1
JZZ
343 return false;
344 }
345 }
346
4f673c5e 347 /* rd (vl) is not used between the start of the block and the occurrence. */
9243c3d1
JZZ
348 if (vsetvl_insn_p (insn->rtl ()))
349 {
4f673c5e
JZZ
350 rtx dest = get_vl (insn->rtl ());
351 for (insn_info *i = insn->prev_nondebug_insn ();
352 real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ())
353 {
354 /* rd (vl) is not used between the start of the block and the
355 * occurrence. */
356 if (find_access (i->uses (), REGNO (dest)))
357 return false;
358 /* rd (vl) are not modified in the basic block prior to the VSETVL. */
359 if (find_access (i->defs (), REGNO (dest)))
360 return false;
361 }
9243c3d1
JZZ
362 }
363
364 return true;
365}
366
367/* An "available occurrence" is one that is the last occurrence in the
368 basic block and the operands are not modified by following statements in
4f673c5e
JZZ
369 the basic block [including this insn].
370
371 For VSETVL instruction, we have these following formats:
372 1. vsetvl zero, rs1.
373 2. vsetvl zero, imm.
374 3. vsetvl rd, rs1.
375
376 So base on these circumstances, a DEM is considered as a local available
377 occurrence should satisfy these following conditions:
378
379 1). rs1 (avl) are not modified by following statements in
380 the basic block.
381 2). rd (vl) are not modified by following statements in
382 the basic block.
383
384 Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE
385 is modified prior to the occurrence. This case is already considered as
386 a non-local available occurrence.
387*/
9243c3d1 388static bool
4f673c5e 389available_occurrence_p (const bb_info *bb, const vector_insn_info dem)
9243c3d1 390{
4f673c5e 391 insn_info *insn = dem.get_insn ();
9243c3d1
JZZ
392 /* The only possible operand we care of VSETVL is AVL. */
393 if (dem.has_avl_reg ())
394 {
9243c3d1
JZZ
395 if (!vlmax_avl_p (dem.get_avl ()))
396 {
4f673c5e
JZZ
397 rtx dest = NULL_RTX;
398 if (vsetvl_insn_p (insn->rtl ()))
399 dest = get_vl (insn->rtl ());
400 for (const insn_info *i = insn; real_insn_and_same_bb_p (i, bb);
401 i = i->next_nondebug_insn ())
402 {
60bd33bc
JZZ
403 if (read_vl_insn_p (i->rtl ()))
404 continue;
4f673c5e
JZZ
405 /* rs1 (avl) are not modified by following statements in
406 the basic block. */
407 if (find_access (i->defs (), REGNO (dem.get_avl ())))
408 return false;
409 /* rd (vl) are not modified by following statements in
410 the basic block. */
411 if (dest && find_access (i->defs (), REGNO (dest)))
412 return false;
413 }
9243c3d1
JZZ
414 }
415 }
416 return true;
417}
418
6b6b9c68
JZZ
419static bool
420insn_should_be_added_p (const insn_info *insn, unsigned int types)
9243c3d1 421{
6b6b9c68
JZZ
422 if (insn->is_real () && (types & REAL_SET))
423 return true;
424 if (insn->is_phi () && (types & PHI_SET))
425 return true;
426 if (insn->is_bb_head () && (types & BB_HEAD_SET))
427 return true;
428 if (insn->is_bb_end () && (types & BB_END_SET))
429 return true;
430 return false;
9243c3d1
JZZ
431}
432
6b6b9c68
JZZ
433/* Recursively find all define instructions. The kind of instruction is
434 specified by the DEF_TYPE. */
435static hash_set<set_info *>
436get_all_sets (phi_info *phi, unsigned int types)
9243c3d1 437{
6b6b9c68 438 hash_set<set_info *> insns;
4f673c5e
JZZ
439 auto_vec<phi_info *> work_list;
440 hash_set<phi_info *> visited_list;
441 if (!phi)
6b6b9c68 442 return hash_set<set_info *> ();
4f673c5e 443 work_list.safe_push (phi);
9243c3d1 444
4f673c5e 445 while (!work_list.is_empty ())
9243c3d1 446 {
4f673c5e
JZZ
447 phi_info *phi = work_list.pop ();
448 visited_list.add (phi);
449 for (use_info *use : phi->inputs ())
9243c3d1 450 {
4f673c5e 451 def_info *def = use->def ();
6b6b9c68
JZZ
452 set_info *set = safe_dyn_cast<set_info *> (def);
453 if (!set)
454 return hash_set<set_info *> ();
9243c3d1 455
6b6b9c68 456 gcc_assert (!set->insn ()->is_debug_insn ());
9243c3d1 457
6b6b9c68
JZZ
458 if (insn_should_be_added_p (set->insn (), types))
459 insns.add (set);
460 if (set->insn ()->is_phi ())
4f673c5e 461 {
6b6b9c68 462 phi_info *new_phi = as_a<phi_info *> (set);
4f673c5e
JZZ
463 if (!visited_list.contains (new_phi))
464 work_list.safe_push (new_phi);
465 }
466 }
9243c3d1 467 }
4f673c5e
JZZ
468 return insns;
469}
9243c3d1 470
6b6b9c68
JZZ
471static hash_set<set_info *>
472get_all_sets (set_info *set, bool /* get_real_inst */ real_p,
473 bool /*get_phi*/ phi_p, bool /* get_function_parameter*/ param_p)
474{
475 if (real_p && phi_p && param_p)
476 return get_all_sets (safe_dyn_cast<phi_info *> (set),
477 REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET);
478
479 else if (real_p && param_p)
480 return get_all_sets (safe_dyn_cast<phi_info *> (set),
481 REAL_SET | BB_HEAD_SET | BB_END_SET);
482
483 else if (real_p)
484 return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET);
485 return hash_set<set_info *> ();
486}
487
488/* Helper function to get AVL operand. */
489static rtx
490get_avl (rtx_insn *rinsn)
491{
492 if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn))
493 return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0);
494
495 if (!has_vl_op (rinsn))
496 return NULL_RTX;
497 if (get_attr_avl_type (rinsn) == VLMAX)
498 return RVV_VLMAX;
499 extract_insn_cached (rinsn);
500 return recog_data.operand[get_attr_vl_op_idx (rinsn)];
501}
502
503static set_info *
504get_same_bb_set (hash_set<set_info *> &sets, const basic_block cfg_bb)
505{
506 for (set_info *set : sets)
507 if (set->bb ()->cfg_bb () == cfg_bb)
508 return set;
509 return nullptr;
510}
511
4f673c5e
JZZ
512/* Recursively find all predecessor blocks for cfg_bb. */
513static hash_set<basic_block>
514get_all_predecessors (basic_block cfg_bb)
515{
516 hash_set<basic_block> blocks;
517 auto_vec<basic_block> work_list;
518 hash_set<basic_block> visited_list;
519 work_list.safe_push (cfg_bb);
9243c3d1 520
4f673c5e 521 while (!work_list.is_empty ())
9243c3d1 522 {
4f673c5e
JZZ
523 basic_block new_cfg_bb = work_list.pop ();
524 visited_list.add (new_cfg_bb);
525 edge e;
526 edge_iterator ei;
527 FOR_EACH_EDGE (e, ei, new_cfg_bb->preds)
528 {
529 if (!visited_list.contains (e->src))
530 work_list.safe_push (e->src);
531 blocks.add (e->src);
532 }
9243c3d1 533 }
4f673c5e
JZZ
534 return blocks;
535}
9243c3d1 536
4f673c5e
JZZ
537/* Return true if there is an INSN in insns staying in the block BB. */
538static bool
6b6b9c68 539any_set_in_bb_p (hash_set<set_info *> sets, const bb_info *bb)
4f673c5e 540{
6b6b9c68
JZZ
541 for (const set_info *set : sets)
542 if (set->bb ()->index () == bb->index ())
4f673c5e
JZZ
543 return true;
544 return false;
9243c3d1
JZZ
545}
546
547/* Helper function to get SEW operand. We always have SEW value for
548 all RVV instructions that have VTYPE OP. */
549static uint8_t
550get_sew (rtx_insn *rinsn)
551{
552 return get_attr_sew (rinsn);
553}
554
555/* Helper function to get VLMUL operand. We always have VLMUL value for
556 all RVV instructions that have VTYPE OP. */
557static enum vlmul_type
558get_vlmul (rtx_insn *rinsn)
559{
560 return (enum vlmul_type) get_attr_vlmul (rinsn);
561}
562
563/* Get default tail policy. */
564static bool
565get_default_ta ()
566{
567 /* For the instruction that doesn't require TA, we still need a default value
568 to emit vsetvl. We pick up the default value according to prefer policy. */
569 return (bool) (get_prefer_tail_policy () & 0x1
570 || (get_prefer_tail_policy () >> 1 & 0x1));
571}
572
573/* Get default mask policy. */
574static bool
575get_default_ma ()
576{
577 /* For the instruction that doesn't require MA, we still need a default value
578 to emit vsetvl. We pick up the default value according to prefer policy. */
579 return (bool) (get_prefer_mask_policy () & 0x1
580 || (get_prefer_mask_policy () >> 1 & 0x1));
581}
582
583/* Helper function to get TA operand. */
584static bool
585tail_agnostic_p (rtx_insn *rinsn)
586{
587 /* If it doesn't have TA, we return agnostic by default. */
588 extract_insn_cached (rinsn);
589 int ta = get_attr_ta (rinsn);
590 return ta == INVALID_ATTRIBUTE ? get_default_ta () : IS_AGNOSTIC (ta);
591}
592
593/* Helper function to get MA operand. */
594static bool
595mask_agnostic_p (rtx_insn *rinsn)
596{
597 /* If it doesn't have MA, we return agnostic by default. */
598 extract_insn_cached (rinsn);
599 int ma = get_attr_ma (rinsn);
600 return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma);
601}
602
603/* Return true if FN has a vector instruction that use VL/VTYPE. */
604static bool
605has_vector_insn (function *fn)
606{
607 basic_block cfg_bb;
608 rtx_insn *rinsn;
609 FOR_ALL_BB_FN (cfg_bb, fn)
610 FOR_BB_INSNS (cfg_bb, rinsn)
611 if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn))
612 return true;
613 return false;
614}
615
616/* Emit vsetvl instruction. */
617static rtx
e577b91b 618gen_vsetvl_pat (enum vsetvl_type insn_type, const vl_vtype_info &info, rtx vl)
9243c3d1
JZZ
619{
620 rtx avl = info.get_avl ();
621 rtx sew = gen_int_mode (info.get_sew (), Pmode);
622 rtx vlmul = gen_int_mode (info.get_vlmul (), Pmode);
623 rtx ta = gen_int_mode (info.get_ta (), Pmode);
624 rtx ma = gen_int_mode (info.get_ma (), Pmode);
625
626 if (insn_type == VSETVL_NORMAL)
627 {
628 gcc_assert (vl != NULL_RTX);
629 return gen_vsetvl (Pmode, vl, avl, sew, vlmul, ta, ma);
630 }
631 else if (insn_type == VSETVL_VTYPE_CHANGE_ONLY)
632 return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma);
633 else
634 return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma);
635}
636
637static rtx
e577b91b 638gen_vsetvl_pat (rtx_insn *rinsn, const vector_insn_info &info)
9243c3d1
JZZ
639{
640 rtx new_pat;
60bd33bc
JZZ
641 vl_vtype_info new_info = info;
642 if (info.get_insn () && info.get_insn ()->rtl ()
643 && fault_first_load_p (info.get_insn ()->rtl ()))
644 new_info.set_avl_info (
645 avl_info (get_avl (info.get_insn ()->rtl ()), nullptr));
9243c3d1
JZZ
646 if (vsetvl_insn_p (rinsn) || vlmax_avl_p (info.get_avl ()))
647 {
648 rtx dest = get_vl (rinsn);
60bd33bc 649 new_pat = gen_vsetvl_pat (VSETVL_NORMAL, new_info, dest);
9243c3d1
JZZ
650 }
651 else if (INSN_CODE (rinsn) == CODE_FOR_vsetvl_vtype_change_only)
60bd33bc 652 new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, new_info, NULL_RTX);
9243c3d1 653 else
60bd33bc 654 new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX);
9243c3d1
JZZ
655 return new_pat;
656}
657
658static void
659emit_vsetvl_insn (enum vsetvl_type insn_type, enum emit_type emit_type,
e577b91b 660 const vl_vtype_info &info, rtx vl, rtx_insn *rinsn)
9243c3d1
JZZ
661{
662 rtx pat = gen_vsetvl_pat (insn_type, info, vl);
663 if (dump_file)
664 {
665 fprintf (dump_file, "\nInsert vsetvl insn PATTERN:\n");
666 print_rtl_single (dump_file, pat);
667 }
668
669 if (emit_type == EMIT_DIRECT)
670 emit_insn (pat);
671 else if (emit_type == EMIT_BEFORE)
672 emit_insn_before (pat, rinsn);
673 else
674 emit_insn_after (pat, rinsn);
675}
676
677static void
678eliminate_insn (rtx_insn *rinsn)
679{
680 if (dump_file)
681 {
682 fprintf (dump_file, "\nEliminate insn %d:\n", INSN_UID (rinsn));
683 print_rtl_single (dump_file, rinsn);
684 }
685 if (in_sequence_p ())
686 remove_insn (rinsn);
687 else
688 delete_insn (rinsn);
689}
690
a481eed8 691static vsetvl_type
9243c3d1
JZZ
692insert_vsetvl (enum emit_type emit_type, rtx_insn *rinsn,
693 const vector_insn_info &info, const vector_insn_info &prev_info)
694{
695 /* Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
696 VLMAX. */
697 if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p ()
4f673c5e 698 && info.compatible_avl_p (prev_info) && info.same_vlmax_p (prev_info))
9243c3d1
JZZ
699 {
700 emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX,
701 rinsn);
a481eed8 702 return VSETVL_VTYPE_CHANGE_ONLY;
9243c3d1
JZZ
703 }
704
705 if (info.has_avl_imm ())
706 {
707 emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX,
708 rinsn);
a481eed8 709 return VSETVL_DISCARD_RESULT;
9243c3d1
JZZ
710 }
711
712 if (info.has_avl_no_reg ())
713 {
714 /* We can only use x0, x0 if there's no chance of the vtype change causing
715 the previous vl to become invalid. */
716 if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p ()
717 && info.same_vlmax_p (prev_info))
718 {
719 emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX,
720 rinsn);
a481eed8 721 return VSETVL_VTYPE_CHANGE_ONLY;
9243c3d1
JZZ
722 }
723 /* Otherwise use an AVL of 0 to avoid depending on previous vl. */
724 vl_vtype_info new_info = info;
725 new_info.set_avl_info (avl_info (const0_rtx, nullptr));
726 emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, new_info, NULL_RTX,
727 rinsn);
a481eed8 728 return VSETVL_DISCARD_RESULT;
9243c3d1
JZZ
729 }
730
731 /* Use X0 as the DestReg unless AVLReg is X0. We also need to change the
732 opcode if the AVLReg is X0 as they have different register classes for
733 the AVL operand. */
734 if (vlmax_avl_p (info.get_avl ()))
735 {
736 gcc_assert (has_vtype_op (rinsn) || vsetvl_insn_p (rinsn));
ec99ffab 737 rtx vl_op = info.get_avl_reg_rtx ();
9243c3d1
JZZ
738 gcc_assert (!vlmax_avl_p (vl_op));
739 emit_vsetvl_insn (VSETVL_NORMAL, emit_type, info, vl_op, rinsn);
a481eed8 740 return VSETVL_NORMAL;
9243c3d1
JZZ
741 }
742
743 emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX, rinsn);
744
745 if (dump_file)
746 {
747 fprintf (dump_file, "Update VL/VTYPE info, previous info=");
748 prev_info.dump (dump_file);
749 }
a481eed8 750 return VSETVL_DISCARD_RESULT;
9243c3d1
JZZ
751}
752
753/* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
754 to INSN. If such notes are added to an insn which references a
755 CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
756 that note, because the following loop optimization pass requires
757 them. */
758
759/* ??? If there was a jump optimization pass after gcse and before loop,
760 then we would not need to do this here, because jump would add the
761 necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */
762
763static void
27a2a4b6 764add_label_notes (rtx x, rtx_insn *rinsn)
9243c3d1
JZZ
765{
766 enum rtx_code code = GET_CODE (x);
767 int i, j;
768 const char *fmt;
769
770 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
771 {
772 /* This code used to ignore labels that referred to dispatch tables to
773 avoid flow generating (slightly) worse code.
774
775 We no longer ignore such label references (see LABEL_REF handling in
776 mark_jump_label for additional information). */
777
778 /* There's no reason for current users to emit jump-insns with
779 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
780 notes. */
27a2a4b6
JZZ
781 gcc_assert (!JUMP_P (rinsn));
782 add_reg_note (rinsn, REG_LABEL_OPERAND, label_ref_label (x));
9243c3d1
JZZ
783
784 if (LABEL_P (label_ref_label (x)))
785 LABEL_NUSES (label_ref_label (x))++;
786
787 return;
788 }
789
790 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
791 {
792 if (fmt[i] == 'e')
27a2a4b6 793 add_label_notes (XEXP (x, i), rinsn);
9243c3d1
JZZ
794 else if (fmt[i] == 'E')
795 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
27a2a4b6 796 add_label_notes (XVECEXP (x, i, j), rinsn);
9243c3d1
JZZ
797 }
798}
799
800/* Add EXPR to the end of basic block BB.
801
802 This is used by both the PRE and code hoisting. */
803
804static void
805insert_insn_end_basic_block (rtx_insn *rinsn, basic_block cfg_bb)
806{
807 rtx_insn *end_rinsn = BB_END (cfg_bb);
808 rtx_insn *new_insn;
809 rtx_insn *pat, *pat_end;
810
811 pat = rinsn;
812 gcc_assert (pat && INSN_P (pat));
813
814 pat_end = pat;
815 while (NEXT_INSN (pat_end) != NULL_RTX)
816 pat_end = NEXT_INSN (pat_end);
817
818 /* If the last end_rinsn is a jump, insert EXPR in front. Similarly we need
819 to take care of trapping instructions in presence of non-call exceptions.
820 */
821
822 if (JUMP_P (end_rinsn)
823 || (NONJUMP_INSN_P (end_rinsn)
824 && (!single_succ_p (cfg_bb)
825 || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL)))
826 {
827 /* FIXME: What if something in jump uses value set in new end_rinsn? */
828 new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb);
829 }
830
831 /* Likewise if the last end_rinsn is a call, as will happen in the presence
832 of exception handling. */
833 else if (CALL_P (end_rinsn)
834 && (!single_succ_p (cfg_bb)
835 || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL))
836 {
837 /* Keeping in mind targets with small register classes and parameters
838 in registers, we search backward and place the instructions before
839 the first parameter is loaded. Do this for everyone for consistency
840 and a presumption that we'll get better code elsewhere as well. */
841
842 /* Since different machines initialize their parameter registers
843 in different orders, assume nothing. Collect the set of all
844 parameter registers. */
845 end_rinsn = find_first_parameter_load (end_rinsn, BB_HEAD (cfg_bb));
846
847 /* If we found all the parameter loads, then we want to insert
848 before the first parameter load.
849
850 If we did not find all the parameter loads, then we might have
851 stopped on the head of the block, which could be a CODE_LABEL.
852 If we inserted before the CODE_LABEL, then we would be putting
853 the end_rinsn in the wrong basic block. In that case, put the
854 end_rinsn after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK.
855 */
856 while (LABEL_P (end_rinsn) || NOTE_INSN_BASIC_BLOCK_P (end_rinsn))
857 end_rinsn = NEXT_INSN (end_rinsn);
858
859 new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb);
860 }
861 else
862 new_insn = emit_insn_after_noloc (pat, end_rinsn, cfg_bb);
863
864 while (1)
865 {
866 if (INSN_P (pat))
867 add_label_notes (PATTERN (pat), new_insn);
868 if (pat == pat_end)
869 break;
870 pat = NEXT_INSN (pat);
871 }
872}
873
874/* Get VL/VTYPE information for INSN. */
875static vl_vtype_info
876get_vl_vtype_info (const insn_info *insn)
877{
9243c3d1
JZZ
878 set_info *set = nullptr;
879 rtx avl = ::get_avl (insn->rtl ());
ec99ffab
JZZ
880 if (avl && REG_P (avl))
881 {
882 if (vlmax_avl_p (avl) && has_vl_op (insn->rtl ()))
883 set
884 = find_access (insn->uses (), REGNO (get_vl (insn->rtl ())))->def ();
885 else if (!vlmax_avl_p (avl))
886 set = find_access (insn->uses (), REGNO (avl))->def ();
887 else
888 set = nullptr;
889 }
9243c3d1
JZZ
890
891 uint8_t sew = get_sew (insn->rtl ());
892 enum vlmul_type vlmul = get_vlmul (insn->rtl ());
893 uint8_t ratio = get_attr_ratio (insn->rtl ());
894 /* when get_attr_ratio is invalid, this kind of instructions
895 doesn't care about ratio. However, we still need this value
896 in demand info backward analysis. */
897 if (ratio == INVALID_ATTRIBUTE)
898 ratio = calculate_ratio (sew, vlmul);
899 bool ta = tail_agnostic_p (insn->rtl ());
900 bool ma = mask_agnostic_p (insn->rtl ());
901
902 /* If merge operand is undef value, we prefer agnostic. */
903 int merge_op_idx = get_attr_merge_op_idx (insn->rtl ());
904 if (merge_op_idx != INVALID_ATTRIBUTE
905 && satisfies_constraint_vu (recog_data.operand[merge_op_idx]))
906 {
907 ta = true;
908 ma = true;
909 }
910
911 vl_vtype_info info (avl_info (avl, set), sew, vlmul, ratio, ta, ma);
912 return info;
913}
914
915static void
916change_insn (rtx_insn *rinsn, rtx new_pat)
917{
918 /* We don't apply change on RTL_SSA here since it's possible a
919 new INSN we add in the PASS before which doesn't have RTL_SSA
920 info yet.*/
921 if (dump_file)
922 {
923 fprintf (dump_file, "\nChange PATTERN of insn %d from:\n",
924 INSN_UID (rinsn));
925 print_rtl_single (dump_file, PATTERN (rinsn));
926 }
927
011ba384 928 validate_change (rinsn, &PATTERN (rinsn), new_pat, false);
9243c3d1
JZZ
929
930 if (dump_file)
931 {
932 fprintf (dump_file, "\nto:\n");
933 print_rtl_single (dump_file, PATTERN (rinsn));
934 }
935}
936
60bd33bc
JZZ
937static const insn_info *
938get_forward_read_vl_insn (const insn_info *insn)
939{
940 const bb_info *bb = insn->bb ();
941 for (const insn_info *i = insn->next_nondebug_insn ();
942 real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ())
943 {
944 if (find_access (i->defs (), VL_REGNUM))
945 return nullptr;
946 if (read_vl_insn_p (i->rtl ()))
947 return i;
948 }
949 return nullptr;
950}
951
952static const insn_info *
953get_backward_fault_first_load_insn (const insn_info *insn)
954{
955 const bb_info *bb = insn->bb ();
956 for (const insn_info *i = insn->prev_nondebug_insn ();
957 real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ())
958 {
959 if (fault_first_load_p (i->rtl ()))
960 return i;
961 if (find_access (i->defs (), VL_REGNUM))
962 return nullptr;
963 }
964 return nullptr;
965}
966
9243c3d1
JZZ
967static bool
968change_insn (function_info *ssa, insn_change change, insn_info *insn,
969 rtx new_pat)
970{
971 rtx_insn *rinsn = insn->rtl ();
972 auto attempt = ssa->new_change_attempt ();
973 if (!restrict_movement (change))
974 return false;
975
976 if (dump_file)
977 {
978 fprintf (dump_file, "\nChange PATTERN of insn %d from:\n",
979 INSN_UID (rinsn));
980 print_rtl_single (dump_file, PATTERN (rinsn));
9243c3d1
JZZ
981 }
982
983 insn_change_watermark watermark;
984 validate_change (rinsn, &PATTERN (rinsn), new_pat, true);
985
986 /* These routines report failures themselves. */
987 if (!recog (attempt, change) || !change_is_worthwhile (change, false))
988 return false;
a1e42094
JZZ
989
990 /* Fix bug:
991 (insn 12 34 13 2 (set (reg:VNx8DI 120 v24 [orig:134 _1 ] [134])
992 (if_then_else:VNx8DI (unspec:VNx8BI [
993 (const_vector:VNx8BI repeat [
994 (const_int 1 [0x1])
995 ])
996 (const_int 0 [0])
997 (const_int 2 [0x2]) repeated x2
998 (const_int 0 [0])
999 (reg:SI 66 vl)
1000 (reg:SI 67 vtype)
1001 ] UNSPEC_VPREDICATE)
1002 (plus:VNx8DI (reg/v:VNx8DI 104 v8 [orig:137 op1 ] [137])
1003 (sign_extend:VNx8DI (vec_duplicate:VNx8SI (reg:SI 15 a5
1004 [140])))) (unspec:VNx8DI [ (const_int 0 [0]) ] UNSPEC_VUNDEF))) "rvv.c":8:12
1005 2784 {pred_single_widen_addsvnx8di_scalar} (expr_list:REG_EQUIV
1006 (mem/c:VNx8DI (reg:DI 10 a0 [142]) [1 <retval>+0 S[64, 64] A128])
1007 (expr_list:REG_EQUAL (if_then_else:VNx8DI (unspec:VNx8BI [
1008 (const_vector:VNx8BI repeat [
1009 (const_int 1 [0x1])
1010 ])
1011 (reg/v:DI 13 a3 [orig:139 vl ] [139])
1012 (const_int 2 [0x2]) repeated x2
1013 (const_int 0 [0])
1014 (reg:SI 66 vl)
1015 (reg:SI 67 vtype)
1016 ] UNSPEC_VPREDICATE)
1017 (plus:VNx8DI (reg/v:VNx8DI 104 v8 [orig:137 op1 ] [137])
1018 (const_vector:VNx8DI repeat [
1019 (const_int 2730 [0xaaa])
1020 ]))
1021 (unspec:VNx8DI [
1022 (const_int 0 [0])
1023 ] UNSPEC_VUNDEF))
1024 (nil))))
1025 Here we want to remove use "a3". However, the REG_EQUAL/REG_EQUIV note use
1026 "a3" which made us fail in change_insn. We reference to the
1027 'aarch64-cc-fusion.cc' and add this method. */
1028 remove_reg_equal_equiv_notes (rinsn);
9243c3d1
JZZ
1029 confirm_change_group ();
1030 ssa->change_insn (change);
1031
1032 if (dump_file)
1033 {
1034 fprintf (dump_file, "\nto:\n");
1035 print_rtl_single (dump_file, PATTERN (rinsn));
9243c3d1
JZZ
1036 }
1037 return true;
1038}
1039
aef20243
JZZ
1040static void
1041change_vsetvl_insn (const insn_info *insn, const vector_insn_info &info)
1042{
1043 rtx_insn *rinsn;
1044 if (vector_config_insn_p (insn->rtl ()))
1045 {
1046 rinsn = insn->rtl ();
1047 gcc_assert (vsetvl_insn_p (rinsn) && "Can't handle X0, rs1 vsetvli yet");
1048 }
1049 else
1050 {
1051 gcc_assert (has_vtype_op (insn->rtl ()));
1052 rinsn = PREV_INSN (insn->rtl ());
1053 gcc_assert (vector_config_insn_p (rinsn));
1054 }
1055 rtx new_pat = gen_vsetvl_pat (rinsn, info);
1056 change_insn (rinsn, new_pat);
1057}
1058
d51f2456
JZ
1059static void
1060local_eliminate_vsetvl_insn (const vector_insn_info &dem)
1061{
1062 const insn_info *insn = dem.get_insn ();
1063 if (!insn || insn->is_artificial ())
1064 return;
1065 rtx_insn *rinsn = insn->rtl ();
1066 const bb_info *bb = insn->bb ();
1067 if (vsetvl_insn_p (rinsn))
1068 {
1069 rtx vl = get_vl (rinsn);
1070 for (insn_info *i = insn->next_nondebug_insn ();
1071 real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ())
1072 {
1073 if (i->is_call () || i->is_asm ()
1074 || find_access (i->defs (), VL_REGNUM)
1075 || find_access (i->defs (), VTYPE_REGNUM))
1076 return;
1077
1078 if (has_vtype_op (i->rtl ()))
1079 {
1080 if (!vsetvl_discard_result_insn_p (PREV_INSN (i->rtl ())))
1081 return;
1082 rtx avl = get_avl (i->rtl ());
1083 if (avl != vl)
1084 return;
1085 set_info *def = find_access (i->uses (), REGNO (avl))->def ();
1086 if (def->insn () != insn)
1087 return;
1088
1089 vector_insn_info new_info;
1090 new_info.parse_insn (i);
1091 if (!new_info.skip_avl_compatible_p (dem))
1092 return;
1093
1094 new_info.set_avl_info (dem.get_avl_info ());
1095 new_info = dem.merge (new_info, LOCAL_MERGE);
1096 change_vsetvl_insn (insn, new_info);
1097 eliminate_insn (PREV_INSN (i->rtl ()));
1098 return;
1099 }
1100 }
1101 }
1102}
1103
4f673c5e 1104static bool
6b6b9c68 1105source_equal_p (insn_info *insn1, insn_info *insn2)
4f673c5e 1106{
6b6b9c68
JZZ
1107 if (!insn1 || !insn2)
1108 return false;
1109 rtx_insn *rinsn1 = insn1->rtl ();
1110 rtx_insn *rinsn2 = insn2->rtl ();
4f673c5e
JZZ
1111 if (!rinsn1 || !rinsn2)
1112 return false;
1113 rtx note1 = find_reg_equal_equiv_note (rinsn1);
1114 rtx note2 = find_reg_equal_equiv_note (rinsn2);
1115 rtx single_set1 = single_set (rinsn1);
1116 rtx single_set2 = single_set (rinsn2);
60bd33bc
JZZ
1117 if (read_vl_insn_p (rinsn1) && read_vl_insn_p (rinsn2))
1118 {
1119 const insn_info *load1 = get_backward_fault_first_load_insn (insn1);
1120 const insn_info *load2 = get_backward_fault_first_load_insn (insn2);
1121 return load1 && load2 && load1 == load2;
1122 }
4f673c5e
JZZ
1123
1124 if (note1 && note2 && rtx_equal_p (note1, note2))
1125 return true;
6b6b9c68
JZZ
1126
1127 /* Since vsetvl instruction is not single SET.
1128 We handle this case specially here. */
1129 if (vsetvl_insn_p (insn1->rtl ()) && vsetvl_insn_p (insn2->rtl ()))
1130 {
1131 /* For example:
1132 vsetvl1 a6,a5,e32m1
1133 RVV 1 (use a6 as AVL)
1134 vsetvl2 a5,a5,e8mf4
1135 RVV 2 (use a5 as AVL)
1136 We consider AVL of RVV 1 and RVV 2 are same so that we can
1137 gain more optimization opportunities.
1138
1139 Note: insn1_info.compatible_avl_p (insn2_info)
1140 will make sure there is no instruction between vsetvl1 and vsetvl2
1141 modify a5 since their def will be different if there is instruction
1142 modify a5 and compatible_avl_p will return false. */
1143 vector_insn_info insn1_info, insn2_info;
1144 insn1_info.parse_insn (insn1);
1145 insn2_info.parse_insn (insn2);
1146 if (insn1_info.same_vlmax_p (insn2_info)
1147 && insn1_info.compatible_avl_p (insn2_info))
1148 return true;
1149 }
1150
1151 /* We only handle AVL is set by instructions with no side effects. */
1152 if (!single_set1 || !single_set2)
1153 return false;
1154 if (!rtx_equal_p (SET_SRC (single_set1), SET_SRC (single_set2)))
1155 return false;
1156 gcc_assert (insn1->uses ().size () == insn2->uses ().size ());
1157 for (size_t i = 0; i < insn1->uses ().size (); i++)
1158 if (insn1->uses ()[i] != insn2->uses ()[i])
1159 return false;
1160 return true;
4f673c5e
JZZ
1161}
1162
1163/* Helper function to get single same real RTL source.
1164 return NULL if it is not a single real RTL source. */
6b6b9c68 1165static insn_info *
4f673c5e
JZZ
1166extract_single_source (set_info *set)
1167{
1168 if (!set)
1169 return nullptr;
1170 if (set->insn ()->is_real ())
6b6b9c68 1171 return set->insn ();
4f673c5e
JZZ
1172 if (!set->insn ()->is_phi ())
1173 return nullptr;
6b6b9c68 1174 hash_set<set_info *> sets = get_all_sets (set, true, false, true);
4f673c5e 1175
6b6b9c68 1176 insn_info *first_insn = (*sets.begin ())->insn ();
4f673c5e
JZZ
1177 if (first_insn->is_artificial ())
1178 return nullptr;
6b6b9c68 1179 for (const set_info *set : sets)
4f673c5e
JZZ
1180 {
1181 /* If there is a head or end insn, we conservative return
1182 NULL so that VSETVL PASS will insert vsetvl directly. */
6b6b9c68 1183 if (set->insn ()->is_artificial ())
4f673c5e 1184 return nullptr;
6b6b9c68 1185 if (!source_equal_p (set->insn (), first_insn))
4f673c5e
JZZ
1186 return nullptr;
1187 }
1188
6b6b9c68 1189 return first_insn;
4f673c5e
JZZ
1190}
1191
ec99ffab
JZZ
1192static unsigned
1193calculate_sew (vlmul_type vlmul, unsigned int ratio)
1194{
1195 for (const unsigned sew : ALL_SEW)
1196 if (calculate_ratio (sew, vlmul) == ratio)
1197 return sew;
1198 return 0;
1199}
1200
1201static vlmul_type
1202calculate_vlmul (unsigned int sew, unsigned int ratio)
1203{
1204 for (const vlmul_type vlmul : ALL_LMUL)
1205 if (calculate_ratio (sew, vlmul) == ratio)
1206 return vlmul;
1207 return LMUL_RESERVED;
1208}
1209
1210static bool
1211incompatible_avl_p (const vector_insn_info &info1,
1212 const vector_insn_info &info2)
1213{
1214 return !info1.compatible_avl_p (info2) && !info2.compatible_avl_p (info1);
1215}
1216
1217static bool
1218different_sew_p (const vector_insn_info &info1, const vector_insn_info &info2)
1219{
1220 return info1.get_sew () != info2.get_sew ();
1221}
1222
1223static bool
1224different_lmul_p (const vector_insn_info &info1, const vector_insn_info &info2)
1225{
1226 return info1.get_vlmul () != info2.get_vlmul ();
1227}
1228
1229static bool
1230different_ratio_p (const vector_insn_info &info1, const vector_insn_info &info2)
1231{
1232 return info1.get_ratio () != info2.get_ratio ();
1233}
1234
1235static bool
1236different_tail_policy_p (const vector_insn_info &info1,
1237 const vector_insn_info &info2)
1238{
1239 return info1.get_ta () != info2.get_ta ();
1240}
1241
1242static bool
1243different_mask_policy_p (const vector_insn_info &info1,
1244 const vector_insn_info &info2)
1245{
1246 return info1.get_ma () != info2.get_ma ();
1247}
1248
1249static bool
1250possible_zero_avl_p (const vector_insn_info &info1,
1251 const vector_insn_info &info2)
1252{
1253 return !info1.has_non_zero_avl () || !info2.has_non_zero_avl ();
1254}
1255
ec99ffab
JZZ
1256static bool
1257second_ratio_invalid_for_first_sew_p (const vector_insn_info &info1,
1258 const vector_insn_info &info2)
1259{
1260 return calculate_vlmul (info1.get_sew (), info2.get_ratio ())
1261 == LMUL_RESERVED;
1262}
1263
1264static bool
1265second_ratio_invalid_for_first_lmul_p (const vector_insn_info &info1,
1266 const vector_insn_info &info2)
1267{
1268 return calculate_sew (info1.get_vlmul (), info2.get_ratio ()) == 0;
1269}
1270
1271static bool
1272second_sew_less_than_first_sew_p (const vector_insn_info &info1,
1273 const vector_insn_info &info2)
1274{
1275 return info2.get_sew () < info1.get_sew ();
1276}
1277
1278static bool
1279first_sew_less_than_second_sew_p (const vector_insn_info &info1,
1280 const vector_insn_info &info2)
1281{
1282 return info1.get_sew () < info2.get_sew ();
1283}
1284
1285/* return 0 if LMUL1 == LMUL2.
1286 return -1 if LMUL1 < LMUL2.
1287 return 1 if LMUL1 > LMUL2. */
1288static int
1289compare_lmul (vlmul_type vlmul1, vlmul_type vlmul2)
1290{
1291 if (vlmul1 == vlmul2)
1292 return 0;
1293
1294 switch (vlmul1)
1295 {
1296 case LMUL_1:
1297 if (vlmul2 == LMUL_2 || vlmul2 == LMUL_4 || vlmul2 == LMUL_8)
1298 return 1;
1299 else
1300 return -1;
1301 case LMUL_2:
1302 if (vlmul2 == LMUL_4 || vlmul2 == LMUL_8)
1303 return 1;
1304 else
1305 return -1;
1306 case LMUL_4:
1307 if (vlmul2 == LMUL_8)
1308 return 1;
1309 else
1310 return -1;
1311 case LMUL_8:
1312 return -1;
1313 case LMUL_F2:
1314 if (vlmul2 == LMUL_1 || vlmul2 == LMUL_2 || vlmul2 == LMUL_4
1315 || vlmul2 == LMUL_8)
1316 return 1;
1317 else
1318 return -1;
1319 case LMUL_F4:
1320 if (vlmul2 == LMUL_F2 || vlmul2 == LMUL_1 || vlmul2 == LMUL_2
1321 || vlmul2 == LMUL_4 || vlmul2 == LMUL_8)
1322 return 1;
1323 else
1324 return -1;
1325 case LMUL_F8:
1326 return 0;
1327 default:
1328 gcc_unreachable ();
1329 }
1330}
1331
1332static bool
1333second_lmul_less_than_first_lmul_p (const vector_insn_info &info1,
1334 const vector_insn_info &info2)
1335{
1336 return compare_lmul (info2.get_vlmul (), info1.get_vlmul ()) == -1;
1337}
1338
ec99ffab
JZZ
1339static bool
1340second_ratio_less_than_first_ratio_p (const vector_insn_info &info1,
1341 const vector_insn_info &info2)
1342{
1343 return info2.get_ratio () < info1.get_ratio ();
1344}
1345
1346static CONSTEXPR const demands_cond incompatible_conds[] = {
1347#define DEF_INCOMPATIBLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, \
1348 GE_SEW1, TAIL_POLICTY1, MASK_POLICY1, AVL2, \
1349 SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, \
1350 TAIL_POLICTY2, MASK_POLICY2, COND) \
1351 {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1, \
1352 MASK_POLICY1}, \
1353 {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \
1354 MASK_POLICY2}}, \
1355 COND},
1356#include "riscv-vsetvl.def"
1357};
1358
1359static unsigned
1360greatest_sew (const vector_insn_info &info1, const vector_insn_info &info2)
1361{
1362 return std::max (info1.get_sew (), info2.get_sew ());
1363}
1364
1365static unsigned
1366first_sew (const vector_insn_info &info1, const vector_insn_info &)
1367{
1368 return info1.get_sew ();
1369}
1370
1371static unsigned
1372second_sew (const vector_insn_info &, const vector_insn_info &info2)
1373{
1374 return info2.get_sew ();
1375}
1376
1377static vlmul_type
1378first_vlmul (const vector_insn_info &info1, const vector_insn_info &)
1379{
1380 return info1.get_vlmul ();
1381}
1382
1383static vlmul_type
1384second_vlmul (const vector_insn_info &, const vector_insn_info &info2)
1385{
1386 return info2.get_vlmul ();
1387}
1388
1389static unsigned
1390first_ratio (const vector_insn_info &info1, const vector_insn_info &)
1391{
1392 return info1.get_ratio ();
1393}
1394
1395static unsigned
1396second_ratio (const vector_insn_info &, const vector_insn_info &info2)
1397{
1398 return info2.get_ratio ();
1399}
1400
1401static vlmul_type
1402vlmul_for_first_sew_second_ratio (const vector_insn_info &info1,
1403 const vector_insn_info &info2)
1404{
1405 return calculate_vlmul (info1.get_sew (), info2.get_ratio ());
1406}
1407
1408static unsigned
1409ratio_for_second_sew_first_vlmul (const vector_insn_info &info1,
1410 const vector_insn_info &info2)
1411{
1412 return calculate_ratio (info2.get_sew (), info1.get_vlmul ());
1413}
1414
1415static CONSTEXPR const demands_fuse_rule fuse_rules[] = {
1416#define DEF_SEW_LMUL_FUSE_RULE(DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1, \
1417 DEMAND_GE_SEW1, DEMAND_SEW2, DEMAND_LMUL2, \
1418 DEMAND_RATIO2, DEMAND_GE_SEW2, NEW_DEMAND_SEW, \
1419 NEW_DEMAND_LMUL, NEW_DEMAND_RATIO, \
1420 NEW_DEMAND_GE_SEW, NEW_SEW, NEW_VLMUL, \
1421 NEW_RATIO) \
1422 {{{DEMAND_ANY, DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1, DEMAND_ANY, \
1423 DEMAND_GE_SEW1, DEMAND_ANY, DEMAND_ANY}, \
1424 {DEMAND_ANY, DEMAND_SEW2, DEMAND_LMUL2, DEMAND_RATIO2, DEMAND_ANY, \
1425 DEMAND_GE_SEW2, DEMAND_ANY, DEMAND_ANY}}, \
1426 NEW_DEMAND_SEW, \
1427 NEW_DEMAND_LMUL, \
1428 NEW_DEMAND_RATIO, \
1429 NEW_DEMAND_GE_SEW, \
1430 NEW_SEW, \
1431 NEW_VLMUL, \
1432 NEW_RATIO},
1433#include "riscv-vsetvl.def"
1434};
1435
1436static bool
1437always_unavailable (const vector_insn_info &, const vector_insn_info &)
1438{
1439 return true;
1440}
1441
1442static bool
1443avl_unavailable_p (const vector_insn_info &info1, const vector_insn_info &info2)
1444{
1445 return !info2.compatible_avl_p (info1.get_avl_info ());
1446}
1447
1448static bool
1449sew_unavailable_p (const vector_insn_info &info1, const vector_insn_info &info2)
1450{
1451 if (!info2.demand_p (DEMAND_LMUL) && !info2.demand_p (DEMAND_RATIO))
1452 {
1453 if (info2.demand_p (DEMAND_GE_SEW))
1454 return info1.get_sew () < info2.get_sew ();
1455 return info1.get_sew () != info2.get_sew ();
1456 }
1457 return true;
1458}
1459
1460static bool
1461lmul_unavailable_p (const vector_insn_info &info1,
1462 const vector_insn_info &info2)
1463{
1464 if (info1.get_vlmul () == info2.get_vlmul () && !info2.demand_p (DEMAND_SEW)
1465 && !info2.demand_p (DEMAND_RATIO))
1466 return false;
1467 return true;
1468}
1469
1470static bool
1471ge_sew_unavailable_p (const vector_insn_info &info1,
1472 const vector_insn_info &info2)
1473{
1474 if (!info2.demand_p (DEMAND_LMUL) && !info2.demand_p (DEMAND_RATIO)
1475 && info2.demand_p (DEMAND_GE_SEW))
1476 return info1.get_sew () < info2.get_sew ();
1477 return true;
1478}
1479
1480static bool
1481ge_sew_lmul_unavailable_p (const vector_insn_info &info1,
1482 const vector_insn_info &info2)
1483{
1484 if (!info2.demand_p (DEMAND_RATIO) && info2.demand_p (DEMAND_GE_SEW))
1485 return info1.get_sew () < info2.get_sew ();
1486 return true;
1487}
1488
1489static bool
1490ge_sew_ratio_unavailable_p (const vector_insn_info &info1,
1491 const vector_insn_info &info2)
1492{
1493 if (!info2.demand_p (DEMAND_LMUL) && info2.demand_p (DEMAND_GE_SEW))
1494 return info1.get_sew () < info2.get_sew ();
1495 return true;
1496}
1497
1498static CONSTEXPR const demands_cond unavailable_conds[] = {
1499#define DEF_UNAVAILABLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, \
1500 TAIL_POLICTY1, MASK_POLICY1, AVL2, SEW2, LMUL2, \
1501 RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \
1502 MASK_POLICY2, COND) \
1503 {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1, \
1504 MASK_POLICY1}, \
1505 {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \
1506 MASK_POLICY2}}, \
1507 COND},
1508#include "riscv-vsetvl.def"
1509};
1510
1511static bool
1512same_sew_lmul_demand_p (const bool *dems1, const bool *dems2)
1513{
1514 return dems1[DEMAND_SEW] == dems2[DEMAND_SEW]
1515 && dems1[DEMAND_LMUL] == dems2[DEMAND_LMUL]
1516 && dems1[DEMAND_RATIO] == dems2[DEMAND_RATIO] && !dems1[DEMAND_GE_SEW]
1517 && !dems2[DEMAND_GE_SEW];
1518}
1519
1520static bool
1521propagate_avl_across_demands_p (const vector_insn_info &info1,
1522 const vector_insn_info &info2)
1523{
1524 if (info2.demand_p (DEMAND_AVL))
1525 {
1526 if (info2.demand_p (DEMAND_NONZERO_AVL))
1527 return info1.demand_p (DEMAND_AVL)
1528 && !info1.demand_p (DEMAND_NONZERO_AVL) && info1.has_avl_reg ();
1529 }
1530 else
1531 return info1.demand_p (DEMAND_AVL) && info1.has_avl_reg ();
1532 return false;
1533}
1534
1535static bool
a481eed8 1536reg_available_p (const insn_info *insn, const vector_insn_info &info)
ec99ffab 1537{
a481eed8 1538 if (info.has_avl_reg () && !info.get_avl_source ())
44c918b5 1539 return false;
a481eed8
JZZ
1540 insn_info *def_insn = info.get_avl_source ()->insn ();
1541 if (def_insn->bb () == insn->bb ())
1542 return before_p (def_insn, insn);
ec99ffab 1543 else
a481eed8
JZZ
1544 return dominated_by_p (CDI_DOMINATORS, insn->bb ()->cfg_bb (),
1545 def_insn->bb ()->cfg_bb ());
ec99ffab
JZZ
1546}
1547
60bd33bc
JZZ
1548/* Return true if the instruction support relaxed compatible check. */
1549static bool
1550support_relaxed_compatible_p (const vector_insn_info &info1,
1551 const vector_insn_info &info2)
1552{
1553 if (fault_first_load_p (info1.get_insn ()->rtl ())
1554 && info2.demand_p (DEMAND_AVL) && info2.has_avl_reg ()
1555 && info2.get_avl_source () && info2.get_avl_source ()->insn ()->is_phi ())
1556 {
1557 hash_set<set_info *> sets
1558 = get_all_sets (info2.get_avl_source (), true, false, false);
1559 for (set_info *set : sets)
1560 {
1561 if (read_vl_insn_p (set->insn ()->rtl ()))
1562 {
1563 const insn_info *insn
1564 = get_backward_fault_first_load_insn (set->insn ());
1565 if (insn == info1.get_insn ())
1566 return info2.compatible_vtype_p (info1);
1567 }
1568 }
1569 }
1570 return false;
1571}
1572
1573/* Return true if the block is worthwhile backward propagation. */
1574static bool
1575backward_propagate_worthwhile_p (const basic_block cfg_bb,
1576 const vector_block_info block_info)
1577{
1578 if (loop_basic_block_p (cfg_bb))
1579 {
1580 if (block_info.reaching_out.valid_or_dirty_p ())
1581 {
1582 if (block_info.local_dem.compatible_p (block_info.reaching_out))
1583 {
1584 /* Case 1 (Can backward propagate):
1585 ....
1586 bb0:
1587 ...
1588 for (int i = 0; i < n; i++)
1589 {
1590 vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
1591 __riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
1592 }
1593 The local_dem is compatible with reaching_out. Such case is
1594 worthwhile backward propagation. */
1595 return true;
1596 }
1597 else
1598 {
1599 if (support_relaxed_compatible_p (block_info.reaching_out,
1600 block_info.local_dem))
1601 return true;
1602 /* Case 2 (Don't backward propagate):
1603 ....
1604 bb0:
1605 ...
1606 for (int i = 0; i < n; i++)
1607 {
1608 vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
1609 __riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
1610 vint16mf2_t v2 = __riscv_vle16_v_i16mf2 (in + i + 6, 8);
1611 __riscv_vse16_v_i16mf2 (out + i + 6, v, 8);
1612 }
1613 The local_dem is incompatible with reaching_out.
1614 It makes no sense to backward propagate the local_dem since we
1615 can't avoid VSETVL inside the loop. */
1616 return false;
1617 }
1618 }
1619 else
1620 {
1621 gcc_assert (block_info.reaching_out.unknown_p ());
1622 /* Case 3 (Don't backward propagate):
1623 ....
1624 bb0:
1625 ...
1626 for (int i = 0; i < n; i++)
1627 {
1628 vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
1629 __riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
1630 fn3 ();
1631 }
1632 The local_dem is VALID, but the reaching_out is UNKNOWN.
1633 It makes no sense to backward propagate the local_dem since we
1634 can't avoid VSETVL inside the loop. */
1635 return false;
1636 }
1637 }
1638
1639 return true;
1640}
1641
a2d12abe
JZZ
1642/* Count the number of REGNO in RINSN. */
1643static int
1644count_regno_occurrences (rtx_insn *rinsn, unsigned int regno)
1645{
1646 int count = 0;
1647 extract_insn (rinsn);
1648 for (int i = 0; i < recog_data.n_operands; i++)
1649 if (refers_to_regno_p (regno, recog_data.operand[i]))
1650 count++;
1651 return count;
1652}
1653
12b23c71
JZZ
1654avl_info::avl_info (const avl_info &other)
1655{
1656 m_value = other.get_value ();
1657 m_source = other.get_source ();
1658}
1659
9243c3d1
JZZ
1660avl_info::avl_info (rtx value_in, set_info *source_in)
1661 : m_value (value_in), m_source (source_in)
1662{}
1663
4f673c5e
JZZ
1664bool
1665avl_info::single_source_equal_p (const avl_info &other) const
1666{
1667 set_info *set1 = m_source;
1668 set_info *set2 = other.get_source ();
6b6b9c68
JZZ
1669 insn_info *insn1 = extract_single_source (set1);
1670 insn_info *insn2 = extract_single_source (set2);
1671 if (!insn1 || !insn2)
1672 return false;
1673 return source_equal_p (insn1, insn2);
1674}
1675
1676bool
1677avl_info::multiple_source_equal_p (const avl_info &other) const
1678{
d875d756
JZ
1679 /* When the def info is same in RTL_SSA namespace, it's safe
1680 to consider they are avl compatible. */
1681 if (m_source == other.get_source ())
1682 return true;
6b6b9c68 1683
d875d756
JZ
1684 /* We only consider handle PHI node. */
1685 if (!m_source->insn ()->is_phi () || !other.get_source ()->insn ()->is_phi ())
1686 return false;
6b6b9c68 1687
d875d756
JZ
1688 phi_info *phi1 = as_a<phi_info *> (m_source);
1689 phi_info *phi2 = as_a<phi_info *> (other.get_source ());
6b6b9c68 1690
d875d756
JZ
1691 if (phi1->is_degenerate () && phi2->is_degenerate ())
1692 {
1693 /* Degenerate PHI means the PHI node only have one input. */
1694
1695 /* If both PHI nodes have the same single input in use list.
1696 We consider they are AVL compatible. */
1697 if (phi1->input_value (0) == phi2->input_value (0))
1698 return true;
1699 }
1700 /* TODO: We can support more optimization cases in the future. */
1701 return false;
4f673c5e
JZZ
1702}
1703
9243c3d1
JZZ
1704avl_info &
1705avl_info::operator= (const avl_info &other)
1706{
1707 m_value = other.get_value ();
1708 m_source = other.get_source ();
1709 return *this;
1710}
1711
1712bool
1713avl_info::operator== (const avl_info &other) const
1714{
1715 if (!m_value)
1716 return !other.get_value ();
1717 if (!other.get_value ())
1718 return false;
1719
9243c3d1
JZZ
1720 if (GET_CODE (m_value) != GET_CODE (other.get_value ()))
1721 return false;
1722
1723 /* Handle CONST_INT AVL. */
1724 if (CONST_INT_P (m_value))
1725 return INTVAL (m_value) == INTVAL (other.get_value ());
1726
1727 /* Handle VLMAX AVL. */
1728 if (vlmax_avl_p (m_value))
1729 return vlmax_avl_p (other.get_value ());
1730
4f673c5e
JZZ
1731 /* If any source is undef value, we think they are not equal. */
1732 if (!m_source || !other.get_source ())
1733 return false;
1734
1735 /* If both sources are single source (defined by a single real RTL)
1736 and their definitions are same. */
1737 if (single_source_equal_p (other))
1738 return true;
1739
6b6b9c68 1740 return multiple_source_equal_p (other);
9243c3d1
JZZ
1741}
1742
1743bool
1744avl_info::operator!= (const avl_info &other) const
1745{
1746 return !(*this == other);
1747}
1748
ec99ffab
JZZ
1749bool
1750avl_info::has_non_zero_avl () const
1751{
1752 if (has_avl_imm ())
1753 return INTVAL (get_value ()) > 0;
1754 if (has_avl_reg ())
1755 return vlmax_avl_p (get_value ());
1756 return false;
1757}
1758
9243c3d1
JZZ
1759/* Initialize VL/VTYPE information. */
1760vl_vtype_info::vl_vtype_info (avl_info avl_in, uint8_t sew_in,
1761 enum vlmul_type vlmul_in, uint8_t ratio_in,
1762 bool ta_in, bool ma_in)
1763 : m_avl (avl_in), m_sew (sew_in), m_vlmul (vlmul_in), m_ratio (ratio_in),
1764 m_ta (ta_in), m_ma (ma_in)
1765{
1766 gcc_assert (valid_sew_p (m_sew) && "Unexpected SEW");
1767}
1768
1769bool
1770vl_vtype_info::operator== (const vl_vtype_info &other) const
1771{
6b6b9c68 1772 return same_avl_p (other) && m_sew == other.get_sew ()
9243c3d1
JZZ
1773 && m_vlmul == other.get_vlmul () && m_ta == other.get_ta ()
1774 && m_ma == other.get_ma () && m_ratio == other.get_ratio ();
1775}
1776
1777bool
1778vl_vtype_info::operator!= (const vl_vtype_info &other) const
1779{
1780 return !(*this == other);
1781}
1782
9243c3d1
JZZ
1783bool
1784vl_vtype_info::same_avl_p (const vl_vtype_info &other) const
1785{
6b6b9c68
JZZ
1786 /* We need to compare both RTL and SET. If both AVL are CONST_INT.
1787 For example, const_int 3 and const_int 4, we need to compare
1788 RTL. If both AVL are REG and their REGNO are same, we need to
1789 compare SET. */
1790 return get_avl () == other.get_avl ()
1791 && get_avl_source () == other.get_avl_source ();
9243c3d1
JZZ
1792}
1793
1794bool
1795vl_vtype_info::same_vtype_p (const vl_vtype_info &other) const
1796{
1797 return get_sew () == other.get_sew () && get_vlmul () == other.get_vlmul ()
1798 && get_ta () == other.get_ta () && get_ma () == other.get_ma ();
1799}
1800
1801bool
1802vl_vtype_info::same_vlmax_p (const vl_vtype_info &other) const
1803{
1804 return get_ratio () == other.get_ratio ();
1805}
1806
1807/* Compare the compatibility between Dem1 and Dem2.
1808 If Dem1 > Dem2, Dem1 has bigger compatibility then Dem2
1809 meaning Dem1 is easier be compatible with others than Dem2
1810 or Dem2 is stricter than Dem1.
1811 For example, Dem1 (demand SEW + LMUL) > Dem2 (demand RATIO). */
9243c3d1
JZZ
1812bool
1813vector_insn_info::operator>= (const vector_insn_info &other) const
1814{
60bd33bc
JZZ
1815 if (support_relaxed_compatible_p (*this, other))
1816 {
1817 unsigned array_size = sizeof (unavailable_conds) / sizeof (demands_cond);
1818 /* Bypass AVL unavailable cases. */
1819 for (unsigned i = 2; i < array_size; i++)
1820 if (unavailable_conds[i].pair.match_cond_p (this->get_demands (),
1821 other.get_demands ())
1822 && unavailable_conds[i].incompatible_p (*this, other))
1823 return false;
1824 return true;
1825 }
1826
1827 if (!other.compatible_p (static_cast<const vl_vtype_info &> (*this)))
1828 return false;
1829 if (!this->compatible_p (static_cast<const vl_vtype_info &> (other)))
9243c3d1
JZZ
1830 return true;
1831
1832 if (*this == other)
1833 return true;
1834
ec99ffab
JZZ
1835 for (const auto &cond : unavailable_conds)
1836 if (cond.pair.match_cond_p (this->get_demands (), other.get_demands ())
1837 && cond.incompatible_p (*this, other))
1838 return false;
9243c3d1
JZZ
1839
1840 return true;
1841}
1842
1843bool
1844vector_insn_info::operator== (const vector_insn_info &other) const
1845{
1846 gcc_assert (!uninit_p () && !other.uninit_p ()
1847 && "Uninitialization should not happen");
1848
1849 /* Empty is only equal to another Empty. */
1850 if (empty_p ())
1851 return other.empty_p ();
1852 if (other.empty_p ())
1853 return empty_p ();
1854
1855 /* Unknown is only equal to another Unknown. */
1856 if (unknown_p ())
1857 return other.unknown_p ();
1858 if (other.unknown_p ())
1859 return unknown_p ();
1860
1861 for (size_t i = 0; i < NUM_DEMAND; i++)
1862 if (m_demands[i] != other.demand_p ((enum demand_type) i))
1863 return false;
1864
7ae4d1df
JZZ
1865 if (vector_config_insn_p (m_insn->rtl ())
1866 || vector_config_insn_p (other.get_insn ()->rtl ()))
1867 if (m_insn != other.get_insn ())
1868 return false;
9243c3d1
JZZ
1869
1870 if (!same_avl_p (other))
1871 return false;
1872
1873 /* If the full VTYPE is valid, check that it is the same. */
1874 return same_vtype_p (other);
1875}
1876
1877void
1878vector_insn_info::parse_insn (rtx_insn *rinsn)
1879{
1880 *this = vector_insn_info ();
1881 if (!NONDEBUG_INSN_P (rinsn))
1882 return;
1883 if (!has_vtype_op (rinsn))
1884 return;
1885 m_state = VALID;
1886 extract_insn_cached (rinsn);
1887 const rtx avl = recog_data.operand[get_attr_vl_op_idx (rinsn)];
1888 m_avl = avl_info (avl, nullptr);
1889 m_sew = ::get_sew (rinsn);
1890 m_vlmul = ::get_vlmul (rinsn);
1891 m_ta = tail_agnostic_p (rinsn);
1892 m_ma = mask_agnostic_p (rinsn);
1893}
1894
1895void
1896vector_insn_info::parse_insn (insn_info *insn)
1897{
1898 *this = vector_insn_info ();
1899
1900 /* Return if it is debug insn for the consistency with optimize == 0. */
1901 if (insn->is_debug_insn ())
1902 return;
1903
1904 /* We set it as unknown since we don't what will happen in CALL or ASM. */
1905 if (insn->is_call () || insn->is_asm ())
1906 {
1907 set_unknown ();
1908 return;
1909 }
1910
1911 /* If this is something that updates VL/VTYPE that we don't know about, set
1912 the state to unknown. */
60bd33bc 1913 if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())
9243c3d1
JZZ
1914 && (find_access (insn->defs (), VL_REGNUM)
1915 || find_access (insn->defs (), VTYPE_REGNUM)))
1916 {
1917 set_unknown ();
1918 return;
1919 }
1920
1921 if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()))
1922 return;
1923
1924 /* Warning: This function has to work on both the lowered (i.e. post
1925 emit_local_forward_vsetvls) and pre-lowering forms. The main implication
1926 of this is that it can't use the value of a SEW, VL, or Policy operand as
1927 they might be stale after lowering. */
1928 vl_vtype_info::operator= (get_vl_vtype_info (insn));
1929 m_insn = insn;
1930 m_state = VALID;
1931 if (vector_config_insn_p (insn->rtl ()))
1932 {
1933 m_demands[DEMAND_AVL] = true;
1934 m_demands[DEMAND_RATIO] = true;
1935 return;
1936 }
1937
1938 if (has_vl_op (insn->rtl ()))
1939 m_demands[DEMAND_AVL] = true;
1940
1941 if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE)
1942 m_demands[DEMAND_RATIO] = true;
1943 else
1944 {
1945 /* TODO: By default, if it doesn't demand RATIO, we set it
1946 demand SEW && LMUL both. Some instructions may demand SEW
1947 only and ignore LMUL, will fix it later. */
1948 m_demands[DEMAND_SEW] = true;
ec99ffab
JZZ
1949 if (!ignore_vlmul_insn_p (insn->rtl ()))
1950 m_demands[DEMAND_LMUL] = true;
9243c3d1
JZZ
1951 }
1952
1953 if (get_attr_ta (insn->rtl ()) != INVALID_ATTRIBUTE)
1954 m_demands[DEMAND_TAIL_POLICY] = true;
1955 if (get_attr_ma (insn->rtl ()) != INVALID_ATTRIBUTE)
1956 m_demands[DEMAND_MASK_POLICY] = true;
6b6b9c68
JZZ
1957
1958 if (vector_config_insn_p (insn->rtl ()))
1959 return;
1960
ec99ffab
JZZ
1961 if (scalar_move_insn_p (insn->rtl ()))
1962 {
1963 if (m_avl.has_non_zero_avl ())
1964 m_demands[DEMAND_NONZERO_AVL] = true;
1965 if (m_ta)
1966 m_demands[DEMAND_GE_SEW] = true;
1967 }
1968
1969 if (!m_avl.has_avl_reg () || vlmax_avl_p (get_avl ()) || !m_avl.get_source ())
1970 return;
1971 if (!m_avl.get_source ()->insn ()->is_real ()
1972 && !m_avl.get_source ()->insn ()->is_phi ())
6b6b9c68
JZZ
1973 return;
1974
1975 insn_info *def_insn = extract_single_source (m_avl.get_source ());
ec99ffab
JZZ
1976 if (!def_insn || !vsetvl_insn_p (def_insn->rtl ()))
1977 return;
9243c3d1 1978
ec99ffab
JZZ
1979 vector_insn_info new_info;
1980 new_info.parse_insn (def_insn);
1981 if (!same_vlmax_p (new_info) && !scalar_move_insn_p (insn->rtl ()))
1982 return;
1983 /* TODO: Currently, we don't forward AVL for non-VLMAX vsetvl. */
1984 if (vlmax_avl_p (new_info.get_avl ()))
1985 set_avl_info (avl_info (new_info.get_avl (), get_avl_source ()));
1986
1987 if (scalar_move_insn_p (insn->rtl ()) && m_avl.has_non_zero_avl ())
1988 m_demands[DEMAND_NONZERO_AVL] = true;
9243c3d1
JZZ
1989}
1990
/* Return true if THIS and OTHER place no mutually incompatible demands
   on VL/VTYPE, i.e. no rule in incompatible_conds fires in either
   direction.  Both infos must be VALID or DIRTY.  */
bool
vector_insn_info::compatible_p (const vector_insn_info &other) const
{
  gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p ()
	      && "Can't compare invalid demanded infos");

  /* dual_incompatible_p checks the rule for both (this, other) orders.  */
  for (const auto &cond : incompatible_conds)
    if (cond.dual_incompatible_p (*this, other))
      return false;
  return true;
}
2002
d51f2456
JZ
/* Like compatible_p, but skip the AVL incompatibility rule so callers can
   test compatibility of everything except AVL.
   NOTE(review): relies on incompatible_conds[0] being the AVL rule --
   the loop deliberately starts at index 1.  */
bool
vector_insn_info::skip_avl_compatible_p (const vector_insn_info &other) const
{
  gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p ()
	      && "Can't compare invalid demanded infos");
  unsigned array_size = sizeof (incompatible_conds) / sizeof (demands_cond);
  /* Bypass AVL incompatible cases.  */
  for (unsigned i = 1; i < array_size; i++)
    if (incompatible_conds[i].dual_incompatible_p (*this, other))
      return false;
  return true;
}
2015
9243c3d1
JZZ
/* Return true if OTHER's AVL satisfies THIS's AVL demand.  A non-demanded
   AVL is trivially compatible; a DEMAND_NONZERO_AVL demand is satisfied
   by any provably non-zero AVL; otherwise AVLs must be equal.  */
bool
vector_insn_info::compatible_avl_p (const vl_vtype_info &other) const
{
  gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
  gcc_assert (!unknown_p () && "Can't compare AVL in unknown state");
  if (!demand_p (DEMAND_AVL))
    return true;
  if (demand_p (DEMAND_NONZERO_AVL) && other.has_non_zero_avl ())
    return true;
  return get_avl_info () == other.get_avl_info ();
}
2027
4f673c5e
JZZ
/* Return true if AVL info OTHER satisfies THIS's AVL demand.  Unlike the
   vl_vtype_info overload, callers must only use this when AVL is demanded
   (asserted below).  */
bool
vector_insn_info::compatible_avl_p (const avl_info &other) const
{
  gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
  gcc_assert (!unknown_p () && "Can't compare AVL in unknown state");
  gcc_assert (demand_p (DEMAND_AVL) && "Can't compare AVL undemand state");
  /* NOTE(review): this check is unreachable when the assert above is
     active; it only serves as a fallback in builds where gcc_assert is
     configured out.  */
  if (!demand_p (DEMAND_AVL))
    return true;
  if (demand_p (DEMAND_NONZERO_AVL) && other.has_non_zero_avl ())
    return true;
  return get_avl_info () == other;
}
2040
9243c3d1
JZZ
/* Return true if OTHER's VTYPE fields satisfy every VTYPE demand of THIS.
   SEW may be demanded exactly (DEMAND_SEW) or as a lower bound
   (DEMAND_GE_SEW, where any SEW >= ours is acceptable); LMUL, RATIO and
   the tail/mask policies must match exactly when demanded.  */
bool
vector_insn_info::compatible_vtype_p (const vl_vtype_info &other) const
{
  gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
  gcc_assert (!unknown_p () && "Can't compare VTYPE in unknown state");
  if (demand_p (DEMAND_SEW))
    {
      /* Exact-SEW demand: any mismatch is incompatible.  */
      if (!demand_p (DEMAND_GE_SEW) && m_sew != other.get_sew ())
	return false;
      /* Greater-or-equal demand: OTHER's SEW may not be smaller.  */
      if (demand_p (DEMAND_GE_SEW) && m_sew > other.get_sew ())
	return false;
    }
  if (demand_p (DEMAND_LMUL) && m_vlmul != other.get_vlmul ())
    return false;
  if (demand_p (DEMAND_RATIO) && m_ratio != other.get_ratio ())
    return false;
  if (demand_p (DEMAND_TAIL_POLICY) && m_ta != other.get_ta ())
    return false;
  if (demand_p (DEMAND_MASK_POLICY) && m_ma != other.get_ma ())
    return false;
  return true;
}
2063
/* Determine whether the vector instructions requirements represented by
   Require are compatible with the previous vsetvli instruction represented
   by this. INSN is the instruction whose requirements we're considering. */
bool
vector_insn_info::compatible_p (const vl_vtype_info &curr_info) const
{
  gcc_assert (!uninit_p () && "Can't handle uninitialized info");
  if (empty_p ())
    return false;

  /* Nothing is compatible with Unknown. */
  if (unknown_p ())
    return false;

  /* If the instruction doesn't need an AVLReg and the SEW matches, consider
     it compatible. */
  if (!demand_p (DEMAND_AVL))
    if (m_sew == curr_info.get_sew ())
      return true;

  /* Otherwise both the AVL and the full VTYPE demands must be met.  */
  return compatible_avl_p (curr_info) && compatible_vtype_p (curr_info);
}
2086
6b6b9c68
JZZ
/* Return true if THIS's demands subsume OTHER's, i.e. a configuration
   satisfying THIS also satisfies OTHER (delegates to operator>=).  */
bool
vector_insn_info::available_p (const vector_insn_info &other) const
{
  return *this >= other;
}
2092
/* Fuse the AVL demand of INFO1 and INFO2 into THIS.  INFO1's insn and AVL
   win when INFO1 demands AVL, except that a NONZERO-only demand defers to
   INFO2's concrete AVL; otherwise INFO2's AVL is taken.  The resulting
   DEMAND_AVL bit is the OR of the two inputs' bits.  */
void
vector_insn_info::fuse_avl (const vector_insn_info &info1,
			    const vector_insn_info &info2)
{
  set_insn (info1.get_insn ());
  if (info1.demand_p (DEMAND_AVL))
    {
      if (info1.demand_p (DEMAND_NONZERO_AVL))
	{
	  /* INFO1 only needs "any non-zero" AVL, while INFO2 pins a
	     specific AVL: take INFO2's and drop the non-zero marker.  */
	  if (info2.demand_p (DEMAND_AVL)
	      && !info2.demand_p (DEMAND_NONZERO_AVL))
	    {
	      set_avl_info (info2.get_avl_info ());
	      set_demand (DEMAND_AVL, true);
	      set_demand (DEMAND_NONZERO_AVL, false);
	      return;
	    }
	}
      set_avl_info (info1.get_avl_info ());
      set_demand (DEMAND_NONZERO_AVL, info1.demand_p (DEMAND_NONZERO_AVL));
    }
  else
    {
      set_avl_info (info2.get_avl_info ());
      set_demand (DEMAND_NONZERO_AVL, info2.demand_p (DEMAND_NONZERO_AVL));
    }
  set_demand (DEMAND_AVL,
	      info1.demand_p (DEMAND_AVL) || info2.demand_p (DEMAND_AVL));
}
2122
/* Fuse the SEW/LMUL/RATIO demands of INFO1 and INFO2 into THIS.  When both
   sides demand the same set of SEW/LMUL bits, copy INFO2's values
   directly; otherwise look the pair up in the fuse_rules table (tried in
   both argument orders).  An unmatched pair is a bug (gcc_unreachable).  */
void
vector_insn_info::fuse_sew_lmul (const vector_insn_info &info1,
				 const vector_insn_info &info2)
{
  /* We need to fuse sew && lmul according to demand info:

     1. GE_SEW.
     2. SEW.
     3. LMUL.
     4. RATIO. */
  if (same_sew_lmul_demand_p (info1.get_demands (), info2.get_demands ()))
    {
      set_demand (DEMAND_SEW, info2.demand_p (DEMAND_SEW));
      set_demand (DEMAND_LMUL, info2.demand_p (DEMAND_LMUL));
      set_demand (DEMAND_RATIO, info2.demand_p (DEMAND_RATIO));
      set_demand (DEMAND_GE_SEW, info2.demand_p (DEMAND_GE_SEW));
      set_sew (info2.get_sew ());
      set_vlmul (info2.get_vlmul ());
      set_ratio (info2.get_ratio ());
      return;
    }
  for (const auto &rule : fuse_rules)
    {
      /* Rules are stored once; try the pair in both orders.  */
      if (rule.pair.match_cond_p (info1.get_demands (), info2.get_demands ()))
	{
	  set_demand (DEMAND_SEW, rule.demand_sew_p);
	  set_demand (DEMAND_LMUL, rule.demand_lmul_p);
	  set_demand (DEMAND_RATIO, rule.demand_ratio_p);
	  set_demand (DEMAND_GE_SEW, rule.demand_ge_sew_p);
	  set_sew (rule.new_sew (info1, info2));
	  set_vlmul (rule.new_vlmul (info1, info2));
	  set_ratio (rule.new_ratio (info1, info2));
	  return;
	}
      if (rule.pair.match_cond_p (info2.get_demands (), info1.get_demands ()))
	{
	  set_demand (DEMAND_SEW, rule.demand_sew_p);
	  set_demand (DEMAND_LMUL, rule.demand_lmul_p);
	  set_demand (DEMAND_RATIO, rule.demand_ratio_p);
	  set_demand (DEMAND_GE_SEW, rule.demand_ge_sew_p);
	  set_sew (rule.new_sew (info2, info1));
	  set_vlmul (rule.new_vlmul (info2, info1));
	  set_ratio (rule.new_ratio (info2, info1));
	  return;
	}
    }
  gcc_unreachable ();
}
2171
/* Fuse the tail-policy demand: prefer INFO1's policy, then INFO2's,
   otherwise fall back to the default TA with no demand recorded.  */
void
vector_insn_info::fuse_tail_policy (const vector_insn_info &info1,
				    const vector_insn_info &info2)
{
  if (info1.demand_p (DEMAND_TAIL_POLICY))
    {
      set_ta (info1.get_ta ());
      demand (DEMAND_TAIL_POLICY);
    }
  else if (info2.demand_p (DEMAND_TAIL_POLICY))
    {
      set_ta (info2.get_ta ());
      demand (DEMAND_TAIL_POLICY);
    }
  else
    set_ta (get_default_ta ());
}
2189
/* Fuse the mask-policy demand: prefer INFO1's policy, then INFO2's,
   otherwise fall back to the default MA with no demand recorded.
   Mirrors fuse_tail_policy.  */
void
vector_insn_info::fuse_mask_policy (const vector_insn_info &info1,
				    const vector_insn_info &info2)
{
  if (info1.demand_p (DEMAND_MASK_POLICY))
    {
      set_ma (info1.get_ma ());
      demand (DEMAND_MASK_POLICY);
    }
  else if (info2.demand_p (DEMAND_MASK_POLICY))
    {
      set_ma (info2.get_ma ());
      demand (DEMAND_MASK_POLICY);
    }
  else
    set_ma (get_default_ma ());
}
2207
9243c3d1
JZZ
/* Merge THIS with MERGE_INFO and return the fused info.  TYPE selects the
   AVL/INSN fusion direction: LOCAL_MERGE tracks the latest insn within a
   block, any other type preserves THIS's insn/AVL for global dataflow.
   The two infos must be compatible unless THIS is an explicit vsetvl.  */
vector_insn_info
vector_insn_info::merge (const vector_insn_info &merge_info,
			 enum merge_type type) const
{
  if (!vsetvl_insn_p (get_insn ()->rtl ()))
    gcc_assert (this->compatible_p (merge_info)
		&& "Can't merge incompatible demanded infos");

  vector_insn_info new_info;
  new_info.set_valid ();
  if (type == LOCAL_MERGE)
    {
      /* For local backward data flow, we always update INSN && AVL as the
	 latest INSN and AVL so that we can keep track status of each INSN. */
      new_info.fuse_avl (merge_info, *this);
    }
  else
    {
      /* For global data flow, we should keep original INSN and AVL if they
	 valid since we should keep the life information of each block.

	 For example:
	   bb 0 -> bb 1.
	 We should keep INSN && AVL of bb 1 since we will eventually emit
	 vsetvl instruction according to INSN and AVL of bb 1. */
      new_info.fuse_avl (*this, merge_info);
    }

  /* SEW/LMUL, tail and mask policies fuse the same way in both modes.  */
  new_info.fuse_sew_lmul (*this, merge_info);
  new_info.fuse_tail_policy (*this, merge_info);
  new_info.fuse_mask_policy (*this, merge_info);
  return new_info;
}
2241
60bd33bc
JZZ
/* If a read of VL follows fault-first load INSN, retarget our AVL to that
   vl output and record INSN.  Return true on success, false if no
   forward read-vl insn exists.  */
bool
vector_insn_info::update_fault_first_load_avl (insn_info *insn)
{
  // Update AVL to vl-output of the fault first load.
  const insn_info *read_vl = get_forward_read_vl_insn (insn);
  if (read_vl)
    {
      rtx vl = SET_DEST (PATTERN (read_vl->rtl ()));
      def_info *def = find_access (read_vl->defs (), REGNO (vl));
      set_info *set = safe_dyn_cast<set_info *> (def);
      set_avl_info (avl_info (vl, set));
      set_insn (insn);
      return true;
    }
  return false;
}
2258
9243c3d1
JZZ
/* Dump a human-readable description of this info to FILE: state, demand
   bits, the VL/VTYPE values, and (when valid) the originating insn.
   Debug aid only.  */
void
vector_insn_info::dump (FILE *file) const
{
  fprintf (file, "[");
  /* State tag: exactly one of the predicates below holds.  */
  if (uninit_p ())
    fprintf (file, "UNINITIALIZED,");
  else if (valid_p ())
    fprintf (file, "VALID,");
  else if (unknown_p ())
    fprintf (file, "UNKNOWN,");
  else if (empty_p ())
    fprintf (file, "EMPTY,");
  else if (hard_empty_p ())
    fprintf (file, "HARD_EMPTY,");
  else if (dirty_with_killed_avl_p ())
    fprintf (file, "DIRTY_WITH_KILLED_AVL,");
  else
    fprintf (file, "DIRTY,");

  /* One flag per demand bit.  */
  fprintf (file, "Demand field={%d(VL),", demand_p (DEMAND_AVL));
  fprintf (file, "%d(DEMAND_NONZERO_AVL),", demand_p (DEMAND_NONZERO_AVL));
  fprintf (file, "%d(SEW),", demand_p (DEMAND_SEW));
  fprintf (file, "%d(DEMAND_GE_SEW),", demand_p (DEMAND_GE_SEW));
  fprintf (file, "%d(LMUL),", demand_p (DEMAND_LMUL));
  fprintf (file, "%d(RATIO),", demand_p (DEMAND_RATIO));
  fprintf (file, "%d(TAIL_POLICY),", demand_p (DEMAND_TAIL_POLICY));
  fprintf (file, "%d(MASK_POLICY)}\n", demand_p (DEMAND_MASK_POLICY));

  fprintf (file, "AVL=");
  print_rtl_single (file, get_avl ());
  fprintf (file, "SEW=%d,", get_sew ());
  fprintf (file, "VLMUL=%d,", get_vlmul ());
  fprintf (file, "RATIO=%d,", get_ratio ());
  fprintf (file, "TAIL_POLICY=%d,", get_ta ());
  fprintf (file, "MASK_POLICY=%d", get_ma ());
  fprintf (file, "]\n");

  if (valid_p ())
    {
      if (get_insn ())
	{
	  fprintf (file, "The real INSN=");
	  print_rtl_single (file, get_insn ()->rtl ());
	}
    }
}
2305
/* Construct the manager: null out all LCM bitmap vectors, size the
   per-insn and per-block tables, then parse every insn.  At -O0 we walk
   raw CFG insns; with optimization we walk the RTL-SSA view and also
   reset each block's probability.  */
vector_infos_manager::vector_infos_manager ()
{
  vector_edge_list = nullptr;
  vector_kill = nullptr;
  vector_del = nullptr;
  vector_insert = nullptr;
  vector_antic = nullptr;
  vector_transp = nullptr;
  vector_comp = nullptr;
  vector_avin = nullptr;
  vector_avout = nullptr;
  vector_insn_infos.safe_grow (get_max_uid ());
  vector_block_infos.safe_grow (last_basic_block_for_fn (cfun));
  if (!optimize)
    {
      basic_block cfg_bb;
      rtx_insn *rinsn;
      FOR_ALL_BB_FN (cfg_bb, cfun)
	{
	  vector_block_infos[cfg_bb->index].local_dem = vector_insn_info ();
	  vector_block_infos[cfg_bb->index].reaching_out = vector_insn_info ();
	  FOR_BB_INSNS (cfg_bb, rinsn)
	    vector_insn_infos[INSN_UID (rinsn)].parse_insn (rinsn);
	}
    }
  else
    {
      for (const bb_info *bb : crtl->ssa->bbs ())
	{
	  vector_block_infos[bb->index ()].local_dem = vector_insn_info ();
	  vector_block_infos[bb->index ()].reaching_out = vector_insn_info ();
	  for (insn_info *insn : bb->real_insns ())
	    vector_insn_infos[insn->uid ()].parse_insn (insn);
	  vector_block_infos[bb->index ()].probability = profile_probability ();
	}
    }
}
2343
2344void
2345vector_infos_manager::create_expr (vector_insn_info &info)
2346{
2347 for (size_t i = 0; i < vector_exprs.length (); i++)
2348 if (*vector_exprs[i] == info)
2349 return;
2350 vector_exprs.safe_push (&info);
2351}
2352
2353size_t
2354vector_infos_manager::get_expr_id (const vector_insn_info &info) const
2355{
2356 for (size_t i = 0; i < vector_exprs.length (); i++)
2357 if (*vector_exprs[i] == info)
2358 return i;
2359 gcc_unreachable ();
2360}
2361
/* Return the indices of all tracked expressions that INFO makes
   available (see vector_insn_info::available_p).  */
auto_vec<size_t>
vector_infos_manager::get_all_available_exprs (
  const vector_insn_info &info) const
{
  auto_vec<size_t> available_list;
  for (size_t i = 0; i < vector_exprs.length (); i++)
    if (info.available_p (*vector_exprs[i]))
      available_list.safe_push (i);
  return available_list;
}
2372
d06e9264
JZ
/* Return true if no (transitive) predecessor of CFG_BB carries a VALID
   or DIRTY local_dem or reaching_out info, i.e. nothing useful can flow
   into CFG_BB from above.  */
bool
vector_infos_manager::all_empty_predecessor_p (const basic_block cfg_bb) const
{
  hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb);
  for (const basic_block pred_cfg_bb : pred_cfg_bbs)
    {
      const auto &pred_block_info = vector_block_infos[pred_cfg_bb->index];
      if (!pred_block_info.local_dem.valid_or_dirty_p ()
	  && !pred_block_info.reaching_out.valid_or_dirty_p ())
	continue;
      return false;
    }
  return true;
}
2387
9243c3d1
JZZ
/* Return true if every expression selected by BITDATA has the same
   SEW/LMUL ratio.  An empty bitmap yields false (nothing to agree on).  */
bool
vector_infos_manager::all_same_ratio_p (sbitmap bitdata) const
{
  if (bitmap_empty_p (bitdata))
    return false;

  int ratio = -1;	/* -1 = no ratio seen yet.  */
  unsigned int bb_index;
  sbitmap_iterator sbi;

  EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi)
  {
    if (ratio == -1)
      ratio = vector_exprs[bb_index]->get_ratio ();
    else if (vector_exprs[bb_index]->get_ratio () != ratio)
      return false;
  }
  return true;
}
2407
ff8f9544
JZ
/* Return TRUE if the incoming vector configuration state
   to CFG_BB is compatible with the vector configuration
   state in CFG_BB, FALSE otherwise. */
bool
vector_infos_manager::all_avail_in_compatible_p (const basic_block cfg_bb) const
{
  const auto &info = vector_block_infos[cfg_bb->index].local_dem;
  sbitmap avin = vector_avin[cfg_bb->index];
  unsigned int bb_index;
  sbitmap_iterator sbi;
  /* Every LCM-available-in expression must be compatible with the
     block's own demand.  */
  EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi)
  {
    const auto &avin_info
      = static_cast<const vl_vtype_info &> (*vector_exprs[bb_index]);
    if (!info.compatible_p (avin_info))
      return false;
  }
  return true;
}
2427
005fad9d
JZZ
/* Return true if every expression selected by BITDATA has the same AVL as
   CFG_BB's local demand.  Trivially true when the block doesn't demand
   AVL; false for an empty bitmap.  */
bool
vector_infos_manager::all_same_avl_p (const basic_block cfg_bb,
				      sbitmap bitdata) const
{
  if (bitmap_empty_p (bitdata))
    return false;

  const auto &block_info = vector_block_infos[cfg_bb->index];
  if (!block_info.local_dem.demand_p (DEMAND_AVL))
    return true;

  avl_info avl = block_info.local_dem.get_avl_info ();
  unsigned int bb_index;
  sbitmap_iterator sbi;

  EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi)
  {
    if (vector_exprs[bb_index]->get_avl_info () != avl)
      return false;
  }
  return true;
}
2450
9243c3d1
JZZ
2451size_t
2452vector_infos_manager::expr_set_num (sbitmap bitdata) const
2453{
2454 size_t count = 0;
2455 for (size_t i = 0; i < vector_exprs.length (); i++)
2456 if (bitmap_bit_p (bitdata, i))
2457 count++;
2458 return count;
2459}
2460
/* Release all per-function state.  The refine/delete worklists must have
   been drained by now; LCM bitmap vectors exist only when optimizing.  */
void
vector_infos_manager::release (void)
{
  if (!vector_insn_infos.is_empty ())
    vector_insn_infos.release ();
  if (!vector_block_infos.is_empty ())
    vector_block_infos.release ();
  if (!vector_exprs.is_empty ())
    vector_exprs.release ();

  gcc_assert (to_refine_vsetvls.is_empty ());
  gcc_assert (to_delete_vsetvls.is_empty ());
  if (optimize > 0)
    free_bitmap_vectors ();
}
2476
/* Allocate the per-block sbitmap vectors used by LCM (one bit per tracked
   expression) and initialize TRANSP to all-ones, ANTIC/COMP to zero.
   INSERT/DEL/AVIN/AVOUT are filled in later by the LCM computation.  */
void
vector_infos_manager::create_bitmap_vectors (void)
{
  /* Create the bitmap vectors. */
  vector_antic = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
				       vector_exprs.length ());
  vector_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
					vector_exprs.length ());
  vector_comp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
				      vector_exprs.length ());
  vector_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
				      vector_exprs.length ());
  vector_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
				       vector_exprs.length ());
  vector_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
				      vector_exprs.length ());

  bitmap_vector_ones (vector_transp, last_basic_block_for_fn (cfun));
  bitmap_vector_clear (vector_antic, last_basic_block_for_fn (cfun));
  bitmap_vector_clear (vector_comp, last_basic_block_for_fn (cfun));
}
2498
/* Free every LCM bitmap vector (and the edge list) and reset the
   pointers so a later release () or re-run sees a clean state.  */
void
vector_infos_manager::free_bitmap_vectors (void)
{
  /* Finished. Free up all the things we've allocated. */
  free_edge_list (vector_edge_list);
  if (vector_del)
    sbitmap_vector_free (vector_del);
  if (vector_insert)
    sbitmap_vector_free (vector_insert);
  if (vector_kill)
    sbitmap_vector_free (vector_kill);
  if (vector_antic)
    sbitmap_vector_free (vector_antic);
  if (vector_transp)
    sbitmap_vector_free (vector_transp);
  if (vector_comp)
    sbitmap_vector_free (vector_comp);
  if (vector_avin)
    sbitmap_vector_free (vector_avin);
  if (vector_avout)
    sbitmap_vector_free (vector_avout);

  vector_edge_list = nullptr;
  vector_kill = nullptr;
  vector_del = nullptr;
  vector_insert = nullptr;
  vector_antic = nullptr;
  vector_transp = nullptr;
  vector_comp = nullptr;
  vector_avin = nullptr;
  vector_avout = nullptr;
}
2531
/* Dump the whole analysis to FILE in four sections: per-block local
   info (header/insn/footer/probability), per-block local LCM properties
   (ANTLOC/AVLOC/TRANSP/KILL), global LCM results (AVIN/AVOUT/DELETE),
   and finally the edge INSERT decisions.  Debug aid only.  */
void
vector_infos_manager::dump (FILE *file) const
{
  basic_block cfg_bb;
  rtx_insn *rinsn;

  fprintf (file, "\n");
  FOR_ALL_BB_FN (cfg_bb, cfun)
    {
      fprintf (file, "Local vector info of <bb %d>:\n", cfg_bb->index);
      fprintf (file, "<HEADER>=");
      vector_block_infos[cfg_bb->index].local_dem.dump (file);
      FOR_BB_INSNS (cfg_bb, rinsn)
	{
	  /* Only RVV instructions with a VTYPE operand are interesting.  */
	  if (!NONDEBUG_INSN_P (rinsn) || !has_vtype_op (rinsn))
	    continue;
	  fprintf (file, "<insn %d>=", INSN_UID (rinsn));
	  const auto &info = vector_insn_infos[INSN_UID (rinsn)];
	  info.dump (file);
	}
      fprintf (file, "<FOOTER>=");
      vector_block_infos[cfg_bb->index].reaching_out.dump (file);
      fprintf (file, "<Probability>=");
      vector_block_infos[cfg_bb->index].probability.dump (file);
      fprintf (file, "\n\n");
    }

  fprintf (file, "\n");
  FOR_ALL_BB_FN (cfg_bb, cfun)
    {
      fprintf (file, "Local properties of <bb %d>:\n", cfg_bb->index);

      fprintf (file, "<ANTLOC>=");
      if (vector_antic == nullptr)
	fprintf (file, "(nil)\n");
      else
	dump_bitmap_file (file, vector_antic[cfg_bb->index]);

      fprintf (file, "<AVLOC>=");
      if (vector_comp == nullptr)
	fprintf (file, "(nil)\n");
      else
	dump_bitmap_file (file, vector_comp[cfg_bb->index]);

      fprintf (file, "<TRANSP>=");
      if (vector_transp == nullptr)
	fprintf (file, "(nil)\n");
      else
	dump_bitmap_file (file, vector_transp[cfg_bb->index]);

      fprintf (file, "<KILL>=");
      if (vector_kill == nullptr)
	fprintf (file, "(nil)\n");
      else
	dump_bitmap_file (file, vector_kill[cfg_bb->index]);
    }

  fprintf (file, "\n");
  FOR_ALL_BB_FN (cfg_bb, cfun)
    {
      fprintf (file, "Global LCM (Lazy code motion) result of <bb %d>:\n",
	       cfg_bb->index);

      fprintf (file, "<AVIN>=");
      if (vector_avin == nullptr)
	fprintf (file, "(nil)\n");
      else
	dump_bitmap_file (file, vector_avin[cfg_bb->index]);

      fprintf (file, "<AVOUT>=");
      if (vector_avout == nullptr)
	fprintf (file, "(nil)\n");
      else
	dump_bitmap_file (file, vector_avout[cfg_bb->index]);

      fprintf (file, "<DELETE>=");
      if (vector_del == nullptr)
	fprintf (file, "(nil)\n");
      else
	dump_bitmap_file (file, vector_del[cfg_bb->index]);
    }

  fprintf (file, "\nGlobal LCM (Lazy code motion) INSERT info:\n");
  for (size_t i = 0; i < vector_exprs.length (); i++)
    {
      for (int ed = 0; ed < NUM_EDGES (vector_edge_list); ed++)
	{
	  edge eg = INDEX_EDGE (vector_edge_list, ed);
	  if (bitmap_bit_p (vector_insert[ed], i))
	    fprintf (dump_file,
		     "INSERT edge %d from bb %d to bb %d for VSETVL "
		     "expr[%ld]\n",
		     ed, eg->src->index, eg->dest->index, i);
	}
    }
}
2628
/* Static pass descriptor for the vsetvl RTL pass; no TODO flags and no
   property requirements.  */
const pass_data pass_data_vsetvl = {
  RTL_PASS,	 /* type */
  "vsetvl",	 /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE,	 /* tv_id */
  0,		 /* properties_required */
  0,		 /* properties_provided */
  0,		 /* properties_destroyed */
  0,		 /* todo_flags_start */
  0,		 /* todo_flags_finish */
};
2640
/* The vsetvl insertion pass.  Private members mirror the phase structure
   documented at the top of the file; the pass only runs when the target
   has the vector extension (see gate).  */
class pass_vsetvl : public rtl_opt_pass
{
private:
  /* Per-function analysis state, owned between init () and done ().  */
  class vector_infos_manager *m_vector_manager;

  void simple_vsetvl (void) const;
  void lazy_vsetvl (void);

  /* Phase 1. */
  void compute_local_backward_infos (const bb_info *);

  /* Phase 2. */
  bool need_vsetvl (const vector_insn_info &, const vector_insn_info &) const;
  void transfer_before (vector_insn_info &, insn_info *) const;
  void transfer_after (vector_insn_info &, insn_info *) const;
  void emit_local_forward_vsetvls (const bb_info *);

  /* Phase 3. */
  enum fusion_type get_backward_fusion_type (const bb_info *,
					     const vector_insn_info &);
  bool hard_empty_block_p (const bb_info *, const vector_insn_info &) const;
  bool backward_demand_fusion (void);
  bool forward_demand_fusion (void);
  bool cleanup_illegal_dirty_blocks (void);
  void demand_fusion (void);

  /* Phase 4. */
  void prune_expressions (void);
  void compute_local_properties (void);
  bool can_refine_vsetvl_p (const basic_block, const vector_insn_info &) const;
  void refine_vsetvls (void) const;
  void cleanup_vsetvls (void);
  bool commit_vsetvls (void);
  void pre_vsetvl (void);

  /* Phase 5. */
  void cleanup_insns (void) const;

  /* Phase 6. */
  void propagate_avl (void) const;

  void init (void);
  void done (void);
  void compute_probabilities (void);

public:
  pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {}

  /* opt_pass methods: */
  virtual bool gate (function *) final override { return TARGET_VECTOR; }
  virtual unsigned int execute (function *) final override;
}; // class pass_vsetvl
2693
/* Simple m_vsetvl_insert vsetvl for optimize == 0.  Emits a
   result-discarding vsetvl directly before every instruction that has a
   VTYPE operand, with no optimization at all.  */
void
pass_vsetvl::simple_vsetvl (void) const
{
  if (dump_file)
    fprintf (dump_file,
	     "\nEntering Simple VSETVL PASS and Handling %d basic blocks for "
	     "function:%s\n",
	     n_basic_blocks_for_fn (cfun), function_name (cfun));

  basic_block cfg_bb;
  rtx_insn *rinsn;
  FOR_ALL_BB_FN (cfg_bb, cfun)
    {
      FOR_BB_INSNS (cfg_bb, rinsn)
	{
	  if (!NONDEBUG_INSN_P (rinsn))
	    continue;
	  if (has_vtype_op (rinsn))
	    {
	      const auto info
		= m_vector_manager->vector_insn_infos[INSN_UID (rinsn)];
	      emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_BEFORE, info,
				NULL_RTX, rinsn);
	    }
	}
    }
}
2722
/* Compute demanded information by backward data-flow analysis.  Walks
   BB's real insns in reverse, fusing each insn's demand into the running
   CHANGE info when compatible; the final CHANGE becomes the block's
   local demand (and its reaching-out when the block is empty).  */
void
pass_vsetvl::compute_local_backward_infos (const bb_info *bb)
{
  vector_insn_info change;
  change.set_empty ();

  auto &block_info = m_vector_manager->vector_block_infos[bb->index ()];
  block_info.reaching_out = change;

  for (insn_info *insn : bb->reverse_real_nondebug_insns ())
    {
      auto &info = m_vector_manager->vector_insn_infos[insn->uid ()];

      if (info.uninit_p ())
	/* If it is uninitialized, propagate it directly. */
	info = change;
      else if (info.unknown_p ())
	change = info;
      else
	{
	  gcc_assert (info.valid_p () && "Unexpected Invalid demanded info");
	  if (change.valid_p ())
	    {
	      /* Only fuse when the infos are compatible and we are not
		 about to propagate an AVL whose register is unavailable
		 at INSN.  */
	      if (!(propagate_avl_across_demands_p (change, info)
		    && !reg_available_p (insn, change))
		  && change.compatible_p (info))
		{
		  info = change.merge (info, LOCAL_MERGE);
		  /* Fix PR109399, we should update user vsetvl instruction
		     if there is a change in demand fusion. */
		  if (vsetvl_insn_p (insn->rtl ()))
		    change_vsetvl_insn (insn, info);
		}
	    }
	  change = info;
	}
    }

  block_info.local_dem = change;
  if (block_info.local_dem.empty_p ())
    block_info.reaching_out = block_info.local_dem;
}
2766
/* Return true if a dem_info is required to transition from curr_info to
   require before INSN.  A vsetvl is needed unless CURR_INFO is a valid,
   known state whose VL/VTYPE already satisfies REQUIRE.  */
bool
pass_vsetvl::need_vsetvl (const vector_insn_info &require,
			  const vector_insn_info &curr_info) const
{
  if (!curr_info.valid_p () || curr_info.unknown_p () || curr_info.uninit_p ())
    return true;

  if (require.compatible_p (static_cast<const vl_vtype_info &> (curr_info)))
    return false;

  return true;
}
2781
/* Given an incoming state reaching INSN, modifies that state so that it is
   minimally compatible with INSN. The resulting state is guaranteed to be
   semantically legal for INSN, but may not be the state requested by INSN. */
void
pass_vsetvl::transfer_before (vector_insn_info &info, insn_info *insn) const
{
  /* Insns without a VTYPE operand don't constrain the state.  */
  if (!has_vtype_op (insn->rtl ()))
    return;

  const vector_insn_info require
    = m_vector_manager->vector_insn_infos[insn->uid ()];
  if (info.valid_p () && !need_vsetvl (require, info))
    return;
  info = require;
}
2797
/* Given a state with which we evaluated insn (see transfer_before above for why
   this might be different that the state insn requested), modify the state to
   reflect the changes insn might make. */
void
pass_vsetvl::transfer_after (vector_insn_info &info, insn_info *insn) const
{
  /* An explicit VL/VTYPE configuration insn defines the state exactly.  */
  if (vector_config_insn_p (insn->rtl ()))
    {
      info = m_vector_manager->vector_insn_infos[insn->uid ()];
      return;
    }

  /* A fault-only-first load may shrink VL; retarget AVL to its vl output
     when a forward read of VL exists.  */
  if (fault_first_load_p (insn->rtl ())
      && info.update_fault_first_load_avl (insn))
    return;

  /* If this is something that updates VL/VTYPE that we don't know about, set
     the state to unknown. */
  if (insn->is_call () || insn->is_asm ()
      || find_access (insn->defs (), VL_REGNUM)
      || find_access (insn->defs (), VTYPE_REGNUM))
    info = vector_insn_info::get_unknown ();
}
2821
/* Emit vsetvl within each block by forward data-flow analysis.  Tracks
   the abstract VL/VTYPE state through BB's insns and inserts a vsetvl
   before any insn whose demand the current state cannot satisfy; the
   final state becomes the block's reaching_out.  */
void
pass_vsetvl::emit_local_forward_vsetvls (const bb_info *bb)
{
  auto &block_info = m_vector_manager->vector_block_infos[bb->index ()];
  if (block_info.local_dem.empty_p ())
    return;

  vector_insn_info curr_info;
  for (insn_info *insn : bb->real_nondebug_insns ())
    {
      const vector_insn_info prev_info = curr_info;
      enum vsetvl_type type = NUM_VSETVL_TYPE;
      transfer_before (curr_info, insn);

      if (has_vtype_op (insn->rtl ()))
	{
	  if (static_cast<const vl_vtype_info &> (prev_info)
	      != static_cast<const vl_vtype_info &> (curr_info))
	    {
	      const auto require
		= m_vector_manager->vector_insn_infos[insn->uid ()];
	      if (!require.compatible_p (
		    static_cast<const vl_vtype_info &> (prev_info)))
		type = insert_vsetvl (EMIT_BEFORE, insn->rtl (), require,
				      prev_info);
	    }
	}

      /* Fix the issue of following sequence:
	 vsetivli zero, 5
	 ....
	 vsetvli zero, zero
	 vmv.x.s (demand AVL = 8).
	 ....
	 incorrect: vsetvli zero, zero ===> Since the curr_info is AVL = 8.
	 correct: vsetivli zero, 8
	 vadd (demand AVL = 8). */
      if (type == VSETVL_VTYPE_CHANGE_ONLY)
	{
	  /* Update the curr_info to be real correct AVL. */
	  curr_info.set_avl_info (prev_info.get_avl_info ());
	}
      transfer_after (curr_info, insn);
    }

  block_info.reaching_out = curr_info;
}
2870
4f673c5e
JZZ
2871enum fusion_type
2872pass_vsetvl::get_backward_fusion_type (const bb_info *bb,
2873 const vector_insn_info &prop)
9243c3d1 2874{
4f673c5e 2875 insn_info *insn = prop.get_insn ();
9243c3d1 2876
4f673c5e
JZZ
2877 /* TODO: We don't backward propagate the explict VSETVL here
2878 since we will change vsetvl and vsetvlmax intrinsics into
2879 no side effects which can be optimized into optimal location
2880 by GCC internal passes. We only need to support these backward
2881 propagation if vsetvl intrinsics have side effects. */
2882 if (vsetvl_insn_p (insn->rtl ()))
2883 return INVALID_FUSION;
2884
2885 gcc_assert (has_vtype_op (insn->rtl ()));
2886 rtx reg = NULL_RTX;
2887
2888 /* Case 1: Don't need VL. Just let it backward propagate. */
ec99ffab 2889 if (!prop.demand_p (DEMAND_AVL))
4f673c5e
JZZ
2890 return VALID_AVL_FUSION;
2891 else
9243c3d1 2892 {
4f673c5e
JZZ
2893 /* Case 2: CONST_INT AVL, we don't need to check def. */
2894 if (prop.has_avl_imm ())
2895 return VALID_AVL_FUSION;
2896 else
2897 {
2898 /* Case 3: REG AVL, we need to check the distance of def to make
2899 sure we won't backward propagate over the def. */
2900 gcc_assert (prop.has_avl_reg ());
2901 if (vlmax_avl_p (prop.get_avl ()))
2902 /* Check VL operand for vsetvl vl,zero. */
ec99ffab 2903 reg = prop.get_avl_reg_rtx ();
4f673c5e
JZZ
2904 else
2905 /* Check AVL operand for vsetvl zero,avl. */
ec99ffab 2906 reg = prop.get_avl ();
4f673c5e
JZZ
2907 }
2908 }
9243c3d1 2909
4f673c5e 2910 gcc_assert (reg);
ec99ffab
JZZ
2911 if (!prop.get_avl_source ()->insn ()->is_phi ()
2912 && prop.get_avl_source ()->insn ()->bb () == insn->bb ())
6b6b9c68
JZZ
2913 return INVALID_FUSION;
2914 hash_set<set_info *> sets
2915 = get_all_sets (prop.get_avl_source (), true, true, true);
2916 if (any_set_in_bb_p (sets, insn->bb ()))
2917 return INVALID_FUSION;
2918
2919 if (vlmax_avl_p (prop.get_avl ()))
4f673c5e 2920 {
6b6b9c68 2921 if (find_reg_killed_by (bb, reg))
4f673c5e 2922 return INVALID_FUSION;
6b6b9c68
JZZ
2923 else
2924 return VALID_AVL_FUSION;
4f673c5e 2925 }
6b6b9c68
JZZ
2926
2927 /* By default, we always enable backward fusion so that we can
2928 gain more optimizations. */
2929 if (!find_reg_killed_by (bb, reg))
2930 return VALID_AVL_FUSION;
2931 return KILLED_AVL_FUSION;
2932}
2933
2934/* We almost enable all cases in get_backward_fusion_type, this function
2935 disable the backward fusion by changing dirty blocks into hard empty
2936 blocks in forward dataflow. We can have more accurate optimization by
2937 this method. */
2938bool
2939pass_vsetvl::hard_empty_block_p (const bb_info *bb,
2940 const vector_insn_info &info) const
2941{
2942 if (!info.dirty_p () || !info.has_avl_reg ())
2943 return false;
2944
2945 basic_block cfg_bb = bb->cfg_bb ();
2946 sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index];
ec99ffab
JZZ
2947 set_info *set = info.get_avl_source ();
2948 rtx avl = gen_rtx_REG (Pmode, set->regno ());
6b6b9c68
JZZ
2949 hash_set<set_info *> sets = get_all_sets (set, true, false, false);
2950 hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb);
2951
2952 if (find_reg_killed_by (bb, avl))
2953 {
2954 /* Condition 1:
2955 Dirty block with killed AVL means that the empty block (no RVV
2956 instructions) are polluted as Dirty blocks with the value of current
2957 AVL is killed. For example:
2958 bb 0:
2959 ...
2960 bb 1:
2961 def a5
2962 bb 2:
2963 RVV (use a5)
2964 In backward dataflow, we will polluted BB0 and BB1 as Dirt with AVL
2965 killed. since a5 is killed in BB1.
2966 In this case, let's take a look at this example:
2967
2968 bb 3: bb 4:
2969 def3 a5 def4 a5
2970 bb 5: bb 6:
2971 def1 a5 def2 a5
2972 \ /
2973 \ /
2974 \ /
2975 \ /
2976 bb 7:
2977 RVV (use a5)
2978 In thi case, we can polluted BB5 and BB6 as dirty if get-def
2979 of a5 from RVV instruction in BB7 is the def1 in BB5 and
2980 def2 BB6 so we can return false early here for HARD_EMPTY_BLOCK_P.
2981 However, we are not sure whether BB3 and BB4 can be
2982 polluted as Dirty with AVL killed so we can't return false
2983 for HARD_EMPTY_BLOCK_P here since it's too early which will
2984 potentially produce issues. */
2985 gcc_assert (info.dirty_with_killed_avl_p ());
2986 if (info.get_avl_source ()
2987 && get_same_bb_set (sets, bb->cfg_bb ()) == info.get_avl_source ())
2988 return false;
4f673c5e 2989 }
9243c3d1 2990
6b6b9c68
JZZ
2991 /* Condition 2:
2992 Suppress the VL/VTYPE info backward propagation too early:
2993 ________
2994 | BB0 |
2995 |________|
2996 |
2997 ____|____
2998 | BB1 |
2999 |________|
3000 In this case, suppose BB 1 has multiple predecessors, BB 0 is one
3001 of them. BB1 has VL/VTYPE info (may be VALID or DIRTY) to backward
3002 propagate.
3003 The AVIN (available in) which is calculated by LCM is empty only
3004 in these 2 circumstances:
3005 1. all predecessors of BB1 are empty (not VALID
3006 and can not be polluted in backward fusion flow)
3007 2. VL/VTYPE info of BB1 predecessors are conflict.
3008
3009 We keep it as dirty in 2nd circumstance and set it as HARD_EMPTY
3010 (can not be polluted as DIRTY any more) in 1st circumstance.
3011 We don't backward propagate in 1st circumstance since there is
3012 no VALID RVV instruction and no polluted blocks (dirty blocks)
3013 by backward propagation from other following blocks.
3014 It's meaningless to keep it as Dirty anymore.
3015
3016 However, since we keep it as dirty in 2nd since there are VALID or
3017 Dirty blocks in predecessors, we can still gain the benefits and
3018 optimization opportunities. For example, in this case:
3019 for (size_t i = 0; i < n; i++)
3020 {
3021 if (i != cond) {
3022 vint8mf8_t v = *(vint8mf8_t*)(in + i + 100);
3023 *(vint8mf8_t*)(out + i + 100) = v;
3024 } else {
3025 vbool1_t v = *(vbool1_t*)(in + i + 400);
3026 *(vbool1_t*)(out + i + 400) = v;
3027 }
3028 }
3029 VL/VTYPE in if-else are conflict which will produce empty AVIN LCM result
3030 but we can still keep dirty blocks if *(i != cond)* is very unlikely then
3031 we can preset vsetvl (VL/VTYPE) info from else (static propability model).
3032
3033 We don't want to backward propagate VL/VTYPE information too early
3034 which is not the optimal and may potentially produce issues. */
3035 if (bitmap_empty_p (avin))
4f673c5e 3036 {
6b6b9c68
JZZ
3037 bool hard_empty_p = true;
3038 for (const basic_block pred_cfg_bb : pred_cfg_bbs)
3039 {
3040 if (pred_cfg_bb == ENTRY_BLOCK_PTR_FOR_FN (cfun))
3041 continue;
3042 sbitmap avout = m_vector_manager->vector_avout[pred_cfg_bb->index];
3043 if (!bitmap_empty_p (avout))
3044 {
3045 hard_empty_p = false;
3046 break;
3047 }
3048 }
3049 if (hard_empty_p)
3050 return true;
4f673c5e 3051 }
9243c3d1 3052
6b6b9c68
JZZ
3053 edge e;
3054 edge_iterator ei;
3055 bool has_avl_killed_insn_p = false;
3056 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
4f673c5e 3057 {
6b6b9c68
JZZ
3058 const auto block_info
3059 = m_vector_manager->vector_block_infos[e->dest->index];
3060 if (block_info.local_dem.dirty_with_killed_avl_p ())
9243c3d1 3061 {
6b6b9c68
JZZ
3062 has_avl_killed_insn_p = true;
3063 break;
3064 }
3065 }
3066 if (!has_avl_killed_insn_p)
3067 return false;
4f673c5e 3068
6b6b9c68
JZZ
3069 bool any_set_in_bbs_p = false;
3070 for (const basic_block pred_cfg_bb : pred_cfg_bbs)
3071 {
3072 insn_info *def_insn = extract_single_source (set);
3073 if (def_insn)
3074 {
3075 /* Condition 3:
3076
3077 Case 1: Case 2:
3078 bb 0: bb 0:
3079 def a5 101 ...
3080 bb 1: bb 1:
3081 ... ...
3082 bb 2: bb 2:
3083 RVV 1 (use a5 with TAIL ANY) ...
3084 bb 3: bb 3:
3085 def a5 101 def a5 101
3086 bb 4: bb 4:
3087 ... ...
3088 bb 5: bb 5:
3089 RVV 2 (use a5 with TU) RVV 1 (use a5)
3090
3091 Case 1: We can pollute BB3,BB2,BB1,BB0 are all Dirt blocks
3092 with killed AVL so that we can merge TU demand info from RVV 2
3093 into RVV 1 and elide the vsevl instruction in BB5.
3094
3095 TODO: We only optimize for single source def since multiple source
3096 def is quite complicated.
3097
3098 Case 2: We only can pollute bb 3 as dirty and it has been accepted
3099 in Condition 2 and we can't pollute BB3,BB2,BB1,BB0 like case 1. */
3100 insn_info *last_killed_insn
3101 = find_reg_killed_by (crtl->ssa->bb (pred_cfg_bb), avl);
3102 if (!last_killed_insn || pred_cfg_bb == def_insn->bb ()->cfg_bb ())
3103 continue;
3104 if (source_equal_p (last_killed_insn, def_insn))
4f673c5e 3105 {
6b6b9c68
JZZ
3106 any_set_in_bbs_p = true;
3107 break;
4f673c5e 3108 }
9243c3d1
JZZ
3109 }
3110 else
4f673c5e 3111 {
6b6b9c68
JZZ
3112 /* Condition 4:
3113
3114 bb 0: bb 1: bb 3:
3115 def1 a5 def2 a5 ...
3116 \ / /
3117 \ / /
3118 \ / /
3119 \ / /
3120 bb 4: /
3121 | /
3122 | /
3123 bb 5: /
3124 | /
3125 | /
3126 bb 6: /
3127 | /
3128 | /
3129 bb 8:
3130 RVV 1 (use a5)
3131 If we get-def (REAL) of a5 from RVV 1 instruction, we will get
3132 def1 from BB0 and def2 from BB1. So we will pollute BB6,BB5,BB4,
3133 BB0,BB1 with DIRTY and set BB3 as HARD_EMPTY so that we won't
3134 propagate AVL to BB3. */
3135 if (any_set_in_bb_p (sets, crtl->ssa->bb (pred_cfg_bb)))
3136 {
3137 any_set_in_bbs_p = true;
3138 break;
3139 }
4f673c5e 3140 }
9243c3d1 3141 }
6b6b9c68
JZZ
3142 if (!any_set_in_bbs_p)
3143 return true;
3144 return false;
9243c3d1
JZZ
3145}
3146
3147/* Compute global backward demanded info. */
387cd9d3
JZZ
3148bool
3149pass_vsetvl::backward_demand_fusion (void)
9243c3d1
JZZ
3150{
3151 /* We compute global infos by backward propagation.
3152 We want to have better performance in these following cases:
3153
3154 1. for (size_t i = 0; i < n; i++) {
3155 if (i != cond) {
3156 vint8mf8_t v = *(vint8mf8_t*)(in + i + 100);
3157 *(vint8mf8_t*)(out + i + 100) = v;
3158 } else {
3159 vbool1_t v = *(vbool1_t*)(in + i + 400);
3160 *(vbool1_t*)(out + i + 400) = v;
3161 }
3162 }
3163
3164 Since we don't have any RVV instruction in the BEFORE blocks,
3165 LCM fails to optimize such case. We want to backward propagate
3166 them into empty blocks so that we could have better performance
3167 in LCM.
3168
3169 2. bb 0:
3170 vsetvl e8,mf8 (demand RATIO)
3171 bb 1:
3172 vsetvl e32,mf2 (demand SEW and LMUL)
3173 We backward propagate the first VSETVL into e32,mf2 so that we
3174 could be able to eliminate the second VSETVL in LCM. */
3175
387cd9d3 3176 bool changed_p = false;
9243c3d1
JZZ
3177 for (const bb_info *bb : crtl->ssa->reverse_bbs ())
3178 {
3179 basic_block cfg_bb = bb->cfg_bb ();
b9b251b7
JZZ
3180 const auto &curr_block_info
3181 = m_vector_manager->vector_block_infos[cfg_bb->index];
3182 const auto &prop = curr_block_info.local_dem;
9243c3d1
JZZ
3183
3184 /* If there is nothing to propagate, just skip it. */
3185 if (!prop.valid_or_dirty_p ())
3186 continue;
3187
b9b251b7 3188 if (!backward_propagate_worthwhile_p (cfg_bb, curr_block_info))
9243c3d1
JZZ
3189 continue;
3190
d06e9264
JZ
3191 /* Fix PR108270:
3192
3193 bb 0 -> bb 1
3194 We don't need to backward fuse VL/VTYPE info from bb 1 to bb 0
3195 if bb 1 is not inside a loop and all predecessors of bb 0 are empty. */
3196 if (m_vector_manager->all_empty_predecessor_p (cfg_bb))
3197 continue;
3198
9243c3d1
JZZ
3199 edge e;
3200 edge_iterator ei;
3201 /* Backward propagate to each predecessor. */
3202 FOR_EACH_EDGE (e, ei, cfg_bb->preds)
3203 {
9243c3d1
JZZ
3204 auto &block_info
3205 = m_vector_manager->vector_block_infos[e->src->index];
3206
3207 /* We don't propagate through critical edges. */
3208 if (e->flags & EDGE_COMPLEX)
3209 continue;
3210 if (e->src->index == ENTRY_BLOCK_PTR_FOR_FN (cfun)->index)
3211 continue;
44c918b5
JZZ
3212 /* If prop is demand of vsetvl instruction and reaching doesn't demand
3213 AVL. We don't backward propagate since vsetvl instruction has no
3214 side effects. */
3215 if (vsetvl_insn_p (prop.get_insn ()->rtl ())
3216 && propagate_avl_across_demands_p (prop, block_info.reaching_out))
3217 continue;
9243c3d1
JZZ
3218
3219 if (block_info.reaching_out.unknown_p ())
3220 continue;
6b6b9c68
JZZ
3221 else if (block_info.reaching_out.hard_empty_p ())
3222 continue;
9243c3d1
JZZ
3223 else if (block_info.reaching_out.empty_p ())
3224 {
4f673c5e
JZZ
3225 enum fusion_type type
3226 = get_backward_fusion_type (crtl->ssa->bb (e->src), prop);
3227 if (type == INVALID_FUSION)
9243c3d1
JZZ
3228 continue;
3229
4f673c5e
JZZ
3230 block_info.reaching_out = prop;
3231 block_info.reaching_out.set_dirty (type);
6b6b9c68
JZZ
3232
3233 if (prop.has_avl_reg () && !vlmax_avl_p (prop.get_avl ()))
3234 {
3235 hash_set<set_info *> sets
3236 = get_all_sets (prop.get_avl_source (), true, true, true);
3237 set_info *set = get_same_bb_set (sets, e->src);
3238 if (set)
3239 block_info.reaching_out.set_avl_info (
3240 avl_info (prop.get_avl (), set));
3241 }
3242
4f673c5e
JZZ
3243 block_info.local_dem = block_info.reaching_out;
3244 block_info.probability = curr_block_info.probability;
3245 changed_p = true;
9243c3d1
JZZ
3246 }
3247 else if (block_info.reaching_out.dirty_p ())
3248 {
3249 /* DIRTY -> DIRTY or VALID -> DIRTY. */
3250 vector_insn_info new_info;
3251
3252 if (block_info.reaching_out.compatible_p (prop))
3253 {
ec99ffab 3254 if (block_info.reaching_out.available_p (prop))
9243c3d1 3255 continue;
4f673c5e 3256 new_info = block_info.reaching_out.merge (prop, GLOBAL_MERGE);
6b6b9c68
JZZ
3257 new_info.set_dirty (
3258 block_info.reaching_out.dirty_with_killed_avl_p ());
3259 block_info.probability += curr_block_info.probability;
9243c3d1
JZZ
3260 }
3261 else
3262 {
4f673c5e
JZZ
3263 if (curr_block_info.probability > block_info.probability)
3264 {
6b6b9c68
JZZ
3265 enum fusion_type type
3266 = get_backward_fusion_type (crtl->ssa->bb (e->src),
3267 prop);
3268 if (type == INVALID_FUSION)
3269 continue;
4f673c5e 3270 new_info = prop;
6b6b9c68 3271 new_info.set_dirty (type);
4f673c5e
JZZ
3272 block_info.probability = curr_block_info.probability;
3273 }
9243c3d1
JZZ
3274 else
3275 continue;
3276 }
3277
ec99ffab
JZZ
3278 if (propagate_avl_across_demands_p (prop,
3279 block_info.reaching_out))
3280 {
3281 rtx reg = new_info.get_avl_reg_rtx ();
3282 if (find_reg_killed_by (crtl->ssa->bb (e->src), reg))
3283 new_info.set_dirty (true);
3284 }
3285
9243c3d1
JZZ
3286 block_info.local_dem = new_info;
3287 block_info.reaching_out = new_info;
387cd9d3 3288 changed_p = true;
9243c3d1
JZZ
3289 }
3290 else
3291 {
3292 /* We not only change the info during backward propagation,
3293 but also change the VSETVL instruction. */
3294 gcc_assert (block_info.reaching_out.valid_p ());
6b6b9c68
JZZ
3295 hash_set<set_info *> sets
3296 = get_all_sets (prop.get_avl_source (), true, false, false);
3297 set_info *set = get_same_bb_set (sets, e->src);
3298 if (vsetvl_insn_p (block_info.reaching_out.get_insn ()->rtl ())
3299 && prop.has_avl_reg () && !vlmax_avl_p (prop.get_avl ()))
3300 {
3301 if (!block_info.reaching_out.same_vlmax_p (prop))
3302 continue;
3303 if (block_info.reaching_out.same_vtype_p (prop))
3304 continue;
3305 if (!set)
3306 continue;
3307 if (set->insn () != block_info.reaching_out.get_insn ())
3308 continue;
3309 }
ec99ffab
JZZ
3310
3311 if (!block_info.reaching_out.compatible_p (prop))
3312 continue;
3313 if (block_info.reaching_out.available_p (prop))
3314 continue;
9243c3d1
JZZ
3315
3316 vector_insn_info be_merged = block_info.reaching_out;
3317 if (block_info.local_dem == block_info.reaching_out)
3318 be_merged = block_info.local_dem;
4f673c5e
JZZ
3319 vector_insn_info new_info = be_merged.merge (prop, GLOBAL_MERGE);
3320
3321 if (curr_block_info.probability > block_info.probability)
3322 block_info.probability = curr_block_info.probability;
9243c3d1 3323
ec99ffab 3324 if (propagate_avl_across_demands_p (prop, block_info.reaching_out)
a481eed8
JZZ
3325 && !reg_available_p (crtl->ssa->bb (e->src)->end_insn (),
3326 new_info))
ec99ffab
JZZ
3327 continue;
3328
aef20243 3329 change_vsetvl_insn (new_info.get_insn (), new_info);
9243c3d1
JZZ
3330 if (block_info.local_dem == block_info.reaching_out)
3331 block_info.local_dem = new_info;
3332 block_info.reaching_out = new_info;
387cd9d3 3333 changed_p = true;
9243c3d1
JZZ
3334 }
3335 }
3336 }
387cd9d3
JZZ
3337 return changed_p;
3338}
3339
3340/* Compute global forward demanded info. */
3341bool
3342pass_vsetvl::forward_demand_fusion (void)
3343{
3344 /* Enhance the global information propagation especially
3345 backward propagation miss the propagation.
3346 Consider such case:
3347
3348 bb0
3349 (TU)
3350 / \
3351 bb1 bb2
3352 (TU) (ANY)
3353 existing edge -----> \ / (TU) <----- LCM create this edge.
3354 bb3
3355 (TU)
3356
3357 Base on the situation, LCM fails to eliminate the VSETVL instruction and
3358 insert an edge from bb2 to bb3 since we can't backward propagate bb3 into
3359 bb2. To avoid this confusing LCM result and non-optimal codegen, we should
3360 forward propagate information from bb0 to bb2 which is friendly to LCM. */
3361 bool changed_p = false;
3362 for (const bb_info *bb : crtl->ssa->bbs ())
3363 {
3364 basic_block cfg_bb = bb->cfg_bb ();
3365 const auto &prop
3366 = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out;
3367
3368 /* If there is nothing to propagate, just skip it. */
3369 if (!prop.valid_or_dirty_p ())
3370 continue;
3371
00fb7698
JZZ
3372 if (cfg_bb == ENTRY_BLOCK_PTR_FOR_FN (cfun))
3373 continue;
3374
ec99ffab
JZZ
3375 if (vsetvl_insn_p (prop.get_insn ()->rtl ()))
3376 continue;
3377
387cd9d3
JZZ
3378 edge e;
3379 edge_iterator ei;
3380 /* Forward propagate to each successor. */
3381 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
3382 {
3383 auto &local_dem
3384 = m_vector_manager->vector_block_infos[e->dest->index].local_dem;
3385 auto &reaching_out
3386 = m_vector_manager->vector_block_infos[e->dest->index].reaching_out;
3387
3388 /* It's quite obvious, we don't need to propagate itself. */
3389 if (e->dest->index == cfg_bb->index)
3390 continue;
00fb7698
JZZ
3391 /* We don't propagate through critical edges. */
3392 if (e->flags & EDGE_COMPLEX)
3393 continue;
3394 if (e->dest->index == EXIT_BLOCK_PTR_FOR_FN (cfun)->index)
3395 continue;
387cd9d3
JZZ
3396
3397 /* If there is nothing to propagate, just skip it. */
3398 if (!local_dem.valid_or_dirty_p ())
3399 continue;
ec99ffab 3400 if (local_dem.available_p (prop))
4f673c5e
JZZ
3401 continue;
3402 if (!local_dem.compatible_p (prop))
3403 continue;
ec99ffab
JZZ
3404 if (propagate_avl_across_demands_p (prop, local_dem))
3405 continue;
387cd9d3 3406
4f673c5e
JZZ
3407 vector_insn_info new_info = local_dem.merge (prop, GLOBAL_MERGE);
3408 new_info.set_insn (local_dem.get_insn ());
3409 if (local_dem.dirty_p ())
387cd9d3 3410 {
4f673c5e 3411 gcc_assert (local_dem == reaching_out);
6b6b9c68 3412 new_info.set_dirty (local_dem.dirty_with_killed_avl_p ());
4f673c5e 3413 local_dem = new_info;
4f673c5e
JZZ
3414 reaching_out = local_dem;
3415 }
3416 else
3417 {
3418 if (reaching_out == local_dem)
3419 reaching_out = new_info;
3420 local_dem = new_info;
3421 change_vsetvl_insn (local_dem.get_insn (), new_info);
387cd9d3 3422 }
4f673c5e
JZZ
3423 auto &prob
3424 = m_vector_manager->vector_block_infos[e->dest->index].probability;
3425 auto &curr_prob
3426 = m_vector_manager->vector_block_infos[cfg_bb->index].probability;
3427 prob = curr_prob * e->probability;
3428 changed_p = true;
387cd9d3
JZZ
3429 }
3430 }
3431 return changed_p;
3432}
3433
3434void
3435pass_vsetvl::demand_fusion (void)
3436{
3437 bool changed_p = true;
3438 while (changed_p)
3439 {
3440 changed_p = false;
4f673c5e
JZZ
3441 /* To optimize the case like this:
3442 void f2 (int8_t * restrict in, int8_t * restrict out, int n, int cond)
3443 {
3444 size_t vl = 101;
3445
3446 for (size_t i = 0; i < n; i++)
3447 {
3448 vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl);
3449 __riscv_vse8_v_i8mf8 (out + i + 300, v, vl);
3450 }
3451
3452 for (size_t i = 0; i < n; i++)
3453 {
3454 vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
3455 __riscv_vse8_v_i8mf8 (out + i, v, vl);
3456
3457 vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl);
3458 __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
3459 }
3460 }
3461
3462 bb 0: li a5, 101 (killed avl)
3463 ...
3464 bb 1: vsetvli zero, a5, ta
3465 ...
3466 bb 2: li a5, 101 (killed avl)
3467 ...
3468 bb 3: vsetvli zero, a3, tu
3469
3470 We want to fuse VSEVLI instructions on bb 1 and bb 3. However, there is
3471 an AVL kill instruction in bb 2 that we can't backward fuse bb 3 or
3472 forward bb 1 arbitrarily. We need available information of each block to
3473 help for such cases. */
6b6b9c68
JZZ
3474 changed_p |= backward_demand_fusion ();
3475 changed_p |= forward_demand_fusion ();
3476 }
3477
3478 changed_p = true;
3479 while (changed_p)
3480 {
3481 changed_p = false;
3482 prune_expressions ();
3483 m_vector_manager->create_bitmap_vectors ();
3484 compute_local_properties ();
4f673c5e
JZZ
3485 compute_available (m_vector_manager->vector_comp,
3486 m_vector_manager->vector_kill,
3487 m_vector_manager->vector_avout,
3488 m_vector_manager->vector_avin);
6b6b9c68 3489 changed_p |= cleanup_illegal_dirty_blocks ();
4f673c5e
JZZ
3490 m_vector_manager->free_bitmap_vectors ();
3491 if (!m_vector_manager->vector_exprs.is_empty ())
3492 m_vector_manager->vector_exprs.release ();
387cd9d3 3493 }
9243c3d1
JZZ
3494
3495 if (dump_file)
3496 {
3497 fprintf (dump_file, "\n\nDirty blocks list: ");
681a5632
JZZ
3498 for (const bb_info *bb : crtl->ssa->bbs ())
3499 if (m_vector_manager->vector_block_infos[bb->index ()]
3500 .reaching_out.dirty_p ())
3501 fprintf (dump_file, "%d ", bb->index ());
9243c3d1
JZZ
3502 fprintf (dump_file, "\n\n");
3503 }
3504}
3505
6b6b9c68
JZZ
3506/* Cleanup illegal dirty blocks. */
3507bool
3508pass_vsetvl::cleanup_illegal_dirty_blocks (void)
3509{
3510 bool changed_p = false;
3511 for (const bb_info *bb : crtl->ssa->bbs ())
3512 {
3513 basic_block cfg_bb = bb->cfg_bb ();
3514 const auto &prop
3515 = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out;
3516
3517 /* If there is nothing to cleanup, just skip it. */
3518 if (!prop.valid_or_dirty_p ())
3519 continue;
3520
3521 if (hard_empty_block_p (bb, prop))
3522 {
3523 m_vector_manager->vector_block_infos[cfg_bb->index].local_dem
3524 = vector_insn_info::get_hard_empty ();
3525 m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out
3526 = vector_insn_info::get_hard_empty ();
3527 changed_p = true;
3528 continue;
3529 }
3530 }
3531 return changed_p;
3532}
3533
9243c3d1
JZZ
3534/* Assemble the candidates expressions for LCM. */
3535void
3536pass_vsetvl::prune_expressions (void)
3537{
681a5632 3538 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 3539 {
681a5632
JZZ
3540 if (m_vector_manager->vector_block_infos[bb->index ()]
3541 .local_dem.valid_or_dirty_p ())
9243c3d1 3542 m_vector_manager->create_expr (
681a5632
JZZ
3543 m_vector_manager->vector_block_infos[bb->index ()].local_dem);
3544 if (m_vector_manager->vector_block_infos[bb->index ()]
9243c3d1
JZZ
3545 .reaching_out.valid_or_dirty_p ())
3546 m_vector_manager->create_expr (
681a5632 3547 m_vector_manager->vector_block_infos[bb->index ()].reaching_out);
9243c3d1
JZZ
3548 }
3549
3550 if (dump_file)
3551 {
3552 fprintf (dump_file, "\nThe total VSETVL expression num = %d\n",
3553 m_vector_manager->vector_exprs.length ());
3554 fprintf (dump_file, "Expression List:\n");
3555 for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
3556 {
3557 fprintf (dump_file, "Expr[%ld]:\n", i);
3558 m_vector_manager->vector_exprs[i]->dump (dump_file);
3559 fprintf (dump_file, "\n");
3560 }
3561 }
3562}
3563
4f673c5e
JZZ
3564/* Compute the local properties of each recorded expression.
3565
3566 Local properties are those that are defined by the block, irrespective of
3567 other blocks.
3568
3569 An expression is transparent in a block if its operands are not modified
3570 in the block.
3571
3572 An expression is computed (locally available) in a block if it is computed
3573 at least once and expression would contain the same value if the
3574 computation was moved to the end of the block.
3575
3576 An expression is locally anticipatable in a block if it is computed at
3577 least once and expression would contain the same value if the computation
3578 was moved to the beginning of the block. */
9243c3d1
JZZ
3579void
3580pass_vsetvl::compute_local_properties (void)
3581{
3582 /* - If T is locally available at the end of a block, then T' must be
3583 available at the end of the same block. Since some optimization has
3584 occurred earlier, T' might not be locally available, however, it must
3585 have been previously computed on all paths. As a formula, T at AVLOC(B)
3586 implies that T' at AVOUT(B).
3587 An "available occurrence" is one that is the last occurrence in the
3588 basic block and the operands are not modified by following statements in
3589 the basic block [including this insn].
3590
3591 - If T is locally anticipated at the beginning of a block, then either
3592 T', is locally anticipated or it is already available from previous
3593 blocks. As a formula, this means that T at ANTLOC(B) implies that T' at
3594 ANTLOC(B) at AVIN(B).
3595 An "anticipatable occurrence" is one that is the first occurrence in the
3596 basic block, the operands are not modified in the basic block prior
3597 to the occurrence and the output is not used between the start of
3598 the block and the occurrence. */
3599
3600 basic_block cfg_bb;
4f673c5e 3601 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 3602 {
4f673c5e 3603 unsigned int curr_bb_idx = bb->index ();
9243c3d1
JZZ
3604 const auto local_dem
3605 = m_vector_manager->vector_block_infos[curr_bb_idx].local_dem;
3606 const auto reaching_out
3607 = m_vector_manager->vector_block_infos[curr_bb_idx].reaching_out;
3608
4f673c5e
JZZ
3609 /* Compute transparent. */
3610 for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
9243c3d1 3611 {
4f673c5e
JZZ
3612 const vector_insn_info *expr = m_vector_manager->vector_exprs[i];
3613 if (local_dem.real_dirty_p () || local_dem.valid_p ()
3614 || local_dem.unknown_p ()
3615 || has_vsetvl_killed_avl_p (bb, local_dem))
9243c3d1 3616 bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx], i);
4f673c5e
JZZ
3617 /* FIXME: Here we set the block as non-transparent (killed) if there
3618 is an instruction killed the value of AVL according to the
3619 definition of Local transparent. This is true for such following
3620 case:
3621
3622 bb 0 (Loop label):
3623 vsetvl zero, a5, e8, mf8
3624 bb 1:
3625 def a5
3626 bb 2:
3627 branch bb 0 (Loop label).
3628
3629 In this case, we known there is a loop bb 0->bb 1->bb 2. According
3630 to LCM definition, it is correct when we set vsetvl zero, a5, e8,
3631 mf8 as non-transparent (killed) so that LCM will not hoist outside
3632 the bb 0.
3633
3634 However, such conservative configuration will forbid optimization
3635 on some unlucky case. For example:
3636
3637 bb 0:
3638 li a5, 101
3639 bb 1:
3640 vsetvl zero, a5, e8, mf8
3641 bb 2:
3642 li a5, 101
3643 bb 3:
3644 vsetvl zero, a5, e8, mf8.
3645 So we also relax def a5 as transparent to gain more optimizations
3646 as long as the all real def insn of avl do not come from this
3647 block. This configuration may be still missing some optimization
3648 opportunities. */
6b6b9c68 3649 if (find_reg_killed_by (bb, expr->get_avl ()))
4f673c5e 3650 {
6b6b9c68
JZZ
3651 hash_set<set_info *> sets
3652 = get_all_sets (expr->get_avl_source (), true, false, false);
3653 if (any_set_in_bb_p (sets, bb))
4f673c5e
JZZ
3654 bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx],
3655 i);
3656 }
9243c3d1
JZZ
3657 }
3658
4f673c5e 3659 /* Compute anticipatable occurrences. */
6b6b9c68
JZZ
3660 if (local_dem.valid_p () || local_dem.real_dirty_p ()
3661 || (has_vsetvl_killed_avl_p (bb, local_dem)
3662 && vlmax_avl_p (local_dem.get_avl ())))
4f673c5e
JZZ
3663 if (anticipatable_occurrence_p (bb, local_dem))
3664 bitmap_set_bit (m_vector_manager->vector_antic[curr_bb_idx],
3665 m_vector_manager->get_expr_id (local_dem));
9243c3d1 3666
4f673c5e 3667 /* Compute available occurrences. */
9243c3d1
JZZ
3668 if (reaching_out.valid_or_dirty_p ())
3669 {
9243c3d1
JZZ
3670 auto_vec<size_t> available_list
3671 = m_vector_manager->get_all_available_exprs (reaching_out);
3672 for (size_t i = 0; i < available_list.length (); i++)
4f673c5e
JZZ
3673 {
3674 const vector_insn_info *expr
3675 = m_vector_manager->vector_exprs[available_list[i]];
3676 if (reaching_out.real_dirty_p ()
3677 || has_vsetvl_killed_avl_p (bb, reaching_out)
3678 || available_occurrence_p (bb, *expr))
3679 bitmap_set_bit (m_vector_manager->vector_comp[curr_bb_idx],
3680 available_list[i]);
3681 }
9243c3d1
JZZ
3682 }
3683 }
3684
3685 /* Compute kill for each basic block using:
3686
3687 ~(TRANSP | COMP)
3688 */
3689
3690 FOR_EACH_BB_FN (cfg_bb, cfun)
3691 {
3692 bitmap_ior (m_vector_manager->vector_kill[cfg_bb->index],
3693 m_vector_manager->vector_transp[cfg_bb->index],
3694 m_vector_manager->vector_comp[cfg_bb->index]);
3695 bitmap_not (m_vector_manager->vector_kill[cfg_bb->index],
3696 m_vector_manager->vector_kill[cfg_bb->index]);
3697 }
3698
3699 FOR_EACH_BB_FN (cfg_bb, cfun)
3700 {
3701 edge e;
3702 edge_iterator ei;
3703
3704 /* If the current block is the destination of an abnormal edge, we
3705 kill all trapping (for PRE) and memory (for hoist) expressions
3706 because we won't be able to properly place the instruction on
3707 the edge. So make them neither anticipatable nor transparent.
3708 This is fairly conservative.
3709
3710 ??? For hoisting it may be necessary to check for set-and-jump
3711 instructions here, not just for abnormal edges. The general problem
3712 is that when an expression cannot not be placed right at the end of
3713 a basic block we should account for any side-effects of a subsequent
3714 jump instructions that could clobber the expression. It would
3715 be best to implement this check along the lines of
3716 should_hoist_expr_to_dom where the target block is already known
3717 and, hence, there's no need to conservatively prune expressions on
3718 "intermediate" set-and-jump instructions. */
3719 FOR_EACH_EDGE (e, ei, cfg_bb->preds)
3720 if (e->flags & EDGE_COMPLEX)
3721 {
3722 bitmap_clear (m_vector_manager->vector_antic[cfg_bb->index]);
3723 bitmap_clear (m_vector_manager->vector_transp[cfg_bb->index]);
3724 }
3725 }
3726}
3727
3728/* Return true if VSETVL in the block can be refined as vsetvl zero,zero. */
3729bool
6b6b9c68
JZZ
3730pass_vsetvl::can_refine_vsetvl_p (const basic_block cfg_bb,
3731 const vector_insn_info &info) const
9243c3d1
JZZ
3732{
3733 if (!m_vector_manager->all_same_ratio_p (
3734 m_vector_manager->vector_avin[cfg_bb->index]))
3735 return false;
3736
005fad9d
JZZ
3737 if (!m_vector_manager->all_same_avl_p (
3738 cfg_bb, m_vector_manager->vector_avin[cfg_bb->index]))
3739 return false;
3740
9243c3d1
JZZ
3741 size_t expr_id
3742 = bitmap_first_set_bit (m_vector_manager->vector_avin[cfg_bb->index]);
6b6b9c68
JZZ
3743 if (!m_vector_manager->vector_exprs[expr_id]->same_vlmax_p (info))
3744 return false;
3745 if (!m_vector_manager->vector_exprs[expr_id]->compatible_avl_p (info))
9243c3d1
JZZ
3746 return false;
3747
3748 edge e;
3749 edge_iterator ei;
3750 bool all_valid_p = true;
3751 FOR_EACH_EDGE (e, ei, cfg_bb->preds)
3752 {
3753 if (bitmap_empty_p (m_vector_manager->vector_avout[e->src->index]))
3754 {
3755 all_valid_p = false;
3756 break;
3757 }
3758 }
3759
3760 if (!all_valid_p)
3761 return false;
3762 return true;
3763}
3764
3765/* Optimize athe case like this:
3766
3767 bb 0:
3768 vsetvl 0 a5,zero,e8,mf8
3769 insn 0 (demand SEW + LMUL)
3770 bb 1:
3771 vsetvl 1 a5,zero,e16,mf4
3772 insn 1 (demand SEW + LMUL)
3773
3774 In this case, we should be able to refine
3775 vsetvl 1 into vsetvl zero, zero according AVIN. */
3776void
3777pass_vsetvl::refine_vsetvls (void) const
3778{
3779 basic_block cfg_bb;
3780 FOR_EACH_BB_FN (cfg_bb, cfun)
3781 {
3782 auto info = m_vector_manager->vector_block_infos[cfg_bb->index].local_dem;
3783 insn_info *insn = info.get_insn ();
3784 if (!info.valid_p ())
3785 continue;
3786
3787 rtx_insn *rinsn = insn->rtl ();
6b6b9c68 3788 if (!can_refine_vsetvl_p (cfg_bb, info))
9243c3d1
JZZ
3789 continue;
3790
ec99ffab
JZZ
3791 /* We can't refine user vsetvl into vsetvl zero,zero since the dest
3792 will be used by the following instructions. */
3793 if (vector_config_insn_p (rinsn))
3794 {
3795 m_vector_manager->to_refine_vsetvls.add (rinsn);
3796 continue;
3797 }
ff8f9544
JZ
3798
3799 /* If all incoming edges to a block have a vector state that is compatbile
3800 with the block. In such a case we need not emit a vsetvl in the current
3801 block. */
3802
3803 gcc_assert (has_vtype_op (insn->rtl ()));
3804 rinsn = PREV_INSN (insn->rtl ());
3805 gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ())));
3806 if (m_vector_manager->all_avail_in_compatible_p (cfg_bb))
3807 {
3808 size_t id = m_vector_manager->get_expr_id (info);
3809 if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], id))
3810 continue;
3811 eliminate_insn (rinsn);
3812 }
3813 else
3814 {
3815 rtx new_pat
3816 = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX);
3817 change_insn (rinsn, new_pat);
3818 }
9243c3d1
JZZ
3819 }
3820}
3821
3822void
3823pass_vsetvl::cleanup_vsetvls ()
3824{
3825 basic_block cfg_bb;
3826 FOR_EACH_BB_FN (cfg_bb, cfun)
3827 {
3828 auto &info
3829 = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out;
3830 gcc_assert (m_vector_manager->expr_set_num (
3831 m_vector_manager->vector_del[cfg_bb->index])
3832 <= 1);
3833 for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
3834 {
3835 if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], i))
3836 {
3837 if (info.dirty_p ())
3838 info.set_unknown ();
3839 else
3840 {
4f673c5e
JZZ
3841 const auto dem
3842 = m_vector_manager->vector_block_infos[cfg_bb->index]
3843 .local_dem;
3844 gcc_assert (dem == *m_vector_manager->vector_exprs[i]);
3845 insn_info *insn = dem.get_insn ();
9243c3d1
JZZ
3846 gcc_assert (insn && insn->rtl ());
3847 rtx_insn *rinsn;
ec99ffab
JZZ
3848 /* We can't eliminate user vsetvl since the dest will be used
3849 * by the following instructions. */
9243c3d1 3850 if (vector_config_insn_p (insn->rtl ()))
9243c3d1 3851 {
ec99ffab
JZZ
3852 m_vector_manager->to_delete_vsetvls.add (insn->rtl ());
3853 continue;
9243c3d1 3854 }
ec99ffab
JZZ
3855
3856 gcc_assert (has_vtype_op (insn->rtl ()));
3857 rinsn = PREV_INSN (insn->rtl ());
3858 gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ())));
9243c3d1
JZZ
3859 eliminate_insn (rinsn);
3860 }
3861 }
3862 }
3863 }
3864}
3865
/* Materialize the vsetvl insertions computed by LCM.

   Two kinds of insertions are performed:
   1. For every (edge, expression) pair set in VECTOR_INSERT, generate the
      vsetvl sequence and queue it on the edge (committed later by
      commit_edge_insertions when this function returns true).
   2. For every block whose reaching-out info is still "dirty" (a demand that
      was not satisfied by any inserted/retained vsetvl), emit a compensating
      vsetvl at the end of that block.

   Returns true iff at least one insn was queued on an edge, i.e. the caller
   must run commit_edge_insertions.  */
bool
pass_vsetvl::commit_vsetvls (void)
{
  bool need_commit = false;

  for (int ed = 0; ed < NUM_EDGES (m_vector_manager->vector_edge_list); ed++)
    {
      for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
	{
	  edge eg = INDEX_EDGE (m_vector_manager->vector_edge_list, ed);
	  if (bitmap_bit_p (m_vector_manager->vector_insert[ed], i))
	    {
	      const vector_insn_info *require
		= m_vector_manager->vector_exprs[i];
	      gcc_assert (require->valid_or_dirty_p ());
	      rtl_profile_for_edge (eg);
	      start_sequence ();

	      insn_info *insn = require->get_insn ();
	      vector_insn_info prev_info = vector_insn_info ();
	      /* If every expression available at the end of the source block
		 agrees on ratio and AVL, we know the incoming VL/VTYPE state
		 and can emit a cheaper (possibly vtype-only) transition.  */
	      sbitmap bitdata = m_vector_manager->vector_avout[eg->src->index];
	      if (m_vector_manager->all_same_ratio_p (bitdata)
		  && m_vector_manager->all_same_avl_p (eg->dest, bitdata))
		{
		  size_t first = bitmap_first_set_bit (bitdata);
		  prev_info = *m_vector_manager->vector_exprs[first];
		}

	      insert_vsetvl (EMIT_DIRECT, insn->rtl (), *require, prev_info);
	      rtx_insn *rinsn = get_insns ();
	      end_sequence ();
	      default_rtl_profile ();

	      /* We should not get an abnormal edge here.  */
	      gcc_assert (!(eg->flags & EDGE_ABNORMAL));
	      need_commit = true;
	      insert_insn_on_edge (rinsn, eg);
	    }
	}
    }

  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      basic_block cfg_bb = bb->cfg_bb ();
      const auto reaching_out
	= m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out;
      if (!reaching_out.dirty_p ())
	continue;

      if (reaching_out.dirty_with_killed_avl_p ())
	{
	  if (!has_vsetvl_killed_avl_p (bb, reaching_out))
	    continue;

	  /* If some expression available on entry to this block already
	     satisfies the dirty demand, no compensation insn is needed.  */
	  unsigned int bb_index;
	  sbitmap_iterator sbi;
	  sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index];
	  bool available_p = false;
	  EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi)
	    {
	      if (m_vector_manager->vector_exprs[bb_index]->available_p (
		    reaching_out))
		{
		  available_p = true;
		  break;
		}
	    }
	  if (available_p)
	    continue;
	}

      /* Pick the cheapest vsetvl form that satisfies the demand.  */
      rtx new_pat;
      if (!reaching_out.demand_p (DEMAND_AVL))
	{
	  /* AVL is not demanded; use a dummy zero AVL.  */
	  vl_vtype_info new_info = reaching_out;
	  new_info.set_avl_info (avl_info (const0_rtx, nullptr));
	  new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX);
	}
      else if (can_refine_vsetvl_p (cfg_bb, reaching_out))
	new_pat
	  = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, reaching_out, NULL_RTX);
      else if (vlmax_avl_p (reaching_out.get_avl ()))
	new_pat = gen_vsetvl_pat (VSETVL_NORMAL, reaching_out,
				  reaching_out.get_avl_reg_rtx ());
      else
	new_pat
	  = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, reaching_out, NULL_RTX);

      start_sequence ();
      emit_insn (new_pat);
      rtx_insn *rinsn = get_insns ();
      end_sequence ();
      insert_insn_end_basic_block (rinsn, cfg_bb);
      if (dump_file)
	{
	  fprintf (dump_file,
		   "\nInsert vsetvl insn %d at the end of <bb %d>:\n",
		   INSN_UID (rinsn), cfg_bb->index);
	  print_rtl_single (dump_file, rinsn);
	}
    }

  return need_commit;
}
3970
/* Phase 4: partial redundancy elimination of vsetvl insns via lazy code
   motion (LCM).  Computes local properties, runs the LCM dataflow
   (pre_edge_lcm_avs), then refines, deletes, and inserts vsetvls as the
   solution dictates.  */
void
pass_vsetvl::pre_vsetvl (void)
{
  /* Compute entity list.  */
  prune_expressions ();

  m_vector_manager->create_bitmap_vectors ();
  compute_local_properties ();
  m_vector_manager->vector_edge_list = pre_edge_lcm_avs (
    m_vector_manager->vector_exprs.length (), m_vector_manager->vector_transp,
    m_vector_manager->vector_comp, m_vector_manager->vector_antic,
    m_vector_manager->vector_kill, m_vector_manager->vector_avin,
    m_vector_manager->vector_avout, &m_vector_manager->vector_insert,
    &m_vector_manager->vector_del);

  /* We should dump the information before CFG is changed.  Otherwise it will
     produce ICE (internal compiler error).  */
  if (dump_file)
    m_vector_manager->dump (dump_file);

  refine_vsetvls ();
  cleanup_vsetvls ();
  /* commit_vsetvls queues insns on edges; only commit if anything was
     queued, since commit_edge_insertions may split edges and alter the
     CFG.  */
  bool need_commit = commit_vsetvls ();
  if (need_commit)
    commit_edge_insertions ();
}
3997
/* Before the VSETVL PASS, the RVV instruction patterns depend on the AVL
   operand implicitly.  Since we will emit VSETVL instructions and make RVV
   instructions depend on the VL/VTYPE global status registers, we remove
   such AVL operands from the RVV instruction patterns here in order to
   remove AVL dependencies when the AVL operand is a register operand.

   Before the VSETVL PASS:
     li a5,32
     ...
     vadd.vv (..., a5)
   After the VSETVL PASS:
     li a5,32
     vsetvli zero, a5, ...
     ...
     vadd.vv (..., const_int 0).  */
void
pass_vsetvl::cleanup_insns (void) const
{
  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      for (insn_info *insn : bb->real_nondebug_insns ())
	{
	  rtx_insn *rinsn = insn->rtl ();
	  const auto &dem = m_vector_manager->vector_insn_infos[insn->uid ()];
	  /* Eliminate local vsetvl:
	       bb 0:
	       vsetvl a5,a6,...
	       vsetvl zero,a5.

	     Eliminate vsetvl in bb2 when a5 is only coming from
	     bb 0.  */
	  local_eliminate_vsetvl_insn (dem);

	  /* Drop vsetvls whose AVL is VLMAX; they are fully redundant at
	     this point.  */
	  if (vlmax_avl_insn_p (rinsn))
	    {
	      eliminate_insn (rinsn);
	      continue;
	    }

	  /* Erase the AVL operand from the instruction.  Only do so when
	     the AVL register appears exactly once in the pattern; otherwise
	     replacing it could clobber a different use of the same reg.  */
	  if (!has_vl_op (rinsn) || !REG_P (get_vl (rinsn)))
	    continue;
	  rtx avl = get_vl (rinsn);
	  if (count_regno_occurrences (rinsn, REGNO (avl)) == 1)
	    {
	      /* Get the list of uses for the new instruction.  */
	      auto attempt = crtl->ssa->new_change_attempt ();
	      insn_change change (insn);
	      /* Remove the use of the substituted value.  */
	      access_array_builder uses_builder (attempt);
	      uses_builder.reserve (insn->num_uses () - 1);
	      for (use_info *use : insn->uses ())
		if (use != find_access (insn->uses (), REGNO (avl)))
		  uses_builder.quick_push (use);
	      use_array new_uses = use_array (uses_builder.finish ());
	      change.new_uses = new_uses;
	      change.move_range = insn->ebb ()->insn_range ();
	      /* Replace the AVL register with (const_int 0) in the pattern.
		 Fault-first loads have no single_set form, so substitute on
		 the whole PATTERN instead of SET_SRC.  */
	      rtx pat;
	      if (fault_first_load_p (rinsn))
		pat = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx);
	      else
		{
		  rtx set = single_set (rinsn);
		  rtx src
		    = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx);
		  pat = gen_rtx_SET (SET_DEST (set), src);
		}
	      gcc_assert (change_insn (crtl->ssa, change, insn, pat));
	    }
	}
    }
}
4070
/* Phase 6: propagate AVL between vsetvl instructions and simplify their
   destinations.  Runs after LCM changed the CFG, so RTL_SSA is torn down
   and rebuilt first.  */
void
pass_vsetvl::propagate_avl (void) const
{
  /* Rebuild the RTL_SSA according to the new CFG generated by LCM.  */
  /* Finalization of RTL_SSA.  */
  free_dominance_info (CDI_DOMINATORS);
  if (crtl->ssa->perform_pending_updates ())
    cleanup_cfg (0);
  delete crtl->ssa;
  crtl->ssa = nullptr;
  /* Initialization of RTL_SSA.  */
  calculate_dominance_info (CDI_DOMINATORS);
  df_analyze ();
  crtl->ssa = new function_info (cfun);

  /* Deletions are deferred until after the walk; deleting insns while
     iterating over RTL_SSA would invalidate the traversal.  */
  hash_set<rtx_insn *> to_delete;
  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      for (insn_info *insn : bb->real_nondebug_insns ())
	{
	  if (vsetvl_discard_result_insn_p (insn->rtl ()))
	    {
	      rtx avl = get_avl (insn->rtl ());
	      if (!REG_P (avl))
		continue;

	      /* Only handle AVLs with a unique reaching definition.  */
	      set_info *set = find_access (insn->uses (), REGNO (avl))->def ();
	      insn_info *def_insn = extract_single_source (set);
	      if (!def_insn)
		continue;

	      /* Handle this case:
		 vsetvli a6,zero,e32,m1,ta,mu
		 li a5,4096
		 add a7,a0,a5
		 addi a7,a7,-96
		 vsetvli t1,zero,e8,mf8,ta,ma
		 vle8.v v24,0(a7)
		 add a5,a3,a5
		 addi a5,a5,-96
		 vse8.v v24,0(a5)
		 vsetvli zero,a6,e32,m1,tu,ma
	      */
	      if (vsetvl_insn_p (def_insn->rtl ()))
		{
		  vl_vtype_info def_info = get_vl_vtype_info (def_insn);
		  vl_vtype_info info = get_vl_vtype_info (insn);
		  rtx avl = get_avl (def_insn->rtl ());
		  rtx vl = get_vl (def_insn->rtl ());
		  /* Propagation is only valid when both vsetvls agree on the
		     SEW/LMUL ratio, so VL is preserved.  */
		  if (def_info.get_ratio () == info.get_ratio ())
		    {
		      if (vlmax_avl_p (def_info.get_avl ()))
			{
			  /* Defining vsetvl uses VLMAX: rewrite this one to
			     the same VLMAX form in place.  */
			  info.set_avl_info (
			    avl_info (def_info.get_avl (), nullptr));
			  rtx new_pat
			    = gen_vsetvl_pat (VSETVL_NORMAL, info, vl);
			  validate_change (insn->rtl (),
					   &PATTERN (insn->rtl ()), new_pat,
					   false);
			  continue;
			}
		      if (def_info.has_avl_imm () || rtx_equal_p (avl, vl))
			{
			  /* Forward the definition's AVL directly; if this
			     insn was the only user, both insns become dead
			     and are queued for deletion.  */
			  info.set_avl_info (avl_info (avl, nullptr));
			  emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_AFTER,
					    info, NULL_RTX, insn->rtl ());
			  if (set->single_nondebug_insn_use ())
			    {
			      to_delete.add (insn->rtl ());
			      to_delete.add (def_insn->rtl ());
			    }
			  continue;
			}
		    }
		}
	    }

	  /* Change vsetvl rd, rs1 --> vsetvl zero, rs1,
	     if rd is not used by any nondebug instructions.
	     Even though this PASS runs after RA and it doesn't help for
	     reduce register pressure, it can help instructions scheduling
	     since we remove the dependencies.  */
	  if (vsetvl_insn_p (insn->rtl ()))
	    {
	      rtx vl = get_vl (insn->rtl ());
	      rtx avl = get_avl (insn->rtl ());
	      def_info *def = find_access (insn->defs (), REGNO (vl));
	      set_info *set = safe_dyn_cast<set_info *> (def);
	      vector_insn_info info;
	      info.parse_insn (insn);
	      gcc_assert (set);
	      /* Honor decisions recorded earlier by the elimination phases:
		 vsetvls marked to-delete/to-refine can be removed or demoted
		 to vtype-only once their VL result has no remaining uses.  */
	      if (m_vector_manager->to_delete_vsetvls.contains (insn->rtl ()))
		{
		  m_vector_manager->to_delete_vsetvls.remove (insn->rtl ());
		  if (m_vector_manager->to_refine_vsetvls.contains (
			insn->rtl ()))
		    m_vector_manager->to_refine_vsetvls.remove (insn->rtl ());
		  if (!set->has_nondebug_insn_uses ())
		    {
		      to_delete.add (insn->rtl ());
		      continue;
		    }
		}
	      if (m_vector_manager->to_refine_vsetvls.contains (insn->rtl ()))
		{
		  m_vector_manager->to_refine_vsetvls.remove (insn->rtl ());
		  if (!set->has_nondebug_insn_uses ())
		    {
		      rtx new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY,
						    info, NULL_RTX);
		      change_insn (insn->rtl (), new_pat);
		      continue;
		    }
		}
	      if (vlmax_avl_p (avl))
		continue;
	      rtx new_pat
		= gen_vsetvl_pat (VSETVL_DISCARD_RESULT, info, NULL_RTX);
	      if (!set->has_nondebug_insn_uses ())
		{
		  validate_change (insn->rtl (), &PATTERN (insn->rtl ()),
				   new_pat, false);
		  continue;
		}
	    }
	}
    }

  for (rtx_insn *rinsn : to_delete)
    eliminate_insn (rinsn);
}
4203
9243c3d1
JZZ
/* Per-function setup: build RTL_SSA (only when optimizing; the simple
   -O0 strategy doesn't need it), allocate the vector info manager, and
   precompute block execution probabilities.  */
void
pass_vsetvl::init (void)
{
  if (optimize > 0)
    {
      /* Initialization of RTL_SSA.  */
      calculate_dominance_info (CDI_DOMINATORS);
      df_analyze ();
      crtl->ssa = new function_info (cfun);
    }

  m_vector_manager = new vector_infos_manager ();
  compute_probabilities ();

  if (dump_file)
    {
      fprintf (dump_file, "\nPrologue: Initialize vector infos\n");
      m_vector_manager->dump (dump_file);
    }
}
4224
/* Per-function teardown: finalize RTL_SSA (when it was built in init) and
   release the vector info manager.  */
void
pass_vsetvl::done (void)
{
  if (optimize > 0)
    {
      /* Finalization of RTL_SSA.  */
      free_dominance_info (CDI_DOMINATORS);
      if (crtl->ssa->perform_pending_updates ())
	cleanup_cfg (0);
      delete crtl->ssa;
      crtl->ssa = nullptr;
    }
  m_vector_manager->release ();
  delete m_vector_manager;
  m_vector_manager = nullptr;
}
4241
acc10c79
JZZ
/* Compute the execution probability for each block by propagating the entry
   block's "always" probability forward along CFG edges, accumulating
   curr_prob * edge_probability into each successor.  */
void
pass_vsetvl::compute_probabilities (void)
{
  /* Don't compute it in -O0 since we don't need it.  */
  if (!optimize)
    return;
  edge e;
  edge_iterator ei;

  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      basic_block cfg_bb = bb->cfg_bb ();
      auto &curr_prob
	= m_vector_manager->vector_block_infos[cfg_bb->index].probability;

      /* GCC assumes the entry block (bb 0) is always executed, so set its
	 probability as "always".  */
      if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
	curr_prob = profile_probability::always ();
      /* Exit block (bb 1) is the block we don't need to process.  */
      if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
	continue;

      /* RTL_SSA visits blocks in an order where all of this block's
	 predecessors were already processed, so curr_prob is set by now.  */
      gcc_assert (curr_prob.initialized_p ());
      FOR_EACH_EDGE (e, ei, cfg_bb->succs)
	{
	  auto &new_prob
	    = m_vector_manager->vector_block_infos[e->dest->index].probability;
	  if (!new_prob.initialized_p ())
	    new_prob = curr_prob * e->probability;
	  else if (new_prob == profile_probability::always ())
	    continue;
	  else
	    new_prob += curr_prob * e->probability;
	}
    }
}
4280
9243c3d1
JZZ
/* Lazy vsetvl insertion for optimize > 0.  Runs the six phases described in
   the file header: local backward demand analysis, local emission, global
   demand fusion, LCM-based PRE, AVL/VL operand cleanup, and AVL
   propagation.  */
void
pass_vsetvl::lazy_vsetvl (void)
{
  if (dump_file)
    fprintf (dump_file,
	     "\nEntering Lazy VSETVL PASS and Handling %d basic blocks for "
	     "function:%s\n",
	     n_basic_blocks_for_fn (cfun), function_name (cfun));

  /* Phase 1 - Compute the local dems within each block.
     The data-flow analysis within each block is backward analysis.  */
  if (dump_file)
    fprintf (dump_file, "\nPhase 1: Compute local backward vector infos\n");
  for (const bb_info *bb : crtl->ssa->bbs ())
    compute_local_backward_infos (bb);
  if (dump_file)
    m_vector_manager->dump (dump_file);

  /* Phase 2 - Emit vsetvl instructions within each basic block according to
     demand, compute and save ANTLOC && AVLOC of each block.  */
  if (dump_file)
    fprintf (dump_file,
	     "\nPhase 2: Emit vsetvl instruction within each block\n");
  for (const bb_info *bb : crtl->ssa->bbs ())
    emit_local_forward_vsetvls (bb);
  if (dump_file)
    m_vector_manager->dump (dump_file);

  /* Phase 3 - Propagate demanded info across blocks.  */
  if (dump_file)
    fprintf (dump_file, "\nPhase 3: Demands propagation across blocks\n");
  demand_fusion ();
  if (dump_file)
    m_vector_manager->dump (dump_file);

  /* Phase 4 - Lazy code motion.  */
  if (dump_file)
    fprintf (dump_file, "\nPhase 4: PRE vsetvl by Lazy code motion (LCM)\n");
  pre_vsetvl ();

  /* Phase 5 - Cleanup AVL && VL operand of RVV instruction.  */
  if (dump_file)
    fprintf (dump_file, "\nPhase 5: Cleanup AVL and VL operands\n");
  cleanup_insns ();

  /* Phase 6 - Rebuild RTL_SSA to propagate AVL between vsetvls.  */
  if (dump_file)
    fprintf (dump_file,
	     "\nPhase 6: Rebuild RTL_SSA to propagate AVL between vsetvls\n");
  propagate_avl ();
}
4333
/* Main entry point for this pass.  Splits insns, bails out early when the
   function contains no vector instructions, then runs either the simple
   (-O0) or the lazy (LCM-based) vsetvl insertion strategy.  */
unsigned int
pass_vsetvl::execute (function *)
{
  if (n_basic_blocks_for_fn (cfun) <= 0)
    return 0;

  /* The RVV instruction may change after split which is not a stable
     instruction.  We need to split it here to avoid potential issue
     since the VSETVL PASS is inserted before the split PASS.  */
  split_all_insns ();

  /* Early return for there is no vector instructions.  */
  if (!has_vector_insn (cfun))
    return 0;

  init ();

  if (!optimize)
    simple_vsetvl ();
  else
    lazy_vsetvl ();

  done ();
  return 0;
}
4360
/* Pass-manager factory: allocate a fresh pass_vsetvl instance.  */
rtl_opt_pass *
make_pass_vsetvl (gcc::context *ctxt)
{
  return new pass_vsetvl (ctxt);
}