]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/riscv/riscv-vsetvl.cc
Daily bump.
[thirdparty/gcc.git] / gcc / config / riscv / riscv-vsetvl.cc
CommitLineData
9243c3d1 1/* VSETVL pass for RISC-V 'V' Extension for GNU compiler.
c841bde5 2 Copyright (C) 2022-2023 Free Software Foundation, Inc.
9243c3d1
JZZ
3 Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3, or (at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21/* This pass is to Set VL/VTYPE global status for RVV instructions
22 that depend on VL and VTYPE registers by Lazy code motion (LCM).
23
24 Strategy:
25
26 - Backward demanded info fusion within block.
27
28 - Lazy code motion (LCM) based demanded info backward propagation.
29
30 - RTL_SSA framework for def-use, PHI analysis.
31
32 - Lazy code motion (LCM) for global VL/VTYPE optimization.
33
34 Assumption:
35
36 - Each avl operand is either an immediate (must be in range 0 ~ 31) or reg.
37
38 This pass consists of 5 phases:
39
40 - Phase 1 - compute VL/VTYPE demanded information within each block
41 by backward data-flow analysis.
42
43 - Phase 2 - Emit vsetvl instructions within each basic block according to
44 demand, compute and save ANTLOC && AVLOC of each block.
45
387cd9d3
JZZ
46 - Phase 3 - Backward && forward demanded info propagation and fusion across
47 blocks.
9243c3d1
JZZ
48
49 - Phase 4 - Lazy code motion including: compute local properties,
50 pre_edge_lcm and vsetvl insertion && delete edges for LCM results.
51
52 - Phase 5 - Cleanup AVL operand of RVV instruction since it will not be
53 used any more and VL operand of VSETVL instruction if it is not used by
54 any non-debug instructions.
55
6b6b9c68
JZZ
56 - Phase 6 - Propagate AVL between vsetvl instructions.
57
9243c3d1
JZZ
58 Implementation:
59
60 - The subroutine of optimize == 0 is simple_vsetvl.
 61 This function simply performs vsetvl insertion for each RVV
62 instruction. No optimization.
63
64 - The subroutine of optimize > 0 is lazy_vsetvl.
 65 This function optimizes the vsetvl insertion process by
ec99ffab
JZZ
66 lazy code motion (LCM) layering on RTL_SSA.
67
68 - get_avl (), get_insn (), get_avl_source ():
69
70 1. get_insn () is the current instruction, find_access (get_insn
71 ())->def is the same as get_avl_source () if get_insn () demand VL.
72 2. If get_avl () is non-VLMAX REG, get_avl () == get_avl_source
73 ()->regno ().
74 3. get_avl_source ()->regno () is the REGNO that we backward propagate.
75 */
9243c3d1
JZZ
76
77#define IN_TARGET_CODE 1
78#define INCLUDE_ALGORITHM
79#define INCLUDE_FUNCTIONAL
80
81#include "config.h"
82#include "system.h"
83#include "coretypes.h"
84#include "tm.h"
85#include "backend.h"
86#include "rtl.h"
87#include "target.h"
88#include "tree-pass.h"
89#include "df.h"
90#include "rtl-ssa.h"
91#include "cfgcleanup.h"
92#include "insn-config.h"
93#include "insn-attr.h"
94#include "insn-opinit.h"
95#include "tm-constrs.h"
96#include "cfgrtl.h"
97#include "cfganal.h"
98#include "lcm.h"
99#include "predict.h"
100#include "profile-count.h"
101#include "riscv-vsetvl.h"
102
103using namespace rtl_ssa;
104using namespace riscv_vector;
105
ec99ffab
JZZ
106static CONSTEXPR const unsigned ALL_SEW[] = {8, 16, 32, 64};
107static CONSTEXPR const vlmul_type ALL_LMUL[]
108 = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2};
ec99ffab 109
9243c3d1
JZZ
110DEBUG_FUNCTION void
111debug (const vector_insn_info *info)
112{
113 info->dump (stderr);
114}
115
116DEBUG_FUNCTION void
117debug (const vector_infos_manager *info)
118{
119 info->dump (stderr);
120}
121
122static bool
123vlmax_avl_p (rtx x)
124{
125 return x && rtx_equal_p (x, RVV_VLMAX);
126}
127
128static bool
129vlmax_avl_insn_p (rtx_insn *rinsn)
130{
85112fbb
JZZ
131 return (INSN_CODE (rinsn) == CODE_FOR_vlmax_avlsi
132 || INSN_CODE (rinsn) == CODE_FOR_vlmax_avldi);
9243c3d1
JZZ
133}
134
8d8cc482
JZZ
135/* Return true if the block is a loop itself:
136 local_dem
137 __________
138 ____|____ |
139 | | |
140 |________| |
141 |_________|
142 reaching_out
143*/
9243c3d1
JZZ
144static bool
145loop_basic_block_p (const basic_block cfg_bb)
146{
8d8cc482
JZZ
147 if (JUMP_P (BB_END (cfg_bb)) && any_condjump_p (BB_END (cfg_bb)))
148 {
149 edge e;
150 edge_iterator ei;
151 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
152 if (e->dest->index == cfg_bb->index)
153 return true;
154 }
155 return false;
9243c3d1
JZZ
156}
157
158/* Return true if it is an RVV instruction depends on VTYPE global
159 status register. */
160static bool
161has_vtype_op (rtx_insn *rinsn)
162{
163 return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn);
164}
165
166/* Return true if it is an RVV instruction depends on VL global
167 status register. */
168static bool
169has_vl_op (rtx_insn *rinsn)
170{
171 return recog_memoized (rinsn) >= 0 && get_attr_has_vl_op (rinsn);
172}
173
174/* Is this a SEW value that can be encoded into the VTYPE format. */
175static bool
176valid_sew_p (size_t sew)
177{
178 return exact_log2 (sew) && sew >= 8 && sew <= 64;
179}
180
ec99ffab
JZZ
181/* Return true if the instruction ignores VLMUL field of VTYPE. */
182static bool
183ignore_vlmul_insn_p (rtx_insn *rinsn)
184{
185 return get_attr_type (rinsn) == TYPE_VIMOVVX
186 || get_attr_type (rinsn) == TYPE_VFMOVVF
187 || get_attr_type (rinsn) == TYPE_VIMOVXV
188 || get_attr_type (rinsn) == TYPE_VFMOVFV;
189}
190
191/* Return true if the instruction is scalar move instruction. */
192static bool
193scalar_move_insn_p (rtx_insn *rinsn)
194{
195 return get_attr_type (rinsn) == TYPE_VIMOVXV
196 || get_attr_type (rinsn) == TYPE_VFMOVFV;
197}
198
60bd33bc
JZZ
199/* Return true if the instruction is fault first load instruction. */
200static bool
201fault_first_load_p (rtx_insn *rinsn)
202{
6313b045
JZZ
203 return recog_memoized (rinsn) >= 0
204 && (get_attr_type (rinsn) == TYPE_VLDFF
205 || get_attr_type (rinsn) == TYPE_VLSEGDFF);
60bd33bc
JZZ
206}
207
208/* Return true if the instruction is read vl instruction. */
209static bool
210read_vl_insn_p (rtx_insn *rinsn)
211{
212 return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_RDVL;
213}
214
9243c3d1
JZZ
215/* Return true if it is a vsetvl instruction. */
216static bool
217vector_config_insn_p (rtx_insn *rinsn)
218{
219 return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL;
220}
221
222/* Return true if it is vsetvldi or vsetvlsi. */
223static bool
224vsetvl_insn_p (rtx_insn *rinsn)
225{
6b6b9c68
JZZ
226 if (!vector_config_insn_p (rinsn))
227 return false;
85112fbb 228 return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi
6b6b9c68
JZZ
229 || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi);
230}
231
232/* Return true if it is vsetvl zero, rs1. */
233static bool
234vsetvl_discard_result_insn_p (rtx_insn *rinsn)
235{
236 if (!vector_config_insn_p (rinsn))
237 return false;
238 return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi
239 || INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi);
9243c3d1
JZZ
240}
241
9243c3d1 242static bool
4f673c5e 243real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb)
9243c3d1 244{
4f673c5e 245 return insn != nullptr && insn->is_real () && insn->bb () == bb;
9243c3d1
JZZ
246}
247
9243c3d1 248static bool
4f673c5e 249before_p (const insn_info *insn1, const insn_info *insn2)
9243c3d1 250{
9b9a1ac1 251 return insn1->compare_with (insn2) < 0;
4f673c5e
JZZ
252}
253
6b6b9c68
JZZ
254static insn_info *
255find_reg_killed_by (const bb_info *bb, rtx x)
4f673c5e 256{
6b6b9c68
JZZ
257 if (!x || vlmax_avl_p (x) || !REG_P (x))
258 return nullptr;
259 for (insn_info *insn : bb->reverse_real_nondebug_insns ())
4f673c5e 260 if (find_access (insn->defs (), REGNO (x)))
6b6b9c68
JZZ
261 return insn;
262 return nullptr;
263}
264
265/* Helper function to get VL operand. */
266static rtx
267get_vl (rtx_insn *rinsn)
268{
269 if (has_vl_op (rinsn))
270 {
271 extract_insn_cached (rinsn);
272 return recog_data.operand[get_attr_vl_op_idx (rinsn)];
273 }
274 return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0));
4f673c5e
JZZ
275}
276
277static bool
278has_vsetvl_killed_avl_p (const bb_info *bb, const vector_insn_info &info)
279{
280 if (info.dirty_with_killed_avl_p ())
281 {
282 rtx avl = info.get_avl ();
6b6b9c68 283 if (vlmax_avl_p (avl))
ec99ffab 284 return find_reg_killed_by (bb, info.get_avl_reg_rtx ()) != nullptr;
4f673c5e
JZZ
285 for (const insn_info *insn : bb->reverse_real_nondebug_insns ())
286 {
287 def_info *def = find_access (insn->defs (), REGNO (avl));
288 if (def)
289 {
290 set_info *set = safe_dyn_cast<set_info *> (def);
291 if (!set)
292 return false;
293
294 rtx new_avl = gen_rtx_REG (GET_MODE (avl), REGNO (avl));
295 gcc_assert (new_avl != avl);
296 if (!info.compatible_avl_p (avl_info (new_avl, set)))
297 return false;
298
299 return true;
300 }
301 }
302 }
303 return false;
304}
305
9243c3d1
JZZ
306/* An "anticipatable occurrence" is one that is the first occurrence in the
307 basic block, the operands are not modified in the basic block prior
308 to the occurrence and the output is not used between the start of
4f673c5e
JZZ
309 the block and the occurrence.
310
311 For VSETVL instruction, we have these following formats:
312 1. vsetvl zero, rs1.
313 2. vsetvl zero, imm.
314 3. vsetvl rd, rs1.
315
316 So base on these circumstances, a DEM is considered as a local anticipatable
317 occurrence should satisfy these following conditions:
318
319 1). rs1 (avl) are not modified in the basic block prior to the VSETVL.
320 2). rd (vl) are not modified in the basic block prior to the VSETVL.
321 3). rd (vl) is not used between the start of the block and the occurrence.
322
323 Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE
324 is modified prior to the occurrence. This case is already considered as
325 a non-local anticipatable occurrence.
326*/
9243c3d1 327static bool
4f673c5e 328anticipatable_occurrence_p (const bb_info *bb, const vector_insn_info dem)
9243c3d1 329{
4f673c5e 330 insn_info *insn = dem.get_insn ();
9243c3d1
JZZ
331 /* The only possible operand we care of VSETVL is AVL. */
332 if (dem.has_avl_reg ())
333 {
4f673c5e 334 /* rs1 (avl) are not modified in the basic block prior to the VSETVL. */
9243c3d1
JZZ
335 if (!vlmax_avl_p (dem.get_avl ()))
336 {
ec99ffab 337 set_info *set = dem.get_avl_source ();
9243c3d1
JZZ
338 /* If it's undefined, it's not anticipatable conservatively. */
339 if (!set)
340 return false;
4f673c5e
JZZ
341 if (real_insn_and_same_bb_p (set->insn (), bb)
342 && before_p (set->insn (), insn))
9243c3d1
JZZ
343 return false;
344 }
345 }
346
4f673c5e 347 /* rd (vl) is not used between the start of the block and the occurrence. */
9243c3d1
JZZ
348 if (vsetvl_insn_p (insn->rtl ()))
349 {
4f673c5e
JZZ
350 rtx dest = get_vl (insn->rtl ());
351 for (insn_info *i = insn->prev_nondebug_insn ();
352 real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ())
353 {
354 /* rd (vl) is not used between the start of the block and the
355 * occurrence. */
356 if (find_access (i->uses (), REGNO (dest)))
357 return false;
358 /* rd (vl) are not modified in the basic block prior to the VSETVL. */
359 if (find_access (i->defs (), REGNO (dest)))
360 return false;
361 }
9243c3d1
JZZ
362 }
363
364 return true;
365}
366
367/* An "available occurrence" is one that is the last occurrence in the
368 basic block and the operands are not modified by following statements in
4f673c5e
JZZ
369 the basic block [including this insn].
370
371 For VSETVL instruction, we have these following formats:
372 1. vsetvl zero, rs1.
373 2. vsetvl zero, imm.
374 3. vsetvl rd, rs1.
375
376 So base on these circumstances, a DEM is considered as a local available
377 occurrence should satisfy these following conditions:
378
379 1). rs1 (avl) are not modified by following statements in
380 the basic block.
381 2). rd (vl) are not modified by following statements in
382 the basic block.
383
384 Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE
385 is modified prior to the occurrence. This case is already considered as
386 a non-local available occurrence.
387*/
9243c3d1 388static bool
4f673c5e 389available_occurrence_p (const bb_info *bb, const vector_insn_info dem)
9243c3d1 390{
4f673c5e 391 insn_info *insn = dem.get_insn ();
9243c3d1
JZZ
392 /* The only possible operand we care of VSETVL is AVL. */
393 if (dem.has_avl_reg ())
394 {
9243c3d1
JZZ
395 if (!vlmax_avl_p (dem.get_avl ()))
396 {
4f673c5e
JZZ
397 rtx dest = NULL_RTX;
398 if (vsetvl_insn_p (insn->rtl ()))
399 dest = get_vl (insn->rtl ());
400 for (const insn_info *i = insn; real_insn_and_same_bb_p (i, bb);
401 i = i->next_nondebug_insn ())
402 {
60bd33bc
JZZ
403 if (read_vl_insn_p (i->rtl ()))
404 continue;
4f673c5e
JZZ
405 /* rs1 (avl) are not modified by following statements in
406 the basic block. */
407 if (find_access (i->defs (), REGNO (dem.get_avl ())))
408 return false;
409 /* rd (vl) are not modified by following statements in
410 the basic block. */
411 if (dest && find_access (i->defs (), REGNO (dest)))
412 return false;
413 }
9243c3d1
JZZ
414 }
415 }
416 return true;
417}
418
6b6b9c68
JZZ
419static bool
420insn_should_be_added_p (const insn_info *insn, unsigned int types)
9243c3d1 421{
6b6b9c68
JZZ
422 if (insn->is_real () && (types & REAL_SET))
423 return true;
424 if (insn->is_phi () && (types & PHI_SET))
425 return true;
426 if (insn->is_bb_head () && (types & BB_HEAD_SET))
427 return true;
428 if (insn->is_bb_end () && (types & BB_END_SET))
429 return true;
430 return false;
9243c3d1
JZZ
431}
432
6b6b9c68
JZZ
433/* Recursively find all define instructions. The kind of instruction is
434 specified by the DEF_TYPE. */
435static hash_set<set_info *>
436get_all_sets (phi_info *phi, unsigned int types)
9243c3d1 437{
6b6b9c68 438 hash_set<set_info *> insns;
4f673c5e
JZZ
439 auto_vec<phi_info *> work_list;
440 hash_set<phi_info *> visited_list;
441 if (!phi)
6b6b9c68 442 return hash_set<set_info *> ();
4f673c5e 443 work_list.safe_push (phi);
9243c3d1 444
4f673c5e 445 while (!work_list.is_empty ())
9243c3d1 446 {
4f673c5e
JZZ
447 phi_info *phi = work_list.pop ();
448 visited_list.add (phi);
449 for (use_info *use : phi->inputs ())
9243c3d1 450 {
4f673c5e 451 def_info *def = use->def ();
6b6b9c68
JZZ
452 set_info *set = safe_dyn_cast<set_info *> (def);
453 if (!set)
454 return hash_set<set_info *> ();
9243c3d1 455
6b6b9c68 456 gcc_assert (!set->insn ()->is_debug_insn ());
9243c3d1 457
6b6b9c68
JZZ
458 if (insn_should_be_added_p (set->insn (), types))
459 insns.add (set);
460 if (set->insn ()->is_phi ())
4f673c5e 461 {
6b6b9c68 462 phi_info *new_phi = as_a<phi_info *> (set);
4f673c5e
JZZ
463 if (!visited_list.contains (new_phi))
464 work_list.safe_push (new_phi);
465 }
466 }
9243c3d1 467 }
4f673c5e
JZZ
468 return insns;
469}
9243c3d1 470
6b6b9c68
JZZ
471static hash_set<set_info *>
472get_all_sets (set_info *set, bool /* get_real_inst */ real_p,
473 bool /*get_phi*/ phi_p, bool /* get_function_parameter*/ param_p)
474{
475 if (real_p && phi_p && param_p)
476 return get_all_sets (safe_dyn_cast<phi_info *> (set),
477 REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET);
478
479 else if (real_p && param_p)
480 return get_all_sets (safe_dyn_cast<phi_info *> (set),
481 REAL_SET | BB_HEAD_SET | BB_END_SET);
482
483 else if (real_p)
484 return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET);
485 return hash_set<set_info *> ();
486}
487
488/* Helper function to get AVL operand. */
489static rtx
490get_avl (rtx_insn *rinsn)
491{
492 if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn))
493 return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0);
494
495 if (!has_vl_op (rinsn))
496 return NULL_RTX;
497 if (get_attr_avl_type (rinsn) == VLMAX)
498 return RVV_VLMAX;
499 extract_insn_cached (rinsn);
500 return recog_data.operand[get_attr_vl_op_idx (rinsn)];
501}
502
503static set_info *
504get_same_bb_set (hash_set<set_info *> &sets, const basic_block cfg_bb)
505{
506 for (set_info *set : sets)
507 if (set->bb ()->cfg_bb () == cfg_bb)
508 return set;
509 return nullptr;
510}
511
4f673c5e
JZZ
512/* Recursively find all predecessor blocks for cfg_bb. */
513static hash_set<basic_block>
514get_all_predecessors (basic_block cfg_bb)
515{
516 hash_set<basic_block> blocks;
517 auto_vec<basic_block> work_list;
518 hash_set<basic_block> visited_list;
519 work_list.safe_push (cfg_bb);
9243c3d1 520
4f673c5e 521 while (!work_list.is_empty ())
9243c3d1 522 {
4f673c5e
JZZ
523 basic_block new_cfg_bb = work_list.pop ();
524 visited_list.add (new_cfg_bb);
525 edge e;
526 edge_iterator ei;
527 FOR_EACH_EDGE (e, ei, new_cfg_bb->preds)
528 {
529 if (!visited_list.contains (e->src))
530 work_list.safe_push (e->src);
531 blocks.add (e->src);
532 }
9243c3d1 533 }
4f673c5e
JZZ
534 return blocks;
535}
9243c3d1 536
4f673c5e
JZZ
537/* Return true if there is an INSN in insns staying in the block BB. */
538static bool
6b6b9c68 539any_set_in_bb_p (hash_set<set_info *> sets, const bb_info *bb)
4f673c5e 540{
6b6b9c68
JZZ
541 for (const set_info *set : sets)
542 if (set->bb ()->index () == bb->index ())
4f673c5e
JZZ
543 return true;
544 return false;
9243c3d1
JZZ
545}
546
547/* Helper function to get SEW operand. We always have SEW value for
548 all RVV instructions that have VTYPE OP. */
549static uint8_t
550get_sew (rtx_insn *rinsn)
551{
552 return get_attr_sew (rinsn);
553}
554
555/* Helper function to get VLMUL operand. We always have VLMUL value for
556 all RVV instructions that have VTYPE OP. */
557static enum vlmul_type
558get_vlmul (rtx_insn *rinsn)
559{
560 return (enum vlmul_type) get_attr_vlmul (rinsn);
561}
562
563/* Get default tail policy. */
564static bool
565get_default_ta ()
566{
567 /* For the instruction that doesn't require TA, we still need a default value
568 to emit vsetvl. We pick up the default value according to prefer policy. */
569 return (bool) (get_prefer_tail_policy () & 0x1
570 || (get_prefer_tail_policy () >> 1 & 0x1));
571}
572
573/* Get default mask policy. */
574static bool
575get_default_ma ()
576{
577 /* For the instruction that doesn't require MA, we still need a default value
578 to emit vsetvl. We pick up the default value according to prefer policy. */
579 return (bool) (get_prefer_mask_policy () & 0x1
580 || (get_prefer_mask_policy () >> 1 & 0x1));
581}
582
583/* Helper function to get TA operand. */
584static bool
585tail_agnostic_p (rtx_insn *rinsn)
586{
587 /* If it doesn't have TA, we return agnostic by default. */
588 extract_insn_cached (rinsn);
589 int ta = get_attr_ta (rinsn);
590 return ta == INVALID_ATTRIBUTE ? get_default_ta () : IS_AGNOSTIC (ta);
591}
592
593/* Helper function to get MA operand. */
594static bool
595mask_agnostic_p (rtx_insn *rinsn)
596{
597 /* If it doesn't have MA, we return agnostic by default. */
598 extract_insn_cached (rinsn);
599 int ma = get_attr_ma (rinsn);
600 return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma);
601}
602
603/* Return true if FN has a vector instruction that use VL/VTYPE. */
604static bool
605has_vector_insn (function *fn)
606{
607 basic_block cfg_bb;
608 rtx_insn *rinsn;
609 FOR_ALL_BB_FN (cfg_bb, fn)
610 FOR_BB_INSNS (cfg_bb, rinsn)
611 if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn))
612 return true;
613 return false;
614}
615
616/* Emit vsetvl instruction. */
617static rtx
e577b91b 618gen_vsetvl_pat (enum vsetvl_type insn_type, const vl_vtype_info &info, rtx vl)
9243c3d1
JZZ
619{
620 rtx avl = info.get_avl ();
04655110
LX
621 /* if optimization == 0 and the instruction is vmv.x.s/vfmv.f.s,
622 set the value of avl to (const_int 0) so that VSETVL PASS will
623 insert vsetvl correctly.*/
624 if (info.has_avl_no_reg ())
625 avl = GEN_INT (0);
9243c3d1
JZZ
626 rtx sew = gen_int_mode (info.get_sew (), Pmode);
627 rtx vlmul = gen_int_mode (info.get_vlmul (), Pmode);
628 rtx ta = gen_int_mode (info.get_ta (), Pmode);
629 rtx ma = gen_int_mode (info.get_ma (), Pmode);
630
631 if (insn_type == VSETVL_NORMAL)
632 {
633 gcc_assert (vl != NULL_RTX);
634 return gen_vsetvl (Pmode, vl, avl, sew, vlmul, ta, ma);
635 }
636 else if (insn_type == VSETVL_VTYPE_CHANGE_ONLY)
637 return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma);
638 else
639 return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma);
640}
641
642static rtx
e577b91b 643gen_vsetvl_pat (rtx_insn *rinsn, const vector_insn_info &info)
9243c3d1
JZZ
644{
645 rtx new_pat;
60bd33bc
JZZ
646 vl_vtype_info new_info = info;
647 if (info.get_insn () && info.get_insn ()->rtl ()
648 && fault_first_load_p (info.get_insn ()->rtl ()))
649 new_info.set_avl_info (
650 avl_info (get_avl (info.get_insn ()->rtl ()), nullptr));
9243c3d1
JZZ
651 if (vsetvl_insn_p (rinsn) || vlmax_avl_p (info.get_avl ()))
652 {
653 rtx dest = get_vl (rinsn);
60bd33bc 654 new_pat = gen_vsetvl_pat (VSETVL_NORMAL, new_info, dest);
9243c3d1
JZZ
655 }
656 else if (INSN_CODE (rinsn) == CODE_FOR_vsetvl_vtype_change_only)
60bd33bc 657 new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, new_info, NULL_RTX);
9243c3d1 658 else
60bd33bc 659 new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX);
9243c3d1
JZZ
660 return new_pat;
661}
662
663static void
664emit_vsetvl_insn (enum vsetvl_type insn_type, enum emit_type emit_type,
e577b91b 665 const vl_vtype_info &info, rtx vl, rtx_insn *rinsn)
9243c3d1
JZZ
666{
667 rtx pat = gen_vsetvl_pat (insn_type, info, vl);
668 if (dump_file)
669 {
670 fprintf (dump_file, "\nInsert vsetvl insn PATTERN:\n");
671 print_rtl_single (dump_file, pat);
672 }
673
674 if (emit_type == EMIT_DIRECT)
675 emit_insn (pat);
676 else if (emit_type == EMIT_BEFORE)
677 emit_insn_before (pat, rinsn);
678 else
679 emit_insn_after (pat, rinsn);
680}
681
682static void
683eliminate_insn (rtx_insn *rinsn)
684{
685 if (dump_file)
686 {
687 fprintf (dump_file, "\nEliminate insn %d:\n", INSN_UID (rinsn));
688 print_rtl_single (dump_file, rinsn);
689 }
690 if (in_sequence_p ())
691 remove_insn (rinsn);
692 else
693 delete_insn (rinsn);
694}
695
a481eed8 696static vsetvl_type
9243c3d1
JZZ
697insert_vsetvl (enum emit_type emit_type, rtx_insn *rinsn,
698 const vector_insn_info &info, const vector_insn_info &prev_info)
699{
700 /* Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
701 VLMAX. */
702 if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p ()
4f673c5e 703 && info.compatible_avl_p (prev_info) && info.same_vlmax_p (prev_info))
9243c3d1
JZZ
704 {
705 emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX,
706 rinsn);
a481eed8 707 return VSETVL_VTYPE_CHANGE_ONLY;
9243c3d1
JZZ
708 }
709
710 if (info.has_avl_imm ())
711 {
712 emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX,
713 rinsn);
a481eed8 714 return VSETVL_DISCARD_RESULT;
9243c3d1
JZZ
715 }
716
717 if (info.has_avl_no_reg ())
718 {
719 /* We can only use x0, x0 if there's no chance of the vtype change causing
720 the previous vl to become invalid. */
721 if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p ()
722 && info.same_vlmax_p (prev_info))
723 {
724 emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX,
725 rinsn);
a481eed8 726 return VSETVL_VTYPE_CHANGE_ONLY;
9243c3d1
JZZ
727 }
728 /* Otherwise use an AVL of 0 to avoid depending on previous vl. */
729 vl_vtype_info new_info = info;
730 new_info.set_avl_info (avl_info (const0_rtx, nullptr));
731 emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, new_info, NULL_RTX,
732 rinsn);
a481eed8 733 return VSETVL_DISCARD_RESULT;
9243c3d1
JZZ
734 }
735
736 /* Use X0 as the DestReg unless AVLReg is X0. We also need to change the
737 opcode if the AVLReg is X0 as they have different register classes for
738 the AVL operand. */
739 if (vlmax_avl_p (info.get_avl ()))
740 {
741 gcc_assert (has_vtype_op (rinsn) || vsetvl_insn_p (rinsn));
ec99ffab 742 rtx vl_op = info.get_avl_reg_rtx ();
9243c3d1
JZZ
743 gcc_assert (!vlmax_avl_p (vl_op));
744 emit_vsetvl_insn (VSETVL_NORMAL, emit_type, info, vl_op, rinsn);
a481eed8 745 return VSETVL_NORMAL;
9243c3d1
JZZ
746 }
747
748 emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX, rinsn);
749
750 if (dump_file)
751 {
752 fprintf (dump_file, "Update VL/VTYPE info, previous info=");
753 prev_info.dump (dump_file);
754 }
a481eed8 755 return VSETVL_DISCARD_RESULT;
9243c3d1
JZZ
756}
757
758/* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
759 to INSN. If such notes are added to an insn which references a
760 CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
761 that note, because the following loop optimization pass requires
762 them. */
763
764/* ??? If there was a jump optimization pass after gcse and before loop,
765 then we would not need to do this here, because jump would add the
766 necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */
767
768static void
27a2a4b6 769add_label_notes (rtx x, rtx_insn *rinsn)
9243c3d1
JZZ
770{
771 enum rtx_code code = GET_CODE (x);
772 int i, j;
773 const char *fmt;
774
775 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
776 {
777 /* This code used to ignore labels that referred to dispatch tables to
778 avoid flow generating (slightly) worse code.
779
780 We no longer ignore such label references (see LABEL_REF handling in
781 mark_jump_label for additional information). */
782
783 /* There's no reason for current users to emit jump-insns with
784 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
785 notes. */
27a2a4b6
JZZ
786 gcc_assert (!JUMP_P (rinsn));
787 add_reg_note (rinsn, REG_LABEL_OPERAND, label_ref_label (x));
9243c3d1
JZZ
788
789 if (LABEL_P (label_ref_label (x)))
790 LABEL_NUSES (label_ref_label (x))++;
791
792 return;
793 }
794
795 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
796 {
797 if (fmt[i] == 'e')
27a2a4b6 798 add_label_notes (XEXP (x, i), rinsn);
9243c3d1
JZZ
799 else if (fmt[i] == 'E')
800 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
27a2a4b6 801 add_label_notes (XVECEXP (x, i, j), rinsn);
9243c3d1
JZZ
802 }
803}
804
805/* Add EXPR to the end of basic block BB.
806
807 This is used by both the PRE and code hoisting. */
808
809static void
810insert_insn_end_basic_block (rtx_insn *rinsn, basic_block cfg_bb)
811{
812 rtx_insn *end_rinsn = BB_END (cfg_bb);
813 rtx_insn *new_insn;
814 rtx_insn *pat, *pat_end;
815
816 pat = rinsn;
817 gcc_assert (pat && INSN_P (pat));
818
819 pat_end = pat;
820 while (NEXT_INSN (pat_end) != NULL_RTX)
821 pat_end = NEXT_INSN (pat_end);
822
823 /* If the last end_rinsn is a jump, insert EXPR in front. Similarly we need
824 to take care of trapping instructions in presence of non-call exceptions.
825 */
826
827 if (JUMP_P (end_rinsn)
828 || (NONJUMP_INSN_P (end_rinsn)
829 && (!single_succ_p (cfg_bb)
830 || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL)))
831 {
832 /* FIXME: What if something in jump uses value set in new end_rinsn? */
833 new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb);
834 }
835
836 /* Likewise if the last end_rinsn is a call, as will happen in the presence
837 of exception handling. */
838 else if (CALL_P (end_rinsn)
839 && (!single_succ_p (cfg_bb)
840 || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL))
841 {
842 /* Keeping in mind targets with small register classes and parameters
843 in registers, we search backward and place the instructions before
844 the first parameter is loaded. Do this for everyone for consistency
845 and a presumption that we'll get better code elsewhere as well. */
846
847 /* Since different machines initialize their parameter registers
848 in different orders, assume nothing. Collect the set of all
849 parameter registers. */
850 end_rinsn = find_first_parameter_load (end_rinsn, BB_HEAD (cfg_bb));
851
852 /* If we found all the parameter loads, then we want to insert
853 before the first parameter load.
854
855 If we did not find all the parameter loads, then we might have
856 stopped on the head of the block, which could be a CODE_LABEL.
857 If we inserted before the CODE_LABEL, then we would be putting
858 the end_rinsn in the wrong basic block. In that case, put the
859 end_rinsn after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK.
860 */
861 while (LABEL_P (end_rinsn) || NOTE_INSN_BASIC_BLOCK_P (end_rinsn))
862 end_rinsn = NEXT_INSN (end_rinsn);
863
864 new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb);
865 }
866 else
867 new_insn = emit_insn_after_noloc (pat, end_rinsn, cfg_bb);
868
869 while (1)
870 {
871 if (INSN_P (pat))
872 add_label_notes (PATTERN (pat), new_insn);
873 if (pat == pat_end)
874 break;
875 pat = NEXT_INSN (pat);
876 }
877}
878
879/* Get VL/VTYPE information for INSN. */
880static vl_vtype_info
881get_vl_vtype_info (const insn_info *insn)
882{
9243c3d1
JZZ
883 set_info *set = nullptr;
884 rtx avl = ::get_avl (insn->rtl ());
ec99ffab
JZZ
885 if (avl && REG_P (avl))
886 {
887 if (vlmax_avl_p (avl) && has_vl_op (insn->rtl ()))
888 set
889 = find_access (insn->uses (), REGNO (get_vl (insn->rtl ())))->def ();
890 else if (!vlmax_avl_p (avl))
891 set = find_access (insn->uses (), REGNO (avl))->def ();
892 else
893 set = nullptr;
894 }
9243c3d1
JZZ
895
896 uint8_t sew = get_sew (insn->rtl ());
897 enum vlmul_type vlmul = get_vlmul (insn->rtl ());
898 uint8_t ratio = get_attr_ratio (insn->rtl ());
899 /* when get_attr_ratio is invalid, this kind of instructions
900 doesn't care about ratio. However, we still need this value
901 in demand info backward analysis. */
902 if (ratio == INVALID_ATTRIBUTE)
903 ratio = calculate_ratio (sew, vlmul);
904 bool ta = tail_agnostic_p (insn->rtl ());
905 bool ma = mask_agnostic_p (insn->rtl ());
906
907 /* If merge operand is undef value, we prefer agnostic. */
908 int merge_op_idx = get_attr_merge_op_idx (insn->rtl ());
909 if (merge_op_idx != INVALID_ATTRIBUTE
910 && satisfies_constraint_vu (recog_data.operand[merge_op_idx]))
911 {
912 ta = true;
913 ma = true;
914 }
915
916 vl_vtype_info info (avl_info (avl, set), sew, vlmul, ratio, ta, ma);
917 return info;
918}
919
920static void
921change_insn (rtx_insn *rinsn, rtx new_pat)
922{
923 /* We don't apply change on RTL_SSA here since it's possible a
924 new INSN we add in the PASS before which doesn't have RTL_SSA
925 info yet.*/
926 if (dump_file)
927 {
928 fprintf (dump_file, "\nChange PATTERN of insn %d from:\n",
929 INSN_UID (rinsn));
930 print_rtl_single (dump_file, PATTERN (rinsn));
931 }
932
011ba384 933 validate_change (rinsn, &PATTERN (rinsn), new_pat, false);
9243c3d1
JZZ
934
935 if (dump_file)
936 {
937 fprintf (dump_file, "\nto:\n");
938 print_rtl_single (dump_file, PATTERN (rinsn));
939 }
940}
941
60bd33bc
JZZ
942static const insn_info *
943get_forward_read_vl_insn (const insn_info *insn)
944{
945 const bb_info *bb = insn->bb ();
946 for (const insn_info *i = insn->next_nondebug_insn ();
947 real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ())
948 {
949 if (find_access (i->defs (), VL_REGNUM))
950 return nullptr;
951 if (read_vl_insn_p (i->rtl ()))
952 return i;
953 }
954 return nullptr;
955}
956
957static const insn_info *
958get_backward_fault_first_load_insn (const insn_info *insn)
959{
960 const bb_info *bb = insn->bb ();
961 for (const insn_info *i = insn->prev_nondebug_insn ();
962 real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ())
963 {
964 if (fault_first_load_p (i->rtl ()))
965 return i;
966 if (find_access (i->defs (), VL_REGNUM))
967 return nullptr;
968 }
969 return nullptr;
970}
971
9243c3d1
JZZ
972static bool
973change_insn (function_info *ssa, insn_change change, insn_info *insn,
974 rtx new_pat)
975{
976 rtx_insn *rinsn = insn->rtl ();
977 auto attempt = ssa->new_change_attempt ();
978 if (!restrict_movement (change))
979 return false;
980
981 if (dump_file)
982 {
983 fprintf (dump_file, "\nChange PATTERN of insn %d from:\n",
984 INSN_UID (rinsn));
985 print_rtl_single (dump_file, PATTERN (rinsn));
9243c3d1
JZZ
986 }
987
988 insn_change_watermark watermark;
989 validate_change (rinsn, &PATTERN (rinsn), new_pat, true);
990
991 /* These routines report failures themselves. */
992 if (!recog (attempt, change) || !change_is_worthwhile (change, false))
993 return false;
a1e42094
JZZ
994
995 /* Fix bug:
996 (insn 12 34 13 2 (set (reg:VNx8DI 120 v24 [orig:134 _1 ] [134])
997 (if_then_else:VNx8DI (unspec:VNx8BI [
998 (const_vector:VNx8BI repeat [
999 (const_int 1 [0x1])
1000 ])
1001 (const_int 0 [0])
1002 (const_int 2 [0x2]) repeated x2
1003 (const_int 0 [0])
1004 (reg:SI 66 vl)
1005 (reg:SI 67 vtype)
1006 ] UNSPEC_VPREDICATE)
1007 (plus:VNx8DI (reg/v:VNx8DI 104 v8 [orig:137 op1 ] [137])
1008 (sign_extend:VNx8DI (vec_duplicate:VNx8SI (reg:SI 15 a5
1009 [140])))) (unspec:VNx8DI [ (const_int 0 [0]) ] UNSPEC_VUNDEF))) "rvv.c":8:12
1010 2784 {pred_single_widen_addsvnx8di_scalar} (expr_list:REG_EQUIV
1011 (mem/c:VNx8DI (reg:DI 10 a0 [142]) [1 <retval>+0 S[64, 64] A128])
1012 (expr_list:REG_EQUAL (if_then_else:VNx8DI (unspec:VNx8BI [
1013 (const_vector:VNx8BI repeat [
1014 (const_int 1 [0x1])
1015 ])
1016 (reg/v:DI 13 a3 [orig:139 vl ] [139])
1017 (const_int 2 [0x2]) repeated x2
1018 (const_int 0 [0])
1019 (reg:SI 66 vl)
1020 (reg:SI 67 vtype)
1021 ] UNSPEC_VPREDICATE)
1022 (plus:VNx8DI (reg/v:VNx8DI 104 v8 [orig:137 op1 ] [137])
1023 (const_vector:VNx8DI repeat [
1024 (const_int 2730 [0xaaa])
1025 ]))
1026 (unspec:VNx8DI [
1027 (const_int 0 [0])
1028 ] UNSPEC_VUNDEF))
1029 (nil))))
1030 Here we want to remove use "a3". However, the REG_EQUAL/REG_EQUIV note use
1031 "a3" which made us fail in change_insn. We reference to the
1032 'aarch64-cc-fusion.cc' and add this method. */
1033 remove_reg_equal_equiv_notes (rinsn);
9243c3d1
JZZ
1034 confirm_change_group ();
1035 ssa->change_insn (change);
1036
1037 if (dump_file)
1038 {
1039 fprintf (dump_file, "\nto:\n");
1040 print_rtl_single (dump_file, PATTERN (rinsn));
9243c3d1
JZZ
1041 }
1042 return true;
1043}
1044
aef20243
JZZ
1045static void
1046change_vsetvl_insn (const insn_info *insn, const vector_insn_info &info)
1047{
1048 rtx_insn *rinsn;
1049 if (vector_config_insn_p (insn->rtl ()))
1050 {
1051 rinsn = insn->rtl ();
1052 gcc_assert (vsetvl_insn_p (rinsn) && "Can't handle X0, rs1 vsetvli yet");
1053 }
1054 else
1055 {
1056 gcc_assert (has_vtype_op (insn->rtl ()));
1057 rinsn = PREV_INSN (insn->rtl ());
1058 gcc_assert (vector_config_insn_p (rinsn));
1059 }
1060 rtx new_pat = gen_vsetvl_pat (rinsn, info);
1061 change_insn (rinsn, new_pat);
1062}
1063
69f39144
JZ
1064static bool
1065avl_source_has_vsetvl_p (set_info *avl_source)
1066{
1067 if (!avl_source)
1068 return false;
1069 if (!avl_source->insn ())
1070 return false;
1071 if (avl_source->insn ()->is_real ())
1072 return vsetvl_insn_p (avl_source->insn ()->rtl ());
1073 hash_set<set_info *> sets = get_all_sets (avl_source, true, false, true);
1074 for (const auto set : sets)
1075 {
1076 if (set->insn ()->is_real () && vsetvl_insn_p (set->insn ()->rtl ()))
1077 return true;
1078 }
1079 return false;
1080}
1081
4f673c5e 1082static bool
6b6b9c68 1083source_equal_p (insn_info *insn1, insn_info *insn2)
4f673c5e 1084{
6b6b9c68
JZZ
1085 if (!insn1 || !insn2)
1086 return false;
1087 rtx_insn *rinsn1 = insn1->rtl ();
1088 rtx_insn *rinsn2 = insn2->rtl ();
4f673c5e
JZZ
1089 if (!rinsn1 || !rinsn2)
1090 return false;
1091 rtx note1 = find_reg_equal_equiv_note (rinsn1);
1092 rtx note2 = find_reg_equal_equiv_note (rinsn2);
1093 rtx single_set1 = single_set (rinsn1);
1094 rtx single_set2 = single_set (rinsn2);
60bd33bc
JZZ
1095 if (read_vl_insn_p (rinsn1) && read_vl_insn_p (rinsn2))
1096 {
1097 const insn_info *load1 = get_backward_fault_first_load_insn (insn1);
1098 const insn_info *load2 = get_backward_fault_first_load_insn (insn2);
1099 return load1 && load2 && load1 == load2;
1100 }
4f673c5e
JZZ
1101
1102 if (note1 && note2 && rtx_equal_p (note1, note2))
1103 return true;
6b6b9c68
JZZ
1104
1105 /* Since vsetvl instruction is not single SET.
1106 We handle this case specially here. */
1107 if (vsetvl_insn_p (insn1->rtl ()) && vsetvl_insn_p (insn2->rtl ()))
1108 {
1109 /* For example:
1110 vsetvl1 a6,a5,e32m1
1111 RVV 1 (use a6 as AVL)
1112 vsetvl2 a5,a5,e8mf4
1113 RVV 2 (use a5 as AVL)
1114 We consider AVL of RVV 1 and RVV 2 are same so that we can
1115 gain more optimization opportunities.
1116
1117 Note: insn1_info.compatible_avl_p (insn2_info)
1118 will make sure there is no instruction between vsetvl1 and vsetvl2
1119 modify a5 since their def will be different if there is instruction
1120 modify a5 and compatible_avl_p will return false. */
1121 vector_insn_info insn1_info, insn2_info;
1122 insn1_info.parse_insn (insn1);
1123 insn2_info.parse_insn (insn2);
69f39144
JZ
1124
1125 /* To avoid dead loop, we don't optimize a vsetvli def has vsetvli
1126 instructions which will complicate the situation. */
1127 if (avl_source_has_vsetvl_p (insn1_info.get_avl_source ())
1128 || avl_source_has_vsetvl_p (insn2_info.get_avl_source ()))
1129 return false;
1130
6b6b9c68
JZZ
1131 if (insn1_info.same_vlmax_p (insn2_info)
1132 && insn1_info.compatible_avl_p (insn2_info))
1133 return true;
1134 }
1135
1136 /* We only handle AVL is set by instructions with no side effects. */
1137 if (!single_set1 || !single_set2)
1138 return false;
1139 if (!rtx_equal_p (SET_SRC (single_set1), SET_SRC (single_set2)))
1140 return false;
e9fd9efc
JZ
1141 /* RTL_SSA uses include REG_NOTE. Consider this following case:
1142
1143 insn1 RTL:
1144 (insn 41 39 42 4 (set (reg:DI 26 s10 [orig:159 loop_len_46 ] [159])
1145 (umin:DI (reg:DI 15 a5 [orig:201 _149 ] [201])
1146 (reg:DI 14 a4 [276]))) 408 {*umindi3}
1147 (expr_list:REG_EQUAL (umin:DI (reg:DI 15 a5 [orig:201 _149 ] [201])
1148 (const_int 2 [0x2]))
1149 (nil)))
1150 The RTL_SSA uses of this instruction has 2 uses:
1151 1. (reg:DI 15 a5 [orig:201 _149 ] [201]) - twice.
1152 2. (reg:DI 14 a4 [276]) - once.
1153
1154 insn2 RTL:
1155 (insn 38 353 351 4 (set (reg:DI 27 s11 [orig:160 loop_len_47 ] [160])
1156 (umin:DI (reg:DI 15 a5 [orig:199 _146 ] [199])
1157 (reg:DI 14 a4 [276]))) 408 {*umindi3}
1158 (expr_list:REG_EQUAL (umin:DI (reg:DI 28 t3 [orig:200 ivtmp_147 ] [200])
1159 (const_int 2 [0x2]))
1160 (nil)))
1161 The RTL_SSA uses of this instruction has 3 uses:
1162 1. (reg:DI 15 a5 [orig:199 _146 ] [199]) - once
1163 2. (reg:DI 14 a4 [276]) - once
1164 3. (reg:DI 28 t3 [orig:200 ivtmp_147 ] [200]) - once
1165
1166 Return false when insn1->uses ().size () != insn2->uses ().size ()
1167 */
1168 if (insn1->uses ().size () != insn2->uses ().size ())
1169 return false;
6b6b9c68
JZZ
1170 for (size_t i = 0; i < insn1->uses ().size (); i++)
1171 if (insn1->uses ()[i] != insn2->uses ()[i])
1172 return false;
1173 return true;
4f673c5e
JZZ
1174}
1175
1176/* Helper function to get single same real RTL source.
1177 return NULL if it is not a single real RTL source. */
6b6b9c68 1178static insn_info *
4f673c5e
JZZ
1179extract_single_source (set_info *set)
1180{
1181 if (!set)
1182 return nullptr;
1183 if (set->insn ()->is_real ())
6b6b9c68 1184 return set->insn ();
4f673c5e
JZZ
1185 if (!set->insn ()->is_phi ())
1186 return nullptr;
6b6b9c68 1187 hash_set<set_info *> sets = get_all_sets (set, true, false, true);
4f673c5e 1188
6b6b9c68 1189 insn_info *first_insn = (*sets.begin ())->insn ();
4f673c5e
JZZ
1190 if (first_insn->is_artificial ())
1191 return nullptr;
6b6b9c68 1192 for (const set_info *set : sets)
4f673c5e
JZZ
1193 {
1194 /* If there is a head or end insn, we conservative return
1195 NULL so that VSETVL PASS will insert vsetvl directly. */
6b6b9c68 1196 if (set->insn ()->is_artificial ())
4f673c5e 1197 return nullptr;
6b6b9c68 1198 if (!source_equal_p (set->insn (), first_insn))
4f673c5e
JZZ
1199 return nullptr;
1200 }
1201
6b6b9c68 1202 return first_insn;
4f673c5e
JZZ
1203}
1204
ec99ffab
JZZ
1205static unsigned
1206calculate_sew (vlmul_type vlmul, unsigned int ratio)
1207{
1208 for (const unsigned sew : ALL_SEW)
1209 if (calculate_ratio (sew, vlmul) == ratio)
1210 return sew;
1211 return 0;
1212}
1213
1214static vlmul_type
1215calculate_vlmul (unsigned int sew, unsigned int ratio)
1216{
1217 for (const vlmul_type vlmul : ALL_LMUL)
1218 if (calculate_ratio (sew, vlmul) == ratio)
1219 return vlmul;
1220 return LMUL_RESERVED;
1221}
1222
1223static bool
1224incompatible_avl_p (const vector_insn_info &info1,
1225 const vector_insn_info &info2)
1226{
1227 return !info1.compatible_avl_p (info2) && !info2.compatible_avl_p (info1);
1228}
1229
1230static bool
1231different_sew_p (const vector_insn_info &info1, const vector_insn_info &info2)
1232{
1233 return info1.get_sew () != info2.get_sew ();
1234}
1235
1236static bool
1237different_lmul_p (const vector_insn_info &info1, const vector_insn_info &info2)
1238{
1239 return info1.get_vlmul () != info2.get_vlmul ();
1240}
1241
1242static bool
1243different_ratio_p (const vector_insn_info &info1, const vector_insn_info &info2)
1244{
1245 return info1.get_ratio () != info2.get_ratio ();
1246}
1247
1248static bool
1249different_tail_policy_p (const vector_insn_info &info1,
1250 const vector_insn_info &info2)
1251{
1252 return info1.get_ta () != info2.get_ta ();
1253}
1254
1255static bool
1256different_mask_policy_p (const vector_insn_info &info1,
1257 const vector_insn_info &info2)
1258{
1259 return info1.get_ma () != info2.get_ma ();
1260}
1261
1262static bool
1263possible_zero_avl_p (const vector_insn_info &info1,
1264 const vector_insn_info &info2)
1265{
1266 return !info1.has_non_zero_avl () || !info2.has_non_zero_avl ();
1267}
1268
ec99ffab
JZZ
1269static bool
1270second_ratio_invalid_for_first_sew_p (const vector_insn_info &info1,
1271 const vector_insn_info &info2)
1272{
1273 return calculate_vlmul (info1.get_sew (), info2.get_ratio ())
1274 == LMUL_RESERVED;
1275}
1276
1277static bool
1278second_ratio_invalid_for_first_lmul_p (const vector_insn_info &info1,
1279 const vector_insn_info &info2)
1280{
1281 return calculate_sew (info1.get_vlmul (), info2.get_ratio ()) == 0;
1282}
1283
1284static bool
1285second_sew_less_than_first_sew_p (const vector_insn_info &info1,
1286 const vector_insn_info &info2)
1287{
1288 return info2.get_sew () < info1.get_sew ();
1289}
1290
1291static bool
1292first_sew_less_than_second_sew_p (const vector_insn_info &info1,
1293 const vector_insn_info &info2)
1294{
1295 return info1.get_sew () < info2.get_sew ();
1296}
1297
1298/* return 0 if LMUL1 == LMUL2.
1299 return -1 if LMUL1 < LMUL2.
1300 return 1 if LMUL1 > LMUL2. */
1301static int
1302compare_lmul (vlmul_type vlmul1, vlmul_type vlmul2)
1303{
1304 if (vlmul1 == vlmul2)
1305 return 0;
1306
1307 switch (vlmul1)
1308 {
1309 case LMUL_1:
1310 if (vlmul2 == LMUL_2 || vlmul2 == LMUL_4 || vlmul2 == LMUL_8)
1311 return 1;
1312 else
1313 return -1;
1314 case LMUL_2:
1315 if (vlmul2 == LMUL_4 || vlmul2 == LMUL_8)
1316 return 1;
1317 else
1318 return -1;
1319 case LMUL_4:
1320 if (vlmul2 == LMUL_8)
1321 return 1;
1322 else
1323 return -1;
1324 case LMUL_8:
1325 return -1;
1326 case LMUL_F2:
1327 if (vlmul2 == LMUL_1 || vlmul2 == LMUL_2 || vlmul2 == LMUL_4
1328 || vlmul2 == LMUL_8)
1329 return 1;
1330 else
1331 return -1;
1332 case LMUL_F4:
1333 if (vlmul2 == LMUL_F2 || vlmul2 == LMUL_1 || vlmul2 == LMUL_2
1334 || vlmul2 == LMUL_4 || vlmul2 == LMUL_8)
1335 return 1;
1336 else
1337 return -1;
1338 case LMUL_F8:
1339 return 0;
1340 default:
1341 gcc_unreachable ();
1342 }
1343}
1344
1345static bool
1346second_lmul_less_than_first_lmul_p (const vector_insn_info &info1,
1347 const vector_insn_info &info2)
1348{
1349 return compare_lmul (info2.get_vlmul (), info1.get_vlmul ()) == -1;
1350}
1351
ec99ffab
JZZ
1352static bool
1353second_ratio_less_than_first_ratio_p (const vector_insn_info &info1,
1354 const vector_insn_info &info2)
1355{
1356 return info2.get_ratio () < info1.get_ratio ();
1357}
1358
1359static CONSTEXPR const demands_cond incompatible_conds[] = {
1360#define DEF_INCOMPATIBLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, \
1361 GE_SEW1, TAIL_POLICTY1, MASK_POLICY1, AVL2, \
1362 SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, \
1363 TAIL_POLICTY2, MASK_POLICY2, COND) \
1364 {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1, \
1365 MASK_POLICY1}, \
1366 {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \
1367 MASK_POLICY2}}, \
1368 COND},
1369#include "riscv-vsetvl.def"
1370};
1371
1372static unsigned
1373greatest_sew (const vector_insn_info &info1, const vector_insn_info &info2)
1374{
1375 return std::max (info1.get_sew (), info2.get_sew ());
1376}
1377
1378static unsigned
1379first_sew (const vector_insn_info &info1, const vector_insn_info &)
1380{
1381 return info1.get_sew ();
1382}
1383
1384static unsigned
1385second_sew (const vector_insn_info &, const vector_insn_info &info2)
1386{
1387 return info2.get_sew ();
1388}
1389
1390static vlmul_type
1391first_vlmul (const vector_insn_info &info1, const vector_insn_info &)
1392{
1393 return info1.get_vlmul ();
1394}
1395
1396static vlmul_type
1397second_vlmul (const vector_insn_info &, const vector_insn_info &info2)
1398{
1399 return info2.get_vlmul ();
1400}
1401
1402static unsigned
1403first_ratio (const vector_insn_info &info1, const vector_insn_info &)
1404{
1405 return info1.get_ratio ();
1406}
1407
1408static unsigned
1409second_ratio (const vector_insn_info &, const vector_insn_info &info2)
1410{
1411 return info2.get_ratio ();
1412}
1413
1414static vlmul_type
1415vlmul_for_first_sew_second_ratio (const vector_insn_info &info1,
1416 const vector_insn_info &info2)
1417{
1418 return calculate_vlmul (info1.get_sew (), info2.get_ratio ());
1419}
1420
1421static unsigned
1422ratio_for_second_sew_first_vlmul (const vector_insn_info &info1,
1423 const vector_insn_info &info2)
1424{
1425 return calculate_ratio (info2.get_sew (), info1.get_vlmul ());
1426}
1427
1428static CONSTEXPR const demands_fuse_rule fuse_rules[] = {
1429#define DEF_SEW_LMUL_FUSE_RULE(DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1, \
1430 DEMAND_GE_SEW1, DEMAND_SEW2, DEMAND_LMUL2, \
1431 DEMAND_RATIO2, DEMAND_GE_SEW2, NEW_DEMAND_SEW, \
1432 NEW_DEMAND_LMUL, NEW_DEMAND_RATIO, \
1433 NEW_DEMAND_GE_SEW, NEW_SEW, NEW_VLMUL, \
1434 NEW_RATIO) \
1435 {{{DEMAND_ANY, DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1, DEMAND_ANY, \
1436 DEMAND_GE_SEW1, DEMAND_ANY, DEMAND_ANY}, \
1437 {DEMAND_ANY, DEMAND_SEW2, DEMAND_LMUL2, DEMAND_RATIO2, DEMAND_ANY, \
1438 DEMAND_GE_SEW2, DEMAND_ANY, DEMAND_ANY}}, \
1439 NEW_DEMAND_SEW, \
1440 NEW_DEMAND_LMUL, \
1441 NEW_DEMAND_RATIO, \
1442 NEW_DEMAND_GE_SEW, \
1443 NEW_SEW, \
1444 NEW_VLMUL, \
1445 NEW_RATIO},
1446#include "riscv-vsetvl.def"
1447};
1448
1449static bool
1450always_unavailable (const vector_insn_info &, const vector_insn_info &)
1451{
1452 return true;
1453}
1454
1455static bool
1456avl_unavailable_p (const vector_insn_info &info1, const vector_insn_info &info2)
1457{
1458 return !info2.compatible_avl_p (info1.get_avl_info ());
1459}
1460
1461static bool
1462sew_unavailable_p (const vector_insn_info &info1, const vector_insn_info &info2)
1463{
1464 if (!info2.demand_p (DEMAND_LMUL) && !info2.demand_p (DEMAND_RATIO))
1465 {
1466 if (info2.demand_p (DEMAND_GE_SEW))
1467 return info1.get_sew () < info2.get_sew ();
1468 return info1.get_sew () != info2.get_sew ();
1469 }
1470 return true;
1471}
1472
1473static bool
1474lmul_unavailable_p (const vector_insn_info &info1,
1475 const vector_insn_info &info2)
1476{
1477 if (info1.get_vlmul () == info2.get_vlmul () && !info2.demand_p (DEMAND_SEW)
1478 && !info2.demand_p (DEMAND_RATIO))
1479 return false;
1480 return true;
1481}
1482
1483static bool
1484ge_sew_unavailable_p (const vector_insn_info &info1,
1485 const vector_insn_info &info2)
1486{
1487 if (!info2.demand_p (DEMAND_LMUL) && !info2.demand_p (DEMAND_RATIO)
1488 && info2.demand_p (DEMAND_GE_SEW))
1489 return info1.get_sew () < info2.get_sew ();
1490 return true;
1491}
1492
1493static bool
1494ge_sew_lmul_unavailable_p (const vector_insn_info &info1,
1495 const vector_insn_info &info2)
1496{
1497 if (!info2.demand_p (DEMAND_RATIO) && info2.demand_p (DEMAND_GE_SEW))
1498 return info1.get_sew () < info2.get_sew ();
1499 return true;
1500}
1501
1502static bool
1503ge_sew_ratio_unavailable_p (const vector_insn_info &info1,
1504 const vector_insn_info &info2)
1505{
1506 if (!info2.demand_p (DEMAND_LMUL) && info2.demand_p (DEMAND_GE_SEW))
1507 return info1.get_sew () < info2.get_sew ();
1508 return true;
1509}
1510
1511static CONSTEXPR const demands_cond unavailable_conds[] = {
1512#define DEF_UNAVAILABLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, \
1513 TAIL_POLICTY1, MASK_POLICY1, AVL2, SEW2, LMUL2, \
1514 RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \
1515 MASK_POLICY2, COND) \
1516 {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1, \
1517 MASK_POLICY1}, \
1518 {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2, \
1519 MASK_POLICY2}}, \
1520 COND},
1521#include "riscv-vsetvl.def"
1522};
1523
1524static bool
1525same_sew_lmul_demand_p (const bool *dems1, const bool *dems2)
1526{
1527 return dems1[DEMAND_SEW] == dems2[DEMAND_SEW]
1528 && dems1[DEMAND_LMUL] == dems2[DEMAND_LMUL]
1529 && dems1[DEMAND_RATIO] == dems2[DEMAND_RATIO] && !dems1[DEMAND_GE_SEW]
1530 && !dems2[DEMAND_GE_SEW];
1531}
1532
1533static bool
1534propagate_avl_across_demands_p (const vector_insn_info &info1,
1535 const vector_insn_info &info2)
1536{
1537 if (info2.demand_p (DEMAND_AVL))
1538 {
1539 if (info2.demand_p (DEMAND_NONZERO_AVL))
1540 return info1.demand_p (DEMAND_AVL)
1541 && !info1.demand_p (DEMAND_NONZERO_AVL) && info1.has_avl_reg ();
1542 }
1543 else
1544 return info1.demand_p (DEMAND_AVL) && info1.has_avl_reg ();
1545 return false;
1546}
1547
1548static bool
a481eed8 1549reg_available_p (const insn_info *insn, const vector_insn_info &info)
ec99ffab 1550{
a481eed8 1551 if (info.has_avl_reg () && !info.get_avl_source ())
44c918b5 1552 return false;
a481eed8
JZZ
1553 insn_info *def_insn = info.get_avl_source ()->insn ();
1554 if (def_insn->bb () == insn->bb ())
1555 return before_p (def_insn, insn);
ec99ffab 1556 else
a481eed8
JZZ
1557 return dominated_by_p (CDI_DOMINATORS, insn->bb ()->cfg_bb (),
1558 def_insn->bb ()->cfg_bb ());
ec99ffab
JZZ
1559}
1560
60bd33bc
JZZ
1561/* Return true if the instruction support relaxed compatible check. */
1562static bool
1563support_relaxed_compatible_p (const vector_insn_info &info1,
1564 const vector_insn_info &info2)
1565{
1566 if (fault_first_load_p (info1.get_insn ()->rtl ())
1567 && info2.demand_p (DEMAND_AVL) && info2.has_avl_reg ()
1568 && info2.get_avl_source () && info2.get_avl_source ()->insn ()->is_phi ())
1569 {
1570 hash_set<set_info *> sets
1571 = get_all_sets (info2.get_avl_source (), true, false, false);
1572 for (set_info *set : sets)
1573 {
1574 if (read_vl_insn_p (set->insn ()->rtl ()))
1575 {
1576 const insn_info *insn
1577 = get_backward_fault_first_load_insn (set->insn ());
1578 if (insn == info1.get_insn ())
1579 return info2.compatible_vtype_p (info1);
1580 }
1581 }
1582 }
1583 return false;
1584}
1585
1586/* Return true if the block is worthwhile backward propagation. */
1587static bool
1588backward_propagate_worthwhile_p (const basic_block cfg_bb,
1589 const vector_block_info block_info)
1590{
1591 if (loop_basic_block_p (cfg_bb))
1592 {
1593 if (block_info.reaching_out.valid_or_dirty_p ())
1594 {
1595 if (block_info.local_dem.compatible_p (block_info.reaching_out))
1596 {
1597 /* Case 1 (Can backward propagate):
1598 ....
1599 bb0:
1600 ...
1601 for (int i = 0; i < n; i++)
1602 {
1603 vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
1604 __riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
1605 }
1606 The local_dem is compatible with reaching_out. Such case is
1607 worthwhile backward propagation. */
1608 return true;
1609 }
1610 else
1611 {
1612 if (support_relaxed_compatible_p (block_info.reaching_out,
1613 block_info.local_dem))
1614 return true;
1615 /* Case 2 (Don't backward propagate):
1616 ....
1617 bb0:
1618 ...
1619 for (int i = 0; i < n; i++)
1620 {
1621 vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
1622 __riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
1623 vint16mf2_t v2 = __riscv_vle16_v_i16mf2 (in + i + 6, 8);
1624 __riscv_vse16_v_i16mf2 (out + i + 6, v, 8);
1625 }
1626 The local_dem is incompatible with reaching_out.
1627 It makes no sense to backward propagate the local_dem since we
1628 can't avoid VSETVL inside the loop. */
1629 return false;
1630 }
1631 }
1632 else
1633 {
1634 gcc_assert (block_info.reaching_out.unknown_p ());
1635 /* Case 3 (Don't backward propagate):
1636 ....
1637 bb0:
1638 ...
1639 for (int i = 0; i < n; i++)
1640 {
1641 vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
1642 __riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
1643 fn3 ();
1644 }
1645 The local_dem is VALID, but the reaching_out is UNKNOWN.
1646 It makes no sense to backward propagate the local_dem since we
1647 can't avoid VSETVL inside the loop. */
1648 return false;
1649 }
1650 }
1651
1652 return true;
1653}
1654
a2d12abe
JZZ
1655/* Count the number of REGNO in RINSN. */
1656static int
1657count_regno_occurrences (rtx_insn *rinsn, unsigned int regno)
1658{
1659 int count = 0;
1660 extract_insn (rinsn);
1661 for (int i = 0; i < recog_data.n_operands; i++)
1662 if (refers_to_regno_p (regno, recog_data.operand[i]))
1663 count++;
1664 return count;
1665}
1666
12b23c71
JZZ
1667avl_info::avl_info (const avl_info &other)
1668{
1669 m_value = other.get_value ();
1670 m_source = other.get_source ();
1671}
1672
9243c3d1
JZZ
1673avl_info::avl_info (rtx value_in, set_info *source_in)
1674 : m_value (value_in), m_source (source_in)
1675{}
1676
4f673c5e
JZZ
1677bool
1678avl_info::single_source_equal_p (const avl_info &other) const
1679{
1680 set_info *set1 = m_source;
1681 set_info *set2 = other.get_source ();
6b6b9c68
JZZ
1682 insn_info *insn1 = extract_single_source (set1);
1683 insn_info *insn2 = extract_single_source (set2);
1684 if (!insn1 || !insn2)
1685 return false;
1686 return source_equal_p (insn1, insn2);
1687}
1688
1689bool
1690avl_info::multiple_source_equal_p (const avl_info &other) const
1691{
d875d756
JZ
1692 /* When the def info is same in RTL_SSA namespace, it's safe
1693 to consider they are avl compatible. */
1694 if (m_source == other.get_source ())
1695 return true;
6b6b9c68 1696
d875d756
JZ
1697 /* We only consider handle PHI node. */
1698 if (!m_source->insn ()->is_phi () || !other.get_source ()->insn ()->is_phi ())
1699 return false;
6b6b9c68 1700
d875d756
JZ
1701 phi_info *phi1 = as_a<phi_info *> (m_source);
1702 phi_info *phi2 = as_a<phi_info *> (other.get_source ());
6b6b9c68 1703
d875d756
JZ
1704 if (phi1->is_degenerate () && phi2->is_degenerate ())
1705 {
1706 /* Degenerate PHI means the PHI node only have one input. */
1707
1708 /* If both PHI nodes have the same single input in use list.
1709 We consider they are AVL compatible. */
1710 if (phi1->input_value (0) == phi2->input_value (0))
1711 return true;
1712 }
1713 /* TODO: We can support more optimization cases in the future. */
1714 return false;
4f673c5e
JZZ
1715}
1716
9243c3d1
JZZ
1717avl_info &
1718avl_info::operator= (const avl_info &other)
1719{
1720 m_value = other.get_value ();
1721 m_source = other.get_source ();
1722 return *this;
1723}
1724
1725bool
1726avl_info::operator== (const avl_info &other) const
1727{
1728 if (!m_value)
1729 return !other.get_value ();
1730 if (!other.get_value ())
1731 return false;
1732
9243c3d1
JZZ
1733 if (GET_CODE (m_value) != GET_CODE (other.get_value ()))
1734 return false;
1735
1736 /* Handle CONST_INT AVL. */
1737 if (CONST_INT_P (m_value))
1738 return INTVAL (m_value) == INTVAL (other.get_value ());
1739
1740 /* Handle VLMAX AVL. */
1741 if (vlmax_avl_p (m_value))
1742 return vlmax_avl_p (other.get_value ());
1743
4f673c5e
JZZ
1744 /* If any source is undef value, we think they are not equal. */
1745 if (!m_source || !other.get_source ())
1746 return false;
1747
1748 /* If both sources are single source (defined by a single real RTL)
1749 and their definitions are same. */
1750 if (single_source_equal_p (other))
1751 return true;
1752
6b6b9c68 1753 return multiple_source_equal_p (other);
9243c3d1
JZZ
1754}
1755
1756bool
1757avl_info::operator!= (const avl_info &other) const
1758{
1759 return !(*this == other);
1760}
1761
ec99ffab
JZZ
1762bool
1763avl_info::has_non_zero_avl () const
1764{
1765 if (has_avl_imm ())
1766 return INTVAL (get_value ()) > 0;
1767 if (has_avl_reg ())
1768 return vlmax_avl_p (get_value ());
1769 return false;
1770}
1771
9243c3d1
JZZ
1772/* Initialize VL/VTYPE information. */
1773vl_vtype_info::vl_vtype_info (avl_info avl_in, uint8_t sew_in,
1774 enum vlmul_type vlmul_in, uint8_t ratio_in,
1775 bool ta_in, bool ma_in)
1776 : m_avl (avl_in), m_sew (sew_in), m_vlmul (vlmul_in), m_ratio (ratio_in),
1777 m_ta (ta_in), m_ma (ma_in)
1778{
1779 gcc_assert (valid_sew_p (m_sew) && "Unexpected SEW");
1780}
1781
1782bool
1783vl_vtype_info::operator== (const vl_vtype_info &other) const
1784{
6b6b9c68 1785 return same_avl_p (other) && m_sew == other.get_sew ()
9243c3d1
JZZ
1786 && m_vlmul == other.get_vlmul () && m_ta == other.get_ta ()
1787 && m_ma == other.get_ma () && m_ratio == other.get_ratio ();
1788}
1789
1790bool
1791vl_vtype_info::operator!= (const vl_vtype_info &other) const
1792{
1793 return !(*this == other);
1794}
1795
9243c3d1
JZZ
1796bool
1797vl_vtype_info::same_avl_p (const vl_vtype_info &other) const
1798{
6b6b9c68
JZZ
1799 /* We need to compare both RTL and SET. If both AVL are CONST_INT.
1800 For example, const_int 3 and const_int 4, we need to compare
1801 RTL. If both AVL are REG and their REGNO are same, we need to
1802 compare SET. */
1803 return get_avl () == other.get_avl ()
1804 && get_avl_source () == other.get_avl_source ();
9243c3d1
JZZ
1805}
1806
1807bool
1808vl_vtype_info::same_vtype_p (const vl_vtype_info &other) const
1809{
1810 return get_sew () == other.get_sew () && get_vlmul () == other.get_vlmul ()
1811 && get_ta () == other.get_ta () && get_ma () == other.get_ma ();
1812}
1813
1814bool
1815vl_vtype_info::same_vlmax_p (const vl_vtype_info &other) const
1816{
1817 return get_ratio () == other.get_ratio ();
1818}
1819
1820/* Compare the compatibility between Dem1 and Dem2.
1821 If Dem1 > Dem2, Dem1 has bigger compatibility then Dem2
1822 meaning Dem1 is easier be compatible with others than Dem2
1823 or Dem2 is stricter than Dem1.
1824 For example, Dem1 (demand SEW + LMUL) > Dem2 (demand RATIO). */
9243c3d1
JZZ
1825bool
1826vector_insn_info::operator>= (const vector_insn_info &other) const
1827{
60bd33bc
JZZ
1828 if (support_relaxed_compatible_p (*this, other))
1829 {
1830 unsigned array_size = sizeof (unavailable_conds) / sizeof (demands_cond);
1831 /* Bypass AVL unavailable cases. */
1832 for (unsigned i = 2; i < array_size; i++)
1833 if (unavailable_conds[i].pair.match_cond_p (this->get_demands (),
1834 other.get_demands ())
1835 && unavailable_conds[i].incompatible_p (*this, other))
1836 return false;
1837 return true;
1838 }
1839
1840 if (!other.compatible_p (static_cast<const vl_vtype_info &> (*this)))
1841 return false;
1842 if (!this->compatible_p (static_cast<const vl_vtype_info &> (other)))
9243c3d1
JZZ
1843 return true;
1844
1845 if (*this == other)
1846 return true;
1847
ec99ffab
JZZ
1848 for (const auto &cond : unavailable_conds)
1849 if (cond.pair.match_cond_p (this->get_demands (), other.get_demands ())
1850 && cond.incompatible_p (*this, other))
1851 return false;
9243c3d1
JZZ
1852
1853 return true;
1854}
1855
1856bool
1857vector_insn_info::operator== (const vector_insn_info &other) const
1858{
1859 gcc_assert (!uninit_p () && !other.uninit_p ()
1860 && "Uninitialization should not happen");
1861
1862 /* Empty is only equal to another Empty. */
1863 if (empty_p ())
1864 return other.empty_p ();
1865 if (other.empty_p ())
1866 return empty_p ();
1867
1868 /* Unknown is only equal to another Unknown. */
1869 if (unknown_p ())
1870 return other.unknown_p ();
1871 if (other.unknown_p ())
1872 return unknown_p ();
1873
1874 for (size_t i = 0; i < NUM_DEMAND; i++)
1875 if (m_demands[i] != other.demand_p ((enum demand_type) i))
1876 return false;
1877
7ae4d1df
JZZ
1878 if (vector_config_insn_p (m_insn->rtl ())
1879 || vector_config_insn_p (other.get_insn ()->rtl ()))
1880 if (m_insn != other.get_insn ())
1881 return false;
9243c3d1
JZZ
1882
1883 if (!same_avl_p (other))
1884 return false;
1885
1886 /* If the full VTYPE is valid, check that it is the same. */
1887 return same_vtype_p (other);
1888}
1889
1890void
1891vector_insn_info::parse_insn (rtx_insn *rinsn)
1892{
1893 *this = vector_insn_info ();
1894 if (!NONDEBUG_INSN_P (rinsn))
1895 return;
1896 if (!has_vtype_op (rinsn))
1897 return;
1898 m_state = VALID;
1899 extract_insn_cached (rinsn);
1900 const rtx avl = recog_data.operand[get_attr_vl_op_idx (rinsn)];
1901 m_avl = avl_info (avl, nullptr);
1902 m_sew = ::get_sew (rinsn);
1903 m_vlmul = ::get_vlmul (rinsn);
1904 m_ta = tail_agnostic_p (rinsn);
1905 m_ma = mask_agnostic_p (rinsn);
1906}
1907
1908void
1909vector_insn_info::parse_insn (insn_info *insn)
1910{
1911 *this = vector_insn_info ();
1912
1913 /* Return if it is debug insn for the consistency with optimize == 0. */
1914 if (insn->is_debug_insn ())
1915 return;
1916
1917 /* We set it as unknown since we don't what will happen in CALL or ASM. */
1918 if (insn->is_call () || insn->is_asm ())
1919 {
1920 set_unknown ();
1921 return;
1922 }
1923
1924 /* If this is something that updates VL/VTYPE that we don't know about, set
1925 the state to unknown. */
60bd33bc 1926 if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())
9243c3d1
JZZ
1927 && (find_access (insn->defs (), VL_REGNUM)
1928 || find_access (insn->defs (), VTYPE_REGNUM)))
1929 {
1930 set_unknown ();
1931 return;
1932 }
1933
1934 if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()))
1935 return;
1936
1937 /* Warning: This function has to work on both the lowered (i.e. post
1938 emit_local_forward_vsetvls) and pre-lowering forms. The main implication
1939 of this is that it can't use the value of a SEW, VL, or Policy operand as
1940 they might be stale after lowering. */
1941 vl_vtype_info::operator= (get_vl_vtype_info (insn));
1942 m_insn = insn;
1943 m_state = VALID;
1944 if (vector_config_insn_p (insn->rtl ()))
1945 {
1946 m_demands[DEMAND_AVL] = true;
1947 m_demands[DEMAND_RATIO] = true;
1948 return;
1949 }
1950
1951 if (has_vl_op (insn->rtl ()))
1952 m_demands[DEMAND_AVL] = true;
1953
1954 if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE)
1955 m_demands[DEMAND_RATIO] = true;
1956 else
1957 {
1958 /* TODO: By default, if it doesn't demand RATIO, we set it
1959 demand SEW && LMUL both. Some instructions may demand SEW
1960 only and ignore LMUL, will fix it later. */
1961 m_demands[DEMAND_SEW] = true;
ec99ffab
JZZ
1962 if (!ignore_vlmul_insn_p (insn->rtl ()))
1963 m_demands[DEMAND_LMUL] = true;
9243c3d1
JZZ
1964 }
1965
1966 if (get_attr_ta (insn->rtl ()) != INVALID_ATTRIBUTE)
1967 m_demands[DEMAND_TAIL_POLICY] = true;
1968 if (get_attr_ma (insn->rtl ()) != INVALID_ATTRIBUTE)
1969 m_demands[DEMAND_MASK_POLICY] = true;
6b6b9c68
JZZ
1970
1971 if (vector_config_insn_p (insn->rtl ()))
1972 return;
1973
ec99ffab
JZZ
1974 if (scalar_move_insn_p (insn->rtl ()))
1975 {
1976 if (m_avl.has_non_zero_avl ())
1977 m_demands[DEMAND_NONZERO_AVL] = true;
1978 if (m_ta)
1979 m_demands[DEMAND_GE_SEW] = true;
1980 }
1981
1982 if (!m_avl.has_avl_reg () || vlmax_avl_p (get_avl ()) || !m_avl.get_source ())
1983 return;
1984 if (!m_avl.get_source ()->insn ()->is_real ()
1985 && !m_avl.get_source ()->insn ()->is_phi ())
6b6b9c68
JZZ
1986 return;
1987
1988 insn_info *def_insn = extract_single_source (m_avl.get_source ());
ec99ffab
JZZ
1989 if (!def_insn || !vsetvl_insn_p (def_insn->rtl ()))
1990 return;
9243c3d1 1991
ec99ffab
JZZ
1992 vector_insn_info new_info;
1993 new_info.parse_insn (def_insn);
1994 if (!same_vlmax_p (new_info) && !scalar_move_insn_p (insn->rtl ()))
1995 return;
1996 /* TODO: Currently, we don't forward AVL for non-VLMAX vsetvl. */
1997 if (vlmax_avl_p (new_info.get_avl ()))
1998 set_avl_info (avl_info (new_info.get_avl (), get_avl_source ()));
1999
2000 if (scalar_move_insn_p (insn->rtl ()) && m_avl.has_non_zero_avl ())
2001 m_demands[DEMAND_NONZERO_AVL] = true;
9243c3d1
JZZ
2002}
2003
2004bool
2005vector_insn_info::compatible_p (const vector_insn_info &other) const
2006{
2007 gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p ()
2008 && "Can't compare invalid demanded infos");
2009
ec99ffab 2010 for (const auto &cond : incompatible_conds)
60bd33bc 2011 if (cond.dual_incompatible_p (*this, other))
ec99ffab 2012 return false;
9243c3d1
JZZ
2013 return true;
2014}
2015
d51f2456
JZ
2016bool
2017vector_insn_info::skip_avl_compatible_p (const vector_insn_info &other) const
2018{
2019 gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p ()
2020 && "Can't compare invalid demanded infos");
2021 unsigned array_size = sizeof (incompatible_conds) / sizeof (demands_cond);
2022 /* Bypass AVL incompatible cases. */
2023 for (unsigned i = 1; i < array_size; i++)
2024 if (incompatible_conds[i].dual_incompatible_p (*this, other))
2025 return false;
2026 return true;
2027}
2028
9243c3d1
JZZ
2029bool
2030vector_insn_info::compatible_avl_p (const vl_vtype_info &other) const
2031{
2032 gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
2033 gcc_assert (!unknown_p () && "Can't compare AVL in unknown state");
2034 if (!demand_p (DEMAND_AVL))
2035 return true;
ec99ffab
JZZ
2036 if (demand_p (DEMAND_NONZERO_AVL) && other.has_non_zero_avl ())
2037 return true;
9243c3d1
JZZ
2038 return get_avl_info () == other.get_avl_info ();
2039}
2040
4f673c5e
JZZ
2041bool
2042vector_insn_info::compatible_avl_p (const avl_info &other) const
2043{
2044 gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
2045 gcc_assert (!unknown_p () && "Can't compare AVL in unknown state");
2046 gcc_assert (demand_p (DEMAND_AVL) && "Can't compare AVL undemand state");
ec99ffab
JZZ
2047 if (!demand_p (DEMAND_AVL))
2048 return true;
2049 if (demand_p (DEMAND_NONZERO_AVL) && other.has_non_zero_avl ())
2050 return true;
4f673c5e
JZZ
2051 return get_avl_info () == other;
2052}
2053
9243c3d1
JZZ
2054bool
2055vector_insn_info::compatible_vtype_p (const vl_vtype_info &other) const
2056{
2057 gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
2058 gcc_assert (!unknown_p () && "Can't compare VTYPE in unknown state");
ec99ffab
JZZ
2059 if (demand_p (DEMAND_SEW))
2060 {
2061 if (!demand_p (DEMAND_GE_SEW) && m_sew != other.get_sew ())
2062 return false;
2063 if (demand_p (DEMAND_GE_SEW) && m_sew > other.get_sew ())
2064 return false;
2065 }
9243c3d1
JZZ
2066 if (demand_p (DEMAND_LMUL) && m_vlmul != other.get_vlmul ())
2067 return false;
2068 if (demand_p (DEMAND_RATIO) && m_ratio != other.get_ratio ())
2069 return false;
2070 if (demand_p (DEMAND_TAIL_POLICY) && m_ta != other.get_ta ())
2071 return false;
2072 if (demand_p (DEMAND_MASK_POLICY) && m_ma != other.get_ma ())
2073 return false;
2074 return true;
2075}
2076
2077/* Determine whether the vector instructions requirements represented by
2078 Require are compatible with the previous vsetvli instruction represented
2079 by this. INSN is the instruction whose requirements we're considering. */
2080bool
2081vector_insn_info::compatible_p (const vl_vtype_info &curr_info) const
2082{
2083 gcc_assert (!uninit_p () && "Can't handle uninitialized info");
2084 if (empty_p ())
2085 return false;
2086
2087 /* Nothing is compatible with Unknown. */
2088 if (unknown_p ())
2089 return false;
2090
2091 /* If the instruction doesn't need an AVLReg and the SEW matches, consider
2092 it compatible. */
2093 if (!demand_p (DEMAND_AVL))
2094 if (m_sew == curr_info.get_sew ())
2095 return true;
2096
2097 return compatible_avl_p (curr_info) && compatible_vtype_p (curr_info);
2098}
2099
6b6b9c68
JZZ
2100bool
2101vector_insn_info::available_p (const vector_insn_info &other) const
2102{
ec99ffab
JZZ
2103 return *this >= other;
2104}
2105
2106void
2107vector_insn_info::fuse_avl (const vector_insn_info &info1,
2108 const vector_insn_info &info2)
2109{
2110 set_insn (info1.get_insn ());
2111 if (info1.demand_p (DEMAND_AVL))
2112 {
2113 if (info1.demand_p (DEMAND_NONZERO_AVL))
2114 {
2115 if (info2.demand_p (DEMAND_AVL)
2116 && !info2.demand_p (DEMAND_NONZERO_AVL))
2117 {
2118 set_avl_info (info2.get_avl_info ());
2119 set_demand (DEMAND_AVL, true);
2120 set_demand (DEMAND_NONZERO_AVL, false);
2121 return;
2122 }
2123 }
2124 set_avl_info (info1.get_avl_info ());
2125 set_demand (DEMAND_NONZERO_AVL, info1.demand_p (DEMAND_NONZERO_AVL));
2126 }
2127 else
2128 {
2129 set_avl_info (info2.get_avl_info ());
2130 set_demand (DEMAND_NONZERO_AVL, info2.demand_p (DEMAND_NONZERO_AVL));
2131 }
2132 set_demand (DEMAND_AVL,
2133 info1.demand_p (DEMAND_AVL) || info2.demand_p (DEMAND_AVL));
2134}
2135
2136void
2137vector_insn_info::fuse_sew_lmul (const vector_insn_info &info1,
2138 const vector_insn_info &info2)
2139{
2140 /* We need to fuse sew && lmul according to demand info:
2141
2142 1. GE_SEW.
2143 2. SEW.
2144 3. LMUL.
2145 4. RATIO. */
2146 if (same_sew_lmul_demand_p (info1.get_demands (), info2.get_demands ()))
2147 {
2148 set_demand (DEMAND_SEW, info2.demand_p (DEMAND_SEW));
2149 set_demand (DEMAND_LMUL, info2.demand_p (DEMAND_LMUL));
2150 set_demand (DEMAND_RATIO, info2.demand_p (DEMAND_RATIO));
2151 set_demand (DEMAND_GE_SEW, info2.demand_p (DEMAND_GE_SEW));
2152 set_sew (info2.get_sew ());
2153 set_vlmul (info2.get_vlmul ());
2154 set_ratio (info2.get_ratio ());
2155 return;
2156 }
2157 for (const auto &rule : fuse_rules)
2158 {
2159 if (rule.pair.match_cond_p (info1.get_demands (), info2.get_demands ()))
2160 {
2161 set_demand (DEMAND_SEW, rule.demand_sew_p);
2162 set_demand (DEMAND_LMUL, rule.demand_lmul_p);
2163 set_demand (DEMAND_RATIO, rule.demand_ratio_p);
2164 set_demand (DEMAND_GE_SEW, rule.demand_ge_sew_p);
2165 set_sew (rule.new_sew (info1, info2));
2166 set_vlmul (rule.new_vlmul (info1, info2));
2167 set_ratio (rule.new_ratio (info1, info2));
2168 return;
2169 }
2170 if (rule.pair.match_cond_p (info2.get_demands (), info1.get_demands ()))
2171 {
2172 set_demand (DEMAND_SEW, rule.demand_sew_p);
2173 set_demand (DEMAND_LMUL, rule.demand_lmul_p);
2174 set_demand (DEMAND_RATIO, rule.demand_ratio_p);
2175 set_demand (DEMAND_GE_SEW, rule.demand_ge_sew_p);
2176 set_sew (rule.new_sew (info2, info1));
2177 set_vlmul (rule.new_vlmul (info2, info1));
2178 set_ratio (rule.new_ratio (info2, info1));
2179 return;
2180 }
2181 }
2182 gcc_unreachable ();
2183}
2184
2185void
2186vector_insn_info::fuse_tail_policy (const vector_insn_info &info1,
2187 const vector_insn_info &info2)
2188{
2189 if (info1.demand_p (DEMAND_TAIL_POLICY))
2190 {
2191 set_ta (info1.get_ta ());
2192 demand (DEMAND_TAIL_POLICY);
2193 }
2194 else if (info2.demand_p (DEMAND_TAIL_POLICY))
2195 {
2196 set_ta (info2.get_ta ());
2197 demand (DEMAND_TAIL_POLICY);
2198 }
2199 else
2200 set_ta (get_default_ta ());
2201}
2202
2203void
2204vector_insn_info::fuse_mask_policy (const vector_insn_info &info1,
2205 const vector_insn_info &info2)
2206{
2207 if (info1.demand_p (DEMAND_MASK_POLICY))
2208 {
2209 set_ma (info1.get_ma ());
2210 demand (DEMAND_MASK_POLICY);
2211 }
2212 else if (info2.demand_p (DEMAND_MASK_POLICY))
2213 {
2214 set_ma (info2.get_ma ());
2215 demand (DEMAND_MASK_POLICY);
2216 }
2217 else
2218 set_ma (get_default_ma ());
6b6b9c68
JZZ
2219}
2220
9243c3d1
JZZ
2221vector_insn_info
2222vector_insn_info::merge (const vector_insn_info &merge_info,
d51f2456 2223 enum merge_type type) const
9243c3d1 2224{
6b6b9c68
JZZ
2225 if (!vsetvl_insn_p (get_insn ()->rtl ()))
2226 gcc_assert (this->compatible_p (merge_info)
2227 && "Can't merge incompatible demanded infos");
9243c3d1
JZZ
2228
2229 vector_insn_info new_info;
ec99ffab 2230 new_info.set_valid ();
4f673c5e 2231 if (type == LOCAL_MERGE)
9243c3d1 2232 {
4f673c5e 2233 /* For local backward data flow, we always update INSN && AVL as the
ec99ffab
JZZ
2234 latest INSN and AVL so that we can keep track status of each INSN. */
2235 new_info.fuse_avl (merge_info, *this);
9243c3d1
JZZ
2236 }
2237 else
2238 {
4f673c5e 2239 /* For global data flow, we should keep original INSN and AVL if they
ec99ffab 2240 valid since we should keep the life information of each block.
9243c3d1 2241
ec99ffab
JZZ
2242 For example:
2243 bb 0 -> bb 1.
2244 We should keep INSN && AVL of bb 1 since we will eventually emit
2245 vsetvl instruction according to INSN and AVL of bb 1. */
2246 new_info.fuse_avl (*this, merge_info);
2247 }
9243c3d1 2248
ec99ffab
JZZ
2249 new_info.fuse_sew_lmul (*this, merge_info);
2250 new_info.fuse_tail_policy (*this, merge_info);
2251 new_info.fuse_mask_policy (*this, merge_info);
9243c3d1
JZZ
2252 return new_info;
2253}
2254
60bd33bc
JZZ
2255bool
2256vector_insn_info::update_fault_first_load_avl (insn_info *insn)
2257{
2258 // Update AVL to vl-output of the fault first load.
2259 const insn_info *read_vl = get_forward_read_vl_insn (insn);
2260 if (read_vl)
2261 {
2262 rtx vl = SET_DEST (PATTERN (read_vl->rtl ()));
2263 def_info *def = find_access (read_vl->defs (), REGNO (vl));
2264 set_info *set = safe_dyn_cast<set_info *> (def);
2265 set_avl_info (avl_info (vl, set));
2266 set_insn (insn);
2267 return true;
2268 }
2269 return false;
2270}
2271
ea7a9f36
KC
2272static const char *
2273vlmul_to_str (vlmul_type vlmul)
2274{
2275 switch (vlmul)
2276 {
2277 case LMUL_1:
2278 return "m1";
2279 case LMUL_2:
2280 return "m2";
2281 case LMUL_4:
2282 return "m4";
2283 case LMUL_8:
2284 return "m8";
2285 case LMUL_RESERVED:
2286 return "INVALID LMUL";
2287 case LMUL_F8:
2288 return "mf8";
2289 case LMUL_F4:
2290 return "mf4";
2291 case LMUL_F2:
2292 return "mf2";
2293
2294 default:
2295 gcc_unreachable ();
2296 }
2297}
2298
/* Render a tail/mask policy bit as its textual name.  */
static const char *
policy_to_str (bool agnostic_p)
{
  if (agnostic_p)
    return "agnostic";
  return "undisturbed";
}
2304
9243c3d1
JZZ
2305void
2306vector_insn_info::dump (FILE *file) const
2307{
2308 fprintf (file, "[");
2309 if (uninit_p ())
2310 fprintf (file, "UNINITIALIZED,");
2311 else if (valid_p ())
2312 fprintf (file, "VALID,");
2313 else if (unknown_p ())
2314 fprintf (file, "UNKNOWN,");
2315 else if (empty_p ())
2316 fprintf (file, "EMPTY,");
6b6b9c68
JZZ
2317 else if (hard_empty_p ())
2318 fprintf (file, "HARD_EMPTY,");
4f673c5e
JZZ
2319 else if (dirty_with_killed_avl_p ())
2320 fprintf (file, "DIRTY_WITH_KILLED_AVL,");
9243c3d1
JZZ
2321 else
2322 fprintf (file, "DIRTY,");
2323
2324 fprintf (file, "Demand field={%d(VL),", demand_p (DEMAND_AVL));
ec99ffab 2325 fprintf (file, "%d(DEMAND_NONZERO_AVL),", demand_p (DEMAND_NONZERO_AVL));
9243c3d1 2326 fprintf (file, "%d(SEW),", demand_p (DEMAND_SEW));
ec99ffab 2327 fprintf (file, "%d(DEMAND_GE_SEW),", demand_p (DEMAND_GE_SEW));
9243c3d1
JZZ
2328 fprintf (file, "%d(LMUL),", demand_p (DEMAND_LMUL));
2329 fprintf (file, "%d(RATIO),", demand_p (DEMAND_RATIO));
2330 fprintf (file, "%d(TAIL_POLICY),", demand_p (DEMAND_TAIL_POLICY));
2331 fprintf (file, "%d(MASK_POLICY)}\n", demand_p (DEMAND_MASK_POLICY));
2332
2333 fprintf (file, "AVL=");
2334 print_rtl_single (file, get_avl ());
2335 fprintf (file, "SEW=%d,", get_sew ());
ea7a9f36 2336 fprintf (file, "VLMUL=%s,", vlmul_to_str (get_vlmul ()));
9243c3d1 2337 fprintf (file, "RATIO=%d,", get_ratio ());
ea7a9f36
KC
2338 fprintf (file, "TAIL_POLICY=%s,", policy_to_str (get_ta ()));
2339 fprintf (file, "MASK_POLICY=%s", policy_to_str (get_ma ()));
9243c3d1
JZZ
2340 fprintf (file, "]\n");
2341
2342 if (valid_p ())
2343 {
2344 if (get_insn ())
2345 {
12b23c71
JZZ
2346 fprintf (file, "The real INSN=");
2347 print_rtl_single (file, get_insn ()->rtl ());
9243c3d1 2348 }
9243c3d1
JZZ
2349 }
2350}
2351
2352vector_infos_manager::vector_infos_manager ()
2353{
2354 vector_edge_list = nullptr;
2355 vector_kill = nullptr;
2356 vector_del = nullptr;
2357 vector_insert = nullptr;
2358 vector_antic = nullptr;
2359 vector_transp = nullptr;
2360 vector_comp = nullptr;
2361 vector_avin = nullptr;
2362 vector_avout = nullptr;
2363 vector_insn_infos.safe_grow (get_max_uid ());
2364 vector_block_infos.safe_grow (last_basic_block_for_fn (cfun));
2365 if (!optimize)
2366 {
2367 basic_block cfg_bb;
2368 rtx_insn *rinsn;
2369 FOR_ALL_BB_FN (cfg_bb, cfun)
2370 {
2371 vector_block_infos[cfg_bb->index].local_dem = vector_insn_info ();
2372 vector_block_infos[cfg_bb->index].reaching_out = vector_insn_info ();
2373 FOR_BB_INSNS (cfg_bb, rinsn)
2374 vector_insn_infos[INSN_UID (rinsn)].parse_insn (rinsn);
2375 }
2376 }
2377 else
2378 {
2379 for (const bb_info *bb : crtl->ssa->bbs ())
2380 {
2381 vector_block_infos[bb->index ()].local_dem = vector_insn_info ();
2382 vector_block_infos[bb->index ()].reaching_out = vector_insn_info ();
2383 for (insn_info *insn : bb->real_insns ())
2384 vector_insn_infos[insn->uid ()].parse_insn (insn);
acc10c79 2385 vector_block_infos[bb->index ()].probability = profile_probability ();
9243c3d1
JZZ
2386 }
2387 }
2388}
2389
2390void
2391vector_infos_manager::create_expr (vector_insn_info &info)
2392{
2393 for (size_t i = 0; i < vector_exprs.length (); i++)
2394 if (*vector_exprs[i] == info)
2395 return;
2396 vector_exprs.safe_push (&info);
2397}
2398
2399size_t
2400vector_infos_manager::get_expr_id (const vector_insn_info &info) const
2401{
2402 for (size_t i = 0; i < vector_exprs.length (); i++)
2403 if (*vector_exprs[i] == info)
2404 return i;
2405 gcc_unreachable ();
2406}
2407
2408auto_vec<size_t>
2409vector_infos_manager::get_all_available_exprs (
2410 const vector_insn_info &info) const
2411{
2412 auto_vec<size_t> available_list;
2413 for (size_t i = 0; i < vector_exprs.length (); i++)
6b6b9c68 2414 if (info.available_p (*vector_exprs[i]))
9243c3d1
JZZ
2415 available_list.safe_push (i);
2416 return available_list;
2417}
2418
d06e9264
JZ
2419bool
2420vector_infos_manager::all_empty_predecessor_p (const basic_block cfg_bb) const
2421{
2422 hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb);
2423 for (const basic_block pred_cfg_bb : pred_cfg_bbs)
2424 {
2425 const auto &pred_block_info = vector_block_infos[pred_cfg_bb->index];
2426 if (!pred_block_info.local_dem.valid_or_dirty_p ()
2427 && !pred_block_info.reaching_out.valid_or_dirty_p ())
2428 continue;
2429 return false;
2430 }
2431 return true;
2432}
2433
9243c3d1
JZZ
2434bool
2435vector_infos_manager::all_same_ratio_p (sbitmap bitdata) const
2436{
2437 if (bitmap_empty_p (bitdata))
2438 return false;
2439
2440 int ratio = -1;
2441 unsigned int bb_index;
2442 sbitmap_iterator sbi;
2443
2444 EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi)
2445 {
2446 if (ratio == -1)
2447 ratio = vector_exprs[bb_index]->get_ratio ();
2448 else if (vector_exprs[bb_index]->get_ratio () != ratio)
2449 return false;
2450 }
2451 return true;
2452}
2453
ff8f9544
JZ
2454/* Return TRUE if the incoming vector configuration state
2455 to CFG_BB is compatible with the vector configuration
2456 state in CFG_BB, FALSE otherwise. */
2457bool
2458vector_infos_manager::all_avail_in_compatible_p (const basic_block cfg_bb) const
2459{
2460 const auto &info = vector_block_infos[cfg_bb->index].local_dem;
2461 sbitmap avin = vector_avin[cfg_bb->index];
2462 unsigned int bb_index;
2463 sbitmap_iterator sbi;
2464 EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi)
2465 {
2466 const auto &avin_info
2467 = static_cast<const vl_vtype_info &> (*vector_exprs[bb_index]);
2468 if (!info.compatible_p (avin_info))
2469 return false;
2470 }
2471 return true;
2472}
2473
005fad9d
JZZ
2474bool
2475vector_infos_manager::all_same_avl_p (const basic_block cfg_bb,
2476 sbitmap bitdata) const
2477{
2478 if (bitmap_empty_p (bitdata))
2479 return false;
2480
2481 const auto &block_info = vector_block_infos[cfg_bb->index];
2482 if (!block_info.local_dem.demand_p (DEMAND_AVL))
2483 return true;
2484
2485 avl_info avl = block_info.local_dem.get_avl_info ();
2486 unsigned int bb_index;
2487 sbitmap_iterator sbi;
2488
2489 EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi)
2490 {
2491 if (vector_exprs[bb_index]->get_avl_info () != avl)
2492 return false;
2493 }
2494 return true;
2495}
2496
9243c3d1
JZZ
2497size_t
2498vector_infos_manager::expr_set_num (sbitmap bitdata) const
2499{
2500 size_t count = 0;
2501 for (size_t i = 0; i < vector_exprs.length (); i++)
2502 if (bitmap_bit_p (bitdata, i))
2503 count++;
2504 return count;
2505}
2506
2507void
2508vector_infos_manager::release (void)
2509{
2510 if (!vector_insn_infos.is_empty ())
2511 vector_insn_infos.release ();
2512 if (!vector_block_infos.is_empty ())
2513 vector_block_infos.release ();
2514 if (!vector_exprs.is_empty ())
2515 vector_exprs.release ();
2516
ec99ffab
JZZ
2517 gcc_assert (to_refine_vsetvls.is_empty ());
2518 gcc_assert (to_delete_vsetvls.is_empty ());
9243c3d1 2519 if (optimize > 0)
cfe3fbc6
JZZ
2520 free_bitmap_vectors ();
2521}
2522
2523void
2524vector_infos_manager::create_bitmap_vectors (void)
2525{
2526 /* Create the bitmap vectors. */
2527 vector_antic = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2528 vector_exprs.length ());
2529 vector_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2530 vector_exprs.length ());
2531 vector_comp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2532 vector_exprs.length ());
2533 vector_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2534 vector_exprs.length ());
2535 vector_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2536 vector_exprs.length ());
2537 vector_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2538 vector_exprs.length ());
2539
2540 bitmap_vector_ones (vector_transp, last_basic_block_for_fn (cfun));
2541 bitmap_vector_clear (vector_antic, last_basic_block_for_fn (cfun));
2542 bitmap_vector_clear (vector_comp, last_basic_block_for_fn (cfun));
2543}
2544
2545void
2546vector_infos_manager::free_bitmap_vectors (void)
2547{
2548 /* Finished. Free up all the things we've allocated. */
2549 free_edge_list (vector_edge_list);
2550 if (vector_del)
2551 sbitmap_vector_free (vector_del);
2552 if (vector_insert)
2553 sbitmap_vector_free (vector_insert);
2554 if (vector_kill)
2555 sbitmap_vector_free (vector_kill);
2556 if (vector_antic)
2557 sbitmap_vector_free (vector_antic);
2558 if (vector_transp)
2559 sbitmap_vector_free (vector_transp);
2560 if (vector_comp)
2561 sbitmap_vector_free (vector_comp);
2562 if (vector_avin)
2563 sbitmap_vector_free (vector_avin);
2564 if (vector_avout)
2565 sbitmap_vector_free (vector_avout);
2566
2567 vector_edge_list = nullptr;
2568 vector_kill = nullptr;
2569 vector_del = nullptr;
2570 vector_insert = nullptr;
2571 vector_antic = nullptr;
2572 vector_transp = nullptr;
2573 vector_comp = nullptr;
2574 vector_avin = nullptr;
2575 vector_avout = nullptr;
9243c3d1
JZZ
2576}
2577
2578void
2579vector_infos_manager::dump (FILE *file) const
2580{
2581 basic_block cfg_bb;
2582 rtx_insn *rinsn;
2583
2584 fprintf (file, "\n");
2585 FOR_ALL_BB_FN (cfg_bb, cfun)
2586 {
2587 fprintf (file, "Local vector info of <bb %d>:\n", cfg_bb->index);
2588 fprintf (file, "<HEADER>=");
2589 vector_block_infos[cfg_bb->index].local_dem.dump (file);
2590 FOR_BB_INSNS (cfg_bb, rinsn)
2591 {
2592 if (!NONDEBUG_INSN_P (rinsn) || !has_vtype_op (rinsn))
2593 continue;
2594 fprintf (file, "<insn %d>=", INSN_UID (rinsn));
2595 const auto &info = vector_insn_infos[INSN_UID (rinsn)];
2596 info.dump (file);
2597 }
2598 fprintf (file, "<FOOTER>=");
2599 vector_block_infos[cfg_bb->index].reaching_out.dump (file);
acc10c79
JZZ
2600 fprintf (file, "<Probability>=");
2601 vector_block_infos[cfg_bb->index].probability.dump (file);
9243c3d1
JZZ
2602 fprintf (file, "\n\n");
2603 }
2604
2605 fprintf (file, "\n");
2606 FOR_ALL_BB_FN (cfg_bb, cfun)
2607 {
2608 fprintf (file, "Local properties of <bb %d>:\n", cfg_bb->index);
2609
2610 fprintf (file, "<ANTLOC>=");
2611 if (vector_antic == nullptr)
2612 fprintf (file, "(nil)\n");
2613 else
2614 dump_bitmap_file (file, vector_antic[cfg_bb->index]);
2615
2616 fprintf (file, "<AVLOC>=");
2617 if (vector_comp == nullptr)
2618 fprintf (file, "(nil)\n");
2619 else
2620 dump_bitmap_file (file, vector_comp[cfg_bb->index]);
2621
2622 fprintf (file, "<TRANSP>=");
2623 if (vector_transp == nullptr)
2624 fprintf (file, "(nil)\n");
2625 else
2626 dump_bitmap_file (file, vector_transp[cfg_bb->index]);
2627
2628 fprintf (file, "<KILL>=");
2629 if (vector_kill == nullptr)
2630 fprintf (file, "(nil)\n");
2631 else
2632 dump_bitmap_file (file, vector_kill[cfg_bb->index]);
2633 }
2634
2635 fprintf (file, "\n");
2636 FOR_ALL_BB_FN (cfg_bb, cfun)
2637 {
2638 fprintf (file, "Global LCM (Lazy code motion) result of <bb %d>:\n",
2639 cfg_bb->index);
2640
2641 fprintf (file, "<AVIN>=");
2642 if (vector_avin == nullptr)
2643 fprintf (file, "(nil)\n");
2644 else
2645 dump_bitmap_file (file, vector_avin[cfg_bb->index]);
2646
2647 fprintf (file, "<AVOUT>=");
2648 if (vector_avout == nullptr)
2649 fprintf (file, "(nil)\n");
2650 else
2651 dump_bitmap_file (file, vector_avout[cfg_bb->index]);
2652
2653 fprintf (file, "<DELETE>=");
2654 if (vector_del == nullptr)
2655 fprintf (file, "(nil)\n");
2656 else
2657 dump_bitmap_file (file, vector_del[cfg_bb->index]);
2658 }
2659
2660 fprintf (file, "\nGlobal LCM (Lazy code motion) INSERT info:\n");
2661 for (size_t i = 0; i < vector_exprs.length (); i++)
2662 {
2663 for (int ed = 0; ed < NUM_EDGES (vector_edge_list); ed++)
2664 {
2665 edge eg = INDEX_EDGE (vector_edge_list, ed);
2666 if (bitmap_bit_p (vector_insert[ed], i))
2667 fprintf (dump_file,
2668 "INSERT edge %d from bb %d to bb %d for VSETVL "
2669 "expr[%ld]\n",
2670 ed, eg->src->index, eg->dest->index, i);
2671 }
2672 }
2673}
2674
2675const pass_data pass_data_vsetvl = {
2676 RTL_PASS, /* type */
2677 "vsetvl", /* name */
2678 OPTGROUP_NONE, /* optinfo_flags */
2679 TV_NONE, /* tv_id */
2680 0, /* properties_required */
2681 0, /* properties_provided */
2682 0, /* properties_destroyed */
2683 0, /* todo_flags_start */
2684 0, /* todo_flags_finish */
2685};
2686
2687class pass_vsetvl : public rtl_opt_pass
2688{
2689private:
c139f5e1
KC
2690 vector_infos_manager *m_vector_manager;
2691
2692 const vector_insn_info &get_vector_info (const rtx_insn *) const;
2693 const vector_insn_info &get_vector_info (const insn_info *) const;
2694 const vector_block_info &get_block_info (const basic_block) const;
2695 const vector_block_info &get_block_info (const bb_info *) const;
2696 vector_block_info &get_block_info (const basic_block);
2697 vector_block_info &get_block_info (const bb_info *);
2698 void update_vector_info (const insn_info *, const vector_insn_info &);
9243c3d1
JZZ
2699
2700 void simple_vsetvl (void) const;
2701 void lazy_vsetvl (void);
2702
2703 /* Phase 1. */
2704 void compute_local_backward_infos (const bb_info *);
2705
2706 /* Phase 2. */
2707 bool need_vsetvl (const vector_insn_info &, const vector_insn_info &) const;
2708 void transfer_before (vector_insn_info &, insn_info *) const;
2709 void transfer_after (vector_insn_info &, insn_info *) const;
2710 void emit_local_forward_vsetvls (const bb_info *);
2711
2712 /* Phase 3. */
4f673c5e
JZZ
2713 enum fusion_type get_backward_fusion_type (const bb_info *,
2714 const vector_insn_info &);
6b6b9c68 2715 bool hard_empty_block_p (const bb_info *, const vector_insn_info &) const;
387cd9d3
JZZ
2716 bool backward_demand_fusion (void);
2717 bool forward_demand_fusion (void);
6b6b9c68 2718 bool cleanup_illegal_dirty_blocks (void);
387cd9d3 2719 void demand_fusion (void);
9243c3d1
JZZ
2720
2721 /* Phase 4. */
2722 void prune_expressions (void);
2723 void compute_local_properties (void);
6b6b9c68 2724 bool can_refine_vsetvl_p (const basic_block, const vector_insn_info &) const;
9243c3d1
JZZ
2725 void refine_vsetvls (void) const;
2726 void cleanup_vsetvls (void);
2727 bool commit_vsetvls (void);
2728 void pre_vsetvl (void);
2729
2730 /* Phase 5. */
c919d059
KC
2731 rtx_insn *get_vsetvl_at_end (const bb_info *, vector_insn_info *) const;
2732 void local_eliminate_vsetvl_insn (const bb_info *) const;
9243c3d1
JZZ
2733 void cleanup_insns (void) const;
2734
6b6b9c68
JZZ
2735 /* Phase 6. */
2736 void propagate_avl (void) const;
2737
9243c3d1
JZZ
2738 void init (void);
2739 void done (void);
acc10c79 2740 void compute_probabilities (void);
9243c3d1
JZZ
2741
2742public:
2743 pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {}
2744
2745 /* opt_pass methods: */
2746 virtual bool gate (function *) final override { return TARGET_VECTOR; }
2747 virtual unsigned int execute (function *) final override;
2748}; // class pass_vsetvl
2749
c139f5e1
KC
2750const vector_insn_info &
2751pass_vsetvl::get_vector_info (const rtx_insn *i) const
2752{
2753 return m_vector_manager->vector_insn_infos[INSN_UID (i)];
2754}
2755
2756const vector_insn_info &
2757pass_vsetvl::get_vector_info (const insn_info *i) const
2758{
2759 return m_vector_manager->vector_insn_infos[i->uid ()];
2760}
2761
2762const vector_block_info &
2763pass_vsetvl::get_block_info (const basic_block bb) const
2764{
2765 return m_vector_manager->vector_block_infos[bb->index];
2766}
2767
2768const vector_block_info &
2769pass_vsetvl::get_block_info (const bb_info *bb) const
2770{
2771 return m_vector_manager->vector_block_infos[bb->index ()];
2772}
2773
2774vector_block_info &
2775pass_vsetvl::get_block_info (const basic_block bb)
2776{
2777 return m_vector_manager->vector_block_infos[bb->index];
2778}
2779
2780vector_block_info &
2781pass_vsetvl::get_block_info (const bb_info *bb)
2782{
2783 return m_vector_manager->vector_block_infos[bb->index ()];
2784}
2785
2786void
2787pass_vsetvl::update_vector_info (const insn_info *i,
2788 const vector_insn_info &new_info)
2789{
2790 m_vector_manager->vector_insn_infos[i->uid ()] = new_info;
2791}
2792
9243c3d1
JZZ
2793/* Simple m_vsetvl_insert vsetvl for optimize == 0. */
2794void
2795pass_vsetvl::simple_vsetvl (void) const
2796{
2797 if (dump_file)
2798 fprintf (dump_file,
2799 "\nEntering Simple VSETVL PASS and Handling %d basic blocks for "
2800 "function:%s\n",
2801 n_basic_blocks_for_fn (cfun), function_name (cfun));
2802
2803 basic_block cfg_bb;
2804 rtx_insn *rinsn;
2805 FOR_ALL_BB_FN (cfg_bb, cfun)
2806 {
2807 FOR_BB_INSNS (cfg_bb, rinsn)
2808 {
2809 if (!NONDEBUG_INSN_P (rinsn))
2810 continue;
2811 if (has_vtype_op (rinsn))
2812 {
c139f5e1 2813 const auto info = get_vector_info (rinsn);
9243c3d1
JZZ
2814 emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_BEFORE, info,
2815 NULL_RTX, rinsn);
2816 }
2817 }
2818 }
2819}
2820
2821/* Compute demanded information by backward data-flow analysis. */
2822void
2823pass_vsetvl::compute_local_backward_infos (const bb_info *bb)
2824{
2825 vector_insn_info change;
2826 change.set_empty ();
2827
2828 auto &block_info = m_vector_manager->vector_block_infos[bb->index ()];
2829 block_info.reaching_out = change;
2830
2831 for (insn_info *insn : bb->reverse_real_nondebug_insns ())
2832 {
c139f5e1 2833 auto &info = get_vector_info (insn);
9243c3d1
JZZ
2834
2835 if (info.uninit_p ())
2836 /* If it is uninitialized, propagate it directly. */
c139f5e1 2837 update_vector_info (insn, change);
9243c3d1
JZZ
2838 else if (info.unknown_p ())
2839 change = info;
2840 else
2841 {
2842 gcc_assert (info.valid_p () && "Unexpected Invalid demanded info");
ec99ffab
JZZ
2843 if (change.valid_p ())
2844 {
a481eed8
JZZ
2845 if (!(propagate_avl_across_demands_p (change, info)
2846 && !reg_available_p (insn, change))
ec99ffab 2847 && change.compatible_p (info))
fdc5abbd 2848 {
c139f5e1 2849 update_vector_info (insn, change.merge (info, LOCAL_MERGE));
fdc5abbd
JZ
2850 /* Fix PR109399, we should update user vsetvl instruction
2851 if there is a change in demand fusion. */
2852 if (vsetvl_insn_p (insn->rtl ()))
2853 change_vsetvl_insn (insn, info);
2854 }
ec99ffab 2855 }
9243c3d1
JZZ
2856 change = info;
2857 }
2858 }
2859
2860 block_info.local_dem = change;
2861 if (block_info.local_dem.empty_p ())
2862 block_info.reaching_out = block_info.local_dem;
2863}
2864
2865/* Return true if a dem_info is required to transition from curr_info to
2866 require before INSN. */
2867bool
2868pass_vsetvl::need_vsetvl (const vector_insn_info &require,
2869 const vector_insn_info &curr_info) const
2870{
2871 if (!curr_info.valid_p () || curr_info.unknown_p () || curr_info.uninit_p ())
2872 return true;
2873
a481eed8 2874 if (require.compatible_p (static_cast<const vl_vtype_info &> (curr_info)))
9243c3d1
JZZ
2875 return false;
2876
2877 return true;
2878}
2879
2880/* Given an incoming state reaching INSN, modifies that state so that it is
2881 minimally compatible with INSN. The resulting state is guaranteed to be
2882 semantically legal for INSN, but may not be the state requested by INSN. */
2883void
2884pass_vsetvl::transfer_before (vector_insn_info &info, insn_info *insn) const
2885{
2886 if (!has_vtype_op (insn->rtl ()))
2887 return;
2888
c139f5e1 2889 const vector_insn_info require = get_vector_info (insn);
9243c3d1
JZZ
2890 if (info.valid_p () && !need_vsetvl (require, info))
2891 return;
2892 info = require;
2893}
2894
2895/* Given a state with which we evaluated insn (see transfer_before above for why
2896 this might be different that the state insn requested), modify the state to
2897 reflect the changes insn might make. */
2898void
2899pass_vsetvl::transfer_after (vector_insn_info &info, insn_info *insn) const
2900{
2901 if (vector_config_insn_p (insn->rtl ()))
2902 {
c139f5e1 2903 info = get_vector_info (insn);
9243c3d1
JZZ
2904 return;
2905 }
2906
60bd33bc
JZZ
2907 if (fault_first_load_p (insn->rtl ())
2908 && info.update_fault_first_load_avl (insn))
2909 return;
9243c3d1
JZZ
2910
2911 /* If this is something that updates VL/VTYPE that we don't know about, set
2912 the state to unknown. */
2913 if (insn->is_call () || insn->is_asm ()
2914 || find_access (insn->defs (), VL_REGNUM)
2915 || find_access (insn->defs (), VTYPE_REGNUM))
2916 info = vector_insn_info::get_unknown ();
2917}
2918
2919/* Emit vsetvl within each block by forward data-flow analysis. */
2920void
2921pass_vsetvl::emit_local_forward_vsetvls (const bb_info *bb)
2922{
2923 auto &block_info = m_vector_manager->vector_block_infos[bb->index ()];
2924 if (block_info.local_dem.empty_p ())
2925 return;
2926
2927 vector_insn_info curr_info;
2928 for (insn_info *insn : bb->real_nondebug_insns ())
2929 {
2930 const vector_insn_info prev_info = curr_info;
a481eed8 2931 enum vsetvl_type type = NUM_VSETVL_TYPE;
9243c3d1
JZZ
2932 transfer_before (curr_info, insn);
2933
2934 if (has_vtype_op (insn->rtl ()))
2935 {
2936 if (static_cast<const vl_vtype_info &> (prev_info)
2937 != static_cast<const vl_vtype_info &> (curr_info))
2938 {
c139f5e1 2939 const auto require = get_vector_info (insn);
9243c3d1
JZZ
2940 if (!require.compatible_p (
2941 static_cast<const vl_vtype_info &> (prev_info)))
a481eed8
JZZ
2942 type = insert_vsetvl (EMIT_BEFORE, insn->rtl (), require,
2943 prev_info);
9243c3d1
JZZ
2944 }
2945 }
2946
a481eed8
JZZ
2947 /* Fix the issue of following sequence:
2948 vsetivli zero, 5
2949 ....
2950 vsetvli zero, zero
2951 vmv.x.s (demand AVL = 8).
2952 ....
2953 incorrect: vsetvli zero, zero ===> Since the curr_info is AVL = 8.
2954 correct: vsetivli zero, 8
2955 vadd (demand AVL = 8). */
2956 if (type == VSETVL_VTYPE_CHANGE_ONLY)
2957 {
2958 /* Update the curr_info to be real correct AVL. */
2959 curr_info.set_avl_info (prev_info.get_avl_info ());
2960 }
9243c3d1
JZZ
2961 transfer_after (curr_info, insn);
2962 }
2963
2964 block_info.reaching_out = curr_info;
2965}
2966
4f673c5e
JZZ
2967enum fusion_type
2968pass_vsetvl::get_backward_fusion_type (const bb_info *bb,
2969 const vector_insn_info &prop)
9243c3d1 2970{
4f673c5e 2971 insn_info *insn = prop.get_insn ();
9243c3d1 2972
4f673c5e
JZZ
2973 /* TODO: We don't backward propagate the explict VSETVL here
2974 since we will change vsetvl and vsetvlmax intrinsics into
2975 no side effects which can be optimized into optimal location
2976 by GCC internal passes. We only need to support these backward
2977 propagation if vsetvl intrinsics have side effects. */
2978 if (vsetvl_insn_p (insn->rtl ()))
2979 return INVALID_FUSION;
2980
2981 gcc_assert (has_vtype_op (insn->rtl ()));
2982 rtx reg = NULL_RTX;
2983
2984 /* Case 1: Don't need VL. Just let it backward propagate. */
ec99ffab 2985 if (!prop.demand_p (DEMAND_AVL))
4f673c5e
JZZ
2986 return VALID_AVL_FUSION;
2987 else
9243c3d1 2988 {
4f673c5e
JZZ
2989 /* Case 2: CONST_INT AVL, we don't need to check def. */
2990 if (prop.has_avl_imm ())
2991 return VALID_AVL_FUSION;
2992 else
2993 {
2994 /* Case 3: REG AVL, we need to check the distance of def to make
2995 sure we won't backward propagate over the def. */
2996 gcc_assert (prop.has_avl_reg ());
2997 if (vlmax_avl_p (prop.get_avl ()))
2998 /* Check VL operand for vsetvl vl,zero. */
ec99ffab 2999 reg = prop.get_avl_reg_rtx ();
4f673c5e
JZZ
3000 else
3001 /* Check AVL operand for vsetvl zero,avl. */
ec99ffab 3002 reg = prop.get_avl ();
4f673c5e
JZZ
3003 }
3004 }
9243c3d1 3005
4f673c5e 3006 gcc_assert (reg);
ec99ffab
JZZ
3007 if (!prop.get_avl_source ()->insn ()->is_phi ()
3008 && prop.get_avl_source ()->insn ()->bb () == insn->bb ())
6b6b9c68
JZZ
3009 return INVALID_FUSION;
3010 hash_set<set_info *> sets
3011 = get_all_sets (prop.get_avl_source (), true, true, true);
3012 if (any_set_in_bb_p (sets, insn->bb ()))
3013 return INVALID_FUSION;
3014
3015 if (vlmax_avl_p (prop.get_avl ()))
4f673c5e 3016 {
6b6b9c68 3017 if (find_reg_killed_by (bb, reg))
4f673c5e 3018 return INVALID_FUSION;
6b6b9c68
JZZ
3019 else
3020 return VALID_AVL_FUSION;
4f673c5e 3021 }
6b6b9c68
JZZ
3022
3023 /* By default, we always enable backward fusion so that we can
3024 gain more optimizations. */
3025 if (!find_reg_killed_by (bb, reg))
3026 return VALID_AVL_FUSION;
3027 return KILLED_AVL_FUSION;
3028}
3029
3030/* We almost enable all cases in get_backward_fusion_type, this function
3031 disable the backward fusion by changing dirty blocks into hard empty
3032 blocks in forward dataflow. We can have more accurate optimization by
3033 this method. */
3034bool
3035pass_vsetvl::hard_empty_block_p (const bb_info *bb,
3036 const vector_insn_info &info) const
3037{
3038 if (!info.dirty_p () || !info.has_avl_reg ())
3039 return false;
3040
3041 basic_block cfg_bb = bb->cfg_bb ();
3042 sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index];
ec99ffab
JZZ
3043 set_info *set = info.get_avl_source ();
3044 rtx avl = gen_rtx_REG (Pmode, set->regno ());
6b6b9c68
JZZ
3045 hash_set<set_info *> sets = get_all_sets (set, true, false, false);
3046 hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb);
3047
3048 if (find_reg_killed_by (bb, avl))
3049 {
3050 /* Condition 1:
3051 Dirty block with killed AVL means that the empty block (no RVV
3052 instructions) are polluted as Dirty blocks with the value of current
3053 AVL is killed. For example:
3054 bb 0:
3055 ...
3056 bb 1:
3057 def a5
3058 bb 2:
3059 RVV (use a5)
3060 In backward dataflow, we will polluted BB0 and BB1 as Dirt with AVL
3061 killed. since a5 is killed in BB1.
3062 In this case, let's take a look at this example:
3063
3064 bb 3: bb 4:
3065 def3 a5 def4 a5
3066 bb 5: bb 6:
3067 def1 a5 def2 a5
3068 \ /
3069 \ /
3070 \ /
3071 \ /
3072 bb 7:
3073 RVV (use a5)
3074 In thi case, we can polluted BB5 and BB6 as dirty if get-def
3075 of a5 from RVV instruction in BB7 is the def1 in BB5 and
3076 def2 BB6 so we can return false early here for HARD_EMPTY_BLOCK_P.
3077 However, we are not sure whether BB3 and BB4 can be
3078 polluted as Dirty with AVL killed so we can't return false
3079 for HARD_EMPTY_BLOCK_P here since it's too early which will
3080 potentially produce issues. */
3081 gcc_assert (info.dirty_with_killed_avl_p ());
3082 if (info.get_avl_source ()
3083 && get_same_bb_set (sets, bb->cfg_bb ()) == info.get_avl_source ())
3084 return false;
4f673c5e 3085 }
9243c3d1 3086
6b6b9c68
JZZ
3087 /* Condition 2:
3088 Suppress the VL/VTYPE info backward propagation too early:
3089 ________
3090 | BB0 |
3091 |________|
3092 |
3093 ____|____
3094 | BB1 |
3095 |________|
3096 In this case, suppose BB 1 has multiple predecessors, BB 0 is one
3097 of them. BB1 has VL/VTYPE info (may be VALID or DIRTY) to backward
3098 propagate.
3099 The AVIN (available in) which is calculated by LCM is empty only
3100 in these 2 circumstances:
3101 1. all predecessors of BB1 are empty (not VALID
3102 and can not be polluted in backward fusion flow)
3103 2. VL/VTYPE info of BB1 predecessors are conflict.
3104
3105 We keep it as dirty in 2nd circumstance and set it as HARD_EMPTY
3106 (can not be polluted as DIRTY any more) in 1st circumstance.
3107 We don't backward propagate in 1st circumstance since there is
3108 no VALID RVV instruction and no polluted blocks (dirty blocks)
3109 by backward propagation from other following blocks.
3110 It's meaningless to keep it as Dirty anymore.
3111
3112 However, since we keep it as dirty in 2nd since there are VALID or
3113 Dirty blocks in predecessors, we can still gain the benefits and
3114 optimization opportunities. For example, in this case:
3115 for (size_t i = 0; i < n; i++)
3116 {
3117 if (i != cond) {
3118 vint8mf8_t v = *(vint8mf8_t*)(in + i + 100);
3119 *(vint8mf8_t*)(out + i + 100) = v;
3120 } else {
3121 vbool1_t v = *(vbool1_t*)(in + i + 400);
3122 *(vbool1_t*)(out + i + 400) = v;
3123 }
3124 }
3125 VL/VTYPE in if-else are conflict which will produce empty AVIN LCM result
3126 but we can still keep dirty blocks if *(i != cond)* is very unlikely then
3127 we can preset vsetvl (VL/VTYPE) info from else (static propability model).
3128
3129 We don't want to backward propagate VL/VTYPE information too early
3130 which is not the optimal and may potentially produce issues. */
3131 if (bitmap_empty_p (avin))
4f673c5e 3132 {
6b6b9c68
JZZ
3133 bool hard_empty_p = true;
3134 for (const basic_block pred_cfg_bb : pred_cfg_bbs)
3135 {
3136 if (pred_cfg_bb == ENTRY_BLOCK_PTR_FOR_FN (cfun))
3137 continue;
3138 sbitmap avout = m_vector_manager->vector_avout[pred_cfg_bb->index];
3139 if (!bitmap_empty_p (avout))
3140 {
3141 hard_empty_p = false;
3142 break;
3143 }
3144 }
3145 if (hard_empty_p)
3146 return true;
4f673c5e 3147 }
9243c3d1 3148
6b6b9c68
JZZ
3149 edge e;
3150 edge_iterator ei;
3151 bool has_avl_killed_insn_p = false;
3152 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
4f673c5e 3153 {
6b6b9c68
JZZ
3154 const auto block_info
3155 = m_vector_manager->vector_block_infos[e->dest->index];
3156 if (block_info.local_dem.dirty_with_killed_avl_p ())
9243c3d1 3157 {
6b6b9c68
JZZ
3158 has_avl_killed_insn_p = true;
3159 break;
3160 }
3161 }
3162 if (!has_avl_killed_insn_p)
3163 return false;
4f673c5e 3164
6b6b9c68
JZZ
3165 bool any_set_in_bbs_p = false;
3166 for (const basic_block pred_cfg_bb : pred_cfg_bbs)
3167 {
3168 insn_info *def_insn = extract_single_source (set);
3169 if (def_insn)
3170 {
3171 /* Condition 3:
3172
3173 Case 1: Case 2:
3174 bb 0: bb 0:
3175 def a5 101 ...
3176 bb 1: bb 1:
3177 ... ...
3178 bb 2: bb 2:
3179 RVV 1 (use a5 with TAIL ANY) ...
3180 bb 3: bb 3:
3181 def a5 101 def a5 101
3182 bb 4: bb 4:
3183 ... ...
3184 bb 5: bb 5:
3185 RVV 2 (use a5 with TU) RVV 1 (use a5)
3186
3187 Case 1: We can pollute BB3,BB2,BB1,BB0 are all Dirt blocks
3188 with killed AVL so that we can merge TU demand info from RVV 2
3189 into RVV 1 and elide the vsevl instruction in BB5.
3190
3191 TODO: We only optimize for single source def since multiple source
3192 def is quite complicated.
3193
3194 Case 2: We only can pollute bb 3 as dirty and it has been accepted
3195 in Condition 2 and we can't pollute BB3,BB2,BB1,BB0 like case 1. */
3196 insn_info *last_killed_insn
3197 = find_reg_killed_by (crtl->ssa->bb (pred_cfg_bb), avl);
3198 if (!last_killed_insn || pred_cfg_bb == def_insn->bb ()->cfg_bb ())
3199 continue;
3200 if (source_equal_p (last_killed_insn, def_insn))
4f673c5e 3201 {
6b6b9c68
JZZ
3202 any_set_in_bbs_p = true;
3203 break;
4f673c5e 3204 }
9243c3d1
JZZ
3205 }
3206 else
4f673c5e 3207 {
6b6b9c68
JZZ
3208 /* Condition 4:
3209
3210 bb 0: bb 1: bb 3:
3211 def1 a5 def2 a5 ...
3212 \ / /
3213 \ / /
3214 \ / /
3215 \ / /
3216 bb 4: /
3217 | /
3218 | /
3219 bb 5: /
3220 | /
3221 | /
3222 bb 6: /
3223 | /
3224 | /
3225 bb 8:
3226 RVV 1 (use a5)
3227 If we get-def (REAL) of a5 from RVV 1 instruction, we will get
3228 def1 from BB0 and def2 from BB1. So we will pollute BB6,BB5,BB4,
3229 BB0,BB1 with DIRTY and set BB3 as HARD_EMPTY so that we won't
3230 propagate AVL to BB3. */
3231 if (any_set_in_bb_p (sets, crtl->ssa->bb (pred_cfg_bb)))
3232 {
3233 any_set_in_bbs_p = true;
3234 break;
3235 }
4f673c5e 3236 }
9243c3d1 3237 }
6b6b9c68
JZZ
3238 if (!any_set_in_bbs_p)
3239 return true;
3240 return false;
9243c3d1
JZZ
3241}
3242
3243/* Compute global backward demanded info. */
387cd9d3
JZZ
3244bool
3245pass_vsetvl::backward_demand_fusion (void)
9243c3d1
JZZ
3246{
3247 /* We compute global infos by backward propagation.
3248 We want to have better performance in these following cases:
3249
3250 1. for (size_t i = 0; i < n; i++) {
3251 if (i != cond) {
3252 vint8mf8_t v = *(vint8mf8_t*)(in + i + 100);
3253 *(vint8mf8_t*)(out + i + 100) = v;
3254 } else {
3255 vbool1_t v = *(vbool1_t*)(in + i + 400);
3256 *(vbool1_t*)(out + i + 400) = v;
3257 }
3258 }
3259
3260 Since we don't have any RVV instruction in the BEFORE blocks,
3261 LCM fails to optimize such case. We want to backward propagate
3262 them into empty blocks so that we could have better performance
3263 in LCM.
3264
3265 2. bb 0:
3266 vsetvl e8,mf8 (demand RATIO)
3267 bb 1:
3268 vsetvl e32,mf2 (demand SEW and LMUL)
3269 We backward propagate the first VSETVL into e32,mf2 so that we
3270 could be able to eliminate the second VSETVL in LCM. */
3271
387cd9d3 3272 bool changed_p = false;
9243c3d1
JZZ
3273 for (const bb_info *bb : crtl->ssa->reverse_bbs ())
3274 {
3275 basic_block cfg_bb = bb->cfg_bb ();
b9b251b7
JZZ
3276 const auto &curr_block_info
3277 = m_vector_manager->vector_block_infos[cfg_bb->index];
3278 const auto &prop = curr_block_info.local_dem;
9243c3d1
JZZ
3279
3280 /* If there is nothing to propagate, just skip it. */
3281 if (!prop.valid_or_dirty_p ())
3282 continue;
3283
b9b251b7 3284 if (!backward_propagate_worthwhile_p (cfg_bb, curr_block_info))
9243c3d1
JZZ
3285 continue;
3286
d06e9264
JZ
3287 /* Fix PR108270:
3288
3289 bb 0 -> bb 1
3290 We don't need to backward fuse VL/VTYPE info from bb 1 to bb 0
3291 if bb 1 is not inside a loop and all predecessors of bb 0 are empty. */
3292 if (m_vector_manager->all_empty_predecessor_p (cfg_bb))
3293 continue;
3294
9243c3d1
JZZ
3295 edge e;
3296 edge_iterator ei;
3297 /* Backward propagate to each predecessor. */
3298 FOR_EACH_EDGE (e, ei, cfg_bb->preds)
3299 {
9243c3d1
JZZ
3300 auto &block_info
3301 = m_vector_manager->vector_block_infos[e->src->index];
3302
3303 /* We don't propagate through critical edges. */
3304 if (e->flags & EDGE_COMPLEX)
3305 continue;
3306 if (e->src->index == ENTRY_BLOCK_PTR_FOR_FN (cfun)->index)
3307 continue;
44c918b5
JZZ
3308 /* If prop is demand of vsetvl instruction and reaching doesn't demand
3309 AVL. We don't backward propagate since vsetvl instruction has no
3310 side effects. */
3311 if (vsetvl_insn_p (prop.get_insn ()->rtl ())
3312 && propagate_avl_across_demands_p (prop, block_info.reaching_out))
3313 continue;
9243c3d1
JZZ
3314
3315 if (block_info.reaching_out.unknown_p ())
3316 continue;
6b6b9c68
JZZ
3317 else if (block_info.reaching_out.hard_empty_p ())
3318 continue;
9243c3d1
JZZ
3319 else if (block_info.reaching_out.empty_p ())
3320 {
4f673c5e
JZZ
3321 enum fusion_type type
3322 = get_backward_fusion_type (crtl->ssa->bb (e->src), prop);
3323 if (type == INVALID_FUSION)
9243c3d1
JZZ
3324 continue;
3325
4f673c5e
JZZ
3326 block_info.reaching_out = prop;
3327 block_info.reaching_out.set_dirty (type);
6b6b9c68
JZZ
3328
3329 if (prop.has_avl_reg () && !vlmax_avl_p (prop.get_avl ()))
3330 {
3331 hash_set<set_info *> sets
3332 = get_all_sets (prop.get_avl_source (), true, true, true);
3333 set_info *set = get_same_bb_set (sets, e->src);
3334 if (set)
3335 block_info.reaching_out.set_avl_info (
3336 avl_info (prop.get_avl (), set));
3337 }
3338
4f673c5e
JZZ
3339 block_info.local_dem = block_info.reaching_out;
3340 block_info.probability = curr_block_info.probability;
3341 changed_p = true;
9243c3d1
JZZ
3342 }
3343 else if (block_info.reaching_out.dirty_p ())
3344 {
3345 /* DIRTY -> DIRTY or VALID -> DIRTY. */
3346 vector_insn_info new_info;
3347
3348 if (block_info.reaching_out.compatible_p (prop))
3349 {
ec99ffab 3350 if (block_info.reaching_out.available_p (prop))
9243c3d1 3351 continue;
4f673c5e 3352 new_info = block_info.reaching_out.merge (prop, GLOBAL_MERGE);
6b6b9c68
JZZ
3353 new_info.set_dirty (
3354 block_info.reaching_out.dirty_with_killed_avl_p ());
3355 block_info.probability += curr_block_info.probability;
9243c3d1
JZZ
3356 }
3357 else
3358 {
4f673c5e
JZZ
3359 if (curr_block_info.probability > block_info.probability)
3360 {
6b6b9c68
JZZ
3361 enum fusion_type type
3362 = get_backward_fusion_type (crtl->ssa->bb (e->src),
3363 prop);
3364 if (type == INVALID_FUSION)
3365 continue;
4f673c5e 3366 new_info = prop;
6b6b9c68 3367 new_info.set_dirty (type);
4f673c5e
JZZ
3368 block_info.probability = curr_block_info.probability;
3369 }
9243c3d1
JZZ
3370 else
3371 continue;
3372 }
3373
ec99ffab
JZZ
3374 if (propagate_avl_across_demands_p (prop,
3375 block_info.reaching_out))
3376 {
3377 rtx reg = new_info.get_avl_reg_rtx ();
3378 if (find_reg_killed_by (crtl->ssa->bb (e->src), reg))
3379 new_info.set_dirty (true);
3380 }
3381
9243c3d1
JZZ
3382 block_info.local_dem = new_info;
3383 block_info.reaching_out = new_info;
387cd9d3 3384 changed_p = true;
9243c3d1
JZZ
3385 }
3386 else
3387 {
3388 /* We not only change the info during backward propagation,
3389 but also change the VSETVL instruction. */
3390 gcc_assert (block_info.reaching_out.valid_p ());
6b6b9c68
JZZ
3391 hash_set<set_info *> sets
3392 = get_all_sets (prop.get_avl_source (), true, false, false);
3393 set_info *set = get_same_bb_set (sets, e->src);
3394 if (vsetvl_insn_p (block_info.reaching_out.get_insn ()->rtl ())
3395 && prop.has_avl_reg () && !vlmax_avl_p (prop.get_avl ()))
3396 {
3397 if (!block_info.reaching_out.same_vlmax_p (prop))
3398 continue;
3399 if (block_info.reaching_out.same_vtype_p (prop))
3400 continue;
3401 if (!set)
3402 continue;
3403 if (set->insn () != block_info.reaching_out.get_insn ())
3404 continue;
3405 }
ec99ffab
JZZ
3406
3407 if (!block_info.reaching_out.compatible_p (prop))
3408 continue;
3409 if (block_info.reaching_out.available_p (prop))
3410 continue;
9243c3d1
JZZ
3411
3412 vector_insn_info be_merged = block_info.reaching_out;
3413 if (block_info.local_dem == block_info.reaching_out)
3414 be_merged = block_info.local_dem;
4f673c5e
JZZ
3415 vector_insn_info new_info = be_merged.merge (prop, GLOBAL_MERGE);
3416
3417 if (curr_block_info.probability > block_info.probability)
3418 block_info.probability = curr_block_info.probability;
9243c3d1 3419
ec99ffab 3420 if (propagate_avl_across_demands_p (prop, block_info.reaching_out)
a481eed8
JZZ
3421 && !reg_available_p (crtl->ssa->bb (e->src)->end_insn (),
3422 new_info))
ec99ffab
JZZ
3423 continue;
3424
aef20243 3425 change_vsetvl_insn (new_info.get_insn (), new_info);
9243c3d1
JZZ
3426 if (block_info.local_dem == block_info.reaching_out)
3427 block_info.local_dem = new_info;
3428 block_info.reaching_out = new_info;
387cd9d3 3429 changed_p = true;
9243c3d1
JZZ
3430 }
3431 }
3432 }
387cd9d3
JZZ
3433 return changed_p;
3434}
3435
3436/* Compute global forward demanded info. */
3437bool
3438pass_vsetvl::forward_demand_fusion (void)
3439{
3440 /* Enhance the global information propagation especially
3441 backward propagation miss the propagation.
3442 Consider such case:
3443
3444 bb0
3445 (TU)
3446 / \
3447 bb1 bb2
3448 (TU) (ANY)
3449 existing edge -----> \ / (TU) <----- LCM create this edge.
3450 bb3
3451 (TU)
3452
3453 Base on the situation, LCM fails to eliminate the VSETVL instruction and
3454 insert an edge from bb2 to bb3 since we can't backward propagate bb3 into
3455 bb2. To avoid this confusing LCM result and non-optimal codegen, we should
3456 forward propagate information from bb0 to bb2 which is friendly to LCM. */
3457 bool changed_p = false;
3458 for (const bb_info *bb : crtl->ssa->bbs ())
3459 {
3460 basic_block cfg_bb = bb->cfg_bb ();
3461 const auto &prop
3462 = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out;
3463
3464 /* If there is nothing to propagate, just skip it. */
3465 if (!prop.valid_or_dirty_p ())
3466 continue;
3467
00fb7698
JZZ
3468 if (cfg_bb == ENTRY_BLOCK_PTR_FOR_FN (cfun))
3469 continue;
3470
ec99ffab
JZZ
3471 if (vsetvl_insn_p (prop.get_insn ()->rtl ()))
3472 continue;
3473
387cd9d3
JZZ
3474 edge e;
3475 edge_iterator ei;
3476 /* Forward propagate to each successor. */
3477 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
3478 {
3479 auto &local_dem
3480 = m_vector_manager->vector_block_infos[e->dest->index].local_dem;
3481 auto &reaching_out
3482 = m_vector_manager->vector_block_infos[e->dest->index].reaching_out;
3483
3484 /* It's quite obvious, we don't need to propagate itself. */
3485 if (e->dest->index == cfg_bb->index)
3486 continue;
00fb7698
JZZ
3487 /* We don't propagate through critical edges. */
3488 if (e->flags & EDGE_COMPLEX)
3489 continue;
3490 if (e->dest->index == EXIT_BLOCK_PTR_FOR_FN (cfun)->index)
3491 continue;
387cd9d3
JZZ
3492
3493 /* If there is nothing to propagate, just skip it. */
3494 if (!local_dem.valid_or_dirty_p ())
3495 continue;
ec99ffab 3496 if (local_dem.available_p (prop))
4f673c5e
JZZ
3497 continue;
3498 if (!local_dem.compatible_p (prop))
3499 continue;
ec99ffab
JZZ
3500 if (propagate_avl_across_demands_p (prop, local_dem))
3501 continue;
387cd9d3 3502
4f673c5e
JZZ
3503 vector_insn_info new_info = local_dem.merge (prop, GLOBAL_MERGE);
3504 new_info.set_insn (local_dem.get_insn ());
3505 if (local_dem.dirty_p ())
387cd9d3 3506 {
4f673c5e 3507 gcc_assert (local_dem == reaching_out);
6b6b9c68 3508 new_info.set_dirty (local_dem.dirty_with_killed_avl_p ());
4f673c5e 3509 local_dem = new_info;
4f673c5e
JZZ
3510 reaching_out = local_dem;
3511 }
3512 else
3513 {
3514 if (reaching_out == local_dem)
3515 reaching_out = new_info;
3516 local_dem = new_info;
3517 change_vsetvl_insn (local_dem.get_insn (), new_info);
387cd9d3 3518 }
4f673c5e
JZZ
3519 auto &prob
3520 = m_vector_manager->vector_block_infos[e->dest->index].probability;
3521 auto &curr_prob
3522 = m_vector_manager->vector_block_infos[cfg_bb->index].probability;
3523 prob = curr_prob * e->probability;
3524 changed_p = true;
387cd9d3
JZZ
3525 }
3526 }
3527 return changed_p;
3528}
3529
3530void
3531pass_vsetvl::demand_fusion (void)
3532{
3533 bool changed_p = true;
3534 while (changed_p)
3535 {
3536 changed_p = false;
4f673c5e
JZZ
3537 /* To optimize the case like this:
3538 void f2 (int8_t * restrict in, int8_t * restrict out, int n, int cond)
3539 {
3540 size_t vl = 101;
3541
3542 for (size_t i = 0; i < n; i++)
3543 {
3544 vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl);
3545 __riscv_vse8_v_i8mf8 (out + i + 300, v, vl);
3546 }
3547
3548 for (size_t i = 0; i < n; i++)
3549 {
3550 vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
3551 __riscv_vse8_v_i8mf8 (out + i, v, vl);
3552
3553 vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl);
3554 __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
3555 }
3556 }
3557
3558 bb 0: li a5, 101 (killed avl)
3559 ...
3560 bb 1: vsetvli zero, a5, ta
3561 ...
3562 bb 2: li a5, 101 (killed avl)
3563 ...
3564 bb 3: vsetvli zero, a3, tu
3565
3566 We want to fuse VSEVLI instructions on bb 1 and bb 3. However, there is
3567 an AVL kill instruction in bb 2 that we can't backward fuse bb 3 or
3568 forward bb 1 arbitrarily. We need available information of each block to
3569 help for such cases. */
6b6b9c68
JZZ
3570 changed_p |= backward_demand_fusion ();
3571 changed_p |= forward_demand_fusion ();
3572 }
3573
3574 changed_p = true;
3575 while (changed_p)
3576 {
3577 changed_p = false;
3578 prune_expressions ();
3579 m_vector_manager->create_bitmap_vectors ();
3580 compute_local_properties ();
4f673c5e
JZZ
3581 compute_available (m_vector_manager->vector_comp,
3582 m_vector_manager->vector_kill,
3583 m_vector_manager->vector_avout,
3584 m_vector_manager->vector_avin);
6b6b9c68 3585 changed_p |= cleanup_illegal_dirty_blocks ();
4f673c5e
JZZ
3586 m_vector_manager->free_bitmap_vectors ();
3587 if (!m_vector_manager->vector_exprs.is_empty ())
3588 m_vector_manager->vector_exprs.release ();
387cd9d3 3589 }
9243c3d1
JZZ
3590
3591 if (dump_file)
3592 {
3593 fprintf (dump_file, "\n\nDirty blocks list: ");
681a5632
JZZ
3594 for (const bb_info *bb : crtl->ssa->bbs ())
3595 if (m_vector_manager->vector_block_infos[bb->index ()]
3596 .reaching_out.dirty_p ())
3597 fprintf (dump_file, "%d ", bb->index ());
9243c3d1
JZZ
3598 fprintf (dump_file, "\n\n");
3599 }
3600}
3601
6b6b9c68
JZZ
3602/* Cleanup illegal dirty blocks. */
3603bool
3604pass_vsetvl::cleanup_illegal_dirty_blocks (void)
3605{
3606 bool changed_p = false;
3607 for (const bb_info *bb : crtl->ssa->bbs ())
3608 {
3609 basic_block cfg_bb = bb->cfg_bb ();
3610 const auto &prop
3611 = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out;
3612
3613 /* If there is nothing to cleanup, just skip it. */
3614 if (!prop.valid_or_dirty_p ())
3615 continue;
3616
3617 if (hard_empty_block_p (bb, prop))
3618 {
3619 m_vector_manager->vector_block_infos[cfg_bb->index].local_dem
3620 = vector_insn_info::get_hard_empty ();
3621 m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out
3622 = vector_insn_info::get_hard_empty ();
3623 changed_p = true;
3624 continue;
3625 }
3626 }
3627 return changed_p;
3628}
3629
9243c3d1
JZZ
3630/* Assemble the candidates expressions for LCM. */
3631void
3632pass_vsetvl::prune_expressions (void)
3633{
681a5632 3634 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 3635 {
681a5632
JZZ
3636 if (m_vector_manager->vector_block_infos[bb->index ()]
3637 .local_dem.valid_or_dirty_p ())
9243c3d1 3638 m_vector_manager->create_expr (
681a5632
JZZ
3639 m_vector_manager->vector_block_infos[bb->index ()].local_dem);
3640 if (m_vector_manager->vector_block_infos[bb->index ()]
9243c3d1
JZZ
3641 .reaching_out.valid_or_dirty_p ())
3642 m_vector_manager->create_expr (
681a5632 3643 m_vector_manager->vector_block_infos[bb->index ()].reaching_out);
9243c3d1
JZZ
3644 }
3645
3646 if (dump_file)
3647 {
3648 fprintf (dump_file, "\nThe total VSETVL expression num = %d\n",
3649 m_vector_manager->vector_exprs.length ());
3650 fprintf (dump_file, "Expression List:\n");
3651 for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
3652 {
3653 fprintf (dump_file, "Expr[%ld]:\n", i);
3654 m_vector_manager->vector_exprs[i]->dump (dump_file);
3655 fprintf (dump_file, "\n");
3656 }
3657 }
3658}
3659
4f673c5e
JZZ
3660/* Compute the local properties of each recorded expression.
3661
3662 Local properties are those that are defined by the block, irrespective of
3663 other blocks.
3664
3665 An expression is transparent in a block if its operands are not modified
3666 in the block.
3667
3668 An expression is computed (locally available) in a block if it is computed
3669 at least once and expression would contain the same value if the
3670 computation was moved to the end of the block.
3671
3672 An expression is locally anticipatable in a block if it is computed at
3673 least once and expression would contain the same value if the computation
3674 was moved to the beginning of the block. */
9243c3d1
JZZ
3675void
3676pass_vsetvl::compute_local_properties (void)
3677{
3678 /* - If T is locally available at the end of a block, then T' must be
3679 available at the end of the same block. Since some optimization has
3680 occurred earlier, T' might not be locally available, however, it must
3681 have been previously computed on all paths. As a formula, T at AVLOC(B)
3682 implies that T' at AVOUT(B).
3683 An "available occurrence" is one that is the last occurrence in the
3684 basic block and the operands are not modified by following statements in
3685 the basic block [including this insn].
3686
3687 - If T is locally anticipated at the beginning of a block, then either
3688 T', is locally anticipated or it is already available from previous
3689 blocks. As a formula, this means that T at ANTLOC(B) implies that T' at
3690 ANTLOC(B) at AVIN(B).
3691 An "anticipatable occurrence" is one that is the first occurrence in the
3692 basic block, the operands are not modified in the basic block prior
3693 to the occurrence and the output is not used between the start of
3694 the block and the occurrence. */
3695
3696 basic_block cfg_bb;
4f673c5e 3697 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 3698 {
4f673c5e 3699 unsigned int curr_bb_idx = bb->index ();
9243c3d1
JZZ
3700 const auto local_dem
3701 = m_vector_manager->vector_block_infos[curr_bb_idx].local_dem;
3702 const auto reaching_out
3703 = m_vector_manager->vector_block_infos[curr_bb_idx].reaching_out;
3704
4f673c5e
JZZ
3705 /* Compute transparent. */
3706 for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
9243c3d1 3707 {
4f673c5e
JZZ
3708 const vector_insn_info *expr = m_vector_manager->vector_exprs[i];
3709 if (local_dem.real_dirty_p () || local_dem.valid_p ()
3710 || local_dem.unknown_p ()
3711 || has_vsetvl_killed_avl_p (bb, local_dem))
9243c3d1 3712 bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx], i);
4f673c5e
JZZ
3713 /* FIXME: Here we set the block as non-transparent (killed) if there
3714 is an instruction killed the value of AVL according to the
3715 definition of Local transparent. This is true for such following
3716 case:
3717
3718 bb 0 (Loop label):
3719 vsetvl zero, a5, e8, mf8
3720 bb 1:
3721 def a5
3722 bb 2:
3723 branch bb 0 (Loop label).
3724
3725 In this case, we known there is a loop bb 0->bb 1->bb 2. According
3726 to LCM definition, it is correct when we set vsetvl zero, a5, e8,
3727 mf8 as non-transparent (killed) so that LCM will not hoist outside
3728 the bb 0.
3729
3730 However, such conservative configuration will forbid optimization
3731 on some unlucky case. For example:
3732
3733 bb 0:
3734 li a5, 101
3735 bb 1:
3736 vsetvl zero, a5, e8, mf8
3737 bb 2:
3738 li a5, 101
3739 bb 3:
3740 vsetvl zero, a5, e8, mf8.
3741 So we also relax def a5 as transparent to gain more optimizations
3742 as long as the all real def insn of avl do not come from this
3743 block. This configuration may be still missing some optimization
3744 opportunities. */
6b6b9c68 3745 if (find_reg_killed_by (bb, expr->get_avl ()))
4f673c5e 3746 {
6b6b9c68
JZZ
3747 hash_set<set_info *> sets
3748 = get_all_sets (expr->get_avl_source (), true, false, false);
3749 if (any_set_in_bb_p (sets, bb))
4f673c5e
JZZ
3750 bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx],
3751 i);
3752 }
9243c3d1
JZZ
3753 }
3754
4f673c5e 3755 /* Compute anticipatable occurrences. */
6b6b9c68
JZZ
3756 if (local_dem.valid_p () || local_dem.real_dirty_p ()
3757 || (has_vsetvl_killed_avl_p (bb, local_dem)
3758 && vlmax_avl_p (local_dem.get_avl ())))
4f673c5e
JZZ
3759 if (anticipatable_occurrence_p (bb, local_dem))
3760 bitmap_set_bit (m_vector_manager->vector_antic[curr_bb_idx],
3761 m_vector_manager->get_expr_id (local_dem));
9243c3d1 3762
4f673c5e 3763 /* Compute available occurrences. */
9243c3d1
JZZ
3764 if (reaching_out.valid_or_dirty_p ())
3765 {
9243c3d1
JZZ
3766 auto_vec<size_t> available_list
3767 = m_vector_manager->get_all_available_exprs (reaching_out);
3768 for (size_t i = 0; i < available_list.length (); i++)
4f673c5e
JZZ
3769 {
3770 const vector_insn_info *expr
3771 = m_vector_manager->vector_exprs[available_list[i]];
3772 if (reaching_out.real_dirty_p ()
3773 || has_vsetvl_killed_avl_p (bb, reaching_out)
3774 || available_occurrence_p (bb, *expr))
3775 bitmap_set_bit (m_vector_manager->vector_comp[curr_bb_idx],
3776 available_list[i]);
3777 }
9243c3d1
JZZ
3778 }
3779 }
3780
3781 /* Compute kill for each basic block using:
3782
3783 ~(TRANSP | COMP)
3784 */
3785
3786 FOR_EACH_BB_FN (cfg_bb, cfun)
3787 {
3788 bitmap_ior (m_vector_manager->vector_kill[cfg_bb->index],
3789 m_vector_manager->vector_transp[cfg_bb->index],
3790 m_vector_manager->vector_comp[cfg_bb->index]);
3791 bitmap_not (m_vector_manager->vector_kill[cfg_bb->index],
3792 m_vector_manager->vector_kill[cfg_bb->index]);
3793 }
3794
3795 FOR_EACH_BB_FN (cfg_bb, cfun)
3796 {
3797 edge e;
3798 edge_iterator ei;
3799
3800 /* If the current block is the destination of an abnormal edge, we
3801 kill all trapping (for PRE) and memory (for hoist) expressions
3802 because we won't be able to properly place the instruction on
3803 the edge. So make them neither anticipatable nor transparent.
3804 This is fairly conservative.
3805
3806 ??? For hoisting it may be necessary to check for set-and-jump
3807 instructions here, not just for abnormal edges. The general problem
3808 is that when an expression cannot not be placed right at the end of
3809 a basic block we should account for any side-effects of a subsequent
3810 jump instructions that could clobber the expression. It would
3811 be best to implement this check along the lines of
3812 should_hoist_expr_to_dom where the target block is already known
3813 and, hence, there's no need to conservatively prune expressions on
3814 "intermediate" set-and-jump instructions. */
3815 FOR_EACH_EDGE (e, ei, cfg_bb->preds)
3816 if (e->flags & EDGE_COMPLEX)
3817 {
3818 bitmap_clear (m_vector_manager->vector_antic[cfg_bb->index]);
3819 bitmap_clear (m_vector_manager->vector_transp[cfg_bb->index]);
3820 }
3821 }
3822}
3823
3824/* Return true if VSETVL in the block can be refined as vsetvl zero,zero. */
3825bool
6b6b9c68
JZZ
3826pass_vsetvl::can_refine_vsetvl_p (const basic_block cfg_bb,
3827 const vector_insn_info &info) const
9243c3d1
JZZ
3828{
3829 if (!m_vector_manager->all_same_ratio_p (
3830 m_vector_manager->vector_avin[cfg_bb->index]))
3831 return false;
3832
005fad9d
JZZ
3833 if (!m_vector_manager->all_same_avl_p (
3834 cfg_bb, m_vector_manager->vector_avin[cfg_bb->index]))
3835 return false;
3836
9243c3d1
JZZ
3837 size_t expr_id
3838 = bitmap_first_set_bit (m_vector_manager->vector_avin[cfg_bb->index]);
6b6b9c68
JZZ
3839 if (!m_vector_manager->vector_exprs[expr_id]->same_vlmax_p (info))
3840 return false;
3841 if (!m_vector_manager->vector_exprs[expr_id]->compatible_avl_p (info))
9243c3d1
JZZ
3842 return false;
3843
3844 edge e;
3845 edge_iterator ei;
3846 bool all_valid_p = true;
3847 FOR_EACH_EDGE (e, ei, cfg_bb->preds)
3848 {
3849 if (bitmap_empty_p (m_vector_manager->vector_avout[e->src->index]))
3850 {
3851 all_valid_p = false;
3852 break;
3853 }
3854 }
3855
3856 if (!all_valid_p)
3857 return false;
3858 return true;
3859}
3860
3861/* Optimize the case like this:
3862
3863 bb 0:
3864 vsetvl 0 a5,zero,e8,mf8
3865 insn 0 (demand SEW + LMUL)
3866 bb 1:
3867 vsetvl 1 a5,zero,e16,mf4
3868 insn 1 (demand SEW + LMUL)
3869
3870 In this case, we should be able to refine
3871 vsetvl 1 into vsetvl zero, zero according to AVIN. */
3872void
3873pass_vsetvl::refine_vsetvls (void) const
3874{
3875 basic_block cfg_bb;
3876 FOR_EACH_BB_FN (cfg_bb, cfun)
3877 {
c139f5e1 3878 auto info = get_block_info(cfg_bb).local_dem;
9243c3d1
JZZ
3879 insn_info *insn = info.get_insn ();
3880 if (!info.valid_p ())
3881 continue;
3882
3883 rtx_insn *rinsn = insn->rtl ();
6b6b9c68 3884 if (!can_refine_vsetvl_p (cfg_bb, info))
9243c3d1
JZZ
3885 continue;
3886
ec99ffab
JZZ
3887 /* We can't refine user vsetvl into vsetvl zero,zero since the dest
3888 will be used by the following instructions. */
3889 if (vector_config_insn_p (rinsn))
3890 {
3891 m_vector_manager->to_refine_vsetvls.add (rinsn);
3892 continue;
3893 }
ff8f9544
JZ
3894
3895 /* If all incoming edges to a block have a vector state that is compatible
3896 with the block. In such a case we need not emit a vsetvl in the current
3897 block. */
3898
3899 gcc_assert (has_vtype_op (insn->rtl ()));
3900 rinsn = PREV_INSN (insn->rtl ());
3901 gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ())));
3902 if (m_vector_manager->all_avail_in_compatible_p (cfg_bb))
3903 {
 /* If LCM already scheduled this expression for deletion, leave it
 to cleanup_vsetvls; otherwise remove the redundant vsetvl now. */
3904 size_t id = m_vector_manager->get_expr_id (info);
3905 if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], id))
3906 continue;
3907 eliminate_insn (rinsn);
3908 }
3909 else
3910 {
 /* Downgrade to a vtype-only change (vsetvl zero,zero). */
3911 rtx new_pat
3912 = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX);
3913 change_insn (rinsn, new_pat);
3914 }
9243c3d1
JZZ
3915 }
3916}
3917
/* Remove vsetvls that LCM marked as deletable (vector_del).  User vsetvls
 whose result register is still live are recorded in to_delete_vsetvls and
 handled later in propagate_avl instead of being removed here. */
3918void
3919pass_vsetvl::cleanup_vsetvls ()
3920{
3921 basic_block cfg_bb;
3922 FOR_EACH_BB_FN (cfg_bb, cfun)
3923 {
3924 auto &info
c139f5e1 3925 = get_block_info(cfg_bb).reaching_out;
9243c3d1
JZZ
 /* LCM should mark at most one expression per block for deletion. */
3926 gcc_assert (m_vector_manager->expr_set_num (
3927 m_vector_manager->vector_del[cfg_bb->index])
3928 <= 1);
3929 for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
3930 {
3931 if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], i))
3932 {
3933 if (info.dirty_p ())
3934 info.set_unknown ();
3935 else
3936 {
4f673c5e 3937 const auto dem
c139f5e1 3938 = get_block_info(cfg_bb)
4f673c5e
JZZ
3939 .local_dem;
3940 gcc_assert (dem == *m_vector_manager->vector_exprs[i]);
3941 insn_info *insn = dem.get_insn ();
9243c3d1
JZZ
3942 gcc_assert (insn && insn->rtl ());
3943 rtx_insn *rinsn;
ec99ffab
JZZ
3944 /* We can't eliminate user vsetvl since the dest will be used
3945 * by the following instructions. */
9243c3d1 3946 if (vector_config_insn_p (insn->rtl ()))
9243c3d1 3947 {
ec99ffab
JZZ
3948 m_vector_manager->to_delete_vsetvls.add (insn->rtl ());
3949 continue;
9243c3d1 3950 }
ec99ffab
JZZ
3951
 /* The vsetvl to delete is the one emitted just before the
 demanding vector instruction. */
3952 gcc_assert (has_vtype_op (insn->rtl ()));
3953 rinsn = PREV_INSN (insn->rtl ());
3954 gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ())));
9243c3d1
JZZ
3955 eliminate_insn (rinsn);
3956 }
3957 }
3958 }
3959 }
3960}
3961
/* Materialize the vsetvls LCM decided to insert: one per (edge, expression)
 pair in vector_insert, plus end-of-block vsetvls for dirty reaching_out
 info.  Returns true if any insn was queued on an edge, in which case the
 caller must run commit_edge_insertions. */
3962bool
3963pass_vsetvl::commit_vsetvls (void)
3964{
3965 bool need_commit = false;
3966
3967 for (int ed = 0; ed < NUM_EDGES (m_vector_manager->vector_edge_list); ed++)
3968 {
3969 for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
3970 {
3971 edge eg = INDEX_EDGE (m_vector_manager->vector_edge_list, ed);
3972 if (bitmap_bit_p (m_vector_manager->vector_insert[ed], i))
3973 {
3974 const vector_insn_info *require
3975 = m_vector_manager->vector_exprs[i];
3976 gcc_assert (require->valid_or_dirty_p ());
3977 rtl_profile_for_edge (eg);
3978 start_sequence ();
3979
3980 insn_info *insn = require->get_insn ();
3981 vector_insn_info prev_info = vector_insn_info ();
005fad9d
JZZ
 /* If the source block's AVOUT agrees on ratio and AVL, use it as
 the "previous" state so insert_vsetvl can emit a cheaper form. */
3982 sbitmap bitdata = m_vector_manager->vector_avout[eg->src->index];
3983 if (m_vector_manager->all_same_ratio_p (bitdata)
3984 && m_vector_manager->all_same_avl_p (eg->dest, bitdata))
9243c3d1 3985 {
005fad9d 3986 size_t first = bitmap_first_set_bit (bitdata);
9243c3d1
JZZ
3987 prev_info = *m_vector_manager->vector_exprs[first];
3988 }
3989
3990 insert_vsetvl (EMIT_DIRECT, insn->rtl (), *require, prev_info);
3991 rtx_insn *rinsn = get_insns ();
3992 end_sequence ();
3993 default_rtl_profile ();
3994
3995 /* We should not get an abnormal edge here. */
3996 gcc_assert (!(eg->flags & EDGE_ABNORMAL));
3997 need_commit = true;
3998 insert_insn_on_edge (rinsn, eg);
3999 }
4000 }
4001 }
4002
4f673c5e 4003 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 4004 {
4f673c5e 4005 basic_block cfg_bb = bb->cfg_bb ();
9243c3d1 4006 const auto reaching_out
c139f5e1 4007 = get_block_info(cfg_bb).reaching_out;
9243c3d1
JZZ
4008 if (!reaching_out.dirty_p ())
4009 continue;
4010
4f673c5e
JZZ
4011 if (reaching_out.dirty_with_killed_avl_p ())
4012 {
4013 if (!has_vsetvl_killed_avl_p (bb, reaching_out))
4014 continue;
4015
 /* Skip if some expression available on entry already satisfies
 the outgoing demand. */
4016 unsigned int bb_index;
4017 sbitmap_iterator sbi;
4018 sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index];
4019 bool available_p = false;
4020 EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi)
4021 {
ec99ffab
JZZ
4022 if (m_vector_manager->vector_exprs[bb_index]->available_p (
4023 reaching_out))
4f673c5e
JZZ
4024 {
4025 available_p = true;
4026 break;
4027 }
4028 }
4029 if (available_p)
4030 continue;
4031 }
7ae4d1df
JZZ
4032
 /* Pick the cheapest vsetvl form the outgoing demand allows. */
4033 rtx new_pat;
ec99ffab
JZZ
4034 if (!reaching_out.demand_p (DEMAND_AVL))
4035 {
4036 vl_vtype_info new_info = reaching_out;
4037 new_info.set_avl_info (avl_info (const0_rtx, nullptr));
4038 new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX);
4039 }
4040 else if (can_refine_vsetvl_p (cfg_bb, reaching_out))
9243c3d1
JZZ
4041 new_pat
4042 = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, reaching_out, NULL_RTX);
7ae4d1df
JZZ
4043 else if (vlmax_avl_p (reaching_out.get_avl ()))
4044 new_pat = gen_vsetvl_pat (VSETVL_NORMAL, reaching_out,
ec99ffab 4045 reaching_out.get_avl_reg_rtx ());
7ae4d1df
JZZ
4046 else
4047 new_pat
4048 = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, reaching_out, NULL_RTX);
9243c3d1
JZZ
4049
4050 start_sequence ();
4051 emit_insn (new_pat);
4052 rtx_insn *rinsn = get_insns ();
4053 end_sequence ();
4054 insert_insn_end_basic_block (rinsn, cfg_bb);
4055 if (dump_file)
4056 {
4057 fprintf (dump_file,
4058 "\nInsert vsetvl insn %d at the end of <bb %d>:\n",
4059 INSN_UID (rinsn), cfg_bb->index);
4060 print_rtl_single (dump_file, rinsn);
4061 }
4062 }
4063
4064 return need_commit;
4065}
4066
/* Phase 4 driver: partial redundancy elimination of vsetvls via LCM.
 Computes local properties, runs pre_edge_lcm_avs, then applies the
 resulting insert/delete decisions. */
4067void
4068pass_vsetvl::pre_vsetvl (void)
4069{
4070 /* Compute entity list. */
4071 prune_expressions ();
4072
cfe3fbc6 4073 m_vector_manager->create_bitmap_vectors ();
9243c3d1
JZZ
4074 compute_local_properties ();
4075 m_vector_manager->vector_edge_list = pre_edge_lcm_avs (
4076 m_vector_manager->vector_exprs.length (), m_vector_manager->vector_transp,
4077 m_vector_manager->vector_comp, m_vector_manager->vector_antic,
4078 m_vector_manager->vector_kill, m_vector_manager->vector_avin,
4079 m_vector_manager->vector_avout, &m_vector_manager->vector_insert,
4080 &m_vector_manager->vector_del);
4081
4082 /* We should dump the information before CFG is changed. Otherwise it will
4083 produce ICE (internal compiler error). */
4084 if (dump_file)
4085 m_vector_manager->dump (dump_file);
4086
4087 refine_vsetvls ();
4088 cleanup_vsetvls ();
4089 bool need_commit = commit_vsetvls ();
4090 if (need_commit)
4091 commit_edge_insertions ();
4092}
4093
c919d059
KC
4094/* Some instruction can not be accessed in RTL_SSA when we don't re-init
4095 the new RTL_SSA framework but it is definitely at the END of the block.
4096
4097 Here we optimize the VSETVL is hoisted by LCM:
4098
4099 Before LCM:
4100 bb 1:
4101 vsetvli a5,a2,e32,m1,ta,mu
4102 bb 2:
4103 vsetvli zero,a5,e32,m1,ta,mu
4104 ...
4105
4106 After LCM:
4107 bb 1:
4108 vsetvli a5,a2,e32,m1,ta,mu
4109 LCM INSERTED: vsetvli zero,a5,e32,m1,ta,mu --> eliminate
4110 bb 2:
4111 ...
4112 */
4113rtx_insn *
4114pass_vsetvl::get_vsetvl_at_end (const bb_info *bb, vector_insn_info *dem) const
4115{
4116 rtx_insn *end_vsetvl = BB_END (bb->cfg_bb ());
4117 if (end_vsetvl && NONDEBUG_INSN_P (end_vsetvl))
4118 {
 /* An LCM-inserted vsetvl may sit just before the terminating jump. */
4119 if (JUMP_P (end_vsetvl))
4120 end_vsetvl = PREV_INSN (end_vsetvl);
4121
4122 if (NONDEBUG_INSN_P (end_vsetvl)
4123 && vsetvl_discard_result_insn_p (end_vsetvl))
4124 {
4125 /* Only handle single succ. here, multiple succ. is much
4126 more complicated. */
4127 if (single_succ_p (bb->cfg_bb ()))
4128 {
 /* Report the successor's local demand as the demand this
 end-of-block vsetvl satisfies. */
4129 edge e = single_succ_edge (bb->cfg_bb ());
4130 *dem = get_block_info (e->dest).local_dem;
4131 return end_vsetvl;
4132 }
4133 }
4134 }
4135 return nullptr;
4136}
4137
4138/* This predicator should only used within same basic block. */
4139static bool
4140local_avl_compatible_p (rtx avl1, rtx avl2)
4141{
4142 if (!REG_P (avl1) || !REG_P (avl2))
4143 return false;
4144
4145 return REGNO (avl1) == REGNO (avl2);
4146}
4147
8421f279
JZ
4148/* Local user vsetvl optimization:
4149
4150 Case 1:
4151 vsetvl a5,a4,e8,mf8
4152 ...
4153 vsetvl zero,a5,e8,mf8 --> Eliminate directly.
4154
4155 Case 2:
4156 vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2
4157 ...
4158 vsetvl zero,a5,e32,mf2 --> Eliminate directly. */
4159void
c919d059
KC
4160pass_vsetvl::local_eliminate_vsetvl_insn (const bb_info *bb) const
4161{
4162 rtx_insn *prev_vsetvl = nullptr;
4163 rtx_insn *curr_vsetvl = nullptr;
4164 rtx vl_placeholder = RVV_VLMAX;
4165 rtx prev_avl = vl_placeholder;
4166 rtx curr_avl = vl_placeholder;
4167 vector_insn_info prev_dem;
4168
4169 /* Instruction inserted by LCM has not appeared in RTL-SSA yet; try to
4170 find those instructions. */
4171 if (rtx_insn *end_vsetvl = get_vsetvl_at_end (bb, &prev_dem))
8421f279 4172 {
c919d059
KC
4173 prev_avl = get_avl (end_vsetvl);
4174 prev_vsetvl = end_vsetvl;
4175 }
4176
4177 bool skip_one = false;
4178 /* Backward propagate vsetvl info, drop the later one (prev_vsetvl) if it's
4179 compatible with current vsetvl (curr_avl), and merge the vtype and avl
4180 info. into current vsetvl. */
4181 for (insn_info *insn : bb->reverse_real_nondebug_insns ())
4182 {
4183 rtx_insn *rinsn = insn->rtl ();
4184 const auto &curr_dem = get_vector_info (insn);
4185 bool need_invalidate = false;
4186
4187 /* Skip if this insn already handled in last iteration. */
4188 if (skip_one)
4189 {
4190 skip_one = false;
4191 continue;
4192 }
4193
4194 if (vsetvl_insn_p (rinsn))
4195 {
4196 curr_vsetvl = rinsn;
4197 /* vsetvl are using vl rather than avl since it will try to merge
4198 with other vsetvl_discard_result.
4199
4200 v--- avl
4201 vsetvl a5,a4,e8,mf8 # vsetvl
4202 ... ^--- vl
4203 vsetvl zero,a5,e8,mf8 # vsetvl_discard_result
4204 ^--- avl
4205 */
4206 curr_avl = get_vl (rinsn);
4207 /* vsetvl is a cut point of local backward vsetvl elimination. */
4208 need_invalidate = true;
4209 }
4210 else if (has_vtype_op (rinsn) && NONDEBUG_INSN_P (PREV_INSN (rinsn))
4211 && (vsetvl_discard_result_insn_p (PREV_INSN (rinsn))
4212 || vsetvl_insn_p (PREV_INSN (rinsn))))
8421f279 4213 {
c919d059 4214 curr_vsetvl = PREV_INSN (rinsn);
8421f279 4215
c919d059 4216 if (vsetvl_insn_p (PREV_INSN (rinsn)))
8421f279 4217 {
c919d059
KC
4218 /* Need invalidate and skip if it's vsetvl. */
4219 need_invalidate = true;
4220 /* vsetvl_discard_result_insn_p won't appear in RTL-SSA,
4221 * so only need to skip for vsetvl. */
4222 skip_one = true;
4223 }
c919d059
KC
4224
4225 curr_avl = get_avl (rinsn);
4226
4227 /* Some instructions like pred_extract_first<mode> don't require avl, so
4228 the avl is null; use vl_placeholder to unify the handling
4229 logic. */
4230 if (!curr_avl)
4231 curr_avl = vl_placeholder;
4232 }
4233 else if (insn->is_call () || insn->is_asm ()
4234 || find_access (insn->defs (), VL_REGNUM)
4235 || find_access (insn->defs (), VTYPE_REGNUM)
4236 || (REG_P (prev_avl)
4237 && find_access (insn->defs (), REGNO (prev_avl))))
4238 {
4239 /* Invalidate if this insn can't propagate vl, vtype or avl. */
4240 need_invalidate = true;
4241 prev_dem = vector_insn_info ();
4242 }
4243 else
4244 /* Not interested instruction. */
4245 continue;
4246
4247 /* Local AVL compatibility checking is simpler than global, we only
4248 need to check the REGNO is same. */
4249 if (prev_dem.valid_p () && prev_dem.skip_avl_compatible_p (curr_dem)
4250 && local_avl_compatible_p (prev_avl, curr_avl))
4251 {
4252 /* curr_dem and prev_dem is compatible! */
4253 /* Update avl info since we need to make sure they are fully
4254 compatible before merge. */
4255 prev_dem.set_avl_info (curr_dem.get_avl_info ());
4256 /* Merge both and update into curr_vsetvl. */
4257 prev_dem = curr_dem.merge (prev_dem, LOCAL_MERGE);
4258 change_vsetvl_insn (curr_dem.get_insn (), prev_dem);
4259 /* Then we can drop prev_vsetvl. */
4260 eliminate_insn (prev_vsetvl);
4261 }
4262
4263 if (need_invalidate)
4264 {
4265 prev_vsetvl = nullptr;
4266 curr_vsetvl = nullptr;
4267 prev_avl = vl_placeholder;
4268 curr_avl = vl_placeholder;
4269 prev_dem = vector_insn_info ();
4270 }
4271 else
4272 {
4273 prev_vsetvl = curr_vsetvl;
4274 prev_avl = curr_avl;
4275 prev_dem = curr_dem;
8421f279
JZ
4276 }
4277 }
4278}
4279
c5a1fa59
JZ
4280/* Before the VSETVL PASS, RVV instruction patterns depend on the AVL operand
4281 implicitly. Since we will emit VSETVL instructions and make RVV instructions
4282 depend on the VL/VTYPE global status registers, we remove such AVL operands
4283 from the RVV instruction patterns here in order to remove AVL dependencies when
4284 the AVL operand is a register operand.
4285
4286 Before the VSETVL PASS:
4287 li a5,32
4288 ...
4289 vadd.vv (..., a5)
4290 After the VSETVL PASS:
4291 li a5,32
4292 vsetvli zero, a5, ...
4293 ...
4294 vadd.vv (..., const_int 0). */
9243c3d1
JZZ
4295void
4296pass_vsetvl::cleanup_insns (void) const
4297{
4298 for (const bb_info *bb : crtl->ssa->bbs ())
4299 {
c919d059 4300 local_eliminate_vsetvl_insn (bb);
9243c3d1
JZZ
4301 for (insn_info *insn : bb->real_nondebug_insns ())
4302 {
4303 rtx_insn *rinsn = insn->rtl ();
9243c3d1
JZZ
4304 if (vlmax_avl_insn_p (rinsn))
4305 {
4306 eliminate_insn (rinsn);
4307 continue;
4308 }
4309
4310 /* Erase the AVL operand from the instruction. */
4311 if (!has_vl_op (rinsn) || !REG_P (get_vl (rinsn)))
4312 continue;
4313 rtx avl = get_vl (rinsn);
 /* Only rewrite when the register appears exactly once, i.e. solely
 as the AVL operand. */
a2d12abe 4314 if (count_regno_occurrences (rinsn, REGNO (avl)) == 1)
9243c3d1
JZZ
4315 {
4316 /* Get the list of uses for the new instruction. */
4317 auto attempt = crtl->ssa->new_change_attempt ();
4318 insn_change change (insn);
4319 /* Remove the use of the substituted value. */
4320 access_array_builder uses_builder (attempt);
4321 uses_builder.reserve (insn->num_uses () - 1);
4322 for (use_info *use : insn->uses ())
4323 if (use != find_access (insn->uses (), REGNO (avl)))
4324 uses_builder.quick_push (use);
4325 use_array new_uses = use_array (uses_builder.finish ());
4326 change.new_uses = new_uses;
4327 change.move_range = insn->ebb ()->insn_range ();
60bd33bc
JZZ
 /* Fault-only-first loads are not single_set; replace AVL in the
 whole pattern instead. */
4328 rtx pat;
4329 if (fault_first_load_p (rinsn))
4330 pat = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx);
4331 else
4332 {
4333 rtx set = single_set (rinsn);
4334 rtx src
4335 = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx);
4336 pat = gen_rtx_SET (SET_DEST (set), src);
4337 }
1d339ce8
KC
4338 bool ok = change_insn (crtl->ssa, change, insn, pat);
4339 gcc_assert (ok);
9243c3d1
JZZ
4340 }
4341 }
4342 }
4343}
4344
6b6b9c68
JZZ
/* Phase 6: rebuild RTL_SSA (LCM changed the CFG) and propagate AVL between
 vsetvls, merging vsetvl/vsetvl_discard_result pairs and discarding dead
 vl results. */
4345void
4346pass_vsetvl::propagate_avl (void) const
4347{
4348 /* Rebuild the RTL_SSA according to the new CFG generated by LCM. */
4349 /* Finalization of RTL_SSA. */
4350 free_dominance_info (CDI_DOMINATORS);
4351 if (crtl->ssa->perform_pending_updates ())
4352 cleanup_cfg (0);
4353 delete crtl->ssa;
4354 crtl->ssa = nullptr;
4355 /* Initialization of RTL_SSA. */
4356 calculate_dominance_info (CDI_DOMINATORS);
4357 df_analyze ();
4358 crtl->ssa = new function_info (cfun);
4359
4360 hash_set<rtx_insn *> to_delete;
4361 for (const bb_info *bb : crtl->ssa->bbs ())
4362 {
4363 for (insn_info *insn : bb->real_nondebug_insns ())
4364 {
4365 if (vsetvl_discard_result_insn_p (insn->rtl ()))
4366 {
4367 rtx avl = get_avl (insn->rtl ());
4368 if (!REG_P (avl))
4369 continue;
4370
4371 set_info *set = find_access (insn->uses (), REGNO (avl))->def ();
4372 insn_info *def_insn = extract_single_source (set);
4373 if (!def_insn)
4374 continue;
4375
4376 /* Handle this case:
4377 vsetvli a6,zero,e32,m1,ta,mu
4378 li a5,4096
4379 add a7,a0,a5
4380 addi a7,a7,-96
4381 vsetvli t1,zero,e8,mf8,ta,ma
4382 vle8.v v24,0(a7)
4383 add a5,a3,a5
4384 addi a5,a5,-96
4385 vse8.v v24,0(a5)
4386 vsetvli zero,a6,e32,m1,tu,ma
4387 */
4388 if (vsetvl_insn_p (def_insn->rtl ()))
4389 {
4390 vl_vtype_info def_info = get_vl_vtype_info (def_insn);
4391 vl_vtype_info info = get_vl_vtype_info (insn);
4392 rtx avl = get_avl (def_insn->rtl ());
4393 rtx vl = get_vl (def_insn->rtl ());
4394 if (def_info.get_ratio () == info.get_ratio ())
4395 {
4396 if (vlmax_avl_p (def_info.get_avl ()))
4397 {
4398 info.set_avl_info (
4399 avl_info (def_info.get_avl (), nullptr));
4400 rtx new_pat
4401 = gen_vsetvl_pat (VSETVL_NORMAL, info, vl);
4402 validate_change (insn->rtl (),
4403 &PATTERN (insn->rtl ()), new_pat,
4404 false);
4405 continue;
4406 }
4407 if (def_info.has_avl_imm () || rtx_equal_p (avl, vl))
4408 {
4409 info.set_avl_info (avl_info (avl, nullptr));
4410 emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_AFTER,
4411 info, NULL_RTX, insn->rtl ());
 /* Both insns become dead when this was the only use. */
4412 if (set->single_nondebug_insn_use ())
4413 {
4414 to_delete.add (insn->rtl ());
4415 to_delete.add (def_insn->rtl ());
4416 }
4417 continue;
4418 }
4419 }
4420 }
4421 }
4422
4423 /* Change vsetvl rd, rs1 --> vsetvl zero, rs1,
4424 if rd is not used by any nondebug instructions.
4425 Even though this PASS runs after RA and it doesn't help
4426 reduce register pressure, it can help instruction scheduling
4427 since we remove the dependencies. */
4428 if (vsetvl_insn_p (insn->rtl ()))
4429 {
4430 rtx vl = get_vl (insn->rtl ());
4431 rtx avl = get_avl (insn->rtl ());
6b6b9c68
JZZ
4432 def_info *def = find_access (insn->defs (), REGNO (vl));
4433 set_info *set = safe_dyn_cast<set_info *> (def);
ec99ffab
JZZ
4434 vector_insn_info info;
4435 info.parse_insn (insn);
6b6b9c68 4436 gcc_assert (set);
ec99ffab
JZZ
 /* Deferred deletions recorded by cleanup_vsetvls. */
4437 if (m_vector_manager->to_delete_vsetvls.contains (insn->rtl ()))
4438 {
4439 m_vector_manager->to_delete_vsetvls.remove (insn->rtl ());
4440 if (m_vector_manager->to_refine_vsetvls.contains (
4441 insn->rtl ()))
4442 m_vector_manager->to_refine_vsetvls.remove (insn->rtl ());
4443 if (!set->has_nondebug_insn_uses ())
4444 {
4445 to_delete.add (insn->rtl ());
4446 continue;
4447 }
4448 }
 /* Deferred refinements recorded by refine_vsetvls. */
4449 if (m_vector_manager->to_refine_vsetvls.contains (insn->rtl ()))
4450 {
4451 m_vector_manager->to_refine_vsetvls.remove (insn->rtl ());
4452 if (!set->has_nondebug_insn_uses ())
4453 {
4454 rtx new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY,
4455 info, NULL_RTX);
4456 change_insn (insn->rtl (), new_pat);
4457 continue;
4458 }
4459 }
4460 if (vlmax_avl_p (avl))
4461 continue;
6b6b9c68
JZZ
4462 rtx new_pat
4463 = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, info, NULL_RTX);
4464 if (!set->has_nondebug_insn_uses ())
4465 {
4466 validate_change (insn->rtl (), &PATTERN (insn->rtl ()),
4467 new_pat, false);
4468 continue;
4469 }
4470 }
4471 }
4472 }
4473
4474 for (rtx_insn *rinsn : to_delete)
4475 eliminate_insn (rinsn);
4476}
4477
9243c3d1
JZZ
/* Per-function pass setup: build RTL_SSA (only under optimization), create
 the vector info manager and seed block probabilities. */
4478void
4479pass_vsetvl::init (void)
4480{
4481 if (optimize > 0)
4482 {
4483 /* Initialization of RTL_SSA. */
4484 calculate_dominance_info (CDI_DOMINATORS);
4485 df_analyze ();
4486 crtl->ssa = new function_info (cfun);
4487 }
4488
4489 m_vector_manager = new vector_infos_manager ();
4f673c5e 4490 compute_probabilities ();
9243c3d1
JZZ
4491
4492 if (dump_file)
4493 {
4494 fprintf (dump_file, "\nPrologue: Initialize vector infos\n");
4495 m_vector_manager->dump (dump_file);
4496 }
4497}
4498
/* Per-function pass teardown: mirror of init — finalize RTL_SSA and
 release the vector info manager. */
4499void
4500pass_vsetvl::done (void)
4501{
4502 if (optimize > 0)
4503 {
4504 /* Finalization of RTL_SSA. */
4505 free_dominance_info (CDI_DOMINATORS);
4506 if (crtl->ssa->perform_pending_updates ())
4507 cleanup_cfg (0);
4508 delete crtl->ssa;
4509 crtl->ssa = nullptr;
4510 }
4511 m_vector_manager->release ();
4512 delete m_vector_manager;
4513 m_vector_manager = nullptr;
4514}
4515
acc10c79
JZZ
4516/* Compute probability for each block. */
4517void
4518pass_vsetvl::compute_probabilities (void)
4519{
4520 /* Don't compute it in -O0 since we don't need it. */
4521 if (!optimize)
4522 return;
4523 edge e;
4524 edge_iterator ei;
4525
 /* Forward-accumulate: each successor's probability is the sum over
 incoming edges of (source block probability * edge probability). */
4526 for (const bb_info *bb : crtl->ssa->bbs ())
4527 {
4528 basic_block cfg_bb = bb->cfg_bb ();
4529 auto &curr_prob
c139f5e1 4530 = get_block_info(cfg_bb).probability;
c129d22d
JZZ
4531
4532 /* GCC assumes the entry block (bb 0) is always
4533 executed, so set its probability as "always". */
acc10c79
JZZ
4534 if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
4535 curr_prob = profile_probability::always ();
c129d22d
JZZ
4536 /* Exit block (bb 1) is the block we don't need to process. */
4537 if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
4538 continue;
4539
acc10c79
JZZ
4540 gcc_assert (curr_prob.initialized_p ());
4541 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
4542 {
4543 auto &new_prob
c139f5e1 4544 = get_block_info(e->dest).probability;
acc10c79
JZZ
4545 if (!new_prob.initialized_p ())
4546 new_prob = curr_prob * e->probability;
4547 else if (new_prob == profile_probability::always ())
4548 continue;
4549 else
4550 new_prob += curr_prob * e->probability;
4551 }
4552 }
acc10c79
JZZ
4553}
4554
9243c3d1
JZZ
4555/* Lazy vsetvl insertion for optimize > 0.
 Driver for the six phases; each phase depends on the state left by the
 previous one, so the order here must not change. */
4556void
4557pass_vsetvl::lazy_vsetvl (void)
4558{
4559 if (dump_file)
4560 fprintf (dump_file,
4561 "\nEntering Lazy VSETVL PASS and Handling %d basic blocks for "
4562 "function:%s\n",
4563 n_basic_blocks_for_fn (cfun), function_name (cfun));
4564
4565 /* Phase 1 - Compute the local dems within each block.
4566 The data-flow analysis within each block is backward analysis. */
4567 if (dump_file)
4568 fprintf (dump_file, "\nPhase 1: Compute local backward vector infos\n");
4569 for (const bb_info *bb : crtl->ssa->bbs ())
4570 compute_local_backward_infos (bb);
4571 if (dump_file)
4572 m_vector_manager->dump (dump_file);
4573
4574 /* Phase 2 - Emit vsetvl instructions within each basic block according to
4575 demand, compute and save ANTLOC && AVLOC of each block. */
4576 if (dump_file)
4577 fprintf (dump_file,
4578 "\nPhase 2: Emit vsetvl instruction within each block\n");
4579 for (const bb_info *bb : crtl->ssa->bbs ())
4580 emit_local_forward_vsetvls (bb);
4581 if (dump_file)
4582 m_vector_manager->dump (dump_file);
4583
4584 /* Phase 3 - Propagate demanded info across blocks. */
4585 if (dump_file)
4586 fprintf (dump_file, "\nPhase 3: Demands propagation across blocks\n");
387cd9d3 4587 demand_fusion ();
9243c3d1
JZZ
4588 if (dump_file)
4589 m_vector_manager->dump (dump_file);
4590
4591 /* Phase 4 - Lazy code motion. */
4592 if (dump_file)
4593 fprintf (dump_file, "\nPhase 4: PRE vsetvl by Lazy code motion (LCM)\n");
4594 pre_vsetvl ();
4595
4596 /* Phase 5 - Cleanup AVL && VL operand of RVV instruction. */
4597 if (dump_file)
4598 fprintf (dump_file, "\nPhase 5: Cleanup AVL and VL operands\n");
4599 cleanup_insns ();
6b6b9c68
JZZ
4600
4601 /* Phase 6 - Rebuild RTL_SSA to propagate AVL between vsetvls. */
4602 if (dump_file)
4603 fprintf (dump_file,
4604 "\nPhase 6: Rebuild RTL_SSA to propagate AVL between vsetvls\n");
4605 propagate_avl ();
9243c3d1
JZZ
4606}
4607
4608/* Main entry point for this pass. */
4609unsigned int
4610pass_vsetvl::execute (function *)
4611{
4612 if (n_basic_blocks_for_fn (cfun) <= 0)
4613 return 0;
4614
ca8fb009
JZZ
4615 /* The RVV instruction may change after split, which is not a stable
4616 instruction. We need to split it here to avoid potential issues
4617 since the VSETVL PASS is inserted before the split PASS. */
4618 split_all_insns ();
9243c3d1
JZZ
4619
4620 /* Early return if there are no vector instructions. */
4621 if (!has_vector_insn (cfun))
4622 return 0;
4623
4624 init ();
4625
 /* -O0 gets a simple per-insn insertion; otherwise run the LCM-based
 lazy algorithm. */
4626 if (!optimize)
4627 simple_vsetvl ();
4628 else
4629 lazy_vsetvl ();
4630
4631 done ();
4632 return 0;
4633}
4634
4635rtl_opt_pass *
4636make_pass_vsetvl (gcc::context *ctxt)
4637{
4638 return new pass_vsetvl (ctxt);
4639}