]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/riscv/riscv-vsetvl.cc
RISC-V: Add tuple type vget/vset intrinsics
[thirdparty/gcc.git] / gcc / config / riscv / riscv-vsetvl.cc
CommitLineData
9243c3d1 1/* VSETVL pass for RISC-V 'V' Extension for GNU compiler.
c841bde5 2 Copyright (C) 2022-2023 Free Software Foundation, Inc.
9243c3d1
JZZ
3 Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3, or(at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21/* This pass is to Set VL/VTYPE global status for RVV instructions
22 that depend on VL and VTYPE registers by Lazy code motion (LCM).
23
24 Strategy:
25
26 - Backward demanded info fusion within block.
27
28 - Lazy code motion (LCM) based demanded info backward propagation.
29
30 - RTL_SSA framework for def-use, PHI analysis.
31
32 - Lazy code motion (LCM) for global VL/VTYPE optimization.
33
34 Assumption:
35
36 - Each avl operand is either an immediate (must be in range 0 ~ 31) or reg.
37
38 This pass consists of 5 phases:
39
40 - Phase 1 - compute VL/VTYPE demanded information within each block
41 by backward data-flow analysis.
42
43 - Phase 2 - Emit vsetvl instructions within each basic block according to
44 demand, compute and save ANTLOC && AVLOC of each block.
45
387cd9d3
JZZ
46 - Phase 3 - Backward && forward demanded info propagation and fusion across
47 blocks.
9243c3d1
JZZ
48
49 - Phase 4 - Lazy code motion including: compute local properties,
50 pre_edge_lcm and vsetvl insertion && delete edges for LCM results.
51
52 - Phase 5 - Cleanup AVL operand of RVV instruction since it will not be
53 used any more and VL operand of VSETVL instruction if it is not used by
54 any non-debug instructions.
55
6b6b9c68
JZZ
56 - Phase 6 - Propagate AVL between vsetvl instructions.
57
9243c3d1
JZZ
58 Implementation:
59
60 - The subroutine of optimize == 0 is simple_vsetvl.
61 This function simplily vsetvl insertion for each RVV
62 instruction. No optimization.
63
64 - The subroutine of optimize > 0 is lazy_vsetvl.
65 This function optimize vsetvl insertion process by
ec99ffab
JZZ
66 lazy code motion (LCM) layering on RTL_SSA.
67
68 - get_avl (), get_insn (), get_avl_source ():
69
70 1. get_insn () is the current instruction, find_access (get_insn
71 ())->def is the same as get_avl_source () if get_insn () demand VL.
72 2. If get_avl () is non-VLMAX REG, get_avl () == get_avl_source
73 ()->regno ().
74 3. get_avl_source ()->regno () is the REGNO that we backward propagate.
75 */
9243c3d1
JZZ
76
77#define IN_TARGET_CODE 1
78#define INCLUDE_ALGORITHM
79#define INCLUDE_FUNCTIONAL
80
81#include "config.h"
82#include "system.h"
83#include "coretypes.h"
84#include "tm.h"
85#include "backend.h"
86#include "rtl.h"
87#include "target.h"
88#include "tree-pass.h"
89#include "df.h"
90#include "rtl-ssa.h"
91#include "cfgcleanup.h"
92#include "insn-config.h"
93#include "insn-attr.h"
94#include "insn-opinit.h"
95#include "tm-constrs.h"
96#include "cfgrtl.h"
97#include "cfganal.h"
98#include "lcm.h"
99#include "predict.h"
100#include "profile-count.h"
101#include "riscv-vsetvl.h"
102
103using namespace rtl_ssa;
104using namespace riscv_vector;
105
ec99ffab
JZZ
/* Tables of the SEW and LMUL values this file works with.  NOTE(review):
   presumably these enumerate every value encodable in VTYPE — confirm
   against the pass's users of these tables.  */
static CONSTEXPR const unsigned ALL_SEW[] = {8, 16, 32, 64};
static CONSTEXPR const vlmul_type ALL_LMUL[]
  = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2};
ec99ffab 109
9243c3d1
JZZ
/* Dump demand INFO to stderr; intended to be called from a debugger.  */
DEBUG_FUNCTION void
debug (const vector_insn_info *info)
{
  info->dump (stderr);
}
115
/* Dump the whole infos manager INFO to stderr; intended to be called
   from a debugger.  */
DEBUG_FUNCTION void
debug (const vector_infos_manager *info)
{
  info->dump (stderr);
}
121
122static bool
123vlmax_avl_p (rtx x)
124{
125 return x && rtx_equal_p (x, RVV_VLMAX);
126}
127
128static bool
129vlmax_avl_insn_p (rtx_insn *rinsn)
130{
85112fbb
JZZ
131 return (INSN_CODE (rinsn) == CODE_FOR_vlmax_avlsi
132 || INSN_CODE (rinsn) == CODE_FOR_vlmax_avldi);
9243c3d1
JZZ
133}
134
8d8cc482
JZZ
/* Return true if the block is a loop itself:
	  local_dem
	     __________
	 ____|____     |
	|        |     |
	|________|     |
	     |_________|
	  reaching_out

   I.e. CFG_BB ends in a conditional jump and one of its successor
   edges comes straight back to CFG_BB (a self-loop).  */
static bool
loop_basic_block_p (const basic_block cfg_bb)
{
  if (JUMP_P (BB_END (cfg_bb)) && any_condjump_p (BB_END (cfg_bb)))
    {
      edge e;
      edge_iterator ei;
      /* Look for a back edge whose destination is the block itself.  */
      FOR_EACH_EDGE (e, ei, cfg_bb->succs)
	if (e->dest->index == cfg_bb->index)
	  return true;
    }
  return false;
}
157
158/* Return true if it is an RVV instruction depends on VTYPE global
159 status register. */
160static bool
161has_vtype_op (rtx_insn *rinsn)
162{
163 return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn);
164}
165
166/* Return true if it is an RVV instruction depends on VL global
167 status register. */
168static bool
169has_vl_op (rtx_insn *rinsn)
170{
171 return recog_memoized (rinsn) >= 0 && get_attr_has_vl_op (rinsn);
172}
173
/* Is this a SEW value that can be encoded into the VTYPE format?
   Valid SEWs are exactly the powers of two in [8, 64].

   Note: the previous implementation used `exact_log2 (sew)` as a
   boolean, but exact_log2 returns -1 (which is truthy) for values
   that are NOT powers of two, so e.g. sew == 24 was wrongly accepted.
   An explicit membership test is both correct and clearer.  */
static bool
valid_sew_p (size_t sew)
{
  return sew == 8 || sew == 16 || sew == 32 || sew == 64;
}
180
ec99ffab
JZZ
181/* Return true if the instruction ignores VLMUL field of VTYPE. */
182static bool
183ignore_vlmul_insn_p (rtx_insn *rinsn)
184{
185 return get_attr_type (rinsn) == TYPE_VIMOVVX
186 || get_attr_type (rinsn) == TYPE_VFMOVVF
187 || get_attr_type (rinsn) == TYPE_VIMOVXV
188 || get_attr_type (rinsn) == TYPE_VFMOVFV;
189}
190
191/* Return true if the instruction is scalar move instruction. */
192static bool
193scalar_move_insn_p (rtx_insn *rinsn)
194{
195 return get_attr_type (rinsn) == TYPE_VIMOVXV
196 || get_attr_type (rinsn) == TYPE_VFMOVFV;
197}
198
60bd33bc
JZZ
199/* Return true if the instruction is fault first load instruction. */
200static bool
201fault_first_load_p (rtx_insn *rinsn)
202{
203 return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VLDFF;
204}
205
206/* Return true if the instruction is read vl instruction. */
207static bool
208read_vl_insn_p (rtx_insn *rinsn)
209{
210 return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_RDVL;
211}
212
9243c3d1
JZZ
213/* Return true if it is a vsetvl instruction. */
214static bool
215vector_config_insn_p (rtx_insn *rinsn)
216{
217 return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL;
218}
219
220/* Return true if it is vsetvldi or vsetvlsi. */
221static bool
222vsetvl_insn_p (rtx_insn *rinsn)
223{
6b6b9c68
JZZ
224 if (!vector_config_insn_p (rinsn))
225 return false;
85112fbb 226 return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi
6b6b9c68
JZZ
227 || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi);
228}
229
230/* Return true if it is vsetvl zero, rs1. */
231static bool
232vsetvl_discard_result_insn_p (rtx_insn *rinsn)
233{
234 if (!vector_config_insn_p (rinsn))
235 return false;
236 return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi
237 || INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi);
9243c3d1
JZZ
238}
239
9243c3d1 240static bool
4f673c5e 241real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb)
9243c3d1 242{
4f673c5e 243 return insn != nullptr && insn->is_real () && insn->bb () == bb;
9243c3d1
JZZ
244}
245
9243c3d1 246static bool
4f673c5e 247before_p (const insn_info *insn1, const insn_info *insn2)
9243c3d1 248{
9b9a1ac1 249 return insn1->compare_with (insn2) < 0;
4f673c5e
JZZ
250}
251
6b6b9c68
JZZ
/* Return the last real nondebug insn in BB that defines register X
   (i.e. the definition that kills X when scanning the block backward),
   or nullptr if X is null, the VLMAX placeholder, not a register, or
   never set in BB.  */
static insn_info *
find_reg_killed_by (const bb_info *bb, rtx x)
{
  if (!x || vlmax_avl_p (x) || !REG_P (x))
    return nullptr;
  for (insn_info *insn : bb->reverse_real_nondebug_insns ())
    if (find_access (insn->defs (), REGNO (x)))
      return insn;
  return nullptr;
}
262
/* Helper function to get VL operand.  For an insn with a VL operand the
   value is pulled out of the recog data; otherwise the insn is assumed
   to be a vsetvl pattern and its destination (rd) is returned.  */
static rtx
get_vl (rtx_insn *rinsn)
{
  if (has_vl_op (rinsn))
    {
      extract_insn_cached (rinsn);
      return recog_data.operand[get_attr_vl_op_idx (rinsn)];
    }
  /* vsetvl pattern: the VL is the SET_DEST of the first parallel element.  */
  return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0));
}
274
/* Return true if INFO is a dirty-with-killed-AVL demand whose AVL is
   redefined in BB in a way compatible with INFO.  For a VLMAX AVL we
   only require that the AVL register is set somewhere in BB; otherwise
   we walk BB backward to the last definition of the AVL register and
   check it is a plain set compatible with INFO's demanded AVL.  */
static bool
has_vsetvl_killed_avl_p (const bb_info *bb, const vector_insn_info &info)
{
  if (info.dirty_with_killed_avl_p ())
    {
      rtx avl = info.get_avl ();
      if (vlmax_avl_p (avl))
	return find_reg_killed_by (bb, info.get_avl_reg_rtx ()) != nullptr;
      for (const insn_info *insn : bb->reverse_real_nondebug_insns ())
	{
	  def_info *def = find_access (insn->defs (), REGNO (avl));
	  if (def)
	    {
	      /* A clobber (non-set) definition cannot be matched.  */
	      set_info *set = safe_dyn_cast<set_info *> (def);
	      if (!set)
		return false;

	      rtx new_avl = gen_rtx_REG (GET_MODE (avl), REGNO (avl));
	      gcc_assert (new_avl != avl);
	      if (!info.compatible_avl_p (avl_info (new_avl, set)))
		return false;

	      return true;
	    }
	}
    }
  return false;
}
303
9243c3d1
JZZ
304/* An "anticipatable occurrence" is one that is the first occurrence in the
305 basic block, the operands are not modified in the basic block prior
306 to the occurrence and the output is not used between the start of
4f673c5e
JZZ
307 the block and the occurrence.
308
309 For VSETVL instruction, we have these following formats:
310 1. vsetvl zero, rs1.
311 2. vsetvl zero, imm.
312 3. vsetvl rd, rs1.
313
314 So base on these circumstances, a DEM is considered as a local anticipatable
315 occurrence should satisfy these following conditions:
316
317 1). rs1 (avl) are not modified in the basic block prior to the VSETVL.
318 2). rd (vl) are not modified in the basic block prior to the VSETVL.
319 3). rd (vl) is not used between the start of the block and the occurrence.
320
321 Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE
322 is modified prior to the occurrence. This case is already considered as
323 a non-local anticipatable occurrence.
324*/
9243c3d1 325static bool
4f673c5e 326anticipatable_occurrence_p (const bb_info *bb, const vector_insn_info dem)
9243c3d1 327{
4f673c5e 328 insn_info *insn = dem.get_insn ();
9243c3d1
JZZ
329 /* The only possible operand we care of VSETVL is AVL. */
330 if (dem.has_avl_reg ())
331 {
4f673c5e 332 /* rs1 (avl) are not modified in the basic block prior to the VSETVL. */
9243c3d1
JZZ
333 if (!vlmax_avl_p (dem.get_avl ()))
334 {
ec99ffab 335 set_info *set = dem.get_avl_source ();
9243c3d1
JZZ
336 /* If it's undefined, it's not anticipatable conservatively. */
337 if (!set)
338 return false;
4f673c5e
JZZ
339 if (real_insn_and_same_bb_p (set->insn (), bb)
340 && before_p (set->insn (), insn))
9243c3d1
JZZ
341 return false;
342 }
343 }
344
4f673c5e 345 /* rd (vl) is not used between the start of the block and the occurrence. */
9243c3d1
JZZ
346 if (vsetvl_insn_p (insn->rtl ()))
347 {
4f673c5e
JZZ
348 rtx dest = get_vl (insn->rtl ());
349 for (insn_info *i = insn->prev_nondebug_insn ();
350 real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ())
351 {
352 /* rd (vl) is not used between the start of the block and the
353 * occurrence. */
354 if (find_access (i->uses (), REGNO (dest)))
355 return false;
356 /* rd (vl) are not modified in the basic block prior to the VSETVL. */
357 if (find_access (i->defs (), REGNO (dest)))
358 return false;
359 }
9243c3d1
JZZ
360 }
361
362 return true;
363}
364
365/* An "available occurrence" is one that is the last occurrence in the
366 basic block and the operands are not modified by following statements in
4f673c5e
JZZ
367 the basic block [including this insn].
368
369 For VSETVL instruction, we have these following formats:
370 1. vsetvl zero, rs1.
371 2. vsetvl zero, imm.
372 3. vsetvl rd, rs1.
373
374 So base on these circumstances, a DEM is considered as a local available
375 occurrence should satisfy these following conditions:
376
377 1). rs1 (avl) are not modified by following statements in
378 the basic block.
379 2). rd (vl) are not modified by following statements in
380 the basic block.
381
382 Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE
383 is modified prior to the occurrence. This case is already considered as
384 a non-local available occurrence.
385*/
9243c3d1 386static bool
4f673c5e 387available_occurrence_p (const bb_info *bb, const vector_insn_info dem)
9243c3d1 388{
4f673c5e 389 insn_info *insn = dem.get_insn ();
9243c3d1
JZZ
390 /* The only possible operand we care of VSETVL is AVL. */
391 if (dem.has_avl_reg ())
392 {
9243c3d1
JZZ
393 if (!vlmax_avl_p (dem.get_avl ()))
394 {
4f673c5e
JZZ
395 rtx dest = NULL_RTX;
396 if (vsetvl_insn_p (insn->rtl ()))
397 dest = get_vl (insn->rtl ());
398 for (const insn_info *i = insn; real_insn_and_same_bb_p (i, bb);
399 i = i->next_nondebug_insn ())
400 {
60bd33bc
JZZ
401 if (read_vl_insn_p (i->rtl ()))
402 continue;
4f673c5e
JZZ
403 /* rs1 (avl) are not modified by following statements in
404 the basic block. */
405 if (find_access (i->defs (), REGNO (dem.get_avl ())))
406 return false;
407 /* rd (vl) are not modified by following statements in
408 the basic block. */
409 if (dest && find_access (i->defs (), REGNO (dest)))
410 return false;
411 }
9243c3d1
JZZ
412 }
413 }
414 return true;
415}
416
6b6b9c68
JZZ
417static bool
418insn_should_be_added_p (const insn_info *insn, unsigned int types)
9243c3d1 419{
6b6b9c68
JZZ
420 if (insn->is_real () && (types & REAL_SET))
421 return true;
422 if (insn->is_phi () && (types & PHI_SET))
423 return true;
424 if (insn->is_bb_head () && (types & BB_HEAD_SET))
425 return true;
426 if (insn->is_bb_end () && (types & BB_END_SET))
427 return true;
428 return false;
9243c3d1
JZZ
429}
430
6b6b9c68
JZZ
/* Recursively find all define instructions. The kind of instruction is
   specified by the DEF_TYPE (bitmask TYPES).  Starting from PHI, walk
   every phi input transitively; collect each set whose defining insn
   matches TYPES.  Returns the empty set when PHI is null or when any
   reached definition is not a set (conservative bail-out).  */
static hash_set<set_info *>
get_all_sets (phi_info *phi, unsigned int types)
{
  hash_set<set_info *> insns;
  auto_vec<phi_info *> work_list;
  hash_set<phi_info *> visited_list;
  if (!phi)
    return hash_set<set_info *> ();
  work_list.safe_push (phi);

  while (!work_list.is_empty ())
    {
      phi_info *phi = work_list.pop ();
      visited_list.add (phi);
      for (use_info *use : phi->inputs ())
	{
	  def_info *def = use->def ();
	  set_info *set = safe_dyn_cast<set_info *> (def);
	  /* A non-set definition makes the whole result unusable.  */
	  if (!set)
	    return hash_set<set_info *> ();

	  gcc_assert (!set->insn ()->is_debug_insn ());

	  if (insn_should_be_added_p (set->insn (), types))
	    insns.add (set);
	  /* Chase nested phis, guarding against cycles with the
	     visited list.  */
	  if (set->insn ()->is_phi ())
	    {
	      phi_info *new_phi = as_a<phi_info *> (set);
	      if (!visited_list.contains (new_phi))
		work_list.safe_push (new_phi);
	    }
	}
    }
  return insns;
}
9243c3d1 468
6b6b9c68
JZZ
/* Convenience wrapper over get_all_sets (phi_info *, unsigned int).
   REAL_P requests real insns, PHI_P phi nodes, PARAM_P values live at
   block boundaries (bb head/end).  Only the combinations used by this
   pass are supported; anything else yields the empty set.  NOTE(review):
   PHI_P alone (without REAL_P) is not handled — confirm callers never
   request that combination.  */
static hash_set<set_info *>
get_all_sets (set_info *set, bool /* get_real_inst */ real_p,
	      bool /*get_phi*/ phi_p, bool /* get_function_parameter*/ param_p)
{
  if (real_p && phi_p && param_p)
    return get_all_sets (safe_dyn_cast<phi_info *> (set),
			 REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET);

  else if (real_p && param_p)
    return get_all_sets (safe_dyn_cast<phi_info *> (set),
			 REAL_SET | BB_HEAD_SET | BB_END_SET);

  else if (real_p)
    return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET);
  return hash_set<set_info *> ();
}
485
/* Helper function to get AVL operand.  For vsetvl patterns the AVL is
   the first element of the inner unspec; for other insns it is taken
   from the recog data, or RVV_VLMAX when the insn's avl_type attribute
   says VLMAX.  Returns NULL_RTX for insns without a VL operand.  */
static rtx
get_avl (rtx_insn *rinsn)
{
  if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn))
    return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0);

  if (!has_vl_op (rinsn))
    return NULL_RTX;
  if (get_attr_avl_type (rinsn) == VLMAX)
    return RVV_VLMAX;
  extract_insn_cached (rinsn);
  return recog_data.operand[get_attr_vl_op_idx (rinsn)];
}
500
501static set_info *
502get_same_bb_set (hash_set<set_info *> &sets, const basic_block cfg_bb)
503{
504 for (set_info *set : sets)
505 if (set->bb ()->cfg_bb () == cfg_bb)
506 return set;
507 return nullptr;
508}
509
4f673c5e
JZZ
/* Recursively find all predecessor blocks for cfg_bb, i.e. the set of
   blocks from which CFG_BB is reachable.  CFG_BB itself is included
   only when it is its own (transitive) predecessor, e.g. on a cycle.  */
static hash_set<basic_block>
get_all_predecessors (basic_block cfg_bb)
{
  hash_set<basic_block> blocks;
  auto_vec<basic_block> work_list;
  hash_set<basic_block> visited_list;
  work_list.safe_push (cfg_bb);

  while (!work_list.is_empty ())
    {
      basic_block new_cfg_bb = work_list.pop ();
      visited_list.add (new_cfg_bb);
      edge e;
      edge_iterator ei;
      /* Breadth-first expansion; the visited list prevents re-queuing
	 blocks on cyclic CFGs.  */
      FOR_EACH_EDGE (e, ei, new_cfg_bb->preds)
	{
	  if (!visited_list.contains (e->src))
	    work_list.safe_push (e->src);
	  blocks.add (e->src);
	}
    }
  return blocks;
}
9243c3d1 534
4f673c5e
JZZ
/* Return true if any set in SETS is defined in block BB.
   NOTE(review): SETS is passed by value, which copies the hash_set on
   every call — consider a const reference.  */
static bool
any_set_in_bb_p (hash_set<set_info *> sets, const bb_info *bb)
{
  for (const set_info *set : sets)
    if (set->bb ()->index () == bb->index ())
      return true;
  return false;
}
544
/* Helper function to get SEW operand. We always have SEW value for
   all RVV instructions that have VTYPE OP (the attribute is never
   INVALID_ATTRIBUTE for them).  */
static uint8_t
get_sew (rtx_insn *rinsn)
{
  return get_attr_sew (rinsn);
}
552
/* Helper function to get VLMUL operand. We always have VLMUL value for
   all RVV instructions that have VTYPE OP (the attribute is never
   INVALID_ATTRIBUTE for them).  */
static enum vlmul_type
get_vlmul (rtx_insn *rinsn)
{
  return (enum vlmul_type) get_attr_vlmul (rinsn);
}
560
561/* Get default tail policy. */
562static bool
563get_default_ta ()
564{
565 /* For the instruction that doesn't require TA, we still need a default value
566 to emit vsetvl. We pick up the default value according to prefer policy. */
567 return (bool) (get_prefer_tail_policy () & 0x1
568 || (get_prefer_tail_policy () >> 1 & 0x1));
569}
570
571/* Get default mask policy. */
572static bool
573get_default_ma ()
574{
575 /* For the instruction that doesn't require MA, we still need a default value
576 to emit vsetvl. We pick up the default value according to prefer policy. */
577 return (bool) (get_prefer_mask_policy () & 0x1
578 || (get_prefer_mask_policy () >> 1 & 0x1));
579}
580
/* Helper function to get TA operand.  If the insn has no TA attribute
   (INVALID_ATTRIBUTE), fall back to the preferred default policy.  */
static bool
tail_agnostic_p (rtx_insn *rinsn)
{
  /* If it doesn't have TA, we return agnostic by default.  */
  extract_insn_cached (rinsn);
  int ta = get_attr_ta (rinsn);
  return ta == INVALID_ATTRIBUTE ? get_default_ta () : IS_AGNOSTIC (ta);
}
590
/* Helper function to get MA operand.  If the insn has no MA attribute
   (INVALID_ATTRIBUTE), fall back to the preferred default policy.  */
static bool
mask_agnostic_p (rtx_insn *rinsn)
{
  /* If it doesn't have MA, we return agnostic by default.  */
  extract_insn_cached (rinsn);
  int ma = get_attr_ma (rinsn);
  return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma);
}
600
/* Return true if FN has a vector instruction that use VL/VTYPE, i.e.
   any nondebug insn carrying the has_vtype_op attribute.  Used to skip
   the pass entirely for scalar-only functions.  */
static bool
has_vector_insn (function *fn)
{
  basic_block cfg_bb;
  rtx_insn *rinsn;
  FOR_ALL_BB_FN (cfg_bb, fn)
    FOR_BB_INSNS (cfg_bb, rinsn)
      if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn))
	return true;
  return false;
}
613
/* Generate (but do not emit) a vsetvl pattern of kind INSN_TYPE for
   INFO.  VL is the destination register; it must be non-null for
   VSETVL_NORMAL and is ignored for the other kinds.  */
static rtx
gen_vsetvl_pat (enum vsetvl_type insn_type, const vl_vtype_info &info, rtx vl)
{
  rtx avl = info.get_avl ();
  rtx sew = gen_int_mode (info.get_sew (), Pmode);
  rtx vlmul = gen_int_mode (info.get_vlmul (), Pmode);
  rtx ta = gen_int_mode (info.get_ta (), Pmode);
  rtx ma = gen_int_mode (info.get_ma (), Pmode);

  if (insn_type == VSETVL_NORMAL)
    {
      gcc_assert (vl != NULL_RTX);
      return gen_vsetvl (Pmode, vl, avl, sew, vlmul, ta, ma);
    }
  else if (insn_type == VSETVL_VTYPE_CHANGE_ONLY)
    return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma);
  else
    return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma);
}
634
/* Generate a replacement vsetvl pattern for RINSN carrying the
   VL/VTYPE state in INFO, preserving RINSN's own variant (normal /
   vtype-change-only / discard-result).  */
static rtx
gen_vsetvl_pat (rtx_insn *rinsn, const vector_insn_info &info)
{
  rtx new_pat;
  vl_vtype_info new_info = info;
  /* For a fault-first load, re-read the AVL from the insn itself; its
     VL output is data-dependent, so the tracked source is dropped.  */
  if (info.get_insn () && info.get_insn ()->rtl ()
      && fault_first_load_p (info.get_insn ()->rtl ()))
    new_info.set_avl_info (
      avl_info (get_avl (info.get_insn ()->rtl ()), nullptr));
  if (vsetvl_insn_p (rinsn) || vlmax_avl_p (info.get_avl ()))
    {
      rtx dest = get_vl (rinsn);
      new_pat = gen_vsetvl_pat (VSETVL_NORMAL, new_info, dest);
    }
  else if (INSN_CODE (rinsn) == CODE_FOR_vsetvl_vtype_change_only)
    new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, new_info, NULL_RTX);
  else
    new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX);
  return new_pat;
}
655
656static void
657emit_vsetvl_insn (enum vsetvl_type insn_type, enum emit_type emit_type,
e577b91b 658 const vl_vtype_info &info, rtx vl, rtx_insn *rinsn)
9243c3d1
JZZ
659{
660 rtx pat = gen_vsetvl_pat (insn_type, info, vl);
661 if (dump_file)
662 {
663 fprintf (dump_file, "\nInsert vsetvl insn PATTERN:\n");
664 print_rtl_single (dump_file, pat);
665 }
666
667 if (emit_type == EMIT_DIRECT)
668 emit_insn (pat);
669 else if (emit_type == EMIT_BEFORE)
670 emit_insn_before (pat, rinsn);
671 else
672 emit_insn_after (pat, rinsn);
673}
674
/* Remove RINSN from the insn stream, using remove_insn while inside an
   emit sequence and delete_insn otherwise.  Dumps the eliminated insn
   when dumping is enabled.  */
static void
eliminate_insn (rtx_insn *rinsn)
{
  if (dump_file)
    {
      fprintf (dump_file, "\nEliminate insn %d:\n", INSN_UID (rinsn));
      print_rtl_single (dump_file, rinsn);
    }
  if (in_sequence_p ())
    remove_insn (rinsn);
  else
    delete_insn (rinsn);
}
688
/* Emit a vsetvl realizing INFO, placed relative to RINSN according to
   EMIT_TYPE.  PREV_INFO is the VL/VTYPE state on entry; it is used to
   pick the cheapest correct variant, tried in priority order below.
   Returns the variant that was emitted.  */
static vsetvl_type
insert_vsetvl (enum emit_type emit_type, rtx_insn *rinsn,
	       const vector_insn_info &info, const vector_insn_info &prev_info)
{
  /* Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
     VLMAX.  */
  if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p ()
      && info.compatible_avl_p (prev_info) && info.same_vlmax_p (prev_info))
    {
      emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX,
			rinsn);
      return VSETVL_VTYPE_CHANGE_ONLY;
    }

  /* Immediate AVL: vsetvli zero, imm.  */
  if (info.has_avl_imm ())
    {
      emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX,
			rinsn);
      return VSETVL_DISCARD_RESULT;
    }

  if (info.has_avl_no_reg ())
    {
      /* We can only use x0, x0 if there's no chance of the vtype change causing
	 the previous vl to become invalid.  */
      if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p ()
	  && info.same_vlmax_p (prev_info))
	{
	  emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX,
			    rinsn);
	  return VSETVL_VTYPE_CHANGE_ONLY;
	}
      /* Otherwise use an AVL of 0 to avoid depending on previous vl.  */
      vl_vtype_info new_info = info;
      new_info.set_avl_info (avl_info (const0_rtx, nullptr));
      emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, new_info, NULL_RTX,
			rinsn);
      return VSETVL_DISCARD_RESULT;
    }

  /* Use X0 as the DestReg unless AVLReg is X0. We also need to change the
     opcode if the AVLReg is X0 as they have different register classes for
     the AVL operand.  */
  if (vlmax_avl_p (info.get_avl ()))
    {
      gcc_assert (has_vtype_op (rinsn) || vsetvl_insn_p (rinsn));
      rtx vl_op = info.get_avl_reg_rtx ();
      gcc_assert (!vlmax_avl_p (vl_op));
      emit_vsetvl_insn (VSETVL_NORMAL, emit_type, info, vl_op, rinsn);
      return VSETVL_NORMAL;
    }

  /* Plain register AVL: vsetvli zero, rs1.  */
  emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX, rinsn);

  if (dump_file)
    {
      fprintf (dump_file, "Update VL/VTYPE info, previous info=");
      prev_info.dump (dump_file);
    }
  return VSETVL_DISCARD_RESULT;
}
750
/* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
   to INSN. If such notes are added to an insn which references a
   CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
   that note, because the following loop optimization pass requires
   them.  */

/* ??? If there was a jump optimization pass after gcse and before loop,
   then we would not need to do this here, because jump would add the
   necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes.  */

static void
add_label_notes (rtx x, rtx_insn *rinsn)
{
  enum rtx_code code = GET_CODE (x);
  int i, j;
  const char *fmt;

  if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
    {
      /* This code used to ignore labels that referred to dispatch tables to
	 avoid flow generating (slightly) worse code.

	 We no longer ignore such label references (see LABEL_REF handling in
	 mark_jump_label for additional information).  */

      /* There's no reason for current users to emit jump-insns with
	 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
	 notes.  */
      gcc_assert (!JUMP_P (rinsn));
      add_reg_note (rinsn, REG_LABEL_OPERAND, label_ref_label (x));

      if (LABEL_P (label_ref_label (x)))
	LABEL_NUSES (label_ref_label (x))++;

      return;
    }

  /* Recurse into every sub-expression ('e') and expression vector ('E')
     of X looking for more LABEL_REFs.  */
  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	add_label_notes (XEXP (x, i), rinsn);
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  add_label_notes (XVECEXP (x, i, j), rinsn);
    }
}
797
/* Add EXPR to the end of basic block BB.

   This is used by both the PRE and code hoisting.  Care is taken to
   insert before any final jump/call so the insn stays inside BB.  */

static void
insert_insn_end_basic_block (rtx_insn *rinsn, basic_block cfg_bb)
{
  rtx_insn *end_rinsn = BB_END (cfg_bb);
  rtx_insn *new_insn;
  rtx_insn *pat, *pat_end;

  pat = rinsn;
  gcc_assert (pat && INSN_P (pat));

  /* RINSN may be the head of a chain of insns; find its tail.  */
  pat_end = pat;
  while (NEXT_INSN (pat_end) != NULL_RTX)
    pat_end = NEXT_INSN (pat_end);

  /* If the last end_rinsn is a jump, insert EXPR in front. Similarly we need
     to take care of trapping instructions in presence of non-call exceptions.
   */

  if (JUMP_P (end_rinsn)
      || (NONJUMP_INSN_P (end_rinsn)
	  && (!single_succ_p (cfg_bb)
	      || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL)))
    {
      /* FIXME: What if something in jump uses value set in new end_rinsn?  */
      new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb);
    }

  /* Likewise if the last end_rinsn is a call, as will happen in the presence
     of exception handling.  */
  else if (CALL_P (end_rinsn)
	   && (!single_succ_p (cfg_bb)
	       || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL))
    {
      /* Keeping in mind targets with small register classes and parameters
	 in registers, we search backward and place the instructions before
	 the first parameter is loaded. Do this for everyone for consistency
	 and a presumption that we'll get better code elsewhere as well. */

      /* Since different machines initialize their parameter registers
	 in different orders, assume nothing. Collect the set of all
	 parameter registers.  */
      end_rinsn = find_first_parameter_load (end_rinsn, BB_HEAD (cfg_bb));

      /* If we found all the parameter loads, then we want to insert
	 before the first parameter load.

	 If we did not find all the parameter loads, then we might have
	 stopped on the head of the block, which could be a CODE_LABEL.
	 If we inserted before the CODE_LABEL, then we would be putting
	 the end_rinsn in the wrong basic block. In that case, put the
	 end_rinsn after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK.
       */
      while (LABEL_P (end_rinsn) || NOTE_INSN_BASIC_BLOCK_P (end_rinsn))
	end_rinsn = NEXT_INSN (end_rinsn);

      new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb);
    }
  else
    new_insn = emit_insn_after_noloc (pat, end_rinsn, cfg_bb);

  /* Add REG_LABEL_OPERAND notes for every inserted insn that needs one.  */
  while (1)
    {
      if (INSN_P (pat))
	add_label_notes (PATTERN (pat), new_insn);
      if (pat == pat_end)
	break;
      pat = NEXT_INSN (pat);
    }
}
871
/* Get VL/VTYPE information for INSN: build a vl_vtype_info from the
   insn's AVL (with its SSA-defining set, if any), SEW, VLMUL, ratio
   and tail/mask policies.  */
static vl_vtype_info
get_vl_vtype_info (const insn_info *insn)
{
  set_info *set = nullptr;
  rtx avl = ::get_avl (insn->rtl ());
  /* Resolve which SSA definition feeds the AVL: for a VLMAX AVL with a
     VL operand, the VL operand's use carries it; for an ordinary
     register AVL, the AVL register's use does.  */
  if (avl && REG_P (avl))
    {
      if (vlmax_avl_p (avl) && has_vl_op (insn->rtl ()))
	set
	  = find_access (insn->uses (), REGNO (get_vl (insn->rtl ())))->def ();
      else if (!vlmax_avl_p (avl))
	set = find_access (insn->uses (), REGNO (avl))->def ();
      else
	set = nullptr;
    }

  uint8_t sew = get_sew (insn->rtl ());
  enum vlmul_type vlmul = get_vlmul (insn->rtl ());
  uint8_t ratio = get_attr_ratio (insn->rtl ());
  /* when get_attr_ratio is invalid, this kind of instructions
     doesn't care about ratio. However, we still need this value
     in demand info backward analysis.  */
  if (ratio == INVALID_ATTRIBUTE)
    ratio = calculate_ratio (sew, vlmul);
  bool ta = tail_agnostic_p (insn->rtl ());
  bool ma = mask_agnostic_p (insn->rtl ());

  /* If merge operand is undef value, we prefer agnostic.  */
  int merge_op_idx = get_attr_merge_op_idx (insn->rtl ());
  if (merge_op_idx != INVALID_ATTRIBUTE
      && satisfies_constraint_vu (recog_data.operand[merge_op_idx]))
    {
      ta = true;
      ma = true;
    }

  vl_vtype_info info (avl_info (avl, set), sew, vlmul, ratio, ta, ma);
  return info;
}
912
/* Replace RINSN's pattern with NEW_PAT in place, dumping before/after
   when dumping is enabled.  */
static void
change_insn (rtx_insn *rinsn, rtx new_pat)
{
  /* We don't apply change on RTL_SSA here since it's possible a
     new INSN we add in the PASS before which doesn't have RTL_SSA
     info yet.*/
  if (dump_file)
    {
      fprintf (dump_file, "\nChange PATTERN of insn %d from:\n",
	       INSN_UID (rinsn));
      print_rtl_single (dump_file, PATTERN (rinsn));
    }

  validate_change (rinsn, &PATTERN (rinsn), new_pat, false);

  if (dump_file)
    {
      fprintf (dump_file, "\nto:\n");
      print_rtl_single (dump_file, PATTERN (rinsn));
    }
}
934
60bd33bc
JZZ
/* Return the first read_vl insn that follows INSN within INSN's block,
   or nullptr if VL is redefined first or no read_vl insn is found
   before the block ends.  */
static const insn_info *
get_forward_read_vl_insn (const insn_info *insn)
{
  const bb_info *bb = insn->bb ();
  for (const insn_info *i = insn->next_nondebug_insn ();
       real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ())
    {
      /* A redefinition of VL makes any later read_vl unrelated to INSN.  */
      if (find_access (i->defs (), VL_REGNUM))
	return nullptr;
      if (read_vl_insn_p (i->rtl ()))
	return i;
    }
  return nullptr;
}
949
/* Return the closest fault-first load insn preceding INSN within INSN's
   block, or nullptr if VL is defined by some other insn in between or
   the block start is reached.  */
static const insn_info *
get_backward_fault_first_load_insn (const insn_info *insn)
{
  const bb_info *bb = insn->bb ();
  for (const insn_info *i = insn->prev_nondebug_insn ();
       real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ())
    {
      /* Check the load first: a fault-first load defines VL itself.  */
      if (fault_first_load_p (i->rtl ()))
	return i;
      if (find_access (i->defs (), VL_REGNUM))
	return nullptr;
    }
  return nullptr;
}
964
/* Try to replace INSN's pattern with NEW_PAT through the RTL_SSA CHANGE.
   Return true and commit the change on success; return false on failure,
   in which case the insn_change_watermark rolls everything back.  */
static bool
change_insn (function_info *ssa, insn_change change, insn_info *insn,
	     rtx new_pat)
{
  rtx_insn *rinsn = insn->rtl ();
  auto attempt = ssa->new_change_attempt ();
  /* Make sure the change keeps the insn in a legal position.  */
  if (!restrict_movement (change))
    return false;

  if (dump_file)
    {
      fprintf (dump_file, "\nChange PATTERN of insn %d from:\n",
	       INSN_UID (rinsn));
      print_rtl_single (dump_file, PATTERN (rinsn));
    }

  /* Tentative (in-group) change; cancelled by the watermark destructor
     unless we reach confirm_change_group below.  */
  insn_change_watermark watermark;
  validate_change (rinsn, &PATTERN (rinsn), new_pat, true);

  /* These routines report failures themselves.  */
  if (!recog (attempt, change) || !change_is_worthwhile (change, false))
    return false;

  /* Fix bug:
     (insn 12 34 13 2 (set (reg:VNx8DI 120 v24 [orig:134 _1 ] [134])
	  (if_then_else:VNx8DI (unspec:VNx8BI [
		      (const_vector:VNx8BI repeat [
			      (const_int 1 [0x1])
			  ])
		      (const_int 0 [0])
		      (const_int 2 [0x2]) repeated x2
		      (const_int 0 [0])
		      (reg:SI 66 vl)
		      (reg:SI 67 vtype)
		  ] UNSPEC_VPREDICATE)
	      (plus:VNx8DI (reg/v:VNx8DI 104 v8 [orig:137 op1 ] [137])
		  (sign_extend:VNx8DI (vec_duplicate:VNx8SI (reg:SI 15 a5
	  [140])))) (unspec:VNx8DI [ (const_int 0 [0]) ] UNSPEC_VUNDEF))) "rvv.c":8:12
	  2784 {pred_single_widen_addsvnx8di_scalar} (expr_list:REG_EQUIV
	  (mem/c:VNx8DI (reg:DI 10 a0 [142]) [1 <retval>+0 S[64, 64] A128])
	  (expr_list:REG_EQUAL (if_then_else:VNx8DI (unspec:VNx8BI [
			  (const_vector:VNx8BI repeat [
				  (const_int 1 [0x1])
			      ])
			  (reg/v:DI 13 a3 [orig:139 vl ] [139])
			  (const_int 2 [0x2]) repeated x2
			  (const_int 0 [0])
			  (reg:SI 66 vl)
			  (reg:SI 67 vtype)
		      ] UNSPEC_VPREDICATE)
		  (plus:VNx8DI (reg/v:VNx8DI 104 v8 [orig:137 op1 ] [137])
		      (const_vector:VNx8DI repeat [
			      (const_int 2730 [0xaaa])
			  ]))
		  (unspec:VNx8DI [
			  (const_int 0 [0])
		      ] UNSPEC_VUNDEF))
	      (nil))))
     Here we want to remove use "a3". However, the REG_EQUAL/REG_EQUIV note use
     "a3" which made us fail in change_insn. We reference to the
     'aarch64-cc-fusion.cc' and add this method.  */
  remove_reg_equal_equiv_notes (rinsn);
  confirm_change_group ();
  ssa->change_insn (change);

  if (dump_file)
    {
      fprintf (dump_file, "\nto:\n");
      print_rtl_single (dump_file, PATTERN (rinsn));
    }
  return true;
}
1037
aef20243
JZZ
1038static void
1039change_vsetvl_insn (const insn_info *insn, const vector_insn_info &info)
1040{
1041 rtx_insn *rinsn;
1042 if (vector_config_insn_p (insn->rtl ()))
1043 {
1044 rinsn = insn->rtl ();
1045 gcc_assert (vsetvl_insn_p (rinsn) && "Can't handle X0, rs1 vsetvli yet");
1046 }
1047 else
1048 {
1049 gcc_assert (has_vtype_op (insn->rtl ()));
1050 rinsn = PREV_INSN (insn->rtl ());
1051 gcc_assert (vector_config_insn_p (rinsn));
1052 }
1053 rtx new_pat = gen_vsetvl_pat (rinsn, info);
1054 change_insn (rinsn, new_pat);
1055}
1056
/* Try to fold a later vsetvl in the same basic block into DEM's vsetvl.
   If a following vector insn uses the VL defined by DEM's vsetvl as its
   AVL and its demands can be merged with DEM's, rewrite DEM's vsetvl with
   the merged demands and delete the now-redundant vsetvl in front of that
   insn.  */
static void
local_eliminate_vsetvl_insn (const vector_insn_info &dem)
{
  const insn_info *insn = dem.get_insn ();
  if (!insn || insn->is_artificial ())
    return;
  rtx_insn *rinsn = insn->rtl ();
  const bb_info *bb = insn->bb ();
  if (vsetvl_insn_p (rinsn))
    {
      rtx vl = get_vl (rinsn);
      for (insn_info *i = insn->next_nondebug_insn ();
	   real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ())
	{
	  /* Anything that may clobber VL/VTYPE ends the search.  */
	  if (i->is_call () || i->is_asm ()
	      || find_access (i->defs (), VL_REGNUM)
	      || find_access (i->defs (), VTYPE_REGNUM))
	    return;

	  if (has_vtype_op (i->rtl ()))
	    {
	      /* The candidate must be preceded by a vsetvl whose VL result
		 is discarded.  */
	      if (!vsetvl_discard_result_insn_p (PREV_INSN (i->rtl ())))
		return;
	      rtx avl = get_avl (i->rtl ());
	      /* The candidate's AVL must be exactly the VL rtx defined by
		 DEM's vsetvl.  */
	      if (avl != vl)
		return;
	      set_info *def = find_access (i->uses (), REGNO (avl))->def ();
	      if (def->insn () != insn)
		return;

	      vector_insn_info new_info;
	      new_info.parse_insn (i);
	      /* All demands except AVL must be mergeable.  */
	      if (!new_info.skip_avl_compatible_p (dem))
		return;

	      new_info.set_avl_info (dem.get_avl_info ());
	      new_info = dem.merge (new_info, LOCAL_MERGE);
	      change_vsetvl_insn (insn, new_info);
	      /* Remove the vsetvl that preceded the fused insn.  */
	      eliminate_insn (PREV_INSN (i->rtl ()));
	      return;
	    }
	}
    }
}
1101
/* Return true when INSN1 and INSN2 are known to compute the same value.
   Used to prove that two AVL definitions are interchangeable.  */
static bool
source_equal_p (insn_info *insn1, insn_info *insn2)
{
  if (!insn1 || !insn2)
    return false;
  rtx_insn *rinsn1 = insn1->rtl ();
  rtx_insn *rinsn2 = insn2->rtl ();
  if (!rinsn1 || !rinsn2)
    return false;
  rtx note1 = find_reg_equal_equiv_note (rinsn1);
  rtx note2 = find_reg_equal_equiv_note (rinsn2);
  rtx single_set1 = single_set (rinsn1);
  rtx single_set2 = single_set (rinsn2);
  /* Two read_vl results are equal when they read the VL produced by one
     and the same fault-only-first load.  */
  if (read_vl_insn_p (rinsn1) && read_vl_insn_p (rinsn2))
    {
      const insn_info *load1 = get_backward_fault_first_load_insn (insn1);
      const insn_info *load2 = get_backward_fault_first_load_insn (insn2);
      return load1 && load2 && load1 == load2;
    }

  if (note1 && note2 && rtx_equal_p (note1, note2))
    return true;

  /* Since vsetvl instruction is not single SET.
     We handle this case specially here.  */
  if (vsetvl_insn_p (insn1->rtl ()) && vsetvl_insn_p (insn2->rtl ()))
    {
      /* For example:
	   vsetvl1 a6,a5,e32m1
	   RVV 1 (use a6 as AVL)
	   vsetvl2 a5,a5,e8mf4
	   RVV 2 (use a5 as AVL)
	 We consider AVL of RVV 1 and RVV 2 are same so that we can
	 gain more optimization opportunities.

	 Note: insn1_info.compatible_avl_p (insn2_info)
	 will make sure there is no instruction between vsetvl1 and vsetvl2
	 modify a5 since their def will be different if there is instruction
	 modify a5 and compatible_avl_p will return false.  */
      vector_insn_info insn1_info, insn2_info;
      insn1_info.parse_insn (insn1);
      insn2_info.parse_insn (insn2);
      if (insn1_info.same_vlmax_p (insn2_info)
	  && insn1_info.compatible_avl_p (insn2_info))
	return true;
    }

  /* We only handle AVL is set by instructions with no side effects.  */
  if (!single_set1 || !single_set2)
    return false;
  if (!rtx_equal_p (SET_SRC (single_set1), SET_SRC (single_set2)))
    return false;
  /* Identical source expressions reading identical uses must yield the
     same value.  */
  gcc_assert (insn1->uses ().size () == insn2->uses ().size ());
  for (size_t i = 0; i < insn1->uses ().size (); i++)
    if (insn1->uses ()[i] != insn2->uses ()[i])
      return false;
  return true;
}
1160
1161/* Helper function to get single same real RTL source.
1162 return NULL if it is not a single real RTL source. */
6b6b9c68 1163static insn_info *
4f673c5e
JZZ
1164extract_single_source (set_info *set)
1165{
1166 if (!set)
1167 return nullptr;
1168 if (set->insn ()->is_real ())
6b6b9c68 1169 return set->insn ();
4f673c5e
JZZ
1170 if (!set->insn ()->is_phi ())
1171 return nullptr;
6b6b9c68 1172 hash_set<set_info *> sets = get_all_sets (set, true, false, true);
4f673c5e 1173
6b6b9c68 1174 insn_info *first_insn = (*sets.begin ())->insn ();
4f673c5e
JZZ
1175 if (first_insn->is_artificial ())
1176 return nullptr;
6b6b9c68 1177 for (const set_info *set : sets)
4f673c5e
JZZ
1178 {
1179 /* If there is a head or end insn, we conservative return
1180 NULL so that VSETVL PASS will insert vsetvl directly. */
6b6b9c68 1181 if (set->insn ()->is_artificial ())
4f673c5e 1182 return nullptr;
6b6b9c68 1183 if (!source_equal_p (set->insn (), first_insn))
4f673c5e
JZZ
1184 return nullptr;
1185 }
1186
6b6b9c68 1187 return first_insn;
4f673c5e
JZZ
1188}
1189
ec99ffab
JZZ
1190static unsigned
1191calculate_sew (vlmul_type vlmul, unsigned int ratio)
1192{
1193 for (const unsigned sew : ALL_SEW)
1194 if (calculate_ratio (sew, vlmul) == ratio)
1195 return sew;
1196 return 0;
1197}
1198
1199static vlmul_type
1200calculate_vlmul (unsigned int sew, unsigned int ratio)
1201{
1202 for (const vlmul_type vlmul : ALL_LMUL)
1203 if (calculate_ratio (sew, vlmul) == ratio)
1204 return vlmul;
1205 return LMUL_RESERVED;
1206}
1207
1208static bool
1209incompatible_avl_p (const vector_insn_info &info1,
1210 const vector_insn_info &info2)
1211{
1212 return !info1.compatible_avl_p (info2) && !info2.compatible_avl_p (info1);
1213}
1214
1215static bool
1216different_sew_p (const vector_insn_info &info1, const vector_insn_info &info2)
1217{
1218 return info1.get_sew () != info2.get_sew ();
1219}
1220
1221static bool
1222different_lmul_p (const vector_insn_info &info1, const vector_insn_info &info2)
1223{
1224 return info1.get_vlmul () != info2.get_vlmul ();
1225}
1226
1227static bool
1228different_ratio_p (const vector_insn_info &info1, const vector_insn_info &info2)
1229{
1230 return info1.get_ratio () != info2.get_ratio ();
1231}
1232
1233static bool
1234different_tail_policy_p (const vector_insn_info &info1,
1235 const vector_insn_info &info2)
1236{
1237 return info1.get_ta () != info2.get_ta ();
1238}
1239
1240static bool
1241different_mask_policy_p (const vector_insn_info &info1,
1242 const vector_insn_info &info2)
1243{
1244 return info1.get_ma () != info2.get_ma ();
1245}
1246
1247static bool
1248possible_zero_avl_p (const vector_insn_info &info1,
1249 const vector_insn_info &info2)
1250{
1251 return !info1.has_non_zero_avl () || !info2.has_non_zero_avl ();
1252}
1253
ec99ffab
JZZ
1254static bool
1255second_ratio_invalid_for_first_sew_p (const vector_insn_info &info1,
1256 const vector_insn_info &info2)
1257{
1258 return calculate_vlmul (info1.get_sew (), info2.get_ratio ())
1259 == LMUL_RESERVED;
1260}
1261
1262static bool
1263second_ratio_invalid_for_first_lmul_p (const vector_insn_info &info1,
1264 const vector_insn_info &info2)
1265{
1266 return calculate_sew (info1.get_vlmul (), info2.get_ratio ()) == 0;
1267}
1268
1269static bool
1270second_sew_less_than_first_sew_p (const vector_insn_info &info1,
1271 const vector_insn_info &info2)
1272{
1273 return info2.get_sew () < info1.get_sew ();
1274}
1275
1276static bool
1277first_sew_less_than_second_sew_p (const vector_insn_info &info1,
1278 const vector_insn_info &info2)
1279{
1280 return info1.get_sew () < info2.get_sew ();
1281}
1282
/* return 0 if LMUL1 == LMUL2.
   return -1 if LMUL1 < LMUL2.
   return 1 if LMUL1 > LMUL2.
   NOTE(review): the branches below look inverted relative to this stated
   contract — e.g. compare_lmul (LMUL_1, LMUL_2) returns 1 although
   LMUL_1 < LMUL_2, LMUL_8 (the largest) returns -1 against everything,
   and LMUL_F8 (the smallest) returns 0 against everything.  Callers such
   as second_lmul_less_than_first_lmul_p and the riscv-vsetvl.def tables
   depend on the current behavior, so confirm the intended convention
   before changing either the comment or the code.  */
static int
compare_lmul (vlmul_type vlmul1, vlmul_type vlmul2)
{
  if (vlmul1 == vlmul2)
    return 0;

  switch (vlmul1)
    {
    case LMUL_1:
      if (vlmul2 == LMUL_2 || vlmul2 == LMUL_4 || vlmul2 == LMUL_8)
	return 1;
      else
	return -1;
    case LMUL_2:
      if (vlmul2 == LMUL_4 || vlmul2 == LMUL_8)
	return 1;
      else
	return -1;
    case LMUL_4:
      if (vlmul2 == LMUL_8)
	return 1;
      else
	return -1;
    case LMUL_8:
      return -1;
    case LMUL_F2:
      if (vlmul2 == LMUL_1 || vlmul2 == LMUL_2 || vlmul2 == LMUL_4
	  || vlmul2 == LMUL_8)
	return 1;
      else
	return -1;
    case LMUL_F4:
      if (vlmul2 == LMUL_F2 || vlmul2 == LMUL_1 || vlmul2 == LMUL_2
	  || vlmul2 == LMUL_4 || vlmul2 == LMUL_8)
	return 1;
      else
	return -1;
    case LMUL_F8:
      return 0;
    default:
      gcc_unreachable ();
    }
}
1329
1330static bool
1331second_lmul_less_than_first_lmul_p (const vector_insn_info &info1,
1332 const vector_insn_info &info2)
1333{
1334 return compare_lmul (info2.get_vlmul (), info1.get_vlmul ()) == -1;
1335}
1336
ec99ffab
JZZ
1337static bool
1338second_ratio_less_than_first_ratio_p (const vector_insn_info &info1,
1339 const vector_insn_info &info2)
1340{
1341 return info2.get_ratio () < info1.get_ratio ();
1342}
1343
/* Table of demand-pair patterns under which two demand infos are
   incompatible, plus the predicate deciding each case; entries are
   generated from riscv-vsetvl.def.  */
static CONSTEXPR const demands_cond incompatible_conds[] = {
#define DEF_INCOMPATIBLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1,         \
			      GE_SEW1, TAIL_POLICTY1, MASK_POLICY1, AVL2,      \
			      SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2,      \
			      TAIL_POLICTY2, MASK_POLICY2, COND)               \
  {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1,         \
     MASK_POLICY1},                                                           \
    {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2,         \
     MASK_POLICY2}},                                                          \
   COND},
#include "riscv-vsetvl.def"
};
1356
1357static unsigned
1358greatest_sew (const vector_insn_info &info1, const vector_insn_info &info2)
1359{
1360 return std::max (info1.get_sew (), info2.get_sew ());
1361}
1362
1363static unsigned
1364first_sew (const vector_insn_info &info1, const vector_insn_info &)
1365{
1366 return info1.get_sew ();
1367}
1368
1369static unsigned
1370second_sew (const vector_insn_info &, const vector_insn_info &info2)
1371{
1372 return info2.get_sew ();
1373}
1374
1375static vlmul_type
1376first_vlmul (const vector_insn_info &info1, const vector_insn_info &)
1377{
1378 return info1.get_vlmul ();
1379}
1380
1381static vlmul_type
1382second_vlmul (const vector_insn_info &, const vector_insn_info &info2)
1383{
1384 return info2.get_vlmul ();
1385}
1386
1387static unsigned
1388first_ratio (const vector_insn_info &info1, const vector_insn_info &)
1389{
1390 return info1.get_ratio ();
1391}
1392
1393static unsigned
1394second_ratio (const vector_insn_info &, const vector_insn_info &info2)
1395{
1396 return info2.get_ratio ();
1397}
1398
1399static vlmul_type
1400vlmul_for_first_sew_second_ratio (const vector_insn_info &info1,
1401 const vector_insn_info &info2)
1402{
1403 return calculate_vlmul (info1.get_sew (), info2.get_ratio ());
1404}
1405
1406static unsigned
1407ratio_for_second_sew_first_vlmul (const vector_insn_info &info1,
1408 const vector_insn_info &info2)
1409{
1410 return calculate_ratio (info2.get_sew (), info1.get_vlmul ());
1411}
1412
/* Table of SEW/LMUL demand-fusion rules: for each pair of demand
   patterns it gives the fused demand flags and the SEW/VLMUL/RATIO
   selectors to apply; entries are generated from riscv-vsetvl.def.  */
static CONSTEXPR const demands_fuse_rule fuse_rules[] = {
#define DEF_SEW_LMUL_FUSE_RULE(DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1,       \
			       DEMAND_GE_SEW1, DEMAND_SEW2, DEMAND_LMUL2,      \
			       DEMAND_RATIO2, DEMAND_GE_SEW2, NEW_DEMAND_SEW,  \
			       NEW_DEMAND_LMUL, NEW_DEMAND_RATIO,              \
			       NEW_DEMAND_GE_SEW, NEW_SEW, NEW_VLMUL,          \
			       NEW_RATIO)                                      \
  {{{DEMAND_ANY, DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1, DEMAND_ANY,        \
     DEMAND_GE_SEW1, DEMAND_ANY, DEMAND_ANY},                                 \
    {DEMAND_ANY, DEMAND_SEW2, DEMAND_LMUL2, DEMAND_RATIO2, DEMAND_ANY,        \
     DEMAND_GE_SEW2, DEMAND_ANY, DEMAND_ANY}},                                \
   NEW_DEMAND_SEW,                                                            \
   NEW_DEMAND_LMUL,                                                           \
   NEW_DEMAND_RATIO,                                                          \
   NEW_DEMAND_GE_SEW,                                                         \
   NEW_SEW,                                                                   \
   NEW_VLMUL,                                                                 \
   NEW_RATIO},
#include "riscv-vsetvl.def"
};
1433
1434static bool
1435always_unavailable (const vector_insn_info &, const vector_insn_info &)
1436{
1437 return true;
1438}
1439
1440static bool
1441avl_unavailable_p (const vector_insn_info &info1, const vector_insn_info &info2)
1442{
1443 return !info2.compatible_avl_p (info1.get_avl_info ());
1444}
1445
1446static bool
1447sew_unavailable_p (const vector_insn_info &info1, const vector_insn_info &info2)
1448{
1449 if (!info2.demand_p (DEMAND_LMUL) && !info2.demand_p (DEMAND_RATIO))
1450 {
1451 if (info2.demand_p (DEMAND_GE_SEW))
1452 return info1.get_sew () < info2.get_sew ();
1453 return info1.get_sew () != info2.get_sew ();
1454 }
1455 return true;
1456}
1457
1458static bool
1459lmul_unavailable_p (const vector_insn_info &info1,
1460 const vector_insn_info &info2)
1461{
1462 if (info1.get_vlmul () == info2.get_vlmul () && !info2.demand_p (DEMAND_SEW)
1463 && !info2.demand_p (DEMAND_RATIO))
1464 return false;
1465 return true;
1466}
1467
1468static bool
1469ge_sew_unavailable_p (const vector_insn_info &info1,
1470 const vector_insn_info &info2)
1471{
1472 if (!info2.demand_p (DEMAND_LMUL) && !info2.demand_p (DEMAND_RATIO)
1473 && info2.demand_p (DEMAND_GE_SEW))
1474 return info1.get_sew () < info2.get_sew ();
1475 return true;
1476}
1477
1478static bool
1479ge_sew_lmul_unavailable_p (const vector_insn_info &info1,
1480 const vector_insn_info &info2)
1481{
1482 if (!info2.demand_p (DEMAND_RATIO) && info2.demand_p (DEMAND_GE_SEW))
1483 return info1.get_sew () < info2.get_sew ();
1484 return true;
1485}
1486
1487static bool
1488ge_sew_ratio_unavailable_p (const vector_insn_info &info1,
1489 const vector_insn_info &info2)
1490{
1491 if (!info2.demand_p (DEMAND_LMUL) && info2.demand_p (DEMAND_GE_SEW))
1492 return info1.get_sew () < info2.get_sew ();
1493 return true;
1494}
1495
/* Table of demand-pair patterns under which one demand info cannot make
   another's vsetvl unnecessary ("unavailable"), plus the predicate for
   each case; entries are generated from riscv-vsetvl.def.  */
static CONSTEXPR const demands_cond unavailable_conds[] = {
#define DEF_UNAVAILABLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, \
			     TAIL_POLICTY1, MASK_POLICY1, AVL2, SEW2, LMUL2,   \
			     RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2,     \
			     MASK_POLICY2, COND)                               \
  {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1,         \
     MASK_POLICY1},                                                           \
    {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2,         \
     MASK_POLICY2}},                                                          \
   COND},
#include "riscv-vsetvl.def"
};
1508
1509static bool
1510same_sew_lmul_demand_p (const bool *dems1, const bool *dems2)
1511{
1512 return dems1[DEMAND_SEW] == dems2[DEMAND_SEW]
1513 && dems1[DEMAND_LMUL] == dems2[DEMAND_LMUL]
1514 && dems1[DEMAND_RATIO] == dems2[DEMAND_RATIO] && !dems1[DEMAND_GE_SEW]
1515 && !dems2[DEMAND_GE_SEW];
1516}
1517
1518static bool
1519propagate_avl_across_demands_p (const vector_insn_info &info1,
1520 const vector_insn_info &info2)
1521{
1522 if (info2.demand_p (DEMAND_AVL))
1523 {
1524 if (info2.demand_p (DEMAND_NONZERO_AVL))
1525 return info1.demand_p (DEMAND_AVL)
1526 && !info1.demand_p (DEMAND_NONZERO_AVL) && info1.has_avl_reg ();
1527 }
1528 else
1529 return info1.demand_p (DEMAND_AVL) && info1.has_avl_reg ();
1530 return false;
1531}
1532
1533static bool
a481eed8 1534reg_available_p (const insn_info *insn, const vector_insn_info &info)
ec99ffab 1535{
a481eed8 1536 if (info.has_avl_reg () && !info.get_avl_source ())
44c918b5 1537 return false;
a481eed8
JZZ
1538 insn_info *def_insn = info.get_avl_source ()->insn ();
1539 if (def_insn->bb () == insn->bb ())
1540 return before_p (def_insn, insn);
ec99ffab 1541 else
a481eed8
JZZ
1542 return dominated_by_p (CDI_DOMINATORS, insn->bb ()->cfg_bb (),
1543 def_insn->bb ()->cfg_bb ());
ec99ffab
JZZ
1544}
1545
/* Return true if the instruction support relaxed compatible check.
   The only relaxed case: INFO1 is a fault-only-first load and INFO2's
   AVL reaches (through a PHI) a read_vl of that very load; then only the
   VTYPE fields need to be compatible, not the AVL.  */
static bool
support_relaxed_compatible_p (const vector_insn_info &info1,
			      const vector_insn_info &info2)
{
  if (fault_first_load_p (info1.get_insn ()->rtl ())
      && info2.demand_p (DEMAND_AVL) && info2.has_avl_reg ()
      && info2.get_avl_source () && info2.get_avl_source ()->insn ()->is_phi ())
    {
      hash_set<set_info *> sets
	= get_all_sets (info2.get_avl_source (), true, false, false);
      for (set_info *set : sets)
	{
	  if (read_vl_insn_p (set->insn ()->rtl ()))
	    {
	      /* Does this read_vl read the VL produced by INFO1's load?  */
	      const insn_info *insn
		= get_backward_fault_first_load_insn (set->insn ());
	      if (insn == info1.get_insn ())
		return info2.compatible_vtype_p (info1);
	    }
	}
    }
  return false;
}
1570
/* Return true if the block is worthwhile backward propagation.
   For loop blocks, propagation only pays off when the demand entering
   the block is compatible with the demand leaving it, so no vsetvl is
   needed inside the loop body.  */
static bool
backward_propagate_worthwhile_p (const basic_block cfg_bb,
				 const vector_block_info block_info)
{
  if (loop_basic_block_p (cfg_bb))
    {
      if (block_info.reaching_out.valid_or_dirty_p ())
	{
	  if (block_info.local_dem.compatible_p (block_info.reaching_out))
	    {
	      /* Case 1 (Can backward propagate):
		 ....
		 bb0:
		 ...
		 for (int i = 0; i < n; i++)
		   {
		     vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
		     __riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
		   }
		 The local_dem is compatible with reaching_out. Such case is
		 worthwhile backward propagation.  */
	      return true;
	    }
	  else
	    {
	      /* A fault-only-first load may still permit propagation under
		 the relaxed compatibility rule.  */
	      if (support_relaxed_compatible_p (block_info.reaching_out,
						block_info.local_dem))
		return true;
	      /* Case 2 (Don't backward propagate):
		 ....
		 bb0:
		 ...
		 for (int i = 0; i < n; i++)
		   {
		     vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
		     __riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
		     vint16mf2_t v2 = __riscv_vle16_v_i16mf2 (in + i + 6, 8);
		     __riscv_vse16_v_i16mf2 (out + i + 6, v, 8);
		   }
		 The local_dem is incompatible with reaching_out.
		 It makes no sense to backward propagate the local_dem since we
		 can't avoid VSETVL inside the loop.  */
	      return false;
	    }
	}
      else
	{
	  gcc_assert (block_info.reaching_out.unknown_p ());
	  /* Case 3 (Don't backward propagate):
	     ....
	     bb0:
	     ...
	     for (int i = 0; i < n; i++)
	       {
		 vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
		 __riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
		 fn3 ();
	       }
	     The local_dem is VALID, but the reaching_out is UNKNOWN.
	     It makes no sense to backward propagate the local_dem since we
	     can't avoid VSETVL inside the loop.  */
	  return false;
	}
    }

  return true;
}
1639
a2d12abe
JZZ
1640/* Count the number of REGNO in RINSN. */
1641static int
1642count_regno_occurrences (rtx_insn *rinsn, unsigned int regno)
1643{
1644 int count = 0;
1645 extract_insn (rinsn);
1646 for (int i = 0; i < recog_data.n_operands; i++)
1647 if (refers_to_regno_p (regno, recog_data.operand[i]))
1648 count++;
1649 return count;
1650}
1651
12b23c71
JZZ
1652avl_info::avl_info (const avl_info &other)
1653{
1654 m_value = other.get_value ();
1655 m_source = other.get_source ();
1656}
1657
/* Construct an AVL info from the AVL rtx VALUE_IN and the RTL_SSA set
   SOURCE_IN that defines it (SOURCE_IN may be null).  */
avl_info::avl_info (rtx value_in, set_info *source_in)
  : m_value (value_in), m_source (source_in)
{}
1661
4f673c5e
JZZ
1662bool
1663avl_info::single_source_equal_p (const avl_info &other) const
1664{
1665 set_info *set1 = m_source;
1666 set_info *set2 = other.get_source ();
6b6b9c68
JZZ
1667 insn_info *insn1 = extract_single_source (set1);
1668 insn_info *insn2 = extract_single_source (set2);
1669 if (!insn1 || !insn2)
1670 return false;
1671 return source_equal_p (insn1, insn2);
1672}
1673
/* Equality for AVLs with multiple (PHI) sources: simply require the very
   same defining set.  */
bool
avl_info::multiple_source_equal_p (const avl_info &other) const
{
  /* TODO: We don't do too much optimization here since it's
     too complicated in case of analyzing the PHI node.

     For example:
      void f (void * restrict in, void * restrict out, int n, int m, int cond)
      {
	size_t vl;
	switch (cond)
	{
	case 1:
	  vl = 100;
	  break;
	case 2:
	  vl = *(size_t*)(in + 100);
	  break;
	case 3:
	  {
	    size_t new_vl = *(size_t*)(in + 500);
	    size_t new_vl2 = *(size_t*)(in + 600);
	    vl = new_vl + new_vl2 + 777;
	    break;
	  }
	default:
	  vl = 4000;
	  break;
	}
	for (size_t i = 0; i < n; i++)
	  {
	    vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
	    __riscv_vse8_v_i8mf8 (out + i, v, vl);

	    vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl);
	    __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
	  }

	size_t vl2;
	switch (cond)
	{
	case 1:
	  vl2 = 100;
	  break;
	case 2:
	  vl2 = *(size_t*)(in + 100);
	  break;
	case 3:
	  {
	    size_t new_vl = *(size_t*)(in + 500);
	    size_t new_vl2 = *(size_t*)(in + 600);
	    vl2 = new_vl + new_vl2 + 777;
	    break;
	  }
	default:
	  vl2 = 4000;
	  break;
	}
	for (size_t i = 0; i < m; i++)
	  {
	    vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl2);
	    __riscv_vse8_v_i8mf8 (out + i + 300, v, vl2);
	    vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 200, vl2);
	    __riscv_vse8_v_i8mf8 (out + i + 200, v2, vl2);
	  }
      }
     Such case may not be necessary to optimize since the codes of defining
     vl and vl2 are redundant.  */
  return m_source == other.get_source ();
}
1744
9243c3d1
JZZ
1745avl_info &
1746avl_info::operator= (const avl_info &other)
1747{
1748 m_value = other.get_value ();
1749 m_source = other.get_source ();
1750 return *this;
1751}
1752
/* AVL equality.  Null AVLs only equal other null AVLs; immediates compare
   by value; VLMAX sentinels match any VLMAX; register AVLs compare via
   their defining instructions.  */
bool
avl_info::operator== (const avl_info &other) const
{
  if (!m_value)
    return !other.get_value ();
  if (!other.get_value ())
    return false;

  /* Different rtx codes (e.g. REG vs CONST_INT) can never match.  */
  if (GET_CODE (m_value) != GET_CODE (other.get_value ()))
    return false;

  /* Handle CONST_INT AVL.  */
  if (CONST_INT_P (m_value))
    return INTVAL (m_value) == INTVAL (other.get_value ());

  /* Handle VLMAX AVL.  */
  if (vlmax_avl_p (m_value))
    return vlmax_avl_p (other.get_value ());

  /* If any source is undef value, we think they are not equal.  */
  if (!m_source || !other.get_source ())
    return false;

  /* If both sources are single source (defined by a single real RTL)
     and their definitions are same.  */
  if (single_source_equal_p (other))
    return true;

  return multiple_source_equal_p (other);
}
1783
1784bool
1785avl_info::operator!= (const avl_info &other) const
1786{
1787 return !(*this == other);
1788}
1789
ec99ffab
JZZ
1790bool
1791avl_info::has_non_zero_avl () const
1792{
1793 if (has_avl_imm ())
1794 return INTVAL (get_value ()) > 0;
1795 if (has_avl_reg ())
1796 return vlmax_avl_p (get_value ());
1797 return false;
1798}
1799
/* Initialize VL/VTYPE information.  */
vl_vtype_info::vl_vtype_info (avl_info avl_in, uint8_t sew_in,
			      enum vlmul_type vlmul_in, uint8_t ratio_in,
			      bool ta_in, bool ma_in)
  : m_avl (avl_in), m_sew (sew_in), m_vlmul (vlmul_in), m_ratio (ratio_in),
    m_ta (ta_in), m_ma (ma_in)
{
  /* SEW must be one of the architecturally valid element widths.  */
  gcc_assert (valid_sew_p (m_sew) && "Unexpected SEW");
}
1809
1810bool
1811vl_vtype_info::operator== (const vl_vtype_info &other) const
1812{
6b6b9c68 1813 return same_avl_p (other) && m_sew == other.get_sew ()
9243c3d1
JZZ
1814 && m_vlmul == other.get_vlmul () && m_ta == other.get_ta ()
1815 && m_ma == other.get_ma () && m_ratio == other.get_ratio ();
1816}
1817
1818bool
1819vl_vtype_info::operator!= (const vl_vtype_info &other) const
1820{
1821 return !(*this == other);
1822}
1823
/* Return true when both infos refer to the same AVL.  */
bool
vl_vtype_info::same_avl_p (const vl_vtype_info &other) const
{
  /* We need to compare both RTL and SET.  If both AVL are CONST_INT.
     For example, const_int 3 and const_int 4, we need to compare
     RTL.  If both AVL are REG and their REGNO are same, we need to
     compare SET.  */
  return get_avl () == other.get_avl ()
	 && get_avl_source () == other.get_avl_source ();
}
1834
1835bool
1836vl_vtype_info::same_vtype_p (const vl_vtype_info &other) const
1837{
1838 return get_sew () == other.get_sew () && get_vlmul () == other.get_vlmul ()
1839 && get_ta () == other.get_ta () && get_ma () == other.get_ma ();
1840}
1841
1842bool
1843vl_vtype_info::same_vlmax_p (const vl_vtype_info &other) const
1844{
1845 return get_ratio () == other.get_ratio ();
1846}
1847
/* Compare the compatibility between Dem1 and Dem2.
   If Dem1 > Dem2, Dem1 has bigger compatibility then Dem2
   meaning Dem1 is easier be compatible with others than Dem2
   or Dem2 is stricter than Dem1.
   For example, Dem1 (demand SEW + LMUL) > Dem2 (demand RATIO).  */
bool
vector_insn_info::operator>= (const vector_insn_info &other) const
{
  /* Fault-only-first loads get a relaxed check that ignores AVL.  */
  if (support_relaxed_compatible_p (*this, other))
    {
      unsigned array_size = sizeof (unavailable_conds) / sizeof (demands_cond);
      /* Bypass AVL unavailable cases.
	 NOTE(review): assumes the first two table entries are the AVL
	 cases — keep in sync with riscv-vsetvl.def.  */
      for (unsigned i = 2; i < array_size; i++)
	if (unavailable_conds[i].pair.match_cond_p (this->get_demands (),
						    other.get_demands ())
	    && unavailable_conds[i].incompatible_p (*this, other))
	  return false;
      return true;
    }

  /* *this must satisfy OTHER's demands; if OTHER does not satisfy ours,
     being strictly stricter still counts as >=.  */
  if (!other.compatible_p (static_cast<const vl_vtype_info &> (*this)))
    return false;
  if (!this->compatible_p (static_cast<const vl_vtype_info &> (other)))
    return true;

  if (*this == other)
    return true;

  /* Otherwise consult the full unavailability table.  */
  for (const auto &cond : unavailable_conds)
    if (cond.pair.match_cond_p (this->get_demands (), other.get_demands ())
	&& cond.incompatible_p (*this, other))
      return false;

  return true;
}
1883
/* Structural equality of two demand infos: same state, same demand
   flags, (for vsetvls) same defining insn, same AVL and same VTYPE.  */
bool
vector_insn_info::operator== (const vector_insn_info &other) const
{
  gcc_assert (!uninit_p () && !other.uninit_p ()
	      && "Uninitialization should not happen");

  /* Empty is only equal to another Empty.  */
  if (empty_p ())
    return other.empty_p ();
  if (other.empty_p ())
    return empty_p ();

  /* Unknown is only equal to another Unknown.  */
  if (unknown_p ())
    return other.unknown_p ();
  if (other.unknown_p ())
    return unknown_p ();

  /* All demand flags must agree.  */
  for (size_t i = 0; i < NUM_DEMAND; i++)
    if (m_demands[i] != other.demand_p ((enum demand_type) i))
      return false;

  /* vsetvl infos are tied to their defining insn.  */
  if (vector_config_insn_p (m_insn->rtl ())
      || vector_config_insn_p (other.get_insn ()->rtl ()))
    if (m_insn != other.get_insn ())
      return false;

  if (!same_avl_p (other))
    return false;

  /* If the full VTYPE is valid, check that it is the same.  */
  return same_vtype_p (other);
}
1917
/* Parse demanded info directly from plain RTL insn RINSN (used when no
   RTL_SSA information is available, e.g. at optimize == 0).  Leaves
   *this empty unless RINSN is a real insn with a vtype operand.  */
void
vector_insn_info::parse_insn (rtx_insn *rinsn)
{
  *this = vector_insn_info ();
  if (!NONDEBUG_INSN_P (rinsn))
    return;
  if (!has_vtype_op (rinsn))
    return;
  m_state = VALID;
  extract_insn_cached (rinsn);
  const rtx avl = recog_data.operand[get_attr_vl_op_idx (rinsn)];
  /* No RTL_SSA here, so the AVL has no known defining set.  */
  m_avl = avl_info (avl, nullptr);
  m_sew = ::get_sew (rinsn);
  m_vlmul = ::get_vlmul (rinsn);
  m_ta = tail_agnostic_p (rinsn);
  m_ma = mask_agnostic_p (rinsn);
}
1935
1936void
1937vector_insn_info::parse_insn (insn_info *insn)
1938{
1939 *this = vector_insn_info ();
1940
1941 /* Return if it is debug insn for the consistency with optimize == 0. */
1942 if (insn->is_debug_insn ())
1943 return;
1944
1945 /* We set it as unknown since we don't what will happen in CALL or ASM. */
1946 if (insn->is_call () || insn->is_asm ())
1947 {
1948 set_unknown ();
1949 return;
1950 }
1951
1952 /* If this is something that updates VL/VTYPE that we don't know about, set
1953 the state to unknown. */
60bd33bc 1954 if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())
9243c3d1
JZZ
1955 && (find_access (insn->defs (), VL_REGNUM)
1956 || find_access (insn->defs (), VTYPE_REGNUM)))
1957 {
1958 set_unknown ();
1959 return;
1960 }
1961
1962 if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()))
1963 return;
1964
1965 /* Warning: This function has to work on both the lowered (i.e. post
1966 emit_local_forward_vsetvls) and pre-lowering forms. The main implication
1967 of this is that it can't use the value of a SEW, VL, or Policy operand as
1968 they might be stale after lowering. */
1969 vl_vtype_info::operator= (get_vl_vtype_info (insn));
1970 m_insn = insn;
1971 m_state = VALID;
1972 if (vector_config_insn_p (insn->rtl ()))
1973 {
1974 m_demands[DEMAND_AVL] = true;
1975 m_demands[DEMAND_RATIO] = true;
1976 return;
1977 }
1978
1979 if (has_vl_op (insn->rtl ()))
1980 m_demands[DEMAND_AVL] = true;
1981
1982 if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE)
1983 m_demands[DEMAND_RATIO] = true;
1984 else
1985 {
1986 /* TODO: By default, if it doesn't demand RATIO, we set it
1987 demand SEW && LMUL both. Some instructions may demand SEW
1988 only and ignore LMUL, will fix it later. */
1989 m_demands[DEMAND_SEW] = true;
ec99ffab
JZZ
1990 if (!ignore_vlmul_insn_p (insn->rtl ()))
1991 m_demands[DEMAND_LMUL] = true;
9243c3d1
JZZ
1992 }
1993
1994 if (get_attr_ta (insn->rtl ()) != INVALID_ATTRIBUTE)
1995 m_demands[DEMAND_TAIL_POLICY] = true;
1996 if (get_attr_ma (insn->rtl ()) != INVALID_ATTRIBUTE)
1997 m_demands[DEMAND_MASK_POLICY] = true;
6b6b9c68
JZZ
1998
1999 if (vector_config_insn_p (insn->rtl ()))
2000 return;
2001
ec99ffab
JZZ
2002 if (scalar_move_insn_p (insn->rtl ()))
2003 {
2004 if (m_avl.has_non_zero_avl ())
2005 m_demands[DEMAND_NONZERO_AVL] = true;
2006 if (m_ta)
2007 m_demands[DEMAND_GE_SEW] = true;
2008 }
2009
2010 if (!m_avl.has_avl_reg () || vlmax_avl_p (get_avl ()) || !m_avl.get_source ())
2011 return;
2012 if (!m_avl.get_source ()->insn ()->is_real ()
2013 && !m_avl.get_source ()->insn ()->is_phi ())
6b6b9c68
JZZ
2014 return;
2015
2016 insn_info *def_insn = extract_single_source (m_avl.get_source ());
ec99ffab
JZZ
2017 if (!def_insn || !vsetvl_insn_p (def_insn->rtl ()))
2018 return;
9243c3d1 2019
ec99ffab
JZZ
2020 vector_insn_info new_info;
2021 new_info.parse_insn (def_insn);
2022 if (!same_vlmax_p (new_info) && !scalar_move_insn_p (insn->rtl ()))
2023 return;
2024 /* TODO: Currently, we don't forward AVL for non-VLMAX vsetvl. */
2025 if (vlmax_avl_p (new_info.get_avl ()))
2026 set_avl_info (avl_info (new_info.get_avl (), get_avl_source ()));
2027
2028 if (scalar_move_insn_p (insn->rtl ()) && m_avl.has_non_zero_avl ())
2029 m_demands[DEMAND_NONZERO_AVL] = true;
9243c3d1
JZZ
2030}
2031
2032bool
2033vector_insn_info::compatible_p (const vector_insn_info &other) const
2034{
2035 gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p ()
2036 && "Can't compare invalid demanded infos");
2037
ec99ffab 2038 for (const auto &cond : incompatible_conds)
60bd33bc 2039 if (cond.dual_incompatible_p (*this, other))
ec99ffab 2040 return false;
9243c3d1
JZZ
2041 return true;
2042}
2043
d51f2456
JZ
/* Like compatible_p above, but ignore any AVL-related incompatibility:
   return true if THIS and OTHER are compatible once AVL is disregarded.  */
bool
vector_insn_info::skip_avl_compatible_p (const vector_insn_info &other) const
{
  gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p ()
	      && "Can't compare invalid demanded infos");
  unsigned array_size = sizeof (incompatible_conds) / sizeof (demands_cond);
  /* Bypass AVL incompatible cases.  */
  /* NOTE(review): starting at index 1 assumes the AVL condition is
     exactly incompatible_conds[0]; confirm against the table definition
     if its entries are ever reordered.  */
  for (unsigned i = 1; i < array_size; i++)
    if (incompatible_conds[i].dual_incompatible_p (*this, other))
      return false;
  return true;
}
2056
9243c3d1
JZZ
2057bool
2058vector_insn_info::compatible_avl_p (const vl_vtype_info &other) const
2059{
2060 gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
2061 gcc_assert (!unknown_p () && "Can't compare AVL in unknown state");
2062 if (!demand_p (DEMAND_AVL))
2063 return true;
ec99ffab
JZZ
2064 if (demand_p (DEMAND_NONZERO_AVL) && other.has_non_zero_avl ())
2065 return true;
9243c3d1
JZZ
2066 return get_avl_info () == other.get_avl_info ();
2067}
2068
4f673c5e
JZZ
/* Return true if AVL value OTHER satisfies this info's AVL demand.
   Only callable when this info actually demands an AVL.  */
bool
vector_insn_info::compatible_avl_p (const avl_info &other) const
{
  gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
  gcc_assert (!unknown_p () && "Can't compare AVL in unknown state");
  gcc_assert (demand_p (DEMAND_AVL) && "Can't compare AVL undemand state");
  /* NOTE(review): this branch is dead whenever the assert just above is
     active (it asserted DEMAND_AVL); presumably kept as a guard for
     no-checking builds - confirm before removing.  */
  if (!demand_p (DEMAND_AVL))
    return true;
  /* A demand for merely a non-zero AVL is met by any AVL known to be
     non-zero, regardless of its exact value.  */
  if (demand_p (DEMAND_NONZERO_AVL) && other.has_non_zero_avl ())
    return true;
  return get_avl_info () == other;
}
2081
9243c3d1
JZZ
2082bool
2083vector_insn_info::compatible_vtype_p (const vl_vtype_info &other) const
2084{
2085 gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
2086 gcc_assert (!unknown_p () && "Can't compare VTYPE in unknown state");
ec99ffab
JZZ
2087 if (demand_p (DEMAND_SEW))
2088 {
2089 if (!demand_p (DEMAND_GE_SEW) && m_sew != other.get_sew ())
2090 return false;
2091 if (demand_p (DEMAND_GE_SEW) && m_sew > other.get_sew ())
2092 return false;
2093 }
9243c3d1
JZZ
2094 if (demand_p (DEMAND_LMUL) && m_vlmul != other.get_vlmul ())
2095 return false;
2096 if (demand_p (DEMAND_RATIO) && m_ratio != other.get_ratio ())
2097 return false;
2098 if (demand_p (DEMAND_TAIL_POLICY) && m_ta != other.get_ta ())
2099 return false;
2100 if (demand_p (DEMAND_MASK_POLICY) && m_ma != other.get_ma ())
2101 return false;
2102 return true;
2103}
2104
2105/* Determine whether the vector instructions requirements represented by
2106 Require are compatible with the previous vsetvli instruction represented
2107 by this. INSN is the instruction whose requirements we're considering. */
2108bool
2109vector_insn_info::compatible_p (const vl_vtype_info &curr_info) const
2110{
2111 gcc_assert (!uninit_p () && "Can't handle uninitialized info");
2112 if (empty_p ())
2113 return false;
2114
2115 /* Nothing is compatible with Unknown. */
2116 if (unknown_p ())
2117 return false;
2118
2119 /* If the instruction doesn't need an AVLReg and the SEW matches, consider
2120 it compatible. */
2121 if (!demand_p (DEMAND_AVL))
2122 if (m_sew == curr_info.get_sew ())
2123 return true;
2124
2125 return compatible_avl_p (curr_info) && compatible_vtype_p (curr_info);
2126}
2127
6b6b9c68
JZZ
2128bool
2129vector_insn_info::available_p (const vector_insn_info &other) const
2130{
ec99ffab
JZZ
2131 return *this >= other;
2132}
2133
/* Fuse the AVL demands of INFO1 and INFO2 into this info.  INFO1 takes
   precedence: its AVL is kept whenever it demands one, except when
   INFO1 only needs "any non-zero AVL" while INFO2 demands a specific
   AVL - then INFO2's stricter AVL satisfies both.  */
void
vector_insn_info::fuse_avl (const vector_insn_info &info1,
			    const vector_insn_info &info2)
{
  /* Track INFO1's insn as the representative insn of the fusion.  */
  set_insn (info1.get_insn ());
  if (info1.demand_p (DEMAND_AVL))
    {
      if (info1.demand_p (DEMAND_NONZERO_AVL))
	{
	  if (info2.demand_p (DEMAND_AVL)
	      && !info2.demand_p (DEMAND_NONZERO_AVL))
	    {
	      /* INFO2's exact AVL demand subsumes INFO1's weaker
		 "non-zero only" demand: take INFO2's AVL.  */
	      set_avl_info (info2.get_avl_info ());
	      set_demand (DEMAND_AVL, true);
	      set_demand (DEMAND_NONZERO_AVL, false);
	      return;
	    }
	}
      set_avl_info (info1.get_avl_info ());
      set_demand (DEMAND_NONZERO_AVL, info1.demand_p (DEMAND_NONZERO_AVL));
    }
  else
    {
      set_avl_info (info2.get_avl_info ());
      set_demand (DEMAND_NONZERO_AVL, info2.demand_p (DEMAND_NONZERO_AVL));
    }
  /* The fused info demands an AVL if either source did.  */
  set_demand (DEMAND_AVL,
	      info1.demand_p (DEMAND_AVL) || info2.demand_p (DEMAND_AVL));
}
2163
/* Fuse the SEW/LMUL-related demands of INFO1 and INFO2 into this info.
   If their demand bits are identical, copy INFO2's fields wholesale;
   otherwise look up a fusion rule, trying both argument orders.  A rule
   must exist for any pair that passed the compatibility check.  */
void
vector_insn_info::fuse_sew_lmul (const vector_insn_info &info1,
				 const vector_insn_info &info2)
{
  /* We need to fuse sew && lmul according to demand info:

     1. GE_SEW.
     2. SEW.
     3. LMUL.
     4. RATIO.  */
  if (same_sew_lmul_demand_p (info1.get_demands (), info2.get_demands ()))
    {
      /* Identical demand sets: either side's fields work; use INFO2's.  */
      set_demand (DEMAND_SEW, info2.demand_p (DEMAND_SEW));
      set_demand (DEMAND_LMUL, info2.demand_p (DEMAND_LMUL));
      set_demand (DEMAND_RATIO, info2.demand_p (DEMAND_RATIO));
      set_demand (DEMAND_GE_SEW, info2.demand_p (DEMAND_GE_SEW));
      set_sew (info2.get_sew ());
      set_vlmul (info2.get_vlmul ());
      set_ratio (info2.get_ratio ());
      return;
    }
  for (const auto &rule : fuse_rules)
    {
      /* Each rule is directional; try (INFO1, INFO2) first...  */
      if (rule.pair.match_cond_p (info1.get_demands (), info2.get_demands ()))
	{
	  set_demand (DEMAND_SEW, rule.demand_sew_p);
	  set_demand (DEMAND_LMUL, rule.demand_lmul_p);
	  set_demand (DEMAND_RATIO, rule.demand_ratio_p);
	  set_demand (DEMAND_GE_SEW, rule.demand_ge_sew_p);
	  set_sew (rule.new_sew (info1, info2));
	  set_vlmul (rule.new_vlmul (info1, info2));
	  set_ratio (rule.new_ratio (info1, info2));
	  return;
	}
      /* ... then the mirrored order (INFO2, INFO1).  */
      if (rule.pair.match_cond_p (info2.get_demands (), info1.get_demands ()))
	{
	  set_demand (DEMAND_SEW, rule.demand_sew_p);
	  set_demand (DEMAND_LMUL, rule.demand_lmul_p);
	  set_demand (DEMAND_RATIO, rule.demand_ratio_p);
	  set_demand (DEMAND_GE_SEW, rule.demand_ge_sew_p);
	  set_sew (rule.new_sew (info2, info1));
	  set_vlmul (rule.new_vlmul (info2, info1));
	  set_ratio (rule.new_ratio (info2, info1));
	  return;
	}
    }
  /* Compatible infos must match some fusion rule.  */
  gcc_unreachable ();
}
2212
2213void
2214vector_insn_info::fuse_tail_policy (const vector_insn_info &info1,
2215 const vector_insn_info &info2)
2216{
2217 if (info1.demand_p (DEMAND_TAIL_POLICY))
2218 {
2219 set_ta (info1.get_ta ());
2220 demand (DEMAND_TAIL_POLICY);
2221 }
2222 else if (info2.demand_p (DEMAND_TAIL_POLICY))
2223 {
2224 set_ta (info2.get_ta ());
2225 demand (DEMAND_TAIL_POLICY);
2226 }
2227 else
2228 set_ta (get_default_ta ());
2229}
2230
2231void
2232vector_insn_info::fuse_mask_policy (const vector_insn_info &info1,
2233 const vector_insn_info &info2)
2234{
2235 if (info1.demand_p (DEMAND_MASK_POLICY))
2236 {
2237 set_ma (info1.get_ma ());
2238 demand (DEMAND_MASK_POLICY);
2239 }
2240 else if (info2.demand_p (DEMAND_MASK_POLICY))
2241 {
2242 set_ma (info2.get_ma ());
2243 demand (DEMAND_MASK_POLICY);
2244 }
2245 else
2246 set_ma (get_default_ma ());
6b6b9c68
JZZ
2247}
2248
9243c3d1
JZZ
/* Return the fusion of this info with MERGE_INFO.  TYPE selects which
   side's INSN/AVL is kept: LOCAL_MERGE keeps MERGE_INFO's (the newer
   insn during in-block backward flow), otherwise ours is preserved.  */
vector_insn_info
vector_insn_info::merge (const vector_insn_info &merge_info,
			 enum merge_type type) const
{
  /* A user vsetvl insn is allowed to merge with incompatible info;
     everything else must have passed the compatibility check.  */
  if (!vsetvl_insn_p (get_insn ()->rtl ()))
    gcc_assert (this->compatible_p (merge_info)
		&& "Can't merge incompatible demanded infos");

  vector_insn_info new_info;
  new_info.set_valid ();
  if (type == LOCAL_MERGE)
    {
      /* For local backward data flow, we always update INSN && AVL as the
	 latest INSN and AVL so that we can keep track status of each INSN.  */
      new_info.fuse_avl (merge_info, *this);
    }
  else
    {
      /* For global data flow, we should keep original INSN and AVL if they
	 valid since we should keep the life information of each block.

	 For example:
	   bb 0 -> bb 1.
	 We should keep INSN && AVL of bb 1 since we will eventually emit
	 vsetvl instruction according to INSN and AVL of bb 1.  */
      new_info.fuse_avl (*this, merge_info);
    }

  /* VTYPE-related fields fuse the same way in both directions.  */
  new_info.fuse_sew_lmul (*this, merge_info);
  new_info.fuse_tail_policy (*this, merge_info);
  new_info.fuse_mask_policy (*this, merge_info);
  return new_info;
}
2282
60bd33bc
JZZ
/* If a read of VL follows the fault-first load INSN, rebind this
   info's AVL to that vl-output and return true; otherwise leave the
   info untouched and return false.  */
bool
vector_insn_info::update_fault_first_load_avl (insn_info *insn)
{
  // Update AVL to vl-output of the fault first load.
  const insn_info *read_vl = get_forward_read_vl_insn (insn);
  if (read_vl)
    {
      /* The destination of the read-vl insn becomes the new AVL.  */
      rtx vl = SET_DEST (PATTERN (read_vl->rtl ()));
      def_info *def = find_access (read_vl->defs (), REGNO (vl));
      /* safe_dyn_cast yields null if DEF is not a set_info.  */
      set_info *set = safe_dyn_cast<set_info *> (def);
      set_avl_info (avl_info (vl, set));
      set_insn (insn);
      return true;
    }
  return false;
}
2299
9243c3d1
JZZ
/* Print this demanded info in human-readable form to FILE.  */
void
vector_insn_info::dump (FILE *file) const
{
  fprintf (file, "[");
  /* NOTE(review): the order of these predicate checks matters; a state
     satisfying more than one predicate prints only the first label -
     confirm against the state predicates if adding new states.  */
  if (uninit_p ())
    fprintf (file, "UNINITIALIZED,");
  else if (valid_p ())
    fprintf (file, "VALID,");
  else if (unknown_p ())
    fprintf (file, "UNKNOWN,");
  else if (empty_p ())
    fprintf (file, "EMPTY,");
  else if (hard_empty_p ())
    fprintf (file, "HARD_EMPTY,");
  else if (dirty_with_killed_avl_p ())
    fprintf (file, "DIRTY_WITH_KILLED_AVL,");
  else
    fprintf (file, "DIRTY,");

  /* Demand bits, one flag per demanded field.  */
  fprintf (file, "Demand field={%d(VL),", demand_p (DEMAND_AVL));
  fprintf (file, "%d(DEMAND_NONZERO_AVL),", demand_p (DEMAND_NONZERO_AVL));
  fprintf (file, "%d(SEW),", demand_p (DEMAND_SEW));
  fprintf (file, "%d(DEMAND_GE_SEW),", demand_p (DEMAND_GE_SEW));
  fprintf (file, "%d(LMUL),", demand_p (DEMAND_LMUL));
  fprintf (file, "%d(RATIO),", demand_p (DEMAND_RATIO));
  fprintf (file, "%d(TAIL_POLICY),", demand_p (DEMAND_TAIL_POLICY));
  fprintf (file, "%d(MASK_POLICY)}\n", demand_p (DEMAND_MASK_POLICY));

  /* Concrete VL/VTYPE values.  */
  fprintf (file, "AVL=");
  print_rtl_single (file, get_avl ());
  fprintf (file, "SEW=%d,", get_sew ());
  fprintf (file, "VLMUL=%d,", get_vlmul ());
  fprintf (file, "RATIO=%d,", get_ratio ());
  fprintf (file, "TAIL_POLICY=%d,", get_ta ());
  fprintf (file, "MASK_POLICY=%d", get_ma ());
  fprintf (file, "]\n");

  if (valid_p ())
    {
      if (get_insn ())
	{
	  fprintf (file, "The real INSN=");
	  print_rtl_single (file, get_insn ()->rtl ());
	}
    }
}
2346
/* Construct the manager: clear all LCM bitmap-vector pointers, size
   the per-insn and per-block tables, and parse the demanded info of
   every instruction.  At -O0 we walk the plain CFG; otherwise we walk
   the RTL-SSA view and also reset per-block probabilities.  */
vector_infos_manager::vector_infos_manager ()
{
  vector_edge_list = nullptr;
  vector_kill = nullptr;
  vector_del = nullptr;
  vector_insert = nullptr;
  vector_antic = nullptr;
  vector_transp = nullptr;
  vector_comp = nullptr;
  vector_avin = nullptr;
  vector_avout = nullptr;
  /* Tables are indexed by insn UID and basic-block index respectively.  */
  vector_insn_infos.safe_grow (get_max_uid ());
  vector_block_infos.safe_grow (last_basic_block_for_fn (cfun));
  if (!optimize)
    {
      basic_block cfg_bb;
      rtx_insn *rinsn;
      FOR_ALL_BB_FN (cfg_bb, cfun)
	{
	  vector_block_infos[cfg_bb->index].local_dem = vector_insn_info ();
	  vector_block_infos[cfg_bb->index].reaching_out = vector_insn_info ();
	  FOR_BB_INSNS (cfg_bb, rinsn)
	    vector_insn_infos[INSN_UID (rinsn)].parse_insn (rinsn);
	}
    }
  else
    {
      for (const bb_info *bb : crtl->ssa->bbs ())
	{
	  vector_block_infos[bb->index ()].local_dem = vector_insn_info ();
	  vector_block_infos[bb->index ()].reaching_out = vector_insn_info ();
	  for (insn_info *insn : bb->real_insns ())
	    vector_insn_infos[insn->uid ()].parse_insn (insn);
	  /* Start from an uninitialized probability; computed later by
	     compute_probabilities.  */
	  vector_block_infos[bb->index ()].probability = profile_probability ();
	}
    }
}
2384
2385void
2386vector_infos_manager::create_expr (vector_insn_info &info)
2387{
2388 for (size_t i = 0; i < vector_exprs.length (); i++)
2389 if (*vector_exprs[i] == info)
2390 return;
2391 vector_exprs.safe_push (&info);
2392}
2393
2394size_t
2395vector_infos_manager::get_expr_id (const vector_insn_info &info) const
2396{
2397 for (size_t i = 0; i < vector_exprs.length (); i++)
2398 if (*vector_exprs[i] == info)
2399 return i;
2400 gcc_unreachable ();
2401}
2402
2403auto_vec<size_t>
2404vector_infos_manager::get_all_available_exprs (
2405 const vector_insn_info &info) const
2406{
2407 auto_vec<size_t> available_list;
2408 for (size_t i = 0; i < vector_exprs.length (); i++)
6b6b9c68 2409 if (info.available_p (*vector_exprs[i]))
9243c3d1
JZZ
2410 available_list.safe_push (i);
2411 return available_list;
2412}
2413
d06e9264
JZ
2414bool
2415vector_infos_manager::all_empty_predecessor_p (const basic_block cfg_bb) const
2416{
2417 hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb);
2418 for (const basic_block pred_cfg_bb : pred_cfg_bbs)
2419 {
2420 const auto &pred_block_info = vector_block_infos[pred_cfg_bb->index];
2421 if (!pred_block_info.local_dem.valid_or_dirty_p ()
2422 && !pred_block_info.reaching_out.valid_or_dirty_p ())
2423 continue;
2424 return false;
2425 }
2426 return true;
2427}
2428
9243c3d1
JZZ
2429bool
2430vector_infos_manager::all_same_ratio_p (sbitmap bitdata) const
2431{
2432 if (bitmap_empty_p (bitdata))
2433 return false;
2434
2435 int ratio = -1;
2436 unsigned int bb_index;
2437 sbitmap_iterator sbi;
2438
2439 EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi)
2440 {
2441 if (ratio == -1)
2442 ratio = vector_exprs[bb_index]->get_ratio ();
2443 else if (vector_exprs[bb_index]->get_ratio () != ratio)
2444 return false;
2445 }
2446 return true;
2447}
2448
ff8f9544
JZ
2449/* Return TRUE if the incoming vector configuration state
2450 to CFG_BB is compatible with the vector configuration
2451 state in CFG_BB, FALSE otherwise. */
2452bool
2453vector_infos_manager::all_avail_in_compatible_p (const basic_block cfg_bb) const
2454{
2455 const auto &info = vector_block_infos[cfg_bb->index].local_dem;
2456 sbitmap avin = vector_avin[cfg_bb->index];
2457 unsigned int bb_index;
2458 sbitmap_iterator sbi;
2459 EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi)
2460 {
2461 const auto &avin_info
2462 = static_cast<const vl_vtype_info &> (*vector_exprs[bb_index]);
2463 if (!info.compatible_p (avin_info))
2464 return false;
2465 }
2466 return true;
2467}
2468
005fad9d
JZZ
2469bool
2470vector_infos_manager::all_same_avl_p (const basic_block cfg_bb,
2471 sbitmap bitdata) const
2472{
2473 if (bitmap_empty_p (bitdata))
2474 return false;
2475
2476 const auto &block_info = vector_block_infos[cfg_bb->index];
2477 if (!block_info.local_dem.demand_p (DEMAND_AVL))
2478 return true;
2479
2480 avl_info avl = block_info.local_dem.get_avl_info ();
2481 unsigned int bb_index;
2482 sbitmap_iterator sbi;
2483
2484 EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi)
2485 {
2486 if (vector_exprs[bb_index]->get_avl_info () != avl)
2487 return false;
2488 }
2489 return true;
2490}
2491
9243c3d1
JZZ
2492size_t
2493vector_infos_manager::expr_set_num (sbitmap bitdata) const
2494{
2495 size_t count = 0;
2496 for (size_t i = 0; i < vector_exprs.length (); i++)
2497 if (bitmap_bit_p (bitdata, i))
2498 count++;
2499 return count;
2500}
2501
/* Release every table the manager owns; the refine/delete work lists
   must already have been drained, and the LCM bitmap vectors only
   exist (and thus only need freeing) when optimizing.  */
void
vector_infos_manager::release (void)
{
  if (!vector_insn_infos.is_empty ())
    vector_insn_infos.release ();
  if (!vector_block_infos.is_empty ())
    vector_block_infos.release ();
  if (!vector_exprs.is_empty ())
    vector_exprs.release ();

  /* Both work lists should have been consumed by commit/cleanup.  */
  gcc_assert (to_refine_vsetvls.is_empty ());
  gcc_assert (to_delete_vsetvls.is_empty ());
  if (optimize > 0)
    free_bitmap_vectors ();
}
2517
2518void
2519vector_infos_manager::create_bitmap_vectors (void)
2520{
2521 /* Create the bitmap vectors. */
2522 vector_antic = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2523 vector_exprs.length ());
2524 vector_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2525 vector_exprs.length ());
2526 vector_comp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2527 vector_exprs.length ());
2528 vector_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2529 vector_exprs.length ());
2530 vector_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2531 vector_exprs.length ());
2532 vector_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2533 vector_exprs.length ());
2534
2535 bitmap_vector_ones (vector_transp, last_basic_block_for_fn (cfun));
2536 bitmap_vector_clear (vector_antic, last_basic_block_for_fn (cfun));
2537 bitmap_vector_clear (vector_comp, last_basic_block_for_fn (cfun));
2538}
2539
2540void
2541vector_infos_manager::free_bitmap_vectors (void)
2542{
2543 /* Finished. Free up all the things we've allocated. */
2544 free_edge_list (vector_edge_list);
2545 if (vector_del)
2546 sbitmap_vector_free (vector_del);
2547 if (vector_insert)
2548 sbitmap_vector_free (vector_insert);
2549 if (vector_kill)
2550 sbitmap_vector_free (vector_kill);
2551 if (vector_antic)
2552 sbitmap_vector_free (vector_antic);
2553 if (vector_transp)
2554 sbitmap_vector_free (vector_transp);
2555 if (vector_comp)
2556 sbitmap_vector_free (vector_comp);
2557 if (vector_avin)
2558 sbitmap_vector_free (vector_avin);
2559 if (vector_avout)
2560 sbitmap_vector_free (vector_avout);
2561
2562 vector_edge_list = nullptr;
2563 vector_kill = nullptr;
2564 vector_del = nullptr;
2565 vector_insert = nullptr;
2566 vector_antic = nullptr;
2567 vector_transp = nullptr;
2568 vector_comp = nullptr;
2569 vector_avin = nullptr;
2570 vector_avout = nullptr;
9243c3d1
JZZ
2571}
2572
2573void
2574vector_infos_manager::dump (FILE *file) const
2575{
2576 basic_block cfg_bb;
2577 rtx_insn *rinsn;
2578
2579 fprintf (file, "\n");
2580 FOR_ALL_BB_FN (cfg_bb, cfun)
2581 {
2582 fprintf (file, "Local vector info of <bb %d>:\n", cfg_bb->index);
2583 fprintf (file, "<HEADER>=");
2584 vector_block_infos[cfg_bb->index].local_dem.dump (file);
2585 FOR_BB_INSNS (cfg_bb, rinsn)
2586 {
2587 if (!NONDEBUG_INSN_P (rinsn) || !has_vtype_op (rinsn))
2588 continue;
2589 fprintf (file, "<insn %d>=", INSN_UID (rinsn));
2590 const auto &info = vector_insn_infos[INSN_UID (rinsn)];
2591 info.dump (file);
2592 }
2593 fprintf (file, "<FOOTER>=");
2594 vector_block_infos[cfg_bb->index].reaching_out.dump (file);
acc10c79
JZZ
2595 fprintf (file, "<Probability>=");
2596 vector_block_infos[cfg_bb->index].probability.dump (file);
9243c3d1
JZZ
2597 fprintf (file, "\n\n");
2598 }
2599
2600 fprintf (file, "\n");
2601 FOR_ALL_BB_FN (cfg_bb, cfun)
2602 {
2603 fprintf (file, "Local properties of <bb %d>:\n", cfg_bb->index);
2604
2605 fprintf (file, "<ANTLOC>=");
2606 if (vector_antic == nullptr)
2607 fprintf (file, "(nil)\n");
2608 else
2609 dump_bitmap_file (file, vector_antic[cfg_bb->index]);
2610
2611 fprintf (file, "<AVLOC>=");
2612 if (vector_comp == nullptr)
2613 fprintf (file, "(nil)\n");
2614 else
2615 dump_bitmap_file (file, vector_comp[cfg_bb->index]);
2616
2617 fprintf (file, "<TRANSP>=");
2618 if (vector_transp == nullptr)
2619 fprintf (file, "(nil)\n");
2620 else
2621 dump_bitmap_file (file, vector_transp[cfg_bb->index]);
2622
2623 fprintf (file, "<KILL>=");
2624 if (vector_kill == nullptr)
2625 fprintf (file, "(nil)\n");
2626 else
2627 dump_bitmap_file (file, vector_kill[cfg_bb->index]);
2628 }
2629
2630 fprintf (file, "\n");
2631 FOR_ALL_BB_FN (cfg_bb, cfun)
2632 {
2633 fprintf (file, "Global LCM (Lazy code motion) result of <bb %d>:\n",
2634 cfg_bb->index);
2635
2636 fprintf (file, "<AVIN>=");
2637 if (vector_avin == nullptr)
2638 fprintf (file, "(nil)\n");
2639 else
2640 dump_bitmap_file (file, vector_avin[cfg_bb->index]);
2641
2642 fprintf (file, "<AVOUT>=");
2643 if (vector_avout == nullptr)
2644 fprintf (file, "(nil)\n");
2645 else
2646 dump_bitmap_file (file, vector_avout[cfg_bb->index]);
2647
2648 fprintf (file, "<DELETE>=");
2649 if (vector_del == nullptr)
2650 fprintf (file, "(nil)\n");
2651 else
2652 dump_bitmap_file (file, vector_del[cfg_bb->index]);
2653 }
2654
2655 fprintf (file, "\nGlobal LCM (Lazy code motion) INSERT info:\n");
2656 for (size_t i = 0; i < vector_exprs.length (); i++)
2657 {
2658 for (int ed = 0; ed < NUM_EDGES (vector_edge_list); ed++)
2659 {
2660 edge eg = INDEX_EDGE (vector_edge_list, ed);
2661 if (bitmap_bit_p (vector_insert[ed], i))
2662 fprintf (dump_file,
2663 "INSERT edge %d from bb %d to bb %d for VSETVL "
2664 "expr[%ld]\n",
2665 ed, eg->src->index, eg->dest->index, i);
2666 }
2667 }
2668}
2669
/* Registration data for the vsetvl RTL pass: runs unconditionally
   (no timevar, no required/provided properties, no TODO flags).  */
const pass_data pass_data_vsetvl = {
  RTL_PASS,	 /* type */
  "vsetvl",	 /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE,	 /* tv_id */
  0,		 /* properties_required */
  0,		 /* properties_provided */
  0,		 /* properties_destroyed */
  0,		 /* todo_flags_start */
  0,		 /* todo_flags_finish */
};
2681
/* The VSETVL insertion pass.  The private members implement the six
   phases described in the file-header comment; gated on TARGET_VECTOR.  */
class pass_vsetvl : public rtl_opt_pass
{
private:
  /* Owns all per-insn/per-block demanded info and LCM bitmaps.  */
  class vector_infos_manager *m_vector_manager;

  /* Unoptimized (-O0) strategy vs. the full lazy-code-motion one.  */
  void simple_vsetvl (void) const;
  void lazy_vsetvl (void);

  /* Phase 1.  */
  void compute_local_backward_infos (const bb_info *);

  /* Phase 2.  */
  bool need_vsetvl (const vector_insn_info &, const vector_insn_info &) const;
  void transfer_before (vector_insn_info &, insn_info *) const;
  void transfer_after (vector_insn_info &, insn_info *) const;
  void emit_local_forward_vsetvls (const bb_info *);

  /* Phase 3.  */
  enum fusion_type get_backward_fusion_type (const bb_info *,
					     const vector_insn_info &);
  bool hard_empty_block_p (const bb_info *, const vector_insn_info &) const;
  bool backward_demand_fusion (void);
  bool forward_demand_fusion (void);
  bool cleanup_illegal_dirty_blocks (void);
  void demand_fusion (void);

  /* Phase 4.  */
  void prune_expressions (void);
  void compute_local_properties (void);
  bool can_refine_vsetvl_p (const basic_block, const vector_insn_info &) const;
  void refine_vsetvls (void) const;
  void cleanup_vsetvls (void);
  bool commit_vsetvls (void);
  void pre_vsetvl (void);

  /* Phase 5.  */
  void cleanup_insns (void) const;

  /* Phase 6.  */
  void propagate_avl (void) const;

  void init (void);
  void done (void);
  void compute_probabilities (void);

public:
  pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {}

  /* opt_pass methods: */
  virtual bool gate (function *) final override { return TARGET_VECTOR; }
  virtual unsigned int execute (function *) final override;
}; // class pass_vsetvl
2734
2735/* Simple m_vsetvl_insert vsetvl for optimize == 0. */
2736void
2737pass_vsetvl::simple_vsetvl (void) const
2738{
2739 if (dump_file)
2740 fprintf (dump_file,
2741 "\nEntering Simple VSETVL PASS and Handling %d basic blocks for "
2742 "function:%s\n",
2743 n_basic_blocks_for_fn (cfun), function_name (cfun));
2744
2745 basic_block cfg_bb;
2746 rtx_insn *rinsn;
2747 FOR_ALL_BB_FN (cfg_bb, cfun)
2748 {
2749 FOR_BB_INSNS (cfg_bb, rinsn)
2750 {
2751 if (!NONDEBUG_INSN_P (rinsn))
2752 continue;
2753 if (has_vtype_op (rinsn))
2754 {
2755 const auto info
2756 = m_vector_manager->vector_insn_infos[INSN_UID (rinsn)];
2757 emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_BEFORE, info,
2758 NULL_RTX, rinsn);
2759 }
2760 }
2761 }
2762}
2763
/* Compute demanded information by backward data-flow analysis.  */
void
pass_vsetvl::compute_local_backward_infos (const bb_info *bb)
{
  /* CHANGE holds the info flowing backwards; start from "nothing".  */
  vector_insn_info change;
  change.set_empty ();

  auto &block_info = m_vector_manager->vector_block_infos[bb->index ()];
  block_info.reaching_out = change;

  for (insn_info *insn : bb->reverse_real_nondebug_insns ())
    {
      auto &info = m_vector_manager->vector_insn_infos[insn->uid ()];

      if (info.uninit_p ())
	/* If it is uninitialized, propagate it directly.  */
	info = change;
      else if (info.unknown_p ())
	change = info;
      else
	{
	  gcc_assert (info.valid_p () && "Unexpected Invalid demanded info");
	  if (change.valid_p ())
	    {
	      /* Fuse the later demand into this insn's info unless that
		 would propagate an AVL whose register is not available
		 here, or the two demands are incompatible.  */
	      if (!(propagate_avl_across_demands_p (change, info)
		    && !reg_available_p (insn, change))
		  && change.compatible_p (info))
		{
		  info = change.merge (info, LOCAL_MERGE);
		  /* Fix PR109399, we should update user vsetvl instruction
		     if there is a change in demand fusion.  */
		  if (vsetvl_insn_p (insn->rtl ()))
		    change_vsetvl_insn (insn, info);
		}
	    }
	  change = info;
	}
    }

  /* Whatever survives to the block entry is the block's local demand;
     an empty demand also empties the reaching-out info.  */
  block_info.local_dem = change;
  if (block_info.local_dem.empty_p ())
    block_info.reaching_out = block_info.local_dem;
}
2807
2808/* Return true if a dem_info is required to transition from curr_info to
2809 require before INSN. */
2810bool
2811pass_vsetvl::need_vsetvl (const vector_insn_info &require,
2812 const vector_insn_info &curr_info) const
2813{
2814 if (!curr_info.valid_p () || curr_info.unknown_p () || curr_info.uninit_p ())
2815 return true;
2816
a481eed8 2817 if (require.compatible_p (static_cast<const vl_vtype_info &> (curr_info)))
9243c3d1
JZZ
2818 return false;
2819
2820 return true;
2821}
2822
2823/* Given an incoming state reaching INSN, modifies that state so that it is
2824 minimally compatible with INSN. The resulting state is guaranteed to be
2825 semantically legal for INSN, but may not be the state requested by INSN. */
2826void
2827pass_vsetvl::transfer_before (vector_insn_info &info, insn_info *insn) const
2828{
2829 if (!has_vtype_op (insn->rtl ()))
2830 return;
2831
2832 const vector_insn_info require
2833 = m_vector_manager->vector_insn_infos[insn->uid ()];
2834 if (info.valid_p () && !need_vsetvl (require, info))
2835 return;
2836 info = require;
2837}
2838
/* Given a state with which we evaluated insn (see transfer_before above for why
   this might be different that the state insn requested), modify the state to
   reflect the changes insn might make.  */
void
pass_vsetvl::transfer_after (vector_insn_info &info, insn_info *insn) const
{
  /* A vector config insn fully determines the outgoing state.  */
  if (vector_config_insn_p (insn->rtl ()))
    {
      info = m_vector_manager->vector_insn_infos[insn->uid ()];
      return;
    }

  /* A fault-first load may rewrite the AVL to its vl output; if so the
     state has already been updated.  */
  if (fault_first_load_p (insn->rtl ())
      && info.update_fault_first_load_avl (insn))
    return;

  /* If this is something that updates VL/VTYPE that we don't know about, set
     the state to unknown.  */
  if (insn->is_call () || insn->is_asm ()
      || find_access (insn->defs (), VL_REGNUM)
      || find_access (insn->defs (), VTYPE_REGNUM))
    info = vector_insn_info::get_unknown ();
}
2862
/* Emit vsetvl within each block by forward data-flow analysis.  */
void
pass_vsetvl::emit_local_forward_vsetvls (const bb_info *bb)
{
  auto &block_info = m_vector_manager->vector_block_infos[bb->index ()];
  if (block_info.local_dem.empty_p ())
    return;

  /* CURR_INFO tracks the VL/VTYPE state as we scan forward.  */
  vector_insn_info curr_info;
  for (insn_info *insn : bb->real_nondebug_insns ())
    {
      const vector_insn_info prev_info = curr_info;
      enum vsetvl_type type = NUM_VSETVL_TYPE;
      transfer_before (curr_info, insn);

      if (has_vtype_op (insn->rtl ()))
	{
	  /* Only emit when the concrete VL/VTYPE state changed and the
	     previous state does not already satisfy this insn.  */
	  if (static_cast<const vl_vtype_info &> (prev_info)
	      != static_cast<const vl_vtype_info &> (curr_info))
	    {
	      const auto require
		= m_vector_manager->vector_insn_infos[insn->uid ()];
	      if (!require.compatible_p (
		    static_cast<const vl_vtype_info &> (prev_info)))
		type = insert_vsetvl (EMIT_BEFORE, insn->rtl (), require,
				      prev_info);
	    }
	}

      /* Fix the issue of following sequence:
	 vsetivli zero, 5
	 ....
	 vsetvli zero, zero
	 vmv.x.s (demand AVL = 8).
	 ....
	 incorrect: vsetvli zero, zero ===> Since the curr_info is AVL = 8.
	 correct: vsetivli zero, 8
	 vadd (demand AVL = 8).  */
      if (type == VSETVL_VTYPE_CHANGE_ONLY)
	{
	  /* Update the curr_info to be real correct AVL.  */
	  curr_info.set_avl_info (prev_info.get_avl_info ());
	}
      transfer_after (curr_info, insn);
    }

  block_info.reaching_out = curr_info;
}
2911
4f673c5e
JZZ
/* Classify how PROP (the demanded info of an insn in a successor) may
   be fused backward into block BB: VALID_AVL_FUSION when safe as-is,
   KILLED_AVL_FUSION when BB clobbers the AVL register, INVALID_FUSION
   when backward propagation is not possible.  */
enum fusion_type
pass_vsetvl::get_backward_fusion_type (const bb_info *bb,
				       const vector_insn_info &prop)
{
  insn_info *insn = prop.get_insn ();

  /* TODO: We don't backward propagate the explict VSETVL here
     since we will change vsetvl and vsetvlmax intrinsics into
     no side effects which can be optimized into optimal location
     by GCC internal passes.  We only need to support these backward
     propagation if vsetvl intrinsics have side effects.  */
  if (vsetvl_insn_p (insn->rtl ()))
    return INVALID_FUSION;

  gcc_assert (has_vtype_op (insn->rtl ()));
  rtx reg = NULL_RTX;

  /* Case 1: Don't need VL.  Just let it backward propagate.  */
  if (!prop.demand_p (DEMAND_AVL))
    return VALID_AVL_FUSION;
  else
    {
      /* Case 2: CONST_INT AVL, we don't need to check def.  */
      if (prop.has_avl_imm ())
	return VALID_AVL_FUSION;
      else
	{
	  /* Case 3: REG AVL, we need to check the distance of def to make
	     sure we won't backward propagate over the def.  */
	  gcc_assert (prop.has_avl_reg ());
	  if (vlmax_avl_p (prop.get_avl ()))
	    /* Check VL operand for vsetvl vl,zero.  */
	    reg = prop.get_avl_reg_rtx ();
	  else
	    /* Check AVL operand for vsetvl zero,avl.  */
	    reg = prop.get_avl ();
	}
    }

  gcc_assert (reg);
  /* A non-phi AVL defined in the insn's own block cannot be hoisted
     above its definition.  */
  if (!prop.get_avl_source ()->insn ()->is_phi ()
      && prop.get_avl_source ()->insn ()->bb () == insn->bb ())
    return INVALID_FUSION;
  hash_set<set_info *> sets
    = get_all_sets (prop.get_avl_source (), true, true, true);
  if (any_set_in_bb_p (sets, insn->bb ()))
    return INVALID_FUSION;

  if (vlmax_avl_p (prop.get_avl ()))
    {
      if (find_reg_killed_by (bb, reg))
	return INVALID_FUSION;
      else
	return VALID_AVL_FUSION;
    }

  /* By default, we always enable backward fusion so that we can
     gain more optimizations.  */
  if (!find_reg_killed_by (bb, reg))
    return VALID_AVL_FUSION;
  return KILLED_AVL_FUSION;
}
2974
/* We almost enable all cases in get_backward_fusion_type, this function
   disable the backward fusion by changing dirty blocks into hard empty
   blocks in forward dataflow. We can have more accurate optimization by
   this method.

   BB is the candidate block, INFO is its tentative (dirty) VL/VTYPE
   status.  Returns true when BB must be marked HARD_EMPTY, i.e. it may
   never be re-polluted as DIRTY by later backward propagation.  */
bool
pass_vsetvl::hard_empty_block_p (const bb_info *bb,
				 const vector_insn_info &info) const
{
  /* Only dirty blocks whose demanded AVL lives in a register are
     candidates for demotion.  */
  if (!info.dirty_p () || !info.has_avl_reg ())
    return false;

  basic_block cfg_bb = bb->cfg_bb ();
  sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index];
  set_info *set = info.get_avl_source ();
  /* Materialize the AVL register so we can query kills against it.  */
  rtx avl = gen_rtx_REG (Pmode, set->regno ());
  hash_set<set_info *> sets = get_all_sets (set, true, false, false);
  hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb);

  if (find_reg_killed_by (bb, avl))
    {
      /* Condition 1:
	 Dirty block with killed AVL means that the empty block (no RVV
	 instructions) are polluted as Dirty blocks with the value of current
	 AVL is killed. For example:
	      bb 0:
		...
	      bb 1:
		def a5
	      bb 2:
		RVV (use a5)
	 In backward dataflow, we will pollute BB0 and BB1 as Dirty with AVL
	 killed. since a5 is killed in BB1.
	 In this case, let's take a look at this example:

	      bb 3:        bb 4:
		def3 a5       def4 a5
	      bb 5:        bb 6:
		def1 a5       def2 a5
		    \         /
		     \       /
		      \     /
		       \   /
			bb 7:
		    RVV (use a5)
	 In this case, we can pollute BB5 and BB6 as dirty if get-def
	 of a5 from RVV instruction in BB7 is the def1 in BB5 and
	 def2 BB6 so we can return false early here for HARD_EMPTY_BLOCK_P.
	 However, we are not sure whether BB3 and BB4 can be
	 polluted as Dirty with AVL killed so we can't return false
	 for HARD_EMPTY_BLOCK_P here since it's too early which will
	 potentially produce issues.  */
      gcc_assert (info.dirty_with_killed_avl_p ());
      if (info.get_avl_source ()
	  && get_same_bb_set (sets, bb->cfg_bb ()) == info.get_avl_source ())
	return false;
    }

  /* Condition 2:
     Suppress the VL/VTYPE info backward propagation too early:
			 ________
			|   BB0  |
			|________|
			     |
			 ____|____
			|   BB1  |
			|________|
     In this case, suppose BB 1 has multiple predecessors, BB 0 is one
     of them. BB1 has VL/VTYPE info (may be VALID or DIRTY) to backward
     propagate.
     The AVIN (available in) which is calculated by LCM is empty only
     in these 2 circumstances:
       1. all predecessors of BB1 are empty (not VALID
	  and can not be polluted in backward fusion flow)
       2. VL/VTYPE info of BB1 predecessors are conflict.

     We keep it as dirty in 2nd circumstance and set it as HARD_EMPTY
     (can not be polluted as DIRTY any more) in 1st circumstance.
     We don't backward propagate in 1st circumstance since there is
     no VALID RVV instruction and no polluted blocks (dirty blocks)
     by backward propagation from other following blocks.
     It's meaningless to keep it as Dirty anymore.

     However, since we keep it as dirty in 2nd since there are VALID or
     Dirty blocks in predecessors, we can still gain the benefits and
     optimization opportunities. For example, in this case:
	for (size_t i = 0; i < n; i++)
	  {
	    if (i != cond) {
	      vint8mf8_t v = *(vint8mf8_t*)(in + i + 100);
	      *(vint8mf8_t*)(out + i + 100) = v;
	    } else {
	      vbool1_t v = *(vbool1_t*)(in + i + 400);
	      *(vbool1_t*)(out + i + 400) = v;
	    }
	  }
     VL/VTYPE in if-else are conflict which will produce empty AVIN LCM result
     but we can still keep dirty blocks if *(i != cond)* is very unlikely then
     we can preset vsetvl (VL/VTYPE) info from else (static probability model).

     We don't want to backward propagate VL/VTYPE information too early
     which is not the optimal and may potentially produce issues.  */
  if (bitmap_empty_p (avin))
    {
      bool hard_empty_p = true;
      for (const basic_block pred_cfg_bb : pred_cfg_bbs)
	{
	  if (pred_cfg_bb == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	    continue;
	  sbitmap avout = m_vector_manager->vector_avout[pred_cfg_bb->index];
	  /* Any predecessor with a non-empty AVOUT means circumstance 2
	     (conflict), so the block stays dirty.  */
	  if (!bitmap_empty_p (avout))
	    {
	      hard_empty_p = false;
	      break;
	    }
	}
      if (hard_empty_p)
	return true;
    }

  /* From here on we only demote the block when some successor is dirty
     with a killed AVL; otherwise conditions 3/4 do not apply.  */
  edge e;
  edge_iterator ei;
  bool has_avl_killed_insn_p = false;
  FOR_EACH_EDGE (e, ei, cfg_bb->succs)
    {
      const auto block_info
	= m_vector_manager->vector_block_infos[e->dest->index];
      if (block_info.local_dem.dirty_with_killed_avl_p ())
	{
	  has_avl_killed_insn_p = true;
	  break;
	}
    }
  if (!has_avl_killed_insn_p)
    return false;

  bool any_set_in_bbs_p = false;
  for (const basic_block pred_cfg_bb : pred_cfg_bbs)
    {
      insn_info *def_insn = extract_single_source (set);
      if (def_insn)
	{
	  /* Condition 3:

	      Case 1:                    Case 2:
		bb 0:                      bb 0:
		  def a5 101                 ...
		bb 1:                      bb 1:
		  ...                        ...
		bb 2:                      bb 2:
		  RVV 1 (use a5 with TAIL ANY)   ...
		bb 3:                      bb 3:
		  def a5 101                 def a5 101
		bb 4:                      bb 4:
		  ...                        ...
		bb 5:                      bb 5:
		  RVV 2 (use a5 with TU)     RVV 1 (use a5)

	      Case 1: We can pollute BB3,BB2,BB1,BB0 are all Dirty blocks
	      with killed AVL so that we can merge TU demand info from RVV 2
	      into RVV 1 and elide the vsetvl instruction in BB5.

	      TODO: We only optimize for single source def since multiple source
	      def is quite complicated.

	      Case 2: We only can pollute bb 3 as dirty and it has been accepted
	      in Condition 2 and we can't pollute BB3,BB2,BB1,BB0 like case 1.  */
	  insn_info *last_killed_insn
	    = find_reg_killed_by (crtl->ssa->bb (pred_cfg_bb), avl);
	  if (!last_killed_insn || pred_cfg_bb == def_insn->bb ()->cfg_bb ())
	    continue;
	  if (source_equal_p (last_killed_insn, def_insn))
	    {
	      any_set_in_bbs_p = true;
	      break;
	    }
	}
      else
	{
	  /* Condition 4:

	     bb 0:        bb 1:         bb 3:
	       def1 a5       def2 a5      ...
		  \           /            /
		   \         /            /
		    \       /            /
		     \     /            /
		      bb 4:            /
			|             /
			|            /
		      bb 5:         /
			|          /
			|         /
		      bb 6:      /
			|       /
			|      /
		       bb 8:
		     RVV 1 (use a5)
	     If we get-def (REAL) of a5 from RVV 1 instruction, we will get
	     def1 from BB0 and def2 from BB1. So we will pollute BB6,BB5,BB4,
	     BB0,BB1 with DIRTY and set BB3 as HARD_EMPTY so that we won't
	     propagate AVL to BB3.  */
	  if (any_set_in_bb_p (sets, crtl->ssa->bb (pred_cfg_bb)))
	    {
	      any_set_in_bbs_p = true;
	      break;
	    }
	}
    }
  /* No predecessor actually defines the AVL -> safe to make it hard
     empty.  */
  if (!any_set_in_bbs_p)
    return true;
  return false;
}
3187
/* Compute global backward demanded info.

   Walks the blocks in reverse order and, for each block with VALID or
   DIRTY demanded info, tries to fuse that demand into each predecessor.
   Returns true if any block info changed (caller iterates to a fixed
   point).  */
bool
pass_vsetvl::backward_demand_fusion (void)
{
  /* We compute global infos by backward propagation.
     We want to have better performance in these following cases:

	1. for (size_t i = 0; i < n; i++) {
	     if (i != cond) {
	       vint8mf8_t v = *(vint8mf8_t*)(in + i + 100);
	       *(vint8mf8_t*)(out + i + 100) = v;
	     } else {
	       vbool1_t v = *(vbool1_t*)(in + i + 400);
	       *(vbool1_t*)(out + i + 400) = v;
	     }
	   }

	   Since we don't have any RVV instruction in the BEFORE blocks,
	   LCM fails to optimize such case. We want to backward propagate
	   them into empty blocks so that we could have better performance
	   in LCM.

	2. bb 0:
	     vsetvl e8,mf8 (demand RATIO)
	   bb 1:
	     vsetvl e32,mf2 (demand SEW and LMUL)
	   We backward propagate the first VSETVL into e32,mf2 so that we
	   could be able to eliminate the second VSETVL in LCM.  */

  bool changed_p = false;
  for (const bb_info *bb : crtl->ssa->reverse_bbs ())
    {
      basic_block cfg_bb = bb->cfg_bb ();
      const auto &curr_block_info
	= m_vector_manager->vector_block_infos[cfg_bb->index];
      const auto &prop = curr_block_info.local_dem;

      /* If there is nothing to propagate, just skip it.  */
      if (!prop.valid_or_dirty_p ())
	continue;

      if (!backward_propagate_worthwhile_p (cfg_bb, curr_block_info))
	continue;

      /* Fix PR108270:

		bb 0 -> bb 1
	 We don't need to backward fuse VL/VTYPE info from bb 1 to bb 0
	 if bb 1 is not inside a loop and all predecessors of bb 0 are empty.  */
      if (m_vector_manager->all_empty_predecessor_p (cfg_bb))
	continue;

      edge e;
      edge_iterator ei;
      /* Backward propagate to each predecessor.  */
      FOR_EACH_EDGE (e, ei, cfg_bb->preds)
	{
	  auto &block_info
	    = m_vector_manager->vector_block_infos[e->src->index];

	  /* We don't propagate through critical edges.  */
	  if (e->flags & EDGE_COMPLEX)
	    continue;
	  if (e->src->index == ENTRY_BLOCK_PTR_FOR_FN (cfun)->index)
	    continue;
	  /* If prop is demand of vsetvl instruction and reaching doesn't demand
	     AVL. We don't backward propagate since vsetvl instruction has no
	     side effects.  */
	  if (vsetvl_insn_p (prop.get_insn ()->rtl ())
	      && propagate_avl_across_demands_p (prop, block_info.reaching_out))
	    continue;

	  if (block_info.reaching_out.unknown_p ())
	    continue;
	  else if (block_info.reaching_out.hard_empty_p ())
	    continue;
	  else if (block_info.reaching_out.empty_p ())
	    {
	      /* EMPTY -> DIRTY: pollute the empty predecessor with our
		 demand, provided the fusion type allows it.  */
	      enum fusion_type type
		= get_backward_fusion_type (crtl->ssa->bb (e->src), prop);
	      if (type == INVALID_FUSION)
		continue;

	      block_info.reaching_out = prop;
	      block_info.reaching_out.set_dirty (type);

	      /* For a non-VLMAX register AVL, rebind the AVL to the def
		 visible in the predecessor block if there is one.  */
	      if (prop.has_avl_reg () && !vlmax_avl_p (prop.get_avl ()))
		{
		  hash_set<set_info *> sets
		    = get_all_sets (prop.get_avl_source (), true, true, true);
		  set_info *set = get_same_bb_set (sets, e->src);
		  if (set)
		    block_info.reaching_out.set_avl_info (
		      avl_info (prop.get_avl (), set));
		}

	      block_info.local_dem = block_info.reaching_out;
	      block_info.probability = curr_block_info.probability;
	      changed_p = true;
	    }
	  else if (block_info.reaching_out.dirty_p ())
	    {
	      /* DIRTY -> DIRTY or VALID -> DIRTY.  */
	      vector_insn_info new_info;

	      if (block_info.reaching_out.compatible_p (prop))
		{
		  if (block_info.reaching_out.available_p (prop))
		    continue;
		  new_info = block_info.reaching_out.merge (prop, GLOBAL_MERGE);
		  new_info.set_dirty (
		    block_info.reaching_out.dirty_with_killed_avl_p ());
		  block_info.probability += curr_block_info.probability;
		}
	      else
		{
		  /* Conflicting demands: the more probable one wins.  */
		  if (curr_block_info.probability > block_info.probability)
		    {
		      enum fusion_type type
			= get_backward_fusion_type (crtl->ssa->bb (e->src),
						    prop);
		      if (type == INVALID_FUSION)
			continue;
		      new_info = prop;
		      new_info.set_dirty (type);
		      block_info.probability = curr_block_info.probability;
		    }
		  else
		    continue;
		}

	      if (propagate_avl_across_demands_p (prop,
						  block_info.reaching_out))
		{
		  rtx reg = new_info.get_avl_reg_rtx ();
		  if (find_reg_killed_by (crtl->ssa->bb (e->src), reg))
		    new_info.set_dirty (true);
		}

	      block_info.local_dem = new_info;
	      block_info.reaching_out = new_info;
	      changed_p = true;
	    }
	  else
	    {
	      /* We not only change the info during backward propagation,
		 but also change the VSETVL instruction.  */
	      gcc_assert (block_info.reaching_out.valid_p ());
	      hash_set<set_info *> sets
		= get_all_sets (prop.get_avl_source (), true, false, false);
	      set_info *set = get_same_bb_set (sets, e->src);
	      if (vsetvl_insn_p (block_info.reaching_out.get_insn ()->rtl ())
		  && prop.has_avl_reg () && !vlmax_avl_p (prop.get_avl ()))
		{
		  if (!block_info.reaching_out.same_vlmax_p (prop))
		    continue;
		  if (block_info.reaching_out.same_vtype_p (prop))
		    continue;
		  if (!set)
		    continue;
		  if (set->insn () != block_info.reaching_out.get_insn ())
		    continue;
		}

	      if (!block_info.reaching_out.compatible_p (prop))
		continue;
	      if (block_info.reaching_out.available_p (prop))
		continue;

	      vector_insn_info be_merged = block_info.reaching_out;
	      if (block_info.local_dem == block_info.reaching_out)
		be_merged = block_info.local_dem;
	      vector_insn_info new_info = be_merged.merge (prop, GLOBAL_MERGE);

	      if (curr_block_info.probability > block_info.probability)
		block_info.probability = curr_block_info.probability;

	      /* Don't fuse when the merged AVL register is not live at the
		 end of the predecessor.  */
	      if (propagate_avl_across_demands_p (prop, block_info.reaching_out)
		  && !reg_available_p (crtl->ssa->bb (e->src)->end_insn (),
				       new_info))
		continue;

	      change_vsetvl_insn (new_info.get_insn (), new_info);
	      if (block_info.local_dem == block_info.reaching_out)
		block_info.local_dem = new_info;
	      block_info.reaching_out = new_info;
	      changed_p = true;
	    }
	}
    }
  return changed_p;
}
3380
/* Compute global forward demanded info.

   Complements backward_demand_fusion: propagates each block's
   reaching-out demand into its successors when backward propagation
   could not.  Returns true if anything changed.  */
bool
pass_vsetvl::forward_demand_fusion (void)
{
  /* Enhance the global information propagation especially
     backward propagation miss the propagation.
     Consider such case:

			    bb0
			    (TU)
			   /   \
			  bb1   bb2
			  (TU)  (ANY)
  existing edge -----> \    / (TU) <----- LCM create this edge.
			    bb3
			    (TU)

     Base on the situation, LCM fails to eliminate the VSETVL instruction and
     insert an edge from bb2 to bb3 since we can't backward propagate bb3 into
     bb2. To avoid this confusing LCM result and non-optimal codegen, we should
     forward propagate information from bb0 to bb2 which is friendly to LCM.  */
  bool changed_p = false;
  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      basic_block cfg_bb = bb->cfg_bb ();
      const auto &prop
	= m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out;

      /* If there is nothing to propagate, just skip it.  */
      if (!prop.valid_or_dirty_p ())
	continue;

      if (cfg_bb == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	continue;

      /* Demands that come from an explicit vsetvl insn are not forward
	 propagated.  */
      if (vsetvl_insn_p (prop.get_insn ()->rtl ()))
	continue;

      edge e;
      edge_iterator ei;
      /* Forward propagate to each successor.  */
      FOR_EACH_EDGE (e, ei, cfg_bb->succs)
	{
	  auto &local_dem
	    = m_vector_manager->vector_block_infos[e->dest->index].local_dem;
	  auto &reaching_out
	    = m_vector_manager->vector_block_infos[e->dest->index].reaching_out;

	  /* It's quite obvious, we don't need to propagate itself.  */
	  if (e->dest->index == cfg_bb->index)
	    continue;
	  /* We don't propagate through critical edges.  */
	  if (e->flags & EDGE_COMPLEX)
	    continue;
	  if (e->dest->index == EXIT_BLOCK_PTR_FOR_FN (cfun)->index)
	    continue;

	  /* If there is nothing to propagate, just skip it.  */
	  if (!local_dem.valid_or_dirty_p ())
	    continue;
	  if (local_dem.available_p (prop))
	    continue;
	  if (!local_dem.compatible_p (prop))
	    continue;
	  if (propagate_avl_across_demands_p (prop, local_dem))
	    continue;

	  vector_insn_info new_info = local_dem.merge (prop, GLOBAL_MERGE);
	  new_info.set_insn (local_dem.get_insn ());
	  if (local_dem.dirty_p ())
	    {
	      /* A dirty successor always has local_dem == reaching_out;
		 update both to the merged info.  */
	      gcc_assert (local_dem == reaching_out);
	      new_info.set_dirty (local_dem.dirty_with_killed_avl_p ());
	      local_dem = new_info;
	      reaching_out = local_dem;
	    }
	  else
	    {
	      if (reaching_out == local_dem)
		reaching_out = new_info;
	      local_dem = new_info;
	      change_vsetvl_insn (local_dem.get_insn (), new_info);
	    }
	  /* The successor's probability becomes ours scaled by the edge
	     probability.  */
	  auto &prob
	    = m_vector_manager->vector_block_infos[e->dest->index].probability;
	  auto &curr_prob
	    = m_vector_manager->vector_block_infos[cfg_bb->index].probability;
	  prob = curr_prob * e->probability;
	  changed_p = true;
	}
    }
  return changed_p;
}
3474
3475void
3476pass_vsetvl::demand_fusion (void)
3477{
3478 bool changed_p = true;
3479 while (changed_p)
3480 {
3481 changed_p = false;
4f673c5e
JZZ
3482 /* To optimize the case like this:
3483 void f2 (int8_t * restrict in, int8_t * restrict out, int n, int cond)
3484 {
3485 size_t vl = 101;
3486
3487 for (size_t i = 0; i < n; i++)
3488 {
3489 vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl);
3490 __riscv_vse8_v_i8mf8 (out + i + 300, v, vl);
3491 }
3492
3493 for (size_t i = 0; i < n; i++)
3494 {
3495 vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
3496 __riscv_vse8_v_i8mf8 (out + i, v, vl);
3497
3498 vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl);
3499 __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
3500 }
3501 }
3502
3503 bb 0: li a5, 101 (killed avl)
3504 ...
3505 bb 1: vsetvli zero, a5, ta
3506 ...
3507 bb 2: li a5, 101 (killed avl)
3508 ...
3509 bb 3: vsetvli zero, a3, tu
3510
3511 We want to fuse VSEVLI instructions on bb 1 and bb 3. However, there is
3512 an AVL kill instruction in bb 2 that we can't backward fuse bb 3 or
3513 forward bb 1 arbitrarily. We need available information of each block to
3514 help for such cases. */
6b6b9c68
JZZ
3515 changed_p |= backward_demand_fusion ();
3516 changed_p |= forward_demand_fusion ();
3517 }
3518
3519 changed_p = true;
3520 while (changed_p)
3521 {
3522 changed_p = false;
3523 prune_expressions ();
3524 m_vector_manager->create_bitmap_vectors ();
3525 compute_local_properties ();
4f673c5e
JZZ
3526 compute_available (m_vector_manager->vector_comp,
3527 m_vector_manager->vector_kill,
3528 m_vector_manager->vector_avout,
3529 m_vector_manager->vector_avin);
6b6b9c68 3530 changed_p |= cleanup_illegal_dirty_blocks ();
4f673c5e
JZZ
3531 m_vector_manager->free_bitmap_vectors ();
3532 if (!m_vector_manager->vector_exprs.is_empty ())
3533 m_vector_manager->vector_exprs.release ();
387cd9d3 3534 }
9243c3d1
JZZ
3535
3536 if (dump_file)
3537 {
3538 fprintf (dump_file, "\n\nDirty blocks list: ");
681a5632
JZZ
3539 for (const bb_info *bb : crtl->ssa->bbs ())
3540 if (m_vector_manager->vector_block_infos[bb->index ()]
3541 .reaching_out.dirty_p ())
3542 fprintf (dump_file, "%d ", bb->index ());
9243c3d1
JZZ
3543 fprintf (dump_file, "\n\n");
3544 }
3545}
3546
6b6b9c68
JZZ
3547/* Cleanup illegal dirty blocks. */
3548bool
3549pass_vsetvl::cleanup_illegal_dirty_blocks (void)
3550{
3551 bool changed_p = false;
3552 for (const bb_info *bb : crtl->ssa->bbs ())
3553 {
3554 basic_block cfg_bb = bb->cfg_bb ();
3555 const auto &prop
3556 = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out;
3557
3558 /* If there is nothing to cleanup, just skip it. */
3559 if (!prop.valid_or_dirty_p ())
3560 continue;
3561
3562 if (hard_empty_block_p (bb, prop))
3563 {
3564 m_vector_manager->vector_block_infos[cfg_bb->index].local_dem
3565 = vector_insn_info::get_hard_empty ();
3566 m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out
3567 = vector_insn_info::get_hard_empty ();
3568 changed_p = true;
3569 continue;
3570 }
3571 }
3572 return changed_p;
3573}
3574
9243c3d1
JZZ
3575/* Assemble the candidates expressions for LCM. */
3576void
3577pass_vsetvl::prune_expressions (void)
3578{
681a5632 3579 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 3580 {
681a5632
JZZ
3581 if (m_vector_manager->vector_block_infos[bb->index ()]
3582 .local_dem.valid_or_dirty_p ())
9243c3d1 3583 m_vector_manager->create_expr (
681a5632
JZZ
3584 m_vector_manager->vector_block_infos[bb->index ()].local_dem);
3585 if (m_vector_manager->vector_block_infos[bb->index ()]
9243c3d1
JZZ
3586 .reaching_out.valid_or_dirty_p ())
3587 m_vector_manager->create_expr (
681a5632 3588 m_vector_manager->vector_block_infos[bb->index ()].reaching_out);
9243c3d1
JZZ
3589 }
3590
3591 if (dump_file)
3592 {
3593 fprintf (dump_file, "\nThe total VSETVL expression num = %d\n",
3594 m_vector_manager->vector_exprs.length ());
3595 fprintf (dump_file, "Expression List:\n");
3596 for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
3597 {
3598 fprintf (dump_file, "Expr[%ld]:\n", i);
3599 m_vector_manager->vector_exprs[i]->dump (dump_file);
3600 fprintf (dump_file, "\n");
3601 }
3602 }
3603}
3604
4f673c5e
JZZ
/* Compute the local properties of each recorded expression.

   Local properties are those that are defined by the block, irrespective of
   other blocks.

   An expression is transparent in a block if its operands are not modified
   in the block.

   An expression is computed (locally available) in a block if it is computed
   at least once and expression would contain the same value if the
   computation was moved to the end of the block.

   An expression is locally anticipatable in a block if it is computed at
   least once and expression would contain the same value if the computation
   was moved to the beginning of the block.  */
void
pass_vsetvl::compute_local_properties (void)
{
  /* - If T is locally available at the end of a block, then T' must be
       available at the end of the same block. Since some optimization has
       occurred earlier, T' might not be locally available, however, it must
       have been previously computed on all paths. As a formula, T at AVLOC(B)
       implies that T' at AVOUT(B).
       An "available occurrence" is one that is the last occurrence in the
       basic block and the operands are not modified by following statements in
       the basic block [including this insn].

     - If T is locally anticipated at the beginning of a block, then either
       T', is locally anticipated or it is already available from previous
       blocks. As a formula, this means that T at ANTLOC(B) implies that T' at
       ANTLOC(B) at AVIN(B).
       An "anticipatable occurrence" is one that is the first occurrence in the
       basic block, the operands are not modified in the basic block prior
       to the occurrence and the output is not used between the start of
       the block and the occurrence.  */

  basic_block cfg_bb;
  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      unsigned int curr_bb_idx = bb->index ();
      const auto local_dem
	= m_vector_manager->vector_block_infos[curr_bb_idx].local_dem;
      const auto reaching_out
	= m_vector_manager->vector_block_infos[curr_bb_idx].reaching_out;

      /* Compute transparent.  */
      for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
	{
	  const vector_insn_info *expr = m_vector_manager->vector_exprs[i];
	  /* A block that itself sets or clobbers VL/VTYPE is opaque to
	     every expression.  */
	  if (local_dem.real_dirty_p () || local_dem.valid_p ()
	      || local_dem.unknown_p ()
	      || has_vsetvl_killed_avl_p (bb, local_dem))
	    bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx], i);
	  /* FIXME: Here we set the block as non-transparent (killed) if there
	     is an instruction killed the value of AVL according to the
	     definition of Local transparent. This is true for such following
	     case:

		bb 0 (Loop label):
		  vsetvl zero, a5, e8, mf8
		bb 1:
		  def a5
		bb 2:
		  branch bb 0 (Loop label).

	     In this case, we known there is a loop bb 0->bb 1->bb 2. According
	     to LCM definition, it is correct when we set vsetvl zero, a5, e8,
	     mf8 as non-transparent (killed) so that LCM will not hoist outside
	     the bb 0.

	     However, such conservative configuration will forbid optimization
	     on some unlucky case. For example:

		bb 0:
		  li a5, 101
		bb 1:
		  vsetvl zero, a5, e8, mf8
		bb 2:
		  li a5, 101
		bb 3:
		  vsetvl zero, a5, e8, mf8.
	     So we also relax def a5 as transparent to gain more optimizations
	     as long as the all real def insn of avl do not come from this
	     block. This configuration may be still missing some optimization
	     opportunities.  */
	  if (find_reg_killed_by (bb, expr->get_avl ()))
	    {
	      hash_set<set_info *> sets
		= get_all_sets (expr->get_avl_source (), true, false, false);
	      if (any_set_in_bb_p (sets, bb))
		bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx],
				  i);
	    }
	}

      /* Compute anticipatable occurrences.  */
      if (local_dem.valid_p () || local_dem.real_dirty_p ()
	  || (has_vsetvl_killed_avl_p (bb, local_dem)
	      && vlmax_avl_p (local_dem.get_avl ())))
	if (anticipatable_occurrence_p (bb, local_dem))
	  bitmap_set_bit (m_vector_manager->vector_antic[curr_bb_idx],
			  m_vector_manager->get_expr_id (local_dem));

      /* Compute available occurrences.  */
      if (reaching_out.valid_or_dirty_p ())
	{
	  auto_vec<size_t> available_list
	    = m_vector_manager->get_all_available_exprs (reaching_out);
	  for (size_t i = 0; i < available_list.length (); i++)
	    {
	      const vector_insn_info *expr
		= m_vector_manager->vector_exprs[available_list[i]];
	      if (reaching_out.real_dirty_p ()
		  || has_vsetvl_killed_avl_p (bb, reaching_out)
		  || available_occurrence_p (bb, *expr))
		bitmap_set_bit (m_vector_manager->vector_comp[curr_bb_idx],
				available_list[i]);
	    }
	}
    }

  /* Compute kill for each basic block using:

     ~(TRANSP | COMP)
  */

  FOR_EACH_BB_FN (cfg_bb, cfun)
    {
      bitmap_ior (m_vector_manager->vector_kill[cfg_bb->index],
		  m_vector_manager->vector_transp[cfg_bb->index],
		  m_vector_manager->vector_comp[cfg_bb->index]);
      bitmap_not (m_vector_manager->vector_kill[cfg_bb->index],
		  m_vector_manager->vector_kill[cfg_bb->index]);
    }

  FOR_EACH_BB_FN (cfg_bb, cfun)
    {
      edge e;
      edge_iterator ei;

      /* If the current block is the destination of an abnormal edge, we
	 kill all trapping (for PRE) and memory (for hoist) expressions
	 because we won't be able to properly place the instruction on
	 the edge. So make them neither anticipatable nor transparent.
	 This is fairly conservative.

	 ??? For hoisting it may be necessary to check for set-and-jump
	 instructions here, not just for abnormal edges. The general problem
	 is that when an expression cannot not be placed right at the end of
	 a basic block we should account for any side-effects of a subsequent
	 jump instructions that could clobber the expression. It would
	 be best to implement this check along the lines of
	 should_hoist_expr_to_dom where the target block is already known
	 and, hence, there's no need to conservatively prune expressions on
	 "intermediate" set-and-jump instructions.  */
      FOR_EACH_EDGE (e, ei, cfg_bb->preds)
	if (e->flags & EDGE_COMPLEX)
	  {
	    bitmap_clear (m_vector_manager->vector_antic[cfg_bb->index]);
	    bitmap_clear (m_vector_manager->vector_transp[cfg_bb->index]);
	  }
    }
}
3768
3769/* Return true if VSETVL in the block can be refined as vsetvl zero,zero. */
3770bool
6b6b9c68
JZZ
3771pass_vsetvl::can_refine_vsetvl_p (const basic_block cfg_bb,
3772 const vector_insn_info &info) const
9243c3d1
JZZ
3773{
3774 if (!m_vector_manager->all_same_ratio_p (
3775 m_vector_manager->vector_avin[cfg_bb->index]))
3776 return false;
3777
005fad9d
JZZ
3778 if (!m_vector_manager->all_same_avl_p (
3779 cfg_bb, m_vector_manager->vector_avin[cfg_bb->index]))
3780 return false;
3781
9243c3d1
JZZ
3782 size_t expr_id
3783 = bitmap_first_set_bit (m_vector_manager->vector_avin[cfg_bb->index]);
6b6b9c68
JZZ
3784 if (!m_vector_manager->vector_exprs[expr_id]->same_vlmax_p (info))
3785 return false;
3786 if (!m_vector_manager->vector_exprs[expr_id]->compatible_avl_p (info))
9243c3d1
JZZ
3787 return false;
3788
3789 edge e;
3790 edge_iterator ei;
3791 bool all_valid_p = true;
3792 FOR_EACH_EDGE (e, ei, cfg_bb->preds)
3793 {
3794 if (bitmap_empty_p (m_vector_manager->vector_avout[e->src->index]))
3795 {
3796 all_valid_p = false;
3797 break;
3798 }
3799 }
3800
3801 if (!all_valid_p)
3802 return false;
3803 return true;
3804}
3805
3806/* Optimize athe case like this:
3807
3808 bb 0:
3809 vsetvl 0 a5,zero,e8,mf8
3810 insn 0 (demand SEW + LMUL)
3811 bb 1:
3812 vsetvl 1 a5,zero,e16,mf4
3813 insn 1 (demand SEW + LMUL)
3814
3815 In this case, we should be able to refine
3816 vsetvl 1 into vsetvl zero, zero according AVIN. */
3817void
3818pass_vsetvl::refine_vsetvls (void) const
3819{
3820 basic_block cfg_bb;
3821 FOR_EACH_BB_FN (cfg_bb, cfun)
3822 {
3823 auto info = m_vector_manager->vector_block_infos[cfg_bb->index].local_dem;
3824 insn_info *insn = info.get_insn ();
3825 if (!info.valid_p ())
3826 continue;
3827
3828 rtx_insn *rinsn = insn->rtl ();
6b6b9c68 3829 if (!can_refine_vsetvl_p (cfg_bb, info))
9243c3d1
JZZ
3830 continue;
3831
ec99ffab
JZZ
3832 /* We can't refine user vsetvl into vsetvl zero,zero since the dest
3833 will be used by the following instructions. */
3834 if (vector_config_insn_p (rinsn))
3835 {
3836 m_vector_manager->to_refine_vsetvls.add (rinsn);
3837 continue;
3838 }
ff8f9544
JZ
3839
3840 /* If all incoming edges to a block have a vector state that is compatbile
3841 with the block. In such a case we need not emit a vsetvl in the current
3842 block. */
3843
3844 gcc_assert (has_vtype_op (insn->rtl ()));
3845 rinsn = PREV_INSN (insn->rtl ());
3846 gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ())));
3847 if (m_vector_manager->all_avail_in_compatible_p (cfg_bb))
3848 {
3849 size_t id = m_vector_manager->get_expr_id (info);
3850 if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], id))
3851 continue;
3852 eliminate_insn (rinsn);
3853 }
3854 else
3855 {
3856 rtx new_pat
3857 = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX);
3858 change_insn (rinsn, new_pat);
3859 }
9243c3d1
JZZ
3860 }
3861}
3862
3863void
3864pass_vsetvl::cleanup_vsetvls ()
3865{
3866 basic_block cfg_bb;
3867 FOR_EACH_BB_FN (cfg_bb, cfun)
3868 {
3869 auto &info
3870 = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out;
3871 gcc_assert (m_vector_manager->expr_set_num (
3872 m_vector_manager->vector_del[cfg_bb->index])
3873 <= 1);
3874 for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
3875 {
3876 if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], i))
3877 {
3878 if (info.dirty_p ())
3879 info.set_unknown ();
3880 else
3881 {
4f673c5e
JZZ
3882 const auto dem
3883 = m_vector_manager->vector_block_infos[cfg_bb->index]
3884 .local_dem;
3885 gcc_assert (dem == *m_vector_manager->vector_exprs[i]);
3886 insn_info *insn = dem.get_insn ();
9243c3d1
JZZ
3887 gcc_assert (insn && insn->rtl ());
3888 rtx_insn *rinsn;
ec99ffab
JZZ
3889 /* We can't eliminate user vsetvl since the dest will be used
3890 * by the following instructions. */
9243c3d1 3891 if (vector_config_insn_p (insn->rtl ()))
9243c3d1 3892 {
ec99ffab
JZZ
3893 m_vector_manager->to_delete_vsetvls.add (insn->rtl ());
3894 continue;
9243c3d1 3895 }
ec99ffab
JZZ
3896
3897 gcc_assert (has_vtype_op (insn->rtl ()));
3898 rinsn = PREV_INSN (insn->rtl ());
3899 gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ())));
9243c3d1
JZZ
3900 eliminate_insn (rinsn);
3901 }
3902 }
3903 }
3904 }
3905}
3906
/* Materialize the vsetvl insertions computed by LCM.

   Part 1 queues a vsetvl on every edge marked in vector_insert.
   Part 2 emits a vsetvl at the end of each block whose reaching-out
   info is still "dirty" (a demand not satisfied by any existing or
   inserted vsetvl).

   Returns true when at least one insn was queued on an edge, in which
   case the caller must run commit_edge_insertions.  */
bool
pass_vsetvl::commit_vsetvls (void)
{
  bool need_commit = false;

  for (int ed = 0; ed < NUM_EDGES (m_vector_manager->vector_edge_list); ed++)
    {
      for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
	{
	  edge eg = INDEX_EDGE (m_vector_manager->vector_edge_list, ed);
	  if (bitmap_bit_p (m_vector_manager->vector_insert[ed], i))
	    {
	      const vector_insn_info *require
		= m_vector_manager->vector_exprs[i];
	      gcc_assert (require->valid_or_dirty_p ());
	      rtl_profile_for_edge (eg);
	      start_sequence ();

	      insn_info *insn = require->get_insn ();
	      vector_insn_info prev_info = vector_insn_info ();
	      /* When every expression available on exit of the source
		 block agrees on the SEW/LMUL ratio and on AVL, the
		 incoming VL/VTYPE state is known and insert_vsetvl can
		 use it as the previous state.  */
	      sbitmap bitdata = m_vector_manager->vector_avout[eg->src->index];
	      if (m_vector_manager->all_same_ratio_p (bitdata)
		  && m_vector_manager->all_same_avl_p (eg->dest, bitdata))
		{
		  size_t first = bitmap_first_set_bit (bitdata);
		  prev_info = *m_vector_manager->vector_exprs[first];
		}

	      insert_vsetvl (EMIT_DIRECT, insn->rtl (), *require, prev_info);
	      rtx_insn *rinsn = get_insns ();
	      end_sequence ();
	      default_rtl_profile ();

	      /* We should not get an abnormal edge here.  */
	      gcc_assert (!(eg->flags & EDGE_ABNORMAL));
	      need_commit = true;
	      insert_insn_on_edge (rinsn, eg);
	    }
	}
    }

  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      basic_block cfg_bb = bb->cfg_bb ();
      const auto reaching_out
	= m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out;
      if (!reaching_out.dirty_p ())
	continue;

      if (reaching_out.dirty_with_killed_avl_p ())
	{
	  if (!has_vsetvl_killed_avl_p (bb, reaching_out))
	    continue;

	  /* Skip the insertion when some expression available on entry
	     to this block already satisfies the demand.  */
	  unsigned int bb_index;
	  sbitmap_iterator sbi;
	  sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index];
	  bool available_p = false;
	  EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi)
	    {
	      if (m_vector_manager->vector_exprs[bb_index]->available_p (
		    reaching_out))
		{
		  available_p = true;
		  break;
		}
	    }
	  if (available_p)
	    continue;
	}

      /* Choose the cheapest vsetvl form that satisfies the demand.  */
      rtx new_pat;
      if (!reaching_out.demand_p (DEMAND_AVL))
	{
	  /* AVL is not demanded, so any AVL works; use constant 0.  */
	  vl_vtype_info new_info = reaching_out;
	  new_info.set_avl_info (avl_info (const0_rtx, nullptr));
	  new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX);
	}
      else if (can_refine_vsetvl_p (cfg_bb, reaching_out))
	new_pat
	  = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, reaching_out, NULL_RTX);
      else if (vlmax_avl_p (reaching_out.get_avl ()))
	new_pat = gen_vsetvl_pat (VSETVL_NORMAL, reaching_out,
				  reaching_out.get_avl_reg_rtx ());
      else
	new_pat
	  = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, reaching_out, NULL_RTX);

      start_sequence ();
      emit_insn (new_pat);
      rtx_insn *rinsn = get_insns ();
      end_sequence ();
      insert_insn_end_basic_block (rinsn, cfg_bb);
      if (dump_file)
	{
	  fprintf (dump_file,
		   "\nInsert vsetvl insn %d at the end of <bb %d>:\n",
		   INSN_UID (rinsn), cfg_bb->index);
	  print_rtl_single (dump_file, rinsn);
	}
    }

  return need_commit;
}
/* Phase 4: partial redundancy elimination of vsetvl insns by lazy
   code motion.  Computes the per-block local properties, runs the LCM
   dataflow (pre_edge_lcm_avs) to obtain the insert/delete sets, then
   refines, deletes and inserts vsetvls accordingly.  */
void
pass_vsetvl::pre_vsetvl (void)
{
  /* Compute entity list.  */
  prune_expressions ();

  m_vector_manager->create_bitmap_vectors ();
  compute_local_properties ();
  m_vector_manager->vector_edge_list = pre_edge_lcm_avs (
    m_vector_manager->vector_exprs.length (), m_vector_manager->vector_transp,
    m_vector_manager->vector_comp, m_vector_manager->vector_antic,
    m_vector_manager->vector_kill, m_vector_manager->vector_avin,
    m_vector_manager->vector_avout, &m_vector_manager->vector_insert,
    &m_vector_manager->vector_del);

  /* We should dump the information before CFG is changed. Otherwise it will
     produce ICE (internal compiler error).  */
  if (dump_file)
    m_vector_manager->dump (dump_file);

  refine_vsetvls ();
  cleanup_vsetvls ();
  bool need_commit = commit_vsetvls ();
  if (need_commit)
    commit_edge_insertions ();
}
/* Before VSETVL PASS, RVV instructions pattern is depending on AVL operand
   implicitly.  Since we will emit VSETVL instruction and make RVV instructions
   depending on VL/VTYPE global status registers, we remove the such AVL operand
   in the RVV instructions pattern here in order to remove AVL dependencies when
   AVL operand is a register operand.

   Before the VSETVL PASS:
     li a5,32
     ...
     vadd.vv (..., a5)
   After the VSETVL PASS:
     li a5,32
     vsetvli zero, a5, ...
     ...
     vadd.vv (..., const_int 0).  */
void
pass_vsetvl::cleanup_insns (void) const
{
  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      for (insn_info *insn : bb->real_nondebug_insns ())
	{
	  rtx_insn *rinsn = insn->rtl ();
	  const auto &dem = m_vector_manager->vector_insn_infos[insn->uid ()];
	  /* Eliminate local vsetvl:
	       bb 0:
		 vsetvl a5,a6,...
		 vsetvl zero,a5.

	     Eliminate vsetvl in bb2 when a5 is only coming from
	     bb 0.  */
	  local_eliminate_vsetvl_insn (dem);

	  if (vlmax_avl_insn_p (rinsn))
	    {
	      eliminate_insn (rinsn);
	      continue;
	    }

	  /* Erase the AVL operand from the instruction.  */
	  if (!has_vl_op (rinsn) || !REG_P (get_vl (rinsn)))
	    continue;
	  rtx avl = get_vl (rinsn);
	  /* Only rewrite when the AVL register occurs exactly once in
	     the pattern, so the substitution below cannot touch an
	     unrelated use of the same register.  */
	  if (count_regno_occurrences (rinsn, REGNO (avl)) == 1)
	    {
	      /* Get the list of uses for the new instruction.  */
	      auto attempt = crtl->ssa->new_change_attempt ();
	      insn_change change (insn);
	      /* Remove the use of the substituted value.  */
	      access_array_builder uses_builder (attempt);
	      uses_builder.reserve (insn->num_uses () - 1);
	      for (use_info *use : insn->uses ())
		if (use != find_access (insn->uses (), REGNO (avl)))
		  uses_builder.quick_push (use);
	      use_array new_uses = use_array (uses_builder.finish ());
	      change.new_uses = new_uses;
	      change.move_range = insn->ebb ()->insn_range ();
	      /* Replace AVL with const 0.  Fault-only-first loads are
		 replaced across the whole pattern; other insns are
		 rebuilt from their single SET.  NOTE(review): this
		 suggests fault-first loads are not a plain single SET
		 — confirm against their machine description.  */
	      rtx pat;
	      if (fault_first_load_p (rinsn))
		pat = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx);
	      else
		{
		  rtx set = single_set (rinsn);
		  rtx src
		    = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx);
		  pat = gen_rtx_SET (SET_DEST (set), src);
		}

	      gcc_assert (change_insn (crtl->ssa, change, insn, pat));
	    }
	}
    }
}
/* Phase 6: rebuild RTL_SSA (the CFG changed during LCM) and propagate
   AVL between vsetvl instructions.  Also finalizes the deferred work
   recorded earlier in to_delete_vsetvls / to_refine_vsetvls, and
   drops the VL result of vsetvls whose destination register has no
   remaining nondebug uses.  */
void
pass_vsetvl::propagate_avl (void) const
{
  /* Rebuild the RTL_SSA according to the new CFG generated by LCM.  */
  /* Finalization of RTL_SSA.  */
  free_dominance_info (CDI_DOMINATORS);
  if (crtl->ssa->perform_pending_updates ())
    cleanup_cfg (0);
  delete crtl->ssa;
  crtl->ssa = nullptr;
  /* Initialization of RTL_SSA.  */
  calculate_dominance_info (CDI_DOMINATORS);
  df_analyze ();
  crtl->ssa = new function_info (cfun);

  /* Deletions are deferred to the end because the SSA walk below must
     not invalidate the insns it is iterating over.  */
  hash_set<rtx_insn *> to_delete;
  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      for (insn_info *insn : bb->real_nondebug_insns ())
	{
	  if (vsetvl_discard_result_insn_p (insn->rtl ()))
	    {
	      rtx avl = get_avl (insn->rtl ());
	      if (!REG_P (avl))
		continue;

	      /* Only propagate when the AVL register has a single
		 reaching definition.  */
	      set_info *set = find_access (insn->uses (), REGNO (avl))->def ();
	      insn_info *def_insn = extract_single_source (set);
	      if (!def_insn)
		continue;

	      /* Handle this case:
		 vsetvli a6,zero,e32,m1,ta,mu
		 li a5,4096
		 add a7,a0,a5
		 addi a7,a7,-96
		 vsetvli t1,zero,e8,mf8,ta,ma
		 vle8.v v24,0(a7)
		 add a5,a3,a5
		 addi a5,a5,-96
		 vse8.v v24,0(a5)
		 vsetvli zero,a6,e32,m1,tu,ma
	       */
	      if (vsetvl_insn_p (def_insn->rtl ()))
		{
		  vl_vtype_info def_info = get_vl_vtype_info (def_insn);
		  vl_vtype_info info = get_vl_vtype_info (insn);
		  /* NB: this AVL/VL pair belongs to the *defining*
		     vsetvl, shadowing the outer `avl`.  */
		  rtx avl = get_avl (def_insn->rtl ());
		  rtx vl = get_vl (def_insn->rtl ());
		  /* Propagation is only valid when both vsetvls agree
		     on the SEW/LMUL ratio.  */
		  if (def_info.get_ratio () == info.get_ratio ())
		    {
		      if (vlmax_avl_p (def_info.get_avl ()))
			{
			  /* Defining vsetvl uses VLMAX: reuse its AVL
			     and its VL output register in place.  */
			  info.set_avl_info (
			    avl_info (def_info.get_avl (), nullptr));
			  rtx new_pat
			    = gen_vsetvl_pat (VSETVL_NORMAL, info, vl);
			  validate_change (insn->rtl (),
					   &PATTERN (insn->rtl ()), new_pat,
					   false);
			  continue;
			}
		      if (def_info.has_avl_imm () || rtx_equal_p (avl, vl))
			{
			  /* Emit a fresh discard-result vsetvl with the
			     propagated AVL; if the old VL was only used
			     here, both old vsetvls become dead.  */
			  info.set_avl_info (avl_info (avl, nullptr));
			  emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_AFTER,
					    info, NULL_RTX, insn->rtl ());
			  if (set->single_nondebug_insn_use ())
			    {
			      to_delete.add (insn->rtl ());
			      to_delete.add (def_insn->rtl ());
			    }
			  continue;
			}
		    }
		}
	    }

	  /* Change vsetvl rd, rs1 --> vsevl zero, rs1,
	     if rd is not used by any nondebug instructions.
	     Even though this PASS runs after RA and it doesn't help for
	     reduce register pressure, it can help instructions scheduling
	     since we remove the dependencies.  */
	  if (vsetvl_insn_p (insn->rtl ()))
	    {
	      rtx vl = get_vl (insn->rtl ());
	      rtx avl = get_avl (insn->rtl ());
	      def_info *def = find_access (insn->defs (), REGNO (vl));
	      set_info *set = safe_dyn_cast<set_info *> (def);
	      vector_insn_info info;
	      info.parse_insn (insn);
	      gcc_assert (set);
	      /* Finalize a deletion deferred by cleanup_vsetvls: only
		 actually delete once the VL result is unused.  */
	      if (m_vector_manager->to_delete_vsetvls.contains (insn->rtl ()))
		{
		  m_vector_manager->to_delete_vsetvls.remove (insn->rtl ());
		  if (m_vector_manager->to_refine_vsetvls.contains (
			insn->rtl ()))
		    m_vector_manager->to_refine_vsetvls.remove (insn->rtl ());
		  if (!set->has_nondebug_insn_uses ())
		    {
		      to_delete.add (insn->rtl ());
		      continue;
		    }
		}
	      /* Finalize a refinement deferred by refine_vsetvls.  */
	      if (m_vector_manager->to_refine_vsetvls.contains (insn->rtl ()))
		{
		  m_vector_manager->to_refine_vsetvls.remove (insn->rtl ());
		  if (!set->has_nondebug_insn_uses ())
		    {
		      rtx new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY,
						    info, NULL_RTX);
		      change_insn (insn->rtl (), new_pat);
		      continue;
		    }
		}
	      if (vlmax_avl_p (avl))
		continue;
	      rtx new_pat
		= gen_vsetvl_pat (VSETVL_DISCARD_RESULT, info, NULL_RTX);
	      if (!set->has_nondebug_insn_uses ())
		{
		  validate_change (insn->rtl (), &PATTERN (insn->rtl ()),
				   new_pat, false);
		  continue;
		}
	    }
	}
    }

  for (rtx_insn *rinsn : to_delete)
    eliminate_insn (rinsn);
}
/* Per-function setup for the pass: build RTL_SSA (only when
   optimizing -- the -O0 path, simple_vsetvl, does not use it),
   allocate the vector info manager and seed the per-block execution
   probabilities.  Paired with done ().  */
void
pass_vsetvl::init (void)
{
  if (optimize > 0)
    {
      /* Initialization of RTL_SSA.  Dominators and a df_analyze run
	 are prerequisites for building function_info.  */
      calculate_dominance_info (CDI_DOMINATORS);
      df_analyze ();
      crtl->ssa = new function_info (cfun);
    }

  m_vector_manager = new vector_infos_manager ();
  compute_probabilities ();

  if (dump_file)
    {
      fprintf (dump_file, "\nPrologue: Initialize vector infos\n");
      m_vector_manager->dump (dump_file);
    }
}
/* Per-function teardown, mirroring init (): flush pending RTL_SSA
   updates (cleaning up the CFG when any were applied), destroy the
   SSA form and release the vector info manager.  */
void
pass_vsetvl::done (void)
{
  if (optimize > 0)
    {
      /* Finalization of RTL_SSA.  */
      free_dominance_info (CDI_DOMINATORS);
      if (crtl->ssa->perform_pending_updates ())
	cleanup_cfg (0);
      delete crtl->ssa;
      crtl->ssa = nullptr;
    }
  m_vector_manager->release ();
  delete m_vector_manager;
  m_vector_manager = nullptr;
}
acc10c79
JZZ
4283/* Compute probability for each block. */
4284void
4285pass_vsetvl::compute_probabilities (void)
4286{
4287 /* Don't compute it in -O0 since we don't need it. */
4288 if (!optimize)
4289 return;
4290 edge e;
4291 edge_iterator ei;
4292
4293 for (const bb_info *bb : crtl->ssa->bbs ())
4294 {
4295 basic_block cfg_bb = bb->cfg_bb ();
4296 auto &curr_prob
4297 = m_vector_manager->vector_block_infos[cfg_bb->index].probability;
c129d22d
JZZ
4298
4299 /* GCC assume entry block (bb 0) are always so
4300 executed so set its probability as "always". */
acc10c79
JZZ
4301 if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
4302 curr_prob = profile_probability::always ();
c129d22d
JZZ
4303 /* Exit block (bb 1) is the block we don't need to process. */
4304 if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
4305 continue;
4306
acc10c79
JZZ
4307 gcc_assert (curr_prob.initialized_p ());
4308 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
4309 {
4310 auto &new_prob
4311 = m_vector_manager->vector_block_infos[e->dest->index].probability;
4312 if (!new_prob.initialized_p ())
4313 new_prob = curr_prob * e->probability;
4314 else if (new_prob == profile_probability::always ())
4315 continue;
4316 else
4317 new_prob += curr_prob * e->probability;
4318 }
4319 }
acc10c79
JZZ
4320}
4321
/* Lazy vsetvl insertion for optimize > 0.  Driver that runs the six
   phases described at the top of the file in order; each phase's
   result is dumped when -fdump is active.  */
void
pass_vsetvl::lazy_vsetvl (void)
{
  if (dump_file)
    fprintf (dump_file,
	     "\nEntering Lazy VSETVL PASS and Handling %d basic blocks for "
	     "function:%s\n",
	     n_basic_blocks_for_fn (cfun), function_name (cfun));

  /* Phase 1 - Compute the local dems within each block.
     The data-flow analysis within each block is backward analysis.  */
  if (dump_file)
    fprintf (dump_file, "\nPhase 1: Compute local backward vector infos\n");
  for (const bb_info *bb : crtl->ssa->bbs ())
    compute_local_backward_infos (bb);
  if (dump_file)
    m_vector_manager->dump (dump_file);

  /* Phase 2 - Emit vsetvl instructions within each basic block according to
     demand, compute and save ANTLOC && AVLOC of each block.  */
  if (dump_file)
    fprintf (dump_file,
	     "\nPhase 2: Emit vsetvl instruction within each block\n");
  for (const bb_info *bb : crtl->ssa->bbs ())
    emit_local_forward_vsetvls (bb);
  if (dump_file)
    m_vector_manager->dump (dump_file);

  /* Phase 3 - Propagate demanded info across blocks.  */
  if (dump_file)
    fprintf (dump_file, "\nPhase 3: Demands propagation across blocks\n");
  demand_fusion ();
  if (dump_file)
    m_vector_manager->dump (dump_file);

  /* Phase 4 - Lazy code motion.  */
  if (dump_file)
    fprintf (dump_file, "\nPhase 4: PRE vsetvl by Lazy code motion (LCM)\n");
  pre_vsetvl ();

  /* Phase 5 - Cleanup AVL && VL operand of RVV instruction.  */
  if (dump_file)
    fprintf (dump_file, "\nPhase 5: Cleanup AVL and VL operands\n");
  cleanup_insns ();

  /* Phase 6 - Rebuild RTL_SSA to propagate AVL between vsetvls.  */
  if (dump_file)
    fprintf (dump_file,
	     "\nPhase 6: Rebuild RTL_SSA to propagate AVL between vsetvls\n");
  propagate_avl ();
}
4375/* Main entry point for this pass. */
4376unsigned int
4377pass_vsetvl::execute (function *)
4378{
4379 if (n_basic_blocks_for_fn (cfun) <= 0)
4380 return 0;
4381
ca8fb009
JZZ
4382 /* The RVV instruction may change after split which is not a stable
4383 instruction. We need to split it here to avoid potential issue
4384 since the VSETVL PASS is insert before split PASS. */
4385 split_all_insns ();
9243c3d1
JZZ
4386
4387 /* Early return for there is no vector instructions. */
4388 if (!has_vector_insn (cfun))
4389 return 0;
4390
4391 init ();
4392
4393 if (!optimize)
4394 simple_vsetvl ();
4395 else
4396 lazy_vsetvl ();
4397
4398 done ();
4399 return 0;
4400}
4401
4402rtl_opt_pass *
4403make_pass_vsetvl (gcc::context *ctxt)
4404{
4405 return new pass_vsetvl (ctxt);
4406}