]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/riscv/riscv-vsetvl.cc
arm: Fix MVE intrinsics support with LTO (PR target/110268)
[thirdparty/gcc.git] / gcc / config / riscv / riscv-vsetvl.cc
CommitLineData
9243c3d1 1/* VSETVL pass for RISC-V 'V' Extension for GNU compiler.
c841bde5 2 Copyright (C) 2022-2023 Free Software Foundation, Inc.
9243c3d1
JZZ
3 Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3, or(at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21/* This pass is to Set VL/VTYPE global status for RVV instructions
22 that depend on VL and VTYPE registers by Lazy code motion (LCM).
23
24 Strategy:
25
26 - Backward demanded info fusion within block.
27
28 - Lazy code motion (LCM) based demanded info backward propagation.
29
30 - RTL_SSA framework for def-use, PHI analysis.
31
32 - Lazy code motion (LCM) for global VL/VTYPE optimization.
33
34 Assumption:
35
36 - Each avl operand is either an immediate (must be in range 0 ~ 31) or reg.
37
38 This pass consists of 5 phases:
39
40 - Phase 1 - compute VL/VTYPE demanded information within each block
41 by backward data-flow analysis.
42
43 - Phase 2 - Emit vsetvl instructions within each basic block according to
44 demand, compute and save ANTLOC && AVLOC of each block.
45
387cd9d3
JZZ
46 - Phase 3 - Backward && forward demanded info propagation and fusion across
47 blocks.
9243c3d1
JZZ
48
49 - Phase 4 - Lazy code motion including: compute local properties,
50 pre_edge_lcm and vsetvl insertion && delete edges for LCM results.
51
52 - Phase 5 - Cleanup AVL operand of RVV instruction since it will not be
53 used any more and VL operand of VSETVL instruction if it is not used by
54 any non-debug instructions.
55
6b6b9c68
JZZ
56 - Phase 6 - Propagate AVL between vsetvl instructions.
57
9243c3d1
JZZ
58 Implementation:
59
60 - The subroutine of optimize == 0 is simple_vsetvl.
61 This function simplily vsetvl insertion for each RVV
62 instruction. No optimization.
63
64 - The subroutine of optimize > 0 is lazy_vsetvl.
65 This function optimize vsetvl insertion process by
ec99ffab
JZZ
66 lazy code motion (LCM) layering on RTL_SSA.
67
68 - get_avl (), get_insn (), get_avl_source ():
69
70 1. get_insn () is the current instruction, find_access (get_insn
71 ())->def is the same as get_avl_source () if get_insn () demand VL.
72 2. If get_avl () is non-VLMAX REG, get_avl () == get_avl_source
73 ()->regno ().
74 3. get_avl_source ()->regno () is the REGNO that we backward propagate.
75 */
9243c3d1
JZZ
76
77#define IN_TARGET_CODE 1
78#define INCLUDE_ALGORITHM
79#define INCLUDE_FUNCTIONAL
80
81#include "config.h"
82#include "system.h"
83#include "coretypes.h"
84#include "tm.h"
85#include "backend.h"
86#include "rtl.h"
87#include "target.h"
88#include "tree-pass.h"
89#include "df.h"
90#include "rtl-ssa.h"
91#include "cfgcleanup.h"
92#include "insn-config.h"
93#include "insn-attr.h"
94#include "insn-opinit.h"
95#include "tm-constrs.h"
96#include "cfgrtl.h"
97#include "cfganal.h"
98#include "lcm.h"
99#include "predict.h"
100#include "profile-count.h"
101#include "riscv-vsetvl.h"
102
103using namespace rtl_ssa;
104using namespace riscv_vector;
105
ec99ffab
JZZ
106static CONSTEXPR const unsigned ALL_SEW[] = {8, 16, 32, 64};
107static CONSTEXPR const vlmul_type ALL_LMUL[]
108 = {LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2};
ec99ffab 109
9243c3d1
JZZ
/* Dump a vector_insn_info to stderr; intended to be called from a
   debugger (DEBUG_FUNCTION keeps it from being optimized away).  */
DEBUG_FUNCTION void
debug (const vector_insn_info *info)
{
  info->dump (stderr);
}
115
/* Dump the whole vector_infos_manager to stderr; debugger helper.  */
DEBUG_FUNCTION void
debug (const vector_infos_manager *info)
{
  info->dump (stderr);
}
121
/* Return true if X is the special VLMAX AVL marker rtx (RVV_VLMAX),
   i.e. the AVL that requests the maximum vector length.  */
static bool
vlmax_avl_p (rtx x)
{
  return x && rtx_equal_p (x, RVV_VLMAX);
}
127
128static bool
129vlmax_avl_insn_p (rtx_insn *rinsn)
130{
85112fbb
JZZ
131 return (INSN_CODE (rinsn) == CODE_FOR_vlmax_avlsi
132 || INSN_CODE (rinsn) == CODE_FOR_vlmax_avldi);
9243c3d1
JZZ
133}
134
8d8cc482
JZZ
135/* Return true if the block is a loop itself:
136 local_dem
137 __________
138 ____|____ |
139 | | |
140 |________| |
141 |_________|
142 reaching_out
143*/
9243c3d1
JZZ
144static bool
145loop_basic_block_p (const basic_block cfg_bb)
146{
8d8cc482
JZZ
147 if (JUMP_P (BB_END (cfg_bb)) && any_condjump_p (BB_END (cfg_bb)))
148 {
149 edge e;
150 edge_iterator ei;
151 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
152 if (e->dest->index == cfg_bb->index)
153 return true;
154 }
155 return false;
9243c3d1
JZZ
156}
157
/* Return true if it is an RVV instruction depends on VTYPE global
   status register.  */
static bool
has_vtype_op (rtx_insn *rinsn)
{
  /* recog_memoized must succeed before insn attributes can be queried.  */
  return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn);
}
165
/* Return true if it is an RVV instruction depends on VL global
   status register.  */
static bool
has_vl_op (rtx_insn *rinsn)
{
  /* recog_memoized must succeed before insn attributes can be queried.  */
  return recog_memoized (rinsn) >= 0 && get_attr_has_vl_op (rinsn);
}
173
/* Is this a SEW value that can be encoded into the VTYPE format?
   The legal SEWs are exactly the powers of two 8, 16, 32 and 64.

   Note: the previous implementation tested `exact_log2 (sew)`, but
   exact_log2 returns -1 (a *truthy* value) for non-powers of two, so
   values such as 24 or 48 were incorrectly accepted.  Enumerating the
   four legal values avoids that pitfall.  */
static bool
valid_sew_p (size_t sew)
{
  return sew == 8 || sew == 16 || sew == 32 || sew == 64;
}
180
ec99ffab
JZZ
181/* Return true if the instruction ignores VLMUL field of VTYPE. */
182static bool
183ignore_vlmul_insn_p (rtx_insn *rinsn)
184{
185 return get_attr_type (rinsn) == TYPE_VIMOVVX
186 || get_attr_type (rinsn) == TYPE_VFMOVVF
187 || get_attr_type (rinsn) == TYPE_VIMOVXV
188 || get_attr_type (rinsn) == TYPE_VFMOVFV;
189}
190
191/* Return true if the instruction is scalar move instruction. */
192static bool
193scalar_move_insn_p (rtx_insn *rinsn)
194{
195 return get_attr_type (rinsn) == TYPE_VIMOVXV
196 || get_attr_type (rinsn) == TYPE_VFMOVFV;
197}
198
60bd33bc
JZZ
199/* Return true if the instruction is fault first load instruction. */
200static bool
201fault_first_load_p (rtx_insn *rinsn)
202{
6313b045
JZZ
203 return recog_memoized (rinsn) >= 0
204 && (get_attr_type (rinsn) == TYPE_VLDFF
205 || get_attr_type (rinsn) == TYPE_VLSEGDFF);
60bd33bc
JZZ
206}
207
/* Return true if the instruction is read vl instruction.  */
static bool
read_vl_insn_p (rtx_insn *rinsn)
{
  /* recog_memoized must succeed before querying the type attribute.  */
  return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_RDVL;
}
214
9243c3d1
JZZ
/* Return true if it is a vsetvl instruction (any vector-config form).  */
static bool
vector_config_insn_p (rtx_insn *rinsn)
{
  return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL;
}
221
222/* Return true if it is vsetvldi or vsetvlsi. */
223static bool
224vsetvl_insn_p (rtx_insn *rinsn)
225{
6b6b9c68
JZZ
226 if (!vector_config_insn_p (rinsn))
227 return false;
85112fbb 228 return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi
6b6b9c68
JZZ
229 || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi);
230}
231
232/* Return true if it is vsetvl zero, rs1. */
233static bool
234vsetvl_discard_result_insn_p (rtx_insn *rinsn)
235{
236 if (!vector_config_insn_p (rinsn))
237 return false;
238 return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi
239 || INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi);
9243c3d1
JZZ
240}
241
/* Return true if INSN is a real (non-artificial) insn that belongs to
   block BB.  Used as the loop condition when walking insns within a
   single block.  */
static bool
real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb)
{
  return insn != nullptr && insn->is_real () && insn->bb () == bb;
}
247
/* Return true if INSN1 comes strictly before INSN2 in program order.  */
static bool
before_p (const insn_info *insn1, const insn_info *insn2)
{
  return insn1->compare_with (insn2) < 0;
}
253
6b6b9c68
JZZ
254static insn_info *
255find_reg_killed_by (const bb_info *bb, rtx x)
4f673c5e 256{
6b6b9c68
JZZ
257 if (!x || vlmax_avl_p (x) || !REG_P (x))
258 return nullptr;
259 for (insn_info *insn : bb->reverse_real_nondebug_insns ())
4f673c5e 260 if (find_access (insn->defs (), REGNO (x)))
6b6b9c68
JZZ
261 return insn;
262 return nullptr;
263}
264
265/* Helper function to get VL operand. */
266static rtx
267get_vl (rtx_insn *rinsn)
268{
269 if (has_vl_op (rinsn))
270 {
271 extract_insn_cached (rinsn);
272 return recog_data.operand[get_attr_vl_op_idx (rinsn)];
273 }
274 return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0));
4f673c5e
JZZ
275}
276
/* Return true if the "dirty with killed AVL" demand INFO is still
   satisfiable at the end of BB, i.e. the last definition of the AVL
   register in BB produces a value compatible with INFO's AVL.  */
static bool
has_vsetvl_killed_avl_p (const bb_info *bb, const vector_insn_info &info)
{
  if (info.dirty_with_killed_avl_p ())
    {
      rtx avl = info.get_avl ();
      /* For a VLMAX AVL it is enough that something in BB kills the
	 underlying AVL register.  */
      if (vlmax_avl_p (avl))
	return find_reg_killed_by (bb, info.get_avl_reg_rtx ()) != nullptr;
      /* Otherwise walk backwards to the last insn defining the AVL reg.  */
      for (const insn_info *insn : bb->reverse_real_nondebug_insns ())
	{
	  def_info *def = find_access (insn->defs (), REGNO (avl));
	  if (def)
	    {
	      /* A clobber (non-set) definition cannot be matched.  */
	      set_info *set = safe_dyn_cast<set_info *> (def);
	      if (!set)
		return false;

	      /* Build a fresh REG rtx so compatible_avl_p compares the
		 (reg, set) pair rather than pointer identity.  */
	      rtx new_avl = gen_rtx_REG (GET_MODE (avl), REGNO (avl));
	      gcc_assert (new_avl != avl);
	      if (!info.compatible_avl_p (avl_info (new_avl, set)))
		return false;

	      return true;
	    }
	}
    }
  return false;
}
305
9243c3d1
JZZ
/* An "anticipatable occurrence" is one that is the first occurrence in the
   basic block, the operands are not modified in the basic block prior
   to the occurrence and the output is not used between the start of
   the block and the occurrence.

   For VSETVL instruction, we have these following formats:
     1. vsetvl zero, rs1.
     2. vsetvl zero, imm.
     3. vsetvl rd, rs1.

   So base on these circumstances, a DEM is considered as a local anticipatable
   occurrence should satisfy these following conditions:

     1). rs1 (avl) are not modified in the basic block prior to the VSETVL.
     2). rd (vl) are not modified in the basic block prior to the VSETVL.
     3). rd (vl) is not used between the start of the block and the occurrence.

   Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE
   is modified prior to the occurrence. This case is already considered as
   a non-local anticipatable occurrence.
*/
static bool
anticipatable_occurrence_p (const bb_info *bb, const vector_insn_info dem)
{
  insn_info *insn = dem.get_insn ();
  /* The only possible operand we care of VSETVL is AVL.  */
  if (dem.has_avl_reg ())
    {
      /* rs1 (avl) are not modified in the basic block prior to the VSETVL.  */
      if (!vlmax_avl_p (dem.get_avl ()))
	{
	  set_info *set = dem.get_avl_source ();
	  /* If it's undefined, it's not anticipatable conservatively.  */
	  if (!set)
	    return false;
	  /* A definition of the AVL earlier in this same block means the
	     occurrence is not anticipatable at the block start.  */
	  if (real_insn_and_same_bb_p (set->insn (), bb)
	      && before_p (set->insn (), insn))
	    return false;
	}
    }

  /* rd (vl) is not used between the start of the block and the occurrence.  */
  if (vsetvl_insn_p (insn->rtl ()))
    {
      rtx dest = get_vl (insn->rtl ());
      /* Walk backwards from the occurrence to the block start.  */
      for (insn_info *i = insn->prev_nondebug_insn ();
	   real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ())
	{
	  /* rd (vl) is not used between the start of the block and the
	   * occurrence. */
	  if (find_access (i->uses (), REGNO (dest)))
	    return false;
	  /* rd (vl) are not modified in the basic block prior to the VSETVL. */
	  if (find_access (i->defs (), REGNO (dest)))
	    return false;
	}
    }

  return true;
}
366
/* An "available occurrence" is one that is the last occurrence in the
   basic block and the operands are not modified by following statements in
   the basic block [including this insn].

   For VSETVL instruction, we have these following formats:
     1. vsetvl zero, rs1.
     2. vsetvl zero, imm.
     3. vsetvl rd, rs1.

   So base on these circumstances, a DEM is considered as a local available
   occurrence should satisfy these following conditions:

     1). rs1 (avl) are not modified by following statements in
	 the basic block.
     2). rd (vl) are not modified by following statements in
	 the basic block.

   Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE
   is modified prior to the occurrence. This case is already considered as
   a non-local available occurrence.
*/
static bool
available_occurrence_p (const bb_info *bb, const vector_insn_info dem)
{
  insn_info *insn = dem.get_insn ();
  /* The only possible operand we care of VSETVL is AVL.  */
  if (dem.has_avl_reg ())
    {
      if (!vlmax_avl_p (dem.get_avl ()))
	{
	  rtx dest = NULL_RTX;
	  insn_info *i = insn;
	  if (vsetvl_insn_p (insn->rtl ()))
	    {
	      dest = get_vl (insn->rtl ());
	      /* For user vsetvl a2, a2 instruction, we consider it as
		 available even though it modifies "a2".  */
	      i = i->next_nondebug_insn ();
	    }
	  /* Scan forward to the end of the block.  */
	  for (; real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ())
	    {
	      /* read_vl insns only read VL; they cannot invalidate AVL/VL.  */
	      if (read_vl_insn_p (i->rtl ()))
		continue;
	      /* rs1 (avl) are not modified by following statements in
		 the basic block.  */
	      if (find_access (i->defs (), REGNO (dem.get_avl ())))
		return false;
	      /* rd (vl) are not modified by following statements in
		 the basic block.  */
	      if (dest && find_access (i->defs (), REGNO (dest)))
		return false;
	    }
	}
    }
  return true;
}
423
6b6b9c68
JZZ
424static bool
425insn_should_be_added_p (const insn_info *insn, unsigned int types)
9243c3d1 426{
6b6b9c68
JZZ
427 if (insn->is_real () && (types & REAL_SET))
428 return true;
429 if (insn->is_phi () && (types & PHI_SET))
430 return true;
431 if (insn->is_bb_head () && (types & BB_HEAD_SET))
432 return true;
433 if (insn->is_bb_end () && (types & BB_END_SET))
434 return true;
435 return false;
9243c3d1
JZZ
436}
437
6b6b9c68
JZZ
/* Recursively find all define instructions. The kind of instruction is
   specified by the DEF_TYPE.  Transparently walks through PHI nodes,
   collecting every reaching set whose kind matches TYPES.  Returns an
   empty set if PHI is null or any reaching definition is not a set
   (conservative bail-out).  */
static hash_set<set_info *>
get_all_sets (phi_info *phi, unsigned int types)
{
  hash_set<set_info *> insns;
  /* Iterative worklist traversal of the PHI web; visited_list guards
     against cycles.  */
  auto_vec<phi_info *> work_list;
  hash_set<phi_info *> visited_list;
  if (!phi)
    return hash_set<set_info *> ();
  work_list.safe_push (phi);

  while (!work_list.is_empty ())
    {
      phi_info *phi = work_list.pop ();
      visited_list.add (phi);
      for (use_info *use : phi->inputs ())
	{
	  def_info *def = use->def ();
	  set_info *set = safe_dyn_cast<set_info *> (def);
	  /* A non-set definition (e.g. clobber) makes the result
	     unusable — bail out with an empty set.  */
	  if (!set)
	    return hash_set<set_info *> ();

	  gcc_assert (!set->insn ()->is_debug_insn ());

	  if (insn_should_be_added_p (set->insn (), types))
	    insns.add (set);
	  /* Queue nested PHIs that have not been visited yet.  */
	  if (set->insn ()->is_phi ())
	    {
	      phi_info *new_phi = as_a<phi_info *> (set);
	      if (!visited_list.contains (new_phi))
		work_list.safe_push (new_phi);
	    }
	}
    }
  return insns;
}
9243c3d1 475
6b6b9c68
JZZ
476static hash_set<set_info *>
477get_all_sets (set_info *set, bool /* get_real_inst */ real_p,
478 bool /*get_phi*/ phi_p, bool /* get_function_parameter*/ param_p)
479{
480 if (real_p && phi_p && param_p)
481 return get_all_sets (safe_dyn_cast<phi_info *> (set),
482 REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET);
483
484 else if (real_p && param_p)
485 return get_all_sets (safe_dyn_cast<phi_info *> (set),
486 REAL_SET | BB_HEAD_SET | BB_END_SET);
487
488 else if (real_p)
489 return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET);
490 return hash_set<set_info *> ();
491}
492
/* Helper function to get AVL operand.  */
static rtx
get_avl (rtx_insn *rinsn)
{
  /* For vsetvl forms the AVL is buried inside the PARALLEL pattern.  */
  if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn))
    return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0);

  if (!has_vl_op (rinsn))
    return NULL_RTX;
  /* VLMAX-typed insns use the special marker instead of an operand.  */
  if (get_attr_avl_type (rinsn) == VLMAX)
    return RVV_VLMAX;
  extract_insn_cached (rinsn);
  return recog_data.operand[get_attr_vl_op_idx (rinsn)];
}
507
508static set_info *
509get_same_bb_set (hash_set<set_info *> &sets, const basic_block cfg_bb)
510{
511 for (set_info *set : sets)
512 if (set->bb ()->cfg_bb () == cfg_bb)
513 return set;
514 return nullptr;
515}
516
4f673c5e
JZZ
517/* Recursively find all predecessor blocks for cfg_bb. */
518static hash_set<basic_block>
519get_all_predecessors (basic_block cfg_bb)
520{
521 hash_set<basic_block> blocks;
522 auto_vec<basic_block> work_list;
523 hash_set<basic_block> visited_list;
524 work_list.safe_push (cfg_bb);
9243c3d1 525
4f673c5e 526 while (!work_list.is_empty ())
9243c3d1 527 {
4f673c5e
JZZ
528 basic_block new_cfg_bb = work_list.pop ();
529 visited_list.add (new_cfg_bb);
530 edge e;
531 edge_iterator ei;
532 FOR_EACH_EDGE (e, ei, new_cfg_bb->preds)
533 {
534 if (!visited_list.contains (e->src))
535 work_list.safe_push (e->src);
536 blocks.add (e->src);
537 }
9243c3d1 538 }
4f673c5e
JZZ
539 return blocks;
540}
9243c3d1 541
4f673c5e
JZZ
542/* Return true if there is an INSN in insns staying in the block BB. */
543static bool
6b6b9c68 544any_set_in_bb_p (hash_set<set_info *> sets, const bb_info *bb)
4f673c5e 545{
6b6b9c68
JZZ
546 for (const set_info *set : sets)
547 if (set->bb ()->index () == bb->index ())
4f673c5e
JZZ
548 return true;
549 return false;
9243c3d1
JZZ
550}
551
/* Helper function to get SEW operand. We always have SEW value for
   all RVV instructions that have VTYPE OP.  */
static uint8_t
get_sew (rtx_insn *rinsn)
{
  return get_attr_sew (rinsn);
}
559
/* Helper function to get VLMUL operand. We always have VLMUL value for
   all RVV instructions that have VTYPE OP.  */
static enum vlmul_type
get_vlmul (rtx_insn *rinsn)
{
  return (enum vlmul_type) get_attr_vlmul (rinsn);
}
567
568/* Get default tail policy. */
569static bool
570get_default_ta ()
571{
572 /* For the instruction that doesn't require TA, we still need a default value
573 to emit vsetvl. We pick up the default value according to prefer policy. */
574 return (bool) (get_prefer_tail_policy () & 0x1
575 || (get_prefer_tail_policy () >> 1 & 0x1));
576}
577
578/* Get default mask policy. */
579static bool
580get_default_ma ()
581{
582 /* For the instruction that doesn't require MA, we still need a default value
583 to emit vsetvl. We pick up the default value according to prefer policy. */
584 return (bool) (get_prefer_mask_policy () & 0x1
585 || (get_prefer_mask_policy () >> 1 & 0x1));
586}
587
588/* Helper function to get TA operand. */
589static bool
590tail_agnostic_p (rtx_insn *rinsn)
591{
592 /* If it doesn't have TA, we return agnostic by default. */
593 extract_insn_cached (rinsn);
594 int ta = get_attr_ta (rinsn);
595 return ta == INVALID_ATTRIBUTE ? get_default_ta () : IS_AGNOSTIC (ta);
596}
597
598/* Helper function to get MA operand. */
599static bool
600mask_agnostic_p (rtx_insn *rinsn)
601{
602 /* If it doesn't have MA, we return agnostic by default. */
603 extract_insn_cached (rinsn);
604 int ma = get_attr_ma (rinsn);
605 return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma);
606}
607
/* Return true if FN has a vector instruction that use VL/VTYPE.  */
static bool
has_vector_insn (function *fn)
{
  basic_block cfg_bb;
  rtx_insn *rinsn;
  /* Scan every nondebug insn of every block; first hit wins.  */
  FOR_ALL_BB_FN (cfg_bb, fn)
    FOR_BB_INSNS (cfg_bb, rinsn)
      if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn))
	return true;
  return false;
}
620
/* Emit vsetvl instruction.  Build the RTL pattern for a vsetvl of kind
   INSN_TYPE from INFO; VL is the destination register (only required
   for VSETVL_NORMAL).  */
static rtx
gen_vsetvl_pat (enum vsetvl_type insn_type, const vl_vtype_info &info, rtx vl)
{
  rtx avl = info.get_avl ();
  /* if optimization == 0 and the instruction is vmv.x.s/vfmv.f.s,
     set the value of avl to (const_int 0) so that VSETVL PASS will
     insert vsetvl correctly.  */
  if (info.has_avl_no_reg ())
    avl = GEN_INT (0);
  rtx sew = gen_int_mode (info.get_sew (), Pmode);
  rtx vlmul = gen_int_mode (info.get_vlmul (), Pmode);
  rtx ta = gen_int_mode (info.get_ta (), Pmode);
  rtx ma = gen_int_mode (info.get_ma (), Pmode);

  if (insn_type == VSETVL_NORMAL)
    {
      gcc_assert (vl != NULL_RTX);
      return gen_vsetvl (Pmode, vl, avl, sew, vlmul, ta, ma);
    }
  else if (insn_type == VSETVL_VTYPE_CHANGE_ONLY)
    return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma);
  else
    return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma);
}
646
/* Build a replacement vsetvl pattern for the existing vsetvl RINSN so
   that it matches the (possibly fused) demand INFO, keeping the same
   vsetvl flavor RINSN already has.  */
static rtx
gen_vsetvl_pat (rtx_insn *rinsn, const vector_insn_info &info)
{
  rtx new_pat;
  vl_vtype_info new_info = info;
  /* After a fault-first load the real VL comes from the load itself, so
     drop the AVL source and reuse the load's AVL operand.  */
  if (info.get_insn () && info.get_insn ()->rtl ()
      && fault_first_load_p (info.get_insn ()->rtl ()))
    new_info.set_avl_info (
      avl_info (get_avl (info.get_insn ()->rtl ()), nullptr));
  if (vsetvl_insn_p (rinsn) || vlmax_avl_p (info.get_avl ()))
    {
      /* Keep the rd-writing form, preserving the original destination.  */
      rtx dest = get_vl (rinsn);
      new_pat = gen_vsetvl_pat (VSETVL_NORMAL, new_info, dest);
    }
  else if (INSN_CODE (rinsn) == CODE_FOR_vsetvl_vtype_change_only)
    new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, new_info, NULL_RTX);
  else
    new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX);
  return new_pat;
}
667
668static void
669emit_vsetvl_insn (enum vsetvl_type insn_type, enum emit_type emit_type,
e577b91b 670 const vl_vtype_info &info, rtx vl, rtx_insn *rinsn)
9243c3d1
JZZ
671{
672 rtx pat = gen_vsetvl_pat (insn_type, info, vl);
673 if (dump_file)
674 {
675 fprintf (dump_file, "\nInsert vsetvl insn PATTERN:\n");
676 print_rtl_single (dump_file, pat);
677 }
678
679 if (emit_type == EMIT_DIRECT)
680 emit_insn (pat);
681 else if (emit_type == EMIT_BEFORE)
682 emit_insn_before (pat, rinsn);
683 else
684 emit_insn_after (pat, rinsn);
685}
686
/* Delete RINSN, using remove_insn while inside an open sequence (where
   delete_insn would corrupt the sequence chain) and delete_insn
   otherwise.  */
static void
eliminate_insn (rtx_insn *rinsn)
{
  if (dump_file)
    {
      fprintf (dump_file, "\nEliminate insn %d:\n", INSN_UID (rinsn));
      print_rtl_single (dump_file, rinsn);
    }
  if (in_sequence_p ())
    remove_insn (rinsn);
  else
    delete_insn (rinsn);
}
700
/* Emit the cheapest vsetvl form that realizes demand INFO given the
   incoming state PREV_INFO, placing it per EMIT_TYPE relative to RINSN.
   Returns the vsetvl_type actually emitted.  */
static vsetvl_type
insert_vsetvl (enum emit_type emit_type, rtx_insn *rinsn,
	       const vector_insn_info &info, const vector_insn_info &prev_info)
{
  /* Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
     VLMAX.  */
  if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p ()
      && info.compatible_avl_p (prev_info) && info.same_vlmax_p (prev_info))
    {
      emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX,
			rinsn);
      return VSETVL_VTYPE_CHANGE_ONLY;
    }

  /* An immediate AVL needs no VL destination register.  */
  if (info.has_avl_imm ())
    {
      emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX,
			rinsn);
      return VSETVL_DISCARD_RESULT;
    }

  if (info.has_avl_no_reg ())
    {
      /* We can only use x0, x0 if there's no chance of the vtype change causing
	 the previous vl to become invalid.  */
      if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p ()
	  && info.same_vlmax_p (prev_info))
	{
	  emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX,
			    rinsn);
	  return VSETVL_VTYPE_CHANGE_ONLY;
	}
      /* Otherwise use an AVL of 0 to avoid depending on previous vl.  */
      vl_vtype_info new_info = info;
      new_info.set_avl_info (avl_info (const0_rtx, nullptr));
      emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, new_info, NULL_RTX,
			rinsn);
      return VSETVL_DISCARD_RESULT;
    }

  /* Use X0 as the DestReg unless AVLReg is X0. We also need to change the
     opcode if the AVLReg is X0 as they have different register classes for
     the AVL operand.  */
  if (vlmax_avl_p (info.get_avl ()))
    {
      gcc_assert (has_vtype_op (rinsn) || vsetvl_insn_p (rinsn));
      /* For user vsetvli a5, zero, we should use get_vl to get the VL
	 operand "a5".  */
      rtx vl_op
	= vsetvl_insn_p (rinsn) ? get_vl (rinsn) : info.get_avl_reg_rtx ();
      gcc_assert (!vlmax_avl_p (vl_op));
      emit_vsetvl_insn (VSETVL_NORMAL, emit_type, info, vl_op, rinsn);
      return VSETVL_NORMAL;
    }

  /* Plain register AVL: vsetvl zero, rs1 form.  */
  emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX, rinsn);

  if (dump_file)
    {
      fprintf (dump_file, "Update VL/VTYPE info, previous info=");
      prev_info.dump (dump_file);
    }
  return VSETVL_DISCARD_RESULT;
}
765
/* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
   to INSN. If such notes are added to an insn which references a
   CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
   that note, because the following loop optimization pass requires
   them.  */

/* ??? If there was a jump optimization pass after gcse and before loop,
   then we would not need to do this here, because jump would add the
   necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes.  */

static void
add_label_notes (rtx x, rtx_insn *rinsn)
{
  enum rtx_code code = GET_CODE (x);
  int i, j;
  const char *fmt;

  if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
    {
      /* This code used to ignore labels that referred to dispatch tables to
	 avoid flow generating (slightly) worse code.

	 We no longer ignore such label references (see LABEL_REF handling in
	 mark_jump_label for additional information).  */

      /* There's no reason for current users to emit jump-insns with
	 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
	 notes.  */
      gcc_assert (!JUMP_P (rinsn));
      add_reg_note (rinsn, REG_LABEL_OPERAND, label_ref_label (x));

      if (LABEL_P (label_ref_label (x)))
	LABEL_NUSES (label_ref_label (x))++;

      return;
    }

  /* Recurse into all sub-expressions of X ('e' = expression operand,
     'E' = vector of expressions).  */
  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	add_label_notes (XEXP (x, i), rinsn);
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  add_label_notes (XVECEXP (x, i, j), rinsn);
    }
}
812
/* Add EXPR to the end of basic block BB.

   This is used by both the PRE and code hoisting.  */

static void
insert_insn_end_basic_block (rtx_insn *rinsn, basic_block cfg_bb)
{
  rtx_insn *end_rinsn = BB_END (cfg_bb);
  rtx_insn *new_insn;
  rtx_insn *pat, *pat_end;

  pat = rinsn;
  gcc_assert (pat && INSN_P (pat));

  /* RINSN may be the head of a chain of insns; find its tail.  */
  pat_end = pat;
  while (NEXT_INSN (pat_end) != NULL_RTX)
    pat_end = NEXT_INSN (pat_end);

  /* If the last end_rinsn is a jump, insert EXPR in front. Similarly we need
     to take care of trapping instructions in presence of non-call exceptions.
   */

  if (JUMP_P (end_rinsn)
      || (NONJUMP_INSN_P (end_rinsn)
	  && (!single_succ_p (cfg_bb)
	      || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL)))
    {
      /* FIXME: What if something in jump uses value set in new end_rinsn?  */
      new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb);
    }

  /* Likewise if the last end_rinsn is a call, as will happen in the presence
     of exception handling.  */
  else if (CALL_P (end_rinsn)
	   && (!single_succ_p (cfg_bb)
	       || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL))
    {
      /* Keeping in mind targets with small register classes and parameters
	 in registers, we search backward and place the instructions before
	 the first parameter is loaded. Do this for everyone for consistency
	 and a presumption that we'll get better code elsewhere as well.  */

      /* Since different machines initialize their parameter registers
	 in different orders, assume nothing. Collect the set of all
	 parameter registers.  */
      end_rinsn = find_first_parameter_load (end_rinsn, BB_HEAD (cfg_bb));

      /* If we found all the parameter loads, then we want to insert
	 before the first parameter load.

	 If we did not find all the parameter loads, then we might have
	 stopped on the head of the block, which could be a CODE_LABEL.
	 If we inserted before the CODE_LABEL, then we would be putting
	 the end_rinsn in the wrong basic block. In that case, put the
	 end_rinsn after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK.
       */
      while (LABEL_P (end_rinsn) || NOTE_INSN_BASIC_BLOCK_P (end_rinsn))
	end_rinsn = NEXT_INSN (end_rinsn);

      new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb);
    }
  else
    new_insn = emit_insn_after_noloc (pat, end_rinsn, cfg_bb);

  /* Attach REG_LABEL_OPERAND notes for every insn in the inserted chain.  */
  while (1)
    {
      if (INSN_P (pat))
	add_label_notes (PATTERN (pat), new_insn);
      if (pat == pat_end)
	break;
      pat = NEXT_INSN (pat);
    }
}
886
/* Get VL/VTYPE information for INSN.  */
static vl_vtype_info
get_vl_vtype_info (const insn_info *insn)
{
  set_info *set = nullptr;
  rtx avl = ::get_avl (insn->rtl ());
  if (avl && REG_P (avl))
    {
      /* For a VLMAX AVL with an explicit VL operand, the AVL's source is
	 the definition of the VL register; otherwise it is the definition
	 of the AVL register itself.  */
      if (vlmax_avl_p (avl) && has_vl_op (insn->rtl ()))
	set
	  = find_access (insn->uses (), REGNO (get_vl (insn->rtl ())))->def ();
      else if (!vlmax_avl_p (avl))
	set = find_access (insn->uses (), REGNO (avl))->def ();
      else
	set = nullptr;
    }

  uint8_t sew = get_sew (insn->rtl ());
  enum vlmul_type vlmul = get_vlmul (insn->rtl ());
  uint8_t ratio = get_attr_ratio (insn->rtl ());
  /* when get_attr_ratio is invalid, this kind of instructions
     doesn't care about ratio. However, we still need this value
     in demand info backward analysis.  */
  if (ratio == INVALID_ATTRIBUTE)
    ratio = calculate_ratio (sew, vlmul);
  bool ta = tail_agnostic_p (insn->rtl ());
  bool ma = mask_agnostic_p (insn->rtl ());

  /* If merge operand is undef value, we prefer agnostic.  */
  int merge_op_idx = get_attr_merge_op_idx (insn->rtl ());
  if (merge_op_idx != INVALID_ATTRIBUTE
      && satisfies_constraint_vu (recog_data.operand[merge_op_idx]))
    {
      ta = true;
      ma = true;
    }

  vl_vtype_info info (avl_info (avl, set), sew, vlmul, ratio, ta, ma);
  return info;
}
927
/* Replace the pattern of RINSN with NEW_PAT, dumping before/after when
   dumping is enabled.  */
static void
change_insn (rtx_insn *rinsn, rtx new_pat)
{
  /* We don't apply change on RTL_SSA here since it's possible a
     new INSN we add in the PASS before which doesn't have RTL_SSA
     info yet.*/
  if (dump_file)
    {
      fprintf (dump_file, "\nChange PATTERN of insn %d from:\n",
	       INSN_UID (rinsn));
      print_rtl_single (dump_file, PATTERN (rinsn));
    }

  /* in_group = false: apply the change immediately, not queued.  */
  validate_change (rinsn, &PATTERN (rinsn), new_pat, false);

  if (dump_file)
    {
      fprintf (dump_file, "\nto:\n");
      print_rtl_single (dump_file, PATTERN (rinsn));
    }
}
949
60bd33bc
JZZ
/* Return the first read_vl insn after INSN in the same block, or
   nullptr if VL is redefined before any read_vl is found (or the block
   ends first).  The defs check must come before the read_vl check so a
   redefinition of VL stops the search.  */
static const insn_info *
get_forward_read_vl_insn (const insn_info *insn)
{
  const bb_info *bb = insn->bb ();
  for (const insn_info *i = insn->next_nondebug_insn ();
       real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ())
    {
      if (find_access (i->defs (), VL_REGNUM))
	return nullptr;
      if (read_vl_insn_p (i->rtl ()))
	return i;
    }
  return nullptr;
}
964
/* Return the nearest fault-first load preceding INSN in the same block,
   or nullptr if VL is (re)defined in between (or the block start is
   reached).  The fault-first check comes first: such a load itself
   defines VL but should still be returned.  */
static const insn_info *
get_backward_fault_first_load_insn (const insn_info *insn)
{
  const bb_info *bb = insn->bb ();
  for (const insn_info *i = insn->prev_nondebug_insn ();
       real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ())
    {
      if (fault_first_load_p (i->rtl ()))
	return i;
      if (find_access (i->defs (), VL_REGNUM))
	return nullptr;
    }
  return nullptr;
}
979
9243c3d1
JZZ
/* Try to apply CHANGE, replacing INSN's pattern with NEW_PAT under the
   RTL_SSA framework.  Return true and commit the change on success;
   return false (leaving the insn untouched, courtesy of the change
   watermark) if movement is impossible, the new pattern fails to
   recog, or the change is judged not worthwhile.  */
static bool
change_insn (function_info *ssa, insn_change change, insn_info *insn,
	     rtx new_pat)
{
  rtx_insn *rinsn = insn->rtl ();
  auto attempt = ssa->new_change_attempt ();
  if (!restrict_movement (change))
    return false;

  if (dump_file)
    {
      fprintf (dump_file, "\nChange PATTERN of insn %d from:\n",
	       INSN_UID (rinsn));
      print_rtl_single (dump_file, PATTERN (rinsn));
    }

  /* The watermark rolls the tentative change back automatically if we
     return before confirm_change_group.  */
  insn_change_watermark watermark;
  validate_change (rinsn, &PATTERN (rinsn), new_pat, true);

  /* These routines report failures themselves.  */
  if (!recog (attempt, change) || !change_is_worthwhile (change, false))
    return false;

  /* REG_EQUAL/REG_EQUIV notes may still reference a register (e.g. an
     AVL register like "a3") that the new pattern no longer uses; such a
     stale use would make the RTL_SSA change fail.  Drop the notes
     first, following the same approach as aarch64-cc-fusion.cc.  */
  remove_reg_equal_equiv_notes (rinsn);
  confirm_change_group ();
  ssa->change_insn (change);

  if (dump_file)
    {
      fprintf (dump_file, "\nto:\n");
      print_rtl_single (dump_file, PATTERN (rinsn));
    }
  return true;
}
1052
aef20243
JZZ
/* Rewrite the vsetvl instruction associated with INSN so that it
   matches INFO.  INSN is either a vsetvl instruction itself, or an RVV
   instruction whose vsetvl this pass emitted as the immediately
   preceding insn.  */
static void
change_vsetvl_insn (const insn_info *insn, const vector_insn_info &info)
{
  rtx_insn *rinsn;
  if (vector_config_insn_p (insn->rtl ()))
    {
      rinsn = insn->rtl ();
      gcc_assert (vsetvl_insn_p (rinsn) && "Can't handle X0, rs1 vsetvli yet");
    }
  else
    {
      gcc_assert (has_vtype_op (insn->rtl ()));
      /* The vsetvl for a vector instruction was inserted right before
	 it, so fetch the previous insn.  */
      rinsn = PREV_INSN (insn->rtl ());
      gcc_assert (vector_config_insn_p (rinsn));
    }
  rtx new_pat = gen_vsetvl_pat (rinsn, info);
  change_insn (rinsn, new_pat);
}
1071
69f39144
JZ
/* Return true if AVL_SOURCE is defined — directly, or through any set
   reached via its PHI inputs — by a vsetvl instruction.  Callers use
   this to avoid fusing AVLs whose definitions are themselves vsetvls
   (see source_equal_p, which skips such cases to avoid a dead loop).  */
static bool
avl_source_has_vsetvl_p (set_info *avl_source)
{
  if (!avl_source)
    return false;
  if (!avl_source->insn ())
    return false;
  if (avl_source->insn ()->is_real ())
    return vsetvl_insn_p (avl_source->insn ()->rtl ());
  /* Not a real insn: walk every set reachable through the PHI.  */
  hash_set<set_info *> sets = get_all_sets (avl_source, true, false, true);
  for (const auto set : sets)
    {
      if (set->insn ()->is_real () && vsetvl_insn_p (set->insn ()->rtl ()))
	return true;
    }
  return false;
}
1089
/* Return true if INSN1 and INSN2 are known to compute the same value,
   so that an AVL defined by either one may be treated as equal.
   Three cases are recognized: read_vl pairs fed by the same
   fault-first load, vsetvl pairs with compatible AVL and equal VLMAX,
   and ordinary single-set insns with equal sources and uses.  */
static bool
source_equal_p (insn_info *insn1, insn_info *insn2)
{
  if (!insn1 || !insn2)
    return false;
  rtx_insn *rinsn1 = insn1->rtl ();
  rtx_insn *rinsn2 = insn2->rtl ();
  if (!rinsn1 || !rinsn2)
    return false;
  rtx note1 = find_reg_equal_equiv_note (rinsn1);
  rtx note2 = find_reg_equal_equiv_note (rinsn2);
  rtx single_set1 = single_set (rinsn1);
  rtx single_set2 = single_set (rinsn2);
  /* Two read_vl results are equal iff they read the VL produced by the
     same fault-first load.  */
  if (read_vl_insn_p (rinsn1) && read_vl_insn_p (rinsn2))
    {
      const insn_info *load1 = get_backward_fault_first_load_insn (insn1);
      const insn_info *load2 = get_backward_fault_first_load_insn (insn2);
      return load1 && load2 && load1 == load2;
    }

  if (note1 && note2 && rtx_equal_p (note1, note2))
    return true;

  /* Since vsetvl instruction is not single SET.
     We handle this case specially here.  */
  if (vsetvl_insn_p (insn1->rtl ()) && vsetvl_insn_p (insn2->rtl ()))
    {
      /* For example:
	   vsetvl1 a6,a5,e32m1
	   RVV 1 (use a6 as AVL)
	   vsetvl2 a5,a5,e8mf4
	   RVV 2 (use a5 as AVL)
	 We consider AVL of RVV 1 and RVV 2 are same so that we can
	 gain more optimization opportunities.

	 Note: insn1_info.compatible_avl_p (insn2_info)
	 will make sure there is no instruction between vsetvl1 and vsetvl2
	 modify a5 since their def will be different if there is instruction
	 modify a5 and compatible_avl_p will return false.  */
      vector_insn_info insn1_info, insn2_info;
      insn1_info.parse_insn (insn1);
      insn2_info.parse_insn (insn2);

      /* To avoid dead loop, we don't optimize a vsetvli def has vsetvli
	 instructions which will complicate the situation.  */
      if (avl_source_has_vsetvl_p (insn1_info.get_avl_source ())
	  || avl_source_has_vsetvl_p (insn2_info.get_avl_source ()))
	return false;

      if (insn1_info.same_vlmax_p (insn2_info)
	  && insn1_info.compatible_avl_p (insn2_info))
	return true;
    }

  /* We only handle AVL is set by instructions with no side effects.  */
  if (!single_set1 || !single_set2)
    return false;
  if (!rtx_equal_p (SET_SRC (single_set1), SET_SRC (single_set2)))
    return false;
  /* RTL_SSA use lists also include uses coming from REG_EQUAL/REG_EQUIV
     notes, so two insns with identical SET_SRCs can still have
     different use lists (e.g. when only one of them carries a REG_EQUAL
     note mentioning an extra register).  Require the use lists to match
     element-wise before declaring the sources equal.  */
  if (insn1->uses ().size () != insn2->uses ().size ())
    return false;
  for (size_t i = 0; i < insn1->uses ().size (); i++)
    if (insn1->uses ()[i] != insn2->uses ()[i])
      return false;
  return true;
}
1183
/* Helper function to get single same real RTL source.
   return NULL if it is not a single real RTL source.
   For a PHI, this succeeds only if every reaching definition is a real
   insn computing the same value (per source_equal_p).  */
static insn_info *
extract_single_source (set_info *set)
{
  if (!set)
    return nullptr;
  if (set->insn ()->is_real ())
    return set->insn ();
  if (!set->insn ()->is_phi ())
    return nullptr;
  /* Collect the transitive set of definitions feeding the PHI.  */
  hash_set<set_info *> sets = get_all_sets (set, true, false, true);

  insn_info *first_insn = (*sets.begin ())->insn ();
  if (first_insn->is_artificial ())
    return nullptr;
  for (const set_info *set : sets)
    {
      /* If there is a head or end insn, we conservative return
	 NULL so that VSETVL PASS will insert vsetvl directly.  */
      if (set->insn ()->is_artificial ())
	return nullptr;
      if (!source_equal_p (set->insn (), first_insn))
	return nullptr;
    }

  return first_insn;
}
1212
ec99ffab
JZZ
1213static unsigned
1214calculate_sew (vlmul_type vlmul, unsigned int ratio)
1215{
1216 for (const unsigned sew : ALL_SEW)
1217 if (calculate_ratio (sew, vlmul) == ratio)
1218 return sew;
1219 return 0;
1220}
1221
1222static vlmul_type
1223calculate_vlmul (unsigned int sew, unsigned int ratio)
1224{
1225 for (const vlmul_type vlmul : ALL_LMUL)
1226 if (calculate_ratio (sew, vlmul) == ratio)
1227 return vlmul;
1228 return LMUL_RESERVED;
1229}
1230
1231static bool
1232incompatible_avl_p (const vector_insn_info &info1,
1233 const vector_insn_info &info2)
1234{
1235 return !info1.compatible_avl_p (info2) && !info2.compatible_avl_p (info1);
1236}
1237
1238static bool
1239different_sew_p (const vector_insn_info &info1, const vector_insn_info &info2)
1240{
1241 return info1.get_sew () != info2.get_sew ();
1242}
1243
1244static bool
1245different_lmul_p (const vector_insn_info &info1, const vector_insn_info &info2)
1246{
1247 return info1.get_vlmul () != info2.get_vlmul ();
1248}
1249
1250static bool
1251different_ratio_p (const vector_insn_info &info1, const vector_insn_info &info2)
1252{
1253 return info1.get_ratio () != info2.get_ratio ();
1254}
1255
1256static bool
1257different_tail_policy_p (const vector_insn_info &info1,
1258 const vector_insn_info &info2)
1259{
1260 return info1.get_ta () != info2.get_ta ();
1261}
1262
1263static bool
1264different_mask_policy_p (const vector_insn_info &info1,
1265 const vector_insn_info &info2)
1266{
1267 return info1.get_ma () != info2.get_ma ();
1268}
1269
1270static bool
1271possible_zero_avl_p (const vector_insn_info &info1,
1272 const vector_insn_info &info2)
1273{
1274 return !info1.has_non_zero_avl () || !info2.has_non_zero_avl ();
1275}
1276
ec99ffab
JZZ
1277static bool
1278second_ratio_invalid_for_first_sew_p (const vector_insn_info &info1,
1279 const vector_insn_info &info2)
1280{
1281 return calculate_vlmul (info1.get_sew (), info2.get_ratio ())
1282 == LMUL_RESERVED;
1283}
1284
1285static bool
1286second_ratio_invalid_for_first_lmul_p (const vector_insn_info &info1,
1287 const vector_insn_info &info2)
1288{
1289 return calculate_sew (info1.get_vlmul (), info2.get_ratio ()) == 0;
1290}
1291
1292static bool
1293second_sew_less_than_first_sew_p (const vector_insn_info &info1,
1294 const vector_insn_info &info2)
1295{
1296 return info2.get_sew () < info1.get_sew ();
1297}
1298
1299static bool
1300first_sew_less_than_second_sew_p (const vector_insn_info &info1,
1301 const vector_insn_info &info2)
1302{
1303 return info1.get_sew () < info2.get_sew ();
1304}
1305
/* Three-way comparison of two LMUL values.
   return 0 if LMUL1 == LMUL2.
   return -1 if LMUL1 < LMUL2.
   return 1 if LMUL1 > LMUL2.

   NOTE (review): the switch below appears to implement the opposite of
   the convention stated above — e.g. for vlmul1 == LMUL_1 it returns 1
   when vlmul2 is LMUL_2/LMUL_4/LMUL_8 (i.e. when LMUL1 < LMUL2) — and
   the LMUL_F8 case (the smallest LMUL) returns 0 against any distinct
   vlmul2.  Callers such as second_lmul_less_than_first_lmul_p depend on
   the behavior as written, so the code is kept unchanged here; confirm
   the intended convention before modifying either side.  */
static int
compare_lmul (vlmul_type vlmul1, vlmul_type vlmul2)
{
  if (vlmul1 == vlmul2)
    return 0;

  switch (vlmul1)
    {
    case LMUL_1:
      if (vlmul2 == LMUL_2 || vlmul2 == LMUL_4 || vlmul2 == LMUL_8)
	return 1;
      else
	return -1;
    case LMUL_2:
      if (vlmul2 == LMUL_4 || vlmul2 == LMUL_8)
	return 1;
      else
	return -1;
    case LMUL_4:
      if (vlmul2 == LMUL_8)
	return 1;
      else
	return -1;
    case LMUL_8:
      return -1;
    case LMUL_F2:
      if (vlmul2 == LMUL_1 || vlmul2 == LMUL_2 || vlmul2 == LMUL_4
	  || vlmul2 == LMUL_8)
	return 1;
      else
	return -1;
    case LMUL_F4:
      if (vlmul2 == LMUL_F2 || vlmul2 == LMUL_1 || vlmul2 == LMUL_2
	  || vlmul2 == LMUL_4 || vlmul2 == LMUL_8)
	return 1;
      else
	return -1;
    case LMUL_F8:
      return 0;
    default:
      gcc_unreachable ();
    }
}
1352
/* Return true if INFO2's LMUL compares as "less than" INFO1's per
   compare_lmul's convention.  NOTE (review): compare_lmul's comment and
   implementation disagree on the sign convention; verify this predicate
   actually matches its name before relying on it.  */
static bool
second_lmul_less_than_first_lmul_p (const vector_insn_info &info1,
				    const vector_insn_info &info2)
{
  return compare_lmul (info2.get_vlmul (), info1.get_vlmul ()) == -1;
}

/* Return true if INFO2's SEW/LMUL ratio is smaller than INFO1's.  */
static bool
second_ratio_less_than_first_ratio_p (const vector_insn_info &info1,
				      const vector_insn_info &info2)
{
  return info2.get_ratio () < info1.get_ratio ();
}
1366
/* Table of demand-pair conditions under which two vector_insn_infos are
   incompatible; each row is expanded from a DEF_INCOMPATIBLE_COND entry
   in riscv-vsetvl.def and pairs a demand pattern with the predicate
   that decides incompatibility.  */
static CONSTEXPR const demands_cond incompatible_conds[] = {
#define DEF_INCOMPATIBLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1,         \
			      GE_SEW1, TAIL_POLICTY1, MASK_POLICY1, AVL2,      \
			      SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2,      \
			      TAIL_POLICTY2, MASK_POLICY2, COND)               \
  {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1,          \
     MASK_POLICY1},                                                            \
    {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2,          \
     MASK_POLICY2}},                                                           \
   COND},
#include "riscv-vsetvl.def"
};
1379
1380static unsigned
1381greatest_sew (const vector_insn_info &info1, const vector_insn_info &info2)
1382{
1383 return std::max (info1.get_sew (), info2.get_sew ());
1384}
1385
1386static unsigned
1387first_sew (const vector_insn_info &info1, const vector_insn_info &)
1388{
1389 return info1.get_sew ();
1390}
1391
1392static unsigned
1393second_sew (const vector_insn_info &, const vector_insn_info &info2)
1394{
1395 return info2.get_sew ();
1396}
1397
1398static vlmul_type
1399first_vlmul (const vector_insn_info &info1, const vector_insn_info &)
1400{
1401 return info1.get_vlmul ();
1402}
1403
1404static vlmul_type
1405second_vlmul (const vector_insn_info &, const vector_insn_info &info2)
1406{
1407 return info2.get_vlmul ();
1408}
1409
1410static unsigned
1411first_ratio (const vector_insn_info &info1, const vector_insn_info &)
1412{
1413 return info1.get_ratio ();
1414}
1415
1416static unsigned
1417second_ratio (const vector_insn_info &, const vector_insn_info &info2)
1418{
1419 return info2.get_ratio ();
1420}
1421
1422static vlmul_type
1423vlmul_for_first_sew_second_ratio (const vector_insn_info &info1,
1424 const vector_insn_info &info2)
1425{
1426 return calculate_vlmul (info1.get_sew (), info2.get_ratio ());
1427}
1428
1429static unsigned
1430ratio_for_second_sew_first_vlmul (const vector_insn_info &info1,
1431 const vector_insn_info &info2)
1432{
1433 return calculate_ratio (info2.get_sew (), info1.get_vlmul ());
1434}
1435
/* Table of SEW/LMUL demand-fusion rules, expanded from
   DEF_SEW_LMUL_FUSE_RULE entries in riscv-vsetvl.def: for each matching
   pair of demand patterns it records the fused demand flags and the
   selector callbacks that compute the fused SEW/LMUL/ratio.  */
static CONSTEXPR const demands_fuse_rule fuse_rules[] = {
#define DEF_SEW_LMUL_FUSE_RULE(DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1,       \
			       DEMAND_GE_SEW1, DEMAND_SEW2, DEMAND_LMUL2,      \
			       DEMAND_RATIO2, DEMAND_GE_SEW2, NEW_DEMAND_SEW,  \
			       NEW_DEMAND_LMUL, NEW_DEMAND_RATIO,              \
			       NEW_DEMAND_GE_SEW, NEW_SEW, NEW_VLMUL,          \
			       NEW_RATIO)                                      \
  {{{DEMAND_ANY, DEMAND_SEW1, DEMAND_LMUL1, DEMAND_RATIO1, DEMAND_ANY,         \
     DEMAND_GE_SEW1, DEMAND_ANY, DEMAND_ANY},                                  \
    {DEMAND_ANY, DEMAND_SEW2, DEMAND_LMUL2, DEMAND_RATIO2, DEMAND_ANY,         \
     DEMAND_GE_SEW2, DEMAND_ANY, DEMAND_ANY}},                                 \
   NEW_DEMAND_SEW,                                                             \
   NEW_DEMAND_LMUL,                                                            \
   NEW_DEMAND_RATIO,                                                           \
   NEW_DEMAND_GE_SEW,                                                          \
   NEW_SEW,                                                                    \
   NEW_VLMUL,                                                                  \
   NEW_RATIO},
#include "riscv-vsetvl.def"
};
1456
1457static bool
1458always_unavailable (const vector_insn_info &, const vector_insn_info &)
1459{
1460 return true;
1461}
1462
1463static bool
1464avl_unavailable_p (const vector_insn_info &info1, const vector_insn_info &info2)
1465{
1466 return !info2.compatible_avl_p (info1.get_avl_info ());
1467}
1468
1469static bool
1470sew_unavailable_p (const vector_insn_info &info1, const vector_insn_info &info2)
1471{
1472 if (!info2.demand_p (DEMAND_LMUL) && !info2.demand_p (DEMAND_RATIO))
1473 {
1474 if (info2.demand_p (DEMAND_GE_SEW))
1475 return info1.get_sew () < info2.get_sew ();
1476 return info1.get_sew () != info2.get_sew ();
1477 }
1478 return true;
1479}
1480
1481static bool
1482lmul_unavailable_p (const vector_insn_info &info1,
1483 const vector_insn_info &info2)
1484{
1485 if (info1.get_vlmul () == info2.get_vlmul () && !info2.demand_p (DEMAND_SEW)
1486 && !info2.demand_p (DEMAND_RATIO))
1487 return false;
1488 return true;
1489}
1490
1491static bool
1492ge_sew_unavailable_p (const vector_insn_info &info1,
1493 const vector_insn_info &info2)
1494{
1495 if (!info2.demand_p (DEMAND_LMUL) && !info2.demand_p (DEMAND_RATIO)
1496 && info2.demand_p (DEMAND_GE_SEW))
1497 return info1.get_sew () < info2.get_sew ();
1498 return true;
1499}
1500
1501static bool
1502ge_sew_lmul_unavailable_p (const vector_insn_info &info1,
1503 const vector_insn_info &info2)
1504{
1505 if (!info2.demand_p (DEMAND_RATIO) && info2.demand_p (DEMAND_GE_SEW))
1506 return info1.get_sew () < info2.get_sew ();
1507 return true;
1508}
1509
1510static bool
1511ge_sew_ratio_unavailable_p (const vector_insn_info &info1,
1512 const vector_insn_info &info2)
1513{
1514 if (!info2.demand_p (DEMAND_LMUL) && info2.demand_p (DEMAND_GE_SEW))
1515 return info1.get_sew () < info2.get_sew ();
1516 return true;
1517}
1518
/* Table of demand-pair conditions under which INFO1 is unavailable for
   INFO2's demand; each row is expanded from a DEF_UNAVAILABLE_COND
   entry in riscv-vsetvl.def.  */
static CONSTEXPR const demands_cond unavailable_conds[] = {
#define DEF_UNAVAILABLE_COND(AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, \
			     TAIL_POLICTY1, MASK_POLICY1, AVL2, SEW2, LMUL2,   \
			     RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2,     \
			     MASK_POLICY2, COND)                               \
  {{{AVL1, SEW1, LMUL1, RATIO1, NONZERO_AVL1, GE_SEW1, TAIL_POLICTY1,          \
     MASK_POLICY1},                                                            \
    {AVL2, SEW2, LMUL2, RATIO2, NONZERO_AVL2, GE_SEW2, TAIL_POLICTY2,          \
     MASK_POLICY2}},                                                           \
   COND},
#include "riscv-vsetvl.def"
};
1531
1532static bool
1533same_sew_lmul_demand_p (const bool *dems1, const bool *dems2)
1534{
1535 return dems1[DEMAND_SEW] == dems2[DEMAND_SEW]
1536 && dems1[DEMAND_LMUL] == dems2[DEMAND_LMUL]
1537 && dems1[DEMAND_RATIO] == dems2[DEMAND_RATIO] && !dems1[DEMAND_GE_SEW]
1538 && !dems2[DEMAND_GE_SEW];
1539}
1540
1541static bool
1542propagate_avl_across_demands_p (const vector_insn_info &info1,
1543 const vector_insn_info &info2)
1544{
1545 if (info2.demand_p (DEMAND_AVL))
1546 {
1547 if (info2.demand_p (DEMAND_NONZERO_AVL))
1548 return info1.demand_p (DEMAND_AVL)
1549 && !info1.demand_p (DEMAND_NONZERO_AVL) && info1.has_avl_reg ();
1550 }
1551 else
1552 return info1.demand_p (DEMAND_AVL) && info1.has_avl_reg ();
1553 return false;
1554}
1555
1556static bool
a481eed8 1557reg_available_p (const insn_info *insn, const vector_insn_info &info)
ec99ffab 1558{
a481eed8 1559 if (info.has_avl_reg () && !info.get_avl_source ())
44c918b5 1560 return false;
a481eed8
JZZ
1561 insn_info *def_insn = info.get_avl_source ()->insn ();
1562 if (def_insn->bb () == insn->bb ())
1563 return before_p (def_insn, insn);
ec99ffab 1564 else
a481eed8
JZZ
1565 return dominated_by_p (CDI_DOMINATORS, insn->bb ()->cfg_bb (),
1566 def_insn->bb ()->cfg_bb ());
ec99ffab
JZZ
1567}
1568
60bd33bc
JZZ
/* Return true if the instruction support relaxed compatible check.
   Concretely: INFO1 is a fault-first load and INFO2's AVL comes from a
   PHI one of whose inputs is the read_vl result of that very load — in
   that case only VTYPE compatibility needs to hold.  */
static bool
support_relaxed_compatible_p (const vector_insn_info &info1,
			      const vector_insn_info &info2)
{
  if (fault_first_load_p (info1.get_insn ()->rtl ())
      && info2.demand_p (DEMAND_AVL) && info2.has_avl_reg ()
      && info2.get_avl_source () && info2.get_avl_source ()->insn ()->is_phi ())
    {
      /* Check every definition feeding INFO2's AVL PHI.  */
      hash_set<set_info *> sets
	= get_all_sets (info2.get_avl_source (), true, false, false);
      for (set_info *set : sets)
	{
	  if (read_vl_insn_p (set->insn ()->rtl ()))
	    {
	      /* Does this read_vl read the VL written by INFO1's
		 fault-first load?  */
	      const insn_info *insn
		= get_backward_fault_first_load_insn (set->insn ());
	      if (insn == info1.get_insn ())
		return info2.compatible_vtype_p (info1);
	    }
	}
    }
  return false;
}
1593
/* Return true if the block is worthwhile backward propagation.
   For loop blocks, propagation only pays off when the block's local
   demand is compatible (possibly in the relaxed sense) with what
   reaches out of it; otherwise a vsetvl inside the loop is unavoidable
   anyway.  */
static bool
backward_propagate_worthwhile_p (const basic_block cfg_bb,
				 const vector_block_info block_info)
{
  if (loop_basic_block_p (cfg_bb))
    {
      if (block_info.reaching_out.valid_or_dirty_p ())
	{
	  if (block_info.local_dem.compatible_p (block_info.reaching_out))
	    {
	      /* Case 1 (Can backward propagate):
		 The loop body's demand is uniform, e.g.:
		   for (int i = 0; i < n; i++)
		     {
		       vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
		       __riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
		     }
		 local_dem is compatible with reaching_out, so backward
		 propagation is worthwhile.  */
	      return true;
	    }
	  else
	    {
	      /* The fault-first-load relaxation may still apply.  */
	      if (support_relaxed_compatible_p (block_info.reaching_out,
						block_info.local_dem))
		return true;
	      /* Case 2 (Don't backward propagate):
		 The loop body needs two different configurations, e.g.
		 an mf4 load/store followed by an mf2 load/store in the
		 same iteration.  local_dem is incompatible with
		 reaching_out, so a vsetvl inside the loop is
		 unavoidable and propagation is pointless.  */
	      return false;
	    }
	}
      else
	{
	  gcc_assert (block_info.reaching_out.unknown_p ());
	  /* Case 3 (Don't backward propagate):
	     The loop body ends in an unknown state (e.g. it calls a
	     function after the vector code).  local_dem is VALID but
	     reaching_out is UNKNOWN, so a vsetvl inside the loop is
	     unavoidable and propagation is pointless.  */
	  return false;
	}
    }

  return true;
}
1662
a2d12abe
JZZ
1663/* Count the number of REGNO in RINSN. */
1664static int
1665count_regno_occurrences (rtx_insn *rinsn, unsigned int regno)
1666{
1667 int count = 0;
1668 extract_insn (rinsn);
1669 for (int i = 0; i < recog_data.n_operands; i++)
1670 if (refers_to_regno_p (regno, recog_data.operand[i]))
1671 count++;
1672 return count;
1673}
1674
12b23c71
JZZ
1675avl_info::avl_info (const avl_info &other)
1676{
1677 m_value = other.get_value ();
1678 m_source = other.get_source ();
1679}
1680
9243c3d1
JZZ
1681avl_info::avl_info (rtx value_in, set_info *source_in)
1682 : m_value (value_in), m_source (source_in)
1683{}
1684
4f673c5e
JZZ
1685bool
1686avl_info::single_source_equal_p (const avl_info &other) const
1687{
1688 set_info *set1 = m_source;
1689 set_info *set2 = other.get_source ();
6b6b9c68
JZZ
1690 insn_info *insn1 = extract_single_source (set1);
1691 insn_info *insn2 = extract_single_source (set2);
1692 if (!insn1 || !insn2)
1693 return false;
1694 return source_equal_p (insn1, insn2);
1695}
1696
/* Return true if two multi-source (PHI-defined) AVLs can be proven
   equal.  Currently only identical sets and degenerate PHIs with the
   same single input are recognized.  */
bool
avl_info::multiple_source_equal_p (const avl_info &other) const
{
  /* When the def info is same in RTL_SSA namespace, it's safe
     to consider they are avl compatible.  */
  if (m_source == other.get_source ())
    return true;

  /* We only consider handle PHI node.  */
  if (!m_source->insn ()->is_phi () || !other.get_source ()->insn ()->is_phi ())
    return false;

  phi_info *phi1 = as_a<phi_info *> (m_source);
  phi_info *phi2 = as_a<phi_info *> (other.get_source ());

  if (phi1->is_degenerate () && phi2->is_degenerate ())
    {
      /* Degenerate PHI means the PHI node only have one input.  */

      /* If both PHI nodes have the same single input in use list.
	 We consider they are AVL compatible.  */
      if (phi1->input_value (0) == phi2->input_value (0))
	return true;
    }
  /* TODO: We can support more optimization cases in the future.  */
  return false;
}
1724
9243c3d1
JZZ
/* Copy assignment: copy both the AVL rtx value and its defining set.  */
avl_info &
avl_info::operator= (const avl_info &other)
{
  m_value = other.get_value ();
  m_source = other.get_source ();
  return *this;
}

/* Equality of AVLs: immediates compare by value, VLMAX AVLs by kind,
   and register AVLs by their defining instruction(s).  */
bool
avl_info::operator== (const avl_info &other) const
{
  /* A null value only equals another null value.  */
  if (!m_value)
    return !other.get_value ();
  if (!other.get_value ())
    return false;

  if (GET_CODE (m_value) != GET_CODE (other.get_value ()))
    return false;

  /* Handle CONST_INT AVL.  */
  if (CONST_INT_P (m_value))
    return INTVAL (m_value) == INTVAL (other.get_value ());

  /* Handle VLMAX AVL.  */
  if (vlmax_avl_p (m_value))
    return vlmax_avl_p (other.get_value ());

  /* If any source is undef value, we think they are not equal.  */
  if (!m_source || !other.get_source ())
    return false;

  /* If both sources are single source (defined by a single real RTL)
     and their definitions are same.  */
  if (single_source_equal_p (other))
    return true;

  return multiple_source_equal_p (other);
}

bool
avl_info::operator!= (const avl_info &other) const
{
  return !(*this == other);
}
1769
ec99ffab
JZZ
1770bool
1771avl_info::has_non_zero_avl () const
1772{
1773 if (has_avl_imm ())
1774 return INTVAL (get_value ()) > 0;
1775 if (has_avl_reg ())
1776 return vlmax_avl_p (get_value ());
1777 return false;
1778}
1779
9243c3d1
JZZ
/* Initialize VL/VTYPE information.  SEW must be a valid element width
   (enforced by the assert below).  */
vl_vtype_info::vl_vtype_info (avl_info avl_in, uint8_t sew_in,
			      enum vlmul_type vlmul_in, uint8_t ratio_in,
			      bool ta_in, bool ma_in)
  : m_avl (avl_in), m_sew (sew_in), m_vlmul (vlmul_in), m_ratio (ratio_in),
    m_ta (ta_in), m_ma (ma_in)
{
  gcc_assert (valid_sew_p (m_sew) && "Unexpected SEW");
}
1789
1790bool
1791vl_vtype_info::operator== (const vl_vtype_info &other) const
1792{
6b6b9c68 1793 return same_avl_p (other) && m_sew == other.get_sew ()
9243c3d1
JZZ
1794 && m_vlmul == other.get_vlmul () && m_ta == other.get_ta ()
1795 && m_ma == other.get_ma () && m_ratio == other.get_ratio ();
1796}
1797
1798bool
1799vl_vtype_info::operator!= (const vl_vtype_info &other) const
1800{
1801 return !(*this == other);
1802}
1803
9243c3d1
JZZ
1804bool
1805vl_vtype_info::same_avl_p (const vl_vtype_info &other) const
1806{
6b6b9c68
JZZ
1807 /* We need to compare both RTL and SET. If both AVL are CONST_INT.
1808 For example, const_int 3 and const_int 4, we need to compare
1809 RTL. If both AVL are REG and their REGNO are same, we need to
1810 compare SET. */
1811 return get_avl () == other.get_avl ()
1812 && get_avl_source () == other.get_avl_source ();
9243c3d1
JZZ
1813}
1814
1815bool
1816vl_vtype_info::same_vtype_p (const vl_vtype_info &other) const
1817{
1818 return get_sew () == other.get_sew () && get_vlmul () == other.get_vlmul ()
1819 && get_ta () == other.get_ta () && get_ma () == other.get_ma ();
1820}
1821
1822bool
1823vl_vtype_info::same_vlmax_p (const vl_vtype_info &other) const
1824{
1825 return get_ratio () == other.get_ratio ();
1826}
1827
/* Compare the compatibility between Dem1 and Dem2.
   If Dem1 > Dem2, Dem1 has bigger compatibility then Dem2
   meaning Dem1 is easier be compatible with others than Dem2
   or Dem2 is stricter than Dem1.
   For example, Dem1 (demand SEW + LMUL) > Dem2 (demand RATIO).  */
bool
vector_insn_info::operator>= (const vector_insn_info &other) const
{
  /* Fault-first-load relaxation: only the VTYPE-related unavailable
     conditions are checked, AVL conditions are bypassed.  */
  if (support_relaxed_compatible_p (*this, other))
    {
      unsigned array_size = sizeof (unavailable_conds) / sizeof (demands_cond);
      /* Bypass AVL unavailable cases.  */
      for (unsigned i = 2; i < array_size; i++)
	if (unavailable_conds[i].pair.match_cond_p (this->get_demands (),
						    other.get_demands ())
	    && unavailable_conds[i].incompatible_p (*this, other))
	  return false;
      return true;
    }

  if (!other.compatible_p (static_cast<const vl_vtype_info &> (*this)))
    return false;
  if (!this->compatible_p (static_cast<const vl_vtype_info &> (other)))
    return true;

  if (*this == other)
    return true;

  /* Consult the table-driven unavailability conditions.  */
  for (const auto &cond : unavailable_conds)
    if (cond.pair.match_cond_p (this->get_demands (), other.get_demands ())
	&& cond.incompatible_p (*this, other))
      return false;

  return true;
}
1863
/* Equality of two vector_insn_infos: state, demand flags, (for config
   insns) the insn itself, AVL, and full VTYPE must all agree.  */
bool
vector_insn_info::operator== (const vector_insn_info &other) const
{
  gcc_assert (!uninit_p () && !other.uninit_p ()
	      && "Uninitialization should not happen");

  /* Empty is only equal to another Empty.  */
  if (empty_p ())
    return other.empty_p ();
  if (other.empty_p ())
    return empty_p ();

  /* Unknown is only equal to another Unknown.  */
  if (unknown_p ())
    return other.unknown_p ();
  if (other.unknown_p ())
    return unknown_p ();

  for (size_t i = 0; i < NUM_DEMAND; i++)
    if (m_demands[i] != other.demand_p ((enum demand_type) i))
      return false;

  /* For vector configuration insns, identity of the insn matters.  */
  if (vector_config_insn_p (m_insn->rtl ())
      || vector_config_insn_p (other.get_insn ()->rtl ()))
    if (m_insn != other.get_insn ())
      return false;

  if (!same_avl_p (other))
    return false;

  /* If the full VTYPE is valid, check that it is the same.  */
  return same_vtype_p (other);
}
1897
/* Parse VL/VTYPE information directly from RINSN (no RTL_SSA insn
   available, so the AVL source is left null).  At -O0 any instruction
   with a vtype operand is accepted; when optimizing, only explicit
   vsetvl instructions are.
   NOTE (review): m_ratio is not assigned here — presumably derived
   elsewhere from SEW/LMUL; confirm before relying on it after this
   call.  */
void
vector_insn_info::parse_insn (rtx_insn *rinsn)
{
  *this = vector_insn_info ();
  if (!NONDEBUG_INSN_P (rinsn))
    return;
  if (optimize == 0 && !has_vtype_op (rinsn))
    return;
  if (optimize > 0 && !vsetvl_insn_p (rinsn))
    return;
  m_state = VALID;
  extract_insn_cached (rinsn);
  rtx avl = ::get_avl (rinsn);
  m_avl = avl_info (avl, nullptr);
  m_sew = ::get_sew (rinsn);
  m_vlmul = ::get_vlmul (rinsn);
  m_ta = tail_agnostic_p (rinsn);
  m_ma = mask_agnostic_p (rinsn);
}
1917
1918void
1919vector_insn_info::parse_insn (insn_info *insn)
1920{
1921 *this = vector_insn_info ();
1922
1923 /* Return if it is debug insn for the consistency with optimize == 0. */
1924 if (insn->is_debug_insn ())
1925 return;
1926
1927 /* We set it as unknown since we don't what will happen in CALL or ASM. */
1928 if (insn->is_call () || insn->is_asm ())
1929 {
1930 set_unknown ();
1931 return;
1932 }
1933
1934 /* If this is something that updates VL/VTYPE that we don't know about, set
1935 the state to unknown. */
60bd33bc 1936 if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())
9243c3d1
JZZ
1937 && (find_access (insn->defs (), VL_REGNUM)
1938 || find_access (insn->defs (), VTYPE_REGNUM)))
1939 {
1940 set_unknown ();
1941 return;
1942 }
1943
1944 if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()))
1945 return;
1946
1947 /* Warning: This function has to work on both the lowered (i.e. post
1948 emit_local_forward_vsetvls) and pre-lowering forms. The main implication
1949 of this is that it can't use the value of a SEW, VL, or Policy operand as
1950 they might be stale after lowering. */
1951 vl_vtype_info::operator= (get_vl_vtype_info (insn));
1952 m_insn = insn;
1953 m_state = VALID;
1954 if (vector_config_insn_p (insn->rtl ()))
1955 {
1956 m_demands[DEMAND_AVL] = true;
1957 m_demands[DEMAND_RATIO] = true;
1958 return;
1959 }
1960
1961 if (has_vl_op (insn->rtl ()))
1962 m_demands[DEMAND_AVL] = true;
1963
1964 if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE)
1965 m_demands[DEMAND_RATIO] = true;
1966 else
1967 {
1968 /* TODO: By default, if it doesn't demand RATIO, we set it
1969 demand SEW && LMUL both. Some instructions may demand SEW
1970 only and ignore LMUL, will fix it later. */
1971 m_demands[DEMAND_SEW] = true;
ec99ffab
JZZ
1972 if (!ignore_vlmul_insn_p (insn->rtl ()))
1973 m_demands[DEMAND_LMUL] = true;
9243c3d1
JZZ
1974 }
1975
1976 if (get_attr_ta (insn->rtl ()) != INVALID_ATTRIBUTE)
1977 m_demands[DEMAND_TAIL_POLICY] = true;
1978 if (get_attr_ma (insn->rtl ()) != INVALID_ATTRIBUTE)
1979 m_demands[DEMAND_MASK_POLICY] = true;
6b6b9c68
JZZ
1980
1981 if (vector_config_insn_p (insn->rtl ()))
1982 return;
1983
ec99ffab
JZZ
1984 if (scalar_move_insn_p (insn->rtl ()))
1985 {
1986 if (m_avl.has_non_zero_avl ())
1987 m_demands[DEMAND_NONZERO_AVL] = true;
1988 if (m_ta)
1989 m_demands[DEMAND_GE_SEW] = true;
1990 }
1991
1992 if (!m_avl.has_avl_reg () || vlmax_avl_p (get_avl ()) || !m_avl.get_source ())
1993 return;
1994 if (!m_avl.get_source ()->insn ()->is_real ()
1995 && !m_avl.get_source ()->insn ()->is_phi ())
6b6b9c68
JZZ
1996 return;
1997
1998 insn_info *def_insn = extract_single_source (m_avl.get_source ());
ec99ffab
JZZ
1999 if (!def_insn || !vsetvl_insn_p (def_insn->rtl ()))
2000 return;
9243c3d1 2001
ec99ffab
JZZ
2002 vector_insn_info new_info;
2003 new_info.parse_insn (def_insn);
2004 if (!same_vlmax_p (new_info) && !scalar_move_insn_p (insn->rtl ()))
2005 return;
9326a49c
JZ
2006
2007 if (new_info.has_avl ())
2008 {
2009 if (new_info.has_avl_imm ())
2010 set_avl_info (avl_info (new_info.get_avl (), nullptr));
2011 else
2012 {
2013 if (vlmax_avl_p (new_info.get_avl ()))
2014 set_avl_info (avl_info (new_info.get_avl (), get_avl_source ()));
2015 else
2016 {
2017 /* Conservatively propagate non-VLMAX AVL of user vsetvl:
2018 1. The user vsetvl should be same block with the rvv insn.
2019 2. The user vsetvl is the only def insn of rvv insn.
2020 3. The AVL is not modified between def-use chain.
2021 4. The VL is only used by insn within EBB.
2022 */
2023 bool modified_p = false;
2024 for (insn_info *i = def_insn->next_nondebug_insn ();
2025 real_insn_and_same_bb_p (i, get_insn ()->bb ());
2026 i = i->next_nondebug_insn ())
2027 {
c26f2758
JZ
2028 /* Consider this following sequence:
2029
2030 insn 1: vsetvli a5,a3,e8,mf4,ta,mu
2031 insn 2: vsetvli zero,a5,e32,m1,ta,ma
2032 ...
2033 vle32.v v1,0(a1)
2034 vsetvli a2,zero,e32,m1,ta,ma
2035 vadd.vv v1,v1,v1
2036 vsetvli zero,a5,e32,m1,ta,ma
2037 vse32.v v1,0(a0)
2038 ...
2039 insn 3: sub a3,a3,a5
2040 ...
2041
2042 We can local AVL propagate "a3" from insn 1 to insn 2
2043 if no insns between insn 1 and insn 2 modify "a3 even
2044 though insn 3 modifies "a3".
2045 Otherwise, we can't perform local AVL propagation.
2046
2047 Early break if we reach the insn 2. */
2048 if (!before_p (i, insn))
2049 break;
9326a49c
JZ
2050 if (find_access (i->defs (), REGNO (new_info.get_avl ())))
2051 {
2052 modified_p = true;
2053 break;
2054 }
2055 }
2056
2057 bool has_live_out_use = false;
2058 for (use_info *use : m_avl.get_source ()->all_uses ())
2059 {
2060 if (use->is_live_out_use ())
2061 {
2062 has_live_out_use = true;
2063 break;
2064 }
2065 }
2066 if (!modified_p && !has_live_out_use
2067 && def_insn == m_avl.get_source ()->insn ()
2068 && m_insn->bb () == def_insn->bb ())
2069 set_avl_info (new_info.get_avl_info ());
2070 }
2071 }
2072 }
ec99ffab
JZZ
2073
2074 if (scalar_move_insn_p (insn->rtl ()) && m_avl.has_non_zero_avl ())
2075 m_demands[DEMAND_NONZERO_AVL] = true;
9243c3d1
JZZ
2076}
2077
2078bool
2079vector_insn_info::compatible_p (const vector_insn_info &other) const
2080{
2081 gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p ()
2082 && "Can't compare invalid demanded infos");
2083
ec99ffab 2084 for (const auto &cond : incompatible_conds)
60bd33bc 2085 if (cond.dual_incompatible_p (*this, other))
ec99ffab 2086 return false;
9243c3d1
JZZ
2087 return true;
2088}
2089
d51f2456
JZ
2090bool
2091vector_insn_info::skip_avl_compatible_p (const vector_insn_info &other) const
2092{
2093 gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p ()
2094 && "Can't compare invalid demanded infos");
2095 unsigned array_size = sizeof (incompatible_conds) / sizeof (demands_cond);
2096 /* Bypass AVL incompatible cases. */
2097 for (unsigned i = 1; i < array_size; i++)
2098 if (incompatible_conds[i].dual_incompatible_p (*this, other))
2099 return false;
2100 return true;
2101}
2102
9243c3d1
JZZ
2103bool
2104vector_insn_info::compatible_avl_p (const vl_vtype_info &other) const
2105{
2106 gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
2107 gcc_assert (!unknown_p () && "Can't compare AVL in unknown state");
2108 if (!demand_p (DEMAND_AVL))
2109 return true;
ec99ffab
JZZ
2110 if (demand_p (DEMAND_NONZERO_AVL) && other.has_non_zero_avl ())
2111 return true;
9243c3d1
JZZ
2112 return get_avl_info () == other.get_avl_info ();
2113}
2114
4f673c5e
JZZ
2115bool
2116vector_insn_info::compatible_avl_p (const avl_info &other) const
2117{
2118 gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
2119 gcc_assert (!unknown_p () && "Can't compare AVL in unknown state");
2120 gcc_assert (demand_p (DEMAND_AVL) && "Can't compare AVL undemand state");
ec99ffab
JZZ
2121 if (!demand_p (DEMAND_AVL))
2122 return true;
2123 if (demand_p (DEMAND_NONZERO_AVL) && other.has_non_zero_avl ())
2124 return true;
4f673c5e
JZZ
2125 return get_avl_info () == other;
2126}
2127
9243c3d1
JZZ
2128bool
2129vector_insn_info::compatible_vtype_p (const vl_vtype_info &other) const
2130{
2131 gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
2132 gcc_assert (!unknown_p () && "Can't compare VTYPE in unknown state");
ec99ffab
JZZ
2133 if (demand_p (DEMAND_SEW))
2134 {
2135 if (!demand_p (DEMAND_GE_SEW) && m_sew != other.get_sew ())
2136 return false;
2137 if (demand_p (DEMAND_GE_SEW) && m_sew > other.get_sew ())
2138 return false;
2139 }
9243c3d1
JZZ
2140 if (demand_p (DEMAND_LMUL) && m_vlmul != other.get_vlmul ())
2141 return false;
2142 if (demand_p (DEMAND_RATIO) && m_ratio != other.get_ratio ())
2143 return false;
2144 if (demand_p (DEMAND_TAIL_POLICY) && m_ta != other.get_ta ())
2145 return false;
2146 if (demand_p (DEMAND_MASK_POLICY) && m_ma != other.get_ma ())
2147 return false;
2148 return true;
2149}
2150
2151/* Determine whether the vector instructions requirements represented by
2152 Require are compatible with the previous vsetvli instruction represented
2153 by this. INSN is the instruction whose requirements we're considering. */
2154bool
2155vector_insn_info::compatible_p (const vl_vtype_info &curr_info) const
2156{
2157 gcc_assert (!uninit_p () && "Can't handle uninitialized info");
2158 if (empty_p ())
2159 return false;
2160
2161 /* Nothing is compatible with Unknown. */
2162 if (unknown_p ())
2163 return false;
2164
2165 /* If the instruction doesn't need an AVLReg and the SEW matches, consider
2166 it compatible. */
2167 if (!demand_p (DEMAND_AVL))
2168 if (m_sew == curr_info.get_sew ())
2169 return true;
2170
2171 return compatible_avl_p (curr_info) && compatible_vtype_p (curr_info);
2172}
2173
6b6b9c68
JZZ
2174bool
2175vector_insn_info::available_p (const vector_insn_info &other) const
2176{
ec99ffab
JZZ
2177 return *this >= other;
2178}
2179
2180void
2181vector_insn_info::fuse_avl (const vector_insn_info &info1,
2182 const vector_insn_info &info2)
2183{
2184 set_insn (info1.get_insn ());
2185 if (info1.demand_p (DEMAND_AVL))
2186 {
2187 if (info1.demand_p (DEMAND_NONZERO_AVL))
2188 {
2189 if (info2.demand_p (DEMAND_AVL)
2190 && !info2.demand_p (DEMAND_NONZERO_AVL))
2191 {
2192 set_avl_info (info2.get_avl_info ());
2193 set_demand (DEMAND_AVL, true);
2194 set_demand (DEMAND_NONZERO_AVL, false);
2195 return;
2196 }
2197 }
2198 set_avl_info (info1.get_avl_info ());
2199 set_demand (DEMAND_NONZERO_AVL, info1.demand_p (DEMAND_NONZERO_AVL));
2200 }
2201 else
2202 {
2203 set_avl_info (info2.get_avl_info ());
2204 set_demand (DEMAND_NONZERO_AVL, info2.demand_p (DEMAND_NONZERO_AVL));
2205 }
2206 set_demand (DEMAND_AVL,
2207 info1.demand_p (DEMAND_AVL) || info2.demand_p (DEMAND_AVL));
2208}
2209
2210void
2211vector_insn_info::fuse_sew_lmul (const vector_insn_info &info1,
2212 const vector_insn_info &info2)
2213{
2214 /* We need to fuse sew && lmul according to demand info:
2215
2216 1. GE_SEW.
2217 2. SEW.
2218 3. LMUL.
2219 4. RATIO. */
2220 if (same_sew_lmul_demand_p (info1.get_demands (), info2.get_demands ()))
2221 {
2222 set_demand (DEMAND_SEW, info2.demand_p (DEMAND_SEW));
2223 set_demand (DEMAND_LMUL, info2.demand_p (DEMAND_LMUL));
2224 set_demand (DEMAND_RATIO, info2.demand_p (DEMAND_RATIO));
2225 set_demand (DEMAND_GE_SEW, info2.demand_p (DEMAND_GE_SEW));
2226 set_sew (info2.get_sew ());
2227 set_vlmul (info2.get_vlmul ());
2228 set_ratio (info2.get_ratio ());
2229 return;
2230 }
2231 for (const auto &rule : fuse_rules)
2232 {
2233 if (rule.pair.match_cond_p (info1.get_demands (), info2.get_demands ()))
2234 {
2235 set_demand (DEMAND_SEW, rule.demand_sew_p);
2236 set_demand (DEMAND_LMUL, rule.demand_lmul_p);
2237 set_demand (DEMAND_RATIO, rule.demand_ratio_p);
2238 set_demand (DEMAND_GE_SEW, rule.demand_ge_sew_p);
2239 set_sew (rule.new_sew (info1, info2));
2240 set_vlmul (rule.new_vlmul (info1, info2));
2241 set_ratio (rule.new_ratio (info1, info2));
2242 return;
2243 }
2244 if (rule.pair.match_cond_p (info2.get_demands (), info1.get_demands ()))
2245 {
2246 set_demand (DEMAND_SEW, rule.demand_sew_p);
2247 set_demand (DEMAND_LMUL, rule.demand_lmul_p);
2248 set_demand (DEMAND_RATIO, rule.demand_ratio_p);
2249 set_demand (DEMAND_GE_SEW, rule.demand_ge_sew_p);
2250 set_sew (rule.new_sew (info2, info1));
2251 set_vlmul (rule.new_vlmul (info2, info1));
2252 set_ratio (rule.new_ratio (info2, info1));
2253 return;
2254 }
2255 }
2256 gcc_unreachable ();
2257}
2258
2259void
2260vector_insn_info::fuse_tail_policy (const vector_insn_info &info1,
2261 const vector_insn_info &info2)
2262{
2263 if (info1.demand_p (DEMAND_TAIL_POLICY))
2264 {
2265 set_ta (info1.get_ta ());
2266 demand (DEMAND_TAIL_POLICY);
2267 }
2268 else if (info2.demand_p (DEMAND_TAIL_POLICY))
2269 {
2270 set_ta (info2.get_ta ());
2271 demand (DEMAND_TAIL_POLICY);
2272 }
2273 else
2274 set_ta (get_default_ta ());
2275}
2276
2277void
2278vector_insn_info::fuse_mask_policy (const vector_insn_info &info1,
2279 const vector_insn_info &info2)
2280{
2281 if (info1.demand_p (DEMAND_MASK_POLICY))
2282 {
2283 set_ma (info1.get_ma ());
2284 demand (DEMAND_MASK_POLICY);
2285 }
2286 else if (info2.demand_p (DEMAND_MASK_POLICY))
2287 {
2288 set_ma (info2.get_ma ());
2289 demand (DEMAND_MASK_POLICY);
2290 }
2291 else
2292 set_ma (get_default_ma ());
6b6b9c68
JZZ
2293}
2294
9243c3d1
JZZ
2295vector_insn_info
2296vector_insn_info::merge (const vector_insn_info &merge_info,
d51f2456 2297 enum merge_type type) const
9243c3d1 2298{
6b6b9c68
JZZ
2299 if (!vsetvl_insn_p (get_insn ()->rtl ()))
2300 gcc_assert (this->compatible_p (merge_info)
2301 && "Can't merge incompatible demanded infos");
9243c3d1
JZZ
2302
2303 vector_insn_info new_info;
ec99ffab 2304 new_info.set_valid ();
4f673c5e 2305 if (type == LOCAL_MERGE)
9243c3d1 2306 {
4f673c5e 2307 /* For local backward data flow, we always update INSN && AVL as the
ec99ffab
JZZ
2308 latest INSN and AVL so that we can keep track status of each INSN. */
2309 new_info.fuse_avl (merge_info, *this);
9243c3d1
JZZ
2310 }
2311 else
2312 {
4f673c5e 2313 /* For global data flow, we should keep original INSN and AVL if they
ec99ffab 2314 valid since we should keep the life information of each block.
9243c3d1 2315
ec99ffab
JZZ
2316 For example:
2317 bb 0 -> bb 1.
2318 We should keep INSN && AVL of bb 1 since we will eventually emit
2319 vsetvl instruction according to INSN and AVL of bb 1. */
2320 new_info.fuse_avl (*this, merge_info);
2321 }
9243c3d1 2322
ec99ffab
JZZ
2323 new_info.fuse_sew_lmul (*this, merge_info);
2324 new_info.fuse_tail_policy (*this, merge_info);
2325 new_info.fuse_mask_policy (*this, merge_info);
9243c3d1
JZZ
2326 return new_info;
2327}
2328
60bd33bc
JZZ
2329bool
2330vector_insn_info::update_fault_first_load_avl (insn_info *insn)
2331{
2332 // Update AVL to vl-output of the fault first load.
2333 const insn_info *read_vl = get_forward_read_vl_insn (insn);
2334 if (read_vl)
2335 {
2336 rtx vl = SET_DEST (PATTERN (read_vl->rtl ()));
2337 def_info *def = find_access (read_vl->defs (), REGNO (vl));
2338 set_info *set = safe_dyn_cast<set_info *> (def);
2339 set_avl_info (avl_info (vl, set));
2340 set_insn (insn);
2341 return true;
2342 }
2343 return false;
2344}
2345
ea7a9f36
KC
2346static const char *
2347vlmul_to_str (vlmul_type vlmul)
2348{
2349 switch (vlmul)
2350 {
2351 case LMUL_1:
2352 return "m1";
2353 case LMUL_2:
2354 return "m2";
2355 case LMUL_4:
2356 return "m4";
2357 case LMUL_8:
2358 return "m8";
2359 case LMUL_RESERVED:
2360 return "INVALID LMUL";
2361 case LMUL_F8:
2362 return "mf8";
2363 case LMUL_F4:
2364 return "mf4";
2365 case LMUL_F2:
2366 return "mf2";
2367
2368 default:
2369 gcc_unreachable ();
2370 }
2371}
2372
/* Human-readable name of a tail/mask policy bit for dump files.  */
static const char *
policy_to_str (bool agnostic_p)
{
  return agnostic_p ? "agnostic" : "undisturbed";
}
2378
9243c3d1
JZZ
2379void
2380vector_insn_info::dump (FILE *file) const
2381{
2382 fprintf (file, "[");
2383 if (uninit_p ())
2384 fprintf (file, "UNINITIALIZED,");
2385 else if (valid_p ())
2386 fprintf (file, "VALID,");
2387 else if (unknown_p ())
2388 fprintf (file, "UNKNOWN,");
2389 else if (empty_p ())
2390 fprintf (file, "EMPTY,");
6b6b9c68
JZZ
2391 else if (hard_empty_p ())
2392 fprintf (file, "HARD_EMPTY,");
4f673c5e
JZZ
2393 else if (dirty_with_killed_avl_p ())
2394 fprintf (file, "DIRTY_WITH_KILLED_AVL,");
9243c3d1
JZZ
2395 else
2396 fprintf (file, "DIRTY,");
2397
2398 fprintf (file, "Demand field={%d(VL),", demand_p (DEMAND_AVL));
ec99ffab 2399 fprintf (file, "%d(DEMAND_NONZERO_AVL),", demand_p (DEMAND_NONZERO_AVL));
9243c3d1 2400 fprintf (file, "%d(SEW),", demand_p (DEMAND_SEW));
ec99ffab 2401 fprintf (file, "%d(DEMAND_GE_SEW),", demand_p (DEMAND_GE_SEW));
9243c3d1
JZZ
2402 fprintf (file, "%d(LMUL),", demand_p (DEMAND_LMUL));
2403 fprintf (file, "%d(RATIO),", demand_p (DEMAND_RATIO));
2404 fprintf (file, "%d(TAIL_POLICY),", demand_p (DEMAND_TAIL_POLICY));
2405 fprintf (file, "%d(MASK_POLICY)}\n", demand_p (DEMAND_MASK_POLICY));
2406
2407 fprintf (file, "AVL=");
2408 print_rtl_single (file, get_avl ());
2409 fprintf (file, "SEW=%d,", get_sew ());
ea7a9f36 2410 fprintf (file, "VLMUL=%s,", vlmul_to_str (get_vlmul ()));
9243c3d1 2411 fprintf (file, "RATIO=%d,", get_ratio ());
ea7a9f36
KC
2412 fprintf (file, "TAIL_POLICY=%s,", policy_to_str (get_ta ()));
2413 fprintf (file, "MASK_POLICY=%s", policy_to_str (get_ma ()));
9243c3d1
JZZ
2414 fprintf (file, "]\n");
2415
2416 if (valid_p ())
2417 {
2418 if (get_insn ())
2419 {
12b23c71
JZZ
2420 fprintf (file, "The real INSN=");
2421 print_rtl_single (file, get_insn ()->rtl ());
9243c3d1 2422 }
9243c3d1
JZZ
2423 }
2424}
2425
2426vector_infos_manager::vector_infos_manager ()
2427{
2428 vector_edge_list = nullptr;
2429 vector_kill = nullptr;
2430 vector_del = nullptr;
2431 vector_insert = nullptr;
2432 vector_antic = nullptr;
2433 vector_transp = nullptr;
2434 vector_comp = nullptr;
2435 vector_avin = nullptr;
2436 vector_avout = nullptr;
2437 vector_insn_infos.safe_grow (get_max_uid ());
2438 vector_block_infos.safe_grow (last_basic_block_for_fn (cfun));
2439 if (!optimize)
2440 {
2441 basic_block cfg_bb;
2442 rtx_insn *rinsn;
2443 FOR_ALL_BB_FN (cfg_bb, cfun)
2444 {
2445 vector_block_infos[cfg_bb->index].local_dem = vector_insn_info ();
2446 vector_block_infos[cfg_bb->index].reaching_out = vector_insn_info ();
2447 FOR_BB_INSNS (cfg_bb, rinsn)
2448 vector_insn_infos[INSN_UID (rinsn)].parse_insn (rinsn);
2449 }
2450 }
2451 else
2452 {
2453 for (const bb_info *bb : crtl->ssa->bbs ())
2454 {
2455 vector_block_infos[bb->index ()].local_dem = vector_insn_info ();
2456 vector_block_infos[bb->index ()].reaching_out = vector_insn_info ();
2457 for (insn_info *insn : bb->real_insns ())
2458 vector_insn_infos[insn->uid ()].parse_insn (insn);
acc10c79 2459 vector_block_infos[bb->index ()].probability = profile_probability ();
9243c3d1
JZZ
2460 }
2461 }
2462}
2463
2464void
2465vector_infos_manager::create_expr (vector_insn_info &info)
2466{
2467 for (size_t i = 0; i < vector_exprs.length (); i++)
2468 if (*vector_exprs[i] == info)
2469 return;
2470 vector_exprs.safe_push (&info);
2471}
2472
2473size_t
2474vector_infos_manager::get_expr_id (const vector_insn_info &info) const
2475{
2476 for (size_t i = 0; i < vector_exprs.length (); i++)
2477 if (*vector_exprs[i] == info)
2478 return i;
2479 gcc_unreachable ();
2480}
2481
2482auto_vec<size_t>
2483vector_infos_manager::get_all_available_exprs (
2484 const vector_insn_info &info) const
2485{
2486 auto_vec<size_t> available_list;
2487 for (size_t i = 0; i < vector_exprs.length (); i++)
6b6b9c68 2488 if (info.available_p (*vector_exprs[i]))
9243c3d1
JZZ
2489 available_list.safe_push (i);
2490 return available_list;
2491}
2492
d06e9264
JZ
2493bool
2494vector_infos_manager::all_empty_predecessor_p (const basic_block cfg_bb) const
2495{
2496 hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb);
2497 for (const basic_block pred_cfg_bb : pred_cfg_bbs)
2498 {
2499 const auto &pred_block_info = vector_block_infos[pred_cfg_bb->index];
2500 if (!pred_block_info.local_dem.valid_or_dirty_p ()
2501 && !pred_block_info.reaching_out.valid_or_dirty_p ())
2502 continue;
2503 return false;
2504 }
2505 return true;
2506}
2507
9243c3d1
JZZ
2508bool
2509vector_infos_manager::all_same_ratio_p (sbitmap bitdata) const
2510{
2511 if (bitmap_empty_p (bitdata))
2512 return false;
2513
2514 int ratio = -1;
2515 unsigned int bb_index;
2516 sbitmap_iterator sbi;
2517
2518 EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi)
2519 {
2520 if (ratio == -1)
2521 ratio = vector_exprs[bb_index]->get_ratio ();
2522 else if (vector_exprs[bb_index]->get_ratio () != ratio)
2523 return false;
2524 }
2525 return true;
2526}
2527
ff8f9544
JZ
2528/* Return TRUE if the incoming vector configuration state
2529 to CFG_BB is compatible with the vector configuration
2530 state in CFG_BB, FALSE otherwise. */
2531bool
2532vector_infos_manager::all_avail_in_compatible_p (const basic_block cfg_bb) const
2533{
2534 const auto &info = vector_block_infos[cfg_bb->index].local_dem;
2535 sbitmap avin = vector_avin[cfg_bb->index];
2536 unsigned int bb_index;
2537 sbitmap_iterator sbi;
2538 EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi)
2539 {
2540 const auto &avin_info
2541 = static_cast<const vl_vtype_info &> (*vector_exprs[bb_index]);
2542 if (!info.compatible_p (avin_info))
2543 return false;
2544 }
2545 return true;
2546}
2547
005fad9d
JZZ
2548bool
2549vector_infos_manager::all_same_avl_p (const basic_block cfg_bb,
2550 sbitmap bitdata) const
2551{
2552 if (bitmap_empty_p (bitdata))
2553 return false;
2554
2555 const auto &block_info = vector_block_infos[cfg_bb->index];
2556 if (!block_info.local_dem.demand_p (DEMAND_AVL))
2557 return true;
2558
2559 avl_info avl = block_info.local_dem.get_avl_info ();
2560 unsigned int bb_index;
2561 sbitmap_iterator sbi;
2562
2563 EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi)
2564 {
2565 if (vector_exprs[bb_index]->get_avl_info () != avl)
2566 return false;
2567 }
2568 return true;
2569}
2570
9243c3d1
JZZ
2571size_t
2572vector_infos_manager::expr_set_num (sbitmap bitdata) const
2573{
2574 size_t count = 0;
2575 for (size_t i = 0; i < vector_exprs.length (); i++)
2576 if (bitmap_bit_p (bitdata, i))
2577 count++;
2578 return count;
2579}
2580
2581void
2582vector_infos_manager::release (void)
2583{
2584 if (!vector_insn_infos.is_empty ())
2585 vector_insn_infos.release ();
2586 if (!vector_block_infos.is_empty ())
2587 vector_block_infos.release ();
2588 if (!vector_exprs.is_empty ())
2589 vector_exprs.release ();
2590
ec99ffab
JZZ
2591 gcc_assert (to_refine_vsetvls.is_empty ());
2592 gcc_assert (to_delete_vsetvls.is_empty ());
9243c3d1 2593 if (optimize > 0)
cfe3fbc6
JZZ
2594 free_bitmap_vectors ();
2595}
2596
2597void
2598vector_infos_manager::create_bitmap_vectors (void)
2599{
2600 /* Create the bitmap vectors. */
2601 vector_antic = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2602 vector_exprs.length ());
2603 vector_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2604 vector_exprs.length ());
2605 vector_comp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2606 vector_exprs.length ());
2607 vector_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2608 vector_exprs.length ());
2609 vector_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2610 vector_exprs.length ());
2611 vector_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
2612 vector_exprs.length ());
2613
2614 bitmap_vector_ones (vector_transp, last_basic_block_for_fn (cfun));
2615 bitmap_vector_clear (vector_antic, last_basic_block_for_fn (cfun));
2616 bitmap_vector_clear (vector_comp, last_basic_block_for_fn (cfun));
2617}
2618
2619void
2620vector_infos_manager::free_bitmap_vectors (void)
2621{
2622 /* Finished. Free up all the things we've allocated. */
2623 free_edge_list (vector_edge_list);
2624 if (vector_del)
2625 sbitmap_vector_free (vector_del);
2626 if (vector_insert)
2627 sbitmap_vector_free (vector_insert);
2628 if (vector_kill)
2629 sbitmap_vector_free (vector_kill);
2630 if (vector_antic)
2631 sbitmap_vector_free (vector_antic);
2632 if (vector_transp)
2633 sbitmap_vector_free (vector_transp);
2634 if (vector_comp)
2635 sbitmap_vector_free (vector_comp);
2636 if (vector_avin)
2637 sbitmap_vector_free (vector_avin);
2638 if (vector_avout)
2639 sbitmap_vector_free (vector_avout);
2640
2641 vector_edge_list = nullptr;
2642 vector_kill = nullptr;
2643 vector_del = nullptr;
2644 vector_insert = nullptr;
2645 vector_antic = nullptr;
2646 vector_transp = nullptr;
2647 vector_comp = nullptr;
2648 vector_avin = nullptr;
2649 vector_avout = nullptr;
9243c3d1
JZZ
2650}
2651
2652void
2653vector_infos_manager::dump (FILE *file) const
2654{
2655 basic_block cfg_bb;
2656 rtx_insn *rinsn;
2657
2658 fprintf (file, "\n");
2659 FOR_ALL_BB_FN (cfg_bb, cfun)
2660 {
2661 fprintf (file, "Local vector info of <bb %d>:\n", cfg_bb->index);
2662 fprintf (file, "<HEADER>=");
2663 vector_block_infos[cfg_bb->index].local_dem.dump (file);
2664 FOR_BB_INSNS (cfg_bb, rinsn)
2665 {
2666 if (!NONDEBUG_INSN_P (rinsn) || !has_vtype_op (rinsn))
2667 continue;
2668 fprintf (file, "<insn %d>=", INSN_UID (rinsn));
2669 const auto &info = vector_insn_infos[INSN_UID (rinsn)];
2670 info.dump (file);
2671 }
2672 fprintf (file, "<FOOTER>=");
2673 vector_block_infos[cfg_bb->index].reaching_out.dump (file);
acc10c79
JZZ
2674 fprintf (file, "<Probability>=");
2675 vector_block_infos[cfg_bb->index].probability.dump (file);
9243c3d1
JZZ
2676 fprintf (file, "\n\n");
2677 }
2678
2679 fprintf (file, "\n");
2680 FOR_ALL_BB_FN (cfg_bb, cfun)
2681 {
2682 fprintf (file, "Local properties of <bb %d>:\n", cfg_bb->index);
2683
2684 fprintf (file, "<ANTLOC>=");
2685 if (vector_antic == nullptr)
2686 fprintf (file, "(nil)\n");
2687 else
2688 dump_bitmap_file (file, vector_antic[cfg_bb->index]);
2689
2690 fprintf (file, "<AVLOC>=");
2691 if (vector_comp == nullptr)
2692 fprintf (file, "(nil)\n");
2693 else
2694 dump_bitmap_file (file, vector_comp[cfg_bb->index]);
2695
2696 fprintf (file, "<TRANSP>=");
2697 if (vector_transp == nullptr)
2698 fprintf (file, "(nil)\n");
2699 else
2700 dump_bitmap_file (file, vector_transp[cfg_bb->index]);
2701
2702 fprintf (file, "<KILL>=");
2703 if (vector_kill == nullptr)
2704 fprintf (file, "(nil)\n");
2705 else
2706 dump_bitmap_file (file, vector_kill[cfg_bb->index]);
2707 }
2708
2709 fprintf (file, "\n");
2710 FOR_ALL_BB_FN (cfg_bb, cfun)
2711 {
2712 fprintf (file, "Global LCM (Lazy code motion) result of <bb %d>:\n",
2713 cfg_bb->index);
2714
2715 fprintf (file, "<AVIN>=");
2716 if (vector_avin == nullptr)
2717 fprintf (file, "(nil)\n");
2718 else
2719 dump_bitmap_file (file, vector_avin[cfg_bb->index]);
2720
2721 fprintf (file, "<AVOUT>=");
2722 if (vector_avout == nullptr)
2723 fprintf (file, "(nil)\n");
2724 else
2725 dump_bitmap_file (file, vector_avout[cfg_bb->index]);
2726
2727 fprintf (file, "<DELETE>=");
2728 if (vector_del == nullptr)
2729 fprintf (file, "(nil)\n");
2730 else
2731 dump_bitmap_file (file, vector_del[cfg_bb->index]);
2732 }
2733
2734 fprintf (file, "\nGlobal LCM (Lazy code motion) INSERT info:\n");
2735 for (size_t i = 0; i < vector_exprs.length (); i++)
2736 {
2737 for (int ed = 0; ed < NUM_EDGES (vector_edge_list); ed++)
2738 {
2739 edge eg = INDEX_EDGE (vector_edge_list, ed);
2740 if (bitmap_bit_p (vector_insert[ed], i))
2741 fprintf (dump_file,
2742 "INSERT edge %d from bb %d to bb %d for VSETVL "
2743 "expr[%ld]\n",
2744 ed, eg->src->index, eg->dest->index, i);
2745 }
2746 }
2747}
2748
2749const pass_data pass_data_vsetvl = {
2750 RTL_PASS, /* type */
2751 "vsetvl", /* name */
2752 OPTGROUP_NONE, /* optinfo_flags */
2753 TV_NONE, /* tv_id */
2754 0, /* properties_required */
2755 0, /* properties_provided */
2756 0, /* properties_destroyed */
2757 0, /* todo_flags_start */
2758 0, /* todo_flags_finish */
2759};
2760
2761class pass_vsetvl : public rtl_opt_pass
2762{
2763private:
c139f5e1
KC
2764 vector_infos_manager *m_vector_manager;
2765
2766 const vector_insn_info &get_vector_info (const rtx_insn *) const;
2767 const vector_insn_info &get_vector_info (const insn_info *) const;
2768 const vector_block_info &get_block_info (const basic_block) const;
2769 const vector_block_info &get_block_info (const bb_info *) const;
2770 vector_block_info &get_block_info (const basic_block);
2771 vector_block_info &get_block_info (const bb_info *);
2772 void update_vector_info (const insn_info *, const vector_insn_info &);
9243c3d1
JZZ
2773
2774 void simple_vsetvl (void) const;
2775 void lazy_vsetvl (void);
2776
2777 /* Phase 1. */
2778 void compute_local_backward_infos (const bb_info *);
2779
2780 /* Phase 2. */
2781 bool need_vsetvl (const vector_insn_info &, const vector_insn_info &) const;
2782 void transfer_before (vector_insn_info &, insn_info *) const;
2783 void transfer_after (vector_insn_info &, insn_info *) const;
2784 void emit_local_forward_vsetvls (const bb_info *);
2785
2786 /* Phase 3. */
4f673c5e
JZZ
2787 enum fusion_type get_backward_fusion_type (const bb_info *,
2788 const vector_insn_info &);
6b6b9c68 2789 bool hard_empty_block_p (const bb_info *, const vector_insn_info &) const;
387cd9d3
JZZ
2790 bool backward_demand_fusion (void);
2791 bool forward_demand_fusion (void);
6b6b9c68 2792 bool cleanup_illegal_dirty_blocks (void);
387cd9d3 2793 void demand_fusion (void);
9243c3d1
JZZ
2794
2795 /* Phase 4. */
2796 void prune_expressions (void);
2797 void compute_local_properties (void);
6b6b9c68 2798 bool can_refine_vsetvl_p (const basic_block, const vector_insn_info &) const;
9243c3d1
JZZ
2799 void refine_vsetvls (void) const;
2800 void cleanup_vsetvls (void);
2801 bool commit_vsetvls (void);
2802 void pre_vsetvl (void);
2803
2804 /* Phase 5. */
c919d059
KC
2805 rtx_insn *get_vsetvl_at_end (const bb_info *, vector_insn_info *) const;
2806 void local_eliminate_vsetvl_insn (const bb_info *) const;
20c85207
JZ
2807 bool global_eliminate_vsetvl_insn (const bb_info *) const;
2808 void ssa_post_optimization (void) const;
9243c3d1 2809
6b6b9c68 2810 /* Phase 6. */
20c85207 2811 void df_post_optimization (void) const;
6b6b9c68 2812
9243c3d1
JZZ
2813 void init (void);
2814 void done (void);
acc10c79 2815 void compute_probabilities (void);
9243c3d1
JZZ
2816
2817public:
2818 pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {}
2819
2820 /* opt_pass methods: */
2821 virtual bool gate (function *) final override { return TARGET_VECTOR; }
2822 virtual unsigned int execute (function *) final override;
2823}; // class pass_vsetvl
2824
c139f5e1
KC
2825const vector_insn_info &
2826pass_vsetvl::get_vector_info (const rtx_insn *i) const
2827{
2828 return m_vector_manager->vector_insn_infos[INSN_UID (i)];
2829}
2830
2831const vector_insn_info &
2832pass_vsetvl::get_vector_info (const insn_info *i) const
2833{
2834 return m_vector_manager->vector_insn_infos[i->uid ()];
2835}
2836
2837const vector_block_info &
2838pass_vsetvl::get_block_info (const basic_block bb) const
2839{
2840 return m_vector_manager->vector_block_infos[bb->index];
2841}
2842
2843const vector_block_info &
2844pass_vsetvl::get_block_info (const bb_info *bb) const
2845{
2846 return m_vector_manager->vector_block_infos[bb->index ()];
2847}
2848
2849vector_block_info &
2850pass_vsetvl::get_block_info (const basic_block bb)
2851{
2852 return m_vector_manager->vector_block_infos[bb->index];
2853}
2854
2855vector_block_info &
2856pass_vsetvl::get_block_info (const bb_info *bb)
2857{
2858 return m_vector_manager->vector_block_infos[bb->index ()];
2859}
2860
2861void
2862pass_vsetvl::update_vector_info (const insn_info *i,
2863 const vector_insn_info &new_info)
2864{
2865 m_vector_manager->vector_insn_infos[i->uid ()] = new_info;
2866}
2867
9243c3d1
JZZ
2868/* Simple m_vsetvl_insert vsetvl for optimize == 0. */
2869void
2870pass_vsetvl::simple_vsetvl (void) const
2871{
2872 if (dump_file)
2873 fprintf (dump_file,
2874 "\nEntering Simple VSETVL PASS and Handling %d basic blocks for "
2875 "function:%s\n",
2876 n_basic_blocks_for_fn (cfun), function_name (cfun));
2877
2878 basic_block cfg_bb;
2879 rtx_insn *rinsn;
2880 FOR_ALL_BB_FN (cfg_bb, cfun)
2881 {
2882 FOR_BB_INSNS (cfg_bb, rinsn)
2883 {
2884 if (!NONDEBUG_INSN_P (rinsn))
2885 continue;
2886 if (has_vtype_op (rinsn))
2887 {
c139f5e1 2888 const auto info = get_vector_info (rinsn);
9243c3d1
JZZ
2889 emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_BEFORE, info,
2890 NULL_RTX, rinsn);
2891 }
2892 }
2893 }
2894}
2895
2896/* Compute demanded information by backward data-flow analysis. */
2897void
2898pass_vsetvl::compute_local_backward_infos (const bb_info *bb)
2899{
2900 vector_insn_info change;
2901 change.set_empty ();
2902
2903 auto &block_info = m_vector_manager->vector_block_infos[bb->index ()];
2904 block_info.reaching_out = change;
2905
2906 for (insn_info *insn : bb->reverse_real_nondebug_insns ())
2907 {
c139f5e1 2908 auto &info = get_vector_info (insn);
9243c3d1
JZZ
2909
2910 if (info.uninit_p ())
2911 /* If it is uninitialized, propagate it directly. */
c139f5e1 2912 update_vector_info (insn, change);
9243c3d1
JZZ
2913 else if (info.unknown_p ())
2914 change = info;
2915 else
2916 {
2917 gcc_assert (info.valid_p () && "Unexpected Invalid demanded info");
ec99ffab
JZZ
2918 if (change.valid_p ())
2919 {
a481eed8
JZZ
2920 if (!(propagate_avl_across_demands_p (change, info)
2921 && !reg_available_p (insn, change))
ec99ffab 2922 && change.compatible_p (info))
fdc5abbd 2923 {
c139f5e1 2924 update_vector_info (insn, change.merge (info, LOCAL_MERGE));
fdc5abbd
JZ
2925 /* Fix PR109399, we should update user vsetvl instruction
2926 if there is a change in demand fusion. */
2927 if (vsetvl_insn_p (insn->rtl ()))
2928 change_vsetvl_insn (insn, info);
2929 }
ec99ffab 2930 }
9243c3d1
JZZ
2931 change = info;
2932 }
2933 }
2934
2935 block_info.local_dem = change;
2936 if (block_info.local_dem.empty_p ())
2937 block_info.reaching_out = block_info.local_dem;
2938}
2939
2940/* Return true if a dem_info is required to transition from curr_info to
2941 require before INSN. */
2942bool
2943pass_vsetvl::need_vsetvl (const vector_insn_info &require,
2944 const vector_insn_info &curr_info) const
2945{
2946 if (!curr_info.valid_p () || curr_info.unknown_p () || curr_info.uninit_p ())
2947 return true;
2948
a481eed8 2949 if (require.compatible_p (static_cast<const vl_vtype_info &> (curr_info)))
9243c3d1
JZZ
2950 return false;
2951
2952 return true;
2953}
2954
2955/* Given an incoming state reaching INSN, modifies that state so that it is
2956 minimally compatible with INSN. The resulting state is guaranteed to be
2957 semantically legal for INSN, but may not be the state requested by INSN. */
2958void
2959pass_vsetvl::transfer_before (vector_insn_info &info, insn_info *insn) const
2960{
2961 if (!has_vtype_op (insn->rtl ()))
2962 return;
2963
c139f5e1 2964 const vector_insn_info require = get_vector_info (insn);
9243c3d1
JZZ
2965 if (info.valid_p () && !need_vsetvl (require, info))
2966 return;
2967 info = require;
2968}
2969
2970/* Given a state with which we evaluated insn (see transfer_before above for why
2971 this might be different that the state insn requested), modify the state to
2972 reflect the changes insn might make. */
2973void
2974pass_vsetvl::transfer_after (vector_insn_info &info, insn_info *insn) const
2975{
2976 if (vector_config_insn_p (insn->rtl ()))
2977 {
c139f5e1 2978 info = get_vector_info (insn);
9243c3d1
JZZ
2979 return;
2980 }
2981
60bd33bc
JZZ
2982 if (fault_first_load_p (insn->rtl ())
2983 && info.update_fault_first_load_avl (insn))
2984 return;
9243c3d1
JZZ
2985
2986 /* If this is something that updates VL/VTYPE that we don't know about, set
2987 the state to unknown. */
2988 if (insn->is_call () || insn->is_asm ()
2989 || find_access (insn->defs (), VL_REGNUM)
2990 || find_access (insn->defs (), VTYPE_REGNUM))
2991 info = vector_insn_info::get_unknown ();
2992}
2993
2994/* Emit vsetvl within each block by forward data-flow analysis. */
2995void
2996pass_vsetvl::emit_local_forward_vsetvls (const bb_info *bb)
2997{
2998 auto &block_info = m_vector_manager->vector_block_infos[bb->index ()];
2999 if (block_info.local_dem.empty_p ())
3000 return;
3001
3002 vector_insn_info curr_info;
3003 for (insn_info *insn : bb->real_nondebug_insns ())
3004 {
3005 const vector_insn_info prev_info = curr_info;
a481eed8 3006 enum vsetvl_type type = NUM_VSETVL_TYPE;
9243c3d1
JZZ
3007 transfer_before (curr_info, insn);
3008
3009 if (has_vtype_op (insn->rtl ()))
3010 {
3011 if (static_cast<const vl_vtype_info &> (prev_info)
3012 != static_cast<const vl_vtype_info &> (curr_info))
3013 {
c139f5e1 3014 const auto require = get_vector_info (insn);
9243c3d1
JZZ
3015 if (!require.compatible_p (
3016 static_cast<const vl_vtype_info &> (prev_info)))
a481eed8
JZZ
3017 type = insert_vsetvl (EMIT_BEFORE, insn->rtl (), require,
3018 prev_info);
9243c3d1
JZZ
3019 }
3020 }
3021
a481eed8
JZZ
3022 /* Fix the issue of following sequence:
3023 vsetivli zero, 5
3024 ....
3025 vsetvli zero, zero
3026 vmv.x.s (demand AVL = 8).
3027 ....
3028 incorrect: vsetvli zero, zero ===> Since the curr_info is AVL = 8.
3029 correct: vsetivli zero, 8
3030 vadd (demand AVL = 8). */
3031 if (type == VSETVL_VTYPE_CHANGE_ONLY)
3032 {
3033 /* Update the curr_info to be real correct AVL. */
3034 curr_info.set_avl_info (prev_info.get_avl_info ());
3035 }
9243c3d1
JZZ
3036 transfer_after (curr_info, insn);
3037 }
3038
3039 block_info.reaching_out = curr_info;
3040}
3041
4f673c5e
JZZ
3042enum fusion_type
3043pass_vsetvl::get_backward_fusion_type (const bb_info *bb,
3044 const vector_insn_info &prop)
9243c3d1 3045{
4f673c5e 3046 insn_info *insn = prop.get_insn ();
9243c3d1 3047
4f673c5e
JZZ
3048 /* TODO: We don't backward propagate the explict VSETVL here
3049 since we will change vsetvl and vsetvlmax intrinsics into
3050 no side effects which can be optimized into optimal location
3051 by GCC internal passes. We only need to support these backward
3052 propagation if vsetvl intrinsics have side effects. */
3053 if (vsetvl_insn_p (insn->rtl ()))
3054 return INVALID_FUSION;
3055
3056 gcc_assert (has_vtype_op (insn->rtl ()));
3057 rtx reg = NULL_RTX;
3058
3059 /* Case 1: Don't need VL. Just let it backward propagate. */
ec99ffab 3060 if (!prop.demand_p (DEMAND_AVL))
4f673c5e
JZZ
3061 return VALID_AVL_FUSION;
3062 else
9243c3d1 3063 {
4f673c5e
JZZ
3064 /* Case 2: CONST_INT AVL, we don't need to check def. */
3065 if (prop.has_avl_imm ())
3066 return VALID_AVL_FUSION;
3067 else
3068 {
3069 /* Case 3: REG AVL, we need to check the distance of def to make
3070 sure we won't backward propagate over the def. */
3071 gcc_assert (prop.has_avl_reg ());
3072 if (vlmax_avl_p (prop.get_avl ()))
3073 /* Check VL operand for vsetvl vl,zero. */
ec99ffab 3074 reg = prop.get_avl_reg_rtx ();
4f673c5e
JZZ
3075 else
3076 /* Check AVL operand for vsetvl zero,avl. */
ec99ffab 3077 reg = prop.get_avl ();
4f673c5e
JZZ
3078 }
3079 }
9243c3d1 3080
4f673c5e 3081 gcc_assert (reg);
ec99ffab
JZZ
3082 if (!prop.get_avl_source ()->insn ()->is_phi ()
3083 && prop.get_avl_source ()->insn ()->bb () == insn->bb ())
6b6b9c68
JZZ
3084 return INVALID_FUSION;
3085 hash_set<set_info *> sets
3086 = get_all_sets (prop.get_avl_source (), true, true, true);
3087 if (any_set_in_bb_p (sets, insn->bb ()))
3088 return INVALID_FUSION;
3089
3090 if (vlmax_avl_p (prop.get_avl ()))
4f673c5e 3091 {
6b6b9c68 3092 if (find_reg_killed_by (bb, reg))
4f673c5e 3093 return INVALID_FUSION;
6b6b9c68
JZZ
3094 else
3095 return VALID_AVL_FUSION;
4f673c5e 3096 }
6b6b9c68
JZZ
3097
3098 /* By default, we always enable backward fusion so that we can
3099 gain more optimizations. */
3100 if (!find_reg_killed_by (bb, reg))
3101 return VALID_AVL_FUSION;
3102 return KILLED_AVL_FUSION;
3103}
3104
/* We almost enable all cases in get_backward_fusion_type, this function
   disables the backward fusion by changing dirty blocks into hard empty
   blocks in forward dataflow.  We can have more accurate optimization by
   this method.  Returns true iff BB (currently marked dirty with a register
   AVL) must be demoted to HARD_EMPTY.  */
bool
pass_vsetvl::hard_empty_block_p (const bb_info *bb,
				 const vector_insn_info &info) const
{
  /* Only dirty blocks whose demanded AVL lives in a register are candidates.  */
  if (!info.dirty_p () || !info.has_avl_reg ())
    return false;

  basic_block cfg_bb = bb->cfg_bb ();
  sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index];
  set_info *set = info.get_avl_source ();
  rtx avl = gen_rtx_REG (Pmode, set->regno ());
  hash_set<set_info *> sets = get_all_sets (set, true, false, false);
  hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb);

  if (find_reg_killed_by (bb, avl))
    {
      /* Condition 1:
	 Dirty block with killed AVL means that the empty block (no RVV
	 instructions) are polluted as Dirty blocks with the value of current
	 AVL is killed.  For example:
	   bb 0:
	     ...
	   bb 1:
	     def a5
	   bb 2:
	     RVV (use a5)
	 In backward dataflow, we will pollute BB0 and BB1 as Dirty with AVL
	 killed, since a5 is killed in BB1.
	 In this case, let's take a look at this example:

	     bb 3:        bb 4:
	      def3 a5       def4 a5
	     bb 5:        bb 6:
	      def1 a5       def2 a5
		\            /
		 \          /
		  \        /
		   \      /
		    bb 7:
		 RVV (use a5)
	 In this case, we can pollute BB5 and BB6 as dirty if get-def
	 of a5 from RVV instruction in BB7 is the def1 in BB5 and
	 def2 in BB6 so we can return false early here for HARD_EMPTY_BLOCK_P.
	 However, we are not sure whether BB3 and BB4 can be
	 polluted as Dirty with AVL killed so we can't return false
	 for HARD_EMPTY_BLOCK_P here since it's too early which will
	 potentially produce issues.  */
      gcc_assert (info.dirty_with_killed_avl_p ());
      if (info.get_avl_source ()
	  && get_same_bb_set (sets, bb->cfg_bb ()) == info.get_avl_source ())
	return false;
    }

  /* Condition 2:
     Suppress the VL/VTYPE info backward propagation too early:
			 ________
			|  BB0   |
			|________|
			    |
			 ___|____
			|  BB1   |
			|________|
     In this case, suppose BB 1 has multiple predecessors, BB 0 is one
     of them.  BB1 has VL/VTYPE info (may be VALID or DIRTY) to backward
     propagate.
     The AVIN (available in) which is calculated by LCM is empty only
     in these 2 circumstances:
       1. all predecessors of BB1 are empty (not VALID
	  and can not be polluted in backward fusion flow)
       2. VL/VTYPE info of BB1 predecessors are conflict.

     We keep it as dirty in 2nd circumstance and set it as HARD_EMPTY
     (can not be polluted as DIRTY any more) in 1st circumstance.
     We don't backward propagate in 1st circumstance since there is
     no VALID RVV instruction and no polluted blocks (dirty blocks)
     by backward propagation from other following blocks.
     It's meaningless to keep it as Dirty anymore.

     However, since we keep it as dirty in 2nd since there are VALID or
     Dirty blocks in predecessors, we can still gain the benefits and
     optimization opportunities.  For example, in this case:
	for (size_t i = 0; i < n; i++)
	  {
	    if (i != cond) {
	      vint8mf8_t v = *(vint8mf8_t*)(in + i + 100);
	      *(vint8mf8_t*)(out + i + 100) = v;
	    } else {
	      vbool1_t v = *(vbool1_t*)(in + i + 400);
	      *(vbool1_t*)(out + i + 400) = v;
	    }
	  }
     VL/VTYPE in if-else are conflict which will produce empty AVIN LCM result
     but we can still keep dirty blocks if *(i != cond)* is very unlikely then
     we can preset vsetvl (VL/VTYPE) info from else (static probability model).

     We don't want to backward propagate VL/VTYPE information too early
     which is not the optimal and may potentially produce issues.  */
  if (bitmap_empty_p (avin))
    {
      bool hard_empty_p = true;
      for (const basic_block pred_cfg_bb : pred_cfg_bbs)
	{
	  if (pred_cfg_bb == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	    continue;
	  sbitmap avout = m_vector_manager->vector_avout[pred_cfg_bb->index];
	  if (!bitmap_empty_p (avout))
	    {
	      hard_empty_p = false;
	      break;
	    }
	}
      if (hard_empty_p)
	return true;
    }

  /* Only consider demoting this block if at least one successor is dirty
     with a killed AVL; otherwise backward fusion did not reach past it.  */
  edge e;
  edge_iterator ei;
  bool has_avl_killed_insn_p = false;
  FOR_EACH_EDGE (e, ei, cfg_bb->succs)
    {
      const auto block_info
	= m_vector_manager->vector_block_infos[e->dest->index];
      if (block_info.local_dem.dirty_with_killed_avl_p ())
	{
	  has_avl_killed_insn_p = true;
	  break;
	}
    }
  if (!has_avl_killed_insn_p)
    return false;

  bool any_set_in_bbs_p = false;
  for (const basic_block pred_cfg_bb : pred_cfg_bbs)
    {
      insn_info *def_insn = extract_single_source (set);
      if (def_insn)
	{
	  /* Condition 3:

	      Case 1:                    Case 2:
		bb 0:                      bb 0:
		  def a5 101                 ...
		bb 1:                      bb 1:
		  ...                        ...
		bb 2:                      bb 2:
		  RVV 1 (use a5 with TAIL ANY)   ...
		bb 3:                      bb 3:
		  def a5 101                 def a5 101
		bb 4:                      bb 4:
		  ...                        ...
		bb 5:                      bb 5:
		  RVV 2 (use a5 with TU)     RVV 1 (use a5)

	     Case 1: We can pollute BB3,BB2,BB1,BB0 as Dirty blocks
	     with killed AVL so that we can merge TU demand info from RVV 2
	     into RVV 1 and elide the vsetvl instruction in BB5.

	     TODO: We only optimize for single source def since multiple source
	     def is quite complicated.

	     Case 2: We only can pollute bb 3 as dirty and it has been accepted
	     in Condition 2 and we can't pollute BB3,BB2,BB1,BB0 like case 1.  */
	  insn_info *last_killed_insn
	    = find_reg_killed_by (crtl->ssa->bb (pred_cfg_bb), avl);
	  if (!last_killed_insn || pred_cfg_bb == def_insn->bb ()->cfg_bb ())
	    continue;
	  if (source_equal_p (last_killed_insn, def_insn))
	    {
	      any_set_in_bbs_p = true;
	      break;
	    }
	}
      else
	{
	  /* Condition 4:

	     bb 0:        bb 1:         bb 3:
	      def1 a5      def2 a5       ...
		 \           /            /
		  \         /            /
		   \       /            /
		    \     /            /
		     bb 4:            /
		       |             /
		       |            /
		     bb 5:         /
		       |          /
		       |         /
		     bb 6:      /
		       |       /
		       |      /
			bb 8:
		     RVV 1 (use a5)
	     If we get-def (REAL) of a5 from RVV 1 instruction, we will get
	     def1 from BB0 and def2 from BB1.  So we will pollute BB6,BB5,BB4,
	     BB0,BB1 with DIRTY and set BB3 as HARD_EMPTY so that we won't
	     propagate AVL to BB3.  */
	  if (any_set_in_bb_p (sets, crtl->ssa->bb (pred_cfg_bb)))
	    {
	      any_set_in_bbs_p = true;
	      break;
	    }
	}
    }
  if (!any_set_in_bbs_p)
    return true;
  return false;
}
3317
/* Compute global backward demanded info.  Returns true if any block's
   demand info changed (callers iterate to a fixed point).  */
bool
pass_vsetvl::backward_demand_fusion (void)
{
  /* We compute global infos by backward propagation.
     We want to have better performance in these following cases:

	1. for (size_t i = 0; i < n; i++) {
	     if (i != cond) {
	       vint8mf8_t v = *(vint8mf8_t*)(in + i + 100);
	       *(vint8mf8_t*)(out + i + 100) = v;
	     } else {
	       vbool1_t v = *(vbool1_t*)(in + i + 400);
	       *(vbool1_t*)(out + i + 400) = v;
	     }
	   }

	  Since we don't have any RVV instruction in the BEFORE blocks,
	  LCM fails to optimize such case.  We want to backward propagate
	  them into empty blocks so that we could have better performance
	  in LCM.

	2. bb 0:
	     vsetvl e8,mf8 (demand RATIO)
	   bb 1:
	     vsetvl e32,mf2 (demand SEW and LMUL)
	   We backward propagate the first VSETVL into e32,mf2 so that we
	   could be able to eliminate the second VSETVL in LCM.  */

  bool changed_p = false;
  for (const bb_info *bb : crtl->ssa->reverse_bbs ())
    {
      basic_block cfg_bb = bb->cfg_bb ();
      const auto &curr_block_info
	= m_vector_manager->vector_block_infos[cfg_bb->index];
      const auto &prop = curr_block_info.local_dem;

      /* If there is nothing to propagate, just skip it.  */
      if (!prop.valid_or_dirty_p ())
	continue;

      if (!backward_propagate_worthwhile_p (cfg_bb, curr_block_info))
	continue;

      /* Fix PR108270:

	   bb 0 -> bb 1
	 We don't need to backward fuse VL/VTYPE info from bb 1 to bb 0
	 if bb 1 is not inside a loop and all predecessors of bb 0 are empty.  */
      if (m_vector_manager->all_empty_predecessor_p (cfg_bb))
	continue;

      edge e;
      edge_iterator ei;
      /* Backward propagate to each predecessor.  */
      FOR_EACH_EDGE (e, ei, cfg_bb->preds)
	{
	  auto &block_info
	    = m_vector_manager->vector_block_infos[e->src->index];

	  /* We don't propagate through critical edges.  */
	  if (e->flags & EDGE_COMPLEX)
	    continue;
	  if (e->src->index == ENTRY_BLOCK_PTR_FOR_FN (cfun)->index)
	    continue;
	  /* If prop is demand of vsetvl instruction and reaching doesn't demand
	     AVL.  We don't backward propagate since vsetvl instruction has no
	     side effects.  */
	  if (vsetvl_insn_p (prop.get_insn ()->rtl ())
	      && propagate_avl_across_demands_p (prop, block_info.reaching_out))
	    continue;

	  if (block_info.reaching_out.unknown_p ())
	    continue;
	  else if (block_info.reaching_out.hard_empty_p ())
	    continue;
	  else if (block_info.reaching_out.empty_p ())
	    {
	      /* EMPTY -> DIRTY: pollute the empty predecessor with PROP.  */
	      enum fusion_type type
		= get_backward_fusion_type (crtl->ssa->bb (e->src), prop);
	      if (type == INVALID_FUSION)
		continue;

	      block_info.reaching_out = prop;
	      block_info.reaching_out.set_dirty (type);

	      /* Rebind the AVL to a set in the predecessor block if one
		 exists, so later queries see the local definition.  */
	      if (prop.has_avl_reg () && !vlmax_avl_p (prop.get_avl ()))
		{
		  hash_set<set_info *> sets
		    = get_all_sets (prop.get_avl_source (), true, true, true);
		  set_info *set = get_same_bb_set (sets, e->src);
		  if (set)
		    block_info.reaching_out.set_avl_info (
		      avl_info (prop.get_avl (), set));
		}

	      block_info.local_dem = block_info.reaching_out;
	      block_info.probability = curr_block_info.probability;
	      changed_p = true;
	    }
	  else if (block_info.reaching_out.dirty_p ())
	    {
	      /* DIRTY -> DIRTY or VALID -> DIRTY.  */
	      vector_insn_info new_info;

	      if (block_info.reaching_out.compatible_p (prop))
		{
		  if (block_info.reaching_out.available_p (prop))
		    continue;
		  new_info = block_info.reaching_out.merge (prop, GLOBAL_MERGE);
		  new_info.set_dirty (
		    block_info.reaching_out.dirty_with_killed_avl_p ());
		  block_info.probability += curr_block_info.probability;
		}
	      else
		{
		  /* Incompatible: keep whichever demand is more probable.  */
		  if (curr_block_info.probability > block_info.probability)
		    {
		      enum fusion_type type
			= get_backward_fusion_type (crtl->ssa->bb (e->src),
						    prop);
		      if (type == INVALID_FUSION)
			continue;
		      new_info = prop;
		      new_info.set_dirty (type);
		      block_info.probability = curr_block_info.probability;
		    }
		  else
		    continue;
		}

	      if (propagate_avl_across_demands_p (prop,
						  block_info.reaching_out))
		{
		  rtx reg = new_info.get_avl_reg_rtx ();
		  if (find_reg_killed_by (crtl->ssa->bb (e->src), reg))
		    new_info.set_dirty (true);
		}

	      block_info.local_dem = new_info;
	      block_info.reaching_out = new_info;
	      changed_p = true;
	    }
	  else
	    {
	      /* We not only change the info during backward propagation,
		 but also change the VSETVL instruction.  */
	      gcc_assert (block_info.reaching_out.valid_p ());
	      hash_set<set_info *> sets
		= get_all_sets (prop.get_avl_source (), true, false, false);
	      set_info *set = get_same_bb_set (sets, e->src);
	      if (vsetvl_insn_p (block_info.reaching_out.get_insn ()->rtl ())
		  && prop.has_avl_reg () && !vlmax_avl_p (prop.get_avl ()))
		{
		  if (!block_info.reaching_out.same_vlmax_p (prop))
		    continue;
		  if (block_info.reaching_out.same_vtype_p (prop))
		    continue;
		  if (!set)
		    continue;
		  if (set->insn () != block_info.reaching_out.get_insn ())
		    continue;
		}

	      if (!block_info.reaching_out.compatible_p (prop))
		continue;
	      if (block_info.reaching_out.available_p (prop))
		continue;

	      vector_insn_info be_merged = block_info.reaching_out;
	      if (block_info.local_dem == block_info.reaching_out)
		be_merged = block_info.local_dem;
	      vector_insn_info new_info = be_merged.merge (prop, GLOBAL_MERGE);

	      if (curr_block_info.probability > block_info.probability)
		block_info.probability = curr_block_info.probability;

	      if (propagate_avl_across_demands_p (prop, block_info.reaching_out)
		  && !reg_available_p (crtl->ssa->bb (e->src)->end_insn (),
				       new_info))
		continue;

	      change_vsetvl_insn (new_info.get_insn (), new_info);
	      if (block_info.local_dem == block_info.reaching_out)
		block_info.local_dem = new_info;
	      block_info.reaching_out = new_info;
	      changed_p = true;
	    }
	}
    }
  return changed_p;
}
3510
/* Compute global forward demanded info.  Returns true if any block's
   demand info changed (callers iterate to a fixed point).  */
bool
pass_vsetvl::forward_demand_fusion (void)
{
  /* Enhance the global information propagation especially
     backward propagation miss the propagation.
     Consider such case:

			    bb0
			    (TU)
			   /   \
			  bb1   bb2
			  (TU)  (ANY)
  existing edge -----> \   / (TU) <----- LCM create this edge.
			    bb3
			    (TU)

     Base on the situation, LCM fails to eliminate the VSETVL instruction and
     insert an edge from bb2 to bb3 since we can't backward propagate bb3 into
     bb2.  To avoid this confusing LCM result and non-optimal codegen, we
     should forward propagate information from bb0 to bb2 which is friendly to
     LCM.  */
  bool changed_p = false;
  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      basic_block cfg_bb = bb->cfg_bb ();
      const auto &prop
	= m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out;

      /* If there is nothing to propagate, just skip it.  */
      if (!prop.valid_or_dirty_p ())
	continue;

      if (cfg_bb == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	continue;

      /* Explicit user vsetvls are never forward-propagated.  */
      if (vsetvl_insn_p (prop.get_insn ()->rtl ()))
	continue;

      edge e;
      edge_iterator ei;
      /* Forward propagate to each successor.  */
      FOR_EACH_EDGE (e, ei, cfg_bb->succs)
	{
	  /* NOTE: local_dem and reaching_out alias entries of
	     vector_block_infos; the aliasing is relied on below when
	     comparing and assigning them.  */
	  auto &local_dem
	    = m_vector_manager->vector_block_infos[e->dest->index].local_dem;
	  auto &reaching_out
	    = m_vector_manager->vector_block_infos[e->dest->index].reaching_out;

	  /* It's quite obvious, we don't need to propagate itself.  */
	  if (e->dest->index == cfg_bb->index)
	    continue;
	  /* We don't propagate through critical edges.  */
	  if (e->flags & EDGE_COMPLEX)
	    continue;
	  if (e->dest->index == EXIT_BLOCK_PTR_FOR_FN (cfun)->index)
	    continue;

	  /* If there is nothing to propagate, just skip it.  */
	  if (!local_dem.valid_or_dirty_p ())
	    continue;
	  if (local_dem.available_p (prop))
	    continue;
	  if (!local_dem.compatible_p (prop))
	    continue;
	  if (propagate_avl_across_demands_p (prop, local_dem))
	    continue;

	  vector_insn_info new_info = local_dem.merge (prop, GLOBAL_MERGE);
	  new_info.set_insn (local_dem.get_insn ());
	  if (local_dem.dirty_p ())
	    {
	      gcc_assert (local_dem == reaching_out);
	      new_info.set_dirty (local_dem.dirty_with_killed_avl_p ());
	      local_dem = new_info;
	      reaching_out = local_dem;
	    }
	  else
	    {
	      if (reaching_out == local_dem)
		reaching_out = new_info;
	      local_dem = new_info;
	      change_vsetvl_insn (local_dem.get_insn (), new_info);
	    }
	  /* The successor's probability becomes the current block's
	     probability scaled by the edge probability.  */
	  auto &prob
	    = m_vector_manager->vector_block_infos[e->dest->index].probability;
	  auto &curr_prob
	    = m_vector_manager->vector_block_infos[cfg_bb->index].probability;
	  prob = curr_prob * e->probability;
	  changed_p = true;
	}
    }
  return changed_p;
}
3604
3605void
3606pass_vsetvl::demand_fusion (void)
3607{
3608 bool changed_p = true;
3609 while (changed_p)
3610 {
3611 changed_p = false;
4f673c5e
JZZ
3612 /* To optimize the case like this:
3613 void f2 (int8_t * restrict in, int8_t * restrict out, int n, int cond)
3614 {
3615 size_t vl = 101;
3616
3617 for (size_t i = 0; i < n; i++)
3618 {
3619 vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl);
3620 __riscv_vse8_v_i8mf8 (out + i + 300, v, vl);
3621 }
3622
3623 for (size_t i = 0; i < n; i++)
3624 {
3625 vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
3626 __riscv_vse8_v_i8mf8 (out + i, v, vl);
3627
3628 vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl);
3629 __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
3630 }
3631 }
3632
3633 bb 0: li a5, 101 (killed avl)
3634 ...
3635 bb 1: vsetvli zero, a5, ta
3636 ...
3637 bb 2: li a5, 101 (killed avl)
3638 ...
3639 bb 3: vsetvli zero, a3, tu
3640
3641 We want to fuse VSEVLI instructions on bb 1 and bb 3. However, there is
3642 an AVL kill instruction in bb 2 that we can't backward fuse bb 3 or
3643 forward bb 1 arbitrarily. We need available information of each block to
3644 help for such cases. */
6b6b9c68
JZZ
3645 changed_p |= backward_demand_fusion ();
3646 changed_p |= forward_demand_fusion ();
3647 }
3648
3649 changed_p = true;
3650 while (changed_p)
3651 {
3652 changed_p = false;
3653 prune_expressions ();
3654 m_vector_manager->create_bitmap_vectors ();
3655 compute_local_properties ();
4f673c5e
JZZ
3656 compute_available (m_vector_manager->vector_comp,
3657 m_vector_manager->vector_kill,
3658 m_vector_manager->vector_avout,
3659 m_vector_manager->vector_avin);
6b6b9c68 3660 changed_p |= cleanup_illegal_dirty_blocks ();
4f673c5e
JZZ
3661 m_vector_manager->free_bitmap_vectors ();
3662 if (!m_vector_manager->vector_exprs.is_empty ())
3663 m_vector_manager->vector_exprs.release ();
387cd9d3 3664 }
9243c3d1
JZZ
3665
3666 if (dump_file)
3667 {
3668 fprintf (dump_file, "\n\nDirty blocks list: ");
681a5632
JZZ
3669 for (const bb_info *bb : crtl->ssa->bbs ())
3670 if (m_vector_manager->vector_block_infos[bb->index ()]
3671 .reaching_out.dirty_p ())
3672 fprintf (dump_file, "%d ", bb->index ());
9243c3d1
JZZ
3673 fprintf (dump_file, "\n\n");
3674 }
3675}
3676
6b6b9c68
JZZ
3677/* Cleanup illegal dirty blocks. */
3678bool
3679pass_vsetvl::cleanup_illegal_dirty_blocks (void)
3680{
3681 bool changed_p = false;
3682 for (const bb_info *bb : crtl->ssa->bbs ())
3683 {
3684 basic_block cfg_bb = bb->cfg_bb ();
3685 const auto &prop
3686 = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out;
3687
3688 /* If there is nothing to cleanup, just skip it. */
3689 if (!prop.valid_or_dirty_p ())
3690 continue;
3691
3692 if (hard_empty_block_p (bb, prop))
3693 {
3694 m_vector_manager->vector_block_infos[cfg_bb->index].local_dem
3695 = vector_insn_info::get_hard_empty ();
3696 m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out
3697 = vector_insn_info::get_hard_empty ();
3698 changed_p = true;
3699 continue;
3700 }
3701 }
3702 return changed_p;
3703}
3704
9243c3d1
JZZ
3705/* Assemble the candidates expressions for LCM. */
3706void
3707pass_vsetvl::prune_expressions (void)
3708{
681a5632 3709 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 3710 {
681a5632
JZZ
3711 if (m_vector_manager->vector_block_infos[bb->index ()]
3712 .local_dem.valid_or_dirty_p ())
9243c3d1 3713 m_vector_manager->create_expr (
681a5632
JZZ
3714 m_vector_manager->vector_block_infos[bb->index ()].local_dem);
3715 if (m_vector_manager->vector_block_infos[bb->index ()]
9243c3d1
JZZ
3716 .reaching_out.valid_or_dirty_p ())
3717 m_vector_manager->create_expr (
681a5632 3718 m_vector_manager->vector_block_infos[bb->index ()].reaching_out);
9243c3d1
JZZ
3719 }
3720
3721 if (dump_file)
3722 {
3723 fprintf (dump_file, "\nThe total VSETVL expression num = %d\n",
3724 m_vector_manager->vector_exprs.length ());
3725 fprintf (dump_file, "Expression List:\n");
3726 for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
3727 {
3728 fprintf (dump_file, "Expr[%ld]:\n", i);
3729 m_vector_manager->vector_exprs[i]->dump (dump_file);
3730 fprintf (dump_file, "\n");
3731 }
3732 }
3733}
3734
/* Compute the local properties of each recorded expression.

   Local properties are those that are defined by the block, irrespective of
   other blocks.

   An expression is transparent in a block if its operands are not modified
   in the block.

   An expression is computed (locally available) in a block if it is computed
   at least once and expression would contain the same value if the
   computation was moved to the end of the block.

   An expression is locally anticipatable in a block if it is computed at
   least once and expression would contain the same value if the computation
   was moved to the beginning of the block.  */
void
pass_vsetvl::compute_local_properties (void)
{
  /* - If T is locally available at the end of a block, then T' must be
       available at the end of the same block.  Since some optimization has
       occurred earlier, T' might not be locally available, however, it must
       have been previously computed on all paths.  As a formula, T at AVLOC(B)
       implies that T' at AVOUT(B).
       An "available occurrence" is one that is the last occurrence in the
       basic block and the operands are not modified by following statements in
       the basic block [including this insn].

     - If T is locally anticipated at the beginning of a block, then either
       T', is locally anticipated or it is already available from previous
       blocks.  As a formula, this means that T at ANTLOC(B) implies that T' at
       ANTLOC(B) at AVIN(B).
       An "anticipatable occurrence" is one that is the first occurrence in the
       basic block, the operands are not modified in the basic block prior
       to the occurrence and the output is not used between the start of
       the block and the occurrence.  */

  basic_block cfg_bb;
  for (const bb_info *bb : crtl->ssa->bbs ())
    {
      unsigned int curr_bb_idx = bb->index ();
      const auto local_dem
	= m_vector_manager->vector_block_infos[curr_bb_idx].local_dem;
      const auto reaching_out
	= m_vector_manager->vector_block_infos[curr_bb_idx].reaching_out;

      /* Compute transparent.  */
      for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
	{
	  const vector_insn_info *expr = m_vector_manager->vector_exprs[i];
	  if (local_dem.real_dirty_p () || local_dem.valid_p ()
	      || local_dem.unknown_p ()
	      || has_vsetvl_killed_avl_p (bb, local_dem))
	    bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx], i);
	  /* FIXME: Here we set the block as non-transparent (killed) if there
	     is an instruction killed the value of AVL according to the
	     definition of Local transparent.  This is true for such following
	     case:

		bb 0 (Loop label):
		  vsetvl zero, a5, e8, mf8
		bb 1:
		  def a5
		bb 2:
		  branch bb 0 (Loop label).

	     In this case, we known there is a loop bb 0->bb 1->bb 2.  According
	     to LCM definition, it is correct when we set vsetvl zero, a5, e8,
	     mf8 as non-transparent (killed) so that LCM will not hoist outside
	     the bb 0.

	     However, such conservative configuration will forbid optimization
	     on some unlucky case.  For example:

		bb 0:
		  li a5, 101
		bb 1:
		  vsetvl zero, a5, e8, mf8
		bb 2:
		  li a5, 101
		bb 3:
		  vsetvl zero, a5, e8, mf8.
	     So we also relax def a5 as transparent to gain more optimizations
	     as long as the all real def insn of avl do not come from this
	     block.  This configuration may be still missing some optimization
	     opportunities.  */
	  if (find_reg_killed_by (bb, expr->get_avl ()))
	    {
	      hash_set<set_info *> sets
		= get_all_sets (expr->get_avl_source (), true, false, false);
	      if (any_set_in_bb_p (sets, bb))
		bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx],
				  i);
	    }
	}

      /* Compute anticipatable occurrences.  */
      if (local_dem.valid_p () || local_dem.real_dirty_p ()
	  || (has_vsetvl_killed_avl_p (bb, local_dem)
	      && vlmax_avl_p (local_dem.get_avl ())))
	if (anticipatable_occurrence_p (bb, local_dem))
	  bitmap_set_bit (m_vector_manager->vector_antic[curr_bb_idx],
			  m_vector_manager->get_expr_id (local_dem));

      /* Compute available occurrences.  */
      if (reaching_out.valid_or_dirty_p ())
	{
	  auto_vec<size_t> available_list
	    = m_vector_manager->get_all_available_exprs (reaching_out);
	  for (size_t i = 0; i < available_list.length (); i++)
	    {
	      const vector_insn_info *expr
		= m_vector_manager->vector_exprs[available_list[i]];
	      if (reaching_out.real_dirty_p ()
		  || has_vsetvl_killed_avl_p (bb, reaching_out)
		  || available_occurrence_p (bb, *expr))
		bitmap_set_bit (m_vector_manager->vector_comp[curr_bb_idx],
				available_list[i]);
	    }
	}
    }

  /* Compute kill for each basic block using:

     ~(TRANSP | COMP)
  */

  FOR_EACH_BB_FN (cfg_bb, cfun)
    {
      bitmap_ior (m_vector_manager->vector_kill[cfg_bb->index],
		  m_vector_manager->vector_transp[cfg_bb->index],
		  m_vector_manager->vector_comp[cfg_bb->index]);
      bitmap_not (m_vector_manager->vector_kill[cfg_bb->index],
		  m_vector_manager->vector_kill[cfg_bb->index]);
    }

  FOR_EACH_BB_FN (cfg_bb, cfun)
    {
      edge e;
      edge_iterator ei;

      /* If the current block is the destination of an abnormal edge, we
	 kill all trapping (for PRE) and memory (for hoist) expressions
	 because we won't be able to properly place the instruction on
	 the edge.  So make them neither anticipatable nor transparent.
	 This is fairly conservative.

	 ??? For hoisting it may be necessary to check for set-and-jump
	 instructions here, not just for abnormal edges.  The general problem
	 is that when an expression cannot not be placed right at the end of
	 a basic block we should account for any side-effects of a subsequent
	 jump instructions that could clobber the expression.  It would
	 be best to implement this check along the lines of
	 should_hoist_expr_to_dom where the target block is already known
	 and, hence, there's no need to conservatively prune expressions on
	 "intermediate" set-and-jump instructions.  */
      FOR_EACH_EDGE (e, ei, cfg_bb->preds)
	if (e->flags & EDGE_COMPLEX)
	  {
	    bitmap_clear (m_vector_manager->vector_antic[cfg_bb->index]);
	    bitmap_clear (m_vector_manager->vector_transp[cfg_bb->index]);
	  }
    }
}
3898
3899/* Return true if VSETVL in the block can be refined as vsetvl zero,zero. */
3900bool
6b6b9c68
JZZ
3901pass_vsetvl::can_refine_vsetvl_p (const basic_block cfg_bb,
3902 const vector_insn_info &info) const
9243c3d1
JZZ
3903{
3904 if (!m_vector_manager->all_same_ratio_p (
3905 m_vector_manager->vector_avin[cfg_bb->index]))
3906 return false;
3907
005fad9d
JZZ
3908 if (!m_vector_manager->all_same_avl_p (
3909 cfg_bb, m_vector_manager->vector_avin[cfg_bb->index]))
3910 return false;
3911
9243c3d1
JZZ
3912 size_t expr_id
3913 = bitmap_first_set_bit (m_vector_manager->vector_avin[cfg_bb->index]);
6b6b9c68
JZZ
3914 if (!m_vector_manager->vector_exprs[expr_id]->same_vlmax_p (info))
3915 return false;
3916 if (!m_vector_manager->vector_exprs[expr_id]->compatible_avl_p (info))
9243c3d1
JZZ
3917 return false;
3918
3919 edge e;
3920 edge_iterator ei;
3921 bool all_valid_p = true;
3922 FOR_EACH_EDGE (e, ei, cfg_bb->preds)
3923 {
3924 if (bitmap_empty_p (m_vector_manager->vector_avout[e->src->index]))
3925 {
3926 all_valid_p = false;
3927 break;
3928 }
3929 }
3930
3931 if (!all_valid_p)
3932 return false;
3933 return true;
3934}
3935
3936/* Optimize athe case like this:
3937
3938 bb 0:
3939 vsetvl 0 a5,zero,e8,mf8
3940 insn 0 (demand SEW + LMUL)
3941 bb 1:
3942 vsetvl 1 a5,zero,e16,mf4
3943 insn 1 (demand SEW + LMUL)
3944
3945 In this case, we should be able to refine
3946 vsetvl 1 into vsetvl zero, zero according AVIN. */
3947void
3948pass_vsetvl::refine_vsetvls (void) const
3949{
3950 basic_block cfg_bb;
3951 FOR_EACH_BB_FN (cfg_bb, cfun)
3952 {
c139f5e1 3953 auto info = get_block_info(cfg_bb).local_dem;
9243c3d1
JZZ
3954 insn_info *insn = info.get_insn ();
3955 if (!info.valid_p ())
3956 continue;
3957
3958 rtx_insn *rinsn = insn->rtl ();
6b6b9c68 3959 if (!can_refine_vsetvl_p (cfg_bb, info))
9243c3d1
JZZ
3960 continue;
3961
ec99ffab
JZZ
3962 /* We can't refine user vsetvl into vsetvl zero,zero since the dest
3963 will be used by the following instructions. */
3964 if (vector_config_insn_p (rinsn))
3965 {
3966 m_vector_manager->to_refine_vsetvls.add (rinsn);
3967 continue;
3968 }
ff8f9544
JZ
3969
3970 /* If all incoming edges to a block have a vector state that is compatbile
3971 with the block. In such a case we need not emit a vsetvl in the current
3972 block. */
3973
3974 gcc_assert (has_vtype_op (insn->rtl ()));
3975 rinsn = PREV_INSN (insn->rtl ());
3976 gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ())));
3977 if (m_vector_manager->all_avail_in_compatible_p (cfg_bb))
3978 {
3979 size_t id = m_vector_manager->get_expr_id (info);
3980 if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], id))
3981 continue;
3982 eliminate_insn (rinsn);
3983 }
3984 else
3985 {
3986 rtx new_pat
3987 = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX);
3988 change_insn (rinsn, new_pat);
3989 }
9243c3d1
JZZ
3990 }
3991}
3992
3993void
3994pass_vsetvl::cleanup_vsetvls ()
3995{
3996 basic_block cfg_bb;
3997 FOR_EACH_BB_FN (cfg_bb, cfun)
3998 {
3999 auto &info
c139f5e1 4000 = get_block_info(cfg_bb).reaching_out;
9243c3d1
JZZ
4001 gcc_assert (m_vector_manager->expr_set_num (
4002 m_vector_manager->vector_del[cfg_bb->index])
4003 <= 1);
4004 for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
4005 {
4006 if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], i))
4007 {
4008 if (info.dirty_p ())
4009 info.set_unknown ();
4010 else
4011 {
4f673c5e 4012 const auto dem
c139f5e1 4013 = get_block_info(cfg_bb)
4f673c5e
JZZ
4014 .local_dem;
4015 gcc_assert (dem == *m_vector_manager->vector_exprs[i]);
4016 insn_info *insn = dem.get_insn ();
9243c3d1
JZZ
4017 gcc_assert (insn && insn->rtl ());
4018 rtx_insn *rinsn;
ec99ffab
JZZ
4019 /* We can't eliminate user vsetvl since the dest will be used
4020 * by the following instructions. */
9243c3d1 4021 if (vector_config_insn_p (insn->rtl ()))
9243c3d1 4022 {
ec99ffab
JZZ
4023 m_vector_manager->to_delete_vsetvls.add (insn->rtl ());
4024 continue;
9243c3d1 4025 }
ec99ffab
JZZ
4026
4027 gcc_assert (has_vtype_op (insn->rtl ()));
4028 rinsn = PREV_INSN (insn->rtl ());
4029 gcc_assert (vector_config_insn_p (PREV_INSN (insn->rtl ())));
9243c3d1
JZZ
4030 eliminate_insn (rinsn);
4031 }
4032 }
4033 }
4034 }
4035}
4036
4037bool
4038pass_vsetvl::commit_vsetvls (void)
4039{
4040 bool need_commit = false;
4041
4042 for (int ed = 0; ed < NUM_EDGES (m_vector_manager->vector_edge_list); ed++)
4043 {
4044 for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
4045 {
4046 edge eg = INDEX_EDGE (m_vector_manager->vector_edge_list, ed);
4047 if (bitmap_bit_p (m_vector_manager->vector_insert[ed], i))
4048 {
4049 const vector_insn_info *require
4050 = m_vector_manager->vector_exprs[i];
4051 gcc_assert (require->valid_or_dirty_p ());
4052 rtl_profile_for_edge (eg);
4053 start_sequence ();
4054
4055 insn_info *insn = require->get_insn ();
4056 vector_insn_info prev_info = vector_insn_info ();
005fad9d
JZZ
4057 sbitmap bitdata = m_vector_manager->vector_avout[eg->src->index];
4058 if (m_vector_manager->all_same_ratio_p (bitdata)
4059 && m_vector_manager->all_same_avl_p (eg->dest, bitdata))
9243c3d1 4060 {
005fad9d 4061 size_t first = bitmap_first_set_bit (bitdata);
9243c3d1
JZZ
4062 prev_info = *m_vector_manager->vector_exprs[first];
4063 }
4064
4065 insert_vsetvl (EMIT_DIRECT, insn->rtl (), *require, prev_info);
4066 rtx_insn *rinsn = get_insns ();
4067 end_sequence ();
4068 default_rtl_profile ();
4069
4070 /* We should not get an abnormal edge here. */
4071 gcc_assert (!(eg->flags & EDGE_ABNORMAL));
4072 need_commit = true;
4073 insert_insn_on_edge (rinsn, eg);
4074 }
4075 }
4076 }
4077
4f673c5e 4078 for (const bb_info *bb : crtl->ssa->bbs ())
9243c3d1 4079 {
4f673c5e 4080 basic_block cfg_bb = bb->cfg_bb ();
9243c3d1 4081 const auto reaching_out
c139f5e1 4082 = get_block_info(cfg_bb).reaching_out;
9243c3d1
JZZ
4083 if (!reaching_out.dirty_p ())
4084 continue;
4085
4f673c5e
JZZ
4086 if (reaching_out.dirty_with_killed_avl_p ())
4087 {
4088 if (!has_vsetvl_killed_avl_p (bb, reaching_out))
4089 continue;
4090
4091 unsigned int bb_index;
4092 sbitmap_iterator sbi;
4093 sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index];
4094 bool available_p = false;
4095 EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi)
4096 {
ec99ffab
JZZ
4097 if (m_vector_manager->vector_exprs[bb_index]->available_p (
4098 reaching_out))
4f673c5e
JZZ
4099 {
4100 available_p = true;
4101 break;
4102 }
4103 }
4104 if (available_p)
4105 continue;
4106 }
7ae4d1df
JZZ
4107
4108 rtx new_pat;
ec99ffab
JZZ
4109 if (!reaching_out.demand_p (DEMAND_AVL))
4110 {
4111 vl_vtype_info new_info = reaching_out;
4112 new_info.set_avl_info (avl_info (const0_rtx, nullptr));
4113 new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, new_info, NULL_RTX);
4114 }
4115 else if (can_refine_vsetvl_p (cfg_bb, reaching_out))
9243c3d1
JZZ
4116 new_pat
4117 = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, reaching_out, NULL_RTX);
7ae4d1df
JZZ
4118 else if (vlmax_avl_p (reaching_out.get_avl ()))
4119 new_pat = gen_vsetvl_pat (VSETVL_NORMAL, reaching_out,
ec99ffab 4120 reaching_out.get_avl_reg_rtx ());
7ae4d1df
JZZ
4121 else
4122 new_pat
4123 = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, reaching_out, NULL_RTX);
9243c3d1
JZZ
4124
4125 start_sequence ();
4126 emit_insn (new_pat);
4127 rtx_insn *rinsn = get_insns ();
4128 end_sequence ();
4129 insert_insn_end_basic_block (rinsn, cfg_bb);
4130 if (dump_file)
4131 {
4132 fprintf (dump_file,
4133 "\nInsert vsetvl insn %d at the end of <bb %d>:\n",
4134 INSN_UID (rinsn), cfg_bb->index);
4135 print_rtl_single (dump_file, rinsn);
4136 }
4137 }
4138
4139 return need_commit;
4140}
4141
4142void
4143pass_vsetvl::pre_vsetvl (void)
4144{
4145 /* Compute entity list. */
4146 prune_expressions ();
4147
cfe3fbc6 4148 m_vector_manager->create_bitmap_vectors ();
9243c3d1
JZZ
4149 compute_local_properties ();
4150 m_vector_manager->vector_edge_list = pre_edge_lcm_avs (
4151 m_vector_manager->vector_exprs.length (), m_vector_manager->vector_transp,
4152 m_vector_manager->vector_comp, m_vector_manager->vector_antic,
4153 m_vector_manager->vector_kill, m_vector_manager->vector_avin,
4154 m_vector_manager->vector_avout, &m_vector_manager->vector_insert,
4155 &m_vector_manager->vector_del);
4156
4157 /* We should dump the information before CFG is changed. Otherwise it will
4158 produce ICE (internal compiler error). */
4159 if (dump_file)
4160 m_vector_manager->dump (dump_file);
4161
4162 refine_vsetvls ();
4163 cleanup_vsetvls ();
4164 bool need_commit = commit_vsetvls ();
4165 if (need_commit)
4166 commit_edge_insertions ();
4167}
4168
c919d059
KC
4169/* Some instruction can not be accessed in RTL_SSA when we don't re-init
4170 the new RTL_SSA framework but it is definetely at the END of the block.
4171
4172 Here we optimize the VSETVL is hoisted by LCM:
4173
4174 Before LCM:
4175 bb 1:
4176 vsetvli a5,a2,e32,m1,ta,mu
4177 bb 2:
4178 vsetvli zero,a5,e32,m1,ta,mu
4179 ...
4180
4181 After LCM:
4182 bb 1:
4183 vsetvli a5,a2,e32,m1,ta,mu
4184 LCM INSERTED: vsetvli zero,a5,e32,m1,ta,mu --> eliminate
4185 bb 2:
4186 ...
4187 */
4188rtx_insn *
4189pass_vsetvl::get_vsetvl_at_end (const bb_info *bb, vector_insn_info *dem) const
4190{
4191 rtx_insn *end_vsetvl = BB_END (bb->cfg_bb ());
4192 if (end_vsetvl && NONDEBUG_INSN_P (end_vsetvl))
4193 {
4194 if (JUMP_P (end_vsetvl))
4195 end_vsetvl = PREV_INSN (end_vsetvl);
4196
4197 if (NONDEBUG_INSN_P (end_vsetvl)
4198 && vsetvl_discard_result_insn_p (end_vsetvl))
4199 {
4200 /* Only handle single succ. here, multiple succ. is much
4201 more complicated. */
4202 if (single_succ_p (bb->cfg_bb ()))
4203 {
4204 edge e = single_succ_edge (bb->cfg_bb ());
4205 *dem = get_block_info (e->dest).local_dem;
4206 return end_vsetvl;
4207 }
4208 }
4209 }
4210 return nullptr;
4211}
4212
4213/* This predicator should only used within same basic block. */
4214static bool
4215local_avl_compatible_p (rtx avl1, rtx avl2)
4216{
4217 if (!REG_P (avl1) || !REG_P (avl2))
4218 return false;
4219
4220 return REGNO (avl1) == REGNO (avl2);
4221}
4222
8421f279
JZ
4223/* Local user vsetvl optimizaiton:
4224
4225 Case 1:
4226 vsetvl a5,a4,e8,mf8
4227 ...
4228 vsetvl zero,a5,e8,mf8 --> Eliminate directly.
4229
4230 Case 2:
4231 vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2
4232 ...
4233 vsetvl zero,a5,e32,mf2 --> Eliminate directly. */
4234void
c919d059
KC
4235pass_vsetvl::local_eliminate_vsetvl_insn (const bb_info *bb) const
4236{
4237 rtx_insn *prev_vsetvl = nullptr;
4238 rtx_insn *curr_vsetvl = nullptr;
4239 rtx vl_placeholder = RVV_VLMAX;
4240 rtx prev_avl = vl_placeholder;
4241 rtx curr_avl = vl_placeholder;
4242 vector_insn_info prev_dem;
4243
4244 /* Instruction inserted by LCM is not appeared in RTL-SSA yet, try to
4245 found those instruciton. */
4246 if (rtx_insn *end_vsetvl = get_vsetvl_at_end (bb, &prev_dem))
8421f279 4247 {
c919d059
KC
4248 prev_avl = get_avl (end_vsetvl);
4249 prev_vsetvl = end_vsetvl;
4250 }
4251
4252 bool skip_one = false;
4253 /* Backward propgate vsetvl info, drop the later one (prev_vsetvl) if it's
4254 compatible with current vsetvl (curr_avl), and merge the vtype and avl
4255 info. into current vsetvl. */
4256 for (insn_info *insn : bb->reverse_real_nondebug_insns ())
4257 {
4258 rtx_insn *rinsn = insn->rtl ();
4259 const auto &curr_dem = get_vector_info (insn);
4260 bool need_invalidate = false;
4261
4262 /* Skip if this insn already handled in last iteration. */
4263 if (skip_one)
4264 {
4265 skip_one = false;
4266 continue;
4267 }
4268
4269 if (vsetvl_insn_p (rinsn))
4270 {
4271 curr_vsetvl = rinsn;
4272 /* vsetvl are using vl rather than avl since it will try to merge
4273 with other vsetvl_discard_result.
4274
4275 v--- avl
4276 vsetvl a5,a4,e8,mf8 # vsetvl
4277 ... ^--- vl
4278 vsetvl zero,a5,e8,mf8 # vsetvl_discard_result
4279 ^--- avl
4280 */
4281 curr_avl = get_vl (rinsn);
4282 /* vsetvl is a cut point of local backward vsetvl elimination. */
4283 need_invalidate = true;
4284 }
4285 else if (has_vtype_op (rinsn) && NONDEBUG_INSN_P (PREV_INSN (rinsn))
4286 && (vsetvl_discard_result_insn_p (PREV_INSN (rinsn))
4287 || vsetvl_insn_p (PREV_INSN (rinsn))))
8421f279 4288 {
c919d059 4289 curr_vsetvl = PREV_INSN (rinsn);
8421f279 4290
c919d059 4291 if (vsetvl_insn_p (PREV_INSN (rinsn)))
8421f279 4292 {
c919d059
KC
4293 /* Need invalidate and skip if it's vsetvl. */
4294 need_invalidate = true;
4295 /* vsetvl_discard_result_insn_p won't appeared in RTL-SSA,
4296 * so only need to skip for vsetvl. */
4297 skip_one = true;
8421f279 4298 }
c919d059
KC
4299
4300 curr_avl = get_avl (rinsn);
4301
4302 /* Some instrucion like pred_extract_first<mode> don't reqruie avl, so
4303 the avl is null, use vl_placeholder for unify the handling
4304 logic. */
4305 if (!curr_avl)
4306 curr_avl = vl_placeholder;
4307 }
4308 else if (insn->is_call () || insn->is_asm ()
4309 || find_access (insn->defs (), VL_REGNUM)
4310 || find_access (insn->defs (), VTYPE_REGNUM)
4311 || (REG_P (prev_avl)
4312 && find_access (insn->defs (), REGNO (prev_avl))))
4313 {
4314 /* Invalidate if this insn can't propagate vl, vtype or avl. */
4315 need_invalidate = true;
4316 prev_dem = vector_insn_info ();
4317 }
4318 else
4319 /* Not interested instruction. */
4320 continue;
4321
4322 /* Local AVL compatibility checking is simpler than global, we only
4323 need to check the REGNO is same. */
20c85207 4324 if (prev_dem.valid_or_dirty_p () && prev_dem.skip_avl_compatible_p (curr_dem)
c919d059
KC
4325 && local_avl_compatible_p (prev_avl, curr_avl))
4326 {
4327 /* curr_dem and prev_dem is compatible! */
4328 /* Update avl info since we need to make sure they are fully
4329 compatible before merge. */
4330 prev_dem.set_avl_info (curr_dem.get_avl_info ());
4331 /* Merge both and update into curr_vsetvl. */
4332 prev_dem = curr_dem.merge (prev_dem, LOCAL_MERGE);
4333 change_vsetvl_insn (curr_dem.get_insn (), prev_dem);
4334 /* Then we can drop prev_vsetvl. */
4335 eliminate_insn (prev_vsetvl);
4336 }
4337
4338 if (need_invalidate)
4339 {
4340 prev_vsetvl = nullptr;
4341 curr_vsetvl = nullptr;
4342 prev_avl = vl_placeholder;
4343 curr_avl = vl_placeholder;
4344 prev_dem = vector_insn_info ();
4345 }
4346 else
4347 {
4348 prev_vsetvl = curr_vsetvl;
4349 prev_avl = curr_avl;
4350 prev_dem = curr_dem;
8421f279
JZ
4351 }
4352 }
4353}
4354
20c85207
JZ
4355/* Return the first vsetvl instruction in CFG_BB or NULL if
4356 none exists or if a user RVV instruction is enountered
4357 prior to any vsetvl. */
4358static rtx_insn *
4359get_first_vsetvl_before_rvv_insns (basic_block cfg_bb)
4360{
4361 rtx_insn *rinsn;
4362 FOR_BB_INSNS (cfg_bb, rinsn)
4363 {
4364 if (!NONDEBUG_INSN_P (rinsn))
4365 continue;
4366 /* If we don't find any inserted vsetvli before user RVV instructions,
4367 we don't need to optimize the vsetvls in this block. */
4368 if (has_vtype_op (rinsn) || vsetvl_insn_p (rinsn))
4369 return nullptr;
4370
4371 if (vsetvl_discard_result_insn_p (rinsn))
4372 return rinsn;
4373 }
4374 return nullptr;
4375}
4376
4377/* Global user vsetvl optimizaiton:
4378
4379 Case 1:
4380 bb 1:
4381 vsetvl a5,a4,e8,mf8
4382 ...
4383 bb 2:
4384 ...
4385 vsetvl zero,a5,e8,mf8 --> Eliminate directly.
4386
4387 Case 2:
4388 bb 1:
4389 vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2
4390 ...
4391 bb 2:
4392 ...
4393 vsetvl zero,a5,e32,mf2 --> Eliminate directly.
4394
4395 Case 3:
4396 bb 1:
4397 vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2
4398 ...
4399 bb 2:
4400 ...
4401 vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2
4402 goto bb 3
4403 bb 3:
4404 ...
4405 vsetvl zero,a5,e32,mf2 --> Eliminate directly.
4406*/
4407bool
4408pass_vsetvl::global_eliminate_vsetvl_insn (const bb_info *bb) const
4409{
27612ce3 4410 rtx_insn *vsetvl_rinsn = NULL;
20c85207
JZ
4411 vector_insn_info dem = vector_insn_info ();
4412 const auto &block_info = get_block_info (bb);
4413 basic_block cfg_bb = bb->cfg_bb ();
4414
4415 if (block_info.local_dem.valid_or_dirty_p ())
4416 {
4417 /* Optimize the local vsetvl. */
4418 dem = block_info.local_dem;
4419 vsetvl_rinsn = get_first_vsetvl_before_rvv_insns (cfg_bb);
4420 }
4421 if (!vsetvl_rinsn)
4422 /* Optimize the global vsetvl inserted by LCM. */
4423 vsetvl_rinsn = get_vsetvl_at_end (bb, &dem);
4424
4425 /* No need to optimize if block doesn't have vsetvl instructions. */
4426 if (!dem.valid_or_dirty_p ()
4427 || !vsetvl_rinsn
4428 || !dem.get_avl_source ()
4429 || !dem.has_avl_reg ())
4430 return false;
4431
4432 /* Condition 1: Check it has preds. */
4433 if (EDGE_COUNT (cfg_bb->preds) == 0)
4434 return false;
4435
4436 /* If all preds has VL/VTYPE status setted by user vsetvls, and these
4437 user vsetvls are all skip_avl_compatible_p with the vsetvl in this
4438 block, we can eliminate this vsetvl instruction. */
4439 sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index];
4440
4441 unsigned int bb_index;
4442 sbitmap_iterator sbi;
4443 rtx avl = get_avl (dem.get_insn ()->rtl ());
4444 hash_set<set_info *> sets
4445 = get_all_sets (dem.get_avl_source (), true, false, false);
4446 /* Condition 2: All VL/VTYPE available in are all compatible. */
4447 EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi)
4448 {
4449 const auto &expr = m_vector_manager->vector_exprs[bb_index];
4450 const auto &insn = expr->get_insn ();
4451 def_info *def = find_access (insn->defs (), REGNO (avl));
4452 set_info *set = safe_dyn_cast<set_info *> (def);
4453 if (!vsetvl_insn_p (insn->rtl ()) || insn->bb () == bb
4454 || !sets.contains (set))
4455 return false;
4456 }
4457
4458 /* Condition 3: We don't do the global optimization for the block
4459 has a pred is entry block or exit block. */
4460 /* Condition 4: All preds have available VL/VTYPE out. */
4461 edge e;
4462 edge_iterator ei;
4463 FOR_EACH_EDGE (e, ei, cfg_bb->preds)
4464 {
4465 sbitmap avout = m_vector_manager->vector_avout[e->src->index];
4466 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
4467 || e->src == EXIT_BLOCK_PTR_FOR_FN (cfun) || bitmap_empty_p (avout))
4468 return false;
4469
4470 EXECUTE_IF_SET_IN_BITMAP (avout, 0, bb_index, sbi)
4471 {
4472 const auto &expr = m_vector_manager->vector_exprs[bb_index];
4473 const auto &insn = expr->get_insn ();
4474 def_info *def = find_access (insn->defs (), REGNO (avl));
4475 set_info *set = safe_dyn_cast<set_info *> (def);
4476 if (!vsetvl_insn_p (insn->rtl ()) || insn->bb () == bb
4477 || !sets.contains (set) || !expr->skip_avl_compatible_p (dem))
4478 return false;
4479 }
4480 }
4481
4482 /* Step1: Reshape the VL/VTYPE status to make sure everything compatible. */
4483 hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb);
4484 FOR_EACH_EDGE (e, ei, cfg_bb->preds)
4485 {
4486 sbitmap avout = m_vector_manager->vector_avout[e->src->index];
4487 EXECUTE_IF_SET_IN_BITMAP (avout, 0, bb_index, sbi)
4488 {
4489 vector_insn_info prev_dem = *m_vector_manager->vector_exprs[bb_index];
4490 vector_insn_info curr_dem = dem;
4491 insn_info *insn = prev_dem.get_insn ();
4492 if (!pred_cfg_bbs.contains (insn->bb ()->cfg_bb ()))
4493 continue;
4494 /* Update avl info since we need to make sure they are fully
4495 compatible before merge. */
4496 curr_dem.set_avl_info (prev_dem.get_avl_info ());
4497 /* Merge both and update into curr_vsetvl. */
4498 prev_dem = curr_dem.merge (prev_dem, LOCAL_MERGE);
4499 change_vsetvl_insn (insn, prev_dem);
4500 }
4501 }
4502
4503 /* Step2: eliminate the vsetvl instruction. */
4504 eliminate_insn (vsetvl_rinsn);
4505 return true;
4506}
4507
4508/* This function does the following post optimization base on RTL_SSA:
4509
4510 1. Local user vsetvl optimizations.
4511 2. Global user vsetvl optimizations.
4512 3. AVL dependencies removal:
4513 Before VSETVL PASS, RVV instructions pattern is depending on AVL operand
4514 implicitly. Since we will emit VSETVL instruction and make RVV
4515 instructions depending on VL/VTYPE global status registers, we remove the
4516 such AVL operand in the RVV instructions pattern here in order to remove
4517 AVL dependencies when AVL operand is a register operand.
4518
4519 Before the VSETVL PASS:
4520 li a5,32
4521 ...
4522 vadd.vv (..., a5)
4523 After the VSETVL PASS:
4524 li a5,32
4525 vsetvli zero, a5, ...
4526 ...
4527 vadd.vv (..., const_int 0). */
9243c3d1 4528void
20c85207 4529pass_vsetvl::ssa_post_optimization (void) const
9243c3d1
JZZ
4530{
4531 for (const bb_info *bb : crtl->ssa->bbs ())
4532 {
c919d059 4533 local_eliminate_vsetvl_insn (bb);
20c85207
JZ
4534 bool changed_p = true;
4535 while (changed_p)
4536 {
4537 changed_p = false;
4538 changed_p |= global_eliminate_vsetvl_insn (bb);
4539 }
9243c3d1
JZZ
4540 for (insn_info *insn : bb->real_nondebug_insns ())
4541 {
4542 rtx_insn *rinsn = insn->rtl ();
9243c3d1
JZZ
4543 if (vlmax_avl_insn_p (rinsn))
4544 {
4545 eliminate_insn (rinsn);
4546 continue;
4547 }
4548
4549 /* Erase the AVL operand from the instruction. */
4550 if (!has_vl_op (rinsn) || !REG_P (get_vl (rinsn)))
4551 continue;
4552 rtx avl = get_vl (rinsn);
a2d12abe 4553 if (count_regno_occurrences (rinsn, REGNO (avl)) == 1)
9243c3d1
JZZ
4554 {
4555 /* Get the list of uses for the new instruction. */
4556 auto attempt = crtl->ssa->new_change_attempt ();
4557 insn_change change (insn);
4558 /* Remove the use of the substituted value. */
4559 access_array_builder uses_builder (attempt);
4560 uses_builder.reserve (insn->num_uses () - 1);
4561 for (use_info *use : insn->uses ())
4562 if (use != find_access (insn->uses (), REGNO (avl)))
4563 uses_builder.quick_push (use);
4564 use_array new_uses = use_array (uses_builder.finish ());
4565 change.new_uses = new_uses;
4566 change.move_range = insn->ebb ()->insn_range ();
60bd33bc
JZZ
4567 rtx pat;
4568 if (fault_first_load_p (rinsn))
4569 pat = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx);
4570 else
4571 {
4572 rtx set = single_set (rinsn);
4573 rtx src
4574 = simplify_replace_rtx (SET_SRC (set), avl, const0_rtx);
4575 pat = gen_rtx_SET (SET_DEST (set), src);
4576 }
1d339ce8
KC
4577 bool ok = change_insn (crtl->ssa, change, insn, pat);
4578 gcc_assert (ok);
9243c3d1
JZZ
4579 }
4580 }
4581 }
4582}
4583
20c85207
JZ
4584/* Return true if the SET result is not used by any instructions. */
4585static bool
4586has_no_uses (basic_block cfg_bb, rtx_insn *rinsn, int regno)
4587{
4588 /* Handle the following case that can not be detected in RTL_SSA. */
4589 /* E.g.
4590 li a5, 100
4591 vsetvli a6, a5...
4592 ...
4593 vadd (use a6)
4594
4595 The use of "a6" is removed from "vadd" but the information is
4596 not updated in RTL_SSA framework. We don't want to re-new
4597 a new RTL_SSA which is expensive, instead, we use data-flow
4598 analysis to check whether "a6" has no uses. */
4599 if (bitmap_bit_p (df_get_live_out (cfg_bb), regno))
4600 return false;
4601
4602 rtx_insn *iter;
4603 for (iter = NEXT_INSN (rinsn); iter && iter != NEXT_INSN (BB_END (cfg_bb));
4604 iter = NEXT_INSN (iter))
4605 if (df_find_use (iter, regno_reg_rtx[regno]))
4606 return false;
4607
4608 return true;
4609}
4610
4611/* This function does the following post optimization base on dataflow
4612 analysis:
4613
4614 1. Change vsetvl rd, rs1 --> vsevl zero, rs1, if rd is not used by any
4615 nondebug instructions. Even though this PASS runs after RA and it doesn't
4616 help for reduce register pressure, it can help instructions scheduling since
4617 we remove the dependencies.
4618
4619 2. Remove redundant user vsetvls base on outcome of Phase 4 (LCM) && Phase 5
4620 (AVL dependencies removal). */
6b6b9c68 4621void
20c85207
JZ
4622pass_vsetvl::df_post_optimization (void) const
4623{
6b6b9c68 4624 df_analyze ();
6b6b9c68 4625 hash_set<rtx_insn *> to_delete;
20c85207
JZ
4626 basic_block cfg_bb;
4627 rtx_insn *rinsn;
4628 FOR_ALL_BB_FN (cfg_bb, cfun)
6b6b9c68 4629 {
20c85207 4630 FOR_BB_INSNS (cfg_bb, rinsn)
6b6b9c68 4631 {
20c85207 4632 if (NONDEBUG_INSN_P (rinsn) && vsetvl_insn_p (rinsn))
6b6b9c68 4633 {
20c85207 4634 rtx vl = get_vl (rinsn);
ec99ffab 4635 vector_insn_info info;
20c85207
JZ
4636 info.parse_insn (rinsn);
4637 bool to_delete_p = m_vector_manager->to_delete_p (rinsn);
4638 bool to_refine_p = m_vector_manager->to_refine_p (rinsn);
4639 if (has_no_uses (cfg_bb, rinsn, REGNO (vl)))
ec99ffab 4640 {
20c85207
JZ
4641 if (to_delete_p)
4642 to_delete.add (rinsn);
4643 else if (to_refine_p)
ec99ffab
JZZ
4644 {
4645 rtx new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY,
4646 info, NULL_RTX);
20c85207
JZ
4647 validate_change (rinsn, &PATTERN (rinsn), new_pat, false);
4648 }
4649 else if (!vlmax_avl_p (info.get_avl ()))
4650 {
4651 rtx new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, info,
4652 NULL_RTX);
4653 validate_change (rinsn, &PATTERN (rinsn), new_pat, false);
ec99ffab 4654 }
6b6b9c68
JZZ
4655 }
4656 }
4657 }
4658 }
6b6b9c68
JZZ
4659 for (rtx_insn *rinsn : to_delete)
4660 eliminate_insn (rinsn);
4661}
4662
9243c3d1
JZZ
4663void
4664pass_vsetvl::init (void)
4665{
4666 if (optimize > 0)
4667 {
4668 /* Initialization of RTL_SSA. */
4669 calculate_dominance_info (CDI_DOMINATORS);
4670 df_analyze ();
4671 crtl->ssa = new function_info (cfun);
4672 }
4673
4674 m_vector_manager = new vector_infos_manager ();
4f673c5e 4675 compute_probabilities ();
9243c3d1
JZZ
4676
4677 if (dump_file)
4678 {
4679 fprintf (dump_file, "\nPrologue: Initialize vector infos\n");
4680 m_vector_manager->dump (dump_file);
4681 }
4682}
4683
4684void
4685pass_vsetvl::done (void)
4686{
4687 if (optimize > 0)
4688 {
4689 /* Finalization of RTL_SSA. */
4690 free_dominance_info (CDI_DOMINATORS);
4691 if (crtl->ssa->perform_pending_updates ())
4692 cleanup_cfg (0);
4693 delete crtl->ssa;
4694 crtl->ssa = nullptr;
4695 }
4696 m_vector_manager->release ();
4697 delete m_vector_manager;
4698 m_vector_manager = nullptr;
4699}
4700
acc10c79
JZZ
4701/* Compute probability for each block. */
4702void
4703pass_vsetvl::compute_probabilities (void)
4704{
4705 /* Don't compute it in -O0 since we don't need it. */
4706 if (!optimize)
4707 return;
4708 edge e;
4709 edge_iterator ei;
4710
4711 for (const bb_info *bb : crtl->ssa->bbs ())
4712 {
4713 basic_block cfg_bb = bb->cfg_bb ();
4714 auto &curr_prob
c139f5e1 4715 = get_block_info(cfg_bb).probability;
c129d22d
JZZ
4716
4717 /* GCC assume entry block (bb 0) are always so
4718 executed so set its probability as "always". */
acc10c79
JZZ
4719 if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
4720 curr_prob = profile_probability::always ();
c129d22d
JZZ
4721 /* Exit block (bb 1) is the block we don't need to process. */
4722 if (EXIT_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
4723 continue;
4724
acc10c79
JZZ
4725 gcc_assert (curr_prob.initialized_p ());
4726 FOR_EACH_EDGE (e, ei, cfg_bb->succs)
4727 {
4728 auto &new_prob
c139f5e1 4729 = get_block_info(e->dest).probability;
acc10c79
JZZ
4730 if (!new_prob.initialized_p ())
4731 new_prob = curr_prob * e->probability;
4732 else if (new_prob == profile_probability::always ())
4733 continue;
4734 else
4735 new_prob += curr_prob * e->probability;
4736 }
4737 }
acc10c79
JZZ
4738}
4739
9243c3d1
JZZ
4740/* Lazy vsetvl insertion for optimize > 0. */
4741void
4742pass_vsetvl::lazy_vsetvl (void)
4743{
4744 if (dump_file)
4745 fprintf (dump_file,
4746 "\nEntering Lazy VSETVL PASS and Handling %d basic blocks for "
4747 "function:%s\n",
4748 n_basic_blocks_for_fn (cfun), function_name (cfun));
4749
4750 /* Phase 1 - Compute the local dems within each block.
4751 The data-flow analysis within each block is backward analysis. */
4752 if (dump_file)
4753 fprintf (dump_file, "\nPhase 1: Compute local backward vector infos\n");
4754 for (const bb_info *bb : crtl->ssa->bbs ())
4755 compute_local_backward_infos (bb);
4756 if (dump_file)
4757 m_vector_manager->dump (dump_file);
4758
4759 /* Phase 2 - Emit vsetvl instructions within each basic block according to
4760 demand, compute and save ANTLOC && AVLOC of each block. */
4761 if (dump_file)
4762 fprintf (dump_file,
4763 "\nPhase 2: Emit vsetvl instruction within each block\n");
4764 for (const bb_info *bb : crtl->ssa->bbs ())
4765 emit_local_forward_vsetvls (bb);
4766 if (dump_file)
4767 m_vector_manager->dump (dump_file);
4768
4769 /* Phase 3 - Propagate demanded info across blocks. */
4770 if (dump_file)
4771 fprintf (dump_file, "\nPhase 3: Demands propagation across blocks\n");
387cd9d3 4772 demand_fusion ();
9243c3d1
JZZ
4773 if (dump_file)
4774 m_vector_manager->dump (dump_file);
4775
4776 /* Phase 4 - Lazy code motion. */
4777 if (dump_file)
4778 fprintf (dump_file, "\nPhase 4: PRE vsetvl by Lazy code motion (LCM)\n");
4779 pre_vsetvl ();
4780
20c85207 4781 /* Phase 5 - Post optimization base on RTL_SSA. */
9243c3d1 4782 if (dump_file)
20c85207
JZ
4783 fprintf (dump_file, "\nPhase 5: Post optimization base on RTL_SSA\n");
4784 ssa_post_optimization ();
6b6b9c68 4785
20c85207 4786 /* Phase 6 - Post optimization base on data-flow analysis. */
6b6b9c68
JZZ
4787 if (dump_file)
4788 fprintf (dump_file,
20c85207
JZ
4789 "\nPhase 6: Post optimization base on data-flow analysis\n");
4790 df_post_optimization ();
9243c3d1
JZZ
4791}
4792
4793/* Main entry point for this pass. */
4794unsigned int
4795pass_vsetvl::execute (function *)
4796{
4797 if (n_basic_blocks_for_fn (cfun) <= 0)
4798 return 0;
4799
ca8fb009
JZZ
4800 /* The RVV instruction may change after split which is not a stable
4801 instruction. We need to split it here to avoid potential issue
4802 since the VSETVL PASS is insert before split PASS. */
4803 split_all_insns ();
9243c3d1
JZZ
4804
4805 /* Early return for there is no vector instructions. */
4806 if (!has_vector_insn (cfun))
4807 return 0;
4808
4809 init ();
4810
4811 if (!optimize)
4812 simple_vsetvl ();
4813 else
4814 lazy_vsetvl ();
4815
4816 done ();
4817 return 0;
4818}
4819
4820rtl_opt_pass *
4821make_pass_vsetvl (gcc::context *ctxt)
4822{
4823 return new pass_vsetvl (ctxt);
4824}