]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/riscv/riscv-vector-costs.cc
RISC-V: Fix unexpected big LMUL choosing in dynamic LMUL model for non-adjacent load...
[thirdparty/gcc.git] / gcc / config / riscv / riscv-vector-costs.cc
1 /* Cost model implementation for RISC-V 'V' Extension for GNU compiler.
2 Copyright (C) 2023-2023 Free Software Foundation, Inc.
3 Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #define IN_TARGET_CODE 1
22
23 #define INCLUDE_STRING
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "target.h"
29 #include "function.h"
30 #include "tree.h"
31 #include "basic-block.h"
32 #include "rtl.h"
33 #include "gimple.h"
34 #include "targhooks.h"
35 #include "cfgloop.h"
36 #include "fold-const.h"
37 #include "tm_p.h"
38 #include "tree-vectorizer.h"
39 #include "gimple-iterator.h"
40 #include "bitmap.h"
41 #include "ssa.h"
42 #include "backend.h"
43 #include "tree-data-ref.h"
44
45 /* This file should be included last. */
46 #include "riscv-vector-costs.h"
47
48 namespace riscv_vector {
49
50 /* Dynamic LMUL philosophy - Local linear-scan SSA live range based analysis
51 determine LMUL
52
53 - Collect all vectorize STMTs locally for each loop block.
54 - Build program point based graph, ignore non-vectorize STMTs:
55
56 vectorize STMT 0 - point 0
57 scalar STMT 0 - ignore.
58 vectorize STMT 1 - point 1
59 ...
60 - Compute the number of live V_REGs live at each program point
61 - Determine LMUL in VECTOR COST model according to the program point
62 which has maximum live V_REGs.
63
64 Note:
65
66 - BIGGEST_MODE is the biggest LMUL auto-vectorization element mode.
67 It's important for mixed size auto-vectorization (Conversions, ... etc).
68 E.g. For a loop that is vectorizing conversion of INT32 -> INT64.
69 The biggest mode is DImode and LMUL = 8, LMUL = 4 for SImode.
70 We compute the number live V_REGs at each program point according to
71 this information.
72 - We only compute program points and live ranges locally (within a block)
73 since we just need to compute the number of live V_REGs at each program
74 point and we are not really allocating the registers for each SSA.
   We can give the variable another local live range in another block
   if it lives out of / into another block.  Such an approach doesn't
   affect our accurate live range analysis.
78 - Current analysis didn't consider any instruction scheduling which
79 may improve the register pressure. So we are conservatively doing the
80 analysis which may end up with smaller LMUL.
   TODO: Maybe we could support a reasonable live-range shrinking
   algorithm which takes advantage of instruction scheduling.
83 - We may have these following possible autovec modes analysis:
84
85 1. M8 -> M4 -> M2 -> M1 (stop analysis here) -> MF2 -> MF4 -> MF8
86 2. M8 -> M1(M4) -> MF2(M2) -> MF4(M1) (stop analysis here) -> MF8(MF2)
87 3. M1(M8) -> MF2(M4) -> MF4(M2) -> MF8(M1)
88 */
/* Per-loop state of the dynamic LMUL analysis: the LMUL currently being
   tried for the loop and whether the search has finished (end_p).  */
static hash_map<class loop *, autovec_info> loop_autovec_infos;
90
91 /* Collect all STMTs that are vectorized and compute their program points.
92 Note that we don't care about the STMTs that are not vectorized and
93 we only build the local graph (within a block) of program points.
94
95 Loop:
96 bb 2:
97 STMT 1 (be vectorized) -- point 0
98 STMT 2 (not be vectorized) -- ignored
99 STMT 3 (be vectorized) -- point 1
100 STMT 4 (be vectorized) -- point 2
101 STMT 5 (be vectorized) -- point 3
102 ...
103 bb 3:
104 STMT 1 (be vectorized) -- point 0
105 STMT 2 (be vectorized) -- point 1
106 STMT 3 (not be vectorized) -- ignored
107 STMT 4 (not be vectorized) -- ignored
108 STMT 5 (be vectorized) -- point 2
109 ...
110 */
111 static void
112 compute_local_program_points (
113 vec_info *vinfo,
114 hash_map<basic_block, vec<stmt_point>> &program_points_per_bb)
115 {
116 if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo))
117 {
118 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
119 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
120 unsigned int nbbs = loop->num_nodes;
121 gimple_stmt_iterator si;
122 unsigned int i;
123 /* Collect the stmts that is vectorized and mark their program point. */
124 for (i = 0; i < nbbs; i++)
125 {
126 int point = 0;
127 basic_block bb = bbs[i];
128 vec<stmt_point> program_points = vNULL;
129 if (dump_enabled_p ())
130 dump_printf_loc (MSG_NOTE, vect_location,
131 "Compute local program points for bb %d:\n",
132 bb->index);
133 for (si = gsi_start_bb (bbs[i]); !gsi_end_p (si); gsi_next (&si))
134 {
135 if (!(is_gimple_assign (gsi_stmt (si))
136 || is_gimple_call (gsi_stmt (si))))
137 continue;
138 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
139 enum stmt_vec_info_type type
140 = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
141 if (type != undef_vec_info_type)
142 {
143 stmt_point info = {point, gsi_stmt (si)};
144 program_points.safe_push (info);
145 point++;
146 if (dump_enabled_p ())
147 dump_printf_loc (MSG_NOTE, vect_location,
148 "program point %d: %G", info.point,
149 gsi_stmt (si));
150 }
151 }
152 program_points_per_bb.put (bb, program_points);
153 }
154 }
155 }
156
157 /* Compute local live ranges of each vectorized variable.
158 Note that we only compute local live ranges (within a block) since
159 local live ranges information is accurate enough for us to determine
160 the LMUL/vectorization factor of the loop.
161
162 Loop:
163 bb 2:
164 STMT 1 -- point 0
165 STMT 2 (def SSA 1) -- point 1
166 STMT 3 (use SSA 1) -- point 2
167 STMT 4 -- point 3
168 bb 3:
169 STMT 1 -- point 0
170 STMT 2 -- point 1
171 STMT 3 -- point 2
172 STMT 4 (use SSA 2) -- point 3
173
174 The live range of SSA 1 is [1, 3] in bb 2.
175 The live range of SSA 2 is [0, 4] in bb 3. */
176 static machine_mode
177 compute_local_live_ranges (
178 const hash_map<basic_block, vec<stmt_point>> &program_points_per_bb,
179 hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb)
180 {
181 machine_mode biggest_mode = QImode;
182 if (!program_points_per_bb.is_empty ())
183 {
184 auto_vec<tree> visited_vars;
185 unsigned int i;
186 for (hash_map<basic_block, vec<stmt_point>>::iterator iter
187 = program_points_per_bb.begin ();
188 iter != program_points_per_bb.end (); ++iter)
189 {
190 basic_block bb = (*iter).first;
191 vec<stmt_point> program_points = (*iter).second;
192 bool existed_p = false;
193 hash_map<tree, pair> *live_ranges
194 = &live_ranges_per_bb.get_or_insert (bb, &existed_p);
195 gcc_assert (!existed_p);
196 if (dump_enabled_p ())
197 dump_printf_loc (MSG_NOTE, vect_location,
198 "Compute local live ranges for bb %d:\n",
199 bb->index);
200 for (const auto program_point : program_points)
201 {
202 unsigned int point = program_point.point;
203 gimple *stmt = program_point.stmt;
204 machine_mode mode = biggest_mode;
205 tree lhs = gimple_get_lhs (stmt);
206 if (lhs != NULL_TREE && is_gimple_reg (lhs)
207 && !POINTER_TYPE_P (TREE_TYPE (lhs)))
208 {
209 mode = TYPE_MODE (TREE_TYPE (lhs));
210 bool existed_p = false;
211 pair &live_range
212 = live_ranges->get_or_insert (lhs, &existed_p);
213 gcc_assert (!existed_p);
214 live_range = pair (point, point);
215 }
216 for (i = 0; i < gimple_num_args (stmt); i++)
217 {
218 tree var = gimple_arg (stmt, i);
219 /* Both IMM and REG are included since a VECTOR_CST may be
220 potentially held in a vector register. However, it's not
221 accurate, since a PLUS_EXPR can be vectorized into vadd.vi
222 if IMM is -16 ~ 15.
223
224 TODO: We may elide the cases that the unnecessary IMM in
225 the future. */
226 if (is_gimple_val (var) && !POINTER_TYPE_P (TREE_TYPE (var)))
227 {
228 mode = TYPE_MODE (TREE_TYPE (var));
229 bool existed_p = false;
230 pair &live_range
231 = live_ranges->get_or_insert (var, &existed_p);
232 if (existed_p)
233 /* We will grow the live range for each use. */
234 live_range = pair (live_range.first, point);
235 else
236 /* We assume the variable is live from the start of
237 this block. */
238 live_range = pair (0, point);
239 }
240 }
241 if (GET_MODE_SIZE (mode).to_constant ()
242 > GET_MODE_SIZE (biggest_mode).to_constant ())
243 biggest_mode = mode;
244 }
245 if (dump_enabled_p ())
246 for (hash_map<tree, pair>::iterator iter = live_ranges->begin ();
247 iter != live_ranges->end (); ++iter)
248 dump_printf_loc (MSG_NOTE, vect_location,
249 "%T: type = %T, start = %d, end = %d\n",
250 (*iter).first, TREE_TYPE ((*iter).first),
251 (*iter).second.first, (*iter).second.second);
252 }
253 }
254 if (dump_enabled_p ())
255 dump_printf_loc (MSG_NOTE, vect_location, "Biggest mode = %s\n",
256 GET_MODE_NAME (biggest_mode));
257 return biggest_mode;
258 }
259
260 /* Compute the mode for MODE, BIGGEST_MODE and LMUL.
261
262 E.g. If mode = SImode, biggest_mode = DImode, LMUL = M4.
263 Then return RVVM4SImode (LMUL = 4, element mode = SImode). */
264 static unsigned int
265 compute_nregs_for_mode (machine_mode mode, machine_mode biggest_mode, int lmul)
266 {
267 unsigned int mode_size = GET_MODE_SIZE (mode).to_constant ();
268 unsigned int biggest_size = GET_MODE_SIZE (biggest_mode).to_constant ();
269 gcc_assert (biggest_size >= mode_size);
270 unsigned int ratio = biggest_size / mode_size;
271 return lmul / ratio;
272 }
273
/* This function helps to determine whether the current LMUL will cause
   potential vector register (V_REG) spills, according to the live range
   information.

   - First, compute how many variables are live at each program point
     in each bb of the loop.
   - Second, compute how many V_REGs are live at each program point
     in each bb of the loop according to BIGGEST_MODE and the variable
     mode.
   - Third, return the maximum number of live V_REGs of the loop.  */
static unsigned int
max_number_of_live_regs (const basic_block bb,
			 const hash_map<tree, pair> &live_ranges,
			 unsigned int max_point, machine_mode biggest_mode,
			 int lmul)
{
  unsigned int max_nregs = 0;
  unsigned int i;
  /* NOTE(review): live_point is only used in the dumps below and is never
     updated, so dumps always report program point 0 -- confirm whether it
     was meant to track the point where MAX_NREGS is reached.  */
  unsigned int live_point = 0;
  /* live_vars_vec[P] accumulates the V_REG count of every variable whose
     live range covers program point P.  */
  auto_vec<unsigned int> live_vars_vec;
  live_vars_vec.safe_grow_cleared (max_point + 1, true);
  for (hash_map<tree, pair>::iterator iter = live_ranges.begin ();
       iter != live_ranges.end (); ++iter)
    {
      tree var = (*iter).first;
      pair live_range = (*iter).second;
      for (i = live_range.first; i <= live_range.second; i++)
	{
	  machine_mode mode = TYPE_MODE (TREE_TYPE (var));
	  unsigned int nregs
	    = compute_nregs_for_mode (mode, biggest_mode, lmul);
	  live_vars_vec[i] += nregs;
	  if (live_vars_vec[i] > max_nregs)
	    max_nregs = live_vars_vec[i];
	}
    }

  /* Collect user explicit RVV type.  SSA names carrying an explicit RVV
     vector type attribute (or a VLS vector mode) pin registers on top of
     the auto-vectorized pressure computed above.  */
  auto_vec<basic_block> all_preds
    = get_all_dominated_blocks (CDI_POST_DOMINATORS, bb);
  tree t;
  FOR_EACH_SSA_NAME (i, t, cfun)
    {
      machine_mode mode = TYPE_MODE (TREE_TYPE (t));
      if (!lookup_vector_type_attribute (TREE_TYPE (t))
	  && !riscv_v_ext_vls_mode_p (mode))
	continue;

      /* Skip names whose definition is unrelated to BB in the
	 post-dominator tree.  */
      gimple *def = SSA_NAME_DEF_STMT (t);
      if (gimple_bb (def) && !all_preds.contains (gimple_bb (def)))
	continue;
      use_operand_p use_p;
      imm_use_iterator iterator;

      FOR_EACH_IMM_USE_FAST (use_p, iterator, t)
	{
	  /* Ignore debug uses and uses unrelated to BB in the
	     post-dominator tree.  */
	  if (!USE_STMT (use_p) || is_gimple_debug (USE_STMT (use_p))
	      || !dominated_by_p (CDI_POST_DOMINATORS, bb,
				  gimple_bb (USE_STMT (use_p))))
	    continue;

	  /* One qualifying use is enough: charge the whole register group
	     once per SSA name, then stop scanning its uses (break).  */
	  int regno_alignment = riscv_get_v_regno_alignment (mode);
	  max_nregs += regno_alignment;
	  if (dump_enabled_p ())
	    dump_printf_loc (
	      MSG_NOTE, vect_location,
	      "Explicit used SSA %T, vectype = %T, mode = %s, cause %d "
	      "V_REG live in bb %d at program point %d\n",
	      t, TREE_TYPE (t), GET_MODE_NAME (mode), regno_alignment,
	      bb->index, live_point);
	  break;
	}
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "Maximum lmul = %d, %d number of live V_REG at program "
		     "point %d for bb %d\n",
		     lmul, max_nregs, live_point, bb->index);
  return max_nregs;
}
355
356 /* Return the LMUL of the current analysis. */
357 static int
358 get_current_lmul (class loop *loop)
359 {
360 return loop_autovec_infos.get (loop)->current_lmul;
361 }
362
363 /* Get STORE value. */
364 static tree
365 get_store_value (gimple *stmt)
366 {
367 if (is_gimple_call (stmt) && gimple_call_internal_p (stmt))
368 {
369 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
370 return gimple_call_arg (stmt, 3);
371 else
372 gcc_unreachable ();
373 }
374 else
375 return gimple_assign_rhs1 (stmt);
376 }
377
378 /* Return true if it is non-contiguous load/store. */
379 static bool
380 non_contiguous_memory_access_p (stmt_vec_info stmt_info)
381 {
382 enum stmt_vec_info_type type
383 = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
384 return ((type == load_vec_info_type || type == store_vec_info_type)
385 && !adjacent_dr_p (STMT_VINFO_DATA_REF (stmt_info)));
386 }
387
/* Update the live ranges according to PHIs.

   Loop:
     bb 2:
       STMT 1             -- point 0
       STMT 2 (def SSA 1) -- point 1
       STMT 3 (use SSA 1) -- point 2
       STMT 4             -- point 3
     bb 3:
       SSA 2 = PHI<SSA 1>
       STMT 1             -- point 0
       STMT 2             -- point 1
       STMT 3 (use SSA 2) -- point 2
       STMT 4             -- point 3

   Before this function, the SSA 1 live range is [2, 3] in bb 2
   and SSA 2 is [0, 3] in bb 3.

   Then, after this function, we update SSA 1 live range in bb 2
   into [2, 4] since SSA 1 is live out into bb 3.  */
static void
update_local_live_ranges (
  vec_info *vinfo,
  hash_map<basic_block, vec<stmt_point>> &program_points_per_bb,
  hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb)
{
  loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
  if (!loop_vinfo)
    return;

  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  unsigned int i, j;
  gphi_iterator psi;
  gimple_stmt_iterator si;
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Update local program points for bb %d:\n",
			 bbs[i]->index);
      for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
	{
	  gphi *phi = psi.phi ();
	  stmt_vec_info stmt_info = vinfo->lookup_stmt (phi);
	  /* Skip PHIs that will not be vectorized.  */
	  if (STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info))
	      == undef_vec_info_type)
	    continue;

	  for (j = 0; j < gimple_phi_num_args (phi); j++)
	    {
	      edge e = gimple_phi_arg_edge (phi, j);
	      tree def = gimple_phi_arg_def (phi, j);
	      auto *live_ranges = live_ranges_per_bb.get (bb);
	      auto *live_range = live_ranges->get (def);
	      /* A PHI argument that flows in from inside the loop is live
		 on entry to this block: pull its start back to point 0.  */
	      if (live_range && flow_bb_inside_loop_p (loop, e->src))
		{
		  unsigned int start = (*live_range).first;
		  (*live_range).first = 0;
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "Update %T start point from %d to %d:\n",
				     def, start, (*live_range).first);
		}
	      /* The argument is also live out of its source block: extend
		 its range there to E->src's last program point.  */
	      live_ranges = live_ranges_per_bb.get (e->src);
	      if (!program_points_per_bb.get (e->src))
		continue;
	      unsigned int max_point
		= (*program_points_per_bb.get (e->src)).length () - 1;
	      live_range = live_ranges->get (def);
	      if (!live_range)
		continue;

	      unsigned int end = (*live_range).second;
	      (*live_range).second = max_point;
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "Update %T end point from %d to %d:\n", def,
				 end, (*live_range).second);
	    }
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  if (!(is_gimple_assign (gsi_stmt (si))
		|| is_gimple_call (gsi_stmt (si))))
	    continue;
	  stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
	  enum stmt_vec_info_type type
	    = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
	  if (non_contiguous_memory_access_p (stmt_info))
	    {
	      /* For non-adjacent load/store STMT, we will potentially
		 convert it into:

		   1. MASK_LEN_GATHER_LOAD (..., perm indice).
		   2. Contiguous load/store + VEC_PERM (..., perm indice)

		 We will be likely using one more vector variable.  Model
		 that by adding a synthetic variable live across the whole
		 block.  */
	      unsigned int max_point
		= (*program_points_per_bb.get (bb)).length () - 1;
	      auto *live_ranges = live_ranges_per_bb.get (bb);
	      bool existed_p = false;
	      tree var = type == load_vec_info_type
			   ? gimple_get_lhs (gsi_stmt (si))
			   : get_store_value (gsi_stmt (si));
	      tree sel_type = build_nonstandard_integer_type (
		TYPE_PRECISION (TREE_TYPE (var)), 1);
	      /* The decl only carries a type for register-pressure
		 accounting; it never appears in the IL.  */
	      tree sel = build_decl (UNKNOWN_LOCATION, VAR_DECL,
				     get_identifier ("vect_perm"), sel_type);
	      pair &live_range = live_ranges->get_or_insert (sel, &existed_p);
	      gcc_assert (!existed_p);
	      live_range = pair (0, max_point);
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "Add perm indice %T, start = 0, end = %d\n",
				 sel, max_point);
	    }
	}
    }
}
510
/* Construct the RISC-V vector cost model; COSTING_FOR_SCALAR is simply
   forwarded to the vector_costs base class.  */
costs::costs (vec_info *vinfo, bool costing_for_scalar)
  : vector_costs (vinfo, costing_for_scalar)
{}
514
515 /* Return true that the LMUL of new COST model is preferred. */
516 bool
517 costs::preferred_new_lmul_p (const vector_costs *uncast_other) const
518 {
519 auto other = static_cast<const costs *> (uncast_other);
520 auto this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
521 auto other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);
522 class loop *loop = LOOP_VINFO_LOOP (this_loop_vinfo);
523
524 if (loop_autovec_infos.get (loop) && loop_autovec_infos.get (loop)->end_p)
525 return false;
526 else if (loop_autovec_infos.get (loop))
527 loop_autovec_infos.get (loop)->current_lmul
528 = loop_autovec_infos.get (loop)->current_lmul / 2;
529 else
530 {
531 int regno_alignment
532 = riscv_get_v_regno_alignment (other_loop_vinfo->vector_mode);
533 if (known_eq (LOOP_VINFO_SLP_UNROLLING_FACTOR (other_loop_vinfo), 1U))
534 regno_alignment = RVV_M8;
535 loop_autovec_infos.put (loop, {regno_alignment, regno_alignment, false});
536 }
537
538 int lmul = get_current_lmul (loop);
539 if (dump_enabled_p ())
540 dump_printf_loc (MSG_NOTE, vect_location,
541 "Comparing two main loops (%s at VF %d vs %s at VF %d)\n",
542 GET_MODE_NAME (this_loop_vinfo->vector_mode),
543 vect_vf_for_cost (this_loop_vinfo),
544 GET_MODE_NAME (other_loop_vinfo->vector_mode),
545 vect_vf_for_cost (other_loop_vinfo));
546
547 /* Compute local program points.
548 It's a fast and effective computation. */
549 hash_map<basic_block, vec<stmt_point>> program_points_per_bb;
550 compute_local_program_points (other->m_vinfo, program_points_per_bb);
551
552 /* Compute local live ranges. */
553 hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb;
554 machine_mode biggest_mode
555 = compute_local_live_ranges (program_points_per_bb, live_ranges_per_bb);
556
557 /* If we can use simple VLS modes to handle NITERS element.
558 We don't need to use VLA modes with partial vector auto-vectorization. */
559 if (LOOP_VINFO_NITERS_KNOWN_P (this_loop_vinfo)
560 && known_le (tree_to_poly_int64 (LOOP_VINFO_NITERS (this_loop_vinfo))
561 * GET_MODE_SIZE (biggest_mode).to_constant (),
562 (int) RVV_M8 * BYTES_PER_RISCV_VECTOR)
563 && pow2p_hwi (LOOP_VINFO_INT_NITERS (this_loop_vinfo)))
564 return vector_costs::better_main_loop_than_p (other);
565
566 /* Update live ranges according to PHI. */
567 update_local_live_ranges (other->m_vinfo, program_points_per_bb,
568 live_ranges_per_bb);
569
570 /* TODO: We calculate the maximum live vars base on current STMTS
571 sequence. We can support live range shrink if it can give us
572 big improvement in the future. */
573 if (!live_ranges_per_bb.is_empty ())
574 {
575 unsigned int max_nregs = 0;
576 for (hash_map<basic_block, hash_map<tree, pair>>::iterator iter
577 = live_ranges_per_bb.begin ();
578 iter != live_ranges_per_bb.end (); ++iter)
579 {
580 basic_block bb = (*iter).first;
581 unsigned int max_point
582 = (*program_points_per_bb.get (bb)).length () - 1;
583 if ((*iter).second.is_empty ())
584 continue;
585 /* We prefer larger LMUL unless it causes register spillings. */
586 unsigned int nregs
587 = max_number_of_live_regs (bb, (*iter).second, max_point,
588 biggest_mode, lmul);
589 if (nregs > max_nregs)
590 max_nregs = nregs;
591 live_ranges_per_bb.empty ();
592 }
593 live_ranges_per_bb.empty ();
594 if (loop_autovec_infos.get (loop)->current_lmul == RVV_M1
595 || max_nregs <= V_REG_NUM)
596 loop_autovec_infos.get (loop)->end_p = true;
597 if (loop_autovec_infos.get (loop)->current_lmul > RVV_M1)
598 return max_nregs > V_REG_NUM;
599 return false;
600 }
601 if (!program_points_per_bb.is_empty ())
602 {
603 for (hash_map<basic_block, vec<stmt_point>>::iterator iter
604 = program_points_per_bb.begin ();
605 iter != program_points_per_bb.end (); ++iter)
606 {
607 vec<stmt_point> program_points = (*iter).second;
608 if (!program_points.is_empty ())
609 program_points.release ();
610 }
611 program_points_per_bb.empty ();
612 }
613 return lmul > RVV_M1;
614 }
615
616 bool
617 costs::better_main_loop_than_p (const vector_costs *uncast_other) const
618 {
619 auto other = static_cast<const costs *> (uncast_other);
620
621 if (!flag_vect_cost_model)
622 return vector_costs::better_main_loop_than_p (other);
623
624 if (riscv_autovec_lmul == RVV_DYNAMIC)
625 {
626 bool post_dom_available_p = dom_info_available_p (CDI_POST_DOMINATORS);
627 if (!post_dom_available_p)
628 calculate_dominance_info (CDI_POST_DOMINATORS);
629 bool preferred_p = preferred_new_lmul_p (uncast_other);
630 if (!post_dom_available_p)
631 free_dominance_info (CDI_POST_DOMINATORS);
632 return preferred_p;
633 }
634
635 return vector_costs::better_main_loop_than_p (other);
636 }
637
638 unsigned
639 costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
640 stmt_vec_info stmt_info, slp_tree, tree vectype,
641 int misalign, vect_cost_model_location where)
642 {
643 /* TODO: Use default STMT cost model.
644 We will support more accurate STMT cost model later. */
645 int stmt_cost = default_builtin_vectorization_cost (kind, vectype, misalign);
646 return record_stmt_cost (stmt_info, where, count * stmt_cost);
647 }
648
/* Called once all statement costs have been recorded; delegate the final
   accounting entirely to the default vector_costs implementation.  */
void
costs::finish_cost (const vector_costs *scalar_costs)
{
  vector_costs::finish_cost (scalar_costs);
}
654
655 } // namespace riscv_vector