]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/riscv/riscv-vector-costs.cc
RISC-V: Fix dynamic LMUL cost model ICE
[thirdparty/gcc.git] / gcc / config / riscv / riscv-vector-costs.cc
1 /* Cost model implementation for RISC-V 'V' Extension for GNU compiler.
2 Copyright (C) 2023-2023 Free Software Foundation, Inc.
3 Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #define IN_TARGET_CODE 1
22
23 #define INCLUDE_STRING
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "target.h"
29 #include "function.h"
30 #include "tree.h"
31 #include "basic-block.h"
32 #include "rtl.h"
33 #include "gimple.h"
34 #include "targhooks.h"
35 #include "cfgloop.h"
36 #include "fold-const.h"
37 #include "tm_p.h"
38 #include "tree-vectorizer.h"
39 #include "gimple-iterator.h"
40 #include "bitmap.h"
41 #include "ssa.h"
42 #include "backend.h"
43 #include "tree-data-ref.h"
44
45 /* This file should be included last. */
46 #include "riscv-vector-costs.h"
47
48 namespace riscv_vector {
49
50 /* Dynamic LMUL philosophy - Local linear-scan SSA live range based analysis
51 determine LMUL
52
53 - Collect all vectorize STMTs locally for each loop block.
54 - Build program point based graph, ignore non-vectorize STMTs:
55
56 vectorize STMT 0 - point 0
57 scalar STMT 0 - ignore.
58 vectorize STMT 1 - point 1
59 ...
60 - Compute the number of live V_REGs live at each program point
61 - Determine LMUL in VECTOR COST model according to the program point
62 which has maximum live V_REGs.
63
64 Note:
65
66 - BIGGEST_MODE is the biggest LMUL auto-vectorization element mode.
67 It's important for mixed size auto-vectorization (Conversions, ... etc).
68 E.g. For a loop that is vectorizing conversion of INT32 -> INT64.
69 The biggest mode is DImode and LMUL = 8, LMUL = 4 for SImode.
70 We compute the number live V_REGs at each program point according to
71 this information.
72 - We only compute program points and live ranges locally (within a block)
73 since we just need to compute the number of live V_REGs at each program
74 point and we are not really allocating the registers for each SSA.
75 We can make the variable has another local live range in another block
76 if it live out/live in to another block. Such approach doesn't affect
77 out accurate live range analysis.
78 - Current analysis didn't consider any instruction scheduling which
79 may improve the register pressure. So we are conservatively doing the
80 analysis which may end up with smaller LMUL.
81 TODO: Maybe we could support a reasonable live range shrink algorithm
82 which take advantage of instruction scheduling.
83 - We may have these following possible autovec modes analysis:
84
85 1. M8 -> M4 -> M2 -> M1 (stop analysis here) -> MF2 -> MF4 -> MF8
86 2. M8 -> M1(M4) -> MF2(M2) -> MF4(M1) (stop analysis here) -> MF8(MF2)
87 3. M1(M8) -> MF2(M4) -> MF4(M2) -> MF8(M1)
88 */
/* Per-loop state of the dynamic LMUL search: the LMUL currently being
   tried and whether the search has terminated.  Persists across the
   repeated cost-model comparisons the vectorizer performs for one loop.  */
static hash_map<class loop *, autovec_info> loop_autovec_infos;
90
91 /* Collect all STMTs that are vectorized and compute their program points.
92 Note that we don't care about the STMTs that are not vectorized and
93 we only build the local graph (within a block) of program points.
94
95 Loop:
96 bb 2:
97 STMT 1 (be vectorized) -- point 0
98 STMT 2 (not be vectorized) -- ignored
99 STMT 3 (be vectorized) -- point 1
100 STMT 4 (be vectorized) -- point 2
101 STMT 5 (be vectorized) -- point 3
102 ...
103 bb 3:
104 STMT 1 (be vectorized) -- point 0
105 STMT 2 (be vectorized) -- point 1
106 STMT 3 (not be vectorized) -- ignored
107 STMT 4 (not be vectorized) -- ignored
108 STMT 5 (be vectorized) -- point 2
109 ...
110 */
111 static void
112 compute_local_program_points (
113 vec_info *vinfo,
114 hash_map<basic_block, vec<stmt_point>> &program_points_per_bb)
115 {
116 if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo))
117 {
118 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
119 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
120 unsigned int nbbs = loop->num_nodes;
121 gimple_stmt_iterator si;
122 unsigned int i;
123 /* Collect the stmts that is vectorized and mark their program point. */
124 for (i = 0; i < nbbs; i++)
125 {
126 int point = 0;
127 basic_block bb = bbs[i];
128 vec<stmt_point> program_points = vNULL;
129 if (dump_enabled_p ())
130 dump_printf_loc (MSG_NOTE, vect_location,
131 "Compute local program points for bb %d:\n",
132 bb->index);
133 for (si = gsi_start_bb (bbs[i]); !gsi_end_p (si); gsi_next (&si))
134 {
135 if (!(is_gimple_assign (gsi_stmt (si))
136 || is_gimple_call (gsi_stmt (si))))
137 continue;
138 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
139 enum stmt_vec_info_type type
140 = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
141 if (type != undef_vec_info_type)
142 {
143 stmt_point info = {point, gsi_stmt (si)};
144 program_points.safe_push (info);
145 point++;
146 if (dump_enabled_p ())
147 dump_printf_loc (MSG_NOTE, vect_location,
148 "program point %d: %G", info.point,
149 gsi_stmt (si));
150 }
151 }
152 program_points_per_bb.put (bb, program_points);
153 }
154 }
155 }
156
157 static machine_mode
158 get_biggest_mode (machine_mode mode1, machine_mode mode2)
159 {
160 unsigned int mode1_size = GET_MODE_BITSIZE (mode1).to_constant ();
161 unsigned int mode2_size = GET_MODE_BITSIZE (mode2).to_constant ();
162 return mode1_size >= mode2_size ? mode1 : mode2;
163 }
164
/* Compute local live ranges of each vectorized variable.
   Note that we only compute local live ranges (within a block) since
   local live ranges information is accurate enough for us to determine
   the LMUL/vectorization factor of the loop.

   Loop:
     bb 2:
       STMT 1              -- point 0
       STMT 2 (def SSA 1)  -- point 1
       STMT 3 (use SSA 1)  -- point 2
       STMT 4              -- point 3
     bb 3:
       STMT 1              -- point 0
       STMT 2              -- point 1
       STMT 3              -- point 2
       STMT 4 (use SSA 2)  -- point 3

   The live range of SSA 1 is [1, 3] in bb 2.
   The live range of SSA 2 is [0, 4] in bb 3.

   Fills LIVE_RANGES_PER_BB (one tree -> [start, end] map per block) and
   returns the biggest element mode seen across all defs and uses, which
   the caller uses to normalize register-pressure estimates.  */
static machine_mode
compute_local_live_ranges (
  const hash_map<basic_block, vec<stmt_point>> &program_points_per_bb,
  hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb)
{
  /* QImode is the smallest possible element mode, so it is a neutral
     starting value for the running maximum.  */
  machine_mode biggest_mode = QImode;
  if (!program_points_per_bb.is_empty ())
    {
      /* NOTE(review): visited_vars appears to be unused in this function —
	 candidate for removal.  */
      auto_vec<tree> visited_vars;
      unsigned int i;
      for (hash_map<basic_block, vec<stmt_point>>::iterator iter
	   = program_points_per_bb.begin ();
	   iter != program_points_per_bb.end (); ++iter)
	{
	  basic_block bb = (*iter).first;
	  vec<stmt_point> program_points = (*iter).second;
	  bool existed_p = false;
	  hash_map<tree, pair> *live_ranges
	    = &live_ranges_per_bb.get_or_insert (bb, &existed_p);
	  /* Each block must be processed exactly once.  */
	  gcc_assert (!existed_p);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Compute local live ranges for bb %d:\n",
			     bb->index);
	  for (const auto program_point : program_points)
	    {
	      unsigned int point = program_point.point;
	      gimple *stmt = program_point.stmt;
	      tree lhs = gimple_get_lhs (stmt);
	      /* A non-pointer register def opens a live range at its own
		 program point.  Pointer values live in scalar registers,
		 so they do not contribute to V_REG pressure.  */
	      if (lhs != NULL_TREE && is_gimple_reg (lhs)
		  && !POINTER_TYPE_P (TREE_TYPE (lhs)))
		{
		  biggest_mode = get_biggest_mode (biggest_mode,
						   TYPE_MODE (TREE_TYPE (lhs)));
		  bool existed_p = false;
		  pair &live_range
		    = live_ranges->get_or_insert (lhs, &existed_p);
		  /* SSA form guarantees a single def per name, so the lhs
		     cannot already be present.  */
		  gcc_assert (!existed_p);
		  live_range = pair (point, point);
		}
	      for (i = 0; i < gimple_num_args (stmt); i++)
		{
		  tree var = gimple_arg (stmt, i);
		  /* Both IMM and REG are included since a VECTOR_CST may be
		     potentially held in a vector register.  However, it's not
		     accurate, since a PLUS_EXPR can be vectorized into vadd.vi
		     if IMM is -16 ~ 15.

		     TODO: We may elide the cases that the unnecessary IMM in
		     the future.  */
		  if (poly_int_tree_p (var)
		      || (is_gimple_val (var)
			  && !POINTER_TYPE_P (TREE_TYPE (var))))
		    {
		      biggest_mode
			= get_biggest_mode (biggest_mode,
					    TYPE_MODE (TREE_TYPE (var)));
		      bool existed_p = false;
		      pair &live_range
			= live_ranges->get_or_insert (var, &existed_p);
		      if (existed_p)
			/* We will grow the live range for each use.  */
			live_range = pair (live_range.first, point);
		      else
			/* We assume the variable is live from the start of
			   this block.  */
			live_range = pair (0, point);
		    }
		}
	    }
	  if (dump_enabled_p ())
	    for (hash_map<tree, pair>::iterator iter = live_ranges->begin ();
		 iter != live_ranges->end (); ++iter)
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "%T: type = %T, start = %d, end = %d\n",
			       (*iter).first, TREE_TYPE ((*iter).first),
			       (*iter).second.first, (*iter).second.second);
	}
    }
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Biggest mode = %s\n",
		     GET_MODE_NAME (biggest_mode));
  return biggest_mode;
}
268
269 /* Compute the mode for MODE, BIGGEST_MODE and LMUL.
270
271 E.g. If mode = SImode, biggest_mode = DImode, LMUL = M4.
272 Then return RVVM4SImode (LMUL = 4, element mode = SImode). */
273 static unsigned int
274 compute_nregs_for_mode (machine_mode mode, machine_mode biggest_mode, int lmul)
275 {
276 unsigned int mode_size = GET_MODE_SIZE (mode).to_constant ();
277 unsigned int biggest_size = GET_MODE_SIZE (biggest_mode).to_constant ();
278 gcc_assert (biggest_size >= mode_size);
279 unsigned int ratio = biggest_size / mode_size;
280 return lmul / ratio;
281 }
282
283 /* This function helps to determine whether current LMUL will cause
284 potential vector register (V_REG) spillings according to live range
285 information.
286
287 - First, compute how many variable are alive of each program point
288 in each bb of the loop.
289 - Second, compute how many V_REGs are alive of each program point
290 in each bb of the loop according the BIGGEST_MODE and the variable
291 mode.
292 - Third, Return the maximum V_REGs are alive of the loop. */
293 static unsigned int
294 max_number_of_live_regs (const basic_block bb,
295 const hash_map<tree, pair> &live_ranges,
296 unsigned int max_point, machine_mode biggest_mode,
297 int lmul)
298 {
299 unsigned int max_nregs = 0;
300 unsigned int i;
301 unsigned int live_point = 0;
302 auto_vec<unsigned int> live_vars_vec;
303 live_vars_vec.safe_grow_cleared (max_point + 1, true);
304 for (hash_map<tree, pair>::iterator iter = live_ranges.begin ();
305 iter != live_ranges.end (); ++iter)
306 {
307 tree var = (*iter).first;
308 pair live_range = (*iter).second;
309 for (i = live_range.first; i <= live_range.second; i++)
310 {
311 machine_mode mode = TYPE_MODE (TREE_TYPE (var));
312 unsigned int nregs
313 = compute_nregs_for_mode (mode, biggest_mode, lmul);
314 live_vars_vec[i] += nregs;
315 if (live_vars_vec[i] > max_nregs)
316 max_nregs = live_vars_vec[i];
317 }
318 }
319
320 /* Collect user explicit RVV type. */
321 auto_vec<basic_block> all_preds
322 = get_all_dominated_blocks (CDI_POST_DOMINATORS, bb);
323 tree t;
324 FOR_EACH_SSA_NAME (i, t, cfun)
325 {
326 machine_mode mode = TYPE_MODE (TREE_TYPE (t));
327 if (!lookup_vector_type_attribute (TREE_TYPE (t))
328 && !riscv_v_ext_vls_mode_p (mode))
329 continue;
330
331 gimple *def = SSA_NAME_DEF_STMT (t);
332 if (gimple_bb (def) && !all_preds.contains (gimple_bb (def)))
333 continue;
334 use_operand_p use_p;
335 imm_use_iterator iterator;
336
337 FOR_EACH_IMM_USE_FAST (use_p, iterator, t)
338 {
339 if (!USE_STMT (use_p) || is_gimple_debug (USE_STMT (use_p))
340 || !dominated_by_p (CDI_POST_DOMINATORS, bb,
341 gimple_bb (USE_STMT (use_p))))
342 continue;
343
344 int regno_alignment = riscv_get_v_regno_alignment (mode);
345 max_nregs += regno_alignment;
346 if (dump_enabled_p ())
347 dump_printf_loc (
348 MSG_NOTE, vect_location,
349 "Explicit used SSA %T, vectype = %T, mode = %s, cause %d "
350 "V_REG live in bb %d at program point %d\n",
351 t, TREE_TYPE (t), GET_MODE_NAME (mode), regno_alignment,
352 bb->index, live_point);
353 break;
354 }
355 }
356
357 if (dump_enabled_p ())
358 dump_printf_loc (MSG_NOTE, vect_location,
359 "Maximum lmul = %d, %d number of live V_REG at program "
360 "point %d for bb %d\n",
361 lmul, max_nregs, live_point, bb->index);
362 return max_nregs;
363 }
364
365 /* Return the LMUL of the current analysis. */
366 static int
367 get_current_lmul (class loop *loop)
368 {
369 return loop_autovec_infos.get (loop)->current_lmul;
370 }
371
372 /* Get STORE value. */
373 static tree
374 get_store_value (gimple *stmt)
375 {
376 if (is_gimple_call (stmt) && gimple_call_internal_p (stmt))
377 {
378 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
379 return gimple_call_arg (stmt, 3);
380 else
381 gcc_unreachable ();
382 }
383 else
384 return gimple_assign_rhs1 (stmt);
385 }
386
387 /* Return true if it is non-contiguous load/store. */
388 static bool
389 non_contiguous_memory_access_p (stmt_vec_info stmt_info)
390 {
391 enum stmt_vec_info_type type
392 = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
393 return ((type == load_vec_info_type || type == store_vec_info_type)
394 && !adjacent_dr_p (STMT_VINFO_DATA_REF (stmt_info)));
395 }
396
/* Update the live ranges according PHI.

   Loop:
     bb 2:
       STMT 1              -- point 0
       STMT 2 (def SSA 1)  -- point 1
       STMT 3 (use SSA 1)  -- point 2
       STMT 4              -- point 3
     bb 3:
       SSA 2 = PHI<SSA 1>
       STMT 1              -- point 0
       STMT 2              -- point 1
       STMT 3 (use SSA 2)  -- point 2
       STMT 4              -- point 3

   Before this function, the SSA 1 live range is [2, 3] in bb 2
   and SSA 2 is [0, 3] in bb 3.

   Then, after this function, we update SSA 1 live range in bb 2
   into [2, 4] since SSA 1 is live out into bb 3.

   Additionally, every non-contiguous load/store gets an artificial
   "vect_perm" variable that is live across its whole block, to model
   the extra vector register a gather/scatter or VEC_PERM index needs;
   BIGGEST_MODE may be widened accordingly.  */
static void
update_local_live_ranges (
  vec_info *vinfo,
  hash_map<basic_block, vec<stmt_point>> &program_points_per_bb,
  hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb,
  machine_mode *biggest_mode)
{
  loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
  if (!loop_vinfo)
    return;

  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  unsigned int i, j;
  gphi_iterator psi;
  gimple_stmt_iterator si;
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Update local program points for bb %d:\n",
			 bbs[i]->index);
      for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
	{
	  gphi *phi = psi.phi ();
	  stmt_vec_info stmt_info = vinfo->lookup_stmt (phi);
	  /* PHIs the vectorizer will not transform carry no pressure.  */
	  if (STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info))
	      == undef_vec_info_type)
	    continue;

	  for (j = 0; j < gimple_phi_num_args (phi); j++)
	    {
	      edge e = gimple_phi_arg_edge (phi, j);
	      tree def = gimple_phi_arg_def (phi, j);
	      auto *live_ranges = live_ranges_per_bb.get (bb);
	      auto *live_range = live_ranges->get (def);
	      /* The PHI arg is live-in to BB along an in-loop edge, so its
		 live range in BB must start at program point 0.  */
	      if (live_range && flow_bb_inside_loop_p (loop, e->src))
		{
		  unsigned int start = (*live_range).first;
		  (*live_range).first = 0;
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "Update %T start point from %d to %d:\n",
				     def, start, (*live_range).first);
		}
	      /* Symmetrically, the arg is live-out of the predecessor block,
		 so extend its range there to that block's last point.  */
	      live_ranges = live_ranges_per_bb.get (e->src);
	      if (!program_points_per_bb.get (e->src))
		continue;
	      unsigned int max_point
		= (*program_points_per_bb.get (e->src)).length () - 1;
	      live_range = live_ranges->get (def);
	      if (!live_range)
		continue;

	      unsigned int end = (*live_range).second;
	      (*live_range).second = max_point;
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "Update %T end point from %d to %d:\n", def,
				 end, (*live_range).second);
	    }
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  if (!(is_gimple_assign (gsi_stmt (si))
		|| is_gimple_call (gsi_stmt (si))))
	    continue;
	  stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
	  enum stmt_vec_info_type type
	    = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
	  if (non_contiguous_memory_access_p (stmt_info))
	    {
	      /* For non-adjacent load/store STMT, we will potentially
		 convert it into:

		   1. MASK_LEN_GATHER_LOAD (..., perm indice).
		   2. Continguous load/store + VEC_PERM (..., perm indice)

		 We will be likely using one more vector variable.  */
	      unsigned int max_point
		= (*program_points_per_bb.get (bb)).length () - 1;
	      auto *live_ranges = live_ranges_per_bb.get (bb);
	      bool existed_p = false;
	      tree var = type == load_vec_info_type
			   ? gimple_get_lhs (gsi_stmt (si))
			   : get_store_value (gsi_stmt (si));
	      /* The index vector has one integer element per data element,
		 of the same precision as the data.  */
	      tree sel_type = build_nonstandard_integer_type (
		TYPE_PRECISION (TREE_TYPE (var)), 1);
	      *biggest_mode
		= get_biggest_mode (*biggest_mode, TYPE_MODE (sel_type));
	      /* A fresh artificial decl keyed into the live-range map;
		 each non-contiguous access gets its own.  */
	      tree sel = build_decl (UNKNOWN_LOCATION, VAR_DECL,
				     get_identifier ("vect_perm"), sel_type);
	      pair &live_range = live_ranges->get_or_insert (sel, &existed_p);
	      gcc_assert (!existed_p);
	      live_range = pair (0, max_point);
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "Add perm indice %T, start = 0, end = %d\n",
				 sel, max_point);
	    }
	}
    }
}
522
/* Construct the RISC-V vector cost model.  COSTING_FOR_SCALAR is true
   when costing the scalar (non-vectorized) version of the loop; all
   state lives in the vector_costs base.  */
costs::costs (vec_info *vinfo, bool costing_for_scalar)
  : vector_costs (vinfo, costing_for_scalar)
{}
526
527 /* Return true that the LMUL of new COST model is preferred. */
528 bool
529 costs::preferred_new_lmul_p (const vector_costs *uncast_other) const
530 {
531 auto other = static_cast<const costs *> (uncast_other);
532 auto this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
533 auto other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);
534 class loop *loop = LOOP_VINFO_LOOP (this_loop_vinfo);
535
536 if (loop_autovec_infos.get (loop) && loop_autovec_infos.get (loop)->end_p)
537 return false;
538 else if (loop_autovec_infos.get (loop))
539 loop_autovec_infos.get (loop)->current_lmul
540 = loop_autovec_infos.get (loop)->current_lmul / 2;
541 else
542 {
543 int regno_alignment
544 = riscv_get_v_regno_alignment (other_loop_vinfo->vector_mode);
545 if (known_eq (LOOP_VINFO_SLP_UNROLLING_FACTOR (other_loop_vinfo), 1U))
546 regno_alignment = RVV_M8;
547 loop_autovec_infos.put (loop, {regno_alignment, regno_alignment, false});
548 }
549
550 int lmul = get_current_lmul (loop);
551 if (dump_enabled_p ())
552 dump_printf_loc (MSG_NOTE, vect_location,
553 "Comparing two main loops (%s at VF %d vs %s at VF %d)\n",
554 GET_MODE_NAME (this_loop_vinfo->vector_mode),
555 vect_vf_for_cost (this_loop_vinfo),
556 GET_MODE_NAME (other_loop_vinfo->vector_mode),
557 vect_vf_for_cost (other_loop_vinfo));
558
559 /* Compute local program points.
560 It's a fast and effective computation. */
561 hash_map<basic_block, vec<stmt_point>> program_points_per_bb;
562 compute_local_program_points (other->m_vinfo, program_points_per_bb);
563
564 /* Compute local live ranges. */
565 hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb;
566 machine_mode biggest_mode
567 = compute_local_live_ranges (program_points_per_bb, live_ranges_per_bb);
568
569 /* If we can use simple VLS modes to handle NITERS element.
570 We don't need to use VLA modes with partial vector auto-vectorization. */
571 if (LOOP_VINFO_NITERS_KNOWN_P (this_loop_vinfo)
572 && known_le (tree_to_poly_int64 (LOOP_VINFO_NITERS (this_loop_vinfo))
573 * GET_MODE_SIZE (biggest_mode).to_constant (),
574 (int) RVV_M8 * BYTES_PER_RISCV_VECTOR)
575 && pow2p_hwi (LOOP_VINFO_INT_NITERS (this_loop_vinfo)))
576 return vector_costs::better_main_loop_than_p (other);
577
578 /* Update live ranges according to PHI. */
579 update_local_live_ranges (other->m_vinfo, program_points_per_bb,
580 live_ranges_per_bb, &biggest_mode);
581
582 /* TODO: We calculate the maximum live vars base on current STMTS
583 sequence. We can support live range shrink if it can give us
584 big improvement in the future. */
585 if (!live_ranges_per_bb.is_empty ())
586 {
587 unsigned int max_nregs = 0;
588 for (hash_map<basic_block, hash_map<tree, pair>>::iterator iter
589 = live_ranges_per_bb.begin ();
590 iter != live_ranges_per_bb.end (); ++iter)
591 {
592 basic_block bb = (*iter).first;
593 unsigned int max_point
594 = (*program_points_per_bb.get (bb)).length () - 1;
595 if ((*iter).second.is_empty ())
596 continue;
597 /* We prefer larger LMUL unless it causes register spillings. */
598 unsigned int nregs
599 = max_number_of_live_regs (bb, (*iter).second, max_point,
600 biggest_mode, lmul);
601 if (nregs > max_nregs)
602 max_nregs = nregs;
603 live_ranges_per_bb.empty ();
604 }
605 live_ranges_per_bb.empty ();
606 if (loop_autovec_infos.get (loop)->current_lmul == RVV_M1
607 || max_nregs <= V_REG_NUM)
608 loop_autovec_infos.get (loop)->end_p = true;
609 if (loop_autovec_infos.get (loop)->current_lmul > RVV_M1)
610 return max_nregs > V_REG_NUM;
611 return false;
612 }
613 if (!program_points_per_bb.is_empty ())
614 {
615 for (hash_map<basic_block, vec<stmt_point>>::iterator iter
616 = program_points_per_bb.begin ();
617 iter != program_points_per_bb.end (); ++iter)
618 {
619 vec<stmt_point> program_points = (*iter).second;
620 if (!program_points.is_empty ())
621 program_points.release ();
622 }
623 program_points_per_bb.empty ();
624 }
625 return lmul > RVV_M1;
626 }
627
628 bool
629 costs::better_main_loop_than_p (const vector_costs *uncast_other) const
630 {
631 auto other = static_cast<const costs *> (uncast_other);
632
633 if (!flag_vect_cost_model)
634 return vector_costs::better_main_loop_than_p (other);
635
636 if (riscv_autovec_lmul == RVV_DYNAMIC)
637 {
638 bool post_dom_available_p = dom_info_available_p (CDI_POST_DOMINATORS);
639 if (!post_dom_available_p)
640 calculate_dominance_info (CDI_POST_DOMINATORS);
641 bool preferred_p = preferred_new_lmul_p (uncast_other);
642 if (!post_dom_available_p)
643 free_dominance_info (CDI_POST_DOMINATORS);
644 return preferred_p;
645 }
646
647 return vector_costs::better_main_loop_than_p (other);
648 }
649
650 unsigned
651 costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
652 stmt_vec_info stmt_info, slp_tree, tree vectype,
653 int misalign, vect_cost_model_location where)
654 {
655 /* TODO: Use default STMT cost model.
656 We will support more accurate STMT cost model later. */
657 int stmt_cost = default_builtin_vectorization_cost (kind, vectype, misalign);
658 return record_stmt_cost (stmt_info, where, count * stmt_cost);
659 }
660
/* Finalize the accumulated costs; no RISC-V-specific adjustment is done
   yet, so simply delegate to the target-independent implementation.  */
void
costs::finish_cost (const vector_costs *scalar_costs)
{
  vector_costs::finish_cost (scalar_costs);
}
666
667 } // namespace riscv_vector