/* Subroutines used for code generation for RISC-V.
   Copyright (C) 2011-2023 Free Software Foundation, Inc.
   Contributed by Andrew Waterman (andrew@sifive.com).
   Based on MIPS target for GNU compiler.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#define INCLUDE_STRING
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "target.h"
#include "backend.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "recog.h"
#include "output.h"
#include "alias.h"
#include "tree.h"
#include "stringpool.h"
#include "attribs.h"
#include "varasm.h"
#include "stor-layout.h"
#include "calls.h"
#include "function.h"
#include "explow.h"
#include "ifcvt.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "reload.h"
#include "tm_p.h"
#include "basic-block.h"
#include "expr.h"
#include "optabs.h"
#include "bitmap.h"
#include "df.h"
#include "function-abi.h"
#include "diagnostic.h"
#include "builtins.h"
#include "predict.h"
#include "tree-pass.h"
#include "opts.h"
#include "tm-constrs.h"
#include "rtl-iter.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "cfgrtl.h"
#include "shrink-wrap.h"
#include "sel-sched.h"
#include "sched-int.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-expr.h"
#include "tree-vectorizer.h"
#include "gcse.h"
#include "tree-dfa.h"
#include "target-globals.h"

/* This file should be included last.  */
#include "target-def.h"
#include "riscv-vector-costs.h"
#include "riscv-subset.h"

/* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF.  */
#define UNSPEC_ADDRESS_P(X) \
  (GET_CODE (X) == UNSPEC \
   && XINT (X, 1) >= UNSPEC_ADDRESS_FIRST \
   && XINT (X, 1) < UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES)

/* Extract the symbol or label from UNSPEC wrapper X.  */
#define UNSPEC_ADDRESS(X) \
  XVECEXP (X, 0, 0)

/* Extract the symbol type from UNSPEC wrapper X.  */
#define UNSPEC_ADDRESS_TYPE(X) \
  ((enum riscv_symbol_type) (XINT (X, 1) - UNSPEC_ADDRESS_FIRST))
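
/* As an illustrative sketch (not new machinery): riscv_unspec_address
   further below wraps a symbol as, e.g.,
     (const (unspec [(symbol_ref "foo")] UNSPEC_ADDRESS_FIRST + SYMBOL_PCREL))
   so that UNSPEC_ADDRESS_P recognizes the wrapper, UNSPEC_ADDRESS extracts
   (symbol_ref "foo"), and UNSPEC_ADDRESS_TYPE recovers SYMBOL_PCREL.  */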

/* True if bit BIT is set in VALUE.  */
#define BITSET_P(VALUE, BIT) (((VALUE) & (1ULL << (BIT))) != 0)

/* Extract the backup dynamic frm rtl.  */
#define DYNAMIC_FRM_RTL(c) ((c)->machine->mode_sw_info.dynamic_frm)

/* True if the function has at least one static frm (rounding mode)
   instruction; false otherwise.  */
#define STATIC_FRM_P(c) ((c)->machine->mode_sw_info.static_frm_p)

/* Information about a function's frame layout.  */
struct GTY(()) riscv_frame_info {
  /* The size of the frame in bytes.  */
  poly_int64 total_size;

  /* Bit X is set if the function saves or restores GPR X.  */
  unsigned int mask;

  /* Likewise FPR X.  */
  unsigned int fmask;

  /* Likewise for vector registers.  */
  unsigned int vmask;

  /* How much the GPR save/restore routines adjust sp (or 0 if unused).  */
  unsigned save_libcall_adjustment;

  /* The minimum number of bytes, in multiples of 16-byte address
     increments, required to cover the registers in a multi push & pop.  */
  unsigned multi_push_adj_base;

  /* The number of additional 16-byte address increments allocated for
     the stack frame in a multi push & pop.  */
  unsigned multi_push_adj_addi;

  /* Offsets of fixed-point and floating-point save areas from frame
     bottom.  */
  poly_int64 gp_sp_offset;
  poly_int64 fp_sp_offset;

  /* Top and bottom offsets of vector save areas from frame bottom.  */
  poly_int64 v_sp_offset_top;
  poly_int64 v_sp_offset_bottom;

  /* Offset of virtual frame pointer from stack pointer/frame bottom.  */
  poly_int64 frame_pointer_offset;

  /* Offset of hard frame pointer from stack pointer/frame bottom.  */
  poly_int64 hard_frame_pointer_offset;

  /* The offset of arg_pointer_rtx from the bottom of the frame.  */
  poly_int64 arg_pointer_offset;

  /* Reset this struct; clear all fields to zero.  */
  void reset (void);
};

enum riscv_privilege_levels {
  UNKNOWN_MODE, USER_MODE, SUPERVISOR_MODE, MACHINE_MODE
};

struct GTY(()) mode_switching_info {
  /* The RTL variable which stores the dynamic FRM value.  We always use this
     RTX to restore dynamic FRM rounding mode in mode switching.  */
  rtx dynamic_frm;

  /* True if there is at least one static rounding mode instruction in
     the function.  */
  bool static_frm_p;

  mode_switching_info ()
  {
    dynamic_frm = NULL_RTX;
    static_frm_p = false;
  }
};

struct GTY(()) machine_function {
  /* The number of extra stack bytes taken up by register varargs.
     This area is allocated by the callee at the very top of the frame.  */
  int varargs_size;

  /* True if current function is a naked function.  */
  bool naked_p;

  /* True if current function is an interrupt function.  */
  bool interrupt_handler_p;
  /* For an interrupt handler, indicates the privilege level.  */
  enum riscv_privilege_levels interrupt_mode;

  /* True if attributes on current function have been checked.  */
  bool attributes_checked_p;

  /* True if RA must be saved because of a far jump.  */
  bool far_jump_used;

  /* The current frame information, calculated by riscv_compute_frame_info.  */
  struct riscv_frame_info frame;

  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool reg_is_wrapped_separately[FIRST_PSEUDO_REGISTER];

  /* The mode switching information for the FRM rounding modes.  */
  struct mode_switching_info mode_sw_info;
};

/* Information about a single argument.  */
struct riscv_arg_info {
  /* True if the argument is at least partially passed on the stack.  */
  bool stack_p;

  /* The number of integer registers allocated to this argument.  */
  unsigned int num_gprs;

  /* The offset of the first register used, provided num_gprs is nonzero.
     If passed entirely on the stack, the value is MAX_ARGS_IN_REGISTERS.  */
  unsigned int gpr_offset;

  /* The number of floating-point registers allocated to this argument.  */
  unsigned int num_fprs;

  /* The offset of the first register used, provided num_fprs is nonzero.  */
  unsigned int fpr_offset;

  /* The number of vector registers allocated to this argument.  */
  unsigned int num_vrs;

  /* The offset of the first register used, provided num_vrs is nonzero.  */
  unsigned int vr_offset;

  /* The number of mask registers allocated to this argument.  */
  unsigned int num_mrs;

  /* The offset of the first register used, provided num_mrs is nonzero.  */
  unsigned int mr_offset;
};

/* One stage in a constant building sequence.  These sequences have
   the form:

       A = VALUE[0]
       A = A CODE[1] VALUE[1]
       A = A CODE[2] VALUE[2]
       ...

   where A is an accumulator, each CODE[i] is a binary rtl operation
   and each VALUE[i] is a constant integer.  CODE[0] is undefined.  */
struct riscv_integer_op {
  enum rtx_code code;
  unsigned HOST_WIDE_INT value;
};
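
/* For example (a sketch, assuming RV64): riscv_build_integer_1 below
   synthesizes VALUE == 0x8001 as
       A = 0x8000       (LUI;  codes[0].code == UNKNOWN)
       A = A PLUS 0x1   (ADDI; codes[1].code == PLUS)
   for a cost of two operations.  */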

/* The largest number of operations needed to load an integer constant.
   The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
#define RISCV_MAX_INTEGER_OPS 8

enum riscv_fusion_pairs
{
  RISCV_FUSE_NOTHING = 0,
  RISCV_FUSE_ZEXTW = (1 << 0),
  RISCV_FUSE_ZEXTH = (1 << 1),
  RISCV_FUSE_ZEXTWS = (1 << 2),
  RISCV_FUSE_LDINDEXED = (1 << 3),
  RISCV_FUSE_LUI_ADDI = (1 << 4),
  RISCV_FUSE_AUIPC_ADDI = (1 << 5),
  RISCV_FUSE_LUI_LD = (1 << 6),
  RISCV_FUSE_AUIPC_LD = (1 << 7),
  RISCV_FUSE_LDPREINCREMENT = (1 << 8),
  RISCV_FUSE_ALIGNED_STD = (1 << 9),
};

/* Costs of various operations on the different architectures.  */

struct riscv_tune_param
{
  unsigned short fp_add[2];
  unsigned short fp_mul[2];
  unsigned short fp_div[2];
  unsigned short int_mul[2];
  unsigned short int_div[2];
  unsigned short issue_rate;
  unsigned short branch_cost;
  unsigned short memory_cost;
  unsigned short fmv_cost;
  bool slow_unaligned_access;
  bool use_divmod_expansion;
  unsigned int fusible_ops;
  const struct cpu_vector_cost *vec_costs;
};


/* Global variables for machine-dependent things.  */

/* Whether unaligned accesses execute very slowly.  */
bool riscv_slow_unaligned_access_p;

/* Whether user explicitly passed -mstrict-align.  */
bool riscv_user_wants_strict_align;

/* Stack alignment to assume/maintain.  */
unsigned riscv_stack_boundary;

/* Whether in riscv_output_mi_thunk.  */
static bool riscv_in_thunk_func = false;

/* If non-zero, this is an offset to be added to SP to redefine the CFA
   when restoring the FP register from the stack.  Only valid when generating
   the epilogue.  */
static poly_int64 epilogue_cfa_sp_offset;

/* Which tuning parameters to use.  */
static const struct riscv_tune_param *tune_param;

/* Which automaton to use for tuning.  */
enum riscv_microarchitecture_type riscv_microarchitecture;

/* The number of chunks in a single vector register.  */
poly_uint16 riscv_vector_chunks;

/* The number of bytes in a vector chunk.  */
unsigned riscv_bytes_per_vector_chunk;

/* Index R is the smallest register class that contains register R.  */
const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
  GR_REGS,      GR_REGS,      GR_REGS,      GR_REGS,
  GR_REGS,      GR_REGS,      SIBCALL_REGS, SIBCALL_REGS,
  JALR_REGS,    JALR_REGS,    SIBCALL_REGS, SIBCALL_REGS,
  SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS,
  SIBCALL_REGS, SIBCALL_REGS, JALR_REGS,    JALR_REGS,
  JALR_REGS,    JALR_REGS,    JALR_REGS,    JALR_REGS,
  JALR_REGS,    JALR_REGS,    JALR_REGS,    JALR_REGS,
  SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS,
  FP_REGS,      FP_REGS,      FP_REGS,      FP_REGS,
  FP_REGS,      FP_REGS,      FP_REGS,      FP_REGS,
  FP_REGS,      FP_REGS,      FP_REGS,      FP_REGS,
  FP_REGS,      FP_REGS,      FP_REGS,      FP_REGS,
  FP_REGS,      FP_REGS,      FP_REGS,      FP_REGS,
  FP_REGS,      FP_REGS,      FP_REGS,      FP_REGS,
  FP_REGS,      FP_REGS,      FP_REGS,      FP_REGS,
  FP_REGS,      FP_REGS,      FP_REGS,      FP_REGS,
  FRAME_REGS,   FRAME_REGS,   NO_REGS,      NO_REGS,
  NO_REGS,      NO_REGS,      NO_REGS,      NO_REGS,
  NO_REGS,      NO_REGS,      NO_REGS,      NO_REGS,
  NO_REGS,      NO_REGS,      NO_REGS,      NO_REGS,
  NO_REGS,      NO_REGS,      NO_REGS,      NO_REGS,
  NO_REGS,      NO_REGS,      NO_REGS,      NO_REGS,
  NO_REGS,      NO_REGS,      NO_REGS,      NO_REGS,
  NO_REGS,      NO_REGS,      NO_REGS,      NO_REGS,
  VM_REGS,      VD_REGS,      VD_REGS,      VD_REGS,
  VD_REGS,      VD_REGS,      VD_REGS,      VD_REGS,
  VD_REGS,      VD_REGS,      VD_REGS,      VD_REGS,
  VD_REGS,      VD_REGS,      VD_REGS,      VD_REGS,
  VD_REGS,      VD_REGS,      VD_REGS,      VD_REGS,
  VD_REGS,      VD_REGS,      VD_REGS,      VD_REGS,
  VD_REGS,      VD_REGS,      VD_REGS,      VD_REGS,
  VD_REGS,      VD_REGS,      VD_REGS,      VD_REGS,
};

/* Generic costs for VLS vector operations.  */
static const common_vector_cost generic_vls_vector_cost = {
  1, /* int_stmt_cost */
  1, /* fp_stmt_cost */
  1, /* gather_load_cost */
  1, /* scatter_store_cost */
  2, /* vec_to_scalar_cost */
  1, /* scalar_to_vec_cost */
  2, /* permute_cost */
  1, /* align_load_cost */
  1, /* align_store_cost */
  1, /* unalign_load_cost */
  1, /* unalign_store_cost */
};

/* Generic costs for VLA vector operations.  */
static const scalable_vector_cost generic_vla_vector_cost = {
  {
    1, /* int_stmt_cost */
    1, /* fp_stmt_cost */
    1, /* gather_load_cost */
    1, /* scatter_store_cost */
    2, /* vec_to_scalar_cost */
    1, /* scalar_to_vec_cost */
    2, /* permute_cost */
    1, /* align_load_cost */
    1, /* align_store_cost */
    1, /* unalign_load_cost */
    1, /* unalign_store_cost */
  },
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost = {
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  1, /* scalar_load_cost */
  1, /* scalar_store_cost */
  3, /* cond_taken_branch_cost */
  1, /* cond_not_taken_branch_cost */
  &generic_vls_vector_cost, /* vls */
  &generic_vla_vector_cost, /* vla */
};

/* Costs to use when optimizing for rocket.  */
static const struct riscv_tune_param rocket_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */
  {COSTS_N_INSNS (33), COSTS_N_INSNS (65)}, /* int_div */
  1, /* issue_rate */
  3, /* branch_cost */
  5, /* memory_cost */
  8, /* fmv_cost */
  true, /* slow_unaligned_access */
  false, /* use_divmod_expansion */
  RISCV_FUSE_NOTHING, /* fusible_ops */
  NULL, /* vector cost */
};

/* Costs to use when optimizing for SiFive 7 Series.  */
static const struct riscv_tune_param sifive_7_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */
  {COSTS_N_INSNS (33), COSTS_N_INSNS (65)}, /* int_div */
  2, /* issue_rate */
  4, /* branch_cost */
  3, /* memory_cost */
  8, /* fmv_cost */
  true, /* slow_unaligned_access */
  false, /* use_divmod_expansion */
  RISCV_FUSE_NOTHING, /* fusible_ops */
  NULL, /* vector cost */
};

/* Costs to use when optimizing for T-HEAD c906.  */
static const struct riscv_tune_param thead_c906_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */
  {COSTS_N_INSNS (18), COSTS_N_INSNS (34)}, /* int_div */
  1, /* issue_rate */
  3, /* branch_cost */
  5, /* memory_cost */
  8, /* fmv_cost */
  false, /* slow_unaligned_access */
  false, /* use_divmod_expansion */
  RISCV_FUSE_NOTHING, /* fusible_ops */
  NULL, /* vector cost */
};

/* Costs to use when optimizing for a generic ooo profile.  */
static const struct riscv_tune_param generic_ooo_tune_info = {
  {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* fp_add */
  {COSTS_N_INSNS (5), COSTS_N_INSNS (6)}, /* fp_mul */
  {COSTS_N_INSNS (7), COSTS_N_INSNS (8)}, /* fp_div */
  {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)}, /* int_div */
  1, /* issue_rate */
  3, /* branch_cost */
  4, /* memory_cost */
  4, /* fmv_cost */
  false, /* slow_unaligned_access */
  false, /* use_divmod_expansion */
  RISCV_FUSE_NOTHING, /* fusible_ops */
  &generic_vector_cost, /* vector cost */
};

/* Costs to use when optimizing for size.  */
static const struct riscv_tune_param optimize_size_tune_info = {
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_add */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_mul */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_div */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* int_mul */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* int_div */
  1, /* issue_rate */
  1, /* branch_cost */
  2, /* memory_cost */
  8, /* fmv_cost */
  false, /* slow_unaligned_access */
  false, /* use_divmod_expansion */
  RISCV_FUSE_NOTHING, /* fusible_ops */
  NULL, /* vector cost */
};

static bool riscv_avoid_shrink_wrapping_separate ();
static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);

/* Defining target-specific uses of __attribute__.  */
TARGET_GNU_ATTRIBUTES (riscv_attribute_table,
{
  /* Syntax: { name, min_len, max_len, decl_required, type_required,
	       function_type_required, affects_type_identity, handler,
	       exclude } */

  /* The attribute indicating that a function has no prologue/epilogue.  */
496 { "naked", 0, 0, true, false, false, false,
497 riscv_handle_fndecl_attribute, NULL },
498 /* This attribute generates prologue/epilogue for interrupt handlers. */
499 { "interrupt", 0, 1, false, true, true, false,
500 riscv_handle_type_attribute, NULL },
501
  /* The following two are used for the built-in properties of the Vector
     type and are not used externally.  */
504 {"RVV sizeless type", 4, 4, false, true, false, true, NULL, NULL},
505 {"RVV type", 0, 0, false, true, false, true, NULL, NULL}
506 });
507
508 /* Order for the CLOBBERs/USEs of gpr_save. */
509 static const unsigned gpr_save_reg_order[] = {
510 INVALID_REGNUM, T0_REGNUM, T1_REGNUM, RETURN_ADDR_REGNUM,
511 S0_REGNUM, S1_REGNUM, S2_REGNUM, S3_REGNUM, S4_REGNUM,
512 S5_REGNUM, S6_REGNUM, S7_REGNUM, S8_REGNUM, S9_REGNUM,
513 S10_REGNUM, S11_REGNUM
514 };
515
516 /* A table describing all the processors GCC knows about. */
517 static const struct riscv_tune_info riscv_tune_info_table[] = {
518 #define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO) \
519 { TUNE_NAME, PIPELINE_MODEL, & TUNE_INFO},
520 #include "riscv-cores.def"
521 };
522
/* Global variable saying whether we should save and restore s0/fp for the
   current function.  */
static bool riscv_save_frame_pointer;

typedef enum
{
  PUSH_IDX = 0,
  POP_IDX,
  POPRET_IDX,
  POPRETZ_IDX,
  ZCMP_OP_NUM
} riscv_zcmp_op_t;

typedef insn_code (*code_for_push_pop_t) (machine_mode);

void riscv_frame_info::reset (void)
{
  total_size = 0;
  mask = 0;
  fmask = 0;
  vmask = 0;
  save_libcall_adjustment = 0;

  gp_sp_offset = 0;
  fp_sp_offset = 0;
  v_sp_offset_top = 0;
  v_sp_offset_bottom = 0;

  frame_pointer_offset = 0;

  hard_frame_pointer_offset = 0;

  arg_pointer_offset = 0;
}

/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  */

static unsigned int
riscv_min_arithmetic_precision (void)
{
  return 32;
}

template <class T>
static const char *
get_tune_str (const T *opts)
{
  const char *tune_string = RISCV_TUNE_STRING_DEFAULT;
  if (opts->x_riscv_tune_string)
    tune_string = opts->x_riscv_tune_string;
  else if (opts->x_riscv_cpu_string)
    tune_string = opts->x_riscv_cpu_string;
  return tune_string;
}

/* Return the riscv_tune_info entry for the given name string.  If no entry
   is found, return nullptr when NULL_P is true; otherwise report an error
   and return a placeholder entry.  */

const struct riscv_tune_info *
riscv_parse_tune (const char *tune_string, bool null_p)
{
  const riscv_cpu_info *cpu = riscv_find_cpu (tune_string);

  if (cpu)
    tune_string = cpu->tune;

  for (unsigned i = 0; i < ARRAY_SIZE (riscv_tune_info_table); i++)
    if (strcmp (riscv_tune_info_table[i].name, tune_string) == 0)
      return riscv_tune_info_table + i;

  if (null_p)
    return nullptr;

  error ("unknown cpu %qs for %<-mtune%>", tune_string);
  return riscv_tune_info_table;
}

/* Helper function for riscv_build_integer; arguments are as for
   riscv_build_integer.  */

static int
riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
                       HOST_WIDE_INT value, machine_mode mode)
{
  HOST_WIDE_INT low_part = CONST_LOW_PART (value);
  int cost = RISCV_MAX_INTEGER_OPS + 1, alt_cost;
  struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];

  if (SMALL_OPERAND (value) || LUI_OPERAND (value))
    {
      /* Simply ADDI or LUI.  */
      codes[0].code = UNKNOWN;
      codes[0].value = value;
      return 1;
    }
  if (TARGET_ZBS && SINGLE_BIT_MASK_OPERAND (value))
    {
      /* Simply BSETI.  */
      codes[0].code = UNKNOWN;
      codes[0].value = value;

      /* RISC-V sign-extends all 32-bit values that live in a 32-bit
         register.  To avoid paradoxes, we thus need to use the
         sign-extended (negative) representation (-1 << 31) for the
         value, if we want to build (1 << 31) in SImode.  This will
         then expand to an LUI instruction.  */
      if (TARGET_64BIT && mode == SImode && value == (HOST_WIDE_INT_1U << 31))
        codes[0].value = (HOST_WIDE_INT_M1U << 31);

      return 1;
    }

  /* End with ADDI.  When constructing HImode constants, do not generate any
     intermediate value that is not itself a valid HImode constant.  The
     XORI case below will handle those remaining HImode constants.  */
  if (low_part != 0
      && (mode != HImode
          || value - low_part <= ((1 << (GET_MODE_BITSIZE (HImode) - 1)) - 1)))
    {
      HOST_WIDE_INT upper_part = value - low_part;
      if (mode != VOIDmode)
        upper_part = trunc_int_for_mode (value - low_part, mode);

      alt_cost = 1 + riscv_build_integer_1 (alt_codes, upper_part, mode);
      if (alt_cost < cost)
        {
          alt_codes[alt_cost-1].code = PLUS;
          alt_codes[alt_cost-1].value = low_part;
          memcpy (codes, alt_codes, sizeof (alt_codes));
          cost = alt_cost;
        }
    }

  /* End with XORI.  */
  if (cost > 2 && (low_part < 0 || mode == HImode))
    {
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, value ^ low_part, mode);
      if (alt_cost < cost)
        {
          alt_codes[alt_cost-1].code = XOR;
          alt_codes[alt_cost-1].value = low_part;
          memcpy (codes, alt_codes, sizeof (alt_codes));
          cost = alt_cost;
        }
    }

  /* Eliminate trailing zeros and end with SLLI.  */
  if (cost > 2 && (value & 1) == 0)
    {
      int shift = ctz_hwi (value);
      unsigned HOST_WIDE_INT x = value;
      x = sext_hwi (x >> shift, HOST_BITS_PER_WIDE_INT - shift);

      /* Don't eliminate the lower 12 bits if LUI might apply.  */
      if (shift > IMM_BITS && !SMALL_OPERAND (x) && LUI_OPERAND (x << IMM_BITS))
        shift -= IMM_BITS, x <<= IMM_BITS;

      alt_cost = 1 + riscv_build_integer_1 (alt_codes, x, mode);
      if (alt_cost < cost)
        {
          alt_codes[alt_cost-1].code = ASHIFT;
          alt_codes[alt_cost-1].value = shift;
          memcpy (codes, alt_codes, sizeof (alt_codes));
          cost = alt_cost;
        }
    }

  if (cost > 2 && TARGET_64BIT && (TARGET_ZBB || TARGET_XTHEADBB))
    {
      int leading_ones = clz_hwi (~value);
      int trailing_ones = ctz_hwi (~value);

      /* If all bits are one except a few that are zero, and the zero bits
         are within a range of 11 bits, then we can synthesize a constant
         by loading a small negative constant and rotating.  */
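      /* A worked example (sketch): VALUE == 0xfffffffffffff0ff has
         leading_ones == 52 and trailing_ones == 8, so the window of
         zeros is only 4 bits wide.  Rotating VALUE right by 8 gives
         0xfffffffffffffff0 (-16), which a single ADDI can load, and
         the ROTATERT by 64 - 8 == 56 emitted below restores the
         original value.  */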
      if (leading_ones < 64
          && ((64 - leading_ones - trailing_ones) < 12))
        {
          codes[0].code = UNKNOWN;
          /* The sign-bit might be zero, so just rotate to be safe.  */
          codes[0].value = (((unsigned HOST_WIDE_INT) value >> trailing_ones)
                            | (value << (64 - trailing_ones)));
          codes[1].code = ROTATERT;
          codes[1].value = 64 - trailing_ones;
          cost = 2;
        }
      /* Handle the case where the 11-bit range of zero bits wraps around.  */
      else
        {
          int upper_trailing_ones = ctz_hwi (~value >> 32);
          int lower_leading_ones = clz_hwi (~value << 32);

          if (upper_trailing_ones < 32 && lower_leading_ones < 32
              && ((64 - upper_trailing_ones - lower_leading_ones) < 12))
            {
              codes[0].code = UNKNOWN;
              /* The sign-bit might be zero, so just rotate to be safe.  */
              codes[0].value = ((value << (32 - upper_trailing_ones))
                                | ((unsigned HOST_WIDE_INT) value
                                   >> (32 + upper_trailing_ones)));
              codes[1].code = ROTATERT;
              codes[1].value = 32 - upper_trailing_ones;
              cost = 2;
            }
        }
    }

  gcc_assert (cost <= RISCV_MAX_INTEGER_OPS);
  return cost;
}

/* Fill CODES with a sequence of rtl operations to load VALUE.
   Return the number of operations needed.  */

static int
riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value,
                     machine_mode mode)
{
  int cost = riscv_build_integer_1 (codes, value, mode);

  /* Eliminate leading zeros and end with SRLI.  */
  if (value > 0 && cost > 2)
    {
      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
      int alt_cost, shift = clz_hwi (value);
      HOST_WIDE_INT shifted_val;

      /* Try filling trailing bits with 1s.  */
      shifted_val = (value << shift) | ((((HOST_WIDE_INT) 1) << shift) - 1);
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, shifted_val, mode);
      if (alt_cost < cost)
        {
          alt_codes[alt_cost-1].code = LSHIFTRT;
          alt_codes[alt_cost-1].value = shift;
          memcpy (codes, alt_codes, sizeof (alt_codes));
          cost = alt_cost;
        }

      /* Try filling trailing bits with 0s.  */
      shifted_val = value << shift;
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, shifted_val, mode);
      if (alt_cost < cost)
        {
          alt_codes[alt_cost-1].code = LSHIFTRT;
          alt_codes[alt_cost-1].value = shift;
          memcpy (codes, alt_codes, sizeof (alt_codes));
          cost = alt_cost;
        }
    }

  if (!TARGET_64BIT
      && (value > INT32_MAX || value < INT32_MIN))
    {
      unsigned HOST_WIDE_INT loval = sext_hwi (value, 32);
      unsigned HOST_WIDE_INT hival = sext_hwi ((value - loval) >> 32, 32);
      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
      struct riscv_integer_op hicode[RISCV_MAX_INTEGER_OPS];
      int hi_cost, lo_cost;

      hi_cost = riscv_build_integer_1 (hicode, hival, mode);
      if (hi_cost < cost)
        {
          lo_cost = riscv_build_integer_1 (alt_codes, loval, mode);
          if (lo_cost + hi_cost < cost)
            {
              memcpy (codes, alt_codes,
                      lo_cost * sizeof (struct riscv_integer_op));
              memcpy (codes + lo_cost, hicode,
                      hi_cost * sizeof (struct riscv_integer_op));
              cost = lo_cost + hi_cost;
            }
        }
    }

  return cost;
}

/* Return the cost of constructing VAL in the event that a scratch
   register is available.  */

static int
riscv_split_integer_cost (HOST_WIDE_INT val)
{
  int cost;
  unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
  unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
  struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];

  cost = 2 + riscv_build_integer (codes, loval, VOIDmode);
  if (loval != hival)
    cost += riscv_build_integer (codes, hival, VOIDmode);

  return cost;
}

/* Return the cost of constructing the integer constant VAL.  */

static int
riscv_integer_cost (HOST_WIDE_INT val)
{
  struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];
  return MIN (riscv_build_integer (codes, val, VOIDmode),
              riscv_split_integer_cost (val));
}

/* Try to split a 64-bit integer into 32-bit parts, then reassemble.  */
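
/* As a sketch of the arithmetic: on RV32 with VAL == 0x1234567887654321,
   LOVAL is sext_hwi (0x87654321, 32) (negative) and HIVAL becomes
   0x12345679 -- the borrow from the negative low part bumps the high
   part -- so (HIVAL << 32) + LOVAL reassembles VAL exactly.  */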

static rtx
riscv_split_integer (HOST_WIDE_INT val, machine_mode mode)
{
  unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
  unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
  rtx hi = gen_reg_rtx (mode), lo = gen_reg_rtx (mode);

  riscv_move_integer (lo, lo, loval, mode);

  if (loval == hival)
    hi = gen_rtx_ASHIFT (mode, lo, GEN_INT (32));
  else
    {
      riscv_move_integer (hi, hi, hival, mode);
      hi = gen_rtx_ASHIFT (mode, hi, GEN_INT (32));
    }

  hi = force_reg (mode, hi);
  return gen_rtx_PLUS (mode, hi, lo);
}

/* Return true if X is a thread-local symbol.  */

static bool
riscv_tls_symbol_p (const_rtx x)
{
  return SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Return true if symbol X binds locally.  */

static bool
riscv_symbol_binds_local_p (const_rtx x)
{
  if (SYMBOL_REF_P (x))
    return (SYMBOL_REF_DECL (x)
            ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
            : SYMBOL_REF_LOCAL_P (x));
  else
    return false;
}

/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X.  */

static enum riscv_symbol_type
riscv_classify_symbol (const_rtx x)
{
  if (riscv_tls_symbol_p (x))
    return SYMBOL_TLS;

  if (GET_CODE (x) == SYMBOL_REF && flag_pic && !riscv_symbol_binds_local_p (x))
    return SYMBOL_GOT_DISP;

  switch (riscv_cmodel)
    {
    case CM_MEDLOW:
      return SYMBOL_ABSOLUTE;
    case CM_LARGE:
      if (SYMBOL_REF_P (x))
        return CONSTANT_POOL_ADDRESS_P (x) ? SYMBOL_PCREL : SYMBOL_FORCE_TO_MEM;
      return SYMBOL_PCREL;
    default:
      return SYMBOL_PCREL;
    }
}

/* Classify the base of symbolic expression X.  */

enum riscv_symbol_type
riscv_classify_symbolic_expression (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  if (UNSPEC_ADDRESS_P (x))
    return UNSPEC_ADDRESS_TYPE (x);

  return riscv_classify_symbol (x);
}

/* Return true if X is a symbolic constant.  If it is, store the type of
   the symbol in *SYMBOL_TYPE.  */

bool
riscv_symbolic_constant_p (rtx x, enum riscv_symbol_type *symbol_type)
{
  rtx offset;

  split_const (x, &x, &offset);
  if (UNSPEC_ADDRESS_P (x))
    {
      *symbol_type = UNSPEC_ADDRESS_TYPE (x);
      x = UNSPEC_ADDRESS (x);
    }
  else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
    *symbol_type = riscv_classify_symbol (x);
  else
    return false;

  if (offset == const0_rtx)
    return true;

  /* Nonzero offsets are only valid for references that don't use the GOT.  */
  switch (*symbol_type)
    {
    case SYMBOL_ABSOLUTE:
    case SYMBOL_PCREL:
    case SYMBOL_TLS_LE:
      /* GAS rejects offsets outside the range [-2^31, 2^31-1].  */
      return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);

    default:
      return false;
    }
}

/* Returns the number of instructions necessary to reference a symbol.  */

static int riscv_symbol_insns (enum riscv_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_TLS: return 0; /* Depends on the TLS model.  */
    case SYMBOL_ABSOLUTE: return 2; /* LUI + the reference.  */
    case SYMBOL_PCREL: return 2; /* AUIPC + the reference.  */
    case SYMBOL_TLS_LE: return 3; /* LUI + ADD TP + the reference.  */
    case SYMBOL_GOT_DISP: return 3; /* AUIPC + LD GOT + the reference.  */
    case SYMBOL_FORCE_TO_MEM: return 3; /* AUIPC + LD + the reference.  */
    default: gcc_unreachable ();
    }
}

/* Immediate values loaded by the FLI.S instruction in Chapter 25 of the
   latest RISC-V ISA Manual draft.  For details, please see:
   https://github.com/riscv/riscv-isa-manual/releases/tag/isa-449cd0c  */

static unsigned HOST_WIDE_INT fli_value_hf[32] =
{
  0xbcp8, 0x4p8, 0x1p8, 0x2p8, 0x1cp8, 0x20p8, 0x2cp8, 0x30p8,
  0x34p8, 0x35p8, 0x36p8, 0x37p8, 0x38p8, 0x39p8, 0x3ap8, 0x3bp8,
  0x3cp8, 0x3dp8, 0x3ep8, 0x3fp8, 0x40p8, 0x41p8, 0x42p8, 0x44p8,
  0x48p8, 0x4cp8, 0x58p8, 0x5cp8, 0x78p8,
  /* Only used for filling; this makes entries 29 and 30 of HF the same.  */
  0x78p8,
  0x7cp8, 0x7ep8
};

static unsigned HOST_WIDE_INT fli_value_sf[32] =
{
  0xbf8p20, 0x008p20, 0x378p20, 0x380p20, 0x3b8p20, 0x3c0p20, 0x3d8p20, 0x3e0p20,
  0x3e8p20, 0x3eap20, 0x3ecp20, 0x3eep20, 0x3f0p20, 0x3f2p20, 0x3f4p20, 0x3f6p20,
  0x3f8p20, 0x3fap20, 0x3fcp20, 0x3fep20, 0x400p20, 0x402p20, 0x404p20, 0x408p20,
  0x410p20, 0x418p20, 0x430p20, 0x438p20, 0x470p20, 0x478p20, 0x7f8p20, 0x7fcp20
};
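
/* For instance, entry 16 above, 0x3f8p20 == 0x3f800000, is the IEEE-754
   single-precision bit pattern for 1.0; it lines up with the "1.0" entry
   of fli_value_print below.  */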

static unsigned HOST_WIDE_INT fli_value_df[32] =
{
  0xbff0p48, 0x10p48, 0x3ef0p48, 0x3f00p48,
  0x3f70p48, 0x3f80p48, 0x3fb0p48, 0x3fc0p48,
  0x3fd0p48, 0x3fd4p48, 0x3fd8p48, 0x3fdcp48,
  0x3fe0p48, 0x3fe4p48, 0x3fe8p48, 0x3fecp48,
  0x3ff0p48, 0x3ff4p48, 0x3ff8p48, 0x3ffcp48,
  0x4000p48, 0x4004p48, 0x4008p48, 0x4010p48,
  0x4020p48, 0x4030p48, 0x4060p48, 0x4070p48,
  0x40e0p48, 0x40f0p48, 0x7ff0p48, 0x7ff8p48
};

/* Floating-point values displayed at the assembly level; these are
   consistent with LLVM's Zfa implementation:
   https://reviews.llvm.org/D145645.  */

const char *fli_value_print[32] =
{
  "-1.0", "min", "1.52587890625e-05", "3.0517578125e-05", "0.00390625", "0.0078125", "0.0625", "0.125",
  "0.25", "0.3125", "0.375", "0.4375", "0.5", "0.625", "0.75", "0.875",
  "1.0", "1.25", "1.5", "1.75", "2.0", "2.5", "3.0", "4.0",
  "8.0", "16.0", "128.0", "256.0", "32768.0", "65536.0", "inf", "nan"
};

/* Return the index into the FLI instruction table if rtx X is an immediate
   constant that can be moved using a single FLI instruction from the Zfa
   extension.  Return -1 otherwise.  */

int
riscv_float_const_rtx_index_for_fli (rtx x)
{
  unsigned HOST_WIDE_INT *fli_value_array;

  machine_mode mode = GET_MODE (x);

  if (!TARGET_ZFA
      || !CONST_DOUBLE_P (x)
      || mode == VOIDmode
      || (mode == HFmode && !(TARGET_ZFH || TARGET_ZVFH))
      || (mode == SFmode && !TARGET_HARD_FLOAT)
      || (mode == DFmode && !TARGET_DOUBLE_FLOAT))
    return -1;

  if (!SCALAR_FLOAT_MODE_P (mode)
      || GET_MODE_BITSIZE (mode).to_constant () > HOST_BITS_PER_WIDE_INT
      /* Only support up to DF mode.  */
      || GET_MODE_BITSIZE (mode).to_constant () > GET_MODE_BITSIZE (DFmode))
    return -1;

  unsigned HOST_WIDE_INT ival = 0;

  long res[2];
  real_to_target (res,
                  CONST_DOUBLE_REAL_VALUE (x),
                  REAL_MODE_FORMAT (mode));

  if (mode == DFmode)
    {
      int order = BYTES_BIG_ENDIAN ? 1 : 0;
      ival = zext_hwi (res[order], 32);
      ival |= (zext_hwi (res[1 - order], 32) << 32);

      /* If the lower 32 bits are not all zero, the value cannot be in
         the table.  */
      if (ival & (unsigned HOST_WIDE_INT) 0xffffffff)
        return -1;
    }
  else
    ival = zext_hwi (res[0], 32);

  switch (mode)
    {
    case E_HFmode:
      fli_value_array = fli_value_hf;
      break;
    case E_SFmode:
      fli_value_array = fli_value_sf;
      break;
    case E_DFmode:
      fli_value_array = fli_value_df;
      break;
    default:
      return -1;
    }

  if (fli_value_array[0] == ival)
    return 0;

  if (fli_value_array[1] == ival)
    return 1;

  /* Perform a binary search to find target index.  */
  unsigned l, r, m;

  l = 2;
  r = 31;

  while (l <= r)
    {
      m = (l + r) / 2;
      if (fli_value_array[m] == ival)
        return m;
      else if (fli_value_array[m] < ival)
        l = m + 1;
      else
        r = m - 1;
    }

  return -1;
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.  */

static bool
riscv_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return riscv_const_insns (x) > 0;
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
riscv_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  enum riscv_symbol_type type;
  rtx base, offset;

  /* There's no way to calculate VL-based values using relocations.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    if (GET_CODE (*iter) == CONST_POLY_INT)
      return true;

  /* There is no assembler syntax for expressing an address-sized
     high part.  */
  if (GET_CODE (x) == HIGH)
    return true;

  if (satisfies_constraint_zfli (x))
    return true;

  split_const (x, &base, &offset);
  if (riscv_symbolic_constant_p (base, &type))
    {
      if (type == SYMBOL_FORCE_TO_MEM)
        return false;

      /* As an optimization, don't spill symbolic constants that are as
         cheap to rematerialize as to access in the constant pool.  */
      if (SMALL_OPERAND (INTVAL (offset)) && riscv_symbol_insns (type) > 0)
        return true;

      /* As an optimization, avoid needlessly generating dynamic
         relocations.  */
      if (flag_pic)
        return true;
    }

  /* TLS symbols must be computed by riscv_legitimize_move.  */
  if (tls_referenced_p (x))
    return true;

  return false;
}

/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

int
riscv_regno_mode_ok_for_base_p (int regno,
                                machine_mode mode ATTRIBUTE_UNUSED,
                                bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
        return true;
      regno = reg_renumber[regno];
    }

  /* These fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
    return true;

  return GP_REG_P (regno);
}

/* Get valid index register class.
   The RISC-V base instructions don't support index registers,
   but extensions might support that.  */

enum reg_class
riscv_index_reg_class ()
{
  if (TARGET_XTHEADMEMIDX || TARGET_XTHEADFMEMIDX)
    return GR_REGS;

  return NO_REGS;
}

/* Return true if register REGNO is a valid index register.
   The RISC-V base instructions don't support index registers,
   but extensions might support that.  */

int
riscv_regno_ok_for_index_p (int regno)
{
  if (TARGET_XTHEADMEMIDX || TARGET_XTHEADFMEMIDX)
    return riscv_regno_mode_ok_for_base_p (regno, VOIDmode, 1);

  return 0;
}

/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
riscv_valid_base_register_p (rtx x, machine_mode mode, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x)
          && riscv_regno_mode_ok_for_base_p (REGNO (x), mode, strict_p));
}

/* Return true if, for every base register BASE_REG, (plus BASE_REG X)
   can address a value of mode MODE.  */

static bool
riscv_valid_offset_p (rtx x, machine_mode mode)
{
  /* Check that X is a signed 12-bit number.  */
  if (!const_arith_operand (x, Pmode))
    return false;

  /* We may need to split multiword moves, so make sure that every word
     is accessible.  */
  if (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD
      && !SMALL_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode).to_constant () - UNITS_PER_WORD))
    return false;

  return true;
}

/* Return true if a symbol of type SYMBOL_TYPE should be split in two.  */

bool
riscv_split_symbol_type (enum riscv_symbol_type symbol_type)
{
  if (symbol_type == SYMBOL_TLS_LE)
    return true;

  if (!TARGET_EXPLICIT_RELOCS)
    return false;

  return symbol_type == SYMBOL_ABSOLUTE || symbol_type == SYMBOL_PCREL;
}
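
/* When a symbol is split, its address is materialized in two parts;
   a sketch for SYMBOL_ABSOLUTE:

       lui   a0, %hi(sym)        # the HIGH part
       addi  a0, a0, %lo(sym)    # the LO_SUM part

   which also lets the %lo part fold into a load/store offset.  */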

/* Return true if a LO_SUM can address a value of mode MODE when the
   LO_SUM symbol has type SYM_TYPE.  X is the LO_SUM second operand, which
   is used when the mode is BLKmode.  */

static bool
riscv_valid_lo_sum_p (enum riscv_symbol_type sym_type, machine_mode mode,
                      rtx x)
{
  int align, size;

  /* Check that symbols of type SYMBOL_TYPE can be used to access values
     of mode MODE.  */
  if (riscv_symbol_insns (sym_type) == 0)
    return false;

  /* Check that there is a known low-part relocation.  */
  if (!riscv_split_symbol_type (sym_type))
    return false;

  /* We can't tell size or alignment when we have BLKmode, so try extracting
     a decl from the symbol if possible.  */
  if (mode == BLKmode)
    {
      rtx offset;

      /* Extract the symbol from the LO_SUM operand, if any.  */
      split_const (x, &x, &offset);

      /* Might be a CODE_LABEL.  We can compute align but not size for that,
         so don't bother trying to handle it.  */
      if (!SYMBOL_REF_P (x))
        return false;

      /* Use worst case assumptions if we don't have a SYMBOL_REF_DECL.  */
      align = (SYMBOL_REF_DECL (x)
               ? DECL_ALIGN (SYMBOL_REF_DECL (x))
               : 1);
      size = (SYMBOL_REF_DECL (x) && DECL_SIZE (SYMBOL_REF_DECL (x))
              ? tree_to_uhwi (DECL_SIZE (SYMBOL_REF_DECL (x)))
              : 2*BITS_PER_WORD);
    }
  else
    {
      align = GET_MODE_ALIGNMENT (mode);
      size = GET_MODE_BITSIZE (mode).to_constant ();
    }

  /* We may need to split multiword moves, so make sure that each word
     can be accessed without inducing a carry.  */
  if (size > BITS_PER_WORD
      && (!TARGET_STRICT_ALIGN || size > align))
    return false;

  return true;
}

/* Return true if MODE is an enabled RVV vector mode.
   For example, when MIN_VLEN == 32, 'RVVMF2SI' mode is disabled,
   whereas 'RVVM1SI' mode is enabled.  */

bool
riscv_v_ext_vector_mode_p (machine_mode mode)
{
#define ENTRY(MODE, REQUIREMENT, ...) \
  case MODE##mode: \
    return REQUIREMENT;
  switch (mode)
    {
#include "riscv-vector-switch.def"
    default:
      return false;
    }

  return false;
}

/* Return true if MODE is an enabled RVV tuple mode.  */

bool
riscv_v_ext_tuple_mode_p (machine_mode mode)
{
#define TUPLE_ENTRY(MODE, REQUIREMENT, ...) \
  case MODE##mode: \
    return REQUIREMENT;
  switch (mode)
    {
#include "riscv-vector-switch.def"
    default:
      return false;
    }

  return false;
}

/* Return true if MODE is an enabled RVV VLS mode.  */

bool
riscv_v_ext_vls_mode_p (machine_mode mode)
{
#define VLS_ENTRY(MODE, REQUIREMENT) \
  case MODE##mode: \
    return REQUIREMENT;
  switch (mode)
    {
#include "riscv-vector-switch.def"
    default:
      return false;
    }

  return false;
}

/* Return true if MODE is either an RVV vector mode or an RVV tuple mode.  */

static bool
riscv_v_ext_mode_p (machine_mode mode)
{
  return riscv_v_ext_vector_mode_p (mode) || riscv_v_ext_tuple_mode_p (mode)
         || riscv_v_ext_vls_mode_p (mode);
}

/* Called from ADJUST_NUNITS in riscv-modes.def.  Return the correct
   NUNITS size for the corresponding machine_mode.  */

poly_int64
riscv_v_adjust_nunits (machine_mode mode, int scale)
{
  gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
  if (riscv_v_ext_mode_p (mode))
    {
      if (TARGET_MIN_VLEN == 32)
        scale = scale / 2;
      return riscv_vector_chunks * scale;
    }
  return scale;
}

/* Called from ADJUST_NUNITS in riscv-modes.def.  Return the correct
   NUNITS size for the corresponding machine_mode.  */

poly_int64
riscv_v_adjust_nunits (machine_mode mode, bool fractional_p, int lmul, int nf)
{
  if (riscv_v_ext_mode_p (mode))
    {
      scalar_mode smode = GET_MODE_INNER (mode);
      int size = GET_MODE_SIZE (smode);
      int nunits_per_chunk = riscv_bytes_per_vector_chunk / size;
      if (fractional_p)
        return nunits_per_chunk / lmul * riscv_vector_chunks * nf;
      else
        return nunits_per_chunk * lmul * riscv_vector_chunks * nf;
    }
  /* Disabled RVV modes default to a size of 1.  */
  return 1;
}
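
/* For example (a sketch, assuming riscv_bytes_per_vector_chunk == 8,
   i.e. TARGET_MIN_VLEN > 32): an LMUL == 2 mode with SImode elements
   has nunits_per_chunk == 8 / 4 == 2, so its NUNITS is
   2 * 2 * riscv_vector_chunks.  */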

/* Called from ADJUST_BYTESIZE in riscv-modes.def.  Return the correct
   BYTE size for the corresponding machine_mode.  */

poly_int64
riscv_v_adjust_bytesize (machine_mode mode, int scale)
{
  if (riscv_v_ext_vector_mode_p (mode))
    {
      poly_int64 nunits = GET_MODE_NUNITS (mode);
      poly_int64 mode_size = GET_MODE_SIZE (mode);

      if (maybe_eq (mode_size, (uint16_t) -1))
        mode_size = riscv_vector_chunks * scale;

      if (nunits.coeffs[0] > 8)
        return exact_div (nunits, 8);
      else if (nunits.is_constant ())
        return 1;
      else
        return poly_int64 (1, 1);
    }

  return scale;
}

/* Called from ADJUST_PRECISION in riscv-modes.def.  Return the correct
   PRECISION size for the corresponding machine_mode.  */

poly_int64
riscv_v_adjust_precision (machine_mode mode, int scale)
{
  return riscv_v_adjust_nunits (mode, scale);
}

/* Return true if X is a valid address for machine mode MODE.  If it is,
   fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
   effect.  */

static bool
riscv_classify_address (struct riscv_address_info *info, rtx x,
                        machine_mode mode, bool strict_p)
{
  if (th_classify_address (info, x, mode, strict_p))
    return true;

  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      info->type = ADDRESS_REG;
      info->reg = x;
      info->offset = const0_rtx;
      return riscv_valid_base_register_p (info->reg, mode, strict_p);

    case PLUS:
      /* RVV load/store disallow any offset.  */
      if (riscv_v_ext_mode_p (mode))
        return false;

      info->type = ADDRESS_REG;
      info->reg = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      return (riscv_valid_base_register_p (info->reg, mode, strict_p)
              && riscv_valid_offset_p (info->offset, mode));

    case LO_SUM:
      /* RVV load/store disallow LO_SUM.  */
      if (riscv_v_ext_mode_p (mode))
        return false;

      info->type = ADDRESS_LO_SUM;
      info->reg = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      /* We have to trust the creator of the LO_SUM to do something vaguely
         sane.  Target-independent code that creates a LO_SUM should also
         create and verify the matching HIGH.  Target-independent code that
         adds an offset to a LO_SUM must prove that the offset will not
         induce a carry.  Failure to do either of these things would be
         a bug, and we are not required to check for it here.  The RISC-V
         backend itself should only create LO_SUMs for valid symbolic
         constants, with the high part being either a HIGH or a copy
         of _gp.  */
      info->symbol_type
        = riscv_classify_symbolic_expression (info->offset);
      return (riscv_valid_base_register_p (info->reg, mode, strict_p)
              && riscv_valid_lo_sum_p (info->symbol_type, mode, info->offset));

    case CONST_INT:
      /* We only allow the const0_rtx for the RVV load/store.  For example:
         +----------------------------------------------------------+
         | li a5,0                                                   |
         | vsetvli zero,a1,e32,m1,ta,ma                              |
         | vle32.v v24,0(a5)  <- propagate the const 0 to a5 here.   |
         | vs1r.v v24,0(a0)                                          |
         +----------------------------------------------------------+
         It can be folded to:
         +----------------------------------------------------------+
         | vsetvli zero,a1,e32,m1,ta,ma                              |
         | vle32.v v24,0(zero)                                       |
         | vs1r.v v24,0(a0)                                          |
         +----------------------------------------------------------+
         This behavior will benefit the underlying RVV auto
         vectorization.  */
      if (riscv_v_ext_mode_p (mode))
        return x == const0_rtx;

      /* Small-integer addresses don't occur very often, but they
         are legitimate if x0 is a valid base register.  */
      info->type = ADDRESS_CONST_INT;
      return SMALL_OPERAND (INTVAL (x));

    default:
      return false;
    }
}

/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */

static bool
riscv_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
                            code_helper = ERROR_MARK)
{
  /* Disallow RVV modes in the base address,
     e.g. (mem:SI (subreg:DI (reg:V1DI 155) 0)).  */
  if (SUBREG_P (x) && riscv_v_ext_mode_p (GET_MODE (SUBREG_REG (x))))
    return false;
  struct riscv_address_info addr;

  return riscv_classify_address (&addr, x, mode, strict_p);
}

/* Return true if hard reg REGNO can be used in compressed instructions.  */

static bool
riscv_compressed_reg_p (int regno)
{
  /* x8-x15/f8-f15 are compressible registers.  */
  return ((TARGET_RVC || TARGET_ZCA)
          && (IN_RANGE (regno, GP_REG_FIRST + 8, GP_REG_FIRST + 15)
              || IN_RANGE (regno, FP_REG_FIRST + 8, FP_REG_FIRST + 15)));
}

/* Return true if X is an unsigned 5-bit immediate scaled by 4.  */

static bool
riscv_compressed_lw_offset_p (rtx x)
{
  return (CONST_INT_P (x)
          && (INTVAL (x) & 3) == 0
          && IN_RANGE (INTVAL (x), 0, CSW_MAX_OFFSET));
}
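
/* A quick sanity sketch: C.LW/C.SW encode a 5-bit unsigned immediate
   scaled by 4, so the valid offsets are 0, 4, ..., 124; e.g. 64 passes
   the check above while 66 (misaligned) and 128 (out of range) do not.  */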

/* Return true if load/store from/to address X can be compressed.  */

static bool
riscv_compressed_lw_address_p (rtx x)
{
  struct riscv_address_info addr;
  bool result = riscv_classify_address (&addr, x, GET_MODE (x),
                                        reload_completed);

  /* Return false if address is not compressed_reg + small_offset.  */
  if (!result
      || addr.type != ADDRESS_REG
      /* Before reload, assume all registers are OK.  */
      || (reload_completed
          && !riscv_compressed_reg_p (REGNO (addr.reg))
          && addr.reg != stack_pointer_rtx)
      || !riscv_compressed_lw_offset_p (addr.offset))
    return false;

  return result;
}

/* Return the number of instructions needed to load or store a value
   of mode MODE at address X.  Return 0 if X isn't valid for MODE.
   Assume that multiword moves may need to be split into word moves
   if MIGHT_SPLIT_P, otherwise assume that a single load or store is
   enough.  */

int
riscv_address_insns (rtx x, machine_mode mode, bool might_split_p)
{
  struct riscv_address_info addr = {};
  int n = 1;

  if (!riscv_classify_address (&addr, x, mode, false))
    {
      /* This could be a pattern from the pic.md file, in which case we
         want this address to always have a cost of 3 to make it as
         expensive as the most expensive symbol.  This prevents constant
         propagation from preferring symbols over register plus offset.  */
      return 3;
    }

  /* BLKmode is used for single unaligned loads and stores and should
     not count as a multiword mode.  */
  if (!riscv_v_ext_vector_mode_p (mode) && mode != BLKmode && might_split_p)
    n += (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (addr.type == ADDRESS_LO_SUM)
    n += riscv_symbol_insns (addr.symbol_type) - 1;

  return n;
}

/* Return the number of instructions needed to load constant X.
   Return 0 if X isn't a valid constant.  */

int
riscv_const_insns (rtx x)
{
  enum riscv_symbol_type symbol_type;
  rtx offset;

  switch (GET_CODE (x))
    {
    case HIGH:
      if (!riscv_symbolic_constant_p (XEXP (x, 0), &symbol_type)
          || !riscv_split_symbol_type (symbol_type))
        return 0;

      /* This is simply an LUI.  */
      return 1;

    case CONST_INT:
      {
        int cost = riscv_integer_cost (INTVAL (x));
        /* Force complicated constants to memory.  */
        return cost < 4 ? cost : 0;
      }

    case CONST_DOUBLE:
      /* See if we can use FMV directly.  */
      if (satisfies_constraint_zfli (x))
        return 1;

      /* We can use x0 to load floating-point zero.  */
      return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
    case CONST_VECTOR:
      {
        /* TODO: This is not accurate; we will need to adapt the COST of
           CONST_VECTOR in the future for the following cases:

           - 1. const duplicate vector with element value
             in the range [-16, 15].
           - 2. const duplicate vector with element value
             outside the range [-16, 15].
           - 3. const series vector.
           ...etc.  */
        if (riscv_v_ext_mode_p (GET_MODE (x)))
          {
            /* const series vector.  */
            rtx base, step;
            if (const_vec_series_p (x, &base, &step))
              {
                /* This is not accurate; we will need to adapt the COST
                   accurately according to BASE && STEP.  */
                return 1;
              }

            rtx elt;
            if (const_vec_duplicate_p (x, &elt))
              {
                /* We don't allow a CONST_VECTOR of DI elements on an RV32
                   system, since the ELT constant value cannot be held in a
                   single register; this keeps reload from turning a DI
                   register vec_duplicate into vmv.v.x.  */
                scalar_mode smode = GET_MODE_INNER (GET_MODE (x));
                if (maybe_gt (GET_MODE_SIZE (smode), UNITS_PER_WORD)
                    && !immediate_operand (elt, Pmode))
                  return 0;
                /* Constants from -16 to 15 can be loaded with vmv.v.i.
                   The Wc0, Wc1 constraints are already covered by the
                   vi constraint so we do not need to check them here
                   separately.  */
                if (satisfies_constraint_vi (x))
                  return 1;

                /* Any int/FP constants can always be broadcast from a
                   scalar register.  Loading of a floating-point
                   constant incurs a literal-pool access.  Allow this in
                   order to increase vectorization possibilities.  */
                int n = riscv_const_insns (elt);
                if (CONST_DOUBLE_P (elt))
                  return 1 + 4; /* vfmv.v.f + memory access.  */
                else
                  {
                    /* We need as many insns as it takes to load the
                       constant into a GPR and one vmv.v.x.  */
                    if (n != 0)
                      return 1 + n;
                    else
                      return 1 + 4; /* vmv.v.x + memory access.  */
                  }
              }
          }

        /* TODO: We may support more const vectors in the future.  */
        return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
      }

    case CONST:
      /* See if we can refer to X directly.  */
      if (riscv_symbolic_constant_p (x, &symbol_type))
        return riscv_symbol_insns (symbol_type);

      /* Otherwise try splitting the constant into a base and offset.  */
      split_const (x, &x, &offset);
      if (offset != 0)
        {
          int n = riscv_const_insns (x);
          if (n != 0)
            return n + riscv_integer_cost (INTVAL (offset));
        }
      return 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return riscv_symbol_insns (riscv_classify_symbol (x));
    /* TODO: In RVV, a CONST_POLY_INT is loaded with a csrr VLENB
       instruction plus several scalar shift or mult instructions; the
       exact count is so far unknown, so set the cost to 4 temporarily.  */
    case CONST_POLY_INT:
      return 4;

    default:
      return 0;
    }
}

/* X is a doubleword constant that can be handled by splitting it into
   two words and loading each word separately.  Return the number of
   instructions required to do this.  */

int
riscv_split_const_insns (rtx x)
{
  unsigned int low, high;

  low = riscv_const_insns (riscv_subword (x, false));
  high = riscv_const_insns (riscv_subword (x, true));
  gcc_assert (low > 0 && high > 0);
  return low + high;
}

/* Return the number of instructions needed to implement INSN,
   given that it loads from or stores to MEM.  */

int
riscv_load_store_insns (rtx mem, rtx_insn *insn)
{
  machine_mode mode;
  bool might_split_p;
  rtx set;

  gcc_assert (MEM_P (mem));
  mode = GET_MODE (mem);

  /* Try to prove that INSN does not need to be split.  */
  might_split_p = true;
  if (GET_MODE_BITSIZE (mode).to_constant () <= 32)
    might_split_p = false;
  else if (GET_MODE_BITSIZE (mode).to_constant () == 64)
    {
      set = single_set (insn);
      if (set && !riscv_split_64bit_move_p (SET_DEST (set), SET_SRC (set)))
        might_split_p = false;
    }

  return riscv_address_insns (XEXP (mem, 0), mode, might_split_p);
}

/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */

rtx
riscv_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
          ? emit_move_insn (dest, src)
          : emit_move_insn_1 (dest, src));
}

/* Emit an instruction of the form (set TARGET SRC).  */

static rtx
riscv_emit_set (rtx target, rtx src)
{
  emit_insn (gen_rtx_SET (target, src));
  return target;
}

/* Emit an instruction of the form (set DEST (CODE X)).  */

rtx
riscv_emit_unary (enum rtx_code code, rtx dest, rtx x)
{
  return riscv_emit_set (dest, gen_rtx_fmt_e (code, GET_MODE (dest), x));
}

/* Emit an instruction of the form (set DEST (CODE X Y)).  */

rtx
riscv_emit_binary (enum rtx_code code, rtx dest, rtx x, rtx y)
{
  return riscv_emit_set (dest, gen_rtx_fmt_ee (code, GET_MODE (dest), x, y));
}

/* Compute (CODE X Y) and store the result in a new register
   of mode MODE.  Return that new register.  */

static rtx
riscv_force_binary (machine_mode mode, enum rtx_code code, rtx x, rtx y)
{
  return riscv_emit_binary (code, gen_reg_rtx (mode), x, y);
}

static rtx
riscv_swap_instruction (rtx inst)
{
  gcc_assert (GET_MODE (inst) == SImode);
  if (BYTES_BIG_ENDIAN)
    inst = expand_unop (SImode, bswap_optab, inst, gen_reg_rtx (SImode), 1);
  return inst;
}

/* Copy VALUE to a register and return that register.  If new pseudos
   are allowed, copy it into a new register, otherwise use DEST.  */

static rtx
riscv_force_temporary (rtx dest, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (Pmode, value);
  else
    {
      riscv_emit_move (dest, value);
      return dest;
    }
}

/* Wrap symbol or label BASE in an UNSPEC address of type SYMBOL_TYPE,
   then add CONST_INT OFFSET to the result.  */

static rtx
riscv_unspec_address_offset (rtx base, rtx offset,
                             enum riscv_symbol_type symbol_type)
{
  base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base),
                         UNSPEC_ADDRESS_FIRST + symbol_type);
  if (offset != const0_rtx)
    base = gen_rtx_PLUS (Pmode, base, offset);
  return gen_rtx_CONST (Pmode, base);
}

/* Return an UNSPEC address with underlying address ADDRESS and symbol
   type SYMBOL_TYPE.  */

rtx
riscv_unspec_address (rtx address, enum riscv_symbol_type symbol_type)
{
  rtx base, offset;

  split_const (address, &base, &offset);
  return riscv_unspec_address_offset (base, offset, symbol_type);
}

/* If OP is an UNSPEC address, return the address to which it refers,
   otherwise return OP itself.  */

static rtx
riscv_strip_unspec_address (rtx op)
{
  rtx base, offset;

  split_const (op, &base, &offset);
  if (UNSPEC_ADDRESS_P (base))
    op = plus_constant (Pmode, UNSPEC_ADDRESS (base), INTVAL (offset));
  return op;
}

/* If riscv_unspec_address (ADDR, SYMBOL_TYPE) is a 32-bit value, add the
   high part to BASE and return the result.  Just return BASE otherwise.
   TEMP is as for riscv_force_temporary.

   The returned expression can be used as the first operand to a LO_SUM.  */

static rtx
riscv_unspec_offset_high (rtx temp, rtx addr, enum riscv_symbol_type symbol_type)
{
  addr = gen_rtx_HIGH (Pmode, riscv_unspec_address (addr, symbol_type));
  return riscv_force_temporary (temp, addr);
}

/* Load an entry from the GOT for a TLS GD access.  */

static rtx riscv_got_load_tls_gd (rtx dest, rtx sym)
{
  if (Pmode == DImode)
    return gen_got_load_tls_gddi (dest, sym);
  else
    return gen_got_load_tls_gdsi (dest, sym);
}

/* Load an entry from the GOT for a TLS IE access.  */

static rtx riscv_got_load_tls_ie (rtx dest, rtx sym)
{
  if (Pmode == DImode)
    return gen_got_load_tls_iedi (dest, sym);
1916 else
1917 return gen_got_load_tls_iesi (dest, sym);
1918 }
1919
1920 /* Add in the thread pointer for a TLS LE access. */
1921
1922 static rtx riscv_tls_add_tp_le (rtx dest, rtx base, rtx sym)
1923 {
1924 rtx tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
1925 if (Pmode == DImode)
1926 return gen_tls_add_tp_ledi (dest, base, tp, sym);
1927 else
1928 return gen_tls_add_tp_lesi (dest, base, tp, sym);
1929 }
1930
1931 /* If MODE is MAX_MACHINE_MODE, ADDR appears as a move operand, otherwise
1932 it appears in a MEM of that mode. Return true if ADDR is a legitimate
1933 constant in that context and can be split into high and low parts.
1934 If so, and if LOW_OUT is nonnull, emit the high part and store the
1935 low part in *LOW_OUT. Leave *LOW_OUT unchanged otherwise.
1936
1937 TEMP is as for riscv_force_temporary and is used to load the high
1938 part into a register.
1939
1940 When MODE is MAX_MACHINE_MODE, the low part is guaranteed to be
1941    a legitimate SET_SRC for an .md pattern, otherwise the low part
1942 is guaranteed to be a legitimate address for mode MODE. */
1943
1944 bool
1945 riscv_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
1946 {
1947 enum riscv_symbol_type symbol_type;
1948
1949 if ((GET_CODE (addr) == HIGH && mode == MAX_MACHINE_MODE)
1950 || !riscv_symbolic_constant_p (addr, &symbol_type)
1951 || riscv_symbol_insns (symbol_type) == 0
1952 || !riscv_split_symbol_type (symbol_type))
1953 return false;
1954
1955 if (low_out)
1956 switch (symbol_type)
1957 {
1958 case SYMBOL_FORCE_TO_MEM:
1959 return false;
1960
1961 case SYMBOL_ABSOLUTE:
1962 {
1963 rtx high = gen_rtx_HIGH (Pmode, copy_rtx (addr));
1964 high = riscv_force_temporary (temp, high);
1965 *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
1966 }
1967 break;
1968
1969 case SYMBOL_PCREL:
1970 {
1971 static unsigned seqno;
1972 char buf[32];
1973 rtx label;
1974
1975 ssize_t bytes = snprintf (buf, sizeof (buf), ".LA%u", seqno);
1976 gcc_assert ((size_t) bytes < sizeof (buf));
1977
1978 label = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
1979 SYMBOL_REF_FLAGS (label) |= SYMBOL_FLAG_LOCAL;
1980 /* ??? Ugly hack to make weak symbols work. May need to change the
1981 RTL for the auipc and/or low patterns to get a better fix for
1982 this. */
1983 if (! nonzero_address_p (addr))
1984 SYMBOL_REF_WEAK (label) = 1;
1985
1986 if (temp == NULL)
1987 temp = gen_reg_rtx (Pmode);
1988
1989 if (Pmode == DImode)
1990 emit_insn (gen_auipcdi (temp, copy_rtx (addr), GEN_INT (seqno)));
1991 else
1992 emit_insn (gen_auipcsi (temp, copy_rtx (addr), GEN_INT (seqno)));
1993
1994 *low_out = gen_rtx_LO_SUM (Pmode, temp, label);
1995
1996 seqno++;
1997 }
1998 break;
1999
2000 default:
2001 gcc_unreachable ();
2002 }
2003
2004 return true;
2005 }
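/* For example (a sketch of the usual output; register choice is
   illustrative): for a SYMBOL_ABSOLUTE address the split above emits

	lui	t0, %hi(sym)

   and stores (lo_sum t0 sym) in *LOW_OUT, which prints as %lo(sym);
   for SYMBOL_PCREL it emits

   .LA0:	auipc	t0, %pcrel_hi(sym)

   and stores (lo_sum t0 .LA0) in *LOW_OUT, which prints as
   %pcrel_lo(.LA0).  */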
2006
2007 /* Return a legitimate address for REG + OFFSET. TEMP is as for
2008 riscv_force_temporary; it is only needed when OFFSET is not a
2009 SMALL_OPERAND. */
2010
2011 static rtx
2012 riscv_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset)
2013 {
2014 if (!SMALL_OPERAND (offset))
2015 {
2016 rtx high;
2017
2018       /* Leave OFFSET as a 12-bit offset and put the excess in HIGH.
2019 The addition inside the macro CONST_HIGH_PART may cause an
2020 overflow, so we need to force a sign-extension check. */
2021 high = gen_int_mode (CONST_HIGH_PART (offset), Pmode);
2022 offset = CONST_LOW_PART (offset);
2023 high = riscv_force_temporary (temp, high);
2024 reg = riscv_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
2025 }
2026 return plus_constant (Pmode, reg, offset);
2027 }
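/* For example (a sketch; the temporary register is illustrative): with
   OFFSET = 0x12345, CONST_HIGH_PART gives 0x12000 and CONST_LOW_PART
   gives 0x345, so the code above emits roughly

	lui	t0, 0x12
	add	t0, t0, reg

   and returns (plus t0 0x345), whose offset fits a 12-bit immediate.  */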
2028
2029 /* The __tls_get_addr symbol. */
2030 static GTY(()) rtx riscv_tls_symbol;
2031
2032 /* Return an instruction sequence that calls __tls_get_addr.  SYM is
2033    the TLS symbol we are referencing, which is accessed via the global
2034    dynamic TLS model.  RESULT is an RTX for the return value
2035    location.  */
2036
2037 static rtx_insn *
2038 riscv_call_tls_get_addr (rtx sym, rtx result)
2039 {
2040 rtx a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST), func;
2041 rtx_insn *insn;
2042
2043 if (!riscv_tls_symbol)
2044 riscv_tls_symbol = init_one_libfunc ("__tls_get_addr");
2045 func = gen_rtx_MEM (FUNCTION_MODE, riscv_tls_symbol);
2046
2047 start_sequence ();
2048
2049 emit_insn (riscv_got_load_tls_gd (a0, sym));
2050 insn = emit_call_insn (gen_call_value (result, func, const0_rtx,
2051 gen_int_mode (RISCV_CC_BASE, SImode)));
2052 RTL_CONST_CALL_P (insn) = 1;
2053 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
2054 insn = get_insns ();
2055
2056 end_sequence ();
2057
2058 return insn;
2059 }
2060
2061 /* Generate the code to access LOC, a thread-local SYMBOL_REF, and return
2062 its address. The return value will be both a valid address and a valid
2063 SET_SRC (either a REG or a LO_SUM). */
2064
2065 static rtx
2066 riscv_legitimize_tls_address (rtx loc)
2067 {
2068 rtx dest, tp, tmp;
2069 enum tls_model model = SYMBOL_REF_TLS_MODEL (loc);
2070
2071 #if 0
2072 /* TLS copy relocs are now deprecated and should not be used. */
2073 /* Since we support TLS copy relocs, non-PIC TLS accesses may all use LE. */
2074 if (!flag_pic)
2075 model = TLS_MODEL_LOCAL_EXEC;
2076 #endif
2077
2078 switch (model)
2079 {
2080 case TLS_MODEL_LOCAL_DYNAMIC:
2081 /* Rely on section anchors for the optimization that LDM TLS
2082 provides. The anchor's address is loaded with GD TLS. */
2083 case TLS_MODEL_GLOBAL_DYNAMIC:
2084 tmp = gen_rtx_REG (Pmode, GP_RETURN);
2085 dest = gen_reg_rtx (Pmode);
2086 emit_libcall_block (riscv_call_tls_get_addr (loc, tmp), dest, tmp, loc);
2087 break;
2088
2089 case TLS_MODEL_INITIAL_EXEC:
2090 /* la.tls.ie; tp-relative add */
2091 tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
2092 tmp = gen_reg_rtx (Pmode);
2093 emit_insn (riscv_got_load_tls_ie (tmp, loc));
2094 dest = gen_reg_rtx (Pmode);
2095 emit_insn (gen_add3_insn (dest, tmp, tp));
2096 break;
2097
2098 case TLS_MODEL_LOCAL_EXEC:
2099 tmp = riscv_unspec_offset_high (NULL, loc, SYMBOL_TLS_LE);
2100 dest = gen_reg_rtx (Pmode);
2101 emit_insn (riscv_tls_add_tp_le (dest, tmp, loc));
2102 dest = gen_rtx_LO_SUM (Pmode, dest,
2103 riscv_unspec_address (loc, SYMBOL_TLS_LE));
2104 break;
2105
2106 default:
2107 gcc_unreachable ();
2108 }
2109 return dest;
2110 }
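/* Roughly (a sketch; the exact relocations are those defined by the
   RISC-V psABI and the register choice is illustrative), the models
   above correspond to:

   GD:	la.tls.gd  a0, sym
	call	__tls_get_addr@plt

   IE:	la.tls.ie  t0, sym
	add	t0, t0, tp

   LE:	lui	t0, %tprel_hi(sym)
	add	t0, t0, tp, %tprel_add(sym)
	addi	t0, t0, %tprel_lo(sym)  */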
2111 \f
2112 /* If X is not a valid address for mode MODE, force it into a register. */
2113
2114 static rtx
2115 riscv_force_address (rtx x, machine_mode mode)
2116 {
2117 if (!riscv_legitimate_address_p (mode, x, false))
2118 {
2119 if (can_create_pseudo_p ())
2120 return force_reg (Pmode, x);
2121 else
2122 {
2123 	  /* It's only safe for the thunk function.
2124 	     Use ra as the temp register.  */
2125 gcc_assert (riscv_in_thunk_func);
2126 rtx reg = RISCV_PROLOGUE_TEMP2 (Pmode);
2127 riscv_emit_move (reg, x);
2128 return reg;
2129 }
2130 }
2131
2132 return x;
2133 }
2134
2135 /* Modify base + offset so that offset fits within a compressed load/store insn
2136 and the excess is added to base. */
2137
2138 static rtx
2139 riscv_shorten_lw_offset (rtx base, HOST_WIDE_INT offset)
2140 {
2141 rtx addr, high;
2142 /* Leave OFFSET as an unsigned 5-bit offset scaled by 4 and put the excess
2143 into HIGH. */
2144 high = GEN_INT (offset & ~CSW_MAX_OFFSET);
2145 offset &= CSW_MAX_OFFSET;
2146 if (!SMALL_OPERAND (INTVAL (high)))
2147 high = force_reg (Pmode, high);
2148 base = force_reg (Pmode, gen_rtx_PLUS (Pmode, high, base));
2149 addr = plus_constant (Pmode, base, offset);
2150 return addr;
2151 }
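/* For example (a sketch, assuming CSW_MAX_OFFSET is 124, the largest
   compressed load/store offset): for BASE + 156 the code above emits
   roughly

	addi	t0, base, 128

   and returns (plus t0 28), so a following lw can use the compressed
   c.lw encoding, whose offset must be a multiple of 4 in [0, 124].  */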
2152
2153 /* Helper for riscv_legitimize_address. Given X, return true if it
2154 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
2155
2156    These respectively represent canonical shift-add rtxs or scaled
2157 memory addresses. */
2158 static bool
2159 mem_shadd_or_shadd_rtx_p (rtx x)
2160 {
2161 return ((GET_CODE (x) == ASHIFT
2162 || GET_CODE (x) == MULT)
2163 && CONST_INT_P (XEXP (x, 1))
2164 && ((GET_CODE (x) == ASHIFT && IN_RANGE (INTVAL (XEXP (x, 1)), 1, 3))
2165 || (GET_CODE (x) == MULT
2166 && IN_RANGE (exact_log2 (INTVAL (XEXP (x, 1))), 1, 3))));
2167 }
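/* For example, both (ashift (reg) (const_int 2)) and
   (mult (reg) (const_int 4)) satisfy the predicate above: each
   represents reg * 4, the scaled index usable by the Zba sh2add
   instruction.  */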
2168
2169 /* This function is used to implement LEGITIMIZE_ADDRESS. If X can
2170 be legitimized in a way that the generic machinery might not expect,
2171 return a new address, otherwise return NULL. MODE is the mode of
2172 the memory being accessed. */
2173
2174 static rtx
2175 riscv_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2176 machine_mode mode)
2177 {
2178 rtx addr;
2179
2180 if (riscv_tls_symbol_p (x))
2181 return riscv_legitimize_tls_address (x);
2182
2183 /* See if the address can split into a high part and a LO_SUM. */
2184 if (riscv_split_symbol (NULL, x, mode, &addr))
2185 return riscv_force_address (addr, mode);
2186
2187 /* Handle BASE + OFFSET. */
2188 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
2189 && INTVAL (XEXP (x, 1)) != 0)
2190 {
2191 rtx base = XEXP (x, 0);
2192 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
2193
2194 /* Handle (plus (plus (mult (a) (mem_shadd_constant)) (fp)) (C)) case. */
2195 if (GET_CODE (base) == PLUS && mem_shadd_or_shadd_rtx_p (XEXP (base, 0))
2196 && SMALL_OPERAND (offset))
2197 {
2198 rtx index = XEXP (base, 0);
2199 rtx fp = XEXP (base, 1);
2200 if (REG_P (fp) && REGNO (fp) == VIRTUAL_STACK_VARS_REGNUM)
2201 {
2203 /* If we were given a MULT, we must fix the constant
2204 as we're going to create the ASHIFT form. */
2205 int shift_val = INTVAL (XEXP (index, 1));
2206 if (GET_CODE (index) == MULT)
2207 shift_val = exact_log2 (shift_val);
2208
2209 rtx reg1 = gen_reg_rtx (Pmode);
2210 rtx reg2 = gen_reg_rtx (Pmode);
2211 rtx reg3 = gen_reg_rtx (Pmode);
2212 riscv_emit_binary (PLUS, reg1, fp, GEN_INT (offset));
2213 riscv_emit_binary (ASHIFT, reg2, XEXP (index, 0), GEN_INT (shift_val));
2214 riscv_emit_binary (PLUS, reg3, reg2, reg1);
2215
2216 return reg3;
2217 }
2218 }
2219
2220 if (!riscv_valid_base_register_p (base, mode, false))
2221 base = copy_to_mode_reg (Pmode, base);
2222 if (optimize_function_for_size_p (cfun)
2223 && (strcmp (current_pass->name, "shorten_memrefs") == 0)
2224 && mode == SImode)
2225 /* Convert BASE + LARGE_OFFSET into NEW_BASE + SMALL_OFFSET to allow
2226 possible compressed load/store. */
2227 addr = riscv_shorten_lw_offset (base, offset);
2228 else
2229 addr = riscv_add_offset (NULL, base, offset);
2230 return riscv_force_address (addr, mode);
2231 }
2232
2233 return x;
2234 }
2235
2236 /* Load VALUE into DEST. TEMP is as for riscv_force_temporary. ORIG_MODE
2237 is the original src mode before promotion. */
2238
2239 void
2240 riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT value,
2241 machine_mode orig_mode)
2242 {
2243 struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];
2244 machine_mode mode;
2245 int i, num_ops;
2246 rtx x;
2247
2248 mode = GET_MODE (dest);
2249 /* We use the original mode for the riscv_build_integer call, because HImode
2250 values are given special treatment. */
2251 num_ops = riscv_build_integer (codes, value, orig_mode);
2252
2253 if (can_create_pseudo_p () && num_ops > 2 /* not a simple constant */
2254 && num_ops >= riscv_split_integer_cost (value))
2255 x = riscv_split_integer (value, mode);
2256 else
2257 {
2258 codes[0].value = trunc_int_for_mode (codes[0].value, mode);
2259 /* Apply each binary operation to X. */
2260 x = GEN_INT (codes[0].value);
2261
2262 for (i = 1; i < num_ops; i++)
2263 {
2264 if (!can_create_pseudo_p ())
2265 x = riscv_emit_set (temp, x);
2266 else
2267 x = force_reg (mode, x);
2268 codes[i].value = trunc_int_for_mode (codes[i].value, mode);
2269 x = gen_rtx_fmt_ee (codes[i].code, mode, x, GEN_INT (codes[i].value));
2270 }
2271 }
2272
2273 riscv_emit_set (dest, x);
2274 }
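/* For example (a sketch; the register is illustrative): with
   VALUE = 0x12345678, riscv_build_integer typically returns two
   operations and the loop above emits roughly

	lui	t0, 0x12345
	addi	t0, t0, 0x678

   whereas a constant whose operation count reaches
   riscv_split_integer_cost is instead built in two word-sized halves
   by riscv_split_integer.  */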
2275
2276 /* Subroutine of riscv_legitimize_move. Move constant SRC into register
2277 DEST given that SRC satisfies immediate_operand but doesn't satisfy
2278 move_operand. */
2279
2280 static void
2281 riscv_legitimize_const_move (machine_mode mode, rtx dest, rtx src)
2282 {
2283 rtx base, offset;
2284
2285 /* Split moves of big integers into smaller pieces. */
2286 if (splittable_const_int_operand (src, mode))
2287 {
2288 riscv_move_integer (dest, dest, INTVAL (src), mode);
2289 return;
2290 }
2291
2292 if (satisfies_constraint_zfli (src))
2293 {
2294 riscv_emit_set (dest, src);
2295 return;
2296 }
2297
2298 /* Split moves of symbolic constants into high/low pairs. */
2299 if (riscv_split_symbol (dest, src, MAX_MACHINE_MODE, &src))
2300 {
2301 riscv_emit_set (dest, src);
2302 return;
2303 }
2304
2305 /* Generate the appropriate access sequences for TLS symbols. */
2306 if (riscv_tls_symbol_p (src))
2307 {
2308 riscv_emit_move (dest, riscv_legitimize_tls_address (src));
2309 return;
2310 }
2311
2312 /* If we have (const (plus symbol offset)), and that expression cannot
2313 be forced into memory, load the symbol first and add in the offset. Also
2314 prefer to do this even if the constant _can_ be forced into memory, as it
2315 usually produces better code. */
2316 split_const (src, &base, &offset);
2317 if (offset != const0_rtx
2318 && (targetm.cannot_force_const_mem (mode, src) || can_create_pseudo_p ()))
2319 {
2320 base = riscv_force_temporary (dest, base);
2321 riscv_emit_move (dest, riscv_add_offset (NULL, base, INTVAL (offset)));
2322 return;
2323 }
2324
2325   /* Handle the following format:
2326 (const:DI
2327 (plus:DI
2328 (symbol_ref:DI ("ic") [flags 0x2] <var_decl 0x7fe57740be10 ic>) <- op_0
2329 (const_poly_int:DI [16, 16]) // <- op_1
2330 ))
2331 */
2332 if (GET_CODE (src) == CONST && GET_CODE (XEXP (src, 0)) == PLUS
2333 && CONST_POLY_INT_P (XEXP (XEXP (src, 0), 1)))
2334 {
2335 rtx dest_tmp = gen_reg_rtx (mode);
2336 rtx tmp = gen_reg_rtx (mode);
2337
2338 riscv_emit_move (dest, XEXP (XEXP (src, 0), 0));
2339 riscv_legitimize_poly_move (mode, dest_tmp, tmp, XEXP (XEXP (src, 0), 1));
2340
2341 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, dest, dest_tmp)));
2342 return;
2343 }
2344
2345 src = force_const_mem (mode, src);
2346
2347 /* When using explicit relocs, constant pool references are sometimes
2348 not legitimate addresses. */
2349 riscv_split_symbol (dest, XEXP (src, 0), mode, &XEXP (src, 0));
2350 riscv_emit_move (dest, src);
2351 }
2352
2353 /* Report when we try to do something that requires the vector extension
2354    while it is disabled.  This is an error of last resort and isn't very
2355    high-quality.  It usually involves attempts to measure the vector length
     in some way.  */
2356
2357 static void
2358 riscv_report_v_required (void)
2359 {
2360 static bool reported_p = false;
2361
2362 /* Avoid reporting a slew of messages for a single oversight. */
2363 if (reported_p)
2364 return;
2365
2366 error ("this operation requires the RVV ISA extension");
2367 inform (input_location, "you can enable RVV using the command-line"
2368 " option %<-march%>, or by using the %<target%>"
2369 " attribute or pragma");
2370 reported_p = true;
2371 }
2372
2373 /* Helper function to emit an operation for rtx_code CODE.  */
2374 static void
2375 riscv_expand_op (enum rtx_code code, machine_mode mode, rtx op0, rtx op1,
2376 rtx op2)
2377 {
2378 if (can_create_pseudo_p ())
2379 {
2380 rtx result;
2381 if (GET_RTX_CLASS (code) == RTX_UNARY)
2382 result = expand_simple_unop (mode, code, op1, NULL_RTX, false);
2383 else
2384 result = expand_simple_binop (mode, code, op1, op2, NULL_RTX, false,
2385 OPTAB_DIRECT);
2386 riscv_emit_move (op0, result);
2387 }
2388 else
2389 {
2390 rtx pat;
2391       /* The following implementation is for the prologue and epilogue.
2392 	 Because the prologue and epilogue cannot use pseudo registers,
2393 	 we can't use expand_simple_binop or expand_simple_unop.  */
2394 if (GET_RTX_CLASS (code) == RTX_UNARY)
2395 pat = gen_rtx_fmt_e (code, mode, op1);
2396 else
2397 pat = gen_rtx_fmt_ee (code, mode, op1, op2);
2398 emit_insn (gen_rtx_SET (op0, pat));
2399 }
2400 }
2401
2402 /* Expand a mult operation with a constant integer; the multiplicand is
2403    also used as a temporary register.  */
2404
2405 static void
2406 riscv_expand_mult_with_const_int (machine_mode mode, rtx dest, rtx multiplicand,
2407 HOST_WIDE_INT multiplier)
2408 {
2409 if (multiplier == 0)
2410 {
2411 riscv_emit_move (dest, GEN_INT (0));
2412 return;
2413 }
2414
2415 bool neg_p = multiplier < 0;
2416   unsigned HOST_WIDE_INT multiplier_abs = abs_hwi (multiplier);
2417
2418 if (multiplier_abs == 1)
2419 {
2420 if (neg_p)
2421 riscv_expand_op (NEG, mode, dest, multiplicand, NULL_RTX);
2422 else
2423 riscv_emit_move (dest, multiplicand);
2424 }
2425 else
2426 {
2427 if (pow2p_hwi (multiplier_abs))
2428 {
2429 /*
2430 multiplicand = [BYTES_PER_RISCV_VECTOR].
2431 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 8].
2432 Sequence:
2433 csrr a5, vlenb
2434 slli a5, a5, 3
2435 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 8].
2436 Sequence:
2437 csrr a5, vlenb
2438 slli a5, a5, 3
2439 neg a5, a5
2440 */
2441 riscv_expand_op (ASHIFT, mode, dest, multiplicand,
2442 gen_int_mode (exact_log2 (multiplier_abs), QImode));
2443 if (neg_p)
2444 riscv_expand_op (NEG, mode, dest, dest, NULL_RTX);
2445 }
2446 else if (pow2p_hwi (multiplier_abs + 1))
2447 {
2448 /*
2449 multiplicand = [BYTES_PER_RISCV_VECTOR].
2450 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 7].
2451 Sequence:
2452 csrr a5, vlenb
2453 slli a4, a5, 3
2454 sub a5, a4, a5
2455 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 7].
2456 Sequence:
2457 csrr a5, vlenb
2458 slli a4, a5, 3
2459 sub a5, a4, a5 + neg a5, a5 => sub a5, a5, a4
2460 */
2461 riscv_expand_op (ASHIFT, mode, dest, multiplicand,
2462 gen_int_mode (exact_log2 (multiplier_abs + 1),
2463 QImode));
2464 if (neg_p)
2465 riscv_expand_op (MINUS, mode, dest, multiplicand, dest);
2466 else
2467 riscv_expand_op (MINUS, mode, dest, dest, multiplicand);
2468 }
2469       else if (pow2p_hwi (multiplier_abs - 1))
2470 {
2471 /*
2472 multiplicand = [BYTES_PER_RISCV_VECTOR].
2473 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 9].
2474 Sequence:
2475 csrr a5, vlenb
2476 slli a4, a5, 3
2477 add a5, a4, a5
2478 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 9].
2479 Sequence:
2480 csrr a5, vlenb
2481 slli a4, a5, 3
2482 add a5, a4, a5
2483 neg a5, a5
2484 */
2485 riscv_expand_op (ASHIFT, mode, dest, multiplicand,
2486 gen_int_mode (exact_log2 (multiplier_abs - 1),
2487 QImode));
2488 riscv_expand_op (PLUS, mode, dest, dest, multiplicand);
2489 if (neg_p)
2490 riscv_expand_op (NEG, mode, dest, dest, NULL_RTX);
2491 }
2492 else
2493 {
2494 /* We use multiplication for remaining cases. */
2495 gcc_assert (
2496 TARGET_MUL
2497 && "M-extension must be enabled to calculate the poly_int "
2498 "size/offset.");
2499 riscv_emit_move (dest, gen_int_mode (multiplier, mode));
2500 riscv_expand_op (MULT, mode, dest, dest, multiplicand);
2501 }
2502 }
2503 }
2504
2505 /* Analyze SRC and emit a const_poly_int move sequence.  */
2506
2507 void
2508 riscv_legitimize_poly_move (machine_mode mode, rtx dest, rtx tmp, rtx src)
2509 {
2510 poly_int64 value = rtx_to_poly_int64 (src);
2511   /* It uses HOST_WIDE_INT instead of int since a 32-bit type is not enough
2512      for e.g. (const_poly_int:DI [549755813888, 549755813888]).  */
2513 HOST_WIDE_INT offset = value.coeffs[0];
2514 HOST_WIDE_INT factor = value.coeffs[1];
2515 int vlenb = BYTES_PER_RISCV_VECTOR.coeffs[1];
2516 int div_factor = 0;
2517 /* Calculate (const_poly_int:MODE [m, n]) using scalar instructions.
2518 For any (const_poly_int:MODE [m, n]), the calculation formula is as
2519 follows.
2520 constant = m - n.
2521 When minimum VLEN = 32, poly of VLENB = (4, 4).
2522 base = vlenb(4, 4) or vlenb/2(2, 2) or vlenb/4(1, 1).
2523 When minimum VLEN > 32, poly of VLENB = (8, 8).
2524 base = vlenb(8, 8) or vlenb/2(4, 4) or vlenb/4(2, 2) or vlenb/8(1, 1).
2525 magn = (n, n) / base.
2526 (m, n) = base * magn + constant.
2527 This calculation doesn't need div operation. */
2528
2529 if (known_le (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode)))
2530 emit_move_insn (tmp, gen_int_mode (BYTES_PER_RISCV_VECTOR, mode));
2531 else
2532 {
2533 emit_move_insn (gen_highpart (Pmode, tmp), CONST0_RTX (Pmode));
2534 emit_move_insn (gen_lowpart (Pmode, tmp),
2535 gen_int_mode (BYTES_PER_RISCV_VECTOR, Pmode));
2536 }
2537
2538 if (BYTES_PER_RISCV_VECTOR.is_constant ())
2539 {
2540 gcc_assert (value.is_constant ());
2541 riscv_emit_move (dest, GEN_INT (value.to_constant ()));
2542 return;
2543 }
2544 else
2545 {
2546 int max_power = exact_log2 (MAX_POLY_VARIANT);
2547 for (int i = 0; i <= max_power; i++)
2548 {
2549 int possible_div_factor = 1 << i;
2550 if (factor % (vlenb / possible_div_factor) == 0)
2551 {
2552 div_factor = possible_div_factor;
2553 break;
2554 }
2555 }
2556 gcc_assert (div_factor != 0);
2557 }
2558
2559 if (div_factor != 1)
2560 riscv_expand_op (LSHIFTRT, mode, tmp, tmp,
2561 gen_int_mode (exact_log2 (div_factor), QImode));
2562
2563 riscv_expand_mult_with_const_int (mode, dest, tmp,
2564 factor / (vlenb / div_factor));
2565 HOST_WIDE_INT constant = offset - factor;
2566
2567 if (constant == 0)
2568 return;
2569 else if (SMALL_OPERAND (constant))
2570 riscv_expand_op (PLUS, mode, dest, dest, gen_int_mode (constant, mode));
2571 else
2572 {
2573       /* Handle a constant value that is not a 12-bit value.  */
2574 rtx high;
2575
2576       /* Leave CONSTANT as a 12-bit value and put the excess in HIGH.
2577 The addition inside the macro CONST_HIGH_PART may cause an
2578 overflow, so we need to force a sign-extension check. */
2579 high = gen_int_mode (CONST_HIGH_PART (constant), mode);
2580 constant = CONST_LOW_PART (constant);
2581 riscv_emit_move (tmp, high);
2582 riscv_expand_op (PLUS, mode, dest, tmp, dest);
2583 riscv_expand_op (PLUS, mode, dest, dest, gen_int_mode (constant, mode));
2584 }
2585 }
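/* Worked example (a sketch; the register matches the sequences sketched
   above): for (const_poly_int:DI [16, 16]) with VLENB = (8, 8) we get
   offset = 16, factor = 16 and div_factor = 1, so magn = 16 / 8 = 2 and
   the code above emits roughly

	csrr	a5, vlenb
	slli	a5, a5, 1

   while constant = offset - factor = 0 needs no further addition.  */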
2586
2587 /* Adjust the scalable frame for vectors in the prologue and epilogue.  */
2588
2589 static void
2590 riscv_v_adjust_scalable_frame (rtx target, poly_int64 offset, bool epilogue)
2591 {
2592 rtx tmp = RISCV_PROLOGUE_TEMP (Pmode);
2593 rtx adjust_size = RISCV_PROLOGUE_TEMP2 (Pmode);
2594 rtx insn, dwarf, adjust_frame_rtx;
2595
2596 riscv_legitimize_poly_move (Pmode, adjust_size, tmp,
2597 gen_int_mode (offset, Pmode));
2598
2599 if (epilogue)
2600 insn = gen_add3_insn (target, target, adjust_size);
2601 else
2602 insn = gen_sub3_insn (target, target, adjust_size);
2603
2604 insn = emit_insn (insn);
2605
2606 RTX_FRAME_RELATED_P (insn) = 1;
2607
2608 adjust_frame_rtx
2609 = gen_rtx_SET (target,
2610 plus_constant (Pmode, target, epilogue ? offset : -offset));
2611
2612 dwarf = alloc_reg_note (REG_FRAME_RELATED_EXPR, copy_rtx (adjust_frame_rtx),
2613 NULL_RTX);
2614
2615 REG_NOTES (insn) = dwarf;
2616 }
2617
2618 /* If (set DEST SRC) is not a valid move instruction, emit an equivalent
2619 sequence that is valid. */
2620
2621 bool
2622 riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
2623 {
2624 if (CONST_POLY_INT_P (src))
2625 {
2626 /*
2627 Handle:
2628 (insn 183 182 184 6 (set (mem:QI (plus:DI (reg/f:DI 156)
2629 (const_int 96 [0x60])) [0 S1 A8])
2630 (const_poly_int:QI [8, 8]))
2631 "../../../../riscv-gcc/libgcc/unwind-dw2.c":1579:3 -1 (nil))
2632 */
2633 if (MEM_P (dest))
2634 {
2635 emit_move_insn (dest, force_reg (mode, src));
2636 return true;
2637 }
2638 poly_int64 value = rtx_to_poly_int64 (src);
2639 if (!value.is_constant () && !TARGET_VECTOR)
2640 {
2641 riscv_report_v_required ();
2642 return false;
2643 }
2644
2645 if (satisfies_constraint_vp (src) && GET_MODE (src) == Pmode)
2646 return false;
2647
2648 if (GET_MODE_SIZE (mode).to_constant () < GET_MODE_SIZE (Pmode))
2649 {
2650 /* In RV32 system, handle (const_poly_int:QI [m, n])
2651 (const_poly_int:HI [m, n]).
2652 In RV64 system, handle (const_poly_int:QI [m, n])
2653 (const_poly_int:HI [m, n])
2654 (const_poly_int:SI [m, n]). */
2655 rtx tmp = gen_reg_rtx (Pmode);
2656 riscv_legitimize_poly_move (Pmode, gen_lowpart (Pmode, dest), tmp,
2657 src);
2658 }
2659 else
2660 {
2661 /* In RV32 system, handle (const_poly_int:SI [m, n])
2662 (const_poly_int:DI [m, n]).
2663 In RV64 system, handle (const_poly_int:DI [m, n]).
2664 FIXME: Maybe we could gen SImode in RV32 and then sign-extend to DImode,
2665 	     since the offset should not exceed 4 GiB in general.  */
2666 rtx tmp = gen_reg_rtx (mode);
2667 riscv_legitimize_poly_move (mode, dest, tmp, src);
2668 }
2669 return true;
2670 }
2671 /* Expand
2672 (set (reg:DI target) (subreg:DI (reg:V8QI reg) 0))
2673      Expand this data movement instead of simply forbidding it, since
2674      we can improve the code generation for the following scenario
2675      produced by RVV auto-vectorization:
2676 (set (reg:V8QI 149) (vec_duplicate:V8QI (reg:QI))
2677 (set (reg:DI target) (subreg:DI (reg:V8QI reg) 0))
2678 Since RVV mode and scalar mode are in different REG_CLASS,
2679 we need to explicitly move data from V_REGS to GR_REGS by scalar move. */
2680 if (SUBREG_P (src) && riscv_v_ext_mode_p (GET_MODE (SUBREG_REG (src))))
2681 {
2682 machine_mode vmode = GET_MODE (SUBREG_REG (src));
2683 unsigned int mode_size = GET_MODE_SIZE (mode).to_constant ();
2684 unsigned int vmode_size = GET_MODE_SIZE (vmode).to_constant ();
2685 /* We should be able to handle both partial and paradoxical subreg. */
2686 unsigned int nunits = vmode_size > mode_size ? vmode_size / mode_size : 1;
2687 scalar_mode smode = as_a<scalar_mode> (mode);
2688 unsigned int index = SUBREG_BYTE (src).to_constant () / mode_size;
2689 unsigned int num = known_eq (GET_MODE_SIZE (smode), 8)
2690 && !TARGET_VECTOR_ELEN_64 ? 2 : 1;
2691 bool need_int_reg_p = false;
2692
2693 if (num == 2)
2694 {
2695 	  /* If we want to extract a 64-bit value but ELEN < 64,
2696 	     we use an RVV vector mode with EEW = 32 to extract
2697 	     the highpart and lowpart.  */
2698 need_int_reg_p = smode == DFmode;
2699 smode = SImode;
2700 nunits = nunits * 2;
2701 }
2702
2703 if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode))
2704 {
2705 rtx v = gen_lowpart (vmode, SUBREG_REG (src));
2706 rtx int_reg = dest;
2707
2708 if (need_int_reg_p)
2709 {
2710 int_reg = gen_reg_rtx (DImode);
2711 emit_move_insn (int_reg, gen_lowpart (GET_MODE (int_reg), dest));
2712 }
2713
2714 for (unsigned int i = 0; i < num; i++)
2715 {
2716 rtx result;
2717 if (num == 1)
2718 result = int_reg;
2719 else if (i == 0)
2720 result = gen_lowpart (smode, int_reg);
2721 else
2722 result = gen_reg_rtx (smode);
2723
2724 riscv_vector::emit_vec_extract (result, v,
2725 gen_int_mode (index + i, Pmode));
2726
2727 if (i == 1)
2728 {
2729 if (UNITS_PER_WORD < mode_size)
2730 /* If Pmode = SImode and mode = DImode, we just need to
2731 extract element of index = 1 from the vector and move it
2732 into the highpart of the DEST since DEST consists of 2
2733 scalar registers. */
2734 emit_move_insn (gen_highpart (smode, int_reg), result);
2735 else
2736 {
2737 rtx tmp = expand_binop (Pmode, ashl_optab,
2738 gen_lowpart (Pmode, result),
2739 gen_int_mode (32, Pmode),
2740 NULL_RTX, 0, OPTAB_DIRECT);
2741 rtx tmp2 = expand_binop (Pmode, ior_optab, tmp, int_reg,
2742 NULL_RTX, 0, OPTAB_DIRECT);
2743 emit_move_insn (int_reg, tmp2);
2744 }
2745 }
2746 }
2747
2748 if (need_int_reg_p)
2749 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), int_reg));
2750 else
2751 emit_move_insn (dest, int_reg);
2752 }
2753 else
2754 gcc_unreachable ();
2755
2756 return true;
2757 }
2758 /* Expand
2759 (set (reg:QI target) (mem:QI (address)))
2760 to
2761 (set (reg:DI temp) (zero_extend:DI (mem:QI (address))))
2762 (set (reg:QI target) (subreg:QI (reg:DI temp) 0))
2763 with auto-sign/zero extend. */
2764 if (GET_MODE_CLASS (mode) == MODE_INT
2765 && GET_MODE_SIZE (mode).to_constant () < UNITS_PER_WORD
2766 && can_create_pseudo_p ()
2767 && MEM_P (src))
2768 {
2769 rtx temp_reg;
2770 int zero_extend_p;
2771
2772 temp_reg = gen_reg_rtx (word_mode);
2773 zero_extend_p = (LOAD_EXTEND_OP (mode) == ZERO_EXTEND);
2774 emit_insn (gen_extend_insn (temp_reg, src, word_mode, mode,
2775 zero_extend_p));
2776 riscv_emit_move (dest, gen_lowpart (mode, temp_reg));
2777 return true;
2778 }
2779
2780 if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode))
2781 {
2782 rtx reg;
2783
2784 if (GET_CODE (src) == CONST_INT)
2785 {
2786 /* Apply the equivalent of PROMOTE_MODE here for constants to
2787 improve cse. */
2788 machine_mode promoted_mode = mode;
2789 if (GET_MODE_CLASS (mode) == MODE_INT
2790 && GET_MODE_SIZE (mode).to_constant () < UNITS_PER_WORD)
2791 promoted_mode = word_mode;
2792
2793 if (splittable_const_int_operand (src, mode))
2794 {
2795 reg = gen_reg_rtx (promoted_mode);
2796 riscv_move_integer (reg, reg, INTVAL (src), mode);
2797 }
2798 else
2799 reg = force_reg (promoted_mode, src);
2800
2801 if (promoted_mode != mode)
2802 reg = gen_lowpart (mode, reg);
2803 }
2804 else
2805 reg = force_reg (mode, src);
2806 riscv_emit_move (dest, reg);
2807 return true;
2808 }
2809
2810 /* We need to deal with constants that would be legitimate
2811 immediate_operands but aren't legitimate move_operands. */
2812 if (CONSTANT_P (src) && !move_operand (src, mode))
2813 {
2814 riscv_legitimize_const_move (mode, dest, src);
2815 set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (src));
2816 return true;
2817 }
2818
2819   /* RISC-V GCC may generate non-legitimate addresses because we provide
2820      patterns that optimize access to PIC local symbols, which can make GCC
2821      generate unrecognizable instructions during optimization.  */
2822
2823 if (MEM_P (dest) && !riscv_legitimate_address_p (mode, XEXP (dest, 0),
2824 reload_completed))
2825 {
2826 XEXP (dest, 0) = riscv_force_address (XEXP (dest, 0), mode);
2827 }
2828
2829 if (MEM_P (src) && !riscv_legitimate_address_p (mode, XEXP (src, 0),
2830 reload_completed))
2831 {
2832 XEXP (src, 0) = riscv_force_address (XEXP (src, 0), mode);
2833 }
2834
2835 return false;
2836 }
2837
2838 /* Return true if there is an instruction that implements CODE and accepts
2839 X as an immediate operand. */
2840
2841 static int
2842 riscv_immediate_operand_p (int code, HOST_WIDE_INT x)
2843 {
2844 switch (code)
2845 {
2846 case ASHIFT:
2847 case ASHIFTRT:
2848 case LSHIFTRT:
2849 /* All shift counts are truncated to a valid constant. */
2850 return true;
2851
2852 case AND:
2853 case IOR:
2854 case XOR:
2855 case PLUS:
2856 case LT:
2857 case LTU:
2858 /* These instructions take 12-bit signed immediates. */
2859 return SMALL_OPERAND (x);
2860
2861 case LE:
2862 /* We add 1 to the immediate and use SLT. */
2863 return SMALL_OPERAND (x + 1);
2864
2865 case LEU:
2866 /* Likewise SLTU, but reject the always-true case. */
2867 return SMALL_OPERAND (x + 1) && x + 1 != 0;
2868
2869 case GE:
2870 case GEU:
2871 /* We can emulate an immediate of 1 by using GT/GTU against x0. */
2872 return x == 1;
2873
2874 default:
2875 /* By default assume that x0 can be used for 0. */
2876 return x == 0;
2877 }
2878 }
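/* For example, a comparison "x <= 100" is accepted above because
   SMALL_OPERAND (101) holds, so it can be emitted as the single
   instruction

	slti	t0, x, 101

   whereas LEU with an all-ones immediate is rejected: x + 1 wraps to 0
   and the comparison would be always true.  */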
2879
2880 /* Return the cost of binary operation X, given that the instruction
2881    sequence for a word-sized or smaller operation takes SINGLE_INSNS
2882 instructions and that the sequence of a double-word operation takes
2883 DOUBLE_INSNS instructions. */
2884
2885 static int
2886 riscv_binary_cost (rtx x, int single_insns, int double_insns)
2887 {
2888 if (!riscv_v_ext_mode_p (GET_MODE (x))
2889 && GET_MODE_SIZE (GET_MODE (x)).to_constant () == UNITS_PER_WORD * 2)
2890 return COSTS_N_INSNS (double_insns);
2891 return COSTS_N_INSNS (single_insns);
2892 }
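/* For example, on rv32 a DImode bitwise operation is carried out with two
   word-sized operations, so callers pass a larger DOUBLE_INSNS count and
   the check above selects it whenever the operand mode is twice
   UNITS_PER_WORD.  */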
2893
2894 /* Return the cost of sign- or zero-extending OP. */
2895
2896 static int
2897 riscv_extend_cost (rtx op, bool unsigned_p)
2898 {
2899 if (MEM_P (op))
2900 return 0;
2901
2902 if (unsigned_p && GET_MODE (op) == QImode)
2903 /* We can use ANDI. */
2904 return COSTS_N_INSNS (1);
2905
2906   /* ZBA provides zext.w.  */
2907 if (TARGET_ZBA && TARGET_64BIT && unsigned_p && GET_MODE (op) == SImode)
2908 return COSTS_N_INSNS (1);
2909
2910   /* ZBB provides zext.h, sext.b and sext.h.  */
2911 if (TARGET_ZBB)
2912 {
2913 if (!unsigned_p && GET_MODE (op) == QImode)
2914 return COSTS_N_INSNS (1);
2915
2916 if (GET_MODE (op) == HImode)
2917 return COSTS_N_INSNS (1);
2918 }
2919
2920 if (!unsigned_p && GET_MODE (op) == SImode)
2921 /* We can use SEXT.W. */
2922 return COSTS_N_INSNS (1);
2923
2924 /* We need to use a shift left and a shift right. */
2925 return COSTS_N_INSNS (2);
2926 }
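/* For example (a sketch; the register is illustrative): without Zbb, a
   HImode zero-extend on rv64 takes the two-shift fallback costed above:

	slli	t0, x, 48
	srli	t0, t0, 48  */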
2927
2928 /* Implement TARGET_RTX_COSTS. */
2929
2930 #define SINGLE_SHIFT_COST 1
2931
2932 static bool
2933 riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UNUSED,
2934 int *total, bool speed)
2935 {
2936 /* TODO: We set RVV instruction cost as 1 by default.
2937 Cost Model need to be well analyzed and supported in the future. */
2938 if (riscv_v_ext_mode_p (mode))
2939 {
2940 *total = COSTS_N_INSNS (1);
2941 return true;
2942 }
2943
2944 bool float_mode_p = FLOAT_MODE_P (mode);
2945 int cost;
2946
2947 switch (GET_CODE (x))
2948 {
2949 case SET:
2950 /* If we are called for an INSN that's a simple set of a register,
2951 then cost based on the SET_SRC alone. */
2952 if (outer_code == INSN && REG_P (SET_DEST (x)))
2953 {
2954 riscv_rtx_costs (SET_SRC (x), mode, outer_code, opno, total, speed);
2955 return true;
2956 }
2957
2958 /* Otherwise return FALSE indicating we should recurse into both the
2959 SET_DEST and SET_SRC combining the cost of both. */
2960 return false;
2961
2962 case CONST_INT:
2963       /* Trivial constants are checked using OUTER_CODE in case they are
2964 	 encodable in the insn itself without additional insn(s).  */
2965 if (riscv_immediate_operand_p (outer_code, INTVAL (x)))
2966 {
2967 *total = 0;
2968 return true;
2969 }
2970 /* Fall through. */
2971
2972 case SYMBOL_REF:
2973 case LABEL_REF:
2974 case CONST_DOUBLE:
2975       /* With TARGET_SUPPORTS_WIDE_INT a const int can't be in a CONST_DOUBLE
2976 	 rtl object.  The recheck is due to the switch-case fall-through above.  */
2977 if (GET_CODE (x) == CONST_DOUBLE)
2978 gcc_assert (GET_MODE (x) != VOIDmode);
2979 /* Fall through. */
2980
2981 case CONST:
2982       /* Non-trivial CONST_INT fall-through: check if multiple insns are needed.  */
2983 if ((cost = riscv_const_insns (x)) > 0)
2984 {
2985 	  /* 1. Hoisting will GCSE constants only if the TOTAL returned is non-zero.
2986 	     2. For constants loaded more than once, the approach so far has
2987 		been to duplicate the operation rather than to CSE the constant.
2988 	     3. TODO: make the cost more accurate, especially if riscv_const_insns
2989 		returns > 1.  */
2990 if (outer_code == SET || GET_MODE (x) == VOIDmode)
2991 *total = COSTS_N_INSNS (1);
2992 }
2993       else /* The constant will be fetched from the constant pool.  */
2994 *total = COSTS_N_INSNS (riscv_symbol_insns (SYMBOL_ABSOLUTE));
2995 return true;
2996
2997 case MEM:
2998 /* If the address is legitimate, return the number of
2999 instructions it needs. */
3000 if ((cost = riscv_address_insns (XEXP (x, 0), mode, true)) > 0)
3001 {
3002 /* When optimizing for size, make uncompressible 32-bit addresses
3003 more expensive so that compressible 32-bit addresses are
3004 preferred. */
3005 if ((TARGET_RVC || TARGET_ZCA)
3006 && !speed && riscv_mshorten_memrefs && mode == SImode
3007 && !riscv_compressed_lw_address_p (XEXP (x, 0)))
3008 cost++;
3009
3010 *total = COSTS_N_INSNS (cost + tune_param->memory_cost);
3011 return true;
3012 }
3013 /* Otherwise use the default handling. */
3014 return false;
3015
3016 case IF_THEN_ELSE:
3017 if ((TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
3018 && reg_or_0_operand (XEXP (x, 1), mode)
3019 && sfb_alu_operand (XEXP (x, 2), mode)
3020 && comparison_operator (XEXP (x, 0), VOIDmode))
3021 {
3022 /* For predicated conditional-move operations we assume the cost
3023 of a single instruction even though there are actually two. */
3024 *total = COSTS_N_INSNS (1);
3025 return true;
3026 }
3027 else if (TARGET_ZICOND_LIKE
3028 && outer_code == SET
3029 && ((GET_CODE (XEXP (x, 1)) == REG
3030 && XEXP (x, 2) == CONST0_RTX (GET_MODE (XEXP (x, 1))))
3031 || (GET_CODE (XEXP (x, 2)) == REG
3032 && XEXP (x, 1) == CONST0_RTX (GET_MODE (XEXP (x, 2))))
3033 || (GET_CODE (XEXP (x, 1)) == REG
3034 && rtx_equal_p (XEXP (x, 1), XEXP (XEXP (x, 0), 0)))
3035 || (GET_CODE (XEXP (x, 1)) == REG
3036 && rtx_equal_p (XEXP (x, 2), XEXP (XEXP (x, 0), 0)))))
3037 {
3038 *total = COSTS_N_INSNS (1);
3039 return true;
3040 }
3041 else if (LABEL_REF_P (XEXP (x, 1)) && XEXP (x, 2) == pc_rtx)
3042 {
3043 if (equality_operator (XEXP (x, 0), mode)
3044 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTRACT)
3045 {
3046 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST + 1);
3047 return true;
3048 }
3049 if (ordered_comparison_operator (XEXP (x, 0), mode))
3050 {
3051 *total = COSTS_N_INSNS (1);
3052 return true;
3053 }
3054 }
3055 return false;
3056
3057 case NOT:
3058 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD ? 2 : 1);
3059 return false;
3060
3061 case AND:
3062 /* slli.uw pattern for zba. */
3063 if (TARGET_ZBA && TARGET_64BIT && mode == DImode
3064 && GET_CODE (XEXP (x, 0)) == ASHIFT)
3065 {
3066 rtx and_rhs = XEXP (x, 1);
3067 rtx ashift_lhs = XEXP (XEXP (x, 0), 0);
3068 rtx ashift_rhs = XEXP (XEXP (x, 0), 1);
3069 if (REG_P (ashift_lhs)
3070 && CONST_INT_P (ashift_rhs)
3071 && CONST_INT_P (and_rhs)
3072 && ((INTVAL (and_rhs) >> INTVAL (ashift_rhs)) == 0xffffffff))
3073 *total = COSTS_N_INSNS (1);
3074 return true;
3075 }
3076 /* bclri pattern for zbs. */
3077 if (TARGET_ZBS
3078 && not_single_bit_mask_operand (XEXP (x, 1), VOIDmode))
3079 {
3080 *total = COSTS_N_INSNS (1);
3081 return true;
3082 }
3083 /* bclr pattern for zbs. */
3084 if (TARGET_ZBS
3085 && REG_P (XEXP (x, 1))
3086 && GET_CODE (XEXP (x, 0)) == ROTATE
3087 && CONST_INT_P (XEXP ((XEXP (x, 0)), 0))
3088 && INTVAL (XEXP ((XEXP (x, 0)), 0)) == -2)
3089 {
3090 *total = COSTS_N_INSNS (1);
3091 return true;
3092 }
3093
3094 gcc_fallthrough ();
3095 case IOR:
3096 case XOR:
3097 /* orn, andn and xorn pattern for zbb. */
3098 if (TARGET_ZBB
3099 && GET_CODE (XEXP (x, 0)) == NOT)
3100 {
3101 *total = riscv_binary_cost (x, 1, 2);
3102 return true;
3103 }
3104
3105 /* bset[i] and binv[i] pattern for zbs. */
3106 if ((GET_CODE (x) == IOR || GET_CODE (x) == XOR)
3107 && TARGET_ZBS
3108 && ((GET_CODE (XEXP (x, 0)) == ASHIFT
3109 && CONST_INT_P (XEXP (XEXP (x, 0), 0)))
3110 || single_bit_mask_operand (XEXP (x, 1), VOIDmode)))
3111 {
3112 *total = COSTS_N_INSNS (1);
3113 return true;
3114 }
3115
3116 /* Double-word operations use two single-word operations. */
3117 *total = riscv_binary_cost (x, 1, 2);
3118 return false;
3119
3120 case ZERO_EXTRACT:
3121 /* This is an SImode shift. */
3122 if (outer_code == SET
3123 && CONST_INT_P (XEXP (x, 1))
3124 && CONST_INT_P (XEXP (x, 2))
3125 && (INTVAL (XEXP (x, 2)) > 0)
3126 && (INTVAL (XEXP (x, 1)) + INTVAL (XEXP (x, 2)) == 32))
3127 {
3128 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3129 return true;
3130 }
3131 /* bit extraction pattern (zbs:bext, xtheadbs:tst). */
3132 if ((TARGET_ZBS || TARGET_XTHEADBS) && outer_code == SET
3133 && GET_CODE (XEXP (x, 1)) == CONST_INT
3134 && INTVAL (XEXP (x, 1)) == 1)
3135 {
3136 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3137 return true;
3138 }
3139 gcc_fallthrough ();
3140 case SIGN_EXTRACT:
3141 if (TARGET_XTHEADBB && outer_code == SET
3142 && CONST_INT_P (XEXP (x, 1))
3143 && CONST_INT_P (XEXP (x, 2)))
3144 {
3145 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3146 return true;
3147 }
3148 return false;
3149
3150 case ASHIFT:
3151 /* bset pattern for zbs. */
3152 if (TARGET_ZBS
3153 && CONST_INT_P (XEXP (x, 0))
3154 && INTVAL (XEXP (x, 0)) == 1)
3155 {
3156 *total = COSTS_N_INSNS (1);
3157 return true;
3158 }
3159 gcc_fallthrough ();
3160 case ASHIFTRT:
3161 case LSHIFTRT:
3162 *total = riscv_binary_cost (x, SINGLE_SHIFT_COST,
3163 CONSTANT_P (XEXP (x, 1)) ? 4 : 9);
3164 return false;
3165
3166 case ABS:
3167 *total = COSTS_N_INSNS (float_mode_p ? 1 : 3);
3168 return false;
3169
3170 case LO_SUM:
3171 *total = set_src_cost (XEXP (x, 0), mode, speed);
3172 return true;
3173
3174 case LT:
3175 /* This is an SImode shift. */
3176 if (outer_code == SET && GET_MODE (x) == DImode
3177 && GET_MODE (XEXP (x, 0)) == SImode)
3178 {
3179 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3180 return true;
3181 }
3182 /* Fall through. */
3183 case LTU:
3184 case LE:
3185 case LEU:
3186 case GT:
3187 case GTU:
3188 case GE:
3189 case GEU:
3190 case EQ:
3191 case NE:
3192 /* Branch comparisons have VOIDmode, so use the first operand's
3193 mode instead. */
3194 mode = GET_MODE (XEXP (x, 0));
3195 if (float_mode_p)
3196 *total = tune_param->fp_add[mode == DFmode];
3197 else
3198 *total = riscv_binary_cost (x, 1, 3);
3199 return false;
3200
3201 case UNORDERED:
3202 case ORDERED:
3203 /* (FEQ(A, A) & FEQ(B, B)) compared against 0. */
3204 mode = GET_MODE (XEXP (x, 0));
3205 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
3206 return false;
3207
3208 case UNEQ:
3209 /* (FEQ(A, A) & FEQ(B, B)) compared against FEQ(A, B). */
3210 mode = GET_MODE (XEXP (x, 0));
3211 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (3);
3212 return false;
3213
3214 case LTGT:
3215 /* (FLT(A, A) || FGT(B, B)). */
3216 mode = GET_MODE (XEXP (x, 0));
3217 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
3218 return false;
3219
3220 case UNGE:
3221 case UNGT:
3222 case UNLE:
3223 case UNLT:
3224 /* FLT or FLE, but guarded by an FFLAGS read and write. */
3225 mode = GET_MODE (XEXP (x, 0));
3226 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (4);
3227 return false;
3228
3229 case MINUS:
3230 if (float_mode_p)
3231 *total = tune_param->fp_add[mode == DFmode];
3232 else
3233 *total = riscv_binary_cost (x, 1, 4);
3234 return false;
3235
3236 case PLUS:
3237 /* add.uw pattern for zba. */
3238 if (TARGET_ZBA
3239 && (TARGET_64BIT && (mode == DImode))
3240 && GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
3241 && REG_P (XEXP (XEXP (x, 0), 0))
3242 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
3243 {
3244 *total = COSTS_N_INSNS (1);
3245 return true;
3246 }
3247 /* shNadd pattern for zba. */
3248 if (TARGET_ZBA
3249 && ((!TARGET_64BIT && (mode == SImode)) ||
3250 (TARGET_64BIT && (mode == DImode)))
3251 && (GET_CODE (XEXP (x, 0)) == ASHIFT)
3252 && REG_P (XEXP (XEXP (x, 0), 0))
3253 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3254 && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1)), 1, 3))
3255 {
3256 *total = COSTS_N_INSNS (1);
3257 return true;
3258 }
3259 /* Before strength-reduction, the shNadd can be expressed as the addition
3260 of a multiplication with a power-of-two. If this case is not handled,
3261 the strength-reduction in expmed.c will calculate an inflated cost. */
3262 if (TARGET_ZBA
3263 && mode == word_mode
3264 && GET_CODE (XEXP (x, 0)) == MULT
3265 && REG_P (XEXP (XEXP (x, 0), 0))
3266 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3267 && pow2p_hwi (INTVAL (XEXP (XEXP (x, 0), 1)))
3268 && IN_RANGE (exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1))), 1, 3))
3269 {
3270 *total = COSTS_N_INSNS (1);
3271 return true;
3272 }
3273 /* shNadd.uw pattern for zba.
3274 [(set (match_operand:DI 0 "register_operand" "=r")
3275 (plus:DI
3276 (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
3277 (match_operand:QI 2 "immediate_operand" "I"))
3278 (match_operand 3 "immediate_operand" ""))
3279 (match_operand:DI 4 "register_operand" "r")))]
3280 "TARGET_64BIT && TARGET_ZBA
3281 && (INTVAL (operands[2]) >= 1) && (INTVAL (operands[2]) <= 3)
3282 && (INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff"
3283 */
3284 if (TARGET_ZBA
3285 && (TARGET_64BIT && (mode == DImode))
3286 && (GET_CODE (XEXP (x, 0)) == AND)
3287 && (REG_P (XEXP (x, 1))))
3288 {
3289 do {
3290 rtx and_lhs = XEXP (XEXP (x, 0), 0);
3291 rtx and_rhs = XEXP (XEXP (x, 0), 1);
3292 if (GET_CODE (and_lhs) != ASHIFT)
3293 break;
3294 if (!CONST_INT_P (and_rhs))
3295 break;
3296
3297 rtx ashift_rhs = XEXP (and_lhs, 1);
3298
3299 if (!CONST_INT_P (ashift_rhs)
3300 || !IN_RANGE (INTVAL (ashift_rhs), 1, 3))
3301 break;
3302
3303 if (CONST_INT_P (and_rhs)
3304 && ((INTVAL (and_rhs) >> INTVAL (ashift_rhs)) == 0xffffffff))
3305 {
3306 *total = COSTS_N_INSNS (1);
3307 return true;
3308 }
3309 } while (false);
3310 }
3311
3312 if (float_mode_p)
3313 *total = tune_param->fp_add[mode == DFmode];
3314 else
3315 *total = riscv_binary_cost (x, 1, 4);
3316 return false;
3317
3318 case NEG:
3319 {
3320 rtx op = XEXP (x, 0);
3321 if (GET_CODE (op) == FMA && !HONOR_SIGNED_ZEROS (mode))
3322 {
3323 *total = (tune_param->fp_mul[mode == DFmode]
3324 + set_src_cost (XEXP (op, 0), mode, speed)
3325 + set_src_cost (XEXP (op, 1), mode, speed)
3326 + set_src_cost (XEXP (op, 2), mode, speed));
3327 return true;
3328 }
3329 }
3330
3331 if (float_mode_p)
3332 *total = tune_param->fp_add[mode == DFmode];
3333 else
3334 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD ? 4 : 1);
3335 return false;
3336
3337 case MULT:
3338 if (float_mode_p)
3339 *total = tune_param->fp_mul[mode == DFmode];
3340 else if (!TARGET_MUL)
3341 /* Estimate the cost of a library call. */
3342 *total = COSTS_N_INSNS (speed ? 32 : 6);
3343 else if (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD)
3344 *total = 3 * tune_param->int_mul[0] + COSTS_N_INSNS (2);
3345 else if (!speed)
3346 *total = COSTS_N_INSNS (1);
3347 else
3348 *total = tune_param->int_mul[mode == DImode];
3349 return false;
3350
3351 case DIV:
3352 case SQRT:
3353 case MOD:
3354 if (float_mode_p)
3355 {
3356 *total = tune_param->fp_div[mode == DFmode];
3357 return false;
3358 }
3359 /* Fall through. */
3360
3361 case UDIV:
3362 case UMOD:
3363 if (!TARGET_DIV)
3364 /* Estimate the cost of a library call. */
3365 *total = COSTS_N_INSNS (speed ? 32 : 6);
3366 else if (speed)
3367 *total = tune_param->int_div[mode == DImode];
3368 else
3369 *total = COSTS_N_INSNS (1);
3370 return false;
3371
3372 case ZERO_EXTEND:
3373 /* This is an SImode shift. */
3374 if (GET_CODE (XEXP (x, 0)) == LSHIFTRT)
3375 {
3376 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3377 return true;
3378 }
3379 /* Fall through. */
3380 case SIGN_EXTEND:
3381 *total = riscv_extend_cost (XEXP (x, 0), GET_CODE (x) == ZERO_EXTEND);
3382 return false;
3383
3384 case BSWAP:
3385 if (TARGET_ZBB)
3386 {
3387 /* RISC-V only defines rev8 for XLEN, so we will need an extra
3388 shift-right instruction for smaller modes. */
3389 *total = COSTS_N_INSNS (mode == word_mode ? 1 : 2);
3390 return true;
3391 }
3392 return false;
3393
3394 case FLOAT:
3395 case UNSIGNED_FLOAT:
3396 case FIX:
3397 case FLOAT_EXTEND:
3398 case FLOAT_TRUNCATE:
3399 *total = tune_param->fp_add[mode == DFmode];
3400 return false;
3401
3402 case FMA:
3403 *total = (tune_param->fp_mul[mode == DFmode]
3404 + set_src_cost (XEXP (x, 0), mode, speed)
3405 + set_src_cost (XEXP (x, 1), mode, speed)
3406 + set_src_cost (XEXP (x, 2), mode, speed));
3407 return true;
3408
3409 case UNSPEC:
3410 if (XINT (x, 1) == UNSPEC_AUIPC)
3411 {
3412 /* Make AUIPC cheap to avoid spilling its result to the stack. */
3413 *total = 1;
3414 return true;
3415 }
3416 return false;
3417
3418 default:
3419 return false;
3420 }
3421 }
3422
3423 /* Implement TARGET_ADDRESS_COST. */
3424
3425 static int
3426 riscv_address_cost (rtx addr, machine_mode mode,
3427 addr_space_t as ATTRIBUTE_UNUSED,
3428 bool speed ATTRIBUTE_UNUSED)
3429 {
3430 /* When optimizing for size, make uncompressible 32-bit addresses more
3431 * expensive so that compressible 32-bit addresses are preferred. */
3432 if ((TARGET_RVC || TARGET_ZCA)
3433 && !speed && riscv_mshorten_memrefs && mode == SImode
3434 && !riscv_compressed_lw_address_p (addr))
3435 return riscv_address_insns (addr, mode, false) + 1;
3436 return riscv_address_insns (addr, mode, false);
3437 }
3438
3439 /* Implement TARGET_INSN_COST. We factor in the branch cost in the cost
3440 calculation for conditional branches: one unit is considered the cost
3441 of microarchitecture-dependent actual branch execution and therefore
3442 multiplied by BRANCH_COST and any remaining units are considered fixed
3443 branch overhead. Branches on a floating-point condition incur an extra
3444 instruction cost as they will be split into an FCMP operation followed
3445 by a branch on an integer condition. */
3446
3447 static int
3448 riscv_insn_cost (rtx_insn *insn, bool speed)
3449 {
3450 rtx x = PATTERN (insn);
3451 int cost = pattern_cost (x, speed);
3452
3453 if (JUMP_P (insn))
3454 {
3455 if (GET_CODE (x) == PARALLEL)
3456 x = XVECEXP (x, 0, 0);
3457 if (GET_CODE (x) == SET
3458 && GET_CODE (SET_DEST (x)) == PC
3459 && GET_CODE (SET_SRC (x)) == IF_THEN_ELSE)
3460 {
3461 cost += COSTS_N_INSNS (BRANCH_COST (speed, false) - 1);
3462 if (FLOAT_MODE_P (GET_MODE (XEXP (XEXP (SET_SRC (x), 0), 0))))
3463 cost += COSTS_N_INSNS (1);
3464 }
3465 }
3466 return cost;
3467 }
3468
3469 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
3470 but we consider cost units of branch instructions equal to cost units of
3471 other instructions. */
3472
3473 static unsigned int
3474 riscv_max_noce_ifcvt_seq_cost (edge e)
3475 {
3476 bool predictable_p = predictable_edge_p (e);
3477
3478 if (predictable_p)
3479 {
3480 if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
3481 return param_max_rtl_if_conversion_predictable_cost;
3482 }
3483 else
3484 {
3485 if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
3486 return param_max_rtl_if_conversion_unpredictable_cost;
3487 }
3488
3489 return COSTS_N_INSNS (BRANCH_COST (true, predictable_p));
3490 }
3491
3492 /* Implement TARGET_NOCE_CONVERSION_PROFITABLE_P. We replace the cost of a
3493 conditional branch assumed by `noce_find_if_block' at `COSTS_N_INSNS (2)'
3494 by our actual conditional branch cost, observing that our branches test
3495 conditions directly, so there is no preparatory extra condition-set
3496 instruction. */
3497
3498 static bool
3499 riscv_noce_conversion_profitable_p (rtx_insn *seq,
3500 struct noce_if_info *if_info)
3501 {
3502 struct noce_if_info riscv_if_info = *if_info;
3503
3504 riscv_if_info.original_cost -= COSTS_N_INSNS (2);
3505 riscv_if_info.original_cost += insn_cost (if_info->jump, if_info->speed_p);
3506
3507 /* Hack alert! When `noce_try_store_flag_mask' uses `cstore<mode>4'
3508 to emit a conditional set operation on DImode output it comes up
3509 with a sequence such as:
3510
3511 (insn 26 0 27 (set (reg:SI 140)
3512 (eq:SI (reg/v:DI 137 [ c ])
3513 (const_int 0 [0]))) 302 {*seq_zero_disi}
3514 (nil))
3515 (insn 27 26 28 (set (reg:DI 139)
3516 (zero_extend:DI (reg:SI 140))) 116 {*zero_extendsidi2_internal}
3517 (nil))
3518
3519 because our `cstore<mode>4' pattern expands to an insn that gives
3520 a SImode output. The output of conditional set is 0 or 1 boolean,
3521 so it is valid for input in any scalar integer mode and therefore
3522 combine later folds the zero extend operation into an equivalent
3523 conditional set operation that produces a DImode output, however
3524 this redundant zero extend operation counts towards the cost of
3525 the replacement sequence. Compensate for that by incrementing the
3526 cost of the original sequence as well as the maximum sequence cost
3527 accordingly. */
3528 rtx last_dest = NULL_RTX;
3529 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
3530 {
3531 if (!NONDEBUG_INSN_P (insn))
3532 continue;
3533
3534 rtx x = PATTERN (insn);
3535 if (NONJUMP_INSN_P (insn)
3536 && GET_CODE (x) == SET)
3537 {
3538 rtx src = SET_SRC (x);
3539 if (last_dest != NULL_RTX
3540 && GET_CODE (src) == ZERO_EXTEND
3541 && REG_P (XEXP (src, 0))
3542 && REGNO (XEXP (src, 0)) == REGNO (last_dest))
3543 {
3544 riscv_if_info.original_cost += COSTS_N_INSNS (1);
3545 riscv_if_info.max_seq_cost += COSTS_N_INSNS (1);
3546 }
3547 last_dest = NULL_RTX;
3548 rtx dest = SET_DEST (x);
3549 if (COMPARISON_P (src)
3550 && REG_P (dest)
3551 && GET_MODE (dest) == SImode)
3552 last_dest = dest;
3553 }
3554 else
3555 last_dest = NULL_RTX;
3556 }
3557
3558 return default_noce_conversion_profitable_p (seq, &riscv_if_info);
3559 }
3560
3561 /* Return one word of double-word value OP. HIGH_P is true to select the
3562 high part or false to select the low part. */
3563
3564 rtx
3565 riscv_subword (rtx op, bool high_p)
3566 {
3567 unsigned int byte = (high_p != BYTES_BIG_ENDIAN) ? UNITS_PER_WORD : 0;
3568 machine_mode mode = GET_MODE (op);
3569
3570 if (mode == VOIDmode)
3571 mode = TARGET_64BIT ? TImode : DImode;
3572
3573 if (MEM_P (op))
3574 return adjust_address (op, word_mode, byte);
3575
3576 if (REG_P (op))
3577 gcc_assert (!FP_REG_RTX_P (op));
3578
3579 return simplify_gen_subreg (word_mode, op, mode, byte);
3580 }
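/* For example, on a little-endian rv32 target a DImode value occupies two
   word-sized pieces: the code above selects byte offset 0 for the low part
   and UNITS_PER_WORD (4) for the high part, with the offsets swapped on a
   big-endian target.  */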
3581
3582 /* Return true if a 64-bit move from SRC to DEST should be split into two. */
3583
3584 bool
3585 riscv_split_64bit_move_p (rtx dest, rtx src)
3586 {
3587 if (TARGET_64BIT)
3588 return false;
3589
3590 /* There is no need to split if the FLI instruction in the `Zfa` extension can be used. */
3591 if (satisfies_constraint_zfli (src))
3592 return false;
3593
3594 /* Allow FPR <-> FPR and FPR <-> MEM moves, and permit the special case
3595 of zeroing an FPR with FCVT.D.W. */
3596 if (TARGET_DOUBLE_FLOAT
3597 && ((FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
3598 || (FP_REG_RTX_P (dest) && MEM_P (src))
3599 || (FP_REG_RTX_P (src) && MEM_P (dest))
3600 || (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))))
3601 return false;
3602
3603 return true;
3604 }
3605
3606 /* Split a doubleword move from SRC to DEST. On 32-bit targets,
3607 this function handles 64-bit moves for which riscv_split_64bit_move_p
3608 holds. For 64-bit targets, this function handles 128-bit moves. */
3609
3610 void
3611 riscv_split_doubleword_move (rtx dest, rtx src)
3612 {
3613 /* ZFA or XTheadFmv has instructions for accessing the upper bits of a double. */
3614 if (!TARGET_64BIT && (TARGET_ZFA || TARGET_XTHEADFMV))
3615 {
3616 if (FP_REG_RTX_P (dest))
3617 {
3618 rtx low_src = riscv_subword (src, false);
3619 rtx high_src = riscv_subword (src, true);
3620
3621 if (TARGET_ZFA)
3622 emit_insn (gen_movdfsisi3_rv32 (dest, high_src, low_src));
3623 else
3624 emit_insn (gen_th_fmv_hw_w_x (dest, high_src, low_src));
3625 return;
3626 }
3627 if (FP_REG_RTX_P (src))
3628 {
3629 rtx low_dest = riscv_subword (dest, false);
3630 rtx high_dest = riscv_subword (dest, true);
3631
3632 if (TARGET_ZFA)
3633 {
3634 emit_insn (gen_movsidf2_low_rv32 (low_dest, src));
3635 emit_insn (gen_movsidf2_high_rv32 (high_dest, src));
3636 return;
3637 }
3638 else
3639 {
3640 emit_insn (gen_th_fmv_x_w (low_dest, src));
3641 emit_insn (gen_th_fmv_x_hw (high_dest, src));
3642 }
3643 return;
3644 }
3645 }
3646
3647 /* The operation can be split into two normal moves. Decide in
3648 which order to do them. */
3649 rtx low_dest = riscv_subword (dest, false);
3650 if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src))
3651 {
3652 riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true));
3653 riscv_emit_move (low_dest, riscv_subword (src, false));
3654 }
3655 else
3656 {
3657 riscv_emit_move (low_dest, riscv_subword (src, false));
3658 riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true));
3659 }
3660 }
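/* For example, when splitting (set (reg:DI d) (reg:DI s)) on RV32 where
   the low word of D overlaps S, the high words are moved first:
     mv  d_hi, s_hi
     mv  d_lo, s_lo
   otherwise the low word is moved first; either way no input word is
   clobbered before it has been read.  */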
3661 \f
3662 /* Return the appropriate instructions to move SRC into DEST. Assume
3663 that SRC is operand 1 and DEST is operand 0. */
3664
3665 const char *
3666 riscv_output_move (rtx dest, rtx src)
3667 {
3668 enum rtx_code dest_code, src_code;
3669 machine_mode mode;
3670 bool dbl_p;
3671 unsigned width;
3672 const char *insn;
3673
3674 if ((insn = th_output_move (dest, src)))
3675 return insn;
3676
3677 dest_code = GET_CODE (dest);
3678 src_code = GET_CODE (src);
3679 mode = GET_MODE (dest);
3680 dbl_p = (GET_MODE_SIZE (mode).to_constant () == 8);
3681 width = GET_MODE_SIZE (mode).to_constant ();
3682
3683 if (dbl_p && riscv_split_64bit_move_p (dest, src))
3684 return "#";
3685
3686 if (dest_code == REG && GP_REG_P (REGNO (dest)))
3687 {
3688 if (src_code == REG && FP_REG_P (REGNO (src)))
3689 switch (width)
3690 {
3691 case 2:
3692 if (TARGET_ZFHMIN)
3693 return "fmv.x.h\t%0,%1";
3694 /* Using fmv.x.s + sign-extend to emulate fmv.x.h. */
3695 return "fmv.x.s\t%0,%1;slli\t%0,%0,16;srai\t%0,%0,16";
3696 case 4:
3697 return "fmv.x.s\t%0,%1";
3698 case 8:
3699 return "fmv.x.d\t%0,%1";
3700 }
3701
3702 if (src_code == MEM)
3703 switch (width)
3704 {
3705 case 1: return "lbu\t%0,%1";
3706 case 2: return "lhu\t%0,%1";
3707 case 4: return "lw\t%0,%1";
3708 case 8: return "ld\t%0,%1";
3709 }
3710
3711 if (src_code == CONST_INT)
3712 {
3713 if (SMALL_OPERAND (INTVAL (src)) || LUI_OPERAND (INTVAL (src)))
3714 return "li\t%0,%1";
3715
3716 if (TARGET_ZBS
3717 && SINGLE_BIT_MASK_OPERAND (INTVAL (src)))
3718 return "bseti\t%0,zero,%S1";
3719
3720 /* Should never reach here. */
3721 abort ();
3722 }
3723
3724 if (src_code == HIGH)
3725 return "lui\t%0,%h1";
3726
3727 if (symbolic_operand (src, VOIDmode))
3728 switch (riscv_classify_symbolic_expression (src))
3729 {
3730 case SYMBOL_GOT_DISP: return "la\t%0,%1";
3731 case SYMBOL_ABSOLUTE: return "lla\t%0,%1";
3732 case SYMBOL_PCREL: return "lla\t%0,%1";
3733 default: gcc_unreachable ();
3734 }
3735 }
3736 if ((src_code == REG && GP_REG_P (REGNO (src)))
3737 || (src == CONST0_RTX (mode)))
3738 {
3739 if (dest_code == REG)
3740 {
3741 if (GP_REG_P (REGNO (dest)))
3742 return "mv\t%0,%z1";
3743
3744 if (FP_REG_P (REGNO (dest)))
3745 switch (width)
3746 {
3747 case 2:
3748 if (TARGET_ZFHMIN)
3749 return "fmv.h.x\t%0,%z1";
3750 	    /* The high 16 bits should be all-1; otherwise the hardware treats
3751 	       it as an n-bit canonical NaN, but this does not matter for soft-float.  */
3752 return "fmv.s.x\t%0,%1";
3753 case 4:
3754 return "fmv.s.x\t%0,%z1";
3755 case 8:
3756 if (TARGET_64BIT)
3757 return "fmv.d.x\t%0,%z1";
3758 	    /* In RV32, we can emulate fmv.d.x %0, x0 using fcvt.d.w.  */
3759 gcc_assert (src == CONST0_RTX (mode));
3760 return "fcvt.d.w\t%0,x0";
3761 }
3762 }
3763 if (dest_code == MEM)
3764 switch (width)
3765 {
3766 case 1: return "sb\t%z1,%0";
3767 case 2: return "sh\t%z1,%0";
3768 case 4: return "sw\t%z1,%0";
3769 case 8: return "sd\t%z1,%0";
3770 }
3771 }
3772 if (src_code == REG && FP_REG_P (REGNO (src)))
3773 {
3774 if (dest_code == REG && FP_REG_P (REGNO (dest)))
3775 switch (width)
3776 {
3777 case 2:
3778 if (TARGET_ZFH)
3779 return "fmv.h\t%0,%1";
3780 return "fmv.s\t%0,%1";
3781 case 4:
3782 return "fmv.s\t%0,%1";
3783 case 8:
3784 return "fmv.d\t%0,%1";
3785 }
3786
3787 if (dest_code == MEM)
3788 switch (width)
3789 {
3790 case 2:
3791 return "fsh\t%1,%0";
3792 case 4:
3793 return "fsw\t%1,%0";
3794 case 8:
3795 return "fsd\t%1,%0";
3796 }
3797 }
3798 if (dest_code == REG && FP_REG_P (REGNO (dest)))
3799 {
3800 if (src_code == MEM)
3801 switch (width)
3802 {
3803 case 2:
3804 return "flh\t%0,%1";
3805 case 4:
3806 return "flw\t%0,%1";
3807 case 8:
3808 return "fld\t%0,%1";
3809 }
3810
3811 if (src_code == CONST_DOUBLE && satisfies_constraint_zfli (src))
3812 switch (width)
3813 {
3814 case 2:
3815 return "fli.h\t%0,%1";
3816 case 4:
3817 return "fli.s\t%0,%1";
3818 case 8:
3819 return "fli.d\t%0,%1";
3820 }
3821 }
3822 if (dest_code == REG && GP_REG_P (REGNO (dest)) && src_code == CONST_POLY_INT)
3823 {
3824 	      /* We only want a single read of the full vector register length (vlenb) after reload.  */
3825 gcc_assert (known_eq (rtx_to_poly_int64 (src), BYTES_PER_RISCV_VECTOR));
3826 return "csrr\t%0,vlenb";
3827 }
3828 gcc_unreachable ();
3829 }
3830
3831 const char *
3832 riscv_output_return ()
3833 {
3834 if (cfun->machine->naked_p)
3835 return "";
3836
3837 return "ret";
3838 }
3839
3840 \f
3841 /* Return true if CMP1 is a suitable second operand for integer ordering
3842 test CODE. See also the *sCC patterns in riscv.md. */
3843
3844 static bool
3845 riscv_int_order_operand_ok_p (enum rtx_code code, rtx cmp1)
3846 {
3847 switch (code)
3848 {
3849 case GT:
3850 case GTU:
3851 return reg_or_0_operand (cmp1, VOIDmode);
3852
3853 case GE:
3854 case GEU:
3855 return cmp1 == const1_rtx;
3856
3857 case LT:
3858 case LTU:
3859 return arith_operand (cmp1, VOIDmode);
3860
3861 case LE:
3862 return sle_operand (cmp1, VOIDmode);
3863
3864 case LEU:
3865 return sleu_operand (cmp1, VOIDmode);
3866
3867 default:
3868 gcc_unreachable ();
3869 }
3870 }
3871
3872 /* Return true if *CMP1 (of mode MODE) is a valid second operand for
3873 integer ordering test *CODE, or if an equivalent combination can
3874 be formed by adjusting *CODE and *CMP1. When returning true, update
3875 *CODE and *CMP1 with the chosen code and operand, otherwise leave
3876 them alone. */
3877
3878 static bool
3879 riscv_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1,
3880 machine_mode mode)
3881 {
3882 HOST_WIDE_INT plus_one;
3883
3884 if (riscv_int_order_operand_ok_p (*code, *cmp1))
3885 return true;
3886
3887 if (CONST_INT_P (*cmp1))
3888 switch (*code)
3889 {
3890 case LE:
3891 plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
3892 if (INTVAL (*cmp1) < plus_one)
3893 {
3894 *code = LT;
3895 *cmp1 = force_reg (mode, GEN_INT (plus_one));
3896 return true;
3897 }
3898 break;
3899
3900 case LEU:
3901 plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
3902 if (plus_one != 0)
3903 {
3904 *code = LTU;
3905 *cmp1 = force_reg (mode, GEN_INT (plus_one));
3906 return true;
3907 }
3908 break;
3909
3910 default:
3911 break;
3912 }
3913 return false;
3914 }
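/* Illustrative adjustments made by the function above:
     (LE x, 5) becomes (LT x, r6) with r6 = force_reg (6), since x <= 5
     is equivalent to x < 6;
     (LEU x, 0xffffffff) on a 32-bit target cannot be adjusted, because
     adding 1 wraps PLUS_ONE around to 0, so false is returned and the
     caller must handle the test some other way.  */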
3915
3916 /* Compare CMP0 and CMP1 using ordering test CODE and store the result
3917 in TARGET. CMP0 and TARGET are register_operands. If INVERT_PTR
3918 is nonnull, it's OK to set TARGET to the inverse of the result and
3919 flip *INVERT_PTR instead. */
3920
3921 static void
3922 riscv_emit_int_order_test (enum rtx_code code, bool *invert_ptr,
3923 rtx target, rtx cmp0, rtx cmp1)
3924 {
3925 machine_mode mode;
3926
3927 	  /* First see if there is a RISC-V instruction that can do this operation.
3928 If not, try doing the same for the inverse operation. If that also
3929 fails, force CMP1 into a register and try again. */
3930 mode = GET_MODE (cmp0);
3931 if (riscv_canonicalize_int_order_test (&code, &cmp1, mode))
3932 riscv_emit_binary (code, target, cmp0, cmp1);
3933 else
3934 {
3935 enum rtx_code inv_code = reverse_condition (code);
3936 if (!riscv_canonicalize_int_order_test (&inv_code, &cmp1, mode))
3937 {
3938 cmp1 = force_reg (mode, cmp1);
3939 riscv_emit_int_order_test (code, invert_ptr, target, cmp0, cmp1);
3940 }
3941 else if (invert_ptr == 0)
3942 {
3943 rtx inv_target = riscv_force_binary (word_mode,
3944 inv_code, cmp0, cmp1);
3945 riscv_emit_binary (EQ, target, inv_target, const0_rtx);
3946 }
3947 else
3948 {
3949 *invert_ptr = !*invert_ptr;
3950 riscv_emit_binary (inv_code, target, cmp0, cmp1);
3951 }
3952 }
3953 }
3954
3955 /* Return a register that is zero iff CMP0 and CMP1 are equal.
3956 The register will have the same mode as CMP0. */
3957
3958 static rtx
3959 riscv_zero_if_equal (rtx cmp0, rtx cmp1)
3960 {
3961 if (cmp1 == const0_rtx)
3962 return cmp0;
3963
3964 return expand_binop (GET_MODE (cmp0), sub_optab,
3965 cmp0, cmp1, 0, 0, OPTAB_DIRECT);
3966 }
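/* For example, an equality test (a == b) can subsequently be emitted as
     sub  t0, a, b
     seqz t0, t0
   since the subtraction result is zero exactly when the operands are
   equal.  */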
3967
3968 	/* Helper function for riscv_extend_comparands to sign-extend OP.
3969 	   However, if OP is a promoted SImode subreg with a DImode inner reg, e.g.
3970 	     (subreg/s/v:SI (reg/v:DI) 0)
3971 	   just peel off the SUBREG to get DImode, avoiding an extraneous extension.  */
3972
3973 static void
3974 riscv_sign_extend_if_not_subreg_prom (rtx *op)
3975 {
3976 if (GET_CODE (*op) == SUBREG
3977 && SUBREG_PROMOTED_VAR_P (*op)
3978 && SUBREG_PROMOTED_SIGNED_P (*op)
3979 && (GET_MODE_SIZE (GET_MODE (XEXP (*op, 0))).to_constant ()
3980 == GET_MODE_SIZE (word_mode)))
3981 *op = XEXP (*op, 0);
3982 else
3983 *op = gen_rtx_SIGN_EXTEND (word_mode, *op);
3984 }
3985
3986 /* Sign- or zero-extend OP0 and OP1 for integer comparisons. */
3987
3988 static void
3989 riscv_extend_comparands (rtx_code code, rtx *op0, rtx *op1)
3990 {
3991 /* Comparisons consider all XLEN bits, so extend sub-XLEN values. */
3992 if (GET_MODE_SIZE (word_mode) > GET_MODE_SIZE (GET_MODE (*op0)).to_constant ())
3993 {
3994 /* It is more profitable to zero-extend QImode values. But not if the
3995 	 first operand has already been sign-extended, and the second one
3996 	 is a constant or has already been sign-extended too.  */
3997 if (unsigned_condition (code) == code
3998 && (GET_MODE (*op0) == QImode
3999 && ! (GET_CODE (*op0) == SUBREG
4000 && SUBREG_PROMOTED_VAR_P (*op0)
4001 && SUBREG_PROMOTED_SIGNED_P (*op0)
4002 && (CONST_INT_P (*op1)
4003 || (GET_CODE (*op1) == SUBREG
4004 && SUBREG_PROMOTED_VAR_P (*op1)
4005 && SUBREG_PROMOTED_SIGNED_P (*op1))))))
4006 {
4007 *op0 = gen_rtx_ZERO_EXTEND (word_mode, *op0);
4008 if (CONST_INT_P (*op1))
4009 *op1 = GEN_INT ((uint8_t) INTVAL (*op1));
4010 else
4011 *op1 = gen_rtx_ZERO_EXTEND (word_mode, *op1);
4012 }
4013 else
4014 {
4015 riscv_sign_extend_if_not_subreg_prom (op0);
4016
4017 if (*op1 != const0_rtx)
4018 riscv_sign_extend_if_not_subreg_prom (op1);
4019 }
4020 }
4021 }
4022
4023 /* Convert a comparison into something that can be used in a branch or
4024 conditional move. On entry, *OP0 and *OP1 are the values being
4025 compared and *CODE is the code used to compare them.
4026
4027 Update *CODE, *OP0 and *OP1 so that they describe the final comparison.
4028 If NEED_EQ_NE_P, then only EQ or NE comparisons against zero are
4029 emitted. */
4030
4031 static void
4032 riscv_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1,
4033 bool need_eq_ne_p = false)
4034 {
4035 if (need_eq_ne_p)
4036 {
4037 rtx cmp_op0 = *op0;
4038 rtx cmp_op1 = *op1;
4039 if (*code == EQ || *code == NE)
4040 {
4041 *op0 = riscv_zero_if_equal (cmp_op0, cmp_op1);
4042 *op1 = const0_rtx;
4043 return;
4044 }
4045 gcc_unreachable ();
4046 }
4047
4048 if (splittable_const_int_operand (*op1, VOIDmode))
4049 {
4050 HOST_WIDE_INT rhs = INTVAL (*op1);
4051
4052 if (*code == EQ || *code == NE)
4053 {
4054 /* Convert e.g. OP0 == 2048 into OP0 - 2048 == 0. */
4055 if (SMALL_OPERAND (-rhs))
4056 {
4057 *op0 = riscv_force_binary (GET_MODE (*op0), PLUS, *op0,
4058 GEN_INT (-rhs));
4059 *op1 = const0_rtx;
4060 }
4061 }
4062 else
4063 {
4064 static const enum rtx_code mag_comparisons[][2] = {
4065 {LEU, LTU}, {GTU, GEU}, {LE, LT}, {GT, GE}
4066 };
4067
4068 /* Convert e.g. (OP0 <= 0xFFF) into (OP0 < 0x1000). */
4069 for (size_t i = 0; i < ARRAY_SIZE (mag_comparisons); i++)
4070 {
4071 HOST_WIDE_INT new_rhs;
4072 bool increment = *code == mag_comparisons[i][0];
4073 bool decrement = *code == mag_comparisons[i][1];
4074 if (!increment && !decrement)
4075 continue;
4076
4077 new_rhs = rhs + (increment ? 1 : -1);
4078 new_rhs = trunc_int_for_mode (new_rhs, GET_MODE (*op0));
4079 if (riscv_integer_cost (new_rhs) < riscv_integer_cost (rhs)
4080 && (rhs < 0) == (new_rhs < 0))
4081 {
4082 *op1 = GEN_INT (new_rhs);
4083 *code = mag_comparisons[i][increment];
4084 }
4085 break;
4086 }
4087 }
4088 }
4089
4090 riscv_extend_comparands (*code, op0, op1);
4091
4092 *op0 = force_reg (word_mode, *op0);
4093 if (*op1 != const0_rtx)
4094 *op1 = force_reg (word_mode, *op1);
4095 }
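/* Illustrative transformations performed by the function above:
     x == 2048  becomes  t0 = x + (-2048); t0 == 0   -- addi then beqz/seqz
     x <= 0xfff becomes  x < 0x1000                  -- 0x1000 is a single
   lui, so the new constant is cheaper to materialize than 0xfff.  */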
4096
4097 /* Like riscv_emit_int_compare, but for floating-point comparisons. */
4098
4099 static void
4100 riscv_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1,
4101 bool *invert_ptr = nullptr)
4102 {
4103 rtx tmp0, tmp1, cmp_op0 = *op0, cmp_op1 = *op1;
4104 enum rtx_code fp_code = *code;
4105 *code = NE;
4106
4107 switch (fp_code)
4108 {
4109 case UNORDERED:
4110 *code = EQ;
4111 /* Fall through. */
4112
4113 case ORDERED:
4114 /* a == a && b == b */
4115 tmp0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op0);
4116 tmp1 = riscv_force_binary (word_mode, EQ, cmp_op1, cmp_op1);
4117 *op0 = riscv_force_binary (word_mode, AND, tmp0, tmp1);
4118 *op1 = const0_rtx;
4119 break;
4120
4121 case UNEQ:
4122 	    /* ordered(a, b) == (a == b) */
4123 *code = EQ;
4124 tmp0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op0);
4125 tmp1 = riscv_force_binary (word_mode, EQ, cmp_op1, cmp_op1);
4126 *op0 = riscv_force_binary (word_mode, AND, tmp0, tmp1);
4127 *op1 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op1);
4128 break;
4129
4130 #define UNORDERED_COMPARISON(CODE, CMP) \
4131 case CODE: \
4132 *code = EQ; \
4133 *op0 = gen_reg_rtx (word_mode); \
4134 if (GET_MODE (cmp_op0) == SFmode && TARGET_64BIT) \
4135 emit_insn (gen_f##CMP##_quietsfdi4 (*op0, cmp_op0, cmp_op1)); \
4136 else if (GET_MODE (cmp_op0) == SFmode) \
4137 emit_insn (gen_f##CMP##_quietsfsi4 (*op0, cmp_op0, cmp_op1)); \
4138 else if (GET_MODE (cmp_op0) == DFmode && TARGET_64BIT) \
4139 emit_insn (gen_f##CMP##_quietdfdi4 (*op0, cmp_op0, cmp_op1)); \
4140 else if (GET_MODE (cmp_op0) == DFmode) \
4141 emit_insn (gen_f##CMP##_quietdfsi4 (*op0, cmp_op0, cmp_op1)); \
4142 else if (GET_MODE (cmp_op0) == HFmode && TARGET_64BIT) \
4143 emit_insn (gen_f##CMP##_quiethfdi4 (*op0, cmp_op0, cmp_op1)); \
4144 else if (GET_MODE (cmp_op0) == HFmode) \
4145 emit_insn (gen_f##CMP##_quiethfsi4 (*op0, cmp_op0, cmp_op1)); \
4146 else \
4147 gcc_unreachable (); \
4148 *op1 = const0_rtx; \
4149 break;
4150
4151 case UNLT:
4152 std::swap (cmp_op0, cmp_op1);
4153 gcc_fallthrough ();
4154
4155 UNORDERED_COMPARISON(UNGT, le)
4156
4157 case UNLE:
4158 std::swap (cmp_op0, cmp_op1);
4159 gcc_fallthrough ();
4160
4161 UNORDERED_COMPARISON(UNGE, lt)
4162 #undef UNORDERED_COMPARISON
4163
4164 case NE:
4165 fp_code = EQ;
4166 if (invert_ptr != nullptr)
4167 *invert_ptr = !*invert_ptr;
4168 else
4169 {
4170 cmp_op0 = riscv_force_binary (word_mode, fp_code, cmp_op0, cmp_op1);
4171 cmp_op1 = const0_rtx;
4172 }
4173 gcc_fallthrough ();
4174
4175 case EQ:
4176 case LE:
4177 case LT:
4178 case GE:
4179 case GT:
4180 /* We have instructions for these cases. */
4181 *code = fp_code;
4182 *op0 = cmp_op0;
4183 *op1 = cmp_op1;
4184 break;
4185
4186 case LTGT:
4187 /* (a < b) | (a > b) */
4188 tmp0 = riscv_force_binary (word_mode, LT, cmp_op0, cmp_op1);
4189 tmp1 = riscv_force_binary (word_mode, GT, cmp_op0, cmp_op1);
4190 *op0 = riscv_force_binary (word_mode, IOR, tmp0, tmp1);
4191 *op1 = const0_rtx;
4192 break;
4193
4194 default:
4195 gcc_unreachable ();
4196 }
4197 }
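/* For example, ORDERED (a, b) in DFmode expands to
     feq.d t0, a, a
     feq.d t1, b, b
     and   t0, t0, t1
   followed by a comparison of T0 against zero, since FEQ is a quiet
   comparison that yields 0 whenever an operand is a NaN.  */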
4198
4199 /* CODE-compare OP0 and OP1. Store the result in TARGET. */
4200
4201 void
4202 riscv_expand_int_scc (rtx target, enum rtx_code code, rtx op0, rtx op1, bool *invert_ptr)
4203 {
4204 riscv_extend_comparands (code, &op0, &op1);
4205 op0 = force_reg (word_mode, op0);
4206
4207 if (code == EQ || code == NE)
4208 {
4209 rtx zie = riscv_zero_if_equal (op0, op1);
4210 riscv_emit_binary (code, target, zie, const0_rtx);
4211 }
4212 else
4213 riscv_emit_int_order_test (code, invert_ptr, target, op0, op1);
4214 }
4215
4216 /* Like riscv_expand_int_scc, but for floating-point comparisons. */
4217
4218 void
4219 riscv_expand_float_scc (rtx target, enum rtx_code code, rtx op0, rtx op1,
4220 bool *invert_ptr)
4221 {
4222 riscv_emit_float_compare (&code, &op0, &op1, invert_ptr);
4223
4224 machine_mode mode = GET_MODE (target);
4225 if (mode != word_mode)
4226 {
4227 rtx cmp = riscv_force_binary (word_mode, code, op0, op1);
4228 riscv_emit_set (target, lowpart_subreg (mode, cmp, word_mode));
4229 }
4230 else
4231 riscv_emit_binary (code, target, op0, op1);
4232 }
4233
4234 /* Jump to LABEL if (CODE OP0 OP1) holds. */
4235
4236 void
4237 riscv_expand_conditional_branch (rtx label, rtx_code code, rtx op0, rtx op1)
4238 {
4239 if (FLOAT_MODE_P (GET_MODE (op1)))
4240 riscv_emit_float_compare (&code, &op0, &op1);
4241 else
4242 riscv_emit_int_compare (&code, &op0, &op1);
4243
4244 if (FLOAT_MODE_P (GET_MODE (op0)))
4245 {
4246 op0 = riscv_force_binary (word_mode, code, op0, op1);
4247 op1 = const0_rtx;
4248 code = NE;
4249 }
4250
4251 rtx condition = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
4252 emit_jump_insn (gen_condjump (condition, label));
4253 }
4254
4255 /* Emit a cond move: If OP holds, move CONS to DEST; else move ALT to DEST.
4256 	   Return false if the expansion fails.  */
4257
4258 bool
4259 riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
4260 {
4261 machine_mode mode = GET_MODE (dest);
4262 rtx_code code = GET_CODE (op);
4263 rtx op0 = XEXP (op, 0);
4264 rtx op1 = XEXP (op, 1);
4265
4266 if (((TARGET_ZICOND_LIKE
4267 || (arith_operand (cons, mode) && arith_operand (alt, mode)))
4268 && (GET_MODE_CLASS (mode) == MODE_INT))
4269 || TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
4270 {
4271 machine_mode mode0 = GET_MODE (op0);
4272 machine_mode mode1 = GET_MODE (op1);
4273
4274 /* An integer comparison must be comparing WORD_MODE objects. We
4275 	 must enforce that so that we don't strip away a sign extension
4276 	 thinking it is unnecessary.  We might consider using
4277 	 riscv_extend_comparands if they are not already properly extended.  */
4278 if ((INTEGRAL_MODE_P (mode0) && mode0 != word_mode)
4279 || (INTEGRAL_MODE_P (mode1) && mode1 != word_mode))
4280 return false;
4281
4282 /* In the fallback generic case use MODE rather than WORD_MODE for
4283 the output of the SCC instruction, to match the mode of the NEG
4284 operation below. The output of SCC is 0 or 1 boolean, so it is
4285 valid for input in any scalar integer mode. */
4286 rtx tmp = gen_reg_rtx ((TARGET_ZICOND_LIKE
4287 || TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
4288 ? word_mode : mode);
4289 bool invert = false;
4290
4291 /* Canonicalize the comparison. It must be an equality comparison
4292 of integer operands, or with SFB it can be any comparison of
4293 integer operands. If it isn't, then emit an SCC instruction
4294 so that we can then use an equality comparison against zero. */
4295 if ((!TARGET_SFB_ALU && !equality_operator (op, VOIDmode))
4296 || !INTEGRAL_MODE_P (mode0))
4297 {
4298 bool *invert_ptr = nullptr;
4299
4300 /* If riscv_expand_int_scc inverts the condition, then it will
4301 flip the value of INVERT. We need to know where so that
4302 we can adjust it for our needs. */
4303 if (code == LE || code == LEU || code == GE || code == GEU)
4304 invert_ptr = &invert;
4305
4306 /* Emit an SCC-like instruction into a temporary so that we can
4307 use an EQ/NE comparison. We can support both FP and integer
4308 conditional moves. */
4309 if (INTEGRAL_MODE_P (mode0))
4310 riscv_expand_int_scc (tmp, code, op0, op1, invert_ptr);
4311 else if (FLOAT_MODE_P (mode0)
4312 && fp_scc_comparison (op, GET_MODE (op)))
4313 riscv_expand_float_scc (tmp, code, op0, op1, &invert);
4314 else
4315 return false;
4316
4317 op = gen_rtx_fmt_ee (invert ? EQ : NE, mode, tmp, const0_rtx);
4318
4319 /* We've generated a new comparison. Update the local variables. */
4320 code = GET_CODE (op);
4321 op0 = XEXP (op, 0);
4322 op1 = XEXP (op, 1);
4323 }
4324 else if (!TARGET_ZICOND_LIKE && !TARGET_SFB_ALU && !TARGET_XTHEADCONDMOV)
4325 riscv_expand_int_scc (tmp, code, op0, op1, &invert);
4326
4327 if (TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
4328 {
4329 riscv_emit_int_compare (&code, &op0, &op1, !TARGET_SFB_ALU);
4330 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4331
4332 /* The expander is a bit loose in its specification of the true
4333 arm of the conditional move. That allows us to support more
4334 cases for extensions which are more general than SFB. But
4335 does mean we need to force CONS into a register at this point. */
4336 cons = force_reg (mode, cons);
4337 /* With XTheadCondMov we need to force ALT into a register too. */
4338 alt = force_reg (mode, alt);
4339 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond,
4340 cons, alt)));
4341 return true;
4342 }
4343 else if (!TARGET_ZICOND_LIKE)
4344 {
4345 if (invert)
4346 std::swap (cons, alt);
4347
4348 rtx reg1 = gen_reg_rtx (mode);
4349 rtx reg2 = gen_reg_rtx (mode);
4350 rtx reg3 = gen_reg_rtx (mode);
4351 rtx reg4 = gen_reg_rtx (mode);
4352
4353 riscv_emit_unary (NEG, reg1, tmp);
4354 riscv_emit_binary (AND, reg2, reg1, cons);
4355 riscv_emit_unary (NOT, reg3, reg1);
4356 riscv_emit_binary (AND, reg4, reg3, alt);
4357 riscv_emit_binary (IOR, dest, reg2, reg4);
4358 return true;
4359 }
4360 /* 0, reg or 0, imm */
4361 else if (cons == CONST0_RTX (mode)
4362 && (REG_P (alt)
4363 || (CONST_INT_P (alt) && alt != CONST0_RTX (mode))))
4364 {
4365 riscv_emit_int_compare (&code, &op0, &op1, true);
4366 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4367 alt = force_reg (mode, alt);
4368 emit_insn (gen_rtx_SET (dest,
4369 gen_rtx_IF_THEN_ELSE (mode, cond,
4370 cons, alt)));
4371 return true;
4372 }
4373 /* imm, imm */
4374 else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode)
4375 && CONST_INT_P (alt) && alt != CONST0_RTX (mode))
4376 {
4377 riscv_emit_int_compare (&code, &op0, &op1, true);
4378 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4379 HOST_WIDE_INT t = INTVAL (alt) - INTVAL (cons);
4380 alt = force_reg (mode, gen_int_mode (t, mode));
4381 emit_insn (gen_rtx_SET (dest,
4382 gen_rtx_IF_THEN_ELSE (mode, cond,
4383 CONST0_RTX (mode),
4384 alt)));
4385 /* CONS might not fit into a signed 12 bit immediate suitable
4386 for an addi instruction. If that's the case, force it
4387 into a register. */
4388 if (!SMALL_OPERAND (INTVAL (cons)))
4389 cons = force_reg (mode, cons);
4390 riscv_emit_binary (PLUS, dest, dest, cons);
4391 return true;
4392 }
4393 /* imm, reg */
4394 else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode) && REG_P (alt))
4395 {
4396 /* Optimize for register value of 0. */
4397 if (code == NE && rtx_equal_p (op0, alt) && op1 == CONST0_RTX (mode))
4398 {
4399 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4400 cons = force_reg (mode, cons);
4401 emit_insn (gen_rtx_SET (dest,
4402 gen_rtx_IF_THEN_ELSE (mode, cond,
4403 cons, alt)));
4404 return true;
4405 }
4406
4407 riscv_emit_int_compare (&code, &op0, &op1, true);
4408 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4409
4410 rtx temp1 = gen_reg_rtx (mode);
4411 rtx temp2 = gen_int_mode (-1 * INTVAL (cons), mode);
4412
4413 /* TEMP2 and/or CONS might not fit into a signed 12 bit immediate
4414 suitable for an addi instruction. If that's the case, force it
4415 into a register. */
4416 if (!SMALL_OPERAND (INTVAL (temp2)))
4417 temp2 = force_reg (mode, temp2);
4418 if (!SMALL_OPERAND (INTVAL (cons)))
4419 cons = force_reg (mode, cons);
4420
4421 riscv_emit_binary (PLUS, temp1, alt, temp2);
4422 emit_insn (gen_rtx_SET (dest,
4423 gen_rtx_IF_THEN_ELSE (mode, cond,
4424 CONST0_RTX (mode),
4425 temp1)));
4426 riscv_emit_binary (PLUS, dest, dest, cons);
4427 return true;
4428 }
4429 /* reg, 0 or imm, 0 */
4430 else if ((REG_P (cons)
4431 || (CONST_INT_P (cons) && cons != CONST0_RTX (mode)))
4432 && alt == CONST0_RTX (mode))
4433 {
4434 riscv_emit_int_compare (&code, &op0, &op1, true);
4435 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4436 cons = force_reg (mode, cons);
4437 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond,
4438 cons, alt)));
4439 return true;
4440 }
4441 /* reg, imm */
4442 else if (REG_P (cons) && CONST_INT_P (alt) && alt != CONST0_RTX (mode))
4443 {
4444 /* Optimize for register value of 0. */
4445 if (code == EQ && rtx_equal_p (op0, cons) && op1 == CONST0_RTX (mode))
4446 {
4447 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4448 alt = force_reg (mode, alt);
4449 emit_insn (gen_rtx_SET (dest,
4450 gen_rtx_IF_THEN_ELSE (mode, cond,
4451 cons, alt)));
4452 return true;
4453 }
4454
4455 riscv_emit_int_compare (&code, &op0, &op1, true);
4456 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4457
4458 rtx temp1 = gen_reg_rtx (mode);
4459 rtx temp2 = gen_int_mode (-1 * INTVAL (alt), mode);
4460
4461 /* TEMP2 and/or ALT might not fit into a signed 12 bit immediate
4462 suitable for an addi instruction. If that's the case, force it
4463 into a register. */
4464 if (!SMALL_OPERAND (INTVAL (temp2)))
4465 temp2 = force_reg (mode, temp2);
4466 if (!SMALL_OPERAND (INTVAL (alt)))
4467 alt = force_reg (mode, alt);
4468
4469 riscv_emit_binary (PLUS, temp1, cons, temp2);
4470 emit_insn (gen_rtx_SET (dest,
4471 gen_rtx_IF_THEN_ELSE (mode, cond,
4472 temp1,
4473 CONST0_RTX (mode))));
4474 riscv_emit_binary (PLUS, dest, dest, alt);
4475 return true;
4476 }
4477 /* reg, reg */
4478 else if (REG_P (cons) && REG_P (alt))
4479 {
4480 if ((code == EQ && rtx_equal_p (cons, op0))
4481 || (code == NE && rtx_equal_p (alt, op0)))
4482 {
4483 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4484 if (!rtx_equal_p (cons, op0))
4485 std::swap (alt, cons);
4486 alt = force_reg (mode, alt);
4487 emit_insn (gen_rtx_SET (dest,
4488 gen_rtx_IF_THEN_ELSE (mode, cond,
4489 cons, alt)));
4490 return true;
4491 }
4492
4493 rtx reg1 = gen_reg_rtx (mode);
4494 rtx reg2 = gen_reg_rtx (mode);
4495 riscv_emit_int_compare (&code, &op0, &op1, true);
4496 rtx cond1 = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4497 rtx cond2 = gen_rtx_fmt_ee (code == NE ? EQ : NE,
4498 GET_MODE (op0), op0, op1);
4499 emit_insn (gen_rtx_SET (reg2,
4500 gen_rtx_IF_THEN_ELSE (mode, cond2,
4501 CONST0_RTX (mode),
4502 cons)));
4503 emit_insn (gen_rtx_SET (reg1,
4504 gen_rtx_IF_THEN_ELSE (mode, cond1,
4505 CONST0_RTX (mode),
4506 alt)));
4507 riscv_emit_binary (IOR, dest, reg1, reg2);
4508 return true;
4509 }
4510 }
4511
4512 return false;
4513 }
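/* As an illustration of the "imm, imm" case above, with Zicond the
   expression (x != 0 ? 5 : 7) is computed as
     dest = (x != 0) ? 0 : 2    -- czero.nez on the difference 7 - 5
     dest = dest + 5            -- addi
   which yields 5 when X is nonzero and 7 otherwise.  */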
4514
4515 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Every parameter gets at
4516 least PARM_BOUNDARY bits of alignment, but will be given anything up
4517 to PREFERRED_STACK_BOUNDARY bits if the type requires it. */
4518
4519 static unsigned int
4520 riscv_function_arg_boundary (machine_mode mode, const_tree type)
4521 {
4522 unsigned int alignment;
4523
4524 	  /* Use natural alignment if the type is not an aggregate.  */
4525 if (type && !AGGREGATE_TYPE_P (type))
4526 alignment = TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
4527 else
4528 alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode);
4529
4530 return MIN (PREFERRED_STACK_BOUNDARY, MAX (PARM_BOUNDARY, alignment));
4531 }
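/* Illustratively, on RV64 (PARM_BOUNDARY == 64, PREFERRED_STACK_BOUNDARY
   == 128): a 128-bit-aligned "long double" gets MIN (128, MAX (64, 128))
   == 128 bits, while a packed struct with 8-bit alignment is still
   promoted to the 64-bit PARM_BOUNDARY minimum.  */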
4532
4533 /* If MODE represents an argument that can be passed or returned in
4534 floating-point registers, return the number of registers, else 0. */
4535
4536 static unsigned
4537 riscv_pass_mode_in_fpr_p (machine_mode mode)
4538 {
4539 if (GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FP_ARG)
4540 {
4541 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4542 return 1;
4543
4544 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4545 return 2;
4546 }
4547
4548 return 0;
4549 }
4550
4551 typedef struct {
4552 const_tree type;
4553 HOST_WIDE_INT offset;
4554 } riscv_aggregate_field;
4555
4556 /* Identify subfields of aggregates that are candidates for passing in
4557 floating-point registers. */
4558
4559 static int
4560 riscv_flatten_aggregate_field (const_tree type,
4561 riscv_aggregate_field fields[2],
4562 int n, HOST_WIDE_INT offset,
4563 bool ignore_zero_width_bit_field_p)
4564 {
4565 switch (TREE_CODE (type))
4566 {
4567 case RECORD_TYPE:
4568 /* Can't handle incomplete types nor sizes that are not fixed. */
4569 if (!COMPLETE_TYPE_P (type)
4570 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4571 || !tree_fits_uhwi_p (TYPE_SIZE (type)))
4572 return -1;
4573
4574 for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
4575 if (TREE_CODE (f) == FIELD_DECL)
4576 {
4577 if (!TYPE_P (TREE_TYPE (f)))
4578 return -1;
4579
4580 /* The C++ front end strips zero-length bit-fields from structs.
4581 So we need to ignore them in the C front end to make C code
4582 compatible with C++ code. */
4583 if (ignore_zero_width_bit_field_p
4584 && DECL_BIT_FIELD (f)
4585 && (DECL_SIZE (f) == NULL_TREE
4586 || integer_zerop (DECL_SIZE (f))))
4587 ;
4588 else
4589 {
4590 HOST_WIDE_INT pos = offset + int_byte_position (f);
4591 n = riscv_flatten_aggregate_field (TREE_TYPE (f),
4592 fields, n, pos,
4593 ignore_zero_width_bit_field_p);
4594 }
4595 if (n < 0)
4596 return -1;
4597 }
4598 return n;
4599
4600 case ARRAY_TYPE:
4601 {
4602 HOST_WIDE_INT n_elts;
4603 riscv_aggregate_field subfields[2];
4604 tree index = TYPE_DOMAIN (type);
4605 tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type));
4606 int n_subfields = riscv_flatten_aggregate_field (TREE_TYPE (type),
4607 subfields, 0, offset,
4608 ignore_zero_width_bit_field_p);
4609
4610 /* Can't handle incomplete types nor sizes that are not fixed. */
4611 if (n_subfields <= 0
4612 || !COMPLETE_TYPE_P (type)
4613 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4614 || !index
4615 || !TYPE_MAX_VALUE (index)
4616 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
4617 || !TYPE_MIN_VALUE (index)
4618 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
4619 || !tree_fits_uhwi_p (elt_size))
4620 return -1;
4621
4622 n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
4623 - tree_to_uhwi (TYPE_MIN_VALUE (index));
4624 gcc_assert (n_elts >= 0);
4625
4626 for (HOST_WIDE_INT i = 0; i < n_elts; i++)
4627 for (int j = 0; j < n_subfields; j++)
4628 {
4629 if (n >= 2)
4630 return -1;
4631
4632 fields[n] = subfields[j];
4633 fields[n++].offset += i * tree_to_uhwi (elt_size);
4634 }
4635
4636 return n;
4637 }
4638
4639 case COMPLEX_TYPE:
4640 {
4641 	      /* A complex type needs to consume two fields, so N must be 0.  */
4642 if (n != 0)
4643 return -1;
4644
4645 HOST_WIDE_INT elt_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type))).to_constant ();
4646
4647 if (elt_size <= UNITS_PER_FP_ARG)
4648 {
4649 fields[0].type = TREE_TYPE (type);
4650 fields[0].offset = offset;
4651 fields[1].type = TREE_TYPE (type);
4652 fields[1].offset = offset + elt_size;
4653
4654 return 2;
4655 }
4656
4657 return -1;
4658 }
4659
4660 default:
4661 if (n < 2
4662 && ((SCALAR_FLOAT_TYPE_P (type)
4663 && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_FP_ARG)
4664 || (INTEGRAL_TYPE_P (type)
4665 && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_WORD)))
4666 {
4667 fields[n].type = type;
4668 fields[n].offset = offset;
4669 return n + 1;
4670 }
4671 else
4672 return -1;
4673 }
4674 }
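/* Illustrative results of the flattening above:
     struct { float x; float y; }  -> two SFmode fields at offsets 0 and 4;
     struct { float x[2]; }        -> likewise two SFmode fields;
     struct { float x[3]; }        -> -1, since more than two fields.  */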
4675
4676 /* Identify candidate aggregates for passing in floating-point registers.
4677 Candidates have at most two fields after flattening. */
4678
4679 static int
4680 riscv_flatten_aggregate_argument (const_tree type,
4681 riscv_aggregate_field fields[2],
4682 bool ignore_zero_width_bit_field_p)
4683 {
4684 if (!type || TREE_CODE (type) != RECORD_TYPE)
4685 return -1;
4686
4687 return riscv_flatten_aggregate_field (type, fields, 0, 0,
4688 ignore_zero_width_bit_field_p);
4689 }
4690
4691 /* See whether TYPE is a record whose fields should be returned in one or
4692 two floating-point registers. If so, populate FIELDS accordingly. */
4693
4694 static unsigned
4695 riscv_pass_aggregate_in_fpr_pair_p (const_tree type,
4696 riscv_aggregate_field fields[2])
4697 {
4698 static int warned = 0;
4699
4700 /* This is the old ABI, which differs for C++ and C. */
4701 int n_old = riscv_flatten_aggregate_argument (type, fields, false);
4702 for (int i = 0; i < n_old; i++)
4703 if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
4704 {
4705 n_old = -1;
4706 break;
4707 }
4708
4709 /* This is the new ABI, which is the same for C++ and C. */
4710 int n_new = riscv_flatten_aggregate_argument (type, fields, true);
4711 for (int i = 0; i < n_new; i++)
4712 if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
4713 {
4714 n_new = -1;
4715 break;
4716 }
4717
4718 if ((n_old != n_new) && (warned == 0))
4719 {
4720 warning (OPT_Wpsabi, "ABI for flattened struct with zero-length "
4721 "bit-fields changed in GCC 10");
4722 warned = 1;
4723 }
4724
4725 return n_new > 0 ? n_new : 0;
4726 }
4727
4728 	/* See whether TYPE is a record whose fields should be returned in one
4729 	   floating-point register and one integer register.  If so, populate
4730 FIELDS accordingly. */
4731
4732 static bool
4733 riscv_pass_aggregate_in_fpr_and_gpr_p (const_tree type,
4734 riscv_aggregate_field fields[2])
4735 {
4736 static int warned = 0;
4737
4738 /* This is the old ABI, which differs for C++ and C. */
4739 unsigned num_int_old = 0, num_float_old = 0;
4740 int n_old = riscv_flatten_aggregate_argument (type, fields, false);
4741 for (int i = 0; i < n_old; i++)
4742 {
4743 num_float_old += SCALAR_FLOAT_TYPE_P (fields[i].type);
4744 num_int_old += INTEGRAL_TYPE_P (fields[i].type);
4745 }
4746
4747 /* This is the new ABI, which is the same for C++ and C. */
4748 unsigned num_int_new = 0, num_float_new = 0;
4749 int n_new = riscv_flatten_aggregate_argument (type, fields, true);
4750 for (int i = 0; i < n_new; i++)
4751 {
4752 num_float_new += SCALAR_FLOAT_TYPE_P (fields[i].type);
4753 num_int_new += INTEGRAL_TYPE_P (fields[i].type);
4754 }
4755
4756 if (((num_int_old == 1 && num_float_old == 1
4757 && (num_int_old != num_int_new || num_float_old != num_float_new))
4758 || (num_int_new == 1 && num_float_new == 1
4759 && (num_int_old != num_int_new || num_float_old != num_float_new)))
4760 && (warned == 0))
4761 {
4762 warning (OPT_Wpsabi, "ABI for flattened struct with zero-length "
4763 "bit-fields changed in GCC 10");
4764 warned = 1;
4765 }
4766
4767 return num_int_new == 1 && num_float_new == 1;
4768 }
4769
4770 /* Return the representation of an argument passed or returned in an FPR
4771 when the value has mode VALUE_MODE and the type has TYPE_MODE. The
4772 two modes may be different for structures like:
4773
4774 struct __attribute__((packed)) foo { float f; }
4775
4776 where the SFmode value "f" is passed in REGNO but the struct itself
4777 has mode BLKmode. */
4778
4779 static rtx
4780 riscv_pass_fpr_single (machine_mode type_mode, unsigned regno,
4781 machine_mode value_mode,
4782 HOST_WIDE_INT offset)
4783 {
4784 rtx x = gen_rtx_REG (value_mode, regno);
4785
4786 if (type_mode != value_mode)
4787 {
4788 x = gen_rtx_EXPR_LIST (VOIDmode, x, GEN_INT (offset));
4789 x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x));
4790 }
4791 return x;
4792 }
4793
4794 /* Pass or return a composite value in the FPR pair REGNO and REGNO + 1.
4795 MODE is the mode of the composite. MODE1 and OFFSET1 are the mode and
4796 byte offset for the first value, likewise MODE2 and OFFSET2 for the
4797 second value. */
4798
4799 static rtx
4800 riscv_pass_fpr_pair (machine_mode mode, unsigned regno1,
4801 machine_mode mode1, HOST_WIDE_INT offset1,
4802 unsigned regno2, machine_mode mode2,
4803 HOST_WIDE_INT offset2)
4804 {
4805 return gen_rtx_PARALLEL
4806 (mode,
4807 gen_rtvec (2,
4808 gen_rtx_EXPR_LIST (VOIDmode,
4809 gen_rtx_REG (mode1, regno1),
4810 GEN_INT (offset1)),
4811 gen_rtx_EXPR_LIST (VOIDmode,
4812 gen_rtx_REG (mode2, regno2),
4813 GEN_INT (offset2))));
4814 }
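/* For example, a "complex double" return value is represented as
     (parallel:DC [(expr_list (reg:DF fa0) (const_int 0))
                   (expr_list (reg:DF fa1) (const_int 8))])
   i.e. the real part in FA0 at byte offset 0 and the imaginary part in
   FA1 at byte offset 8.  */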
4815
4816 	/* Return true if TYPE is a vector type or contains one.  */
4817
4818 static bool
4819 riscv_arg_has_vector (const_tree type)
4820 {
4821 if (riscv_v_ext_mode_p (TYPE_MODE (type)))
4822 return true;
4823
4824 if (!COMPLETE_TYPE_P (type))
4825 return false;
4826
4827 switch (TREE_CODE (type))
4828 {
4829 case RECORD_TYPE:
4830 	      /* If it is a record, further determine whether any of its fields
4831 	         has a vector type.  */
4832 for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
4833 if (TREE_CODE (f) == FIELD_DECL)
4834 {
4835 tree field_type = TREE_TYPE (f);
4836 if (!TYPE_P (field_type))
4837 break;
4838
4839 if (riscv_arg_has_vector (field_type))
4840 return true;
4841 }
4842 break;
4843 case ARRAY_TYPE:
4844 return riscv_arg_has_vector (TREE_TYPE (type));
4845 default:
4846 break;
4847 }
4848
4849 return false;
4850 }
4851
4852 	/* Check whether TYPE is a vector type or contains a vector type.
4853 	   Only the value type is checked; vector pointer types are not.  */
4854
4855 static void
4856 riscv_pass_in_vector_p (const_tree type)
4857 {
4858 static int warned = 0;
4859
4860 if (type && riscv_vector::lookup_vector_type_attribute (type) && !warned)
4861 {
4862 warning (OPT_Wpsabi,
4863 	      "ABI for the vector type is currently in an experimental stage and "
4864 	      "may change in upcoming versions of GCC.");
4865 warned = 1;
4866 }
4867 }
4868
4869 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4870 for a call to a function whose data type is FNTYPE.
4871 For a library call, FNTYPE is 0. */
4872
4873 void
4874 riscv_init_cumulative_args (CUMULATIVE_ARGS *cum,
4875 tree fntype ATTRIBUTE_UNUSED,
4876 rtx libname ATTRIBUTE_UNUSED,
4877 tree fndecl,
4878 int caller ATTRIBUTE_UNUSED)
4879 {
4880 memset (cum, 0, sizeof (*cum));
4881
4882 if (fntype)
4883 cum->variant_cc = (riscv_cc) fntype_abi (fntype).id ();
4884 else
4885 cum->variant_cc = RISCV_CC_BASE;
4886
4887 if (fndecl)
4888 {
4889 const tree_function_decl &fn
4890 = FUNCTION_DECL_CHECK (fndecl)->function_decl;
4891
4892 if (fn.built_in_class == NOT_BUILT_IN)
4893 cum->rvv_psabi_warning = 1;
4894 }
4895 }
4896
4897 	/* Return true if TYPE is a vector type that can be passed in vector
4898 	   registers.  */
4899
4900 static bool
4901 riscv_vector_type_p (const_tree type)
4902 {
4903 	  /* Currently, only the built-in scalable vector type is allowed; in the
4904 	     future, more vector types may be allowed, such as GNU vector types.  */
4905 return riscv_vector::builtin_type_p (type);
4906 }
4907
4908 static unsigned int
4909 riscv_hard_regno_nregs (unsigned int regno, machine_mode mode);
4910
4911 /* Subroutine of riscv_get_arg_info. */
4912
4913 static rtx
4914 riscv_get_vector_arg (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
4915 machine_mode mode, bool return_p)
4916 {
4917 gcc_assert (riscv_v_ext_mode_p (mode));
4918
4919 info->mr_offset = cum->num_mrs;
4920 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
4921 {
4922 /* For scalable mask return value. */
4923 if (return_p)
4924 return gen_rtx_REG (mode, V_REG_FIRST);
4925
4926 /* For the first scalable mask argument. */
4927 if (info->mr_offset < MAX_ARGS_IN_MASK_REGISTERS)
4928 {
4929 info->num_mrs = 1;
4930 return gen_rtx_REG (mode, V_REG_FIRST);
4931 }
4932 else
4933 {
4934 	  /* The remaining scalable mask arguments are treated as scalable
4935 	     data arguments.  */
4936 }
4937 }
4938
4939 	  /* Compute the number and alignment of the vector registers needed for
4940 	     this scalable vector argument.  When the mode size is less than a full
4941 	     vector, we use one vector register to pass it.  Just call
4942 	     TARGET_HARD_REGNO_NREGS for the number information.  */
4943 int nregs = riscv_hard_regno_nregs (V_ARG_FIRST, mode);
4944 int LMUL = riscv_v_ext_tuple_mode_p (mode)
4945 ? nregs / riscv_vector::get_nf (mode)
4946 : nregs;
4947 int arg_reg_start = V_ARG_FIRST - V_REG_FIRST;
4948 int arg_reg_end = V_ARG_LAST - V_REG_FIRST;
4949 int aligned_reg_start = ROUND_UP (arg_reg_start, LMUL);
4950
4951 /* For scalable data and scalable tuple return value. */
4952 if (return_p)
4953 return gen_rtx_REG (mode, aligned_reg_start + V_REG_FIRST);
4954
4955 	  /* Iterate through the USED_VRS array to find a vector register group that
4956 	     has not been allocated and whose first register is aligned to LMUL.  */
4957 for (int i = aligned_reg_start; i + nregs - 1 <= arg_reg_end; i += LMUL)
4958 {
4959 /* The index in USED_VRS array. */
4960 int idx = i - arg_reg_start;
4961 /* Find the first register unused. */
4962 if (!cum->used_vrs[idx])
4963 {
4964 bool find_set = true;
4965 /* Ensure there are NREGS continuous unused registers. */
4966 for (int j = 1; j < nregs; j++)
4967 if (cum->used_vrs[idx + j])
4968 {
4969 find_set = false;
4970 /* Update I to the last aligned register which
4971 cannot be used and the next iteration will add
4972 LMUL step to I. */
4973 i += (j / LMUL) * LMUL;
4974 break;
4975 }
4976
4977 if (find_set)
4978 {
4979 info->num_vrs = nregs;
4980 info->vr_offset = idx;
4981 return gen_rtx_REG (mode, i + V_REG_FIRST);
4982 }
4983 }
4984 }
4985
4986 return NULL_RTX;
4987 }
4988
4989 /* Fill INFO with information about a single argument, and return an RTL
4990 	   pattern to pass or return the argument.  Return NULL_RTX if the
4991 	   argument cannot be passed or returned in registers; in that case it
4992 	   may be passed by reference or through the stack.  CUM is the cumulative
4993 	   state for earlier arguments.  MODE is the mode of this argument and TYPE is its type (if known).  NAMED is
4994 true if this is a named (fixed) argument rather than a variable one. RETURN_P
4995 is true if returning the argument, or false if passing the argument. */
4996
4997 static rtx
4998 riscv_get_arg_info (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
4999 machine_mode mode, const_tree type, bool named,
5000 bool return_p)
5001 {
5002 unsigned num_bytes, num_words;
5003 unsigned fpr_base = return_p ? FP_RETURN : FP_ARG_FIRST;
5004 unsigned gpr_base = return_p ? GP_RETURN : GP_ARG_FIRST;
5005 unsigned alignment = riscv_function_arg_boundary (mode, type);
5006
5007 memset (info, 0, sizeof (*info));
5008 info->gpr_offset = cum->num_gprs;
5009 info->fpr_offset = cum->num_fprs;
5010
5011 if (cum->rvv_psabi_warning)
5012 {
5013 	      /* Only check for the existence of a vector type.  */
5014 riscv_pass_in_vector_p (type);
5015 }
5016
5017 	  /* When the vector ABI is disabled or a scalable vector argument is
5018 	     anonymous, the argument is passed by reference.  */
5019 if (riscv_v_ext_mode_p (mode) && (!riscv_vector_abi || !named))
5020 return NULL_RTX;
5021
5022 if (named)
5023 {
5024 riscv_aggregate_field fields[2];
5025 unsigned fregno = fpr_base + info->fpr_offset;
5026 unsigned gregno = gpr_base + info->gpr_offset;
5027
5028 /* Pass one- or two-element floating-point aggregates in FPRs. */
5029 if ((info->num_fprs = riscv_pass_aggregate_in_fpr_pair_p (type, fields))
5030 && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
5031 switch (info->num_fprs)
5032 {
5033 case 1:
5034 return riscv_pass_fpr_single (mode, fregno,
5035 TYPE_MODE (fields[0].type),
5036 fields[0].offset);
5037
5038 case 2:
5039 return riscv_pass_fpr_pair (mode, fregno,
5040 TYPE_MODE (fields[0].type),
5041 fields[0].offset,
5042 fregno + 1,
5043 TYPE_MODE (fields[1].type),
5044 fields[1].offset);
5045
5046 default:
5047 gcc_unreachable ();
5048 }
5049
5050 /* Pass real and complex floating-point numbers in FPRs. */
5051 if ((info->num_fprs = riscv_pass_mode_in_fpr_p (mode))
5052 && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
5053 switch (GET_MODE_CLASS (mode))
5054 {
5055 case MODE_FLOAT:
5056 return gen_rtx_REG (mode, fregno);
5057
5058 case MODE_COMPLEX_FLOAT:
5059 return riscv_pass_fpr_pair (mode, fregno, GET_MODE_INNER (mode), 0,
5060 fregno + 1, GET_MODE_INNER (mode),
5061 GET_MODE_UNIT_SIZE (mode));
5062
5063 default:
5064 gcc_unreachable ();
5065 }
5066
5067 /* Pass structs with one float and one integer in an FPR and a GPR. */
5068 if (riscv_pass_aggregate_in_fpr_and_gpr_p (type, fields)
5069 && info->gpr_offset < MAX_ARGS_IN_REGISTERS
5070 && info->fpr_offset < MAX_ARGS_IN_REGISTERS)
5071 {
5072 info->num_gprs = 1;
5073 info->num_fprs = 1;
5074
5075 if (!SCALAR_FLOAT_TYPE_P (fields[0].type))
5076 std::swap (fregno, gregno);
5077
5078 return riscv_pass_fpr_pair (mode, fregno, TYPE_MODE (fields[0].type),
5079 fields[0].offset,
5080 gregno, TYPE_MODE (fields[1].type),
5081 fields[1].offset);
5082 }
5083
5084 /* For scalable vector argument. */
5085 if (riscv_vector_type_p (type) && riscv_v_ext_mode_p (mode))
5086 return riscv_get_vector_arg (info, cum, mode, return_p);
5087 }
5088
5089 /* Work out the size of the argument. */
5090 num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode).to_constant ();
5091 num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5092
5093 /* Doubleword-aligned varargs start on an even register boundary. */
5094 if (!named && num_bytes != 0 && alignment > BITS_PER_WORD)
5095 info->gpr_offset += info->gpr_offset & 1;
5096
5097 /* Partition the argument between registers and stack. */
5098 info->num_fprs = 0;
5099 info->num_gprs = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset);
5100 info->stack_p = (num_words - info->num_gprs) != 0;
5101
5102 if (info->num_gprs || return_p)
5103 return gen_rtx_REG (mode, gpr_base + info->gpr_offset);
5104
5105 return NULL_RTX;
5106 }
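/* For example, on RV64 with six argument GPRs already used, a 16-byte
   struct passed by value gets INFO->num_gprs == 2 (a6 and a7) and
   INFO->stack_p == false; with seven GPRs already used it gets one
   register (a7) and INFO->stack_p == true, and riscv_arg_partial_bytes
   below then reports the 8 bytes passed in a7.  */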
5107
5108 /* Implement TARGET_FUNCTION_ARG. */
5109
5110 static rtx
5111 riscv_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
5112 {
5113 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5114 struct riscv_arg_info info;
5115
5116 if (arg.end_marker_p ())
5117 	    /* Return the calling convention used by the current function.  */
5118 return gen_int_mode (cum->variant_cc, SImode);
5119
5120 return riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
5121 }
5122
5123 /* Implement TARGET_FUNCTION_ARG_ADVANCE. */
5124
5125 static void
5126 riscv_function_arg_advance (cumulative_args_t cum_v,
5127 const function_arg_info &arg)
5128 {
5129 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5130 struct riscv_arg_info info;
5131
5132 riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
5133
5134 /* Set the corresponding register in USED_VRS to used status. */
5135 for (unsigned int i = 0; i < info.num_vrs; i++)
5136 {
5137 gcc_assert (!cum->used_vrs[info.vr_offset + i]);
5138 cum->used_vrs[info.vr_offset + i] = true;
5139 }
5140
5141 if ((info.num_vrs > 0 || info.num_mrs > 0) && cum->variant_cc != RISCV_CC_V)
5142 {
5143 error ("RVV type %qT cannot be passed to an unprototyped function",
5144 arg.type);
5145 	      /* Avoid repeating the message.  */
5146 cum->variant_cc = RISCV_CC_V;
5147 }
5148
5149 /* Advance the register count. This has the effect of setting
5150 num_gprs to MAX_ARGS_IN_REGISTERS if a doubleword-aligned
5151 argument required us to skip the final GPR and pass the whole
5152 argument on the stack. */
5153 cum->num_fprs = info.fpr_offset + info.num_fprs;
5154 cum->num_gprs = info.gpr_offset + info.num_gprs;
5155 cum->num_mrs = info.mr_offset + info.num_mrs;
5156 }
5157
5158 /* Implement TARGET_ARG_PARTIAL_BYTES. */
5159
5160 static int
5161 riscv_arg_partial_bytes (cumulative_args_t cum,
5162 const function_arg_info &generic_arg)
5163 {
5164 struct riscv_arg_info arg;
5165
5166 riscv_get_arg_info (&arg, get_cumulative_args (cum), generic_arg.mode,
5167 generic_arg.type, generic_arg.named, false);
5168 return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
5169 }
5170
5171 /* Implement FUNCTION_VALUE and LIBCALL_VALUE. For normal calls,
5172 VALTYPE is the return type and MODE is VOIDmode. For libcalls,
5173 VALTYPE is null and MODE is the mode of the return value. */
5174
5175 rtx
5176 riscv_function_value (const_tree type, const_tree func, machine_mode mode)
5177 {
5178 struct riscv_arg_info info;
5179 CUMULATIVE_ARGS args;
5180
5181 if (type)
5182 {
5183 int unsigned_p = TYPE_UNSIGNED (type);
5184
5185 mode = TYPE_MODE (type);
5186
5187 	      /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes
5188 	         return values, promote the mode here too.  */
5189 mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
5190 }
5191
5192 memset (&args, 0, sizeof args);
5193
5194 const_tree arg_type = type;
5195 if (func && DECL_RESULT (func))
5196 {
5197 const tree_function_decl &fn = FUNCTION_DECL_CHECK (func)->function_decl;
5198 if (fn.built_in_class == NOT_BUILT_IN)
5199 args.rvv_psabi_warning = 1;
5200
5201 arg_type = TREE_TYPE (DECL_RESULT (func));
5202 }
5203
5204 return riscv_get_arg_info (&info, &args, mode, arg_type, true, true);
5205 }
5206
5207 /* Implement TARGET_PASS_BY_REFERENCE. */
5208
5209 static bool
5210 riscv_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
5211 {
5212 	  HOST_WIDE_INT size = arg.type_size_in_bytes ().to_constant ();
5213 struct riscv_arg_info info;
5214 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5215
5216 /* ??? std_gimplify_va_arg_expr passes NULL for cum. Fortunately, we
5217 never pass variadic arguments in floating-point and vector registers,
5218 so we can avoid the call to riscv_get_arg_info in this case. */
5219 if (cum != NULL)
5220 {
5221 /* Don't pass by reference if we can use a floating-point register. */
5222 riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
5223 if (info.num_fprs)
5224 return false;
5225
5226 /* Don't pass by reference if we can use vector register groups. */
5227 if (info.num_vrs > 0 || info.num_mrs > 0)
5228 return false;
5229 }
5230
5231 	  /* When the vector ABI is disabled (no --param=riscv-vector-abi option),
5232 	     or a scalable vector argument is anonymous or cannot be passed through
5233 	     vector registers, the argument is passed by reference.  */
5234 if (riscv_v_ext_mode_p (arg.mode))
5235 return true;
5236
5237 /* Pass by reference if the data do not fit in two integer registers. */
5238 return !IN_RANGE (size, 0, 2 * UNITS_PER_WORD);
5239 }
5240
5241 /* Implement TARGET_RETURN_IN_MEMORY. */
5242
5243 static bool
5244 riscv_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
5245 {
5246 CUMULATIVE_ARGS args;
5247 cumulative_args_t cum = pack_cumulative_args (&args);
5248
5249 /* The rules for returning in memory are the same as for passing the
5250 first named argument by reference. */
5251 memset (&args, 0, sizeof args);
5252 function_arg_info arg (const_cast<tree> (type), /*named=*/true);
5253 return riscv_pass_by_reference (cum, arg);
5254 }
5255
5256 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5257
5258 static void
5259 riscv_setup_incoming_varargs (cumulative_args_t cum,
5260 const function_arg_info &arg,
5261 int *pretend_size ATTRIBUTE_UNUSED, int no_rtl)
5262 {
5263 CUMULATIVE_ARGS local_cum;
5264 int gp_saved;
5265
5266 /* The caller has advanced CUM up to, but not beyond, the last named
5267 argument. Advance a local copy of CUM past the last "real" named
5268 argument, to find out how many registers are left over. */
5269 local_cum = *get_cumulative_args (cum);
5270 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)))
5271 riscv_function_arg_advance (pack_cumulative_args (&local_cum), arg);
5272
5273 	  /* Find out how many registers we need to save.  */
5274 gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs;
5275
5276 if (!no_rtl && gp_saved > 0)
5277 {
5278 rtx ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5279 REG_PARM_STACK_SPACE (cfun->decl)
5280 - gp_saved * UNITS_PER_WORD);
5281 rtx mem = gen_frame_mem (BLKmode, ptr);
5282 set_mem_alias_set (mem, get_varargs_alias_set ());
5283
5284 move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST,
5285 mem, gp_saved);
5286 }
5287 if (REG_PARM_STACK_SPACE (cfun->decl) == 0)
5288 cfun->machine->varargs_size = gp_saved * UNITS_PER_WORD;
5289 }
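/* For example, for "int f (int n, ...)" on RV64 the named argument N
   occupies A0, so GP_SAVED == 7 and the code above stores A1..A7
   (56 bytes) just below the incoming-arguments pointer, letting va_arg
   walk the registers as if the caller had passed them on the stack.  */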
5290
5291 /* Return the descriptor of the Standard Vector Calling Convention Variant. */
5292
5293 static const predefined_function_abi &
5294 riscv_v_abi ()
5295 {
5296 predefined_function_abi &v_abi = function_abis[RISCV_CC_V];
5297 if (!v_abi.initialized_p ())
5298 {
5299 HARD_REG_SET full_reg_clobbers
5300 = default_function_abi.full_reg_clobbers ();
5301 /* Callee-saved vector registers: v1-v7, v24-v31. */
5302 for (int regno = V_REG_FIRST + 1; regno <= V_REG_FIRST + 7; regno += 1)
5303 CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
5304 for (int regno = V_REG_FIRST + 24; regno <= V_REG_FIRST + 31; regno += 1)
5305 CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
5306 v_abi.initialize (RISCV_CC_V, full_reg_clobbers);
5307 }
5308 return v_abi;
5309 }
5310
5311 /* Return true if a function with type FNTYPE returns its value in
5312 RISC-V V registers. */
5313
5314 static bool
5315 riscv_return_value_is_vector_type_p (const_tree fntype)
5316 {
5317 tree return_type = TREE_TYPE (fntype);
5318
5319 return riscv_vector_type_p (return_type);
5320 }
5321
5322 /* Return true if a function with type FNTYPE takes arguments in
5323 RISC-V V registers. */
5324
5325 static bool
5326 riscv_arguments_is_vector_type_p (const_tree fntype)
5327 {
5328 for (tree chain = TYPE_ARG_TYPES (fntype); chain && chain != void_list_node;
5329 chain = TREE_CHAIN (chain))
5330 {
5331 tree arg_type = TREE_VALUE (chain);
5332 if (riscv_vector_type_p (arg_type))
5333 return true;
5334 }
5335
5336 return false;
5337 }
5338
5339 /* Implement TARGET_FNTYPE_ABI. */
5340
5341 static const predefined_function_abi &
5342 riscv_fntype_abi (const_tree fntype)
5343 {
5344 	  /* This implements an experimental vector calling convention; the
5345 	     proposal can be viewed at the link below:
5346 https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/389
5347
5348 You can enable this feature via the `--param=riscv-vector-abi` compiler
5349 option. */
5350 if (riscv_vector_abi
5351 && (riscv_return_value_is_vector_type_p (fntype)
5352 || riscv_arguments_is_vector_type_p (fntype)))
5353 return riscv_v_abi ();
5354
5355 return default_function_abi;
5356 }
5357
5358 	/* Return the RISC-V calling convention of a call_insn.  */
5359 riscv_cc
5360 get_riscv_cc (const rtx use)
5361 {
5362 gcc_assert (GET_CODE (use) == USE);
5363 rtx unspec = XEXP (use, 0);
5364 gcc_assert (GET_CODE (unspec) == UNSPEC
5365 && XINT (unspec, 1) == UNSPEC_CALLEE_CC);
5366 riscv_cc cc = (riscv_cc) INTVAL (XVECEXP (unspec, 0, 0));
5367 gcc_assert (cc < RISCV_CC_UNKNOWN);
5368 return cc;
5369 }
5370
5371 /* Implement TARGET_INSN_CALLEE_ABI. */
5372
5373 const predefined_function_abi &
5374 riscv_insn_callee_abi (const rtx_insn *insn)
5375 {
5376 rtx pat = PATTERN (insn);
5377 gcc_assert (GET_CODE (pat) == PARALLEL);
5378 riscv_cc cc = get_riscv_cc (XVECEXP (pat, 0, 1));
5379 return function_abis[cc];
5380 }
5381
5382 /* Handle an attribute requiring a FUNCTION_DECL;
5383 arguments as in struct attribute_spec.handler. */
5384 static tree
5385 riscv_handle_fndecl_attribute (tree *node, tree name,
5386 tree args ATTRIBUTE_UNUSED,
5387 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5388 {
5389 if (TREE_CODE (*node) != FUNCTION_DECL)
5390 {
5391 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5392 name);
5393 *no_add_attrs = true;
5394 }
5395
5396 return NULL_TREE;
5397 }
5398
5399 	/* Verify type-based attributes.  NODE is what the attribute is being
5400 applied to. NAME is the attribute name. ARGS are the attribute args.
5401 FLAGS gives info about the context. NO_ADD_ATTRS should be set to true if
5402 the attribute should be ignored. */
5403
5404 static tree
5405 riscv_handle_type_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5406 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5407 {
5408 /* Check for an argument. */
5409 if (is_attribute_p ("interrupt", name))
5410 {
5411 if (args)
5412 {
5413 tree cst = TREE_VALUE (args);
5414 const char *string;
5415
5416 if (TREE_CODE (cst) != STRING_CST)
5417 {
5418 warning (OPT_Wattributes,
5419 "%qE attribute requires a string argument",
5420 name);
5421 *no_add_attrs = true;
5422 return NULL_TREE;
5423 }
5424
5425 string = TREE_STRING_POINTER (cst);
5426 if (strcmp (string, "user") && strcmp (string, "supervisor")
5427 && strcmp (string, "machine"))
5428 {
5429 warning (OPT_Wattributes,
5430 "argument to %qE attribute is not %<\"user\"%>, %<\"supervisor\"%>, "
5431 "or %<\"machine\"%>", name);
5432 *no_add_attrs = true;
5433 }
5434 }
5435 }
5436
5437 return NULL_TREE;
5438 }
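
/* For illustration, the handlers above accept source like the following
   (hypothetical declarations, not from this file):

     void __attribute__ ((interrupt ("machine"))) mi_handler (void);
     void __attribute__ ((naked)) asm_only (void);

   and warn if "naked" is applied to a non-function or if the interrupt
   argument is not "user", "supervisor" or "machine".  */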
5439
5440 /* Return true if function TYPE is an interrupt function. */
5441 static bool
5442 riscv_interrupt_type_p (tree type)
5443 {
5444 return lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type)) != NULL;
5445 }
5446
5447 /* Return true if FUNC is a naked function. */
5448 static bool
5449 riscv_naked_function_p (tree func)
5450 {
5451 tree func_decl = func;
5452 if (func == NULL_TREE)
5453 func_decl = current_function_decl;
5454 return NULL_TREE != lookup_attribute ("naked", DECL_ATTRIBUTES (func_decl));
5455 }
5456
5457 /* Implement TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS. */
5458 static bool
5459 riscv_allocate_stack_slots_for_args ()
5460 {
5461 /* Naked functions should not allocate stack slots for arguments. */
5462 return !riscv_naked_function_p (current_function_decl);
5463 }
5464
5465 /* Implement TARGET_WARN_FUNC_RETURN. */
5466 static bool
5467 riscv_warn_func_return (tree decl)
5468 {
5469 /* Naked functions are implemented entirely in assembly, including the
5470 return sequence, so suppress warnings about this. */
5471 return !riscv_naked_function_p (decl);
5472 }
5473
5474 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5475
5476 static void
5477 riscv_va_start (tree valist, rtx nextarg)
5478 {
5479 nextarg = plus_constant (Pmode, nextarg, -cfun->machine->varargs_size);
5480 std_expand_builtin_va_start (valist, nextarg);
5481 }
5482
5483 /* Make ADDR suitable for use as a call or sibcall target. */
5484
5485 rtx
5486 riscv_legitimize_call_address (rtx addr)
5487 {
5488 if (!call_insn_operand (addr, VOIDmode))
5489 {
5490 rtx reg = RISCV_CALL_ADDRESS_TEMP (Pmode);
5491 riscv_emit_move (reg, addr);
5492 return reg;
5493 }
5494 return addr;
5495 }
5496
5497 /* Print symbolic operand OP, which is part of a HIGH or LO_SUM
5498 in context CONTEXT. HI_RELOC indicates a high-part reloc. */
5499
5500 static void
5501 riscv_print_operand_reloc (FILE *file, rtx op, bool hi_reloc)
5502 {
5503 const char *reloc;
5504
5505 switch (riscv_classify_symbolic_expression (op))
5506 {
5507 case SYMBOL_ABSOLUTE:
5508 reloc = hi_reloc ? "%hi" : "%lo";
5509 break;
5510
5511 case SYMBOL_PCREL:
5512 reloc = hi_reloc ? "%pcrel_hi" : "%pcrel_lo";
5513 break;
5514
5515 case SYMBOL_TLS_LE:
5516 reloc = hi_reloc ? "%tprel_hi" : "%tprel_lo";
5517 break;
5518
5519 default:
5520 output_operand_lossage ("invalid use of '%%%c'", hi_reloc ? 'h' : 'R');
5521 return;
5522 }
5523
5524 fprintf (file, "%s(", reloc);
5525 output_addr_const (file, riscv_strip_unspec_address (op));
5526 fputc (')', file);
5527 }
5528
5529 /* Return the memory model that encapsulates both given models. */
5530
5531 enum memmodel
5532 riscv_union_memmodels (enum memmodel model1, enum memmodel model2)
5533 {
5534 model1 = memmodel_base (model1);
5535 model2 = memmodel_base (model2);
5536
5537 enum memmodel weaker = model1 <= model2 ? model1 : model2;
5538 enum memmodel stronger = model1 > model2 ? model1 : model2;
5539
5540 switch (stronger)
5541 {
5542 case MEMMODEL_SEQ_CST:
5543 case MEMMODEL_ACQ_REL:
5544 return stronger;
5545 case MEMMODEL_RELEASE:
5546 if (weaker == MEMMODEL_ACQUIRE || weaker == MEMMODEL_CONSUME)
5547 return MEMMODEL_ACQ_REL;
5548 else
5549 return stronger;
5550 case MEMMODEL_ACQUIRE:
5551 case MEMMODEL_CONSUME:
5552 case MEMMODEL_RELAXED:
5553 return stronger;
5554 default:
5555 gcc_unreachable ();
5556 }
5557 }
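
/* Some illustrative unions computed by the function above:
   RELEASE + ACQUIRE -> ACQ_REL, RELAXED + ACQUIRE -> ACQUIRE,
   SEQ_CST + anything -> SEQ_CST.  */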
5558
5559 /* Return true if the .AQ suffix should be added to an AMO to implement the
5560 acquire portion of memory model MODEL. */
5561
5562 static bool
5563 riscv_memmodel_needs_amo_acquire (enum memmodel model)
5564 {
5565 /* ZTSO amo mappings require no annotations. */
5566 if (TARGET_ZTSO)
5567 return false;
5568
5569 switch (model)
5570 {
5571 case MEMMODEL_ACQ_REL:
5572 case MEMMODEL_SEQ_CST:
5573 case MEMMODEL_ACQUIRE:
5574 case MEMMODEL_CONSUME:
5575 return true;
5576
5577 case MEMMODEL_RELEASE:
5578 case MEMMODEL_RELAXED:
5579 return false;
5580
5581 default:
5582 gcc_unreachable ();
5583 }
5584 }
5585
5586 /* Return true if the .RL suffix should be added to an AMO to implement the
5587 release portion of memory model MODEL. */
5588
5589 static bool
5590 riscv_memmodel_needs_amo_release (enum memmodel model)
5591 {
5592 /* ZTSO amo mappings require no annotations. */
5593 if (TARGET_ZTSO)
5594 return false;
5595
5596 switch (model)
5597 {
5598 case MEMMODEL_ACQ_REL:
5599 case MEMMODEL_SEQ_CST:
5600 case MEMMODEL_RELEASE:
5601 return true;
5602
5603 case MEMMODEL_ACQUIRE:
5604 case MEMMODEL_CONSUME:
5605 case MEMMODEL_RELAXED:
5606 return false;
5607
5608 default:
5609 gcc_unreachable ();
5610 }
5611 }
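
/* Taken together, the two predicates above choose the AMO annotation.
   Illustrative RVWMO mappings (suffixes, not full patterns):
   MEMMODEL_ACQUIRE -> .aq, MEMMODEL_RELEASE -> .rl,
   MEMMODEL_SEQ_CST -> .aqrl, MEMMODEL_RELAXED -> no suffix;
   under Ztso none of them needs a suffix.  */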
5612
5613 /* Return the REGNO alignment of vector MODE.
5614 The alignment equals LMUL when LMUL >= 1.
5615 Otherwise, the alignment is 1. */
5616 int
5617 riscv_get_v_regno_alignment (machine_mode mode)
5618 {
5619 /* Per spec section 3.3.2, for LMUL = 2, 4 or 8 the register numbers must
5620 be a multiple of 2, 4 or 8, but mask vector registers may use any number. */
5621 int lmul = 1;
5622 machine_mode rvv_mode = mode;
5623 if (riscv_v_ext_vls_mode_p (rvv_mode))
5624 {
5625 int size = GET_MODE_BITSIZE (rvv_mode).to_constant ();
5626 if (size < TARGET_MIN_VLEN)
5627 return 1;
5628 else
5629 return size / TARGET_MIN_VLEN;
5630 }
5631 if (riscv_v_ext_tuple_mode_p (rvv_mode))
5632 rvv_mode = riscv_vector::get_subpart_mode (rvv_mode);
5633 poly_int64 size = GET_MODE_SIZE (rvv_mode);
5634 if (known_gt (size, UNITS_PER_V_REG))
5635 lmul = exact_div (size, UNITS_PER_V_REG).to_constant ();
5636 return lmul;
5637 }
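
/* A worked example for the function above, assuming TARGET_MIN_VLEN is
   128 so that UNITS_PER_V_REG is 16 bytes: an LMUL=4 mode of 64 bytes
   gives exact_div (64, 16) == 4, so its register number must be a
   multiple of 4, while fractional-LMUL and mask modes return 1.  */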
5638
5639 /* Implement TARGET_PRINT_OPERAND. The RISCV-specific operand codes are:
5640
5641 'h' Print the high-part relocation associated with OP, after stripping
5642 any outermost HIGH.
5643 'R' Print the low-part relocation associated with OP.
5644 'C' Print the integer branch condition for comparison OP.
5645 'N' Print the inverse of the integer branch condition for comparison OP.
5646 'A' Print the atomic operation suffix for memory model OP.
5647 'I' Print the LR suffix for memory model OP.
5648 'J' Print the SC suffix for memory model OP.
5649 'z' Print x0 if OP is zero, otherwise print OP normally.
5650 'i' Print i if the operand is not a register.
5651 'S' Print shift-index of single-bit mask OP.
5652 'T' Print shift-index of inverted single-bit mask OP.
5653 '~' Print w if TARGET_64BIT is true; otherwise print nothing.
5654
5655 Please keep this list and the list in riscv.md in sync. */
5656
5657 static void
5658 riscv_print_operand (FILE *file, rtx op, int letter)
5659 {
5660 /* `~` does not take an operand, so op will be null.
5661 Check for it before accessing op. */
5662
5663 if (letter == '~')
5664 {
5665 if (TARGET_64BIT)
5666 fputc ('w', file);
5667 return;
5668 }
5669 machine_mode mode = GET_MODE (op);
5670 enum rtx_code code = GET_CODE (op);
5671
5672 switch (letter)
5673 {
5674 case 'o': {
5675 /* Print 'OP' variant for RVV instructions.
5676 1. If the operand is VECTOR REG, we print 'v'(vnsrl.wv).
5677 2. If the operand is CONST_INT/CONST_VECTOR, we print 'i'(vnsrl.wi).
5678 3. If the operand is SCALAR REG, we print 'x'(vnsrl.wx). */
5679 if (riscv_v_ext_mode_p (mode))
5680 {
5681 if (REG_P (op))
5682 asm_fprintf (file, "v");
5683 else if (CONST_VECTOR_P (op))
5684 asm_fprintf (file, "i");
5685 else
5686 output_operand_lossage ("invalid vector operand");
5687 }
5688 else
5689 {
5690 if (CONST_INT_P (op))
5691 asm_fprintf (file, "i");
5692 else
5693 asm_fprintf (file, "x");
5694 }
5695 break;
5696 }
5697 case 'v': {
5698 rtx elt;
5699
5700 if (REG_P (op))
5701 asm_fprintf (file, "%s", reg_names[REGNO (op)]);
5702 else
5703 {
5704 if (!const_vec_duplicate_p (op, &elt))
5705 output_operand_lossage ("invalid vector constant");
5706 else if (satisfies_constraint_Wc0 (op))
5707 asm_fprintf (file, "0");
5708 else if (satisfies_constraint_vi (op)
5709 || satisfies_constraint_vj (op)
5710 || satisfies_constraint_vk (op))
5711 asm_fprintf (file, "%wd", INTVAL (elt));
5712 else
5713 output_operand_lossage ("invalid vector constant");
5714 }
5715 break;
5716 }
5717 case 'V': {
5718 rtx elt;
5719 if (!const_vec_duplicate_p (op, &elt))
5720 output_operand_lossage ("invalid vector constant");
5721 else if (satisfies_constraint_vj (op))
5722 asm_fprintf (file, "%wd", -INTVAL (elt));
5723 else
5724 output_operand_lossage ("invalid vector constant");
5725 break;
5726 }
5727 case 'm': {
5728 if (riscv_v_ext_mode_p (mode))
5729 {
5730 /* Calculate lmul according to mode and print the value. */
5731 int lmul = riscv_get_v_regno_alignment (mode);
5732 asm_fprintf (file, "%d", lmul);
5733 }
5734 else if (code == CONST_INT)
5735 {
5736 /* If it is a const_int value, it denotes the VLMUL field enum. */
5737 unsigned int vlmul = UINTVAL (op);
5738 switch (vlmul)
5739 {
5740 case riscv_vector::LMUL_1:
5741 asm_fprintf (file, "%s", "m1");
5742 break;
5743 case riscv_vector::LMUL_2:
5744 asm_fprintf (file, "%s", "m2");
5745 break;
5746 case riscv_vector::LMUL_4:
5747 asm_fprintf (file, "%s", "m4");
5748 break;
5749 case riscv_vector::LMUL_8:
5750 asm_fprintf (file, "%s", "m8");
5751 break;
5752 case riscv_vector::LMUL_F8:
5753 asm_fprintf (file, "%s", "mf8");
5754 break;
5755 case riscv_vector::LMUL_F4:
5756 asm_fprintf (file, "%s", "mf4");
5757 break;
5758 case riscv_vector::LMUL_F2:
5759 asm_fprintf (file, "%s", "mf2");
5760 break;
5761 default:
5762 gcc_unreachable ();
5763 }
5764 }
5765 else
5766 output_operand_lossage ("invalid vector constant");
5767 break;
5768 }
5769 case 'p': {
5770 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
5771 {
5772 /* Print for RVV mask operand.
5773 If op is reg, print ",v0.t".
5774 Otherwise, don't print anything. */
5775 if (code == REG)
5776 fprintf (file, ",%s.t", reg_names[REGNO (op)]);
5777 }
5778 else if (code == CONST_INT)
5779 {
5780 /* Tail && Mask policy. */
5781 asm_fprintf (file, "%s", IS_AGNOSTIC (UINTVAL (op)) ? "a" : "u");
5782 }
5783 else
5784 output_operand_lossage ("invalid vector constant");
5785 break;
5786 }
5787 case 'h':
5788 if (code == HIGH)
5789 op = XEXP (op, 0);
5790 riscv_print_operand_reloc (file, op, true);
5791 break;
5792
5793 case 'R':
5794 riscv_print_operand_reloc (file, op, false);
5795 break;
5796
5797 case 'C':
5798 /* The RTL names match the instruction names. */
5799 fputs (GET_RTX_NAME (code), file);
5800 break;
5801
5802 case 'N':
5803 /* The RTL names match the instruction names. */
5804 fputs (GET_RTX_NAME (reverse_condition (code)), file);
5805 break;
5806
5807 case 'A': {
5808 const enum memmodel model = memmodel_base (INTVAL (op));
5809 if (riscv_memmodel_needs_amo_acquire (model)
5810 && riscv_memmodel_needs_amo_release (model))
5811 fputs (".aqrl", file);
5812 else if (riscv_memmodel_needs_amo_acquire (model))
5813 fputs (".aq", file);
5814 else if (riscv_memmodel_needs_amo_release (model))
5815 fputs (".rl", file);
5816 break;
5817 }
5818
5819 case 'I': {
5820 const enum memmodel model = memmodel_base (INTVAL (op));
5821 if (TARGET_ZTSO && model != MEMMODEL_SEQ_CST)
5822 /* LR ops only have an annotation for SEQ_CST in the Ztso mapping. */
5823 break;
5824 else if (model == MEMMODEL_SEQ_CST)
5825 fputs (".aqrl", file);
5826 else if (riscv_memmodel_needs_amo_acquire (model))
5827 fputs (".aq", file);
5828 break;
5829 }
5830
5831 case 'J': {
5832 const enum memmodel model = memmodel_base (INTVAL (op));
5833 if (TARGET_ZTSO && model == MEMMODEL_SEQ_CST)
5834 /* SC ops only have an annotation for SEQ_CST in the Ztso mapping. */
5835 fputs (".rl", file);
5836 else if (TARGET_ZTSO)
5837 break;
5838 else if (riscv_memmodel_needs_amo_release (model))
5839 fputs (".rl", file);
5840 break;
5841 }
5842
5843 case 'i':
5844 if (code != REG)
5845 fputs ("i", file);
5846 break;
5847
5848 case 'B':
5849 fputs (GET_RTX_NAME (code), file);
5850 break;
5851
5852 case 'S':
5853 {
5854 rtx newop = GEN_INT (ctz_hwi (INTVAL (op)));
5855 output_addr_const (file, newop);
5856 break;
5857 }
5858 case 'T':
5859 {
5860 rtx newop = GEN_INT (ctz_hwi (~INTVAL (op)));
5861 output_addr_const (file, newop);
5862 break;
5863 }
5864 case 'X':
5865 {
5866 int ival = INTVAL (op) + 1;
5867 rtx newop = GEN_INT (ctz_hwi (ival) + 1);
5868 output_addr_const (file, newop);
5869 break;
5870 }
5871 default:
5872 switch (code)
5873 {
5874 case REG:
5875 if (letter && letter != 'z')
5876 output_operand_lossage ("invalid use of '%%%c'", letter);
5877 fprintf (file, "%s", reg_names[REGNO (op)]);
5878 break;
5879
5880 case MEM:
5881 if (letter && letter != 'z')
5882 output_operand_lossage ("invalid use of '%%%c'", letter);
5883 else
5884 output_address (mode, XEXP (op, 0));
5885 break;
5886
5887 case CONST_DOUBLE:
5888 {
5889 if (letter == 'z' && op == CONST0_RTX (GET_MODE (op)))
5890 {
5891 fputs (reg_names[GP_REG_FIRST], file);
5892 break;
5893 }
5894
5895 int fli_index = riscv_float_const_rtx_index_for_fli (op);
5896 if (fli_index == -1 || fli_index > 31)
5897 {
5898 output_operand_lossage ("invalid use of '%%%c'", letter);
5899 break;
5900 }
5901 asm_fprintf (file, "%s", fli_value_print[fli_index]);
5902 break;
5903 }
5904
5905 default:
5906 if (letter == 'z' && op == CONST0_RTX (GET_MODE (op)))
5907 fputs (reg_names[GP_REG_FIRST], file);
5908 else if (letter && letter != 'z')
5909 output_operand_lossage ("invalid use of '%%%c'", letter);
5910 else
5911 output_addr_const (file, riscv_strip_unspec_address (op));
5912 break;
5913 }
5914 }
5915 }
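
/* Illustrative uses of the operand codes above (hypothetical templates,
   not taken from riscv.md): "%C0" on an EQ comparison prints "eq",
   "%z1" on (const_int 0) prints "zero", and "%A2" on a SEQ_CST memory
   model operand prints ".aqrl".  */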
5916
5917 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P */
5918 static bool
5919 riscv_print_operand_punct_valid_p (unsigned char code)
5920 {
5921 return (code == '~');
5922 }
5923
5924 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
5925
5926 static void
5927 riscv_print_operand_address (FILE *file, machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5928 {
5929 struct riscv_address_info addr;
5930
5931 if (th_print_operand_address (file, mode, x))
5932 return;
5933
5934 if (riscv_classify_address (&addr, x, word_mode, true))
5935 switch (addr.type)
5936 {
5937 case ADDRESS_REG:
5938 output_addr_const (file, riscv_strip_unspec_address (addr.offset));
5939 fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]);
5940 return;
5941
5942 case ADDRESS_LO_SUM:
5943 riscv_print_operand_reloc (file, addr.offset, false);
5944 fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]);
5945 return;
5946
5947 case ADDRESS_CONST_INT:
5948 output_addr_const (file, x);
5949 fprintf (file, "(%s)", reg_names[GP_REG_FIRST]);
5950 return;
5951
5952 case ADDRESS_SYMBOLIC:
5953 output_addr_const (file, riscv_strip_unspec_address (x));
5954 return;
5955
5956 default:
5957 gcc_unreachable ();
5958 }
5959
5960 gcc_unreachable ();
5961 }
5962
5963 static bool
5964 riscv_size_ok_for_small_data_p (int size)
5965 {
5966 return g_switch_value && IN_RANGE (size, 1, g_switch_value);
5967 }
5968
5969 /* Return true if EXP should be placed in the small data section. */
5970
5971 static bool
5972 riscv_in_small_data_p (const_tree x)
5973 {
5974 /* default_use_anchors_for_symbol_p doesn't gather small data, so it cannot
5975 use the anchor symbol to address nearby objects. In the large code
5976 model, the anchor optimization gives better results, so skip small data. */
5977 if (riscv_cmodel == CM_LARGE)
5978 return false;
5979
5980 if (TREE_CODE (x) == STRING_CST || TREE_CODE (x) == FUNCTION_DECL)
5981 return false;
5982
5983 if (VAR_P (x) && DECL_SECTION_NAME (x))
5984 {
5985 const char *sec = DECL_SECTION_NAME (x);
5986 return strcmp (sec, ".sdata") == 0 || strcmp (sec, ".sbss") == 0;
5987 }
5988
5989 return riscv_size_ok_for_small_data_p (int_size_in_bytes (TREE_TYPE (x)));
5990 }
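
/* For example (illustrative): with -msmall-data-limit=8, an 8-byte
   object such as "int pair[2];" satisfies
   riscv_size_ok_for_small_data_p and is placed in .sdata for
   gp-relative addressing, while an object with an explicit section
   other than .sdata/.sbss is kept out.  */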
5991
5992 /* Switch to the appropriate section for output of DECL. */
5993
5994 static section *
5995 riscv_select_section (tree decl, int reloc,
5996 unsigned HOST_WIDE_INT align)
5997 {
5998 switch (categorize_decl_for_section (decl, reloc))
5999 {
6000 case SECCAT_SRODATA:
6001 return get_named_section (decl, ".srodata", reloc);
6002
6003 default:
6004 return default_elf_select_section (decl, reloc, align);
6005 }
6006 }
6007
6008 /* Switch to the appropriate section for output of DECL. */
6009
6010 static void
6011 riscv_unique_section (tree decl, int reloc)
6012 {
6013 const char *prefix = NULL;
6014 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
6015
6016 switch (categorize_decl_for_section (decl, reloc))
6017 {
6018 case SECCAT_SRODATA:
6019 prefix = one_only ? ".sr" : ".srodata";
6020 break;
6021
6022 default:
6023 break;
6024 }
6025 if (prefix)
6026 {
6027 const char *name, *linkonce;
6028 char *string;
6029
6030 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
6031 name = targetm.strip_name_encoding (name);
6032
6033 /* If we're using one_only, then there needs to be a .gnu.linkonce
6034 prefix to the section name. */
6035 linkonce = one_only ? ".gnu.linkonce" : "";
6036
6037 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
6038
6039 set_decl_section_name (decl, string);
6040 return;
6041 }
6042 default_unique_section (decl, reloc);
6043 }
6044
6045 /* Constant pools are per-function when in large code model. */
6046
6047 static inline bool
6048 riscv_can_use_per_function_literal_pools_p (void)
6049 {
6050 return riscv_cmodel == CM_LARGE;
6051 }
6052
6053 static bool
6054 riscv_use_blocks_for_constant_p (machine_mode, const_rtx)
6055 {
6056 /* We can't use blocks for constants when we're using a per-function
6057 constant pool. */
6058 return !riscv_can_use_per_function_literal_pools_p ();
6059 }
6060
6061 /* Return a section for X, handling small data. */
6062
6063 static section *
6064 riscv_elf_select_rtx_section (machine_mode mode, rtx x,
6065 unsigned HOST_WIDE_INT align)
6066 {
6067 /* The literal pool stays with the function. */
6068 if (riscv_can_use_per_function_literal_pools_p ())
6069 return function_section (current_function_decl);
6070
6071 section *s = default_elf_select_rtx_section (mode, x, align);
6072
6073 if (riscv_size_ok_for_small_data_p (GET_MODE_SIZE (mode).to_constant ()))
6074 {
6075 if (startswith (s->named.name, ".rodata.cst"))
6076 {
6077 /* Rename .rodata.cst* to .srodata.cst*. */
6078 char *name = (char *) alloca (strlen (s->named.name) + 2);
6079 sprintf (name, ".s%s", s->named.name + 1);
6080 return get_section (name, s->named.common.flags, NULL);
6081 }
6082
6083 if (s == data_section)
6084 return sdata_section;
6085 }
6086
6087 return s;
6088 }
6089
6090 /* Make the last instruction frame-related and note that it performs
6091 the operation described by FRAME_PATTERN. */
6092
6093 static void
6094 riscv_set_frame_expr (rtx frame_pattern)
6095 {
6096 rtx insn;
6097
6098 insn = get_last_insn ();
6099 RTX_FRAME_RELATED_P (insn) = 1;
6100 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6101 frame_pattern,
6102 REG_NOTES (insn));
6103 }
6104
6105 /* Return a frame-related rtx that stores REG at MEM.
6106 REG must be a single register. */
6107
6108 static rtx
6109 riscv_frame_set (rtx mem, rtx reg)
6110 {
6111 rtx set = gen_rtx_SET (mem, reg);
6112 RTX_FRAME_RELATED_P (set) = 1;
6113 return set;
6114 }
6115
6116 /* Returns true if the current function might contain a far jump. */
6117
6118 static bool
6119 riscv_far_jump_used_p ()
6120 {
6121 size_t func_size = 0;
6122
6123 if (cfun->machine->far_jump_used)
6124 return true;
6125
6126 /* We can't change far_jump_used during or after reload, as there is
6127 no chance to change stack frame layout. So we must rely on the
6128 conservative heuristic below having done the right thing. */
6129 if (reload_in_progress || reload_completed)
6130 return false;
6131
6132 /* Estimate the function length. */
6133 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
6134 func_size += get_attr_length (insn);
6135
6136 /* Conservatively determine whether some jump might exceed 1 MiB
6137 displacement. */
6138 if (func_size * 2 >= 0x100000)
6139 cfun->machine->far_jump_used = true;
6140
6141 return cfun->machine->far_jump_used;
6142 }
6143
6144 /* Return true, if the current function must save the incoming return
6145 address. */
6146
6147 static bool
6148 riscv_save_return_addr_reg_p (void)
6149 {
6150 /* The $ra register is call-clobbered: if this is not a leaf function,
6151 save it. */
6152 if (!crtl->is_leaf)
6153 return true;
6154
6155 /* We need to save the incoming return address if __builtin_eh_return
6156 is being used to set a different return address. */
6157 if (crtl->calls_eh_return)
6158 return true;
6159
6160 /* Far jumps/branches use $ra as a temporary to set up the target jump
6161 location (clobbering the incoming return address). */
6162 if (riscv_far_jump_used_p ())
6163 return true;
6164
6165 /* We need to save it if anyone has used it. */
6166 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
6167 return true;
6168
6169 /* If a frame pointer is needed in a leaf function and leaf frame
6170 pointers are not being omitted, ra must be saved as well. */
6171 if (frame_pointer_needed && crtl->is_leaf
6172 && !TARGET_OMIT_LEAF_FRAME_POINTER)
6173 return true;
6174
6175 return false;
6176 }
6177
6178 /* Return true if the current function must save register REGNO. */
6179
6180 static bool
6181 riscv_save_reg_p (unsigned int regno)
6182 {
6183 bool call_saved = !global_regs[regno] && !call_used_or_fixed_reg_p (regno);
6184 bool might_clobber = crtl->saves_all_registers
6185 || df_regs_ever_live_p (regno);
6186
6187 if (call_saved && might_clobber)
6188 return true;
6189
6190 /* Save callee-saved V registers. */
6191 if (V_REG_P (regno) && !crtl->abi->clobbers_full_reg_p (regno)
6192 && might_clobber)
6193 return true;
6194
6195 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
6196 return true;
6197
6198 if (regno == RETURN_ADDR_REGNUM && riscv_save_return_addr_reg_p ())
6199 return true;
6200
6201 /* If this is an interrupt handler, then must save extra registers. */
6202 if (cfun->machine->interrupt_handler_p)
6203 {
6204 /* zero register is always zero. */
6205 if (regno == GP_REG_FIRST)
6206 return false;
6207
6208 /* The function will return the stack pointer to its original value. */
6209 if (regno == STACK_POINTER_REGNUM)
6210 return false;
6211
6212 /* By convention, we assume that gp and tp are safe. */
6213 if (regno == GP_REGNUM || regno == THREAD_POINTER_REGNUM)
6214 return false;
6215
6216 /* We must save every register used in this function. If this is not a
6217 leaf function, then we must save all temporary registers. */
6218 if (df_regs_ever_live_p (regno)
6219 || (!crtl->is_leaf && call_used_or_fixed_reg_p (regno)))
6220 return true;
6221 }
6222
6223 return false;
6224 }
6225
6226 /* Return TRUE if Zcmp push and pop insns should be
6227 avoided, FALSE otherwise.
6228 Only use multi push & pop if all of the masked GPRs can be covered,
6229 stack accesses are SP-based,
6230 the GPRs sit at the top of the stack frame,
6231 and stack allocation does not conflict with other features. */
6232 static bool
6233 riscv_avoid_multi_push (const struct riscv_frame_info *frame)
6234 {
6235 if (!TARGET_ZCMP || crtl->calls_eh_return || frame_pointer_needed
6236 || cfun->machine->interrupt_handler_p || cfun->machine->varargs_size != 0
6237 || crtl->args.pretend_args_size != 0
6238 || (use_shrink_wrapping_separate ()
6239 && !riscv_avoid_shrink_wrapping_separate ())
6240 || (frame->mask & ~MULTI_PUSH_GPR_MASK))
6241 return true;
6242
6243 return false;
6244 }
6245
6246 /* Determine whether to use multi push insn. */
6247 static bool
6248 riscv_use_multi_push (const struct riscv_frame_info *frame)
6249 {
6250 if (riscv_avoid_multi_push (frame))
6251 return false;
6252
6253 return (frame->multi_push_adj_base != 0);
6254 }
6255
6256 /* Return TRUE if a libcall to save/restore GPRs should be
6257 avoided. FALSE otherwise. */
6258 static bool
6259 riscv_avoid_save_libcall (void)
6260 {
6261 if (!TARGET_SAVE_RESTORE
6262 || crtl->calls_eh_return
6263 || frame_pointer_needed
6264 || cfun->machine->interrupt_handler_p
6265 || cfun->machine->varargs_size != 0
6266 || crtl->args.pretend_args_size != 0)
6267 return true;
6268
6269 return false;
6270 }
6271
6272 /* Determine whether to call GPR save/restore routines. */
6273 static bool
6274 riscv_use_save_libcall (const struct riscv_frame_info *frame)
6275 {
6276 if (riscv_avoid_save_libcall ())
6277 return false;
6278
6279 return frame->save_libcall_adjustment != 0;
6280 }
6281
6282 /* Determine which GPR save/restore routine to call. */
6283
6284 static unsigned
6285 riscv_save_libcall_count (unsigned mask)
6286 {
6287 for (unsigned n = GP_REG_LAST; n > GP_REG_FIRST; n--)
6288 if (BITSET_P (mask, n))
6289 return CALLEE_SAVED_REG_NUMBER (n) + 1;
6290 abort ();
6291 }
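
/* A worked example, assuming CALLEE_SAVED_REG_NUMBER maps s0...s11 to
   0...11: if the highest saved s-register in MASK is s2, the function
   returns 3, i.e. the save/restore routines that handle ra together
   with s0-s2.  */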
6292
6293 /* Calculate the number of s registers in multi push and pop.
6294 Note that {s0-s10} is not valid in Zcmp; use {s0-s11} instead. */
6295 static unsigned
6296 riscv_multi_push_sregs_count (unsigned mask)
6297 {
6298 unsigned num = riscv_save_libcall_count (mask);
6299 return (num == ZCMP_INVALID_S0S10_SREGS_COUNTS) ? ZCMP_S0S11_SREGS_COUNTS
6300 : num;
6301 }
6302
6303 /* Calculate the number of registers (ra, s0-sx) in multi push and pop. */
6304 static unsigned
6305 riscv_multi_push_regs_count (unsigned mask)
6306 {
6307 /* 1 is for ra */
6308 return riscv_multi_push_sregs_count (mask) + 1;
6309 }
6310
6311 /* Round a poly_int value up to 16-byte alignment. */
6312 static poly_int64
6313 riscv_16bytes_align (poly_int64 value)
6314 {
6315 return aligned_upper_bound (value, 16);
6316 }
6317
6318 static HOST_WIDE_INT
6319 riscv_16bytes_align (HOST_WIDE_INT value)
6320 {
6321 return ROUND_UP (value, 16);
6322 }
6323
6324 /* Round a poly_int value up to the stack alignment. */
6325 static poly_int64
6326 riscv_stack_align (poly_int64 value)
6327 {
6328 return aligned_upper_bound (value, PREFERRED_STACK_BOUNDARY / 8);
6329 }
6330
6331 static HOST_WIDE_INT
6332 riscv_stack_align (HOST_WIDE_INT value)
6333 {
6334 return RISCV_STACK_ALIGN (value);
6335 }
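
/* Examples for the alignment helpers above, assuming
   PREFERRED_STACK_BOUNDARY / 8 == 16: riscv_stack_align (40) == 48 and
   riscv_16bytes_align (17) == 32; the poly_int64 overloads likewise
   return an aligned upper bound for scalable (VLA-sized) values.  */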
6336
6337 /* Populate the current function's riscv_frame_info structure.
6338
6339 RISC-V stack frames grow downward. High addresses are at the top.
6340
6341 +-------------------------------+
6342 | |
6343 | incoming stack arguments |
6344 | |
6345 +-------------------------------+ <-- incoming stack pointer
6346 | |
6347 | callee-allocated save area |
6348 | for arguments that are |
6349 | split between registers and |
6350 | the stack |
6351 | |
6352 +-------------------------------+ <-- arg_pointer_rtx
6353 | |
6354 | callee-allocated save area |
6355 | for register varargs |
6356 | |
6357 +-------------------------------+ <-- hard_frame_pointer_rtx;
6358 | | stack_pointer_rtx + gp_sp_offset
6359 | GPR save area | + UNITS_PER_WORD
6360 | |
6361 +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset
6362 | | + UNITS_PER_FP_REG
6363 | FPR save area |
6364 | |
6365 +-------------------------------+ <-- stack_pointer_rtx
6366 | | + v_sp_offset_top
6367 | Vector Registers save area |
6368 | |
6369 | ----------------------------- | <-- stack_pointer_rtx
6370 | padding | + v_sp_offset_bottom
6371 +-------------------------------+ <-- frame_pointer_rtx (virtual)
6372 | |
6373 | local variables |
6374 | |
6375 P +-------------------------------+
6376 | |
6377 | outgoing stack arguments |
6378 | |
6379 +-------------------------------+ <-- stack_pointer_rtx
6380
6381 Dynamic stack allocations such as alloca insert data at point P.
6382 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
6383 hard_frame_pointer_rtx unchanged. */
6384
6385 static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size);
6386
6387 static void
6388 riscv_compute_frame_info (void)
6389 {
6390 struct riscv_frame_info *frame;
6391 poly_int64 offset;
6392 bool interrupt_save_prologue_temp = false;
6393 unsigned int regno, i, num_x_saved = 0, num_f_saved = 0, x_save_size = 0;
6394 unsigned int num_v_saved = 0;
6395
6396 frame = &cfun->machine->frame;
6397
6398 /* In an interrupt function, there are two cases in which t0 needs to be used:
6399 1. If we have a large frame, then we need to save/restore t0. We check
6400 for this before clearing the frame struct.
6401 2. We need to save and restore some CSRs in the frame. */
6402 if (cfun->machine->interrupt_handler_p)
6403 {
6404 HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size);
6405 if (!POLY_SMALL_OPERAND_P (frame->total_size - step1)
6406 || (TARGET_HARD_FLOAT || TARGET_ZFINX))
6407 interrupt_save_prologue_temp = true;
6408 }
6409
6410 frame->reset();
6411
6412 if (!cfun->machine->naked_p)
6413 {
6414 /* Find out which GPRs we need to save. */
6415 for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
6416 if (riscv_save_reg_p (regno)
6417 || (interrupt_save_prologue_temp
6418 && (regno == RISCV_PROLOGUE_TEMP_REGNUM)))
6419 frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
6420
6421 /* If this function calls eh_return, we must also save and restore the
6422 EH data registers. */
6423 if (crtl->calls_eh_return)
6424 for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
6425 frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
6426
6427 /* Find out which FPRs we need to save. This loop must iterate over
6428 the same space as its companion in riscv_for_each_saved_reg. */
6429 if (TARGET_HARD_FLOAT)
6430 for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
6431 if (riscv_save_reg_p (regno))
6432 frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++;
6433
6434 /* Find out which V registers we need to save. */
6435 if (TARGET_VECTOR)
6436 for (regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
6437 if (riscv_save_reg_p (regno))
6438 {
6439 frame->vmask |= 1 << (regno - V_REG_FIRST);
6440 num_v_saved++;
6441 }
6442 }
6443
6444 if (frame->mask)
6445 {
6446 x_save_size = riscv_stack_align (num_x_saved * UNITS_PER_WORD);
6447
6448 /* 1 is for ra */
6449 unsigned num_save_restore = 1 + riscv_save_libcall_count (frame->mask);
6450 /* Only use save/restore routines if they don't alter the stack size. */
6451 if (riscv_stack_align (num_save_restore * UNITS_PER_WORD) == x_save_size
6452 && !riscv_avoid_save_libcall ())
6453 {
6454 /* On RVE the libcall saves/restores 3 registers at once, so we need
6455 to allocate three words for the callee-saved registers. */
6456 if (TARGET_RVE)
6457 x_save_size = 3 * UNITS_PER_WORD;
6458
6459 frame->save_libcall_adjustment = x_save_size;
6460 }
6461
6462 if (!riscv_avoid_multi_push (frame))
6463 {
6464 /* num(ra, s0-sx) */
6465 unsigned num_multi_push = riscv_multi_push_regs_count (frame->mask);
6466 x_save_size = riscv_stack_align (num_multi_push * UNITS_PER_WORD);
6467 frame->multi_push_adj_base = riscv_16bytes_align (x_save_size);
6468 }
6469 }
6470
6471 /* In an interrupt function, we need extra space for the initial saves of CSRs. */
6472 if (cfun->machine->interrupt_handler_p
6473 && ((TARGET_HARD_FLOAT && frame->fmask)
6474 || (TARGET_ZFINX
6475 /* Except for RISCV_PROLOGUE_TEMP_REGNUM. */
6476 && (frame->mask & ~(1 << RISCV_PROLOGUE_TEMP_REGNUM)))))
6477 /* Save and restore FCSR. */
6478 /* TODO: When P or V extensions support interrupts, some of their CSRs
6479 may also need to be saved and restored. */
6480 x_save_size += riscv_stack_align (1 * UNITS_PER_WORD);
6481
6482 /* At the bottom of the frame are any outgoing stack arguments. */
6483 offset = riscv_stack_align (crtl->outgoing_args_size);
6484 /* Next are local stack variables. */
6485 offset += riscv_stack_align (get_frame_size ());
6486 /* The virtual frame pointer points above the local variables. */
6487 frame->frame_pointer_offset = offset;
6488 /* Next are the callee-saved VRs. */
6489 if (frame->vmask)
6490 offset += riscv_stack_align (num_v_saved * UNITS_PER_V_REG);
6491 frame->v_sp_offset_top = offset;
6492 frame->v_sp_offset_bottom
6493 = frame->v_sp_offset_top - num_v_saved * UNITS_PER_V_REG;
6494 /* Next are the callee-saved FPRs. */
6495 if (frame->fmask)
6496 offset += riscv_stack_align (num_f_saved * UNITS_PER_FP_REG);
6497 frame->fp_sp_offset = offset - UNITS_PER_FP_REG;
6498 /* Next are the callee-saved GPRs. */
6499 if (frame->mask)
6500 {
6501 offset += x_save_size;
6502 /* Align to 16 bytes and add padding to the GPR part to honor
6503 both stack alignment and Zcmp push/pop size alignment. */
6504 if (riscv_use_multi_push (frame)
6505 && known_lt (offset, frame->multi_push_adj_base
6506 + ZCMP_SP_INC_STEP * ZCMP_MAX_SPIMM))
6507 offset = riscv_16bytes_align (offset);
6508 }
6509 frame->gp_sp_offset = offset - UNITS_PER_WORD;
6510 /* The hard frame pointer points above the callee-saved GPRs. */
6511 frame->hard_frame_pointer_offset = offset;
6512 /* Above the hard frame pointer is the callee-allocated varargs save area. */
6513 offset += riscv_stack_align (cfun->machine->varargs_size);
6514 /* Next is the callee-allocated area for pretend stack arguments. */
6515 offset += riscv_stack_align (crtl->args.pretend_args_size);
6516 /* Arg pointer must be below pretend args, but must be above alignment
6517 padding. */
6518 frame->arg_pointer_offset = offset - crtl->args.pretend_args_size;
6519 frame->total_size = offset;
6520
6521 /* Next come the incoming stack pointer and any incoming arguments. */
6522 }
6523
6524 /* Make sure that we're not trying to eliminate to the wrong hard frame
6525 pointer. */
6526
6527 static bool
6528 riscv_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
6529 {
6530 return (to == HARD_FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM);
6531 }
6532
6533 /* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame pointer
6534 or argument pointer. TO is either the stack pointer or hard frame
6535 pointer. */
6536
6537 poly_int64
6538 riscv_initial_elimination_offset (int from, int to)
6539 {
6540 poly_int64 src, dest;
6541
6542 riscv_compute_frame_info ();
6543
6544 if (to == HARD_FRAME_POINTER_REGNUM)
6545 dest = cfun->machine->frame.hard_frame_pointer_offset;
6546 else if (to == STACK_POINTER_REGNUM)
6547 dest = 0; /* The stack pointer is the base of all offsets, hence 0. */
6548 else
6549 gcc_unreachable ();
6550
6551 if (from == FRAME_POINTER_REGNUM)
6552 src = cfun->machine->frame.frame_pointer_offset;
6553 else if (from == ARG_POINTER_REGNUM)
6554 src = cfun->machine->frame.arg_pointer_offset;
6555 else
6556 gcc_unreachable ();
6557
6558 return src - dest;
6559 }
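
/* A worked example with made-up offsets: if frame_pointer_offset is 48
   and hard_frame_pointer_offset is 32, then eliminating
   FRAME_POINTER_REGNUM to STACK_POINTER_REGNUM yields 48 - 0 = 48,
   while eliminating it to HARD_FRAME_POINTER_REGNUM yields
   48 - 32 = 16.  */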
6560
6561 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
6562 previous frame. */
6563
6564 rtx
6565 riscv_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
6566 {
6567 if (count != 0)
6568 return const0_rtx;
6569
6570 return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM);
6571 }
6572
6573 /* Emit code to change the current function's return address to
6574 ADDRESS. SCRATCH is available as a scratch register, if needed.
6575 ADDRESS and SCRATCH are both word-mode GPRs. */
6576
6577 void
6578 riscv_set_return_address (rtx address, rtx scratch)
6579 {
6580 rtx slot_address;
6581
6582 gcc_assert (BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM));
6583 slot_address = riscv_add_offset (scratch, stack_pointer_rtx,
6584 cfun->machine->frame.gp_sp_offset.to_constant());
6585 riscv_emit_move (gen_frame_mem (GET_MODE (address), slot_address), address);
6586 }
6587
6588 /* Save register REG to MEM. Make the instruction frame-related. */
6589
6590 static void
6591 riscv_save_reg (rtx reg, rtx mem)
6592 {
6593 riscv_emit_move (mem, reg);
6594 riscv_set_frame_expr (riscv_frame_set (mem, reg));
6595 }
6596
6597 /* Restore register REG from MEM. */
6598
6599 static void
6600 riscv_restore_reg (rtx reg, rtx mem)
6601 {
6602 rtx insn = riscv_emit_move (reg, mem);
6603 rtx dwarf = NULL_RTX;
6604 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
6605
6606 if (known_gt (epilogue_cfa_sp_offset, 0)
6607 && REGNO (reg) == HARD_FRAME_POINTER_REGNUM)
6608 {
6609 rtx cfa_adjust_rtx
6610 = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
6611 gen_int_mode (epilogue_cfa_sp_offset, Pmode));
6612 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
6613 }
6614
6615 REG_NOTES (insn) = dwarf;
6616 RTX_FRAME_RELATED_P (insn) = 1;
6617 }
6618
6619 /* A function to save or store a register. The first argument is the
6620 register and the second is the stack slot. */
6621 typedef void (*riscv_save_restore_fn) (rtx, rtx);
6622
6623 /* Use FN to save or restore register REGNO. MODE is the register's
6624 mode and OFFSET is the offset of its save slot from the current
6625 stack pointer. */
6626
6627 static void
6628 riscv_save_restore_reg (machine_mode mode, int regno,
6629 HOST_WIDE_INT offset, riscv_save_restore_fn fn)
6630 {
6631 rtx mem;
6632
6633 mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, offset));
6634 fn (gen_rtx_REG (mode, regno), mem);
6635 }
6636
6637 /* Return the next register up from REGNO up to LIMIT for the callee
6638 to save or restore. OFFSET will be adjusted accordingly.
6639 If INC is set, then REGNO will be incremented first.
6640 Returns INVALID_REGNUM if there is no such next register. */
6641
6642 static unsigned int
6643 riscv_next_saved_reg (unsigned int regno, unsigned int limit,
6644 HOST_WIDE_INT *offset, bool inc = true)
6645 {
6646 if (inc)
6647 regno++;
6648
6649 while (regno <= limit)
6650 {
6651 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
6652 {
6653 *offset = *offset - UNITS_PER_WORD;
6654 return regno;
6655 }
6656
6657 regno++;
6658 }
6659 return INVALID_REGNUM;
6660 }
6661
6662 /* Return TRUE if provided REGNO is eh return data register. */
6663
6664 static bool
6665 riscv_is_eh_return_data_register (unsigned int regno)
6666 {
6667 unsigned int i, regnum;
6668
6669 if (!crtl->calls_eh_return)
6670 return false;
6671
6672 for (i = 0; (regnum = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
6673 if (regno == regnum)
6674 {
6675 return true;
6676 }
6677
6678 return false;
6679 }
6680
6681 /* Call FN for each register that is saved by the current function.
6682 SP_OFFSET is the offset of the current stack pointer from the start
6683 of the frame. */
6684
6685 static void
6686 riscv_for_each_saved_reg (poly_int64 sp_offset, riscv_save_restore_fn fn,
6687 bool epilogue, bool maybe_eh_return)
6688 {
6689 HOST_WIDE_INT offset, first_fp_offset;
6690 unsigned int regno, num_masked_fp = 0;
6691 unsigned int start = GP_REG_FIRST;
6692 unsigned int limit = GP_REG_LAST;
6693
6694 /* Save the link register and s-registers. */
6695 offset = (cfun->machine->frame.gp_sp_offset - sp_offset).to_constant ()
6696 + UNITS_PER_WORD;
6697 for (regno = riscv_next_saved_reg (start, limit, &offset, false);
6698 regno != INVALID_REGNUM;
6699 regno = riscv_next_saved_reg (regno, limit, &offset))
6700 {
6701 if (cfun->machine->reg_is_wrapped_separately[regno])
6702 continue;
6703
6704 /* If this is a normal return in a function that calls the eh_return
6705 builtin, then do not restore the eh return data registers as that
6706 would clobber the return value. But we do still need to save them
6707 in the prologue, and restore them for an exception return, so we
6708 need special handling here. */
6709 if (epilogue && !maybe_eh_return
6710 && riscv_is_eh_return_data_register (regno))
6711 continue;
6712
6713 /* In an interrupt function, save and restore the necessary CSRs on the
6714 stack so that their values are not changed. */
6715 if (regno == RISCV_PROLOGUE_TEMP_REGNUM
6716 && cfun->machine->interrupt_handler_p
6717 && ((TARGET_HARD_FLOAT && cfun->machine->frame.fmask)
6718 || (TARGET_ZFINX
6719 && (cfun->machine->frame.mask & ~(1 << RISCV_PROLOGUE_TEMP_REGNUM)))))
6720 {
6721 unsigned int fcsr_size = GET_MODE_SIZE (SImode);
6722 if (!epilogue)
6723 {
6724 riscv_save_restore_reg (word_mode, regno, offset, fn);
6725 offset -= fcsr_size;
6726 emit_insn (gen_riscv_frcsr (RISCV_PROLOGUE_TEMP (SImode)));
6727 riscv_save_restore_reg (SImode, RISCV_PROLOGUE_TEMP_REGNUM,
6728 offset, riscv_save_reg);
6729 }
6730 else
6731 {
6732 riscv_save_restore_reg (SImode, RISCV_PROLOGUE_TEMP_REGNUM,
6733 offset - fcsr_size, riscv_restore_reg);
6734 emit_insn (gen_riscv_fscsr (RISCV_PROLOGUE_TEMP (SImode)));
6735 riscv_save_restore_reg (word_mode, regno, offset, fn);
6736 offset -= fcsr_size;
6737 }
6738 continue;
6739 }
6740
6741 if (TARGET_XTHEADMEMPAIR)
6742 {
6743 /* Get the next reg/offset pair. */
6744 HOST_WIDE_INT offset2 = offset;
6745 unsigned int regno2 = riscv_next_saved_reg (regno, limit, &offset2);
6746
6747 /* Validate everything before emitting a mempair instruction. */
6748 if (regno2 != INVALID_REGNUM
6749 && !cfun->machine->reg_is_wrapped_separately[regno2]
6750 && !(epilogue && !maybe_eh_return
6751 && riscv_is_eh_return_data_register (regno2)))
6752 {
6753 bool load_p = (fn == riscv_restore_reg);
6754 rtx operands[4];
6755 th_mempair_prepare_save_restore_operands (operands,
6756 load_p, word_mode,
6757 regno, offset,
6758 regno2, offset2);
6759
6760 /* If the operands fit into a mempair insn, then emit one. */
6761 if (th_mempair_operands_p (operands, load_p, word_mode))
6762 {
6763 th_mempair_save_restore_regs (operands, load_p, word_mode);
6764 offset = offset2;
6765 regno = regno2;
6766 continue;
6767 }
6768 }
6769 }
6770
6771 riscv_save_restore_reg (word_mode, regno, offset, fn);
6772 }
6773
6774 /* This loop must iterate over the same space as its companion in
6775 riscv_compute_frame_info. */
6776 first_fp_offset
6777 = (cfun->machine->frame.fp_sp_offset - sp_offset).to_constant ();
6778 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
6779 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
6780 {
6781 bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
6782 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
6783 unsigned int slot = (riscv_use_multi_push (&cfun->machine->frame))
6784 ? CALLEE_SAVED_FREG_NUMBER (regno)
6785 : num_masked_fp;
6786 offset = first_fp_offset - slot * GET_MODE_SIZE (mode).to_constant ();
6787 if (handle_reg)
6788 riscv_save_restore_reg (mode, regno, offset, fn);
6789 num_masked_fp++;
6790 }
6791 }
6792
6793 /* Call FN for each V register that is saved by the current function. */
6794
6795 static void
6796 riscv_for_each_saved_v_reg (poly_int64 &remaining_size,
6797 riscv_save_restore_fn fn, bool prologue)
6798 {
6799 rtx vlen = NULL_RTX;
6800 if (cfun->machine->frame.vmask != 0)
6801 {
6802 if (UNITS_PER_V_REG.is_constant ()
6803 && SMALL_OPERAND (UNITS_PER_V_REG.to_constant ()))
6804 vlen = GEN_INT (UNITS_PER_V_REG.to_constant ());
6805 else
6806 {
6807 vlen = RISCV_PROLOGUE_TEMP (Pmode);
6808 rtx insn
6809 = emit_move_insn (vlen, gen_int_mode (UNITS_PER_V_REG, Pmode));
6810 RTX_FRAME_RELATED_P (insn) = 1;
6811 }
6812 }
6813
6814 /* Select the mode where LMUL is 1 and SEW is largest. */
6815 machine_mode m1_mode = TARGET_VECTOR_ELEN_64 ? RVVM1DImode : RVVM1SImode;
6816
6817 if (prologue)
6818 {
6819 /* This loop must iterate over the same space as its companion in
6820 riscv_compute_frame_info. */
6821 for (unsigned int regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
6822 if (BITSET_P (cfun->machine->frame.vmask, regno - V_REG_FIRST))
6823 {
6824 bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
6825 if (handle_reg)
6826 {
6827 rtx insn = NULL_RTX;
6828 if (CONST_INT_P (vlen))
6829 {
6830 gcc_assert (SMALL_OPERAND (-INTVAL (vlen)));
6831 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
6832 stack_pointer_rtx,
6833 GEN_INT (-INTVAL (vlen))));
6834 }
6835 else
6836 insn = emit_insn (
6837 gen_sub3_insn (stack_pointer_rtx, stack_pointer_rtx, vlen));
6838 gcc_assert (insn != NULL_RTX);
6839 RTX_FRAME_RELATED_P (insn) = 1;
6840 riscv_save_restore_reg (m1_mode, regno, 0, fn);
6841 remaining_size -= UNITS_PER_V_REG;
6842 }
6843 }
6844 }
6845 else
6846 {
6847 /* This loop must iterate over the same space as its companion in
6848 riscv_compute_frame_info. */
6849 for (unsigned int regno = V_REG_LAST; regno >= V_REG_FIRST; regno--)
6850 if (BITSET_P (cfun->machine->frame.vmask, regno - V_REG_FIRST))
6851 {
6852 bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
6853 if (handle_reg)
6854 {
6855 riscv_save_restore_reg (m1_mode, regno, 0, fn);
6856 rtx insn = emit_insn (
6857 gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, vlen));
6858 gcc_assert (insn != NULL_RTX);
6859 RTX_FRAME_RELATED_P (insn) = 1;
6860 remaining_size -= UNITS_PER_V_REG;
6861 }
6862 }
6863 }
6864 }
6865
6866 /* For stack frames that can't be allocated with a single ADDI instruction,
6867 compute the best value to initially allocate. It must at a minimum
6868 allocate enough space to spill the callee-saved registers. If TARGET_RVC,
6869 try to pick a value that will allow compression of the register saves
6870 without adding extra instructions. */
6871
6872 static HOST_WIDE_INT
6873 riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size)
6874 {
6875 HOST_WIDE_INT remaining_const_size;
6876 if (!remaining_size.is_constant ())
6877 remaining_const_size
6878 = riscv_stack_align (remaining_size.coeffs[0])
6879 - riscv_stack_align (remaining_size.coeffs[1]);
6880 else
6881 remaining_const_size = remaining_size.to_constant ();
6882
6883 /* The first step must reach the top of the vector register save area if
6884 any vector registers need to be preserved. */
6885 if (frame->vmask != 0)
6886 return (remaining_size - frame->v_sp_offset_top).to_constant ();
6887
6888 if (SMALL_OPERAND (remaining_const_size))
6889 return remaining_const_size;
6890
6891 poly_int64 callee_saved_first_step =
6892 remaining_size - frame->frame_pointer_offset;
6893 gcc_assert(callee_saved_first_step.is_constant ());
6894 HOST_WIDE_INT min_first_step =
6895 riscv_stack_align (callee_saved_first_step.to_constant ());
6896 HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
6897 HOST_WIDE_INT min_second_step = remaining_const_size - max_first_step;
6898 gcc_assert (min_first_step <= max_first_step);
6899
6900 /* As an optimization, use the least-significant bits of the total frame
6901 size, so that the second adjustment step is just LUI + ADD. */
6902 if (!SMALL_OPERAND (min_second_step)
6903 && remaining_const_size % IMM_REACH <= max_first_step
6904 && remaining_const_size % IMM_REACH >= min_first_step)
6905 return remaining_const_size % IMM_REACH;
6906
6907 if (TARGET_RVC || TARGET_ZCA)
6908 {
6909 /* If we need two subtracts, and one is small enough to allow compressed
6910 loads and stores, then put that one first. */
6911 if (IN_RANGE (min_second_step, 0,
6912 (TARGET_64BIT ? SDSP_REACH : SWSP_REACH)))
6913 return MAX (min_second_step, min_first_step);
6914
6915 /* If we need LUI + ADDI + ADD for the second adjustment step, then start
6916 with the minimum first step, so that we can get compressed loads and
6917 stores. */
6918 else if (!SMALL_OPERAND (min_second_step))
6919 return min_first_step;
6920 }
6921
6922 return max_first_step;
6923 }
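
/* A worked example with made-up sizes, assuming IMM_REACH == 0x1000 and
   a 16-byte stack boundary (so max_first_step == 2032): for a constant
   frame of 4112 bytes whose callee saves need 16 bytes,
   min_second_step == 2080 is not a SMALL_OPERAND, but
   4112 % IMM_REACH == 16 lies within [min_first_step, max_first_step],
   so the function returns 16 and the remaining 4096 bytes are
   allocated with a single LUI + ADD.  */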
6924
6925 static rtx
6926 riscv_adjust_libcall_cfi_prologue ()
6927 {
6928 rtx dwarf = NULL_RTX;
6929 rtx adjust_sp_rtx, reg, mem, insn;
6930 int saved_size = cfun->machine->frame.save_libcall_adjustment;
6931 int offset;
6932
6933 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
6934 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
6935 {
6936 /* The save order is ra, s0, s1, s2 to s11. */
6937 if (regno == RETURN_ADDR_REGNUM)
6938 offset = saved_size - UNITS_PER_WORD;
6939 else if (regno == S0_REGNUM)
6940 offset = saved_size - UNITS_PER_WORD * 2;
6941 else if (regno == S1_REGNUM)
6942 offset = saved_size - UNITS_PER_WORD * 3;
6943 else
6944 offset = saved_size - ((regno - S2_REGNUM + 4) * UNITS_PER_WORD);
6945
6946 reg = gen_rtx_REG (Pmode, regno);
6947 mem = gen_frame_mem (Pmode, plus_constant (Pmode,
6948 stack_pointer_rtx,
6949 offset));
6950
6951 insn = gen_rtx_SET (mem, reg);
6952 dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf);
6953 }
6954
6955 /* Debug info for adjust sp. */
6956 adjust_sp_rtx =
6957 gen_rtx_SET (stack_pointer_rtx,
6958 gen_rtx_PLUS (GET_MODE(stack_pointer_rtx), stack_pointer_rtx, GEN_INT (-saved_size)));
6959 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
6960 dwarf);
6961 return dwarf;
6962 }
6963
6964 static rtx
6965 riscv_adjust_multi_push_cfi_prologue (int saved_size)
6966 {
6967 rtx dwarf = NULL_RTX;
6968 rtx adjust_sp_rtx, reg, mem, insn;
6969 unsigned int mask = cfun->machine->frame.mask;
6970 int offset;
6971 int saved_cnt = 0;
6972
6973 if (mask & S10_MASK)
6974 mask |= S11_MASK;
6975
6976 for (int regno = GP_REG_LAST; regno >= GP_REG_FIRST; regno--)
6977 if (BITSET_P (mask & MULTI_PUSH_GPR_MASK, regno - GP_REG_FIRST))
6978 {
6979 /* The save order is s11-s0, ra
6980 from high to low addr. */
6981 offset = saved_size - UNITS_PER_WORD * (++saved_cnt);
6982
6983 reg = gen_rtx_REG (Pmode, regno);
6984 mem = gen_frame_mem (Pmode,
6985 plus_constant (Pmode, stack_pointer_rtx, offset));
6986
6987 insn = gen_rtx_SET (mem, reg);
6988 dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf);
6989 }
6990
6991 /* Debug info for adjust sp. */
6992 adjust_sp_rtx
6993 = gen_rtx_SET (stack_pointer_rtx,
6994 plus_constant (Pmode, stack_pointer_rtx, -saved_size));
6995 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, dwarf);
6996 return dwarf;
6997 }
6998
6999 static void
7000 riscv_emit_stack_tie (void)
7001 {
7002 if (Pmode == SImode)
7003 emit_insn (gen_stack_tiesi (stack_pointer_rtx, hard_frame_pointer_rtx));
7004 else
7005 emit_insn (gen_stack_tiedi (stack_pointer_rtx, hard_frame_pointer_rtx));
7006 }
7007
7008 /* Zcmp multi push and pop code_for_push_pop function pointer array. */
7009 static const code_for_push_pop_t code_for_push_pop[ZCMP_MAX_GRP_SLOTS][ZCMP_OP_NUM]
7010 = {{code_for_gpr_multi_push_up_to_ra, code_for_gpr_multi_pop_up_to_ra,
7011 code_for_gpr_multi_popret_up_to_ra, code_for_gpr_multi_popretz_up_to_ra},
7012 {code_for_gpr_multi_push_up_to_s0, code_for_gpr_multi_pop_up_to_s0,
7013 code_for_gpr_multi_popret_up_to_s0, code_for_gpr_multi_popretz_up_to_s0},
7014 {code_for_gpr_multi_push_up_to_s1, code_for_gpr_multi_pop_up_to_s1,
7015 code_for_gpr_multi_popret_up_to_s1, code_for_gpr_multi_popretz_up_to_s1},
7016 {code_for_gpr_multi_push_up_to_s2, code_for_gpr_multi_pop_up_to_s2,
7017 code_for_gpr_multi_popret_up_to_s2, code_for_gpr_multi_popretz_up_to_s2},
7018 {code_for_gpr_multi_push_up_to_s3, code_for_gpr_multi_pop_up_to_s3,
7019 code_for_gpr_multi_popret_up_to_s3, code_for_gpr_multi_popretz_up_to_s3},
7020 {code_for_gpr_multi_push_up_to_s4, code_for_gpr_multi_pop_up_to_s4,
7021 code_for_gpr_multi_popret_up_to_s4, code_for_gpr_multi_popretz_up_to_s4},
7022 {code_for_gpr_multi_push_up_to_s5, code_for_gpr_multi_pop_up_to_s5,
7023 code_for_gpr_multi_popret_up_to_s5, code_for_gpr_multi_popretz_up_to_s5},
7024 {code_for_gpr_multi_push_up_to_s6, code_for_gpr_multi_pop_up_to_s6,
7025 code_for_gpr_multi_popret_up_to_s6, code_for_gpr_multi_popretz_up_to_s6},
7026 {code_for_gpr_multi_push_up_to_s7, code_for_gpr_multi_pop_up_to_s7,
7027 code_for_gpr_multi_popret_up_to_s7, code_for_gpr_multi_popretz_up_to_s7},
7028 {code_for_gpr_multi_push_up_to_s8, code_for_gpr_multi_pop_up_to_s8,
7029 code_for_gpr_multi_popret_up_to_s8, code_for_gpr_multi_popretz_up_to_s8},
7030 {code_for_gpr_multi_push_up_to_s9, code_for_gpr_multi_pop_up_to_s9,
7031 code_for_gpr_multi_popret_up_to_s9, code_for_gpr_multi_popretz_up_to_s9},
7032 {nullptr, nullptr, nullptr, nullptr},
7033 {code_for_gpr_multi_push_up_to_s11, code_for_gpr_multi_pop_up_to_s11,
7034 code_for_gpr_multi_popret_up_to_s11,
7035 code_for_gpr_multi_popretz_up_to_s11}};
7036
7037 static rtx
7038 riscv_gen_multi_push_pop_insn (riscv_zcmp_op_t op, HOST_WIDE_INT adj_size,
7039 unsigned int regs_num)
7040 {
7041 gcc_assert (op < ZCMP_OP_NUM);
7042 gcc_assert (regs_num <= ZCMP_MAX_GRP_SLOTS
7043 && regs_num != ZCMP_INVALID_S0S10_SREGS_COUNTS + 1); /* 1 for ra*/
7044 rtx stack_adj = GEN_INT (adj_size);
7045 return GEN_FCN (code_for_push_pop[regs_num - 1][op](Pmode)) (stack_adj);
7046 }
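
/* For illustration (hypothetical operands): with regs_num == 4 (ra plus
   s0-s2) and adj_size == -32, the PUSH_IDX entry would emit something
   like "cm.push {ra, s0-s2}, -32", while the POPRET entries fold the
   stack adjustment and the return into a single Zcmp instruction.  */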
7047
7048 static unsigned
7049 get_multi_push_fpr_mask (unsigned max_fprs_push)
7050 {
7051 unsigned mask_fprs_push = 0, num_f_pushed = 0;
7052 for (unsigned regno = FP_REG_FIRST;
7053 regno <= FP_REG_LAST && num_f_pushed < max_fprs_push; regno++)
7054 if (riscv_save_reg_p (regno))
7055 mask_fprs_push |= 1 << (regno - FP_REG_FIRST), num_f_pushed++;
7056 return mask_fprs_push;
7057 }
7058
7059 /* Expand the "prologue" pattern. */
7060
7061 void
7062 riscv_expand_prologue (void)
7063 {
7064 struct riscv_frame_info *frame = &cfun->machine->frame;
7065 poly_int64 remaining_size = frame->total_size;
7066 unsigned mask = frame->mask;
7067 unsigned fmask = frame->fmask;
7068 int spimm, multi_push_additional, stack_adj;
7069 rtx insn, dwarf = NULL_RTX;
7070
7071 if (flag_stack_usage_info)
7072 current_function_static_stack_size = constant_lower_bound (remaining_size);
7073
7074 if (cfun->machine->naked_p)
7075 return;
7076
7077 /* Prefer multi-push to the save-restore libcall. */
7078 if (riscv_use_multi_push (frame))
7079 {
7080 remaining_size -= frame->multi_push_adj_base;
7081 /* If there are vector registers that need to be saved, the SP can be
7082 lowered only as far as the frame->v_sp_offset_top position here, since
7083 the vector registers will be saved one by one by decreasing the SP
7084 later. */
7085 poly_int64 remaining_size_above_varea
7086 = frame->vmask != 0
7087 ? remaining_size - frame->v_sp_offset_top
7088 : remaining_size;
7089
7090 if (known_gt (remaining_size_above_varea, 2 * ZCMP_SP_INC_STEP))
7091 spimm = 3;
7092 else if (known_gt (remaining_size_above_varea, ZCMP_SP_INC_STEP))
7093 spimm = 2;
7094 else if (known_gt (remaining_size_above_varea, 0))
7095 spimm = 1;
7096 else
7097 spimm = 0;
7098 multi_push_additional = spimm * ZCMP_SP_INC_STEP;
7099 frame->multi_push_adj_addi = multi_push_additional;
7100 remaining_size -= multi_push_additional;
7101
7102 /* emit multi push insn & dwarf along with it. */
7103 stack_adj = frame->multi_push_adj_base + multi_push_additional;
7104 insn = emit_insn (riscv_gen_multi_push_pop_insn (
7105 PUSH_IDX, -stack_adj, riscv_multi_push_regs_count (frame->mask)));
7106 dwarf = riscv_adjust_multi_push_cfi_prologue (stack_adj);
7107 RTX_FRAME_RELATED_P (insn) = 1;
7108 REG_NOTES (insn) = dwarf;
7109
7110 /* Temporarily fib that we need not save GPRs. */
7111 frame->mask = 0;
7112
7113 /* Push FPRs into the additional space reserved by cm.push. */
7114 if (fmask)
7115 {
7116 unsigned mask_fprs_push
7117 = get_multi_push_fpr_mask (multi_push_additional / UNITS_PER_WORD);
7118 frame->fmask &= mask_fprs_push;
7119 riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false,
7120 false);
7121 frame->fmask = fmask & ~mask_fprs_push; /* Mask for the remaining FPRs. */
7122 }
7123 }
7124 /* When optimizing for size, call a subroutine to save the registers. */
7125 else if (riscv_use_save_libcall (frame))
7126 {
7127 rtx dwarf = NULL_RTX;
7128 dwarf = riscv_adjust_libcall_cfi_prologue ();
7129
7130 remaining_size -= frame->save_libcall_adjustment;
7131 insn = emit_insn (riscv_gen_gpr_save_insn (frame));
7132 frame->mask = 0; /* Temporarily fib that we need not save GPRs. */
7133
7134 RTX_FRAME_RELATED_P (insn) = 1;
7135 REG_NOTES (insn) = dwarf;
7136 }
7137
7138 /* Save the GP, FP registers. */
7139 if ((frame->mask | frame->fmask) != 0)
7140 {
7141 if (known_gt (remaining_size, frame->frame_pointer_offset))
7142 {
7143 HOST_WIDE_INT step1 = riscv_first_stack_step (frame, remaining_size);
7144 remaining_size -= step1;
7145 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
7146 GEN_INT (-step1));
7147 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
7148 }
7149 riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false, false);
7150 }
7151
7152 /* Undo the above fib. */
7153 frame->mask = mask;
7154 frame->fmask = fmask;
7155
7156 /* Set up the frame pointer, if we're using one. */
7157 if (frame_pointer_needed)
7158 {
7159 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
7160 GEN_INT ((frame->hard_frame_pointer_offset - remaining_size).to_constant ()));
7161 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
7162
7163 riscv_emit_stack_tie ();
7164 }
7165
7166 /* Save the V registers. */
7167 if (frame->vmask != 0)
7168 riscv_for_each_saved_v_reg (remaining_size, riscv_save_reg, true);
7169
7170 /* Allocate the rest of the frame. */
7171 if (known_gt (remaining_size, 0))
7172 {
7173 /* Two-step adjustment:
7174 1. scalable frame; 2. constant frame. */
7175 poly_int64 scalable_frame (0, 0);
7176 if (!remaining_size.is_constant ())
7177 {
7178 /* First for scalable frame. */
7179 poly_int64 scalable_frame = remaining_size;
7180 scalable_frame.coeffs[0] = remaining_size.coeffs[1];
7181 riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame, false);
7182 remaining_size -= scalable_frame;
7183 }
7184
7185 /* Second step for constant frame. */
7186 HOST_WIDE_INT constant_frame = remaining_size.to_constant ();
7187 if (constant_frame == 0)
7188 return;
7189
7190 if (SMALL_OPERAND (-constant_frame))
7191 {
7192 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
7193 GEN_INT (-constant_frame));
7194 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
7195 }
7196 else
7197 {
7198 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), GEN_INT (-constant_frame));
7199 emit_insn (gen_add3_insn (stack_pointer_rtx,
7200 stack_pointer_rtx,
7201 RISCV_PROLOGUE_TEMP (Pmode)));
7202
7203 /* Describe the effect of the previous instructions. */
7204 insn = plus_constant (Pmode, stack_pointer_rtx, -constant_frame);
7205 insn = gen_rtx_SET (stack_pointer_rtx, insn);
7206 riscv_set_frame_expr (insn);
7207 }
7208 }
7209 }
7210
7211 static rtx
7212 riscv_adjust_multi_pop_cfi_epilogue (int saved_size)
7213 {
7214 rtx dwarf = NULL_RTX;
7215 rtx adjust_sp_rtx, reg;
7216 unsigned int mask = cfun->machine->frame.mask;
7217
7218 if (mask & S10_MASK)
7219 mask |= S11_MASK;
7220
/* Debug info for the sp adjustment. */
7222 adjust_sp_rtx
7223 = gen_rtx_SET (stack_pointer_rtx,
7224 plus_constant (Pmode, stack_pointer_rtx, saved_size));
7225 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, dwarf);
7226
7227 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7228 if (BITSET_P (mask, regno - GP_REG_FIRST))
7229 {
7230 reg = gen_rtx_REG (Pmode, regno);
7231 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
7232 }
7233
7234 return dwarf;
7235 }
7236
7237 static rtx
7238 riscv_adjust_libcall_cfi_epilogue ()
7239 {
7240 rtx dwarf = NULL_RTX;
7241 rtx adjust_sp_rtx, reg;
7242 int saved_size = cfun->machine->frame.save_libcall_adjustment;
7243
/* Debug info for the sp adjustment. */
7245 adjust_sp_rtx =
7246 gen_rtx_SET (stack_pointer_rtx,
7247 gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (saved_size)));
7248 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
7249 dwarf);
7250
7251 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7252 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
7253 {
7254 reg = gen_rtx_REG (Pmode, regno);
7255 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
7256 }
7257
7258 return dwarf;
7259 }
7260
/* Return the insn that clears a0 if the popretz pattern can be matched,
i.e. the insn stream ends with:
set (reg 10 a0) (const_int 0)
use (reg 10 a0)
NOTE_INSN_EPILOGUE_BEG
Return NULL otherwise. */
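/* For example (a sketch with arbitrary operands): a function tail of
   "li a0,0" followed by a return that restores {ra, s0} with a 32-byte
   stack adjustment can be collapsed into the single Zcmp instruction
   "cm.popretz {ra, s0}, 32", which pops the registers, zeroes a0 and
   returns. */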
7265 static rtx_insn *
7266 riscv_zcmp_can_use_popretz (void)
7267 {
7268 rtx_insn *insn = NULL, *use = NULL, *clear = NULL;
7269
/* Sequence stack for NOTE_INSN_EPILOGUE_BEG. */
7271 struct sequence_stack *outer_seq = get_current_sequence ()->next;
7272 if (!outer_seq)
7273 return NULL;
7274 insn = outer_seq->first;
7275 if (!insn || !NOTE_P (insn) || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG)
7276 return NULL;
7277
/* Sequence stack for the insns before NOTE_INSN_EPILOGUE_BEG. */
7279 outer_seq = outer_seq->next;
7280 if (outer_seq)
7281 insn = outer_seq->last;
7282
/* Skip notes. */
7284 while (insn && NOTE_P (insn))
7285 {
7286 insn = PREV_INSN (insn);
7287 }
7288 use = insn;
7289
/* Match use (reg 10 a0). */
7291 if (use == NULL || !INSN_P (use) || GET_CODE (PATTERN (use)) != USE
7292 || !REG_P (XEXP (PATTERN (use), 0))
7293 || REGNO (XEXP (PATTERN (use), 0)) != A0_REGNUM)
7294 return NULL;
7295
/* Match set (reg 10 a0) (const_int 0 [0]). */
7297 clear = PREV_INSN (use);
7298 if (clear != NULL && INSN_P (clear) && GET_CODE (PATTERN (clear)) == SET
7299 && REG_P (SET_DEST (PATTERN (clear)))
7300 && REGNO (SET_DEST (PATTERN (clear))) == A0_REGNUM
7301 && SET_SRC (PATTERN (clear)) == const0_rtx)
7302 return clear;
7303
7304 return NULL;
7305 }
7306
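/* Emit the multi-pop insn for the epilogue: a plain cm.pop when we cannot
   also return (the sibcall case), cm.popretz when the function returns zero
   in a0, and cm.popret otherwise. A typical emitted form would be
   "cm.popret {ra, s0-s1}, 32" (a sketch; the exact operands depend on MASK
   and MULTIPOP_SIZE). */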
7307 static void
7308 riscv_gen_multi_pop_insn (bool use_multi_pop_normal, unsigned mask,
7309 unsigned multipop_size)
7310 {
7311 rtx insn;
7312 unsigned regs_count = riscv_multi_push_regs_count (mask);
7313
7314 if (!use_multi_pop_normal)
7315 insn = emit_insn (
7316 riscv_gen_multi_push_pop_insn (POP_IDX, multipop_size, regs_count));
7317 else if (rtx_insn *clear_a0_insn = riscv_zcmp_can_use_popretz ())
7318 {
7319 delete_insn (NEXT_INSN (clear_a0_insn));
7320 delete_insn (clear_a0_insn);
7321 insn = emit_jump_insn (
7322 riscv_gen_multi_push_pop_insn (POPRETZ_IDX, multipop_size, regs_count));
7323 }
7324 else
7325 insn = emit_jump_insn (
7326 riscv_gen_multi_push_pop_insn (POPRET_IDX, multipop_size, regs_count));
7327
7328 rtx dwarf = riscv_adjust_multi_pop_cfi_epilogue (multipop_size);
7329 RTX_FRAME_RELATED_P (insn) = 1;
7330 REG_NOTES (insn) = dwarf;
7331 }
7332
7333 /* Expand an "epilogue", "sibcall_epilogue", or "eh_return_internal" pattern;
7334 style says which. */
7335
7336 void
7337 riscv_expand_epilogue (int style)
7338 {
7339 /* Split the frame into 3 steps. STEP1 is the amount of stack we should
7340 deallocate before restoring the registers. STEP2 is the amount we
should deallocate afterwards, including the callee-saved regs. STEP3
is the amount deallocated by the save-restore libcall.
7343
7344 Start off by assuming that no registers need to be restored. */
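/* For instance, in a plain -msave-restore function STEP3 is the fixed
   adjustment performed by the restore stub, STEP2 covers the register save
   area within reach of the restore loads, and STEP1 is whatever remains
   (including any scalable vector area), deallocated first. */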
7345 struct riscv_frame_info *frame = &cfun->machine->frame;
7346 unsigned mask = frame->mask;
7347 unsigned fmask = frame->fmask;
7348 unsigned mask_fprs_push = 0;
7349 poly_int64 step2 = 0;
7350 bool use_multi_pop_normal
7351 = ((style == NORMAL_RETURN) && riscv_use_multi_push (frame));
7352 bool use_multi_pop_sibcall
7353 = ((style == SIBCALL_RETURN) && riscv_use_multi_push (frame));
7354 bool use_multi_pop = use_multi_pop_normal || use_multi_pop_sibcall;
7355
7356 bool use_restore_libcall
7357 = !use_multi_pop
7358 && ((style == NORMAL_RETURN) && riscv_use_save_libcall (frame));
7359 unsigned libcall_size = use_restore_libcall && !use_multi_pop
7360 ? frame->save_libcall_adjustment
7361 : 0;
7362 unsigned multipop_size
7363 = use_multi_pop ? frame->multi_push_adj_base + frame->multi_push_adj_addi
7364 : 0;
7365 rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7366 rtx insn;
7367
/* We need to add a memory barrier to prevent reads from the deallocated stack. */
7369 bool need_barrier_p = known_ne (get_frame_size ()
7370 + cfun->machine->frame.arg_pointer_offset, 0);
7371
7372 if (cfun->machine->naked_p)
7373 {
7374 gcc_assert (style == NORMAL_RETURN);
7375
7376 emit_jump_insn (gen_return ());
7377
7378 return;
7379 }
7380
7381 if ((style == NORMAL_RETURN) && riscv_can_use_return_insn ())
7382 {
7383 emit_jump_insn (gen_return ());
7384 return;
7385 }
7386
7387 /* Reset the epilogue cfa info before starting to emit the epilogue. */
7388 epilogue_cfa_sp_offset = 0;
7389
7390 /* Move past any dynamic stack allocations. */
7391 if (cfun->calls_alloca)
7392 {
7393 /* Emit a barrier to prevent loads from a deallocated stack. */
7394 riscv_emit_stack_tie ();
7395 need_barrier_p = false;
7396
7397 poly_int64 adjust_offset = -frame->hard_frame_pointer_offset;
7398 rtx adjust = NULL_RTX;
7399
7400 if (!adjust_offset.is_constant ())
7401 {
7402 rtx tmp1 = RISCV_PROLOGUE_TEMP (Pmode);
7403 rtx tmp2 = RISCV_PROLOGUE_TEMP2 (Pmode);
7404 riscv_legitimize_poly_move (Pmode, tmp1, tmp2,
7405 gen_int_mode (adjust_offset, Pmode));
7406 adjust = tmp1;
7407 }
7408 else
7409 {
7410 if (!SMALL_OPERAND (adjust_offset.to_constant ()))
7411 {
7412 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode),
7413 GEN_INT (adjust_offset.to_constant ()));
7414 adjust = RISCV_PROLOGUE_TEMP (Pmode);
7415 }
7416 else
7417 adjust = GEN_INT (adjust_offset.to_constant ());
7418 }
7419
7420 insn = emit_insn (
7421 gen_add3_insn (stack_pointer_rtx, hard_frame_pointer_rtx,
7422 adjust));
7423
7424 rtx dwarf = NULL_RTX;
7425 rtx cfa_adjust_value = gen_rtx_PLUS (
7426 Pmode, hard_frame_pointer_rtx,
7427 gen_int_mode (-frame->hard_frame_pointer_offset, Pmode));
7428 rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value);
7429 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf);
7430 RTX_FRAME_RELATED_P (insn) = 1;
7431
7432 REG_NOTES (insn) = dwarf;
7433 }
7434
7435 if (use_restore_libcall || use_multi_pop)
7436 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */
7437
7438 /* If we need to restore registers, deallocate as much stack as
7439 possible in the second step without going out of range. */
7440 if (use_multi_pop)
7441 {
7442 if (frame->fmask
7443 && known_gt (frame->total_size - multipop_size,
7444 frame->frame_pointer_offset))
7445 step2
7446 = riscv_first_stack_step (frame, frame->total_size - multipop_size);
7447 }
7448 else if ((frame->mask | frame->fmask) != 0)
7449 step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size);
7450
7451 if (use_restore_libcall || use_multi_pop)
7452 frame->mask = mask; /* Undo the above fib. */
7453
7454 poly_int64 step1;
/* STEP1 must be set to the bottom of the vector register save area if any
vector registers need to be preserved. */
7457 if (frame->vmask != 0)
7458 {
7459 step1 = frame->v_sp_offset_bottom;
7460 step2 = frame->total_size - step1 - libcall_size - multipop_size;
7461 }
7462 else
7463 step1 = frame->total_size - step2 - libcall_size - multipop_size;
7464
7465 /* Set TARGET to BASE + STEP1. */
7466 if (known_gt (step1, 0))
7467 {
7468 /* Emit a barrier to prevent loads from a deallocated stack. */
7469 riscv_emit_stack_tie ();
7470 need_barrier_p = false;
7471
/* Restore the scalable frame that was allocated in the prologue. */
7473 if (!step1.is_constant ())
7474 {
7475 poly_int64 scalable_frame = step1;
7476 scalable_frame.coeffs[0] = step1.coeffs[1];
7477 riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame,
7478 true);
7479 step1 -= scalable_frame;
7480 }
7481
/* Get an rtx for STEP1 that we can add to BASE.
Skip if the adjustment is zero. */
7484 if (step1.to_constant () != 0)
7485 {
7486 rtx adjust = GEN_INT (step1.to_constant ());
7487 if (!SMALL_OPERAND (step1.to_constant ()))
7488 {
7489 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), adjust);
7490 adjust = RISCV_PROLOGUE_TEMP (Pmode);
7491 }
7492
7493 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
7494 stack_pointer_rtx,
7495 adjust));
7496 rtx dwarf = NULL_RTX;
7497 rtx cfa_adjust_rtx
7498 = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
7499 gen_int_mode (step2 + libcall_size + multipop_size,
7500 Pmode));
7501
7502 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
7503 RTX_FRAME_RELATED_P (insn) = 1;
7504
7505 REG_NOTES (insn) = dwarf;
7506 }
7507 }
7508 else if (frame_pointer_needed)
7509 {
7510 /* Tell riscv_restore_reg to emit dwarf to redefine CFA when restoring
7511 old value of FP. */
7512 epilogue_cfa_sp_offset = step2;
7513 }
7514
7515 if (use_multi_pop)
7516 {
7517 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */
7518 if (fmask)
7519 {
7520 mask_fprs_push = get_multi_push_fpr_mask (frame->multi_push_adj_addi
7521 / UNITS_PER_WORD);
frame->fmask &= ~mask_fprs_push; /* FPRs not saved by cm.push. */
7523 }
7524 }
7525 else if (use_restore_libcall)
7526 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */
7527
7528 /* Restore the registers. */
7529 riscv_for_each_saved_v_reg (step2, riscv_restore_reg, false);
7530 riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size
7531 - multipop_size,
7532 riscv_restore_reg, true, style == EXCEPTION_RETURN);
7533
7534 if (use_restore_libcall)
7535 frame->mask = mask; /* Undo the above fib. */
7536
7537 if (need_barrier_p)
7538 riscv_emit_stack_tie ();
7539
7540 /* Deallocate the final bit of the frame. */
7541 if (step2.to_constant () > 0)
7542 {
7543 insn = emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
7544 GEN_INT (step2.to_constant ())));
7545
7546 rtx dwarf = NULL_RTX;
7547 rtx cfa_adjust_rtx
7548 = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
7549 GEN_INT (libcall_size + multipop_size));
7550 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
7551 RTX_FRAME_RELATED_P (insn) = 1;
7552
7553 REG_NOTES (insn) = dwarf;
7554 }
7555
7556 if (use_multi_pop)
7557 {
/* Restore the FPRs pushed by cm.push. */
7559 frame->fmask = fmask & mask_fprs_push;
7560 if (frame->fmask)
7561 riscv_for_each_saved_reg (frame->total_size - libcall_size
7562 - multipop_size,
7563 riscv_restore_reg, true,
7564 style == EXCEPTION_RETURN);
7565 /* Undo the above fib. */
7566 frame->mask = mask;
7567 frame->fmask = fmask;
7568 riscv_gen_multi_pop_insn (use_multi_pop_normal, frame->mask,
7569 multipop_size);
7570 if (use_multi_pop_normal)
7571 return;
7572 }
7573 else if (use_restore_libcall)
7574 {
7575 rtx dwarf = riscv_adjust_libcall_cfi_epilogue ();
7576 insn = emit_insn (gen_gpr_restore (GEN_INT (riscv_save_libcall_count (mask))));
7577 RTX_FRAME_RELATED_P (insn) = 1;
7578 REG_NOTES (insn) = dwarf;
7579
7580 emit_jump_insn (gen_gpr_restore_return (ra));
7581 return;
7582 }
7583
7584 /* Add in the __builtin_eh_return stack adjustment. */
7585 if ((style == EXCEPTION_RETURN) && crtl->calls_eh_return)
7586 emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
7587 EH_RETURN_STACKADJ_RTX));
7588
7589 /* Return from interrupt. */
7590 if (cfun->machine->interrupt_handler_p)
7591 {
7592 enum riscv_privilege_levels mode = cfun->machine->interrupt_mode;
7593
7594 gcc_assert (mode != UNKNOWN_MODE);
7595
7596 if (mode == MACHINE_MODE)
7597 emit_jump_insn (gen_riscv_mret ());
7598 else if (mode == SUPERVISOR_MODE)
7599 emit_jump_insn (gen_riscv_sret ());
7600 else
7601 emit_jump_insn (gen_riscv_uret ());
7602 }
7603 else if (style != SIBCALL_RETURN)
7604 emit_jump_insn (gen_simple_return_internal (ra));
7605 }
7606
7607 /* Implement EPILOGUE_USES. */
7608
7609 bool
7610 riscv_epilogue_uses (unsigned int regno)
7611 {
7612 if (regno == RETURN_ADDR_REGNUM)
7613 return true;
7614
7615 if (epilogue_completed && cfun->machine->interrupt_handler_p)
7616 {
7617 /* An interrupt function restores temp regs, so we must indicate that
7618 they are live at function end. */
7619 if (df_regs_ever_live_p (regno)
7620 || (!crtl->is_leaf && call_used_or_fixed_reg_p (regno)))
7621 return true;
7622 }
7623
7624 return false;
7625 }
7626
7627 static bool
7628 riscv_avoid_shrink_wrapping_separate ()
7629 {
7630 if (riscv_use_save_libcall (&cfun->machine->frame)
7631 || cfun->machine->interrupt_handler_p
7632 || !cfun->machine->frame.gp_sp_offset.is_constant ())
7633 return true;
7634
7635 return false;
7636 }
7637
7638 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
7639
7640 static sbitmap
7641 riscv_get_separate_components (void)
7642 {
7643 HOST_WIDE_INT offset;
7644 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
7645 bitmap_clear (components);
7646
7647 if (riscv_avoid_shrink_wrapping_separate ())
7648 return components;
7649
7650 offset = cfun->machine->frame.gp_sp_offset.to_constant ();
7651 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7652 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
7653 {
7654 /* We can only wrap registers that have small operand offsets.
7655 For large offsets a pseudo register might be needed which
7656 cannot be created during the shrink wrapping pass. */
7657 if (SMALL_OPERAND (offset))
7658 bitmap_set_bit (components, regno);
7659
7660 offset -= UNITS_PER_WORD;
7661 }
7662
7663 offset = cfun->machine->frame.fp_sp_offset.to_constant ();
7664 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
7665 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
7666 {
7667 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
7668
7669 /* We can only wrap registers that have small operand offsets.
7670 For large offsets a pseudo register might be needed which
7671 cannot be created during the shrink wrapping pass. */
7672 if (SMALL_OPERAND (offset))
7673 bitmap_set_bit (components, regno);
7674
7675 offset -= GET_MODE_SIZE (mode).to_constant ();
7676 }
7677
7678 /* Don't mess with the hard frame pointer. */
7679 if (frame_pointer_needed)
7680 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
7681
7682 bitmap_clear_bit (components, RETURN_ADDR_REGNUM);
7683
7684 return components;
7685 }
7686
7687 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
7688
7689 static sbitmap
7690 riscv_components_for_bb (basic_block bb)
7691 {
7692 bitmap in = DF_LIVE_IN (bb);
7693 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
7694 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
7695
7696 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
7697 bitmap_clear (components);
7698
7699 function_abi_aggregator callee_abis;
7700 rtx_insn *insn;
7701 FOR_BB_INSNS (bb, insn)
7702 if (CALL_P (insn))
7703 callee_abis.note_callee_abi (insn_callee_abi (insn));
7704 HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
7705
7706 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
7707 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7708 if (!fixed_regs[regno]
7709 && !crtl->abi->clobbers_full_reg_p (regno)
7710 && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
7711 || bitmap_bit_p (in, regno)
7712 || bitmap_bit_p (gen, regno)
7713 || bitmap_bit_p (kill, regno)))
7714 bitmap_set_bit (components, regno);
7715
7716 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
7717 if (!fixed_regs[regno]
7718 && !crtl->abi->clobbers_full_reg_p (regno)
7719 && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
7720 || bitmap_bit_p (in, regno)
7721 || bitmap_bit_p (gen, regno)
7722 || bitmap_bit_p (kill, regno)))
7723 bitmap_set_bit (components, regno);
7724
7725 return components;
7726 }
7727
7728 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
7729
7730 static void
7731 riscv_disqualify_components (sbitmap, edge, sbitmap, bool)
7732 {
7733 /* Nothing to do for riscv. */
7734 }
7735
7736 static void
7737 riscv_process_components (sbitmap components, bool prologue_p)
7738 {
7739 HOST_WIDE_INT offset;
riscv_save_restore_fn fn = prologue_p ? riscv_save_reg : riscv_restore_reg;
7741
7742 offset = cfun->machine->frame.gp_sp_offset.to_constant ();
7743 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7744 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
7745 {
7746 if (bitmap_bit_p (components, regno))
7747 riscv_save_restore_reg (word_mode, regno, offset, fn);
7748
7749 offset -= UNITS_PER_WORD;
7750 }
7751
7752 offset = cfun->machine->frame.fp_sp_offset.to_constant ();
7753 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
7754 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
7755 {
7756 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
7757
7758 if (bitmap_bit_p (components, regno))
7759 riscv_save_restore_reg (mode, regno, offset, fn);
7760
7761 offset -= GET_MODE_SIZE (mode).to_constant ();
7762 }
7763 }
7764
7765 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
7766
7767 static void
7768 riscv_emit_prologue_components (sbitmap components)
7769 {
7770 riscv_process_components (components, true);
7771 }
7772
7773 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
7774
7775 static void
7776 riscv_emit_epilogue_components (sbitmap components)
7777 {
7778 riscv_process_components (components, false);
7779 }
7780
7781 static void
7782 riscv_set_handled_components (sbitmap components)
7783 {
7784 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7785 if (bitmap_bit_p (components, regno))
7786 cfun->machine->reg_is_wrapped_separately[regno] = true;
7787
7788 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
7789 if (bitmap_bit_p (components, regno))
7790 cfun->machine->reg_is_wrapped_separately[regno] = true;
7791 }
7792
7793 /* Return nonzero if this function is known to have a null epilogue.
7794 This allows the optimizer to omit jumps to jumps if no stack
7795 was created. */
7796
7797 bool
7798 riscv_can_use_return_insn (void)
7799 {
7800 return (reload_completed && known_eq (cfun->machine->frame.total_size, 0)
7801 && ! cfun->machine->interrupt_handler_p);
7802 }
7803
7804 /* Given that there exists at least one variable that is set (produced)
7805 by OUT_INSN and read (consumed) by IN_INSN, return true iff
7806 IN_INSN represents one or more memory store operations and none of
7807 the variables set by OUT_INSN is used by IN_INSN as the address of a
7808 store operation. If either IN_INSN or OUT_INSN does not represent
7809 a "single" RTL SET expression (as loosely defined by the
7810 implementation of the single_set function) or a PARALLEL with only
7811 SETs, CLOBBERs, and USEs inside, this function returns false.
7812
7813 Borrowed from rs6000, riscv_store_data_bypass_p checks for certain
7814 conditions that result in assertion failures in the generic
7815 store_data_bypass_p function and returns FALSE in such cases.
7816
7817 This is required to make -msave-restore work with the sifive-7
7818 pipeline description. */
7819
7820 bool
7821 riscv_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
7822 {
7823 rtx out_set, in_set;
7824 rtx out_pat, in_pat;
7825 rtx out_exp, in_exp;
7826 int i, j;
7827
7828 in_set = single_set (in_insn);
7829 if (in_set)
7830 {
7831 if (MEM_P (SET_DEST (in_set)))
7832 {
7833 out_set = single_set (out_insn);
7834 if (!out_set)
7835 {
7836 out_pat = PATTERN (out_insn);
7837 if (GET_CODE (out_pat) == PARALLEL)
7838 {
7839 for (i = 0; i < XVECLEN (out_pat, 0); i++)
7840 {
7841 out_exp = XVECEXP (out_pat, 0, i);
7842 if ((GET_CODE (out_exp) == CLOBBER)
7843 || (GET_CODE (out_exp) == USE))
7844 continue;
7845 else if (GET_CODE (out_exp) != SET)
7846 return false;
7847 }
7848 }
7849 }
7850 }
7851 }
7852 else
7853 {
7854 in_pat = PATTERN (in_insn);
7855 if (GET_CODE (in_pat) != PARALLEL)
7856 return false;
7857
7858 for (i = 0; i < XVECLEN (in_pat, 0); i++)
7859 {
7860 in_exp = XVECEXP (in_pat, 0, i);
7861 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
7862 continue;
7863 else if (GET_CODE (in_exp) != SET)
7864 return false;
7865
7866 if (MEM_P (SET_DEST (in_exp)))
7867 {
7868 out_set = single_set (out_insn);
7869 if (!out_set)
7870 {
7871 out_pat = PATTERN (out_insn);
7872 if (GET_CODE (out_pat) != PARALLEL)
7873 return false;
7874 for (j = 0; j < XVECLEN (out_pat, 0); j++)
7875 {
7876 out_exp = XVECEXP (out_pat, 0, j);
7877 if ((GET_CODE (out_exp) == CLOBBER)
7878 || (GET_CODE (out_exp) == USE))
7879 continue;
7880 else if (GET_CODE (out_exp) != SET)
7881 return false;
7882 }
7883 }
7884 }
7885 }
7886 }
7887
7888 return store_data_bypass_p (out_insn, in_insn);
7889 }
7890
7891 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
7892
7893 When floating-point registers are wider than integer ones, moves between
7894 them must go through memory. */
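/* For example, on RV32 with the D extension a DFmode value cannot be moved
   directly between a 64-bit FPR and a pair of 32-bit GPRs; XTheadFmv and
   Zfa provide dedicated move instructions for exactly this case, which is
   why they are excluded below. */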
7895
7896 static bool
7897 riscv_secondary_memory_needed (machine_mode mode, reg_class_t class1,
7898 reg_class_t class2)
7899 {
7900 return (!riscv_v_ext_mode_p (mode)
7901 && GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD
7902 && (class1 == FP_REGS) != (class2 == FP_REGS)
7903 && !TARGET_XTHEADFMV
7904 && !TARGET_ZFA);
7905 }
7906
7907 /* Implement TARGET_REGISTER_MOVE_COST. */
7908
7909 static int
7910 riscv_register_move_cost (machine_mode mode,
7911 reg_class_t from, reg_class_t to)
7912 {
7913 if ((from == FP_REGS && to == GR_REGS) ||
7914 (from == GR_REGS && to == FP_REGS))
7915 return tune_param->fmv_cost;
7916
7917 return riscv_secondary_memory_needed (mode, from, to) ? 8 : 2;
7918 }
7919
7920 /* Implement TARGET_HARD_REGNO_NREGS. */
7921
7922 static unsigned int
7923 riscv_hard_regno_nregs (unsigned int regno, machine_mode mode)
7924 {
7925 if (riscv_v_ext_vector_mode_p (mode))
7926 {
/* Handle fractional LMUL: the value occupies only part of a vector
register but still needs one whole vector register to hold it. */
7929 if (maybe_lt (GET_MODE_SIZE (mode), UNITS_PER_V_REG))
7930 return 1;
7931
7932 return exact_div (GET_MODE_SIZE (mode), UNITS_PER_V_REG).to_constant ();
7933 }
7934
/* For tuple modes, the number of registers = NF * LMUL. */
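/* E.g. (a sketch) a tuple with NF = 2 whose subparts each occupy two
   vector registers (LMUL = 2) needs 2 * 2 = 4 registers; with a
   fractional-LMUL subpart each field still needs a whole register,
   giving NF registers. */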
7936 if (riscv_v_ext_tuple_mode_p (mode))
7937 {
7938 unsigned int nf = riscv_vector::get_nf (mode);
7939 machine_mode subpart_mode = riscv_vector::get_subpart_mode (mode);
7940 poly_int64 size = GET_MODE_SIZE (subpart_mode);
7941 gcc_assert (known_eq (size * nf, GET_MODE_SIZE (mode)));
7942 if (maybe_lt (size, UNITS_PER_V_REG))
7943 return nf;
7944 else
7945 {
7946 unsigned int lmul = exact_div (size, UNITS_PER_V_REG).to_constant ();
7947 return nf * lmul;
7948 }
7949 }
7950
7951 /* For VLS modes, we allocate registers according to TARGET_MIN_VLEN. */
7952 if (riscv_v_ext_vls_mode_p (mode))
7953 {
7954 int size = GET_MODE_BITSIZE (mode).to_constant ();
7955 if (size < TARGET_MIN_VLEN)
7956 return 1;
7957 else
7958 return size / TARGET_MIN_VLEN;
7959 }
7960
/* Modes for VL or VTYPE are just markers and do not hold values,
so they always consume one register. */
7963 if (VTYPE_REG_P (regno) || VL_REG_P (regno) || VXRM_REG_P (regno)
7964 || FRM_REG_P (regno))
7965 return 1;
7966
7967 /* Assume every valid non-vector mode fits in one vector register. */
7968 if (V_REG_P (regno))
7969 return 1;
7970
7971 if (FP_REG_P (regno))
7972 return (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_FP_REG - 1) / UNITS_PER_FP_REG;
7973
7974 /* All other registers are word-sized. */
7975 return (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7976 }
7977
7978 /* Implement TARGET_HARD_REGNO_MODE_OK. */
7979
7980 static bool
7981 riscv_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
7982 {
7983 unsigned int nregs = riscv_hard_regno_nregs (regno, mode);
7984
7985 if (GP_REG_P (regno))
7986 {
7987 if (riscv_v_ext_mode_p (mode))
7988 return false;
7989
7990 if (!GP_REG_P (regno + nregs - 1))
7991 return false;
7992 }
7993 else if (FP_REG_P (regno))
7994 {
7995 if (riscv_v_ext_mode_p (mode))
7996 return false;
7997
7998 if (!FP_REG_P (regno + nregs - 1))
7999 return false;
8000
8001 if (GET_MODE_CLASS (mode) != MODE_FLOAT
8002 && GET_MODE_CLASS (mode) != MODE_COMPLEX_FLOAT)
8003 return false;
8004
8005 /* Only use callee-saved registers if a potential callee is guaranteed
8006 to spill the requisite width. */
8007 if (GET_MODE_UNIT_SIZE (mode) > UNITS_PER_FP_REG
8008 || (!call_used_or_fixed_reg_p (regno)
8009 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_FP_ARG))
8010 return false;
8011 }
8012 else if (V_REG_P (regno))
8013 {
8014 if (!riscv_v_ext_mode_p (mode))
8015 return false;
8016
8017 if (!V_REG_P (regno + nregs - 1))
8018 return false;
8019
8020 int regno_alignment = riscv_get_v_regno_alignment (mode);
8021 if (regno_alignment != 1)
8022 return ((regno % regno_alignment) == 0);
8023 }
8024 else if (VTYPE_REG_P (regno) || VL_REG_P (regno) || VXRM_REG_P (regno)
8025 || FRM_REG_P (regno))
8026 return true;
8027 else
8028 return false;
8029
8030 /* Require same callee-savedness for all registers. */
8031 for (unsigned i = 1; i < nregs; i++)
8032 if (call_used_or_fixed_reg_p (regno)
8033 != call_used_or_fixed_reg_p (regno + i))
8034 return false;
8035
/* Only use even registers in RV32 ZDINX. */
if (!TARGET_64BIT && TARGET_ZDINX)
{
if (GET_MODE_CLASS (mode) == MODE_FLOAT
&& GET_MODE_UNIT_SIZE (mode) == GET_MODE_SIZE (DFmode))
return !(regno & 1);
}
8042
8043 return true;
8044 }
8045
8046 /* Implement TARGET_MODES_TIEABLE_P.
8047
8048 Don't allow floating-point modes to be tied, since type punning of
8049 single-precision and double-precision is implementation defined. */
8050
8051 static bool
8052 riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2)
8053 {
/* We don't allow modes from different register classes to be tied,
since that causes an ICE in register allocation (RA).
E.g. V2SI and DI are not tieable. */
8057 if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2))
8058 return false;
8059 return (mode1 == mode2
8060 || !(GET_MODE_CLASS (mode1) == MODE_FLOAT
8061 && GET_MODE_CLASS (mode2) == MODE_FLOAT));
8062 }
8063
8064 /* Implement CLASS_MAX_NREGS. */
8065
8066 static unsigned char
8067 riscv_class_max_nregs (reg_class_t rclass, machine_mode mode)
8068 {
8069 if (reg_class_subset_p (rclass, FP_REGS))
8070 return riscv_hard_regno_nregs (FP_REG_FIRST, mode);
8071
8072 if (reg_class_subset_p (rclass, GR_REGS))
8073 return riscv_hard_regno_nregs (GP_REG_FIRST, mode);
8074
8075 if (reg_class_subset_p (rclass, V_REGS))
8076 return riscv_hard_regno_nregs (V_REG_FIRST, mode);
8077
8078 return 0;
8079 }
8080
8081 /* Implement TARGET_MEMORY_MOVE_COST. */
8082
8083 static int
8084 riscv_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in)
8085 {
8086 return (tune_param->memory_cost
8087 + memory_move_secondary_cost (mode, rclass, in));
8088 }
8089
8090 /* Return the number of instructions that can be issued per cycle. */
8091
8092 static int
8093 riscv_issue_rate (void)
8094 {
8095 return tune_param->issue_rate;
8096 }
8097
8098 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
8099 static int
8100 riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
8101 {
8102 if (DEBUG_INSN_P (insn))
8103 return more;
8104
8105 rtx_code code = GET_CODE (PATTERN (insn));
8106 if (code == USE || code == CLOBBER)
8107 return more;
8108
8109 /* GHOST insns are used for blockage and similar cases which
8110 effectively end a cycle. */
8111 if (get_attr_type (insn) == TYPE_GHOST)
8112 return 0;
8113
8114 /* If we ever encounter an insn with an unknown type, trip
8115 an assert so we can find and fix this problem. */
8116 gcc_assert (get_attr_type (insn) != TYPE_UNKNOWN);
8117
8118 /* If we ever encounter an insn without an insn reservation, trip
8119 an assert so we can find and fix this problem. */
8120 #if 0
8121 gcc_assert (insn_has_dfa_reservation_p (insn));
8122 #endif
8123
8124 return more - 1;
8125 }
8126
8127 /* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
8128 instruction fusion of some sort. */
8129
8130 static bool
8131 riscv_macro_fusion_p (void)
8132 {
8133 return tune_param->fusible_ops != RISCV_FUSE_NOTHING;
8134 }
8135
8136 /* Return true iff the instruction fusion described by OP is enabled. */
8137
8138 static bool
riscv_fusion_enabled_p (enum riscv_fusion_pairs op)
8140 {
8141 return tune_param->fusible_ops & op;
8142 }
8143
8144 /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
8145 should be kept together during scheduling. */
8146
8147 static bool
8148 riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
8149 {
8150 rtx prev_set = single_set (prev);
8151 rtx curr_set = single_set (curr);
8152 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
8153 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
8154
8155 if (!riscv_macro_fusion_p ())
8156 return false;
8157
8158 if (simple_sets_p
8159 && (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW)
8160 || riscv_fusion_enabled_p (RISCV_FUSE_ZEXTWS)))
8161 {
8162 /* We are trying to match the following:
8163 prev (slli) == (set (reg:DI rD)
8164 (ashift:DI (reg:DI rS) (const_int 32)))
curr (srli) == (set (reg:DI rD)
(lshiftrt:DI (reg:DI rD) (const_int <shift>)))
with <shift> being either 32 for FUSE_ZEXTW, or
less than 32 for FUSE_ZEXTWS. */
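/* E.g. the canonical word zero-extension idiom (register names are
   illustrative):
   slli a0,a1,32
   srli a0,a0,32 */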
8169
8170 if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
8171 && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
8172 && REG_P (SET_DEST (prev_set))
8173 && REG_P (SET_DEST (curr_set))
8174 && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
8175 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
8176 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
8177 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
8178 && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 32
&& ((INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32
&& riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW))
|| (INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32
&& riscv_fusion_enabled_p (RISCV_FUSE_ZEXTWS))))
8183 return true;
8184 }
8185
8186 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH))
8187 {
8188 /* We are trying to match the following:
8189 prev (slli) == (set (reg:DI rD)
8190 (ashift:DI (reg:DI rS) (const_int 48)))
curr (srli) == (set (reg:DI rD)
8192 (lshiftrt:DI (reg:DI rD) (const_int 48))) */
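/* E.g. the halfword zero-extension idiom on RV64 (illustrative operands):
   slli a0,a1,48
   srli a0,a0,48 */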
8193
8194 if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
8195 && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
8196 && REG_P (SET_DEST (prev_set))
8197 && REG_P (SET_DEST (curr_set))
8198 && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
8199 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
8200 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
8201 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
8202 && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 48
8203 && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 48)
8204 return true;
8205 }
8206
8207 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED))
8208 {
8209 /* We are trying to match the following:
8210 prev (add) == (set (reg:DI rD)
8211 (plus:DI (reg:DI rS1) (reg:DI rS2))
8212 curr (ld) == (set (reg:DI rD)
8213 (mem:DI (reg:DI rD))) */
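/* E.g. an indexed load composed from two instructions (illustrative
   operands):
   add a0,a1,a2
   ld a0,0(a0) */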
8214
8215 if (MEM_P (SET_SRC (curr_set))
8216 && REG_P (XEXP (SET_SRC (curr_set), 0))
8217 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
8218 && GET_CODE (SET_SRC (prev_set)) == PLUS
8219 && REG_P (XEXP (SET_SRC (prev_set), 0))
8220 && REG_P (XEXP (SET_SRC (prev_set), 1)))
8221 return true;
8222
8223 /* We are trying to match the following:
8224 prev (add) == (set (reg:DI rD)
8225 (plus:DI (reg:DI rS1) (reg:DI rS2)))
curr (lw) == (set (reg:DI rD) (any_extend:DI (mem:SUBX (reg:DI rD)))) */
8227
8228 if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
8229 || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND))
8230 && MEM_P (XEXP (SET_SRC (curr_set), 0))
8231 && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
8232 && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == REGNO (SET_DEST (prev_set))
8233 && GET_CODE (SET_SRC (prev_set)) == PLUS
8234 && REG_P (XEXP (SET_SRC (prev_set), 0))
8235 && REG_P (XEXP (SET_SRC (prev_set), 1)))
8236 return true;
8237 }
8238
8239 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDPREINCREMENT))
8240 {
8241 /* We are trying to match the following:
8242 prev (add) == (set (reg:DI rS)
8243 (plus:DI (reg:DI rS) (const_int))
8244 curr (ld) == (set (reg:DI rD)
8245 (mem:DI (reg:DI rS))) */
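/* E.g. a pre-increment-style access (illustrative operands):
   addi a0,a0,8
   ld a1,0(a0) */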
8246
8247 if (MEM_P (SET_SRC (curr_set))
8248 && REG_P (XEXP (SET_SRC (curr_set), 0))
8249 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
8250 && GET_CODE (SET_SRC (prev_set)) == PLUS
8251 && REG_P (XEXP (SET_SRC (prev_set), 0))
8252 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1)))
8253 return true;
8254 }
8255
8256 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI))
8257 {
8258 /* We are trying to match the following:
8259 prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20))
8260 curr (addi) == (set (reg:DI rD)
8261 (plus:DI (reg:DI rD) (const_int IMM12))) */
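/* E.g. the classic absolute-address materialization idiom ("sym" is an
   arbitrary symbol):
   lui a0,%hi(sym)
   addi a0,a0,%lo(sym) */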
8262
8263 if ((GET_CODE (SET_SRC (curr_set)) == LO_SUM
8264 || (GET_CODE (SET_SRC (curr_set)) == PLUS
8265 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
8266 && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1)))))
8267 && (GET_CODE (SET_SRC (prev_set)) == HIGH
8268 || (CONST_INT_P (SET_SRC (prev_set))
8269 && LUI_OPERAND (INTVAL (SET_SRC (prev_set))))))
8270 return true;
8271 }
8272
8273 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI))
8274 {
8275 /* We are trying to match the following:
8276 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
8277 curr (addi) == (set (reg:DI rD)
8278 (plus:DI (reg:DI rD) (const_int IMM12)))
8279 and
8280 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
8281 curr (addi) == (set (reg:DI rD)
8282 (lo_sum:DI (reg:DI rD) (const_int IMM12))) */
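/* E.g. the pc-relative address materialization idiom ("sym" is an
   arbitrary symbol):
   1: auipc a0,%pcrel_hi(sym)
   addi a0,a0,%pcrel_lo(1b) */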
8283
8284 if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
&& XINT (SET_SRC (prev_set), 1) == UNSPEC_AUIPC
8286 && (GET_CODE (SET_SRC (curr_set)) == LO_SUM
8287 || (GET_CODE (SET_SRC (curr_set)) == PLUS
8288 && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1))))))
8289
8290 return true;
8291 }
8292
8293 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD))
8294 {
8295 /* We are trying to match the following:
8296 prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20))
8297 curr (ld) == (set (reg:DI rD)
8298 (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
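/* E.g. an absolute-address load (illustrative operands):
   lui a0,%hi(sym)
   ld a0,%lo(sym)(a0) */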
8299
8300 if (CONST_INT_P (SET_SRC (prev_set))
8301 && LUI_OPERAND (INTVAL (SET_SRC (prev_set)))
8302 && MEM_P (SET_SRC (curr_set))
8303 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
8304 return true;
8305
8306 if (GET_CODE (SET_SRC (prev_set)) == HIGH
8307 && MEM_P (SET_SRC (curr_set))
8308 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == LO_SUM
8309 && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
8310 return true;
8311
8312 if (GET_CODE (SET_SRC (prev_set)) == HIGH
8313 && (GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
8314 || GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND)
8315 && MEM_P (XEXP (SET_SRC (curr_set), 0))
8316 && (GET_CODE (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == LO_SUM
8317 && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (XEXP (XEXP (SET_SRC (curr_set), 0), 0), 0))))
8318 return true;
8319 }
8320
8321 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD))
8322 {
8323 /* We are trying to match the following:
8324 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
8325 curr (ld) == (set (reg:DI rD)
8326 (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
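/* E.g. a pc-relative load (illustrative operands):
   1: auipc a0,%pcrel_hi(sym)
   ld a0,%pcrel_lo(1b)(a0) */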
8327
8328 if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
&& XINT (SET_SRC (prev_set), 1) == UNSPEC_AUIPC
8330 && MEM_P (SET_SRC (curr_set))
8331 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
8332 return true;
8333 }
8334
8335 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ALIGNED_STD))
8336 {
8337 /* We are trying to match the following:
8338 prev (sd) == (set (mem (plus (reg sp|fp) (const_int)))
8339 (reg rS1))
8340 curr (sd) == (set (mem (plus (reg sp|fp) (const_int)))
8341 (reg rS2)) */
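/* E.g. two stores filling opposite halves of one aligned 16-byte block
   (illustrative operands):
   sd s0,0(sp)
   sd s1,8(sp) */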
8342
8343 if (MEM_P (SET_DEST (prev_set))
8344 && MEM_P (SET_DEST (curr_set))
8345 /* We can probably relax this condition. The documentation is a bit
8346 unclear about sub-word cases. So we just model DImode for now. */
8347 && GET_MODE (SET_DEST (curr_set)) == DImode
8348 && GET_MODE (SET_DEST (prev_set)) == DImode)
8349 {
8350 rtx base_prev, base_curr, offset_prev, offset_curr;
8351
8352 extract_base_offset_in_addr (SET_DEST (prev_set), &base_prev, &offset_prev);
8353 extract_base_offset_in_addr (SET_DEST (curr_set), &base_curr, &offset_curr);
8354
8355 /* The two stores must be contained within opposite halves of the same
8356 16 byte aligned block of memory. We know that the stack pointer and
8357 the frame pointer have suitable alignment. So we just need to check
8358 the offsets of the two stores for suitable alignment.
8359
8360 Originally the thought was to check MEM_ALIGN, but that was reporting
8361 incorrect alignments, even for SP/FP accesses, so we gave up on that
8362 approach. */
8363 if (base_prev != NULL_RTX
8364 && base_curr != NULL_RTX
8365 && REG_P (base_prev)
8366 && REG_P (base_curr)
8367 && REGNO (base_prev) == REGNO (base_curr)
8368 && (REGNO (base_prev) == STACK_POINTER_REGNUM
8369 || REGNO (base_prev) == HARD_FRAME_POINTER_REGNUM)
8370 && ((INTVAL (offset_prev) == INTVAL (offset_curr) + 8
8371 && (INTVAL (offset_prev) % 16) == 0)
8372 || ((INTVAL (offset_curr) == INTVAL (offset_prev) + 8)
8373 && (INTVAL (offset_curr) % 16) == 0)))
8374 return true;
8375 }
8376 }
8377
8378 return false;
8379 }
8380
8381 /* Adjust the cost/latency of instructions for scheduling.
8382 For now this is just used to change the latency of vector instructions
8383 according to their LMUL. We assume that an insn with LMUL == 8 requires
8384 eight times more execution cycles than the same insn with LMUL == 1.
8385 As this may cause very high latencies which lead to scheduling artifacts
8386 we currently only perform the adjustment when -madjust-lmul-cost is given.
8387 */
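/* E.g. (a sketch): with -madjust-lmul-cost an LMUL == 4 vector insn whose
   base latency is 3 is scheduled with latency 3 * 4 = 12, while a
   fractional LMUL == 1/2 insn with base latency 3 is truncated to
   MAX (1, 1) = 1. */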
8388 static int
8389 riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost,
8390 unsigned int)
8391 {
8392 /* Only do adjustments for the generic out-of-order scheduling model. */
8393 if (!TARGET_VECTOR || riscv_microarchitecture != generic_ooo)
8394 return cost;
8395
8396 if (recog_memoized (insn) < 0)
8397 return cost;
8398
8399 enum attr_type type = get_attr_type (insn);
8400
8401 if (type == TYPE_VFREDO || type == TYPE_VFWREDO)
8402 {
8403 /* TODO: For ordered reductions scale the base cost relative to the
8404 number of units. */
8405 ;
8406 }
8407
8408 /* Don't do any LMUL-based latency adjustment unless explicitly asked to. */
8409 if (!TARGET_ADJUST_LMUL_COST)
8410 return cost;
8411
8412 /* vsetvl has a vlmul attribute but its latency does not depend on it. */
8413 if (type == TYPE_VSETVL || type == TYPE_VSETVL_PRE)
8414 return cost;
8415
8416 enum riscv_vector::vlmul_type lmul =
8417 (riscv_vector::vlmul_type)get_attr_vlmul (insn);
8418
8419 double factor = 1;
8420 switch (lmul)
8421 {
8422 case riscv_vector::LMUL_2:
8423 factor = 2;
8424 break;
8425 case riscv_vector::LMUL_4:
8426 factor = 4;
8427 break;
8428 case riscv_vector::LMUL_8:
8429 factor = 8;
8430 break;
8431 case riscv_vector::LMUL_F2:
8432 factor = 0.5;
8433 break;
8434 case riscv_vector::LMUL_F4:
8435 factor = 0.25;
8436 break;
8437 case riscv_vector::LMUL_F8:
8438 factor = 0.125;
8439 break;
8440 default:
8441 factor = 1;
8442 }
8443
8444 /* If the latency was nonzero, keep it that way. */
8445 int new_cost = MAX (cost > 0 ? 1 : 0, cost * factor);
8446
8447 return new_cost;
8448 }
8449
8450 /* Auxiliary function to emit RISC-V ELF attribute. */
8451 static void
8452 riscv_emit_attribute ()
8453 {
8454 fprintf (asm_out_file, "\t.attribute arch, \"%s\"\n",
8455 riscv_arch_str ().c_str ());
8456
8457 fprintf (asm_out_file, "\t.attribute unaligned_access, %d\n",
8458 TARGET_STRICT_ALIGN ? 0 : 1);
8459
8460 fprintf (asm_out_file, "\t.attribute stack_align, %d\n",
8461 riscv_stack_boundary / 8);
8462 }
8463
/* Output .variant_cc for a function symbol that follows the vector calling
convention. */
8466
8467 static void
8468 riscv_asm_output_variant_cc (FILE *stream, const tree decl, const char *name)
8469 {
8470 if (TREE_CODE (decl) == FUNCTION_DECL)
8471 {
8472 riscv_cc cc = (riscv_cc) fndecl_abi (decl).id ();
8473 if (cc == RISCV_CC_V)
8474 {
8475 fprintf (stream, "\t.variant_cc\t");
8476 assemble_name (stream, name);
8477 fprintf (stream, "\n");
8478 }
8479 }
8480 }
8481
8482 /* Implement ASM_DECLARE_FUNCTION_NAME. */
8483
8484 void
8485 riscv_declare_function_name (FILE *stream, const char *name, tree fndecl)
8486 {
8487 riscv_asm_output_variant_cc (stream, fndecl, name);
8488 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
8489 ASM_OUTPUT_LABEL (stream, name);
8490 if (DECL_FUNCTION_SPECIFIC_TARGET (fndecl))
8491 {
8492 fprintf (stream, "\t.option push\n");
8493 std::string isa = riscv_current_subset_list ()->to_string (true);
8494 fprintf (stream, "\t.option arch, %s\n", isa.c_str ());
8495
8496 struct cl_target_option *local_cl_target =
8497 TREE_TARGET_OPTION (DECL_FUNCTION_SPECIFIC_TARGET (fndecl));
8498 struct cl_target_option *global_cl_target =
8499 TREE_TARGET_OPTION (target_option_default_node);
8500 const char *local_tune_str = get_tune_str (local_cl_target);
8501 const char *global_tune_str = get_tune_str (global_cl_target);
8502 if (strcmp (local_tune_str, global_tune_str) != 0)
8503 fprintf (stream, "\t# tune = %s\n", local_tune_str);
8504 }
8505 }
8506
8507 void
8508 riscv_declare_function_size (FILE *stream, const char *name, tree fndecl)
8509 {
8510 if (!flag_inhibit_size_directive)
8511 ASM_OUTPUT_MEASURED_SIZE (stream, name);
8512
8513 if (DECL_FUNCTION_SPECIFIC_TARGET (fndecl))
8514 {
8515 fprintf (stream, "\t.option pop\n");
8516 }
8517 }
8518
8519 /* Implement ASM_OUTPUT_DEF_FROM_DECLS. */
8520
8521 void
8522 riscv_asm_output_alias (FILE *stream, const tree decl, const tree target)
8523 {
8524 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
8525 const char *value = IDENTIFIER_POINTER (target);
8526 riscv_asm_output_variant_cc (stream, decl, name);
8527 ASM_OUTPUT_DEF (stream, name, value);
8528 }
8529
8530 /* Implement ASM_OUTPUT_EXTERNAL. */
8531
8532 void
8533 riscv_asm_output_external (FILE *stream, tree decl, const char *name)
8534 {
8535 default_elf_asm_output_external (stream, decl, name);
8536 riscv_asm_output_variant_cc (stream, decl, name);
8537 }
8538
8539 /* Implement TARGET_ASM_FILE_START. */
8540
8541 static void
8542 riscv_file_start (void)
8543 {
8544 default_file_start ();
8545
8546 /* Instruct GAS to generate position-[in]dependent code. */
8547 fprintf (asm_out_file, "\t.option %spic\n", (flag_pic ? "" : "no"));
8548
8549 /* If the user specifies "-mno-relax" on the command line then disable linker
8550 relaxation in the assembler. */
8551 if (! riscv_mrelax)
8552 fprintf (asm_out_file, "\t.option norelax\n");
8553
8554 /* If the user specifies "-mcsr-check" on the command line then enable csr
8555 check in the assembler. */
8556 if (riscv_mcsr_check)
8557 fprintf (asm_out_file, "\t.option csr-check\n");
8558
8559 if (riscv_emit_attribute_p)
8560 riscv_emit_attribute ();
8561 }
8562
8563 /* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than asm text
8564 in order to avoid duplicating too much logic from elsewhere. */
8565
8566 static void
8567 riscv_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8568 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8569 tree function)
8570 {
8571 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
8572 rtx this_rtx, temp1, temp2, fnaddr;
8573 rtx_insn *insn;
8574
8575 riscv_in_thunk_func = true;
8576
8577 /* Pretend to be a post-reload pass while generating rtl. */
8578 reload_completed = 1;
8579
8580 /* Mark the end of the (empty) prologue. */
8581 emit_note (NOTE_INSN_PROLOGUE_END);
8582
8583 /* Determine if we can use a sibcall to call FUNCTION directly. */
8584 fnaddr = gen_rtx_MEM (FUNCTION_MODE, XEXP (DECL_RTL (function), 0));
8585
8586 /* We need two temporary registers in some cases. */
8587 temp1 = gen_rtx_REG (Pmode, RISCV_PROLOGUE_TEMP_REGNUM);
8588 temp2 = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
8589
8590 /* Find out which register contains the "this" pointer. */
8591 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8592 this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST + 1);
8593 else
8594 this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST);
8595
8596 /* Add DELTA to THIS_RTX. */
8597 if (delta != 0)
8598 {
8599 rtx offset = GEN_INT (delta);
8600 if (!SMALL_OPERAND (delta))
8601 {
8602 riscv_emit_move (temp1, offset);
8603 offset = temp1;
8604 }
8605 emit_insn (gen_add3_insn (this_rtx, this_rtx, offset));
8606 }
8607
8608 /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
8609 if (vcall_offset != 0)
8610 {
8611 rtx addr;
8612
8613 /* Set TEMP1 to *THIS_RTX. */
8614 riscv_emit_move (temp1, gen_rtx_MEM (Pmode, this_rtx));
8615
8616 /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET. */
8617 addr = riscv_add_offset (temp2, temp1, vcall_offset);
8618
8619 /* Load the offset and add it to THIS_RTX. */
8620 riscv_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
8621 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1));
8622 }
8623
8624 /* Jump to the target function. */
8625 rtx callee_cc = gen_int_mode (fndecl_abi (function).id (), SImode);
8626 insn = emit_call_insn (gen_sibcall (fnaddr, const0_rtx, callee_cc));
8627 SIBLING_CALL_P (insn) = 1;
8628
8629 /* Run just enough of rest_of_compilation. This sequence was
8630 "borrowed" from alpha.cc. */
8631 insn = get_insns ();
8632 split_all_insns_noflow ();
8633 shorten_branches (insn);
8634 assemble_start_function (thunk_fndecl, fnname);
8635 final_start_function (insn, file, 1);
8636 final (insn, file, 1);
8637 final_end_function ();
8638 assemble_end_function (thunk_fndecl, fnname);
8639
8640 /* Clean up the vars set above. Note that final_end_function resets
8641 the global pointer for us. */
8642 reload_completed = 0;
8643 riscv_in_thunk_func = false;
8644 }
8645
8646 /* Allocate a chunk of memory for per-function machine-dependent data. */
8647
8648 static struct machine_function *
8649 riscv_init_machine_status (void)
8650 {
8651 return ggc_cleared_alloc<machine_function> ();
8652 }
8653
8654 /* Return the VLEN value associated with -march.
TODO: So far we only support length-agnostic values. */
8656 static poly_uint16
8657 riscv_convert_vector_bits (struct gcc_options *opts)
8658 {
8659 int chunk_num;
8660 int min_vlen = TARGET_MIN_VLEN_OPTS (opts);
8661 if (min_vlen > 32)
8662 {
/* When targeting minimum VLEN > 32, we should use a 64-bit chunk size.
Otherwise we cannot include SEW = 64 bits.
Runtime invariant: The single indeterminate represents the
number of 64-bit chunks in a vector beyond the minimum length of 64 bits.
8667 Thus the number of bytes in a vector is 8 + 8 * x1 which is
8668 riscv_vector_chunks * 8 = poly_int (8, 8). */
8669 riscv_bytes_per_vector_chunk = 8;
8670 /* Adjust BYTES_PER_RISCV_VECTOR according to TARGET_MIN_VLEN:
8671 - TARGET_MIN_VLEN = 64bit: [8,8]
8672 - TARGET_MIN_VLEN = 128bit: [16,16]
8673 - TARGET_MIN_VLEN = 256bit: [32,32]
8674 - TARGET_MIN_VLEN = 512bit: [64,64]
8675 - TARGET_MIN_VLEN = 1024bit: [128,128]
8676 - TARGET_MIN_VLEN = 2048bit: [256,256]
8677 - TARGET_MIN_VLEN = 4096bit: [512,512]
8678 FIXME: We currently DON'T support TARGET_MIN_VLEN > 4096bit. */
8679 chunk_num = min_vlen / 64;
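/* E.g. TARGET_MIN_VLEN = 128 gives chunk_num = 2, so riscv_vector_chunks
   becomes poly (2, 2) and BYTES_PER_RISCV_VECTOR = [16, 16], matching the
   table above. */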
8680 }
8681 else
8682 {
/* When targeting minimum VLEN = 32, we should use a 32-bit
chunk size. Runtime invariant: The single indeterminate represents the
number of 32-bit chunks in a vector beyond the minimum length of 32 bits.
8686 Thus the number of bytes in a vector is 4 + 4 * x1 which is
8687 riscv_vector_chunks * 4 = poly_int (4, 4). */
8688 riscv_bytes_per_vector_chunk = 4;
8689 chunk_num = 1;
8690 }
8691
8692 /* Set riscv_vector_chunks as poly (1, 1) run-time constant if TARGET_VECTOR
8693 is enabled. Set riscv_vector_chunks as 1 compile-time constant if
8694 TARGET_VECTOR is disabled. riscv_vector_chunks is used in "riscv-modes.def"
8695 to set RVV mode size. The RVV machine modes size are run-time constant if
8696 TARGET_VECTOR is enabled. The RVV machine modes size remains default
8697 compile-time constant if TARGET_VECTOR is disabled. */
8698 if (TARGET_VECTOR_OPTS_P (opts))
8699 {
8700 if (opts->x_riscv_autovec_preference == RVV_FIXED_VLMAX)
8701 return (int) min_vlen / (riscv_bytes_per_vector_chunk * 8);
8702 else
8703 return poly_uint16 (chunk_num, chunk_num);
8704 }
8705 else
8706 return 1;
8707 }
8708
/* 'Unpack' the internal tuning structs and update the options
8710 in OPTS. The caller must have set up selected_tune and selected_arch
8711 as all the other target-specific codegen decisions are
8712 derived from them. */
8713 void
8714 riscv_override_options_internal (struct gcc_options *opts)
8715 {
8716 const struct riscv_tune_info *cpu;
8717
8718 /* The presence of the M extension implies that division instructions
8719 are present, so include them unless explicitly disabled. */
8720 if (TARGET_MUL_OPTS_P (opts) && (target_flags_explicit & MASK_DIV) == 0)
8721 opts->x_target_flags |= MASK_DIV;
8722 else if (!TARGET_MUL_OPTS_P (opts) && TARGET_DIV_OPTS_P (opts))
8723 error ("%<-mdiv%> requires %<-march%> to subsume the %<M%> extension");
8724
8725 /* Likewise floating-point division and square root. */
8726 if ((TARGET_HARD_FLOAT_OPTS_P (opts) || TARGET_ZFINX_OPTS_P (opts))
8727 && ((target_flags_explicit & MASK_FDIV) == 0))
8728 opts->x_target_flags |= MASK_FDIV;
8729
8730 /* Handle -mtune, use -mcpu if -mtune is not given, and use default -mtune
8731 if both -mtune and -mcpu are not given. */
8732 const char *tune_string = get_tune_str (opts);
8733 cpu = riscv_parse_tune (tune_string, false);
8734 riscv_microarchitecture = cpu->microarchitecture;
8735 tune_param = opts->x_optimize_size
8736 ? &optimize_size_tune_info
8737 : cpu->tune_param;
8738
8739 /* Use -mtune's setting for slow_unaligned_access, even when optimizing
8740 for size. For architectures that trap and emulate unaligned accesses,
8741 the performance cost is too great, even for -Os. Similarly, if
8742 -m[no-]strict-align is left unspecified, heed -mtune's advice. */
8743 riscv_slow_unaligned_access_p = (cpu->tune_param->slow_unaligned_access
8744 || TARGET_STRICT_ALIGN);
8745
/* Make a note if the user explicitly passed -mstrict-align for later
builtin macro generation. Can't use target_flags_explicit since
it is set even for -mno-strict-align. */
8749 riscv_user_wants_strict_align = TARGET_STRICT_ALIGN_OPTS_P (opts);
8750
8751 if ((target_flags_explicit & MASK_STRICT_ALIGN) == 0
8752 && cpu->tune_param->slow_unaligned_access)
8753 opts->x_target_flags |= MASK_STRICT_ALIGN;
8754
8755 /* If the user hasn't specified a branch cost, use the processor's
8756 default. */
8757 if (opts->x_riscv_branch_cost == 0)
8758 opts->x_riscv_branch_cost = tune_param->branch_cost;
8759
8760 /* FIXME: We don't allow TARGET_MIN_VLEN > 4096 since the datatypes of
8761 both GET_MODE_SIZE and GET_MODE_BITSIZE are poly_uint16.
8762
8763 We can only allow TARGET_MIN_VLEN * 8 (LMUL) < 65535. */
8764 if (TARGET_MIN_VLEN_OPTS (opts) > 4096)
8765 sorry ("Current RISC-V GCC cannot support VLEN greater than 4096bit for "
8766 "'V' Extension");
8767
/* Convert -march to a chunk count. */
8769 riscv_vector_chunks = riscv_convert_vector_bits (opts);
8770 }
8771
8772 /* Implement TARGET_OPTION_OVERRIDE. */
8773
8774 static void
8775 riscv_option_override (void)
8776 {
8777 #ifdef SUBTARGET_OVERRIDE_OPTIONS
8778 SUBTARGET_OVERRIDE_OPTIONS;
8779 #endif
8780
8781 flag_pcc_struct_return = 0;
8782
8783 if (flag_pic)
8784 g_switch_value = 0;
8785
/* Always prefer medlow to medany for RV32 since medlow can access the
full address space. */
8788 if (riscv_cmodel == CM_LARGE && !TARGET_64BIT)
8789 riscv_cmodel = CM_MEDLOW;
8790
8791 if (riscv_cmodel == CM_LARGE && TARGET_EXPLICIT_RELOCS)
8792 sorry ("code model %qs with %qs", "large", "-mexplicit-relocs");
8793
8794 if (riscv_cmodel == CM_LARGE && flag_pic)
8795 sorry ("code model %qs with %qs", "large",
8796 global_options.x_flag_pic > 1 ? "-fPIC" : "-fpic");
8797
8798 if (flag_pic)
8799 riscv_cmodel = CM_PIC;
8800
/* With -fno-omit-frame-pointer combined with -momit-leaf-frame-pointer we
need to save the fp together with ra for non-leaf functions, and neither
of them for leaf functions. x_flag_omit_frame_pointer has the highest
priority in deciding whether the frame pointer is needed; if we did not
override it, fp and ra would also be stored for leaf functions, which is
not what we want. */
8807 riscv_save_frame_pointer = false;
8808 if (TARGET_OMIT_LEAF_FRAME_POINTER_P (global_options.x_target_flags))
8809 {
8810 if (!global_options.x_flag_omit_frame_pointer)
8811 riscv_save_frame_pointer = true;
8812
8813 global_options.x_flag_omit_frame_pointer = 1;
8814 }
8815
8816 /* We get better code with explicit relocs for CM_MEDLOW, but
8817 worse code for the others (for now). Pick the best default. */
8818 if ((target_flags_explicit & MASK_EXPLICIT_RELOCS) == 0)
8819 if (riscv_cmodel == CM_MEDLOW)
8820 target_flags |= MASK_EXPLICIT_RELOCS;
8821
8822 /* Require that the ISA supports the requested floating-point ABI. */
8823 if (UNITS_PER_FP_ARG > (TARGET_HARD_FLOAT ? UNITS_PER_FP_REG : 0))
8824 error ("requested ABI requires %<-march%> to subsume the %qc extension",
8825 UNITS_PER_FP_ARG > 8 ? 'Q' : (UNITS_PER_FP_ARG > 4 ? 'D' : 'F'));
8826
/* RVE requires a specific ABI. */
8828 if (TARGET_RVE)
8829 {
8830 if (!TARGET_64BIT && riscv_abi != ABI_ILP32E)
8831 error ("rv32e requires ilp32e ABI");
8832 else if (TARGET_64BIT && riscv_abi != ABI_LP64E)
8833 error ("rv64e requires lp64e ABI");
8834 }
8835
/* Zfinx requires the ilp32, ilp32e, lp64 or lp64e ABI. */
8837 if (TARGET_ZFINX
8838 && riscv_abi != ABI_ILP32 && riscv_abi != ABI_LP64
8839 && riscv_abi != ABI_ILP32E && riscv_abi != ABI_LP64E)
8840 error ("z*inx requires ABI ilp32, ilp32e, lp64 or lp64e");
8841
8842 /* We do not yet support ILP32 on RV64. */
8843 if (BITS_PER_WORD != POINTER_SIZE)
8844 error ("ABI requires %<-march=rv%d%>", POINTER_SIZE);
8845
8846 /* Validate -mpreferred-stack-boundary= value. */
8847 riscv_stack_boundary = ABI_STACK_BOUNDARY;
8848 if (riscv_preferred_stack_boundary_arg)
8849 {
8850 int min = ctz_hwi (STACK_BOUNDARY / 8);
8851 int max = 8;
8852
8853 if (!IN_RANGE (riscv_preferred_stack_boundary_arg, min, max))
8854 error ("%<-mpreferred-stack-boundary=%d%> must be between %d and %d",
8855 riscv_preferred_stack_boundary_arg, min, max);
8856
8857 riscv_stack_boundary = 8 << riscv_preferred_stack_boundary_arg;
8858 }
8859
8860 if (riscv_emit_attribute_p < 0)
8861 #ifdef HAVE_AS_RISCV_ATTRIBUTE
8862 riscv_emit_attribute_p = TARGET_RISCV_ATTRIBUTE;
8863 #else
8864 riscv_emit_attribute_p = 0;
8865
8866 if (riscv_emit_attribute_p)
8867 error ("%<-mriscv-attribute%> RISC-V ELF attribute requires GNU as 2.32"
8868 " [%<-mriscv-attribute%>]");
8869 #endif
8870
8871 if (riscv_stack_protector_guard == SSP_GLOBAL
8872 && OPTION_SET_P (riscv_stack_protector_guard_offset_str))
8873 {
8874 error ("incompatible options %<-mstack-protector-guard=global%> and "
8875 "%<-mstack-protector-guard-offset=%s%>",
8876 riscv_stack_protector_guard_offset_str);
8877 }
8878
8879 if (riscv_stack_protector_guard == SSP_TLS
8880 && !(OPTION_SET_P (riscv_stack_protector_guard_offset_str)
8881 && OPTION_SET_P (riscv_stack_protector_guard_reg_str)))
8882 {
8883 error ("both %<-mstack-protector-guard-offset%> and "
8884 "%<-mstack-protector-guard-reg%> must be used "
8885 "with %<-mstack-protector-guard=sysreg%>");
8886 }
8887
8888 if (OPTION_SET_P (riscv_stack_protector_guard_reg_str))
8889 {
8890 const char *str = riscv_stack_protector_guard_reg_str;
8891 int reg = decode_reg_name (str);
8892
8893 if (!IN_RANGE (reg, GP_REG_FIRST + 1, GP_REG_LAST))
8894 error ("%qs is not a valid base register in %qs", str,
8895 "-mstack-protector-guard-reg=");
8896
8897 riscv_stack_protector_guard_reg = reg;
8898 }
8899
8900 if (OPTION_SET_P (riscv_stack_protector_guard_offset_str))
8901 {
8902 char *end;
8903 const char *str = riscv_stack_protector_guard_offset_str;
8904 errno = 0;
8905 long offs = strtol (riscv_stack_protector_guard_offset_str, &end, 0);
8906
8907 if (!*str || *end || errno)
8908 error ("%qs is not a valid number in %qs", str,
8909 "-mstack-protector-guard-offset=");
8910
8911 if (!SMALL_OPERAND (offs))
8912 error ("%qs is not a valid offset in %qs", str,
8913 "-mstack-protector-guard-offset=");
8914
8915 riscv_stack_protector_guard_offset = offs;
8916 }
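
/* For illustration, a hypothetical command line
-mstack-protector-guard=tls -mstack-protector-guard-reg=tp
-mstack-protector-guard-offset=0x20
passes the checks above: "tp" decodes to a valid base GPR and 0x20 fits
SMALL_OPERAND (a signed 12-bit immediate), so the canary is read from
0x20(tp) rather than from the global __stack_chk_guard. */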
8917
8918 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
8919 param_sched_pressure_algorithm,
8920 SCHED_PRESSURE_MODEL);
8921
8922 /* Function to allocate machine-dependent function status. */
8923 init_machine_status = &riscv_init_machine_status;
8924
8925 riscv_override_options_internal (&global_options);
8926
8927 /* Save these options as the default ones in case we push and pop them later
8928 while processing functions with potential target attributes. */
8929 target_option_default_node = target_option_current_node
8930 = build_target_option_node (&global_options, &global_options_set);
8931 }
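
/* A worked example of the overrides above: compiling for RV32 with
-mcmodel=large first falls back to -mcmodel=medlow, since RV32 cannot
use the large model, and any -fpic/-fPIC compilation ends up with
CM_PIC regardless of -mcmodel, because flag_pic is applied last. */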
8932
8933 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
8934 Used by riscv_set_current_function to
8935 make sure optab availability predicates are recomputed when necessary. */
8936
8937 void
8938 riscv_save_restore_target_globals (tree new_tree)
8939 {
8940 if (TREE_TARGET_GLOBALS (new_tree))
8941 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
8942 else if (new_tree == target_option_default_node)
8943 restore_target_globals (&default_target_globals);
8944 else
8945 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
8946 }
8947
8948 /* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
8949 using the information saved in PTR. */
8950
8951 static void
8952 riscv_option_restore (struct gcc_options *opts,
8953 struct gcc_options * /* opts_set */,
8954 struct cl_target_option * /* ptr */)
8955 {
8956 riscv_override_options_internal (opts);
8957 }
8958
8959 static GTY (()) tree riscv_previous_fndecl;
8960
8961 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
8962
8963 static void
8964 riscv_conditional_register_usage (void)
8965 {
8966 /* We have only x0~x15 on RV32E/RV64E. */
8967 if (TARGET_RVE)
8968 {
8969 for (int r = 16; r <= 31; r++)
8970 fixed_regs[r] = 1;
8971 }
8972
8973 if (riscv_abi == ABI_ILP32E)
8974 {
8975 for (int r = 16; r <= 31; r++)
8976 call_used_regs[r] = 1;
8977 }
8978
8979 if (!TARGET_HARD_FLOAT)
8980 {
8981 for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8982 fixed_regs[regno] = call_used_regs[regno] = 1;
8983 }
8984
8985 /* In the soft-float ABI, there are no callee-saved FP registers. */
8986 if (UNITS_PER_FP_ARG == 0)
8987 {
8988 for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8989 call_used_regs[regno] = 1;
8990 }
8991
8992 if (!TARGET_VECTOR)
8993 {
8994 for (int regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
8995 fixed_regs[regno] = call_used_regs[regno] = 1;
8996
8997 fixed_regs[VTYPE_REGNUM] = call_used_regs[VTYPE_REGNUM] = 1;
8998 fixed_regs[VL_REGNUM] = call_used_regs[VL_REGNUM] = 1;
8999 fixed_regs[VXRM_REGNUM] = call_used_regs[VXRM_REGNUM] = 1;
9000 fixed_regs[FRM_REGNUM] = call_used_regs[FRM_REGNUM] = 1;
9001 }
9002 }
9003
9004 /* Return a register priority for hard reg REGNO. */
9005
9006 static int
9007 riscv_register_priority (int regno)
9008 {
9009 /* Favor compressed registers to improve the odds of RVC instruction
9010 selection. */
9011 if (riscv_compressed_reg_p (regno))
9012 return 1;
9013
9014 return 0;
9015 }
9016
9017 /* Implement TARGET_TRAMPOLINE_INIT. */
9018
9019 static void
9020 riscv_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
9021 {
9022 rtx addr, end_addr, mem;
9023 uint32_t trampoline[4];
9024 unsigned int i;
9025 HOST_WIDE_INT static_chain_offset, target_function_offset;
9026
9027 /* Work out the offsets of the pointers from the start of the
9028 trampoline code. */
9029 gcc_assert (ARRAY_SIZE (trampoline) * 4 == TRAMPOLINE_CODE_SIZE);
9030
9031 /* Get pointers to the beginning and end of the code block. */
9032 addr = force_reg (Pmode, XEXP (m_tramp, 0));
9033 end_addr = riscv_force_binary (Pmode, PLUS, addr,
9034 GEN_INT (TRAMPOLINE_CODE_SIZE));
9035
9036
9037 if (Pmode == SImode)
9038 {
9039 chain_value = force_reg (Pmode, chain_value);
9040
9041 rtx target_function = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9042 /* lui t2, hi(chain)
9043 lui t0, hi(func)
9044 addi t2, t2, lo(chain)
9045 jr t0, lo(func)
9046 */
9047 unsigned HOST_WIDE_INT lui_hi_chain_code, lui_hi_func_code;
9048 unsigned HOST_WIDE_INT lo_chain_code, lo_func_code;
9049
9050 rtx uimm_mask = force_reg (SImode, gen_int_mode (-IMM_REACH, SImode));
9051
9052 /* 0xfff. */
9053 rtx imm12_mask = gen_reg_rtx (SImode);
9054 emit_insn (gen_one_cmplsi2 (imm12_mask, uimm_mask));
9055
9056 rtx fixup_value = force_reg (SImode, gen_int_mode (IMM_REACH/2, SImode));
9057
9058 /* Gen lui t2, hi(chain). */
9059 rtx hi_chain = riscv_force_binary (SImode, PLUS, chain_value,
9060 fixup_value);
9061 hi_chain = riscv_force_binary (SImode, AND, hi_chain,
9062 uimm_mask);
9063 lui_hi_chain_code = OPCODE_LUI | (STATIC_CHAIN_REGNUM << SHIFT_RD);
9064 rtx lui_hi_chain = riscv_force_binary (SImode, IOR, hi_chain,
9065 gen_int_mode (lui_hi_chain_code, SImode));
9066
9067 mem = adjust_address (m_tramp, SImode, 0);
9068 riscv_emit_move (mem, riscv_swap_instruction (lui_hi_chain));
9069
9070 /* Gen lui t0, hi(func). */
9071 rtx hi_func = riscv_force_binary (SImode, PLUS, target_function,
9072 fixup_value);
9073 hi_func = riscv_force_binary (SImode, AND, hi_func,
9074 uimm_mask);
9075 lui_hi_func_code = OPCODE_LUI | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RD);
9076 rtx lui_hi_func = riscv_force_binary (SImode, IOR, hi_func,
9077 gen_int_mode (lui_hi_func_code, SImode));
9078
9079 mem = adjust_address (m_tramp, SImode, 1 * GET_MODE_SIZE (SImode));
9080 riscv_emit_move (mem, riscv_swap_instruction (lui_hi_func));
9081
9082 /* Gen addi t2, t2, lo(chain). */
9083 rtx lo_chain = riscv_force_binary (SImode, AND, chain_value,
9084 imm12_mask);
9085 lo_chain = riscv_force_binary (SImode, ASHIFT, lo_chain, GEN_INT (20));
9086
9087 lo_chain_code = OPCODE_ADDI
9088 | (STATIC_CHAIN_REGNUM << SHIFT_RD)
9089 | (STATIC_CHAIN_REGNUM << SHIFT_RS1);
9090
9091 rtx addi_lo_chain = riscv_force_binary (SImode, IOR, lo_chain,
9092 force_reg (SImode, GEN_INT (lo_chain_code)));
9093
9094 mem = adjust_address (m_tramp, SImode, 2 * GET_MODE_SIZE (SImode));
9095 riscv_emit_move (mem, riscv_swap_instruction (addi_lo_chain));
9096
9097 /* Gen jr t0, lo(func). */
9098 rtx lo_func = riscv_force_binary (SImode, AND, target_function,
9099 imm12_mask);
9100 lo_func = riscv_force_binary (SImode, ASHIFT, lo_func, GEN_INT (20));
9101
9102 lo_func_code = OPCODE_JALR | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RS1);
9103
9104 rtx jr_lo_func = riscv_force_binary (SImode, IOR, lo_func,
9105 force_reg (SImode, GEN_INT (lo_func_code)));
9106
9107 mem = adjust_address (m_tramp, SImode, 3 * GET_MODE_SIZE (SImode));
9108 riscv_emit_move (mem, riscv_swap_instruction (jr_lo_func));
9109 }
9110 else
9111 {
9112 static_chain_offset = TRAMPOLINE_CODE_SIZE;
9113 target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode);
9114
9115 /* auipc t2, 0
9116 l[wd] t0, target_function_offset(t2)
9117 l[wd] t2, static_chain_offset(t2)
9118 jr t0
9119 */
9120 trampoline[0] = OPCODE_AUIPC | (STATIC_CHAIN_REGNUM << SHIFT_RD);
9121 trampoline[1] = (Pmode == DImode ? OPCODE_LD : OPCODE_LW)
9122 | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RD)
9123 | (STATIC_CHAIN_REGNUM << SHIFT_RS1)
9124 | (target_function_offset << SHIFT_IMM);
9125 trampoline[2] = (Pmode == DImode ? OPCODE_LD : OPCODE_LW)
9126 | (STATIC_CHAIN_REGNUM << SHIFT_RD)
9127 | (STATIC_CHAIN_REGNUM << SHIFT_RS1)
9128 | (static_chain_offset << SHIFT_IMM);
9129 trampoline[3] = OPCODE_JALR | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RS1);
9130
9131 /* Copy the trampoline code. */
9132 for (i = 0; i < ARRAY_SIZE (trampoline); i++)
9133 {
9134 if (BYTES_BIG_ENDIAN)
9135 trampoline[i] = __builtin_bswap32(trampoline[i]);
9136 mem = adjust_address (m_tramp, SImode, i * GET_MODE_SIZE (SImode));
9137 riscv_emit_move (mem, gen_int_mode (trampoline[i], SImode));
9138 }
9139
9140 /* Set up the static chain pointer field. */
9141 mem = adjust_address (m_tramp, ptr_mode, static_chain_offset);
9142 riscv_emit_move (mem, chain_value);
9143
9144 /* Set up the target function field. */
9145 mem = adjust_address (m_tramp, ptr_mode, target_function_offset);
9146 riscv_emit_move (mem, XEXP (DECL_RTL (fndecl), 0));
9147 }
9148
9149 /* Flush the code part of the trampoline. */
9150 emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE)));
9151 emit_insn (gen_clear_cache (addr, end_addr));
9152 }
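
/* For reference, a sketch of the RV64 trampoline built above, with the
static chain at offset 16 and the target address at offset 24:

	auipc	t2, 0			# t2 <- trampoline address
	ld	t0, 24(t2)		# t0 <- target function
	ld	t2, 16(t2)		# t2 <- static chain value
	jr	t0

The Pmode == SImode path instead composes the lui/addi/jr encodings at
run time from the chain and function addresses. */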
9153
9154 /* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */
9155
9156 static bool
9157 riscv_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
9158 tree exp ATTRIBUTE_UNUSED)
9159 {
9160 /* Don't use sibcalls when using the save-restore routines. */
9161 if (TARGET_SAVE_RESTORE)
9162 return false;
9163
9164 /* Don't use sibcall for naked functions. */
9165 if (cfun->machine->naked_p)
9166 return false;
9167
9168 /* Don't use sibcall for interrupt functions. */
9169 if (cfun->machine->interrupt_handler_p)
9170 return false;
9171
9172 /* Don't use sibcalls in the large model, because sibcall expansion
9173 and epilogue expansion both use the RISCV_PROLOGUE_TEMP
9174 register. */
9175 if (riscv_cmodel == CM_LARGE)
9176 return false;
9177
9178 return true;
9179 }
9180
9181 /* Get the interrupt type; return UNKNOWN_MODE if DECL is not an
9182 interrupt function. */
9183 static enum riscv_privilege_levels
9184 riscv_get_interrupt_type (tree decl)
9185 {
9186 gcc_assert (decl != NULL_TREE);
9187
9188 if ((TREE_CODE(decl) != FUNCTION_DECL)
9189 || (!riscv_interrupt_type_p (TREE_TYPE (decl))))
9190 return UNKNOWN_MODE;
9191
9192 tree attr_args
9193 = TREE_VALUE (lookup_attribute ("interrupt",
9194 TYPE_ATTRIBUTES (TREE_TYPE (decl))));
9195
9196 if (attr_args && TREE_CODE (TREE_VALUE (attr_args)) != VOID_TYPE)
9197 {
9198 const char *string = TREE_STRING_POINTER (TREE_VALUE (attr_args));
9199
9200 if (!strcmp (string, "user"))
9201 return USER_MODE;
9202 else if (!strcmp (string, "supervisor"))
9203 return SUPERVISOR_MODE;
9204 else /* Must be "machine". */
9205 return MACHINE_MODE;
9206 }
9207 else
9208 /* Interrupt attributes are machine mode by default. */
9209 return MACHINE_MODE;
9210 }
9211
9212 /* Implement `TARGET_SET_CURRENT_FUNCTION'. Unpack the codegen decisions,
9213 such as tuning and ISA features, from the DECL_FUNCTION_SPECIFIC_TARGET
9214 of the function, if one exists. This function may be called multiple
9215 times on a single function so use riscv_previous_fndecl to avoid
9216 setting up identical state. */
9217 
9218 /* Sanity checking for the function attributes handled below. */
9219 static void
9220 riscv_set_current_function (tree decl)
9221 {
9222 if (decl == NULL_TREE
9223 || current_function_decl == NULL_TREE
9224 || current_function_decl == error_mark_node
9225 || ! cfun->machine)
9226 return;
9227
9228 if (!cfun->machine->attributes_checked_p)
9229 {
9230 cfun->machine->naked_p = riscv_naked_function_p (decl);
9231 cfun->machine->interrupt_handler_p
9232 = riscv_interrupt_type_p (TREE_TYPE (decl));
9233
9234 if (cfun->machine->naked_p && cfun->machine->interrupt_handler_p)
9235 error ("function attributes %qs and %qs are mutually exclusive",
9236 "interrupt", "naked");
9237
9238 if (cfun->machine->interrupt_handler_p)
9239 {
9240 tree ret = TREE_TYPE (TREE_TYPE (decl));
9241 tree args = TYPE_ARG_TYPES (TREE_TYPE (decl));
9242
9243 if (TREE_CODE (ret) != VOID_TYPE)
9244 error ("%qs function cannot return a value", "interrupt");
9245
9246 if (args && TREE_CODE (TREE_VALUE (args)) != VOID_TYPE)
9247 error ("%qs function cannot have arguments", "interrupt");
9248
9249 cfun->machine->interrupt_mode = riscv_get_interrupt_type (decl);
9250
9251 gcc_assert (cfun->machine->interrupt_mode != UNKNOWN_MODE);
9252 }
9253
9254 /* Don't print the above diagnostics more than once. */
9255 cfun->machine->attributes_checked_p = 1;
9256 }
9257
9258 if (!decl || decl == riscv_previous_fndecl)
9259 return;
9260
9261 tree old_tree = (riscv_previous_fndecl
9262 ? DECL_FUNCTION_SPECIFIC_TARGET (riscv_previous_fndecl)
9263 : NULL_TREE);
9264
9265 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (decl);
9266
9267 /* If current function has no attributes but the previous one did,
9268 use the default node. */
9269 if (!new_tree && old_tree)
9270 new_tree = target_option_default_node;
9271
9272 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop
9273 to the default has been handled by riscv_save_restore_target_globals
9274 from the target pragma parsing code. */
9275 if (old_tree == new_tree)
9276 return;
9277
9278 riscv_previous_fndecl = decl;
9279
9280 /* First set the target options. */
9281 cl_target_option_restore (&global_options, &global_options_set,
9282 TREE_TARGET_OPTION (new_tree));
9283
9284 riscv_save_restore_target_globals (new_tree);
9285 }
9286
9287 /* Implement TARGET_MERGE_DECL_ATTRIBUTES. */
9288 static tree
9289 riscv_merge_decl_attributes (tree olddecl, tree newdecl)
9290 {
9291 tree combined_attrs;
9292
9293 enum riscv_privilege_levels old_interrupt_type
9294 = riscv_get_interrupt_type (olddecl);
9295 enum riscv_privilege_levels new_interrupt_type
9296 = riscv_get_interrupt_type (newdecl);
9297
9298 /* Check that the old and new decls have the same interrupt type. */
9299 if ((old_interrupt_type != UNKNOWN_MODE)
9300 && (new_interrupt_type != UNKNOWN_MODE)
9301 && (old_interrupt_type != new_interrupt_type))
9302 error ("%qs function cannot have different interrupt type", "interrupt");
9303
9304 /* Create combined attributes. */
9305 combined_attrs = merge_attributes (DECL_ATTRIBUTES (olddecl),
9306 DECL_ATTRIBUTES (newdecl));
9307
9308 return combined_attrs;
9309 }
9310
9311 /* Implement TARGET_CANNOT_COPY_INSN_P. */
9312
9313 static bool
9314 riscv_cannot_copy_insn_p (rtx_insn *insn)
9315 {
9316 return recog_memoized (insn) >= 0 && get_attr_cannot_copy (insn);
9317 }
9318
9319 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. */
9320
9321 static bool
9322 riscv_slow_unaligned_access (machine_mode, unsigned int)
9323 {
9324 return riscv_slow_unaligned_access_p;
9325 }
9326
9327 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
9328
9329 static bool
9330 riscv_can_change_mode_class (machine_mode from, machine_mode to,
9331 reg_class_t rclass)
9332 {
9333 /* We have RVV VLS modes and VLA modes sharing the same REG_CLASS.
9334 In the 'cprop_hardreg' pass, we try to do hard reg copy propagation
9335 between a wider mode (FROM) and a narrower mode (TO).
9336 
9337 E.g. we should not allow copy propagation
9338 - RVVMF8BI (precision = [16, 16]) -> V32BI (precision = [32, 0])
9339 since we can't order their sizes, which would cause an ICE in regcprop.
9340 
9341 TODO: Even though they have different sizes, they always change
9342 the whole register. We may enhance this case in regcprop to optimize
9343 it in the future. */
9344 if (reg_classes_intersect_p (V_REGS, rclass)
9345 && !ordered_p (GET_MODE_PRECISION (from), GET_MODE_PRECISION (to)))
9346 return false;
9347 return !reg_classes_intersect_p (FP_REGS, rclass);
9348 }
9349
9350 /* Implement TARGET_CONSTANT_ALIGNMENT. */
9351
9352 static HOST_WIDE_INT
9353 riscv_constant_alignment (const_tree exp, HOST_WIDE_INT align)
9354 {
9355 if ((TREE_CODE (exp) == STRING_CST || TREE_CODE (exp) == CONSTRUCTOR)
9356 && (riscv_align_data_type == riscv_align_data_type_xlen))
9357 return MAX (align, BITS_PER_WORD);
9358 return align;
9359 }
9360
9361 /* Implement TARGET_PROMOTE_FUNCTION_MODE. */
9362
9363 /* This function is equivalent to default_promote_function_mode_always_promote
9364 except that it returns a promoted mode even if type is NULL_TREE. This is
9365 needed by libcalls which have no type (only a mode) such as fixed conversion
9366 routines that take a signed or unsigned char/short/int argument and convert
9367 it to a fixed type. */
9368
9369 static machine_mode
9370 riscv_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9371 machine_mode mode,
9372 int *punsignedp ATTRIBUTE_UNUSED,
9373 const_tree fntype ATTRIBUTE_UNUSED,
9374 int for_return ATTRIBUTE_UNUSED)
9375 {
9376 int unsignedp;
9377
9378 if (type != NULL_TREE)
9379 return promote_mode (type, mode, punsignedp);
9380
9381 unsignedp = *punsignedp;
9382 scalar_mode smode = as_a <scalar_mode> (mode);
9383 PROMOTE_MODE (smode, unsignedp, type);
9384 *punsignedp = unsignedp;
9385 return smode;
9386 }
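
/* E.g. (an illustrative case): a fixed-point conversion libcall taking a
QImode argument has no tree type, so the PROMOTE_MODE path above widens
the argument to word_mode, matching what promote_mode would do for a
typed char argument. */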
9387
9388 /* Implement TARGET_MACHINE_DEPENDENT_REORG. */
9389
9390 static void
9391 riscv_reorg (void)
9392 {
9393 /* Do nothing unless we have -msave-restore. */
9394 if (TARGET_SAVE_RESTORE)
9395 riscv_remove_unneeded_save_restore_calls ();
9396 }
9397
9398 /* Return nonzero if register FROM_REGNO can be renamed to register
9399 TO_REGNO. */
9400
9401 bool
9402 riscv_hard_regno_rename_ok (unsigned from_regno ATTRIBUTE_UNUSED,
9403 unsigned to_regno)
9404 {
9405 /* Interrupt functions can only use registers that have already been
9406 saved by the prologue, even if they would normally be
9407 call-clobbered. */
9408 return !cfun->machine->interrupt_handler_p || df_regs_ever_live_p (to_regno);
9409 }
9410
9411 /* Implement TARGET_NEW_ADDRESS_PROFITABLE_P. */
9412
9413 bool
9414 riscv_new_address_profitable_p (rtx memref, rtx_insn *insn, rtx new_addr)
9415 {
9416 /* Prefer old address if it is less expensive. */
9417 addr_space_t as = MEM_ADDR_SPACE (memref);
9418 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
9419 int old_cost = address_cost (XEXP (memref, 0), GET_MODE (memref), as, speed);
9420 int new_cost = address_cost (new_addr, GET_MODE (memref), as, speed);
9421 return new_cost <= old_cost;
9422 }
9423
9424 /* Helper function for generating gpr_save pattern. */
9425
9426 rtx
9427 riscv_gen_gpr_save_insn (struct riscv_frame_info *frame)
9428 {
9429 unsigned count = riscv_save_libcall_count (frame->mask);
9430 /* 1 for the unspec, 2 for the t0/t1 clobbers and 1 for ra. */
9431 unsigned veclen = 1 + 2 + 1 + count;
9432 rtvec vec = rtvec_alloc (veclen);
9433
9434 gcc_assert (veclen <= ARRAY_SIZE (gpr_save_reg_order));
9435
9436 RTVEC_ELT (vec, 0) =
9437 gen_rtx_UNSPEC_VOLATILE (VOIDmode,
9438 gen_rtvec (1, GEN_INT (count)), UNSPECV_GPR_SAVE);
9439
9440 for (unsigned i = 1; i < veclen; ++i)
9441 {
9442 unsigned regno = gpr_save_reg_order[i];
9443 rtx reg = gen_rtx_REG (Pmode, regno);
9444 rtx elt;
9445
9446 /* t0 and t1 are CLOBBERs, others are USEs. */
9447 if (i < 3)
9448 elt = gen_rtx_CLOBBER (Pmode, reg);
9449 else
9450 elt = gen_rtx_USE (Pmode, reg);
9451
9452 RTVEC_ELT (vec, i) = elt;
9453 }
9454
9455 /* The largest-numbered register in the save order must be set in the
9456 mask unless we are using __riscv_save_0. */
9457 gcc_assert ((count == 0) ||
9458 BITSET_P (frame->mask, gpr_save_reg_order[veclen - 1]));
9459
9460 return gen_rtx_PARALLEL (VOIDmode, vec);
9461 }
9462
9463 static HOST_WIDE_INT
9464 zcmp_base_adj (int regs_num)
9465 {
9466 return riscv_16bytes_align ((regs_num) *GET_MODE_SIZE (word_mode));
9467 }
9468
9469 static HOST_WIDE_INT
9470 zcmp_additional_adj (HOST_WIDE_INT total, int regs_num)
9471 {
9472 return total - zcmp_base_adj (regs_num);
9473 }
9474
9475 bool
9476 riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT total, int regs_num)
9477 {
9478 HOST_WIDE_INT additional_bytes = zcmp_additional_adj (total, regs_num);
9479 return additional_bytes == 0 || additional_bytes == 1 * ZCMP_SP_INC_STEP
9480 || additional_bytes == 2 * ZCMP_SP_INC_STEP
9481 || additional_bytes == ZCMP_MAX_SPIMM * ZCMP_SP_INC_STEP;
9482 }
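
/* A worked example, assuming the Zc ZCMP_SP_INC_STEP of 16 bytes and
ZCMP_MAX_SPIMM of 3: for regs_num = 3 on RV64, the base adjustment is
riscv_16bytes_align (3 * 8) = 32 bytes, so the valid totals are 32, 48,
64 and 80; e.g. total = 64 leaves 32 = 2 * ZCMP_SP_INC_STEP additional
bytes and is accepted, while total = 72 is rejected. */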
9483
9484 /* Return true if OP is a valid gpr_save pattern. */
9485
9486 bool
9487 riscv_gpr_save_operation_p (rtx op)
9488 {
9489 unsigned len = XVECLEN (op, 0);
9490
9491 if (len > ARRAY_SIZE (gpr_save_reg_order))
9492 return false;
9493
9494 for (unsigned i = 0; i < len; i++)
9495 {
9496 rtx elt = XVECEXP (op, 0, i);
9497 if (i == 0)
9498 {
9499 /* First element in parallel is unspec. */
9500 if (GET_CODE (elt) != UNSPEC_VOLATILE
9501 || GET_CODE (XVECEXP (elt, 0, 0)) != CONST_INT
9502 || XINT (elt, 1) != UNSPECV_GPR_SAVE)
9503 return false;
9504 }
9505 else
9506 {
9507 /* Two CLOBBERs followed by USEs; the order must match
9508 gpr_save_reg_order. */
9509 unsigned expect_code = i < 3 ? CLOBBER : USE;
9510 if (GET_CODE (elt) != expect_code
9511 || !REG_P (XEXP (elt, 0))
9512 || (REGNO (XEXP (elt, 0)) != gpr_save_reg_order[i]))
9513 return false;
9514 }
9515 }
9516 return true;
9517 }
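
/* For illustration, a PARALLEL accepted here might look like the
following sketch for a single saved register (the element count varies
with riscv_save_libcall_count):

(parallel [(unspec_volatile [(const_int 1)] UNSPECV_GPR_SAVE)
	   (clobber (reg:DI t0))
	   (clobber (reg:DI t1))
	   (use (reg:DI ra))
	   (use (reg:DI s0))]) */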
9518
9519 /* Implement TARGET_ASAN_SHADOW_OFFSET. */
9520
9521 static unsigned HOST_WIDE_INT
9522 riscv_asan_shadow_offset (void)
9523 {
9524 /* We only have libsanitizer support for RV64 at present.
9525
9526 This number must match ASAN_SHADOW_OFFSET_CONST in the file
9527 libsanitizer/asan/asan_mapping.h. */
9528 return TARGET_64BIT ? HOST_WIDE_INT_UC (0xd55550000) : 0;
9529 }
9530
9531 /* Implement TARGET_MANGLE_TYPE. */
9532
9533 static const char *
9534 riscv_mangle_type (const_tree type)
9535 {
9536 /* Half-precision float, _Float16 is "DF16_". */
9537 if (SCALAR_FLOAT_TYPE_P (type) && TYPE_PRECISION (type) == 16)
9538 return "DF16_";
9539
9540 /* Mangle all vector types for the vector extension. The mangled name
9541 follows the RVV LLVM convention, i.e.
9542 "u" + length of (abi_name) + abi_name. */
9543 if (TYPE_NAME (type) != NULL)
9544 {
9545 const char *res = riscv_vector::mangle_builtin_type (type);
9546 if (res)
9547 return res;
9548 }
9549
9550 /* Use the default mangling. */
9551 return NULL;
9552 }
9553
9554 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
9555
9556 static bool
9557 riscv_scalar_mode_supported_p (scalar_mode mode)
9558 {
9559 if (mode == HFmode)
9560 return true;
9561 else
9562 return default_scalar_mode_supported_p (mode);
9563 }
9564
9565 /* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P - return TRUE
9566 if MODE is HFmode, and punt to the generic implementation otherwise. */
9567
9568 static bool
9569 riscv_libgcc_floating_mode_supported_p (scalar_float_mode mode)
9570 {
9571 if (mode == HFmode)
9572 return true;
9573 else
9574 return default_libgcc_floating_mode_supported_p (mode);
9575 }
9576
9577 /* Set the value of FLT_EVAL_METHOD.
9578 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
9579
9580 0: evaluate all operations and constants, whose semantic type has at
9581 most the range and precision of type float, to the range and
9582 precision of float; evaluate all other operations and constants to
9583 the range and precision of the semantic type;
9584
9585 N, where _FloatN is a supported interchange floating type
9586 evaluate all operations and constants, whose semantic type has at
9587 most the range and precision of _FloatN type, to the range and
9588 precision of the _FloatN type; evaluate all other operations and
9589 constants to the range and precision of the semantic type;
9590
9591 If we have the zfh/zhinx/zvfh extensions then we support _Float16
9592 in native precision, so we should set this to 16. */
9593 static enum flt_eval_method
9594 riscv_excess_precision (enum excess_precision_type type)
9595 {
9596 switch (type)
9597 {
9598 case EXCESS_PRECISION_TYPE_FAST:
9599 case EXCESS_PRECISION_TYPE_STANDARD:
9600 return ((TARGET_ZFH || TARGET_ZHINX || TARGET_ZVFH)
9601 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
9602 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
9603 case EXCESS_PRECISION_TYPE_IMPLICIT:
9604 case EXCESS_PRECISION_TYPE_FLOAT16:
9605 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
9606 default:
9607 gcc_unreachable ();
9608 }
9609 return FLT_EVAL_METHOD_UNPREDICTABLE;
9610 }
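
/* For example, given "_Float16 a, b; a * b", the product is evaluated
directly in HFmode when one of zfh/zhinx/zvfh is available, and is
otherwise promoted to SFmode, computed as float and truncated back. */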
9611
9612 /* Implement TARGET_FLOATN_MODE. */
9613 static opt_scalar_float_mode
9614 riscv_floatn_mode (int n, bool extended)
9615 {
9616 if (!extended && n == 16)
9617 return HFmode;
9618
9619 return default_floatn_mode (n, extended);
9620 }
9621
9622 static void
9623 riscv_init_libfuncs (void)
9624 {
9625 /* Half-precision float operations. The compiler handles all operations
9626 with NULL libfuncs by converting to SFmode. */
9627
9628 /* Arithmetic. */
9629 set_optab_libfunc (add_optab, HFmode, NULL);
9630 set_optab_libfunc (sdiv_optab, HFmode, NULL);
9631 set_optab_libfunc (smul_optab, HFmode, NULL);
9632 set_optab_libfunc (neg_optab, HFmode, NULL);
9633 set_optab_libfunc (sub_optab, HFmode, NULL);
9634
9635 /* Comparisons. */
9636 set_optab_libfunc (eq_optab, HFmode, NULL);
9637 set_optab_libfunc (ne_optab, HFmode, NULL);
9638 set_optab_libfunc (lt_optab, HFmode, NULL);
9639 set_optab_libfunc (le_optab, HFmode, NULL);
9640 set_optab_libfunc (ge_optab, HFmode, NULL);
9641 set_optab_libfunc (gt_optab, HFmode, NULL);
9642 set_optab_libfunc (unord_optab, HFmode, NULL);
9643 }
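
/* E.g., with the NULL libfuncs above, "_Float16 x = a + b" expands
roughly as (_Float16) ((float) a + (float) b) instead of calling a
hypothetical __addhf3 helper. */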
9644
9645 #if CHECKING_P
9646 void
9647 riscv_reinit (void)
9648 {
9649 riscv_option_override ();
9650 init_adjust_machine_modes ();
9651 init_derived_machine_modes ();
9652 reinit_regs ();
9653 init_optabs ();
9654 }
9655 #endif
9656
9657 #if CHECKING_P
9658 #undef TARGET_RUN_TARGET_SELFTESTS
9659 #define TARGET_RUN_TARGET_SELFTESTS selftest::riscv_run_selftests
9660 #endif /* #if CHECKING_P */
9661
9662 /* Implement TARGET_VECTOR_MODE_SUPPORTED_P. */
9663
9664 static bool
9665 riscv_vector_mode_supported_p (machine_mode mode)
9666 {
9667 if (TARGET_VECTOR)
9668 return riscv_v_ext_mode_p (mode);
9669
9670 return false;
9671 }
9672
9673 /* Implement TARGET_VERIFY_TYPE_CONTEXT. */
9674
9675 static bool
9676 riscv_verify_type_context (location_t loc, type_context_kind context,
9677 const_tree type, bool silent_p)
9678 {
9679 return riscv_vector::verify_type_context (loc, context, type, silent_p);
9680 }
9681
9682 /* Implement TARGET_VECTOR_ALIGNMENT. */
9683
9684 static HOST_WIDE_INT
9685 riscv_vector_alignment (const_tree type)
9686 {
9687 /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can
9688 be set for non-predicate vectors of booleans. Modes are the most
9689 direct way we have of identifying real RVV predicate types. */
9690 /* FIXME: The RVV spec doesn't specify the alignment of bool, so we
9691 use one-byte alignment. */
9692 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL)
9693 return 8;
9694
9695 widest_int min_size
9696 = constant_lower_bound (wi::to_poly_widest (TYPE_SIZE (type)));
9697 return wi::umin (min_size, 128).to_uhwi ();
9698 }
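
/* E.g., RVV predicate (MODE_VECTOR_BOOL) types get one-byte alignment,
while a data vector whose minimum size is 64 bits gets 64-bit alignment
and anything of at least 128 bits is capped at 128-bit alignment. */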
9699
9700 /* Implement REGMODE_NATURAL_SIZE. */
9701
9702 poly_uint64
9703 riscv_regmode_natural_size (machine_mode mode)
9704 {
9705 /* The natural size for RVV data modes is one RVV data vector,
9706 and similarly for predicates. We can't independently modify
9707 anything smaller than that. */
9708 /* ??? For now, only do this for variable-width RVV registers.
9709 Doing it for constant-sized registers breaks lower-subreg.c. */
9710
9711 if (riscv_v_ext_mode_p (mode))
9712 {
9713 poly_uint64 size = GET_MODE_SIZE (mode);
9714 if (riscv_v_ext_tuple_mode_p (mode))
9715 {
9716 size = GET_MODE_SIZE (riscv_vector::get_subpart_mode (mode));
9717 if (known_lt (size, BYTES_PER_RISCV_VECTOR))
9718 return size;
9719 }
9720 else if (riscv_v_ext_vector_mode_p (mode))
9721 {
9722 /* RVV mask modes always consume a single register. */
9723 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
9724 return BYTES_PER_RISCV_VECTOR;
9725 }
9726 if (!size.is_constant ())
9727 return BYTES_PER_RISCV_VECTOR;
9728 else if (!riscv_v_ext_vls_mode_p (mode))
9729 /* For -march=rv64gc_zve32f, the natural vector register size
9730 is 32bits which is smaller than scalar register size, so we
9731 return minimum size between vector register size and scalar
9732 register size. */
9733 return MIN (size.to_constant (), UNITS_PER_WORD);
9734 }
9735 return UNITS_PER_WORD;
9736 }
9737
9738 /* Implement the TARGET_DWARF_POLY_INDETERMINATE_VALUE hook. */
9739
9740 static unsigned int
9741 riscv_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
9742 int *offset)
9743 {
9744 /* Polynomial invariant 1 == (VLENB / riscv_bytes_per_vector_chunk) - 1.
9745 1. TARGET_MIN_VLEN == 32, polynomial invariant 1 == (VLENB / 4) - 1.
9746 2. TARGET_MIN_VLEN > 32, polynomial invariant 1 == (VLENB / 8) - 1.
9747 */
9748 gcc_assert (i == 1);
9749 *factor = riscv_bytes_per_vector_chunk;
9750 *offset = 1;
9751 return RISCV_DWARF_VLENB;
9752 }
9753
9754 /* Implement TARGET_ESTIMATED_POLY_VALUE. */
9755
9756 static HOST_WIDE_INT
9757 riscv_estimated_poly_value (poly_int64 val,
9758 poly_value_estimate_kind kind = POLY_VALUE_LIKELY)
9759 {
9760 if (TARGET_VECTOR)
9761 return riscv_vector::estimated_poly_value (val, kind);
9762 return default_estimated_poly_value (val, kind);
9763 }
9764
9765 /* Return true if the vector misalignment factor is supported by the
9766 target. */
9767 bool
9768 riscv_support_vector_misalignment (machine_mode mode,
9769 const_tree type ATTRIBUTE_UNUSED,
9770 int misalignment,
9771 bool is_packed ATTRIBUTE_UNUSED)
9772 {
9773 /* This depends on the movmisalign pattern. */
9774 return default_builtin_support_vector_misalignment (mode, type, misalignment,
9775 is_packed);
9776 }
9777
9778 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
9779
9780 static opt_machine_mode
9781 riscv_get_mask_mode (machine_mode mode)
9782 {
9783 if (TARGET_VECTOR && riscv_v_ext_mode_p (mode))
9784 return riscv_vector::get_mask_mode (mode);
9785
9786 return default_get_mask_mode (mode);
9787 }
9788
9789 /* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that
9790 it isn't worth branching around empty masked ops (including masked
9791 stores). */
9792
9793 static bool
9794 riscv_empty_mask_is_expensive (unsigned)
9795 {
9796 return false;
9797 }
9798
9799 /* Return true if a shift-amount matches the trailing cleared bits on
9800 a bitmask. */
9801
9802 bool
9803 riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
9804 {
9805 return shamt == ctz_hwi (mask);
9806 }
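
/* E.g., shamt 8 matches the mask 0xffffff00, whose ctz_hwi is 8; this
lets patterns recognize a shift whose amount equals the mask's trailing
zero count. */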
9807
9808 static HARD_REG_SET
9809 vector_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
9810 {
9811 HARD_REG_SET zeroed_hardregs;
9812 CLEAR_HARD_REG_SET (zeroed_hardregs);
9813
9814 /* Find a register to hold vl. */
9815 unsigned vl_regno = INVALID_REGNUM;
9816 /* Skip the first GPR (x0): a vsetvl whose rd and rs1 are both x0 keeps
9817 the existing vl instead of setting it to VLMAX. */
9818 for (unsigned regno = GP_REG_FIRST + 1; regno <= GP_REG_LAST; regno++)
9819 {
9820 if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
9821 {
9822 vl_regno = regno;
9823 break;
9824 }
9825 }
9826
9827 if (vl_regno > GP_REG_LAST)
9828 sorry ("cannot allocate vl register for %qs on this target",
9829 "-fzero-call-used-regs");
9830
9831 /* Vector configurations need not be saved and restored here. The
9832 -fzero-call-used-regs=* option zeroes all vector registers and then
9833 returns, so no vector operations occur in between. */
9834
9835 bool emitted_vlmax_vsetvl = false;
9836 rtx vl = gen_rtx_REG (Pmode, vl_regno); /* vl is VLMAX. */
9837 for (unsigned regno = V_REG_FIRST; regno <= V_REG_LAST; ++regno)
9838 {
9839 if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
9840 {
9841 rtx target = regno_reg_rtx[regno];
9842 machine_mode mode = GET_MODE (target);
9843
9844 if (!emitted_vlmax_vsetvl)
9845 {
9846 riscv_vector::emit_hard_vlmax_vsetvl (mode, vl);
9847 emitted_vlmax_vsetvl = true;
9848 }
9849
9850 rtx ops[] = {target, CONST0_RTX (mode)};
9851 riscv_vector::emit_vlmax_insn_lra (code_for_pred_mov (mode),
9852 riscv_vector::UNARY_OP, ops, vl);
9853
9854 SET_HARD_REG_BIT (zeroed_hardregs, regno);
9855 }
9856 }
9857
9858 return zeroed_hardregs;
9859 }
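
/* The emitted sequence is roughly (a sketch; the scratch GPR and the
vtype configuration depend on the registers being zeroed):

	vsetvli	a0, zero, e8, m8, ta, ma	# one VLMAX vsetvl
	vmv.v.i	v8, 0				# one move per zeroed vreg
	vmv.v.i	v16, 0
	... */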
9860
9861 /* Generate a sequence of instructions that zero registers specified by
9862 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
9863 zeroed. */
9864 HARD_REG_SET
9865 riscv_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
9866 {
9867 HARD_REG_SET zeroed_hardregs;
9868 CLEAR_HARD_REG_SET (zeroed_hardregs);
9869
9870 if (TARGET_VECTOR)
9871 zeroed_hardregs |= vector_zero_call_used_regs (need_zeroed_hardregs);
9872
9873 return zeroed_hardregs | default_zero_call_used_regs (need_zeroed_hardregs
9874 & ~zeroed_hardregs);
9875 }
9876
9877 /* Implement target hook TARGET_ARRAY_MODE. */
9878
9879 static opt_machine_mode
9880 riscv_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
9881 {
9882 machine_mode vmode;
9883 if (TARGET_VECTOR
9884 && riscv_vector::get_tuple_mode (mode, nelems).exists (&vmode))
9885 return vmode;
9886
9887 return opt_machine_mode ();
9888 }
9889
9890 /* Given memory reference MEM, expand code to compute the aligned
9891 memory address, shift and mask values and store them into
9892 *ALIGNED_MEM, *SHIFT, *MASK and *NOT_MASK. */
9893
9894 void
9895 riscv_subword_address (rtx mem, rtx *aligned_mem, rtx *shift, rtx *mask,
9896 rtx *not_mask)
9897 {
9898 /* Align the memory address to a word. */
9899 rtx addr = force_reg (Pmode, XEXP (mem, 0));
9900
9901 rtx addr_mask = gen_int_mode (-4, Pmode);
9902
9903 rtx aligned_addr = gen_reg_rtx (Pmode);
9904 emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, addr_mask));
9905
9906 *aligned_mem = change_address (mem, SImode, aligned_addr);
9907
9908 /* Calculate the shift amount. */
9909 emit_move_insn (*shift, gen_rtx_AND (SImode, gen_lowpart (SImode, addr),
9910 gen_int_mode (3, SImode)));
9911 emit_move_insn (*shift, gen_rtx_ASHIFT (SImode, *shift,
9912 gen_int_mode (3, SImode)));
9913
9914 /* Calculate the mask. */
9915 int unshifted_mask = GET_MODE_MASK (GET_MODE (mem));
9916
9917 emit_move_insn (*mask, gen_int_mode (unshifted_mask, SImode));
9918
9919 emit_move_insn (*mask, gen_rtx_ASHIFT (SImode, *mask,
9920 gen_lowpart (QImode, *shift)));
9921
9922 emit_move_insn (*not_mask, gen_rtx_NOT (SImode, *mask));
9923 }
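
/* A worked example for an HImode access at a hypothetical address 0x1006:
aligned_addr = 0x1006 & -4 = 0x1004, shift = (0x1006 & 3) << 3 = 16,
mask = 0xffff << 16 = 0xffff0000 and not_mask = 0x0000ffff, so the
subword can be updated within an SImode-wide atomic sequence on the
aligned word. */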
9924
9925 /* Leftshift a subword within an SImode register. */
9926
9927 void
9928 riscv_lshift_subword (machine_mode mode, rtx value, rtx shift,
9929 rtx *shifted_value)
9930 {
9931 rtx value_reg = gen_reg_rtx (SImode);
9932 emit_move_insn (value_reg, simplify_gen_subreg (SImode, value,
9933 mode, 0));
9934
9935 emit_move_insn (*shifted_value, gen_rtx_ASHIFT (SImode, value_reg,
9936 gen_lowpart (QImode, shift)));
9937 }
9938
9939 /* Return TRUE if we should use the divmod expander, FALSE otherwise. This
9940 allows the behavior to be tuned for specific implementations as well as
9941 when optimizing for size. */
9942
9943 bool
9944 riscv_use_divmod_expander (void)
9945 {
9946 return tune_param->use_divmod_expansion;
9947 }
9948
9949 /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
9950
9951 static machine_mode
9952 riscv_preferred_simd_mode (scalar_mode mode)
9953 {
9954 if (TARGET_VECTOR)
9955 return riscv_vector::preferred_simd_mode (mode);
9956
9957 return word_mode;
9958 }
9959
9960 /* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */
9961
9962 static poly_uint64
9963 riscv_vectorize_preferred_vector_alignment (const_tree type)
9964 {
9965 if (riscv_v_ext_mode_p (TYPE_MODE (type)))
9966 return TYPE_ALIGN (TREE_TYPE (type));
9967 return TYPE_ALIGN (type);
9968 }
9969
9970 /* Return true if MODE is a static FRM rounding mode. */
9971
9972 static bool
9973 riscv_static_frm_mode_p (int mode)
9974 {
9975 switch (mode)
9976 {
9977 case riscv_vector::FRM_RDN:
9978 case riscv_vector::FRM_RUP:
9979 case riscv_vector::FRM_RTZ:
9980 case riscv_vector::FRM_RMM:
9981 case riscv_vector::FRM_RNE:
9982 return true;
9983 default:
9984 return false;
9985 }
9986
9987 gcc_unreachable ();
9988 }
9989
9990 /* Implement the floating-point Mode Switching. */
9991
9992 static void
9993 riscv_emit_frm_mode_set (int mode, int prev_mode)
9994 {
9995 rtx backup_reg = DYNAMIC_FRM_RTL (cfun);
9996
9997 if (prev_mode == riscv_vector::FRM_DYN_CALL)
9998 emit_insn (gen_frrmsi (backup_reg)); /* Backup frm when DYN_CALL. */
9999
10000 if (mode != prev_mode)
10001 {
10002 rtx frm = gen_int_mode (mode, SImode);
10003
10004 if (mode == riscv_vector::FRM_DYN_CALL
10005 && prev_mode != riscv_vector::FRM_DYN && STATIC_FRM_P (cfun))
10006 /* No need to emit when prev mode is DYN already. */
10007 emit_insn (gen_fsrmsi_restore_volatile (backup_reg));
10008 else if (mode == riscv_vector::FRM_DYN_EXIT && STATIC_FRM_P (cfun)
10009 && prev_mode != riscv_vector::FRM_DYN
10010 && prev_mode != riscv_vector::FRM_DYN_CALL)
10011 /* No need to emit when prev mode is DYN or DYN_CALL already. */
10012 emit_insn (gen_fsrmsi_restore_volatile (backup_reg));
10013 else if (mode == riscv_vector::FRM_DYN
10014 && prev_mode != riscv_vector::FRM_DYN_CALL)
10015 /* Restore frm value from backup when switch to DYN mode. */
10016 emit_insn (gen_fsrmsi_restore (backup_reg));
10017 else if (riscv_static_frm_mode_p (mode))
10018 /* Set frm value when switch to static mode. */
10019 emit_insn (gen_fsrmsi_restore (frm));
10020 }
10021 }
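
/* Summarizing the transitions above (a rough sketch; BACKUP denotes the
dynamic frm backup register):

prev_mode	new mode	emitted
---------	--------	-------
DYN_CALL	(any)		frrm BACKUP first, then as below
!= DYN		DYN_CALL	fsrm BACKUP (static-frm functions only)
!= DYN[_CALL]	DYN_EXIT	fsrm BACKUP (static-frm functions only)
!= DYN_CALL	DYN		fsrm BACKUP
(any)		RDN/RUP/...	fsrm <constant rounding mode> */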
10022
10023 /* Implement Mode switching. */
10024
10025 static void
10026 riscv_emit_mode_set (int entity, int mode, int prev_mode,
10027 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
10028 {
10029 switch (entity)
10030 {
10031 case RISCV_VXRM:
10032 if (mode != VXRM_MODE_NONE && mode != prev_mode)
10033 emit_insn (gen_vxrmsi (gen_int_mode (mode, SImode)));
10034 break;
10035 case RISCV_FRM:
10036 riscv_emit_frm_mode_set (mode, prev_mode);
10037 break;
10038 default:
10039 gcc_unreachable ();
10040 }
10041 }
10042
10043 /* After a call, adjust an FRM_NONE insn to FRM_DYN so the underlying
10044 emit restores the rounding mode. */
10045
10046 static int
10047 riscv_frm_adjust_mode_after_call (rtx_insn *cur_insn, int mode)
10048 {
10049 rtx_insn *insn = prev_nonnote_nondebug_insn_bb (cur_insn);
10050
10051 if (insn && CALL_P (insn))
10052 return riscv_vector::FRM_DYN;
10053
10054 return mode;
10055 }
10056
10057 /* Insert the frm backup insn at the end of the bb if and only if the call
10058 is the last insn of this bb. */
10059
10060 static void
10061 riscv_frm_emit_after_bb_end (rtx_insn *cur_insn)
10062 {
10063 edge eg;
10064 bool abnormal_edge_p = false;
10065 edge_iterator eg_iterator;
10066 basic_block bb = BLOCK_FOR_INSN (cur_insn);
10067
10068 FOR_EACH_EDGE (eg, eg_iterator, bb->succs)
10069 {
10070 if (eg->flags & EDGE_ABNORMAL)
10071 abnormal_edge_p = true;
10072 else
10073 {
10074 start_sequence ();
10075 emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
10076 rtx_insn *backup_insn = get_insns ();
10077 end_sequence ();
10078
10079 insert_insn_on_edge (backup_insn, eg);
10080 }
10081 }
10082
10083 if (abnormal_edge_p)
10084 {
10085 start_sequence ();
10086 emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
10087 rtx_insn *backup_insn = get_insns ();
10088 end_sequence ();
10089
10090 insert_insn_end_basic_block (backup_insn, bb);
10091 }
10092
10093 commit_edge_insertions ();
10094 }
10095
10096 /* Return mode that frm must be switched into
10097 prior to the execution of insn. */
10098
10099 static int
10100 riscv_frm_mode_needed (rtx_insn *cur_insn, int code)
10101 {
10102 if (!DYNAMIC_FRM_RTL(cfun))
10103 {
10104 /* The dynamic frm is initialized only once per function (cfun). */
10105 DYNAMIC_FRM_RTL (cfun) = gen_reg_rtx (SImode);
10106 emit_insn_at_entry (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
10107 }
10108
10109 if (CALL_P (cur_insn))
10110 {
10111 rtx_insn *insn = next_nonnote_nondebug_insn_bb (cur_insn);
10112
10113 if (!insn)
10114 riscv_frm_emit_after_bb_end (cur_insn);
10115
10116 return riscv_vector::FRM_DYN_CALL;
10117 }
10118
10119 int mode = code >= 0 ? get_attr_frm_mode (cur_insn) : riscv_vector::FRM_NONE;
10120
10121 if (mode == riscv_vector::FRM_NONE)
10122 /* After a call we need to back up the frm because it may have been
10123 updated during the call. Here, for each insn, we check whether the
10124 previous insn is a call. When it is, there are 2 cases for the
10125 emit mode set.
10126 
10127 1. The current insn is not MODE_NONE: the mode switching framework
10128 does the switch from MODE_CALL to MODE_NONE natively.
10129 2. The current insn is MODE_NONE: we need to adjust MODE_NONE to
10130 MODE_DYN, and leave the mode switching itself to perform
10131 the emit mode set.
10132 */
10133 mode = riscv_frm_adjust_mode_after_call (cur_insn, mode);
10134
10135 return mode;
10136 }
10137
10138 /* Return mode that entity must be switched into
10139 prior to the execution of insn. */
10140
10141 static int
10142 riscv_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
10143 {
10144 int code = recog_memoized (insn);
10145
10146 switch (entity)
10147 {
10148 case RISCV_VXRM:
10149 return code >= 0 ? get_attr_vxrm_mode (insn) : VXRM_MODE_NONE;
10150 case RISCV_FRM:
10151 return riscv_frm_mode_needed (insn, code);
10152 default:
10153 gcc_unreachable ();
10154 }
10155 }
10156
10157 /* Return TRUE if INSN is an asm insn. */
10158
10159 static bool
10160 asm_insn_p (rtx_insn *insn)
10161 {
10162 extract_insn (insn);
10163
10164 return recog_data.is_asm;
10165 }
10166
10167 /* Return TRUE if INSN leaves the VXRM mode unknown. */
10168
10169 static bool
10170 vxrm_unknown_p (rtx_insn *insn)
10171 {
10172 /* Return true if there is a definition of VXRM. */
10173 if (reg_set_p (gen_rtx_REG (SImode, VXRM_REGNUM), insn))
10174 return true;
10175
10176 /* A called function may contain an instruction that modifies VXRM;
10177 return true in that case. */
10178 if (CALL_P (insn))
10179 return true;
10180
10181 /* Return true for any asm, since users may hardcode assembly such as
10182 asm volatile ("csrwi vxrm, 0"). */
10183 if (asm_insn_p (insn))
10184 return true;
10185
10186 return false;
10187 }
10188
10189 /* Return TRUE if INSN makes the FRM mode unknown (dynamic). */
10190
10191 static bool
10192 frm_unknown_dynamic_p (rtx_insn *insn)
10193 {
10194 /* Return true if there is a definition of FRM. */
10195 if (reg_set_p (gen_rtx_REG (SImode, FRM_REGNUM), insn))
10196 return true;
10197
10198 return false;
10199 }
10200
10201 /* Return the mode that an insn results in for VXRM. */
10202
10203 static int
10204 riscv_vxrm_mode_after (rtx_insn *insn, int mode)
10205 {
10206 if (vxrm_unknown_p (insn))
10207 return VXRM_MODE_NONE;
10208
10209 if (recog_memoized (insn) < 0)
10210 return mode;
10211
10212 if (reg_mentioned_p (gen_rtx_REG (SImode, VXRM_REGNUM), PATTERN (insn)))
10213 return get_attr_vxrm_mode (insn);
10214 else
10215 return mode;
10216 }
10217
10218 /* Return the mode that an insn results in for FRM. */
10219
10220 static int
10221 riscv_frm_mode_after (rtx_insn *insn, int mode)
10222 {
10223 STATIC_FRM_P (cfun) = STATIC_FRM_P (cfun) || riscv_static_frm_mode_p (mode);
10224
10225 if (CALL_P (insn))
10226 return mode;
10227
10228 if (frm_unknown_dynamic_p (insn))
10229 return riscv_vector::FRM_DYN;
10230
10231 if (recog_memoized (insn) < 0)
10232 return mode;
10233
10234 if (reg_mentioned_p (gen_rtx_REG (SImode, FRM_REGNUM), PATTERN (insn)))
10235 return get_attr_frm_mode (insn);
10236 else
10237 return mode;
10238 }
10239
10240 /* Return the mode that an insn results in. */
10241
10242 static int
10243 riscv_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
10244 {
10245 switch (entity)
10246 {
10247 case RISCV_VXRM:
10248 return riscv_vxrm_mode_after (insn, mode);
10249 case RISCV_FRM:
10250 return riscv_frm_mode_after (insn, mode);
10251 default:
10252 gcc_unreachable ();
10253 }
10254 }
10255
10256 /* Return a mode that ENTITY is assumed to be
10257 switched to at function entry. */
10258
10259 static int
10260 riscv_mode_entry (int entity)
10261 {
10262 switch (entity)
10263 {
10264 case RISCV_VXRM:
10265 return VXRM_MODE_NONE;
10266 case RISCV_FRM:
10267 {
10268 /* According to RVV 1.0 spec, all vector floating-point operations use
10269 the dynamic rounding mode in the frm register. Likewise in other
10270 similar places. */
10271 return riscv_vector::FRM_DYN;
10272 }
10273 default:
10274 gcc_unreachable ();
10275 }
10276 }
10277
10278 /* Return a mode that ENTITY is assumed to be
10279 switched to at function exit. */
10280
10281 static int
10282 riscv_mode_exit (int entity)
10283 {
10284 switch (entity)
10285 {
10286 case RISCV_VXRM:
10287 return VXRM_MODE_NONE;
10288 case RISCV_FRM:
10289 return riscv_vector::FRM_DYN_EXIT;
10290 default:
10291 gcc_unreachable ();
10292 }
10293 }
10294
10295 static int
10296 riscv_mode_priority (int, int n)
10297 {
10298 return n;
10299 }
10300
10301 /* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES. */
10302 unsigned int
10303 riscv_autovectorize_vector_modes (vector_modes *modes, bool all)
10304 {
10305 if (TARGET_VECTOR)
10306 return riscv_vector::autovectorize_vector_modes (modes, all);
10307
10308 return default_autovectorize_vector_modes (modes, all);
10309 }
10310
10311 /* Implement TARGET_VECTORIZE_RELATED_MODE. */
10312 opt_machine_mode
10313 riscv_vectorize_related_mode (machine_mode vector_mode, scalar_mode element_mode,
10314 poly_uint64 nunits)
10315 {
10316 if (TARGET_VECTOR)
10317 return riscv_vector::vectorize_related_mode (vector_mode, element_mode,
10318 nunits);
10319 return default_vectorize_related_mode (vector_mode, element_mode, nunits);
10320 }
10321
10322 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
10323
10324 static bool
10325 riscv_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
10326 rtx target, rtx op0, rtx op1,
10327 const vec_perm_indices &sel)
10328 {
10329 if (TARGET_VECTOR && riscv_v_ext_mode_p (vmode))
10330 return riscv_vector::expand_vec_perm_const (vmode, op_mode, target, op0,
10331 op1, sel);
10332
10333 return false;
10334 }
10335
10336 static bool
10337 riscv_frame_pointer_required (void)
10338 {
10339 return riscv_save_frame_pointer && !crtl->is_leaf;
10340 }
10341
10342 /* Return the appropriate common costs for vectors of type VECTYPE. */
10343 static const common_vector_cost *
10344 get_common_costs (tree vectype)
10345 {
10346 const cpu_vector_cost *costs = tune_param->vec_costs;
10347 gcc_assert (costs);
10348
10349 if (vectype && riscv_v_ext_vls_mode_p (TYPE_MODE (vectype)))
10350 return costs->vls;
10351 return costs->vla;
10352 }
10353
10354 /* Implement targetm.vectorize.builtin_vectorization_cost. */
10355
10356 static int
10357 riscv_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
10358 tree vectype, int misalign ATTRIBUTE_UNUSED)
10359 {
10360 unsigned elements;
10361 const cpu_vector_cost *costs = tune_param->vec_costs;
10362 bool fp = false;
10363
10364 if (vectype != NULL)
10365 fp = FLOAT_TYPE_P (vectype);
10366
10367 if (costs != NULL)
10368 {
10369 const common_vector_cost *common_costs = get_common_costs (vectype);
10370 gcc_assert (common_costs != NULL);
10371 switch (type_of_cost)
10372 {
10373 case scalar_stmt:
10374 return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;
10375
10376 case scalar_load:
10377 return costs->scalar_load_cost;
10378
10379 case scalar_store:
10380 return costs->scalar_store_cost;
10381
10382 case vector_stmt:
10383 return fp ? common_costs->fp_stmt_cost : common_costs->int_stmt_cost;
10384
10385 case vector_load:
10386 return common_costs->align_load_cost;
10387
10388 case vector_store:
10389 return common_costs->align_store_cost;
10390
10391 case vec_to_scalar:
10392 return common_costs->vec_to_scalar_cost;
10393
10394 case scalar_to_vec:
10395 return common_costs->scalar_to_vec_cost;
10396
10397 case unaligned_load:
10398 return common_costs->unalign_load_cost;
10399 case vector_gather_load:
10400 return common_costs->gather_load_cost;
10401
10402 case unaligned_store:
10403 return common_costs->unalign_store_cost;
10404 case vector_scatter_store:
10405 return common_costs->scatter_store_cost;
10406
10407 case cond_branch_taken:
10408 return costs->cond_taken_branch_cost;
10409
10410 case cond_branch_not_taken:
10411 return costs->cond_not_taken_branch_cost;
10412
10413 case vec_perm:
10414 return common_costs->permute_cost;
10415
10416 case vec_promote_demote:
10417 return fp ? common_costs->fp_stmt_cost : common_costs->int_stmt_cost;
10418
10419 case vec_construct:
10420 elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
10421 return elements / 2 + 1;
10422
10423 default:
10424 gcc_unreachable ();
10425 }
10426 }
10427
10428 return default_builtin_vectorization_cost (type_of_cost, vectype, misalign);
10429 }
10430
10431 /* Implement targetm.vectorize.create_costs. */
10432
10433 static vector_costs *
10434 riscv_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
10435 {
10436 if (TARGET_VECTOR)
10437 return new riscv_vector::costs (vinfo, costing_for_scalar);
10438 /* Default vector costs. */
10439 return new vector_costs (vinfo, costing_for_scalar);
10440 }
10441
10442 /* Implement TARGET_PREFERRED_ELSE_VALUE. */
10443
10444 static tree
10445 riscv_preferred_else_value (unsigned ifn, tree vectype, unsigned int nops,
10446 tree *ops)
10447 {
10448 if (riscv_v_ext_mode_p (TYPE_MODE (vectype)))
10449 return get_or_create_ssa_default_def (cfun, create_tmp_var (vectype));
10450
10451 return default_preferred_else_value (ifn, vectype, nops, ops);
10452 }
10453
10454 /* If MEM is in the form of "base+offset", extract the two parts
10455 of the address and store them in BASE and OFFSET; otherwise return
10456 false after clearing BASE and OFFSET. */
10457
10458 bool
10459 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
10460 {
10461 rtx addr;
10462
10463 gcc_assert (MEM_P (mem));
10464
10465 addr = XEXP (mem, 0);
10466
10467 if (REG_P (addr))
10468 {
10469 *base = addr;
10470 *offset = const0_rtx;
10471 return true;
10472 }
10473
10474 if (GET_CODE (addr) == PLUS
10475 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
10476 {
10477 *base = XEXP (addr, 0);
10478 *offset = XEXP (addr, 1);
10479 return true;
10480 }
10481
10482 *base = NULL_RTX;
10483 *offset = NULL_RTX;
10484
10485 return false;
10486 }
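
/* E.g., for a MEM whose address is (plus (reg a0) (const_int 16)) this
sets *base to the a0 reg and *offset to (const_int 16), while a bare
(reg a0) address yields *offset == const0_rtx. */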
10487
10488 /* Initialize the GCC target structure. */
10489 #undef TARGET_ASM_ALIGNED_HI_OP
10490 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
10491 #undef TARGET_ASM_ALIGNED_SI_OP
10492 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
10493 #undef TARGET_ASM_ALIGNED_DI_OP
10494 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
10495
10496 #undef TARGET_OPTION_OVERRIDE
10497 #define TARGET_OPTION_OVERRIDE riscv_option_override
10498
10499 #undef TARGET_OPTION_RESTORE
10500 #define TARGET_OPTION_RESTORE riscv_option_restore
10501
10502 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
10503 #define TARGET_OPTION_VALID_ATTRIBUTE_P riscv_option_valid_attribute_p
10504
10505 #undef TARGET_LEGITIMIZE_ADDRESS
10506 #define TARGET_LEGITIMIZE_ADDRESS riscv_legitimize_address
10507
10508 #undef TARGET_SCHED_ISSUE_RATE
10509 #define TARGET_SCHED_ISSUE_RATE riscv_issue_rate
10510 #undef TARGET_SCHED_MACRO_FUSION_P
10511 #define TARGET_SCHED_MACRO_FUSION_P riscv_macro_fusion_p
10512 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
10513 #define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p
10514
10515 #undef TARGET_SCHED_VARIABLE_ISSUE
10516 #define TARGET_SCHED_VARIABLE_ISSUE riscv_sched_variable_issue
10517
10518 #undef TARGET_SCHED_ADJUST_COST
10519 #define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost
10520
10521 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
10522 #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
10523
10524 #undef TARGET_SET_CURRENT_FUNCTION
10525 #define TARGET_SET_CURRENT_FUNCTION riscv_set_current_function
10526
10527 #undef TARGET_REGISTER_MOVE_COST
10528 #define TARGET_REGISTER_MOVE_COST riscv_register_move_cost
10529 #undef TARGET_MEMORY_MOVE_COST
10530 #define TARGET_MEMORY_MOVE_COST riscv_memory_move_cost
10531 #undef TARGET_RTX_COSTS
10532 #define TARGET_RTX_COSTS riscv_rtx_costs
10533 #undef TARGET_ADDRESS_COST
10534 #define TARGET_ADDRESS_COST riscv_address_cost
10535 #undef TARGET_INSN_COST
10536 #define TARGET_INSN_COST riscv_insn_cost
10537
10538 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
10539 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST riscv_max_noce_ifcvt_seq_cost
10540 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
10541 #define TARGET_NOCE_CONVERSION_PROFITABLE_P riscv_noce_conversion_profitable_p
10542
10543 #undef TARGET_ASM_FILE_START
10544 #define TARGET_ASM_FILE_START riscv_file_start
10545 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
10546 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
10547 #undef TARGET_ASM_FILE_END
10548 #define TARGET_ASM_FILE_END file_end_indicate_exec_stack
10549
10550 #undef TARGET_EXPAND_BUILTIN_VA_START
10551 #define TARGET_EXPAND_BUILTIN_VA_START riscv_va_start
10552
10553 #undef TARGET_PROMOTE_FUNCTION_MODE
10554 #define TARGET_PROMOTE_FUNCTION_MODE riscv_promote_function_mode
10555
10556 #undef TARGET_RETURN_IN_MEMORY
10557 #define TARGET_RETURN_IN_MEMORY riscv_return_in_memory
10558
10559 #undef TARGET_ASM_OUTPUT_MI_THUNK
10560 #define TARGET_ASM_OUTPUT_MI_THUNK riscv_output_mi_thunk
10561 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
10562 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
10563
10564 #undef TARGET_PRINT_OPERAND
10565 #define TARGET_PRINT_OPERAND riscv_print_operand
10566 #undef TARGET_PRINT_OPERAND_ADDRESS
10567 #define TARGET_PRINT_OPERAND_ADDRESS riscv_print_operand_address
10568 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
10569 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P riscv_print_operand_punct_valid_p
10570
10571 #undef TARGET_SETUP_INCOMING_VARARGS
10572 #define TARGET_SETUP_INCOMING_VARARGS riscv_setup_incoming_varargs
10573 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
10574 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS riscv_allocate_stack_slots_for_args
10575 #undef TARGET_STRICT_ARGUMENT_NAMING
10576 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
10577 #undef TARGET_MUST_PASS_IN_STACK
10578 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
10579 #undef TARGET_PASS_BY_REFERENCE
10580 #define TARGET_PASS_BY_REFERENCE riscv_pass_by_reference
10581 #undef TARGET_ARG_PARTIAL_BYTES
10582 #define TARGET_ARG_PARTIAL_BYTES riscv_arg_partial_bytes
10583 #undef TARGET_FUNCTION_ARG
10584 #define TARGET_FUNCTION_ARG riscv_function_arg
10585 #undef TARGET_FUNCTION_ARG_ADVANCE
10586 #define TARGET_FUNCTION_ARG_ADVANCE riscv_function_arg_advance
10587 #undef TARGET_FUNCTION_ARG_BOUNDARY
10588 #define TARGET_FUNCTION_ARG_BOUNDARY riscv_function_arg_boundary
10589 #undef TARGET_FNTYPE_ABI
10590 #define TARGET_FNTYPE_ABI riscv_fntype_abi
10591 #undef TARGET_INSN_CALLEE_ABI
10592 #define TARGET_INSN_CALLEE_ABI riscv_insn_callee_abi
10593
10594 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
10595 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
10596 riscv_get_separate_components
10597
10598 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
10599 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
10600 riscv_components_for_bb
10601
10602 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
10603 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
10604 riscv_disqualify_components
10605
10606 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
10607 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
10608 riscv_emit_prologue_components
10609
10610 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
10611 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
10612 riscv_emit_epilogue_components
10613
10614 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
10615 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
10616 riscv_set_handled_components
10617
10618 /* The generic ELF target does not always have TLS support. */
10619 #ifdef HAVE_AS_TLS
10620 #undef TARGET_HAVE_TLS
10621 #define TARGET_HAVE_TLS true
10622 #endif
10623
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM riscv_cannot_force_const_mem

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P riscv_legitimate_constant_p

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P riscv_use_blocks_for_constant_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P riscv_legitimate_address_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE riscv_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE riscv_conditional_register_usage

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS riscv_class_max_nregs

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT riscv_trampoline_init

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P riscv_in_small_data_p

#undef TARGET_HAVE_SRODATA_SECTION
#define TARGET_HAVE_SRODATA_SECTION true

#undef TARGET_ASM_SELECT_SECTION
#define TARGET_ASM_SELECT_SECTION riscv_select_section

#undef TARGET_ASM_UNIQUE_SECTION
#define TARGET_ASM_UNIQUE_SECTION riscv_unique_section

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION riscv_elf_select_rtx_section

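/* Section anchors must stay within reach of a single signed 12-bit
   load/store immediate.  With IMM_REACH == 1 << 12, the usable offset
   range is [-IMM_REACH/2, IMM_REACH/2 - 1] == [-2048, 2047].  */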
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET (-IMM_REACH/2)

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET (IMM_REACH/2-1)

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY riscv_register_priority

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P riscv_cannot_copy_insn_p

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV riscv_atomic_assign_expand_fenv

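/* Built-in function hooks.  */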
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS riscv_init_builtins

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL riscv_builtin_decl

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN riscv_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN riscv_expand_builtin

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS riscv_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK riscv_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P riscv_modes_tieable_p

#undef TARGET_SLOW_UNALIGNED_ACCESS
#define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access

#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS riscv_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT riscv_constant_alignment

#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES riscv_merge_decl_attributes

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE riscv_attribute_table

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN riscv_warn_func_return

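/* Use function descriptors instead of executable trampolines for
   nested functions: code addresses are always at least 2-byte aligned,
   so bit 0 of a function pointer is free to mark a descriptor at run
   time.  */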
/* The low bit is ignored by jump instructions, so it is safe to use.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG riscv_reorg

#undef TARGET_NEW_ADDRESS_PROFITABLE_P
#define TARGET_NEW_ADDRESS_PROFITABLE_P riscv_new_address_profitable_p

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE riscv_mangle_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P riscv_scalar_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  riscv_libgcc_floating_mode_supported_p

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS riscv_init_libfuncs

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION riscv_excess_precision

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE riscv_floatn_mode

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET riscv_asan_shadow_offset

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_ENDIAN)
#endif

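/* Vector (RVV) and auto-vectorization hooks.  */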
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P riscv_vector_mode_supported_p

#undef TARGET_VERIFY_TYPE_CONTEXT
#define TARGET_VERIFY_TYPE_CONTEXT riscv_verify_type_context

#undef TARGET_ESTIMATED_POLY_VALUE
#define TARGET_ESTIMATED_POLY_VALUE riscv_estimated_poly_value

#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE riscv_get_mask_mode

#undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
#define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE riscv_empty_mask_is_expensive

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT riscv_vector_alignment

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT riscv_support_vector_misalignment

#undef TARGET_DWARF_POLY_INDETERMINATE_VALUE
#define TARGET_DWARF_POLY_INDETERMINATE_VALUE riscv_dwarf_poly_indeterminate_value

#undef TARGET_ZERO_CALL_USED_REGS
#define TARGET_ZERO_CALL_USED_REGS riscv_zero_call_used_regs

#undef TARGET_ARRAY_MODE
#define TARGET_ARRAY_MODE riscv_array_mode

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE riscv_preferred_simd_mode

#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
#define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
  riscv_vectorize_preferred_vector_alignment

/* Mode switching hooks.  */

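/* These drive the mode-switching pass for rounding-mode CSRs (e.g. the
   dynamic floating-point rounding mode, frm), so that CSR writes are
   emitted only where the required mode actually changes.  */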
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT riscv_emit_mode_set
#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED riscv_mode_needed
#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER riscv_mode_after
#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY riscv_mode_entry
#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT riscv_mode_exit
#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY riscv_mode_priority

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
  riscv_autovectorize_vector_modes

#undef TARGET_VECTORIZE_RELATED_MODE
#define TARGET_VECTORIZE_RELATED_MODE riscv_vectorize_related_mode

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST riscv_vectorize_vec_perm_const

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED riscv_frame_pointer_required

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  riscv_builtin_vectorization_cost

#undef TARGET_VECTORIZE_CREATE_COSTS
#define TARGET_VECTORIZE_CREATE_COSTS riscv_vectorize_create_costs

#undef TARGET_PREFERRED_ELSE_VALUE
#define TARGET_PREFERRED_ELSE_VALUE riscv_preferred_else_value

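/* Initialize the target structure; any hook not overridden above keeps
   its default from target-def.h.  */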
struct gcc_target targetm = TARGET_INITIALIZER;

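/* Pull in the gengtype-generated garbage-collection roots for the
   GTY(()) data declared in this file.  */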
#include "gt-riscv.h"