]>
Commit | Line | Data |
---|---|---|
09cae750 PD |
1 | /* Subroutines used for code generation for RISC-V. |
2 | Copyright (C) 2011-2017 Free Software Foundation, Inc. | |
3 | Contributed by Andrew Waterman (andrew@sifive.com). | |
4 | Based on MIPS target for GNU compiler. | |
5 | ||
6 | This file is part of GCC. | |
7 | ||
8 | GCC is free software; you can redistribute it and/or modify | |
9 | it under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation; either version 3, or (at your option) | |
11 | any later version. | |
12 | ||
13 | GCC is distributed in the hope that it will be useful, | |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with GCC; see the file COPYING3. If not see | |
20 | <http://www.gnu.org/licenses/>. */ | |
21 | ||
22 | #include "config.h" | |
23 | #include "system.h" | |
24 | #include "coretypes.h" | |
25 | #include "tm.h" | |
26 | #include "rtl.h" | |
27 | #include "regs.h" | |
09cae750 | 28 | #include "insn-config.h" |
09cae750 PD |
29 | #include "insn-attr.h" |
30 | #include "recog.h" | |
31 | #include "output.h" | |
09cae750 | 32 | #include "alias.h" |
09cae750 | 33 | #include "tree.h" |
314e6352 ML |
34 | #include "stringpool.h" |
35 | #include "attribs.h" | |
09cae750 | 36 | #include "varasm.h" |
09cae750 PD |
37 | #include "stor-layout.h" |
38 | #include "calls.h" | |
39 | #include "function.h" | |
09cae750 PD |
40 | #include "explow.h" |
41 | #include "memmodel.h" | |
42 | #include "emit-rtl.h" | |
09cae750 PD |
43 | #include "reload.h" |
44 | #include "tm_p.h" | |
09cae750 PD |
45 | #include "target.h" |
46 | #include "target-def.h" | |
09cae750 | 47 | #include "basic-block.h" |
1b68a156 JL |
48 | #include "expr.h" |
49 | #include "optabs.h" | |
09cae750 | 50 | #include "bitmap.h" |
09cae750 | 51 | #include "df.h" |
09cae750 | 52 | #include "diagnostic.h" |
09cae750 | 53 | #include "builtins.h" |
fb5621b1 | 54 | #include "predict.h" |
09cae750 PD |
55 | |
/* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF.
   Symbol types are encoded as UNSPEC numbers in the range
   [UNSPEC_ADDRESS_FIRST, UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES).  */
#define UNSPEC_ADDRESS_P(X)					\
  (GET_CODE (X) == UNSPEC					\
   && XINT (X, 1) >= UNSPEC_ADDRESS_FIRST			\
   && XINT (X, 1) < UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES)

/* Extract the symbol or label from UNSPEC wrapper X.  */
#define UNSPEC_ADDRESS(X) \
  XVECEXP (X, 0, 0)

/* Extract the symbol type from UNSPEC wrapper X, undoing the
   UNSPEC_ADDRESS_FIRST bias applied when the wrapper was created.  */
#define UNSPEC_ADDRESS_TYPE(X) \
  ((enum riscv_symbol_type) (XINT (X, 1) - UNSPEC_ADDRESS_FIRST))

/* True if bit BIT is set in VALUE.  Uses 1ULL so BIT may index any bit
   of a 64-bit value.  */
#define BITSET_P(VALUE, BIT) (((VALUE) & (1ULL << (BIT))) != 0)
72 | ||
/* Classifies an address.

   ADDRESS_REG
       A natural register + offset address.  The register satisfies
       riscv_valid_base_register_p and the offset is a const_arith_operand.

   ADDRESS_LO_SUM
       A LO_SUM rtx.  The first operand is a valid base register and
       the second operand is a symbolic address.

   ADDRESS_CONST_INT
       A signed 12-bit constant address (the range accepted by
       SMALL_OPERAND; see riscv_classify_address).

   ADDRESS_SYMBOLIC:
       A constant symbolic address.  */
enum riscv_address_type {
  ADDRESS_REG,
  ADDRESS_LO_SUM,
  ADDRESS_CONST_INT,
  ADDRESS_SYMBOLIC
};
94 | ||
/* Information about a function's frame layout.  All offsets are measured
   from the bottom of the frame (the lowest stack address) unless noted.
   Computed by riscv_compute_frame_info and cached in cfun->machine.  */
struct GTY(())  riscv_frame_info {
  /* The size of the frame in bytes.  */
  HOST_WIDE_INT total_size;

  /* Bit X is set if the function saves or restores GPR X.  */
  unsigned int mask;

  /* Likewise FPR X.  */
  unsigned int fmask;

  /* How much the GPR save/restore routines adjust sp (or 0 if unused).  */
  unsigned save_libcall_adjustment;

  /* Offsets of fixed-point and floating-point save areas from frame bottom */
  HOST_WIDE_INT gp_sp_offset;
  HOST_WIDE_INT fp_sp_offset;

  /* Offset of virtual frame pointer from stack pointer/frame bottom */
  HOST_WIDE_INT frame_pointer_offset;

  /* Offset of hard frame pointer from stack pointer/frame bottom */
  HOST_WIDE_INT hard_frame_pointer_offset;

  /* The offset of arg_pointer_rtx from the bottom of the frame.  */
  HOST_WIDE_INT arg_pointer_offset;
};
122 | ||
/* Per-function machine-dependent state, reached via cfun->machine.  */
struct GTY(())  machine_function {
  /* The number of extra stack bytes taken up by register varargs.
     This area is allocated by the callee at the very top of the frame.  */
  int varargs_size;

  /* Memoized return value of leaf_function_p.  <0 if false, >0 if true.
     0 means "not yet computed".  */
  int is_leaf;

  /* The current frame information, calculated by riscv_compute_frame_info.  */
  struct riscv_frame_info frame;
};
134 | ||
/* Information about how a single function argument is passed: which
   registers (if any) hold it and whether any part spills to the stack.  */
struct riscv_arg_info {
  /* True if the argument is at least partially passed on the stack.  */
  bool stack_p;

  /* The number of integer registers allocated to this argument.  */
  unsigned int num_gprs;

  /* The offset of the first register used, provided num_gprs is nonzero.
     If passed entirely on the stack, the value is MAX_ARGS_IN_REGISTERS.  */
  unsigned int gpr_offset;

  /* The number of floating-point registers allocated to this argument.  */
  unsigned int num_fprs;

  /* The offset of the first register used, provided num_fprs is nonzero.  */
  unsigned int fpr_offset;
};
153 | ||
/* Information about an address described by riscv_address_type.
   Filled in by riscv_classify_address; which fields are meaningful
   depends on TYPE:

   ADDRESS_CONST_INT
       No fields are used.

   ADDRESS_REG
       REG is the base register and OFFSET is the constant offset.

   ADDRESS_LO_SUM
       REG and OFFSET are the operands to the LO_SUM and SYMBOL_TYPE
       is the type of symbol it references.

   ADDRESS_SYMBOLIC
       SYMBOL_TYPE is the type of symbol that the address references.  */
struct riscv_address_info {
  enum riscv_address_type type;
  rtx reg;
  rtx offset;
  enum riscv_symbol_type symbol_type;
};
174 | ||
/* One stage in a constant building sequence.  These sequences have
   the form:

	A = VALUE[0]
	A = A CODE[1] VALUE[1]
	A = A CODE[2] VALUE[2]
	...

   where A is an accumulator, each CODE[i] is a binary rtl operation
   and each VALUE[i] is a constant integer.  CODE[0] is undefined.  */
struct riscv_integer_op {
  enum rtx_code code;
  unsigned HOST_WIDE_INT value;
};

/* The largest number of operations needed to load an integer constant.
   The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.
   riscv_build_integer_1 asserts that it never exceeds this bound.  */
#define RISCV_MAX_INTEGER_OPS 8
193 | ||
/* Costs of various operations on the different architectures.  */

struct riscv_tune_info
{
  /* NOTE(review): each [2] cost pair presumably distinguishes single
     from double precision / word from doubleword operands — confirm
     against the riscv_rtx_costs uses, which are outside this chunk.  */
  unsigned short fp_add[2];
  unsigned short fp_mul[2];
  unsigned short fp_div[2];
  unsigned short int_mul[2];
  unsigned short int_div[2];
  unsigned short issue_rate;
  unsigned short branch_cost;
  unsigned short memory_cost;
  /* True if unaligned accesses are very slow on this core, so GCC
     should avoid emitting them (see riscv_slow_unaligned_access_p).  */
  bool slow_unaligned_access;
};
208 | ||
/* Information about one CPU we know about.  */
struct riscv_cpu_info {
  /* This CPU's canonical name.  */
  const char *name;

  /* Tuning parameters for this CPU.  */
  const struct riscv_tune_info *tune_info;
};
217 | ||
/* Global variables for machine-dependent things.  */

/* Whether unaligned accesses execute very slowly.  */
bool riscv_slow_unaligned_access_p;

/* Which tuning parameters to use.  Selected (elsewhere) from
   riscv_cpu_info_table / optimize_size_tune_info.  */
static const struct riscv_tune_info *tune_info;
225 | ||
/* Index R is the smallest register class that contains register R.
   The first 32 entries are the GPRs x0..x31, the next 32 the FPRs
   f0..f31, followed by the two fake frame registers.  */
const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
  GR_REGS,	GR_REGS,	GR_REGS,	GR_REGS,
  GR_REGS,	GR_REGS,	SIBCALL_REGS,	SIBCALL_REGS,
  JALR_REGS,	JALR_REGS,	JALR_REGS,	JALR_REGS,
  JALR_REGS,	JALR_REGS,	JALR_REGS,	JALR_REGS,
  JALR_REGS,	JALR_REGS,	JALR_REGS,	JALR_REGS,
  JALR_REGS,	JALR_REGS,	JALR_REGS,	JALR_REGS,
  JALR_REGS,	JALR_REGS,	JALR_REGS,	JALR_REGS,
  SIBCALL_REGS,	SIBCALL_REGS,	SIBCALL_REGS,	SIBCALL_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FRAME_REGS,	FRAME_REGS,
};
246 | ||
/* Costs to use when optimizing for rocket.  */
static const struct riscv_tune_info rocket_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  1,						/* issue_rate */
  3,						/* branch_cost */
  5,						/* memory_cost */
  true,						/* slow_unaligned_access */
};
259 | ||
/* Costs to use when optimizing for size.  All operation costs are
   minimal so instruction count, not latency, drives the choices.  */
static const struct riscv_tune_info optimize_size_tune_info = {
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_add */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_mul */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_div */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* int_mul */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* int_div */
  1,						/* issue_rate */
  1,						/* branch_cost */
  2,						/* memory_cost */
  false,					/* slow_unaligned_access */
};
272 | ||
/* A table describing all the processors GCC knows about.  Searched
   linearly by riscv_parse_cpu; the first entry doubles as the fallback
   for unknown -mtune values.  */
static const struct riscv_cpu_info riscv_cpu_info_table[] = {
  { "rocket", &rocket_tune_info },
  { "size", &optimize_size_tune_info },
};
278 | ||
279 | /* Return the riscv_cpu_info entry for the given name string. */ | |
280 | ||
281 | static const struct riscv_cpu_info * | |
282 | riscv_parse_cpu (const char *cpu_string) | |
283 | { | |
284 | for (unsigned i = 0; i < ARRAY_SIZE (riscv_cpu_info_table); i++) | |
285 | if (strcmp (riscv_cpu_info_table[i].name, cpu_string) == 0) | |
286 | return riscv_cpu_info_table + i; | |
287 | ||
288 | error ("unknown cpu %qs for -mtune", cpu_string); | |
289 | return riscv_cpu_info_table; | |
290 | } | |
291 | ||
/* Helper function for riscv_build_integer; arguments are as for
   riscv_build_integer.  Recursively searches for the cheapest sequence
   of operations that materializes VALUE, writing it to CODES and
   returning its length.  Each candidate below peels off one final
   operation (ADDI, XORI, SLLI) and recurses on the remainder, keeping
   the cheapest result found so far in COST/CODES.  */

static int
riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
		       HOST_WIDE_INT value, machine_mode mode)
{
  HOST_WIDE_INT low_part = CONST_LOW_PART (value);
  /* Start above the maximum so any real sequence is an improvement.  */
  int cost = RISCV_MAX_INTEGER_OPS + 1, alt_cost;
  struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];

  if (SMALL_OPERAND (value) || LUI_OPERAND (value))
    {
      /* Simply ADDI or LUI.  */
      codes[0].code = UNKNOWN;
      codes[0].value = value;
      return 1;
    }

  /* End with ADDI.  When constructing HImode constants, do not generate any
     intermediate value that is not itself a valid HImode constant.  The
     XORI case below will handle those remaining HImode constants.  */
  if (low_part != 0
      && (mode != HImode
	  || value - low_part <= ((1 << (GET_MODE_BITSIZE (HImode) - 1)) - 1)))
    {
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, value - low_part, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = PLUS;
	  alt_codes[alt_cost-1].value = low_part;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  /* End with XORI.  Only worth trying when the low part is negative
     (so ADDI would carry into the upper bits) or for HImode.  */
  if (cost > 2 && (low_part < 0 || mode == HImode))
    {
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, value ^ low_part, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = XOR;
	  alt_codes[alt_cost-1].value = low_part;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  /* Eliminate trailing zeros and end with SLLI.  */
  if (cost > 2 && (value & 1) == 0)
    {
      int shift = ctz_hwi (value);
      unsigned HOST_WIDE_INT x = value;
      /* Shift out the zeros and sign-extend what remains.  */
      x = sext_hwi (x >> shift, HOST_BITS_PER_WIDE_INT - shift);

      /* Don't eliminate the lower 12 bits if LUI might apply.  */
      if (shift > IMM_BITS && !SMALL_OPERAND (x) && LUI_OPERAND (x << IMM_BITS))
	shift -= IMM_BITS, x <<= IMM_BITS;

      alt_cost = 1 + riscv_build_integer_1 (alt_codes, x, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = ASHIFT;
	  alt_codes[alt_cost-1].value = shift;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  gcc_assert (cost <= RISCV_MAX_INTEGER_OPS);
  return cost;
}
365 | ||
/* Fill CODES with a sequence of rtl operations to load VALUE.
   Return the number of operations needed.  On top of the sequences
   riscv_build_integer_1 finds, also try forming a left-justified
   constant and ending with a logical right shift (SRLI).  */

static int
riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value,
		     machine_mode mode)
{
  int cost = riscv_build_integer_1 (codes, value, mode);

  /* Eliminate leading zeros and end with SRLI.  Only applicable when
     VALUE is positive, i.e. its top bit is one of those zeros.  */
  if (value > 0 && cost > 2)
    {
      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
      int alt_cost, shift = clz_hwi (value);
      HOST_WIDE_INT shifted_val;

      /* Try filling trailing bits with 1s.  */
      shifted_val = (value << shift) | ((((HOST_WIDE_INT) 1) << shift) - 1);
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, shifted_val, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = LSHIFTRT;
	  alt_codes[alt_cost-1].value = shift;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}

      /* Try filling trailing bits with 0s.  */
      shifted_val = value << shift;
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, shifted_val, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = LSHIFTRT;
	  alt_codes[alt_cost-1].value = shift;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  return cost;
}
407 | ||
/* Return the cost of constructing VAL in the event that a scratch
   register is available: build the low and high 32-bit halves
   separately, then combine them (the extra 2 covers the combining
   shift and add; see riscv_split_integer).  */

static int
riscv_split_integer_cost (HOST_WIDE_INT val)
{
  int cost;
  unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
  unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
  struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];

  cost = 2 + riscv_build_integer (codes, loval, VOIDmode);
  /* When both halves are equal the high half can reuse the low half's
     register, so only count it once.  */
  if (loval != hival)
    cost += riscv_build_integer (codes, hival, VOIDmode);

  return cost;
}
425 | ||
426 | /* Return the cost of constructing the integer constant VAL. */ | |
427 | ||
428 | static int | |
429 | riscv_integer_cost (HOST_WIDE_INT val) | |
430 | { | |
431 | struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS]; | |
432 | return MIN (riscv_build_integer (codes, val, VOIDmode), | |
433 | riscv_split_integer_cost (val)); | |
434 | } | |
435 | ||
/* Try to split a 64b integer into 32b parts, then reassemble.
   Returns an rtx computing VAL in MODE as (HI << 32) + LO, where HI
   and LO each hold a sign-extended 32-bit half built with
   riscv_move_integer.  */

static rtx
riscv_split_integer (HOST_WIDE_INT val, machine_mode mode)
{
  unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
  /* (val - loval) cancels the sign-extended low half so the high half
     shifts down cleanly.  */
  unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
  rtx hi = gen_reg_rtx (mode), lo = gen_reg_rtx (mode);

  riscv_move_integer (hi, hi, hival);
  riscv_move_integer (lo, lo, loval);

  hi = gen_rtx_fmt_ee (ASHIFT, mode, hi, GEN_INT (32));
  hi = force_reg (mode, hi);

  return gen_rtx_fmt_ee (PLUS, mode, hi, lo);
}
453 | ||
454 | /* Return true if X is a thread-local symbol. */ | |
455 | ||
456 | static bool | |
457 | riscv_tls_symbol_p (const_rtx x) | |
458 | { | |
459 | return SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0; | |
460 | } | |
461 | ||
462 | /* Return true if symbol X binds locally. */ | |
463 | ||
464 | static bool | |
465 | riscv_symbol_binds_local_p (const_rtx x) | |
466 | { | |
467 | if (SYMBOL_REF_P (x)) | |
468 | return (SYMBOL_REF_DECL (x) | |
469 | ? targetm.binds_local_p (SYMBOL_REF_DECL (x)) | |
470 | : SYMBOL_REF_LOCAL_P (x)); | |
471 | else | |
472 | return false; | |
473 | } | |
474 | ||
475 | /* Return the method that should be used to access SYMBOL_REF or | |
476 | LABEL_REF X. */ | |
477 | ||
478 | static enum riscv_symbol_type | |
479 | riscv_classify_symbol (const_rtx x) | |
480 | { | |
481 | if (riscv_tls_symbol_p (x)) | |
482 | return SYMBOL_TLS; | |
483 | ||
484 | if (GET_CODE (x) == SYMBOL_REF && flag_pic && !riscv_symbol_binds_local_p (x)) | |
485 | return SYMBOL_GOT_DISP; | |
486 | ||
487 | return riscv_cmodel == CM_MEDLOW ? SYMBOL_ABSOLUTE : SYMBOL_PCREL; | |
488 | } | |
489 | ||
490 | /* Classify the base of symbolic expression X. */ | |
491 | ||
492 | enum riscv_symbol_type | |
493 | riscv_classify_symbolic_expression (rtx x) | |
494 | { | |
495 | rtx offset; | |
496 | ||
497 | split_const (x, &x, &offset); | |
498 | if (UNSPEC_ADDRESS_P (x)) | |
499 | return UNSPEC_ADDRESS_TYPE (x); | |
500 | ||
501 | return riscv_classify_symbol (x); | |
502 | } | |
503 | ||
/* Return true if X is a symbolic constant.  If it is, store the type of
   the symbol in *SYMBOL_TYPE.  X may be a bare symbol/label, an UNSPEC
   address wrapper, or either of those plus a constant offset.  */

bool
riscv_symbolic_constant_p (rtx x, enum riscv_symbol_type *symbol_type)
{
  rtx offset;

  split_const (x, &x, &offset);
  if (UNSPEC_ADDRESS_P (x))
    {
      *symbol_type = UNSPEC_ADDRESS_TYPE (x);
      x = UNSPEC_ADDRESS (x);
    }
  else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
    *symbol_type = riscv_classify_symbol (x);
  else
    return false;

  if (offset == const0_rtx)
    return true;

  /* Nonzero offsets are only valid for references that don't use the GOT.  */
  switch (*symbol_type)
    {
    case SYMBOL_ABSOLUTE:
    case SYMBOL_PCREL:
    case SYMBOL_TLS_LE:
      /* GAS rejects offsets outside the range [-2^31, 2^31-1].  */
      return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);

    default:
      return false;
    }
}
539 | ||
540 | /* Returns the number of instructions necessary to reference a symbol. */ | |
541 | ||
542 | static int riscv_symbol_insns (enum riscv_symbol_type type) | |
543 | { | |
544 | switch (type) | |
545 | { | |
546 | case SYMBOL_TLS: return 0; /* Depends on the TLS model. */ | |
547 | case SYMBOL_ABSOLUTE: return 2; /* LUI + the reference. */ | |
548 | case SYMBOL_PCREL: return 2; /* AUIPC + the reference. */ | |
549 | case SYMBOL_TLS_LE: return 3; /* LUI + ADD TP + the reference. */ | |
550 | case SYMBOL_GOT_DISP: return 3; /* AUIPC + LD GOT + the reference. */ | |
551 | default: gcc_unreachable (); | |
552 | } | |
553 | } | |
554 | ||
555 | /* Implement TARGET_LEGITIMATE_CONSTANT_P. */ | |
556 | ||
557 | static bool | |
b8506a8a | 558 | riscv_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x) |
09cae750 PD |
559 | { |
560 | return riscv_const_insns (x) > 0; | |
561 | } | |
562 | ||
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  Return true if X must not
   be spilled to the constant pool.  */

static bool
riscv_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  enum riscv_symbol_type type;
  rtx base, offset;

  /* There is no assembler syntax for expressing an address-sized
     high part.  */
  if (GET_CODE (x) == HIGH)
    return true;

  split_const (x, &base, &offset);
  if (riscv_symbolic_constant_p (base, &type))
    {
      /* As an optimization, don't spill symbolic constants that are as
	 cheap to rematerialize as to access in the constant pool.  */
      if (SMALL_OPERAND (INTVAL (offset)) && riscv_symbol_insns (type) > 0)
	return true;

      /* As an optimization, avoid needlessly generating dynamic
	 relocations.  */
      if (flag_pic)
	return true;
    }

  /* TLS symbols must be computed by riscv_legitimize_move.  */
  if (tls_referenced_p (x))
    return true;

  return false;
}
595 | ||
/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect, i.e. pseudos must
   already have been allocated to hard registers.  */

int
riscv_regno_mode_ok_for_base_p (int regno,
				machine_mode mode ATTRIBUTE_UNUSED,
				bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      /* Non-strict: optimistically accept any pseudo.  Strict: look up
	 where (if anywhere) the pseudo was allocated.  */
      if (!strict_p)
	return true;
      regno = reg_renumber[regno];
    }

  /* These fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
    return true;

  return GP_REG_P (regno);
}
619 | ||
620 | /* Return true if X is a valid base register for mode MODE. | |
621 | STRICT_P is true if REG_OK_STRICT is in effect. */ | |
622 | ||
623 | static bool | |
b8506a8a | 624 | riscv_valid_base_register_p (rtx x, machine_mode mode, bool strict_p) |
09cae750 PD |
625 | { |
626 | if (!strict_p && GET_CODE (x) == SUBREG) | |
627 | x = SUBREG_REG (x); | |
628 | ||
629 | return (REG_P (x) | |
630 | && riscv_regno_mode_ok_for_base_p (REGNO (x), mode, strict_p)); | |
631 | } | |
632 | ||
/* Return true if, for every base register BASE_REG, (plus BASE_REG X)
   can address a value of mode MODE.  */

static bool
riscv_valid_offset_p (rtx x, machine_mode mode)
{
  /* Check that X is a signed 12-bit number.  */
  if (!const_arith_operand (x, Pmode))
    return false;

  /* We may need to split multiword moves, so make sure that every word
     is accessible, i.e. the offset of the highest-addressed word still
     fits in a 12-bit immediate.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && !SMALL_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD))
    return false;

  return true;
}
651 | ||
652 | /* Should a symbol of type SYMBOL_TYPE should be split in two? */ | |
653 | ||
654 | bool | |
655 | riscv_split_symbol_type (enum riscv_symbol_type symbol_type) | |
656 | { | |
657 | if (symbol_type == SYMBOL_TLS_LE) | |
658 | return true; | |
659 | ||
660 | if (!TARGET_EXPLICIT_RELOCS) | |
661 | return false; | |
662 | ||
663 | return symbol_type == SYMBOL_ABSOLUTE || symbol_type == SYMBOL_PCREL; | |
664 | } | |
665 | ||
/* Return true if a LO_SUM can address a value of mode MODE when the
   LO_SUM symbol has type SYM_TYPE.  */

static bool
riscv_valid_lo_sum_p (enum riscv_symbol_type sym_type, machine_mode mode)
{
  /* Check that symbols of type SYMBOL_TYPE can be used to access values
     of mode MODE.  */
  if (riscv_symbol_insns (sym_type) == 0)
    return false;

  /* Check that there is a known low-part relocation.  */
  if (!riscv_split_symbol_type (sym_type))
    return false;

  /* We may need to split multiword moves, so make sure that each word
     can be accessed without inducing a carry.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && (!TARGET_STRICT_ALIGN
	  || GET_MODE_BITSIZE (mode) > GET_MODE_ALIGNMENT (mode)))
    return false;

  return true;
}
690 | ||
/* Return true if X is a valid address for machine mode MODE.  If it is,
   fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
   effect.  */

static bool
riscv_classify_address (struct riscv_address_info *info, rtx x,
			machine_mode mode, bool strict_p)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      /* Bare register: base + zero offset.  */
      info->type = ADDRESS_REG;
      info->reg = x;
      info->offset = const0_rtx;
      return riscv_valid_base_register_p (info->reg, mode, strict_p);

    case PLUS:
      /* Base register plus constant offset.  */
      info->type = ADDRESS_REG;
      info->reg = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      return (riscv_valid_base_register_p (info->reg, mode, strict_p)
	      && riscv_valid_offset_p (info->offset, mode));

    case LO_SUM:
      info->type = ADDRESS_LO_SUM;
      info->reg = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      /* We have to trust the creator of the LO_SUM to do something vaguely
	 sane.  Target-independent code that creates a LO_SUM should also
	 create and verify the matching HIGH.  Target-independent code that
	 adds an offset to a LO_SUM must prove that the offset will not
	 induce a carry.  Failure to do either of these things would be
	 a bug, and we are not required to check for it here.  The RISC-V
	 backend itself should only create LO_SUMs for valid symbolic
	 constants, with the high part being either a HIGH or a copy
	 of _gp.  */
      info->symbol_type
	= riscv_classify_symbolic_expression (info->offset);
      return (riscv_valid_base_register_p (info->reg, mode, strict_p)
	      && riscv_valid_lo_sum_p (info->symbol_type, mode));

    case CONST_INT:
      /* Small-integer addresses don't occur very often, but they
	 are legitimate if x0 is a valid base register.  */
      info->type = ADDRESS_CONST_INT;
      return SMALL_OPERAND (INTVAL (x));

    default:
      return false;
    }
}
743 | ||
744 | /* Implement TARGET_LEGITIMATE_ADDRESS_P. */ | |
745 | ||
746 | static bool | |
b8506a8a | 747 | riscv_legitimate_address_p (machine_mode mode, rtx x, bool strict_p) |
09cae750 PD |
748 | { |
749 | struct riscv_address_info addr; | |
750 | ||
751 | return riscv_classify_address (&addr, x, mode, strict_p); | |
752 | } | |
753 | ||
/* Return the number of instructions needed to load or store a value
   of mode MODE at address X.  Return 0 if X isn't valid for MODE.
   Assume that multiword moves may need to be split into word moves
   if MIGHT_SPLIT_P, otherwise assume that a single load or store is
   enough.  */

int
riscv_address_insns (rtx x, machine_mode mode, bool might_split_p)
{
  struct riscv_address_info addr;
  int n = 1;

  if (!riscv_classify_address (&addr, x, mode, false))
    return 0;

  /* BLKmode is used for single unaligned loads and stores and should
     not count as a multiword mode.  */
  if (mode != BLKmode && might_split_p)
    n += (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* A LO_SUM address also pays for the instructions that build the
     symbol's high part (minus the reference already counted above).  */
  if (addr.type == ADDRESS_LO_SUM)
    n += riscv_symbol_insns (addr.symbol_type) - 1;

  return n;
}
779 | ||
/* Return the number of instructions needed to load constant X.
   Return 0 if X isn't a valid constant.  */

int
riscv_const_insns (rtx x)
{
  enum riscv_symbol_type symbol_type;
  rtx offset;

  switch (GET_CODE (x))
    {
    case HIGH:
      if (!riscv_symbolic_constant_p (XEXP (x, 0), &symbol_type)
	  || !riscv_split_symbol_type (symbol_type))
	return 0;

      /* This is simply an LUI.  */
      return 1;

    case CONST_INT:
      {
	int cost = riscv_integer_cost (INTVAL (x));
	/* Force complicated constants to memory.  */
	return cost < 4 ? cost : 0;
      }

    case CONST_DOUBLE:
    case CONST_VECTOR:
      /* We can use x0 to load floating-point zero.  */
      return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;

    case CONST:
      /* See if we can refer to X directly.  */
      if (riscv_symbolic_constant_p (x, &symbol_type))
	return riscv_symbol_insns (symbol_type);

      /* Otherwise try splitting the constant into a base and offset.  */
      split_const (x, &x, &offset);
      if (offset != 0)
	{
	  int n = riscv_const_insns (x);
	  if (n != 0)
	    return n + riscv_integer_cost (INTVAL (offset));
	}
      return 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return riscv_symbol_insns (riscv_classify_symbol (x));

    default:
      return 0;
    }
}
834 | ||
835 | /* X is a doubleword constant that can be handled by splitting it into | |
836 | two words and loading each word separately. Return the number of | |
837 | instructions required to do this. */ | |
838 | ||
839 | int | |
840 | riscv_split_const_insns (rtx x) | |
841 | { | |
842 | unsigned int low, high; | |
843 | ||
844 | low = riscv_const_insns (riscv_subword (x, false)); | |
845 | high = riscv_const_insns (riscv_subword (x, true)); | |
846 | gcc_assert (low > 0 && high > 0); | |
847 | return low + high; | |
848 | } | |
849 | ||
/* Return the number of instructions needed to implement INSN,
   given that it loads from or stores to MEM.  */

int
riscv_load_store_insns (rtx mem, rtx_insn *insn)
{
  machine_mode mode;
  bool might_split_p;
  rtx set;

  gcc_assert (MEM_P (mem));
  mode = GET_MODE (mem);

  /* Try to prove that INSN does not need to be split.  Word-size or
     smaller accesses never split; 64-bit accesses split only when
     riscv_split_64bit_move_p says the move cannot be done whole.  */
  might_split_p = true;
  if (GET_MODE_BITSIZE (mode) <= 32)
    might_split_p = false;
  else if (GET_MODE_BITSIZE (mode) == 64)
    {
      set = single_set (insn);
      if (set && !riscv_split_64bit_move_p (SET_DEST (set), SET_SRC (set)))
	might_split_p = false;
    }

  return riscv_address_insns (XEXP (mem, 0), mode, might_split_p);
}
876 | ||
877 | /* Emit a move from SRC to DEST. Assume that the move expanders can | |
878 | handle all moves if !can_create_pseudo_p (). The distinction is | |
879 | important because, unlike emit_move_insn, the move expanders know | |
880 | how to force Pmode objects into the constant pool even when the | |
881 | constant pool address is not itself legitimate. */ | |
882 | ||
883 | rtx | |
884 | riscv_emit_move (rtx dest, rtx src) | |
885 | { | |
886 | return (can_create_pseudo_p () | |
887 | ? emit_move_insn (dest, src) | |
888 | : emit_move_insn_1 (dest, src)); | |
889 | } | |
890 | ||
891 | /* Emit an instruction of the form (set TARGET SRC). */ | |
892 | ||
893 | static rtx | |
894 | riscv_emit_set (rtx target, rtx src) | |
895 | { | |
896 | emit_insn (gen_rtx_SET (target, src)); | |
897 | return target; | |
898 | } | |
899 | ||
900 | /* Emit an instruction of the form (set DEST (CODE X Y)). */ | |
901 | ||
902 | static rtx | |
903 | riscv_emit_binary (enum rtx_code code, rtx dest, rtx x, rtx y) | |
904 | { | |
905 | return riscv_emit_set (dest, gen_rtx_fmt_ee (code, GET_MODE (dest), x, y)); | |
906 | } | |
907 | ||
908 | /* Compute (CODE X Y) and store the result in a new register | |
909 | of mode MODE. Return that new register. */ | |
910 | ||
911 | static rtx | |
b8506a8a | 912 | riscv_force_binary (machine_mode mode, enum rtx_code code, rtx x, rtx y) |
09cae750 PD |
913 | { |
914 | return riscv_emit_binary (code, gen_reg_rtx (mode), x, y); | |
915 | } | |
916 | ||
917 | /* Copy VALUE to a register and return that register. If new pseudos | |
918 | are allowed, copy it into a new register, otherwise use DEST. */ | |
919 | ||
920 | static rtx | |
921 | riscv_force_temporary (rtx dest, rtx value) | |
922 | { | |
923 | if (can_create_pseudo_p ()) | |
924 | return force_reg (Pmode, value); | |
925 | else | |
926 | { | |
927 | riscv_emit_move (dest, value); | |
928 | return dest; | |
929 | } | |
930 | } | |
931 | ||
932 | /* Wrap symbol or label BASE in an UNSPEC address of type SYMBOL_TYPE, | |
933 | then add CONST_INT OFFSET to the result. */ | |
934 | ||
935 | static rtx | |
936 | riscv_unspec_address_offset (rtx base, rtx offset, | |
937 | enum riscv_symbol_type symbol_type) | |
938 | { | |
939 | base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base), | |
940 | UNSPEC_ADDRESS_FIRST + symbol_type); | |
941 | if (offset != const0_rtx) | |
942 | base = gen_rtx_PLUS (Pmode, base, offset); | |
943 | return gen_rtx_CONST (Pmode, base); | |
944 | } | |
945 | ||
946 | /* Return an UNSPEC address with underlying address ADDRESS and symbol | |
947 | type SYMBOL_TYPE. */ | |
948 | ||
949 | rtx | |
950 | riscv_unspec_address (rtx address, enum riscv_symbol_type symbol_type) | |
951 | { | |
952 | rtx base, offset; | |
953 | ||
954 | split_const (address, &base, &offset); | |
955 | return riscv_unspec_address_offset (base, offset, symbol_type); | |
956 | } | |
957 | ||
958 | /* If OP is an UNSPEC address, return the address to which it refers, | |
959 | otherwise return OP itself. */ | |
960 | ||
961 | static rtx | |
962 | riscv_strip_unspec_address (rtx op) | |
963 | { | |
964 | rtx base, offset; | |
965 | ||
966 | split_const (op, &base, &offset); | |
967 | if (UNSPEC_ADDRESS_P (base)) | |
968 | op = plus_constant (Pmode, UNSPEC_ADDRESS (base), INTVAL (offset)); | |
969 | return op; | |
970 | } | |
971 | ||
972 | /* If riscv_unspec_address (ADDR, SYMBOL_TYPE) is a 32-bit value, add the | |
973 | high part to BASE and return the result. Just return BASE otherwise. | |
974 | TEMP is as for riscv_force_temporary. | |
975 | ||
976 | The returned expression can be used as the first operand to a LO_SUM. */ | |
977 | ||
978 | static rtx | |
979 | riscv_unspec_offset_high (rtx temp, rtx addr, enum riscv_symbol_type symbol_type) | |
980 | { | |
981 | addr = gen_rtx_HIGH (Pmode, riscv_unspec_address (addr, symbol_type)); | |
982 | return riscv_force_temporary (temp, addr); | |
983 | } | |
984 | ||
985 | /* Load an entry from the GOT for a TLS GD access. */ | |
986 | ||
987 | static rtx riscv_got_load_tls_gd (rtx dest, rtx sym) | |
988 | { | |
989 | if (Pmode == DImode) | |
990 | return gen_got_load_tls_gddi (dest, sym); | |
991 | else | |
992 | return gen_got_load_tls_gdsi (dest, sym); | |
993 | } | |
994 | ||
995 | /* Load an entry from the GOT for a TLS IE access. */ | |
996 | ||
997 | static rtx riscv_got_load_tls_ie (rtx dest, rtx sym) | |
998 | { | |
999 | if (Pmode == DImode) | |
1000 | return gen_got_load_tls_iedi (dest, sym); | |
1001 | else | |
1002 | return gen_got_load_tls_iesi (dest, sym); | |
1003 | } | |
1004 | ||
1005 | /* Add in the thread pointer for a TLS LE access. */ | |
1006 | ||
1007 | static rtx riscv_tls_add_tp_le (rtx dest, rtx base, rtx sym) | |
1008 | { | |
1009 | rtx tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); | |
1010 | if (Pmode == DImode) | |
1011 | return gen_tls_add_tp_ledi (dest, base, tp, sym); | |
1012 | else | |
1013 | return gen_tls_add_tp_lesi (dest, base, tp, sym); | |
1014 | } | |
1015 | ||
1016 | /* If MODE is MAX_MACHINE_MODE, ADDR appears as a move operand, otherwise | |
1017 | it appears in a MEM of that mode. Return true if ADDR is a legitimate | |
1018 | constant in that context and can be split into high and low parts. | |
1019 | If so, and if LOW_OUT is nonnull, emit the high part and store the | |
1020 | low part in *LOW_OUT. Leave *LOW_OUT unchanged otherwise. | |
1021 | ||
1022 | TEMP is as for riscv_force_temporary and is used to load the high | |
1023 | part into a register. | |
1024 | ||
1025 | When MODE is MAX_MACHINE_MODE, the low part is guaranteed to be | |
1026 | a legitimate SET_SRC for an .md pattern, otherwise the low part | |
1027 | is guaranteed to be a legitimate address for mode MODE. */ | |
1028 | ||
bool
riscv_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
{
  enum riscv_symbol_type symbol_type;

  /* Only split plain symbolic constants whose symbol type supports a
     high/low decomposition.  An address already wrapped in HIGH is a
     move operand that needs no further splitting.  */
  if ((GET_CODE (addr) == HIGH && mode == MAX_MACHINE_MODE)
      || !riscv_symbolic_constant_p (addr, &symbol_type)
      || riscv_symbol_insns (symbol_type) == 0
      || !riscv_split_symbol_type (symbol_type))
    return false;

  if (low_out)
    switch (symbol_type)
      {
      case SYMBOL_ABSOLUTE:
	{
	  /* Materialize the high part (LUI-style); the caller uses the
	     LO_SUM we return as the low part.  */
	  rtx high = gen_rtx_HIGH (Pmode, copy_rtx (addr));
	  high = riscv_force_temporary (temp, high);
	  *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
	}
	break;

      case SYMBOL_PCREL:
	{
	  /* Each AUIPC gets a unique local label (.LA<n>) so the
	     matching low-part relocation can refer back to it.  */
	  static unsigned seqno;
	  char buf[32];
	  rtx label;

	  ssize_t bytes = snprintf (buf, sizeof (buf), ".LA%u", seqno);
	  gcc_assert ((size_t) bytes < sizeof (buf));

	  label = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
	  SYMBOL_REF_FLAGS (label) |= SYMBOL_FLAG_LOCAL;

	  if (temp == NULL)
	    temp = gen_reg_rtx (Pmode);

	  if (Pmode == DImode)
	    emit_insn (gen_auipcdi (temp, copy_rtx (addr), GEN_INT (seqno)));
	  else
	    emit_insn (gen_auipcsi (temp, copy_rtx (addr), GEN_INT (seqno)));

	  *low_out = gen_rtx_LO_SUM (Pmode, temp, label);

	  seqno++;
	}
	break;

      default:
	gcc_unreachable ();
      }

  return true;
}
1083 | ||
/* Return a legitimate address for REG + OFFSET.  TEMP is as for
   riscv_force_temporary; it is only needed when OFFSET is not a
   SMALL_OPERAND.  */

static rtx
riscv_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset)
{
  if (!SMALL_OPERAND (offset))
    {
      rtx high;

      /* Leave OFFSET as an addi-compatible (12-bit signed) offset and
	 put the excess in HIGH.  The addition inside the macro
	 CONST_HIGH_PART may cause an overflow, so we need to force a
	 sign-extension check.  */
      high = gen_int_mode (CONST_HIGH_PART (offset), Pmode);
      offset = CONST_LOW_PART (offset);
      high = riscv_force_temporary (temp, high);
      reg = riscv_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
    }
  return plus_constant (Pmode, reg, offset);
}
1105 | ||
1106 | /* The __tls_get_addr symbol. */ | |
1107 | static GTY(()) rtx riscv_tls_symbol; | |
1108 | ||
/* Return an instruction sequence that calls __tls_get_addr.  SYM is
   the TLS symbol we are referencing (loaded GD-style).  RESULT is an
   RTX for the return value location.  */

static rtx_insn *
riscv_call_tls_get_addr (rtx sym, rtx result)
{
  rtx a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST), func;
  rtx_insn *insn;

  /* Lazily create the libfunc symbol the first time it is needed.  */
  if (!riscv_tls_symbol)
    riscv_tls_symbol = init_one_libfunc ("__tls_get_addr");
  func = gen_rtx_MEM (FUNCTION_MODE, riscv_tls_symbol);

  start_sequence ();

  /* Load the GOT entry into a0, the argument register, then call.  */
  emit_insn (riscv_got_load_tls_gd (a0, sym));
  insn = emit_call_insn (gen_call_value (result, func, const0_rtx, NULL));
  RTL_CONST_CALL_P (insn) = 1;
  /* Record that the call reads a0 so it is not deleted as dead.  */
  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
  insn = get_insns ();

  end_sequence ();

  return insn;
}
1136 | ||
/* Generate the code to access LOC, a thread-local SYMBOL_REF, and return
   its address.  The return value will be both a valid address and a valid
   SET_SRC (either a REG or a LO_SUM).  */

static rtx
riscv_legitimize_tls_address (rtx loc)
{
  rtx dest, tp, tmp;
  enum tls_model model = SYMBOL_REF_TLS_MODEL (loc);

  /* Since we support TLS copy relocs, non-PIC TLS accesses may all use LE.  */
  if (!flag_pic)
    model = TLS_MODEL_LOCAL_EXEC;

  switch (model)
    {
    case TLS_MODEL_LOCAL_DYNAMIC:
      /* Rely on section anchors for the optimization that LDM TLS
	 provides.  The anchor's address is loaded with GD TLS. */
    case TLS_MODEL_GLOBAL_DYNAMIC:
      /* Call __tls_get_addr; its result arrives in GP_RETURN.  */
      tmp = gen_rtx_REG (Pmode, GP_RETURN);
      dest = gen_reg_rtx (Pmode);
      emit_libcall_block (riscv_call_tls_get_addr (loc, tmp), dest, tmp, loc);
      break;

    case TLS_MODEL_INITIAL_EXEC:
      /* la.tls.ie; tp-relative add */
      tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
      tmp = gen_reg_rtx (Pmode);
      emit_insn (riscv_got_load_tls_ie (tmp, loc));
      dest = gen_reg_rtx (Pmode);
      emit_insn (gen_add3_insn (dest, tmp, tp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      /* High part plus tp, then a LO_SUM for the low part.  */
      tmp = riscv_unspec_offset_high (NULL, loc, SYMBOL_TLS_LE);
      dest = gen_reg_rtx (Pmode);
      emit_insn (riscv_tls_add_tp_le (dest, tmp, loc));
      dest = gen_rtx_LO_SUM (Pmode, dest,
			     riscv_unspec_address (loc, SYMBOL_TLS_LE));
      break;

    default:
      gcc_unreachable ();
    }
  return dest;
}
1184 | \f | |
1185 | /* If X is not a valid address for mode MODE, force it into a register. */ | |
1186 | ||
1187 | static rtx | |
b8506a8a | 1188 | riscv_force_address (rtx x, machine_mode mode) |
09cae750 PD |
1189 | { |
1190 | if (!riscv_legitimate_address_p (mode, x, false)) | |
1191 | x = force_reg (Pmode, x); | |
1192 | return x; | |
1193 | } | |
1194 | ||
/* This function is used to implement LEGITIMIZE_ADDRESS.  If X can
   be legitimized in a way that the generic machinery might not expect,
   return a new address, otherwise return NULL.  MODE is the mode of
   the memory being accessed.  */

static rtx
riscv_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			  machine_mode mode)
{
  rtx addr;

  /* TLS symbols need their model-specific access sequence.  */
  if (riscv_tls_symbol_p (x))
    return riscv_legitimize_tls_address (x);

  /* See if the address can split into a high part and a LO_SUM.  */
  if (riscv_split_symbol (NULL, x, mode, &addr))
    return riscv_force_address (addr, mode);

  /* Handle BASE + OFFSET using riscv_add_offset.  */
  if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
      && INTVAL (XEXP (x, 1)) != 0)
    {
      rtx base = XEXP (x, 0);
      HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));

      /* The base must itself be a valid base register first.  */
      if (!riscv_valid_base_register_p (base, mode, false))
	base = copy_to_mode_reg (Pmode, base);
      addr = riscv_add_offset (NULL, base, offset);
      return riscv_force_address (addr, mode);
    }

  return x;
}
1228 | ||
/* Load VALUE into DEST.  TEMP is as for riscv_force_temporary.  */

void
riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT value)
{
  struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];
  machine_mode mode;
  int i, num_ops;
  rtx x;

  mode = GET_MODE (dest);
  /* Compute the cheapest single-chain build sequence for VALUE.  */
  num_ops = riscv_build_integer (codes, value, mode);

  /* For longer sequences it can be cheaper to build the two halves
     independently and combine them (riscv_split_integer).  */
  if (can_create_pseudo_p () && num_ops > 2 /* not a simple constant */
      && num_ops >= riscv_split_integer_cost (value))
    x = riscv_split_integer (value, mode);
  else
    {
      /* Apply each binary operation to X. */
      x = GEN_INT (codes[0].value);

      for (i = 1; i < num_ops; i++)
	{
	  /* Without pseudos, reuse TEMP for every partial result.  */
	  if (!can_create_pseudo_p ())
	    x = riscv_emit_set (temp, x);
	  else
	    x = force_reg (mode, x);

	  x = gen_rtx_fmt_ee (codes[i].code, mode, x, GEN_INT (codes[i].value));
	}
    }

  riscv_emit_set (dest, x);
}
1263 | ||
/* Subroutine of riscv_legitimize_move.  Move constant SRC into register
   DEST given that SRC satisfies immediate_operand but doesn't satisfy
   move_operand.  */

static void
riscv_legitimize_const_move (machine_mode mode, rtx dest, rtx src)
{
  rtx base, offset;

  /* Split moves of big integers into smaller pieces.  */
  if (splittable_const_int_operand (src, mode))
    {
      /* DEST doubles as the temporary here.  */
      riscv_move_integer (dest, dest, INTVAL (src));
      return;
    }

  /* Split moves of symbolic constants into high/low pairs.  */
  if (riscv_split_symbol (dest, src, MAX_MACHINE_MODE, &src))
    {
      riscv_emit_set (dest, src);
      return;
    }

  /* Generate the appropriate access sequences for TLS symbols.  */
  if (riscv_tls_symbol_p (src))
    {
      riscv_emit_move (dest, riscv_legitimize_tls_address (src));
      return;
    }

  /* If we have (const (plus symbol offset)), and that expression cannot
     be forced into memory, load the symbol first and add in the offset.  Also
     prefer to do this even if the constant _can_ be forced into memory, as it
     usually produces better code.  */
  split_const (src, &base, &offset);
  if (offset != const0_rtx
      && (targetm.cannot_force_const_mem (mode, src) || can_create_pseudo_p ()))
    {
      base = riscv_force_temporary (dest, base);
      riscv_emit_move (dest, riscv_add_offset (NULL, base, INTVAL (offset)));
      return;
    }

  /* Last resort: place the constant in the pool and load it.  */
  src = force_const_mem (mode, src);

  /* When using explicit relocs, constant pool references are sometimes
     not legitimate addresses.  */
  riscv_split_symbol (dest, XEXP (src, 0), mode, &XEXP (src, 0));
  riscv_emit_move (dest, src);
}
1314 | ||
/* If (set DEST SRC) is not a valid move instruction, emit an equivalent
   sequence that is valid.  Return true if such a sequence was emitted,
   false if the caller should emit the move as-is.  */

bool
riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
{
  /* A move between two non-registers must go through a register.  */
  if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode))
    {
      riscv_emit_move (dest, force_reg (mode, src));
      return true;
    }

  /* We need to deal with constants that would be legitimate
     immediate_operands but aren't legitimate move_operands.  */
  if (CONSTANT_P (src) && !move_operand (src, mode))
    {
      riscv_legitimize_const_move (mode, dest, src);
      /* Record the full constant, since the emitted sequence may only
	 expose it piecewise.  */
      set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (src));
      return true;
    }

  /* The patterns we provide for optimizing accesses to PIC local
     symbols can leave GCC with addresses that are not legitimate,
     which would later produce unrecognizable instructions.  Force
     such addresses back into registers here.  */

  if (MEM_P (dest) && !riscv_legitimate_address_p (mode, XEXP (dest, 0),
						   reload_completed))
    {
      XEXP (dest, 0) = riscv_force_address (XEXP (dest, 0), mode);
    }

  if (MEM_P (src) && !riscv_legitimate_address_p (mode, XEXP (src, 0),
						  reload_completed))
    {
      XEXP (src, 0) = riscv_force_address (XEXP (src, 0), mode);
    }

  return false;
}
1354 | ||
1355 | /* Return true if there is an instruction that implements CODE and accepts | |
1356 | X as an immediate operand. */ | |
1357 | ||
1358 | static int | |
1359 | riscv_immediate_operand_p (int code, HOST_WIDE_INT x) | |
1360 | { | |
1361 | switch (code) | |
1362 | { | |
1363 | case ASHIFT: | |
1364 | case ASHIFTRT: | |
1365 | case LSHIFTRT: | |
1366 | /* All shift counts are truncated to a valid constant. */ | |
1367 | return true; | |
1368 | ||
1369 | case AND: | |
1370 | case IOR: | |
1371 | case XOR: | |
1372 | case PLUS: | |
1373 | case LT: | |
1374 | case LTU: | |
1375 | /* These instructions take 12-bit signed immediates. */ | |
1376 | return SMALL_OPERAND (x); | |
1377 | ||
1378 | case LE: | |
1379 | /* We add 1 to the immediate and use SLT. */ | |
1380 | return SMALL_OPERAND (x + 1); | |
1381 | ||
1382 | case LEU: | |
1383 | /* Likewise SLTU, but reject the always-true case. */ | |
1384 | return SMALL_OPERAND (x + 1) && x + 1 != 0; | |
1385 | ||
1386 | case GE: | |
1387 | case GEU: | |
1388 | /* We can emulate an immediate of 1 by using GT/GTU against x0. */ | |
1389 | return x == 1; | |
1390 | ||
1391 | default: | |
1392 | /* By default assume that x0 can be used for 0. */ | |
1393 | return x == 0; | |
1394 | } | |
1395 | } | |
1396 | ||
1397 | /* Return the cost of binary operation X, given that the instruction | |
1398 | sequence for a word-sized or smaller operation takes SINGLE_INSNS | |
1399 | instructions and that the sequence of a double-word operation takes | |
1400 | DOUBLE_INSNS instructions. */ | |
1401 | ||
1402 | static int | |
1403 | riscv_binary_cost (rtx x, int single_insns, int double_insns) | |
1404 | { | |
1405 | if (GET_MODE_SIZE (GET_MODE (x)) == UNITS_PER_WORD * 2) | |
1406 | return COSTS_N_INSNS (double_insns); | |
1407 | return COSTS_N_INSNS (single_insns); | |
1408 | } | |
1409 | ||
1410 | /* Return the cost of sign- or zero-extending OP. */ | |
1411 | ||
1412 | static int | |
1413 | riscv_extend_cost (rtx op, bool unsigned_p) | |
1414 | { | |
1415 | if (MEM_P (op)) | |
1416 | return 0; | |
1417 | ||
1418 | if (unsigned_p && GET_MODE (op) == QImode) | |
1419 | /* We can use ANDI. */ | |
1420 | return COSTS_N_INSNS (1); | |
1421 | ||
1422 | if (!unsigned_p && GET_MODE (op) == SImode) | |
1423 | /* We can use SEXT.W. */ | |
1424 | return COSTS_N_INSNS (1); | |
1425 | ||
1426 | /* We need to use a shift left and a shift right. */ | |
1427 | return COSTS_N_INSNS (2); | |
1428 | } | |
1429 | ||
1430 | /* Implement TARGET_RTX_COSTS. */ | |
1431 | ||
08539f3e JW |
1432 | #define SINGLE_SHIFT_COST 1 |
1433 | ||
09cae750 PD |
static bool
riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UNUSED,
		 int *total, bool speed)
{
  /* Estimate the rtx cost of X in context OUTER_CODE, storing it in
     *TOTAL.  Return true when *TOTAL is final, false to let the
     generic code add operand costs.  */
  bool float_mode_p = FLOAT_MODE_P (mode);
  int cost;

  switch (GET_CODE (x))
    {
    case CONST_INT:
      /* Immediates an instruction can absorb are free.  */
      if (riscv_immediate_operand_p (outer_code, INTVAL (x)))
	{
	  *total = 0;
	  return true;
	}
      /* Fall through.  */

    case SYMBOL_REF:
    case LABEL_REF:
    case CONST_DOUBLE:
    case CONST:
      if ((cost = riscv_const_insns (x)) > 0)
	{
	  /* If the constant is likely to be stored in a GPR, SETs of
	     single-insn constants are as cheap as register sets; we
	     never want to CSE them.  */
	  if (cost == 1 && outer_code == SET)
	    *total = 0;
	  /* When we load a constant more than once, it usually is better
	     to duplicate the last operation in the sequence than to CSE
	     the constant itself.  */
	  else if (outer_code == SET || GET_MODE (x) == VOIDmode)
	    *total = COSTS_N_INSNS (1);
	}
      else /* The instruction will be fetched from the constant pool.  */
	*total = COSTS_N_INSNS (riscv_symbol_insns (SYMBOL_ABSOLUTE));
      return true;

    case MEM:
      /* If the address is legitimate, return the number of
	 instructions it needs.  */
      if ((cost = riscv_address_insns (XEXP (x, 0), mode, true)) > 0)
	{
	  *total = COSTS_N_INSNS (cost + tune_info->memory_cost);
	  return true;
	}
      /* Otherwise use the default handling.  */
      return false;

    case NOT:
      *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 2 : 1);
      return false;

    case AND:
    case IOR:
    case XOR:
      /* Double-word operations use two single-word operations.  */
      *total = riscv_binary_cost (x, 1, 2);
      return false;

    case ZERO_EXTRACT:
      /* This is an SImode shift.  */
      if (outer_code == SET && (INTVAL (XEXP (x, 2)) > 0)
	  && (INTVAL (XEXP (x, 1)) + INTVAL (XEXP (x, 2)) == 32))
	{
	  *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
	  return true;
	}
      return false;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      /* Variable double-word shifts are much more expensive than
	 constant ones.  */
      *total = riscv_binary_cost (x, SINGLE_SHIFT_COST,
				  CONSTANT_P (XEXP (x, 1)) ? 4 : 9);
      return false;

    case ABS:
      *total = COSTS_N_INSNS (float_mode_p ? 1 : 3);
      return false;

    case LO_SUM:
      /* The low-part add folds into the using instruction; only the
	 high-part operand costs anything.  */
      *total = set_src_cost (XEXP (x, 0), mode, speed);
      return true;

    case LT:
      /* This is an SImode shift.  */
      if (outer_code == SET && GET_MODE (x) == DImode
	  && GET_MODE (XEXP (x, 0)) == SImode)
	{
	  *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
	  return true;
	}
      /* Fall through.  */
    case LTU:
    case LE:
    case LEU:
    case GT:
    case GTU:
    case GE:
    case GEU:
    case EQ:
    case NE:
      /* Branch comparisons have VOIDmode, so use the first operand's
	 mode instead.  */
      mode = GET_MODE (XEXP (x, 0));
      if (float_mode_p)
	*total = tune_info->fp_add[mode == DFmode];
      else
	*total = riscv_binary_cost (x, 1, 3);
      return false;

    case UNORDERED:
    case ORDERED:
      /* (FEQ(A, A) & FEQ(B, B)) compared against 0.  */
      mode = GET_MODE (XEXP (x, 0));
      *total = tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
      return false;

    case UNEQ:
    case LTGT:
      /* (FEQ(A, A) & FEQ(B, B)) compared against FEQ(A, B).  */
      mode = GET_MODE (XEXP (x, 0));
      *total = tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (3);
      return false;

    case UNGE:
    case UNGT:
    case UNLE:
    case UNLT:
      /* FLT or FLE, but guarded by an FFLAGS read and write.  */
      mode = GET_MODE (XEXP (x, 0));
      *total = tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (4);
      return false;

    case MINUS:
    case PLUS:
      if (float_mode_p)
	*total = tune_info->fp_add[mode == DFmode];
      else
	*total = riscv_binary_cost (x, 1, 4);
      return false;

    case NEG:
      {
	rtx op = XEXP (x, 0);
	/* A negated FMA costs the same as the FMA itself (fused
	   negate variant), when signed zeros don't matter.  */
	if (GET_CODE (op) == FMA && !HONOR_SIGNED_ZEROS (mode))
	  {
	    *total = (tune_info->fp_mul[mode == DFmode]
		      + set_src_cost (XEXP (op, 0), mode, speed)
		      + set_src_cost (XEXP (op, 1), mode, speed)
		      + set_src_cost (XEXP (op, 2), mode, speed));
	    return true;
	  }
      }

      if (float_mode_p)
	*total = tune_info->fp_add[mode == DFmode];
      else
	*total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 4 : 1);
      return false;

    case MULT:
      if (float_mode_p)
	*total = tune_info->fp_mul[mode == DFmode];
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	/* Double-word multiply: three word multiplies plus fixup.  */
	*total = 3 * tune_info->int_mul[0] + COSTS_N_INSNS (2);
      else if (!speed)
	*total = COSTS_N_INSNS (1);
      else
	*total = tune_info->int_mul[mode == DImode];
      return false;

    case DIV:
    case SQRT:
    case MOD:
      if (float_mode_p)
	{
	  *total = tune_info->fp_div[mode == DFmode];
	  return false;
	}
      /* Fall through.  */

    case UDIV:
    case UMOD:
      if (speed)
	*total = tune_info->int_div[mode == DImode];
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case ZERO_EXTEND:
      /* This is an SImode shift.  */
      if (GET_CODE (XEXP (x, 0)) == LSHIFTRT)
	{
	  *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
	  return true;
	}
      /* Fall through.  */
    case SIGN_EXTEND:
      *total = riscv_extend_cost (XEXP (x, 0), GET_CODE (x) == ZERO_EXTEND);
      return false;

    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case FLOAT_EXTEND:
    case FLOAT_TRUNCATE:
      *total = tune_info->fp_add[mode == DFmode];
      return false;

    case FMA:
      *total = (tune_info->fp_mul[mode == DFmode]
		+ set_src_cost (XEXP (x, 0), mode, speed)
		+ set_src_cost (XEXP (x, 1), mode, speed)
		+ set_src_cost (XEXP (x, 2), mode, speed));
      return true;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_AUIPC)
	{
	  /* Make AUIPC cheap to avoid spilling its result to the stack.  */
	  *total = 1;
	  return true;
	}
      return false;

    default:
      return false;
    }
}
1665 | ||
1666 | /* Implement TARGET_ADDRESS_COST. */ | |
1667 | ||
1668 | static int | |
b8506a8a | 1669 | riscv_address_cost (rtx addr, machine_mode mode, |
09cae750 PD |
1670 | addr_space_t as ATTRIBUTE_UNUSED, |
1671 | bool speed ATTRIBUTE_UNUSED) | |
1672 | { | |
1673 | return riscv_address_insns (addr, mode, false); | |
1674 | } | |
1675 | ||
/* Return one word of double-word value OP.  HIGH_P is true to select the
   high part or false to select the low part.  */

rtx
riscv_subword (rtx op, bool high_p)
{
  unsigned int byte = high_p ? UNITS_PER_WORD : 0;
  machine_mode mode = GET_MODE (op);

  /* Constants have VOIDmode; treat them as double-word integers.  */
  if (mode == VOIDmode)
    mode = TARGET_64BIT ? TImode : DImode;

  /* For memory operands, just offset the address.  */
  if (MEM_P (op))
    return adjust_address (op, word_mode, byte);

  /* FPRs cannot be addressed word-by-word here.  */
  if (REG_P (op))
    gcc_assert (!FP_REG_RTX_P (op));

  return simplify_gen_subreg (word_mode, op, mode, byte);
}
1696 | ||
1697 | /* Return true if a 64-bit move from SRC to DEST should be split into two. */ | |
1698 | ||
1699 | bool | |
1700 | riscv_split_64bit_move_p (rtx dest, rtx src) | |
1701 | { | |
1702 | if (TARGET_64BIT) | |
1703 | return false; | |
1704 | ||
1705 | /* Allow FPR <-> FPR and FPR <-> MEM moves, and permit the special case | |
1706 | of zeroing an FPR with FCVT.D.W. */ | |
1707 | if (TARGET_DOUBLE_FLOAT | |
1708 | && ((FP_REG_RTX_P (src) && FP_REG_RTX_P (dest)) | |
1709 | || (FP_REG_RTX_P (dest) && MEM_P (src)) | |
1710 | || (FP_REG_RTX_P (src) && MEM_P (dest)) | |
1711 | || (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src))))) | |
1712 | return false; | |
1713 | ||
1714 | return true; | |
1715 | } | |
1716 | ||
/* Split a doubleword move from SRC to DEST.  On 32-bit targets,
   this function handles 64-bit moves for which riscv_split_64bit_move_p
   holds.  For 64-bit targets, this function handles 128-bit moves.  */

void
riscv_split_doubleword_move (rtx dest, rtx src)
{
  rtx low_dest;

  /* The operation can be split into two normal moves.  Decide in
     which order to do them.  */
  low_dest = riscv_subword (dest, false);
  if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src))
    {
      /* Writing the low destination first would clobber a word of SRC
	 still to be read, so move the high half first.  */
      riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true));
      riscv_emit_move (low_dest, riscv_subword (src, false));
    }
  else
    {
      riscv_emit_move (low_dest, riscv_subword (src, false));
      riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true));
    }
}
1740 | \f | |
/* Return the appropriate instructions to move SRC into DEST.  Assume
   that SRC is operand 1 and DEST is operand 0.  */

const char *
riscv_output_move (rtx dest, rtx src)
{
  enum rtx_code dest_code, src_code;
  machine_mode mode;
  bool dbl_p;

  dest_code = GET_CODE (dest);
  src_code = GET_CODE (src);
  mode = GET_MODE (dest);
  dbl_p = (GET_MODE_SIZE (mode) == 8);

  /* Doubleword moves that need splitting are emitted as "#" so that
     the splitter turns them into two word-sized moves.  */
  if (dbl_p && riscv_split_64bit_move_p (dest, src))
    return "#";

  /* Moves into a general-purpose register.  */
  if (dest_code == REG && GP_REG_P (REGNO (dest)))
    {
      if (src_code == REG && FP_REG_P (REGNO (src)))
	return dbl_p ? "fmv.x.d\t%0,%1" : "fmv.x.s\t%0,%1";

      if (src_code == MEM)
	switch (GET_MODE_SIZE (mode))
	  {
	  case 1: return "lbu\t%0,%1";
	  case 2: return "lhu\t%0,%1";
	  case 4: return "lw\t%0,%1";
	  case 8: return "ld\t%0,%1";
	  }

      if (src_code == CONST_INT)
	return "li\t%0,%1";

      if (src_code == HIGH)
	return "lui\t%0,%h1";

      if (symbolic_operand (src, VOIDmode))
	switch (riscv_classify_symbolic_expression (src))
	  {
	  case SYMBOL_GOT_DISP: return "la\t%0,%1";
	  case SYMBOL_ABSOLUTE: return "lla\t%0,%1";
	  case SYMBOL_PCREL: return "lla\t%0,%1";
	  default: gcc_unreachable ();
	  }
    }
  /* Moves of a GPR or of constant zero (via the %z modifier, which
     substitutes x0 for a zero operand).  */
  if ((src_code == REG && GP_REG_P (REGNO (src)))
      || (src == CONST0_RTX (mode)))
    {
      if (dest_code == REG)
	{
	  if (GP_REG_P (REGNO (dest)))
	    return "mv\t%0,%z1";

	  if (FP_REG_P (REGNO (dest)))
	    {
	      if (!dbl_p)
		return "fmv.s.x\t%0,%z1";
	      if (TARGET_64BIT)
		return "fmv.d.x\t%0,%z1";
	      /* In RV32, we can emulate fmv.d.x %0, x0 using fcvt.d.w,
		 but only for the zero value.  */
	      gcc_assert (src == CONST0_RTX (mode));
	      return "fcvt.d.w\t%0,x0";
	    }
	}
      if (dest_code == MEM)
	switch (GET_MODE_SIZE (mode))
	  {
	  case 1: return "sb\t%z1,%0";
	  case 2: return "sh\t%z1,%0";
	  case 4: return "sw\t%z1,%0";
	  case 8: return "sd\t%z1,%0";
	  }
    }
  /* Moves out of a floating-point register.  */
  if (src_code == REG && FP_REG_P (REGNO (src)))
    {
      if (dest_code == REG && FP_REG_P (REGNO (dest)))
	return dbl_p ? "fmv.d\t%0,%1" : "fmv.s\t%0,%1";

      if (dest_code == MEM)
	return dbl_p ? "fsd\t%1,%0" : "fsw\t%1,%0";
    }
  /* Loads into a floating-point register.  */
  if (dest_code == REG && FP_REG_P (REGNO (dest)))
    {
      if (src_code == MEM)
	return dbl_p ? "fld\t%0,%1" : "flw\t%0,%1";
    }
  gcc_unreachable ();
}
1831 | \f | |
1832 | /* Return true if CMP1 is a suitable second operand for integer ordering | |
1833 | test CODE. See also the *sCC patterns in riscv.md. */ | |
1834 | ||
1835 | static bool | |
1836 | riscv_int_order_operand_ok_p (enum rtx_code code, rtx cmp1) | |
1837 | { | |
1838 | switch (code) | |
1839 | { | |
1840 | case GT: | |
1841 | case GTU: | |
1842 | return reg_or_0_operand (cmp1, VOIDmode); | |
1843 | ||
1844 | case GE: | |
1845 | case GEU: | |
1846 | return cmp1 == const1_rtx; | |
1847 | ||
1848 | case LT: | |
1849 | case LTU: | |
1850 | return arith_operand (cmp1, VOIDmode); | |
1851 | ||
1852 | case LE: | |
1853 | return sle_operand (cmp1, VOIDmode); | |
1854 | ||
1855 | case LEU: | |
1856 | return sleu_operand (cmp1, VOIDmode); | |
1857 | ||
1858 | default: | |
1859 | gcc_unreachable (); | |
1860 | } | |
1861 | } | |
1862 | ||
/* Return true if *CMP1 (of mode MODE) is a valid second operand for
   integer ordering test *CODE, or if an equivalent combination can
   be formed by adjusting *CODE and *CMP1.  When returning true, update
   *CODE and *CMP1 with the chosen code and operand, otherwise leave
   them alone.  */

static bool
riscv_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1,
				   machine_mode mode)
{
  HOST_WIDE_INT plus_one;

  if (riscv_int_order_operand_ok_p (*code, *cmp1))
    return true;

  if (CONST_INT_P (*cmp1))
    switch (*code)
      {
      case LE:
	/* Rewrite X <= C as X < C + 1, but only when C + 1 does not
	   overflow in MODE: after truncation, the incremented value
	   must still compare greater than C.  */
	plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
	if (INTVAL (*cmp1) < plus_one)
	  {
	    *code = LT;
	    *cmp1 = force_reg (mode, GEN_INT (plus_one));
	    return true;
	  }
	break;

      case LEU:
	/* Likewise rewrite X <=u C as X <u C + 1, unless C + 1 wraps
	   around to zero (C was the all-ones value).  */
	plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
	if (plus_one != 0)
	  {
	    *code = LTU;
	    *cmp1 = force_reg (mode, GEN_INT (plus_one));
	    return true;
	  }
	break;

      default:
	break;
      }
  return false;
}
1906 | ||
/* Compare CMP0 and CMP1 using ordering test CODE and store the result
   in TARGET.  CMP0 and TARGET are register_operands.  If INVERT_PTR
   is nonnull, it's OK to set TARGET to the inverse of the result and
   flip *INVERT_PTR instead.  */

static void
riscv_emit_int_order_test (enum rtx_code code, bool *invert_ptr,
			   rtx target, rtx cmp0, rtx cmp1)
{
  machine_mode mode;

  /* First see if there is a RISCV instruction that can do this operation.
     If not, try doing the same for the inverse operation.  If that also
     fails, force CMP1 into a register and try again.  */
  mode = GET_MODE (cmp0);
  if (riscv_canonicalize_int_order_test (&code, &cmp1, mode))
    riscv_emit_binary (code, target, cmp0, cmp1);
  else
    {
      enum rtx_code inv_code = reverse_condition (code);
      if (!riscv_canonicalize_int_order_test (&inv_code, &cmp1, mode))
	{
	  /* Neither sense works with CMP1 as-is; a register operand is
	     always acceptable, so force CMP1 into one and recurse
	     (this recursion terminates after one level).  */
	  cmp1 = force_reg (mode, cmp1);
	  riscv_emit_int_order_test (code, invert_ptr, target, cmp0, cmp1);
	}
      else if (invert_ptr == 0)
	{
	  /* The caller needs the direct sense: compute the inverted
	     test and flip the 0/1 result with XOR 1.  */
	  rtx inv_target = riscv_force_binary (GET_MODE (target),
					       inv_code, cmp0, cmp1);
	  riscv_emit_binary (XOR, target, inv_target, const1_rtx);
	}
      else
	{
	  /* The caller can cope with an inverted result; record it.  */
	  *invert_ptr = !*invert_ptr;
	  riscv_emit_binary (inv_code, target, cmp0, cmp1);
	}
    }
}
1945 | ||
1946 | /* Return a register that is zero iff CMP0 and CMP1 are equal. | |
1947 | The register will have the same mode as CMP0. */ | |
1948 | ||
1949 | static rtx | |
1950 | riscv_zero_if_equal (rtx cmp0, rtx cmp1) | |
1951 | { | |
1952 | if (cmp1 == const0_rtx) | |
1953 | return cmp0; | |
1954 | ||
1955 | return expand_binop (GET_MODE (cmp0), sub_optab, | |
1956 | cmp0, cmp1, 0, 0, OPTAB_DIRECT); | |
1957 | } | |
1958 | ||
/* Sign- or zero-extend OP0 and OP1 for integer comparisons.  */

static void
riscv_extend_comparands (rtx_code code, rtx *op0, rtx *op1)
{
  /* Comparisons consider all XLEN bits, so extend sub-XLEN values.  */
  if (GET_MODE_SIZE (word_mode) > GET_MODE_SIZE (GET_MODE (*op0)))
    {
      /* It is more profitable to zero-extend QImode values.  */
      if (unsigned_condition (code) == code && GET_MODE (*op0) == QImode)
	{
	  *op0 = gen_rtx_ZERO_EXTEND (word_mode, *op0);
	  if (CONST_INT_P (*op1))
	    /* Truncate the constant to its low 8 bits to match the
	       zero-extension of OP0.  */
	    *op1 = GEN_INT ((uint8_t) INTVAL (*op1));
	  else
	    *op1 = gen_rtx_ZERO_EXTEND (word_mode, *op1);
	}
      else
	{
	  *op0 = gen_rtx_SIGN_EXTEND (word_mode, *op0);
	  /* const0_rtx is unchanged by sign-extension.  */
	  if (*op1 != const0_rtx)
	    *op1 = gen_rtx_SIGN_EXTEND (word_mode, *op1);
	}
    }
}
1984 | ||
/* Convert a comparison into something that can be used in a branch.  On
   entry, *OP0 and *OP1 are the values being compared and *CODE is the code
   used to compare them.  Update them to describe the final comparison.  */

static void
riscv_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1)
{
  if (splittable_const_int_operand (*op1, VOIDmode))
    {
      HOST_WIDE_INT rhs = INTVAL (*op1);

      if (*code == EQ || *code == NE)
	{
	  /* Convert e.g. OP0 == 2048 into OP0 - 2048 == 0.  */
	  if (SMALL_OPERAND (-rhs))
	    {
	      *op0 = riscv_force_binary (GET_MODE (*op0), PLUS, *op0,
					 GEN_INT (-rhs));
	      *op1 = const0_rtx;
	    }
	}
      else
	{
	  /* Each row pairs a code whose constant can be incremented
	     with the equivalent code for the incremented constant
	     (and vice versa for decrementing).  */
	  static const enum rtx_code mag_comparisons[][2] = {
	    {LEU, LTU}, {GTU, GEU}, {LE, LT}, {GT, GE}
	  };

	  /* Convert e.g. (OP0 <= 0xFFF) into (OP0 < 0x1000).  */
	  for (size_t i = 0; i < ARRAY_SIZE (mag_comparisons); i++)
	    {
	      HOST_WIDE_INT new_rhs;
	      bool increment = *code == mag_comparisons[i][0];
	      bool decrement = *code == mag_comparisons[i][1];
	      if (!increment && !decrement)
		continue;

	      /* Only adjust when the new constant is cheaper to build
		 and the +-1 did not change sign (overflow).  */
	      new_rhs = rhs + (increment ? 1 : -1);
	      if (riscv_integer_cost (new_rhs) < riscv_integer_cost (rhs)
		  && (rhs < 0) == (new_rhs < 0))
		{
		  *op1 = GEN_INT (new_rhs);
		  *code = mag_comparisons[i][increment];
		}
	      /* At most one row matches *CODE, so stop scanning once
		 it has been examined, whether or not we adjusted.  */
	      break;
	    }
	}
    }

  riscv_extend_comparands (*code, op0, op1);

  *op0 = force_reg (word_mode, *op0);
  if (*op1 != const0_rtx)
    *op1 = force_reg (word_mode, *op1);
}
2039 | ||
/* Like riscv_emit_int_compare, but for floating-point comparisons.

   The FP comparison is materialized as a 0/1 value in an integer
   register, and *CODE/*OP0/*OP1 are rewritten as an integer test of
   that value (usually against zero).  */

static void
riscv_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1)
{
  rtx tmp0, tmp1, cmp_op0 = *op0, cmp_op1 = *op1;
  enum rtx_code fp_code = *code;
  *code = NE;

  switch (fp_code)
    {
    case UNORDERED:
      /* UNORDERED is the complement of ORDERED, so compute ORDERED
	 and test it for equality with zero.  */
      *code = EQ;
      /* Fall through.  */

    case ORDERED:
      /* a == a && b == b */
      tmp0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op0);
      tmp1 = riscv_force_binary (word_mode, EQ, cmp_op1, cmp_op1);
      *op0 = riscv_force_binary (word_mode, AND, tmp0, tmp1);
      *op1 = const0_rtx;
      break;

    case UNEQ:
    case LTGT:
      /* ordered(a, b) > (a == b) */
      *code = fp_code == LTGT ? GTU : EQ;
      tmp0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op0);
      tmp1 = riscv_force_binary (word_mode, EQ, cmp_op1, cmp_op1);
      *op0 = riscv_force_binary (word_mode, AND, tmp0, tmp1);
      *op1 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op1);
      break;

    /* Emit a quiet (non-signalling) comparison insn for the given mode
       combination and test its result for equality with zero.  */
#define UNORDERED_COMPARISON(CODE, CMP) \
  case CODE: \
    *code = EQ; \
    *op0 = gen_reg_rtx (word_mode); \
    if (GET_MODE (cmp_op0) == SFmode && TARGET_64BIT) \
      emit_insn (gen_f##CMP##_quietsfdi4 (*op0, cmp_op0, cmp_op1)); \
    else if (GET_MODE (cmp_op0) == SFmode) \
      emit_insn (gen_f##CMP##_quietsfsi4 (*op0, cmp_op0, cmp_op1)); \
    else if (GET_MODE (cmp_op0) == DFmode && TARGET_64BIT) \
      emit_insn (gen_f##CMP##_quietdfdi4 (*op0, cmp_op0, cmp_op1)); \
    else if (GET_MODE (cmp_op0) == DFmode) \
      emit_insn (gen_f##CMP##_quietdfsi4 (*op0, cmp_op0, cmp_op1)); \
    else \
      gcc_unreachable (); \
    *op1 = const0_rtx; \
    break;

    case UNLT:
      /* UNLT (a, b) is UNGT (b, a); swap and share the code below.  */
      std::swap (cmp_op0, cmp_op1);
      gcc_fallthrough ();

    UNORDERED_COMPARISON(UNGT, le)

    case UNLE:
      /* Likewise UNLE (a, b) is UNGE (b, a).  */
      std::swap (cmp_op0, cmp_op1);
      gcc_fallthrough ();

    UNORDERED_COMPARISON(UNGE, lt)
#undef UNORDERED_COMPARISON

    case NE:
      /* NE is the complement of EQ: compute EQ, test it against 0.  */
      fp_code = EQ;
      *code = EQ;
      /* Fall through.  */

    case EQ:
    case LE:
    case LT:
    case GE:
    case GT:
      /* We have instructions for these cases.  */
      *op0 = riscv_force_binary (word_mode, fp_code, cmp_op0, cmp_op1);
      *op1 = const0_rtx;
      break;

    default:
      gcc_unreachable ();
    }
}
2122 | ||
2123 | /* CODE-compare OP0 and OP1. Store the result in TARGET. */ | |
2124 | ||
2125 | void | |
2126 | riscv_expand_int_scc (rtx target, enum rtx_code code, rtx op0, rtx op1) | |
2127 | { | |
2128 | riscv_extend_comparands (code, &op0, &op1); | |
2129 | op0 = force_reg (word_mode, op0); | |
2130 | ||
2131 | if (code == EQ || code == NE) | |
2132 | { | |
2133 | rtx zie = riscv_zero_if_equal (op0, op1); | |
2134 | riscv_emit_binary (code, target, zie, const0_rtx); | |
2135 | } | |
2136 | else | |
2137 | riscv_emit_int_order_test (code, 0, target, op0, op1); | |
2138 | } | |
2139 | ||
2140 | /* Like riscv_expand_int_scc, but for floating-point comparisons. */ | |
2141 | ||
2142 | void | |
2143 | riscv_expand_float_scc (rtx target, enum rtx_code code, rtx op0, rtx op1) | |
2144 | { | |
2145 | riscv_emit_float_compare (&code, &op0, &op1); | |
2146 | ||
2147 | rtx cmp = riscv_force_binary (word_mode, code, op0, op1); | |
2148 | riscv_emit_set (target, lowpart_subreg (SImode, cmp, word_mode)); | |
2149 | } | |
2150 | ||
2151 | /* Jump to LABEL if (CODE OP0 OP1) holds. */ | |
2152 | ||
2153 | void | |
2154 | riscv_expand_conditional_branch (rtx label, rtx_code code, rtx op0, rtx op1) | |
2155 | { | |
2156 | if (FLOAT_MODE_P (GET_MODE (op1))) | |
2157 | riscv_emit_float_compare (&code, &op0, &op1); | |
2158 | else | |
2159 | riscv_emit_int_compare (&code, &op0, &op1); | |
2160 | ||
2161 | rtx condition = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); | |
2162 | emit_jump_insn (gen_condjump (condition, label)); | |
2163 | } | |
2164 | ||
2165 | /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Every parameter gets at | |
2166 | least PARM_BOUNDARY bits of alignment, but will be given anything up | |
2167 | to STACK_BOUNDARY bits if the type requires it. */ | |
2168 | ||
2169 | static unsigned int | |
b8506a8a | 2170 | riscv_function_arg_boundary (machine_mode mode, const_tree type) |
09cae750 PD |
2171 | { |
2172 | unsigned int alignment; | |
2173 | ||
2174 | /* Use natural alignment if the type is not aggregate data. */ | |
2175 | if (type && !AGGREGATE_TYPE_P (type)) | |
2176 | alignment = TYPE_ALIGN (TYPE_MAIN_VARIANT (type)); | |
2177 | else | |
2178 | alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode); | |
2179 | ||
2180 | return MIN (STACK_BOUNDARY, MAX (PARM_BOUNDARY, alignment)); | |
2181 | } | |
2182 | ||
2183 | /* If MODE represents an argument that can be passed or returned in | |
2184 | floating-point registers, return the number of registers, else 0. */ | |
2185 | ||
2186 | static unsigned | |
b8506a8a | 2187 | riscv_pass_mode_in_fpr_p (machine_mode mode) |
09cae750 PD |
2188 | { |
2189 | if (GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FP_ARG) | |
2190 | { | |
2191 | if (GET_MODE_CLASS (mode) == MODE_FLOAT) | |
2192 | return 1; | |
2193 | ||
2194 | if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) | |
2195 | return 2; | |
2196 | } | |
2197 | ||
2198 | return 0; | |
2199 | } | |
2200 | ||
/* One flattened leaf field of an aggregate argument: its type and its
   byte offset from the start of the enclosing argument.  */
typedef struct {
  const_tree type;
  HOST_WIDE_INT offset;
} riscv_aggregate_field;
2205 | ||
/* Identify subfields of aggregates that are candidates for passing in
   floating-point registers.

   TYPE is the type being flattened, FIELDS accumulates up to two leaf
   fields, N is the number of entries already in FIELDS, and OFFSET is
   the byte offset of TYPE within the outermost aggregate.  Return the
   new number of entries, or -1 if TYPE cannot be flattened into at
   most two acceptable scalar fields.  */

static int
riscv_flatten_aggregate_field (const_tree type,
			       riscv_aggregate_field fields[2],
			       int n, HOST_WIDE_INT offset)
{
  switch (TREE_CODE (type))
    {
    case RECORD_TYPE:
      /* Can't handle incomplete types nor sizes that are not fixed.  */
      if (!COMPLETE_TYPE_P (type)
	  || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
	  || !tree_fits_uhwi_p (TYPE_SIZE (type)))
	return -1;

      /* Recurse into every member, accumulating its leaf fields.  */
      for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
	if (TREE_CODE (f) == FIELD_DECL)
	  {
	    if (!TYPE_P (TREE_TYPE (f)))
	      return -1;

	    HOST_WIDE_INT pos = offset + int_byte_position (f);
	    n = riscv_flatten_aggregate_field (TREE_TYPE (f), fields, n, pos);
	    if (n < 0)
	      return -1;
	  }
      return n;

    case ARRAY_TYPE:
      {
	HOST_WIDE_INT n_elts;
	riscv_aggregate_field subfields[2];
	tree index = TYPE_DOMAIN (type);
	tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type));
	int n_subfields = riscv_flatten_aggregate_field (TREE_TYPE (type),
							 subfields, 0, offset);

	/* Can't handle incomplete types nor sizes that are not fixed.  */
	if (n_subfields <= 0
	    || !COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || !tree_fits_uhwi_p (elt_size))
	  return -1;

	n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		 - tree_to_uhwi (TYPE_MIN_VALUE (index));
	gcc_assert (n_elts >= 0);

	/* Replicate the element's subfields once per array element,
	   bumping each copy's offset by the element size.  */
	for (HOST_WIDE_INT i = 0; i < n_elts; i++)
	  for (int j = 0; j < n_subfields; j++)
	    {
	      if (n >= 2)
		return -1;

	      fields[n] = subfields[j];
	      fields[n++].offset += i * tree_to_uhwi (elt_size);
	    }

	return n;
      }

    case COMPLEX_TYPE:
      {
	/* A complex type consumes both field slots, so N must be 0.  */
	if (n != 0)
	  return -1;

	HOST_WIDE_INT elt_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type)));

	if (elt_size <= UNITS_PER_FP_ARG)
	  {
	    /* Real part followed immediately by the imaginary part.  */
	    fields[0].type = TREE_TYPE (type);
	    fields[0].offset = offset;
	    fields[1].type = TREE_TYPE (type);
	    fields[1].offset = offset + elt_size;

	    return 2;
	  }

	return -1;
      }

    default:
      /* Leaf case: accept scalar floats that fit an FP argument
	 register and integers that fit a word.  */
      if (n < 2
	  && ((SCALAR_FLOAT_TYPE_P (type)
	       && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_FP_ARG)
	      || (INTEGRAL_TYPE_P (type)
		  && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_WORD)))
	{
	  fields[n].type = type;
	  fields[n].offset = offset;
	  return n + 1;
	}
      else
	return -1;
    }
}
2310 | ||
2311 | /* Identify candidate aggregates for passing in floating-point registers. | |
2312 | Candidates have at most two fields after flattening. */ | |
2313 | ||
2314 | static int | |
2315 | riscv_flatten_aggregate_argument (const_tree type, | |
2316 | riscv_aggregate_field fields[2]) | |
2317 | { | |
2318 | if (!type || TREE_CODE (type) != RECORD_TYPE) | |
2319 | return -1; | |
2320 | ||
2321 | return riscv_flatten_aggregate_field (type, fields, 0, 0); | |
2322 | } | |
2323 | ||
2324 | /* See whether TYPE is a record whose fields should be returned in one or | |
2325 | two floating-point registers. If so, populate FIELDS accordingly. */ | |
2326 | ||
2327 | static unsigned | |
2328 | riscv_pass_aggregate_in_fpr_pair_p (const_tree type, | |
2329 | riscv_aggregate_field fields[2]) | |
2330 | { | |
2331 | int n = riscv_flatten_aggregate_argument (type, fields); | |
2332 | ||
2333 | for (int i = 0; i < n; i++) | |
2334 | if (!SCALAR_FLOAT_TYPE_P (fields[i].type)) | |
2335 | return 0; | |
2336 | ||
2337 | return n > 0 ? n : 0; | |
2338 | } | |
2339 | ||
2340 | /* See whether TYPE is a record whose fields should be returned in one or | |
2341 | floating-point register and one integer register. If so, populate | |
2342 | FIELDS accordingly. */ | |
2343 | ||
2344 | static bool | |
2345 | riscv_pass_aggregate_in_fpr_and_gpr_p (const_tree type, | |
2346 | riscv_aggregate_field fields[2]) | |
2347 | { | |
2348 | unsigned num_int = 0, num_float = 0; | |
2349 | int n = riscv_flatten_aggregate_argument (type, fields); | |
2350 | ||
2351 | for (int i = 0; i < n; i++) | |
2352 | { | |
2353 | num_float += SCALAR_FLOAT_TYPE_P (fields[i].type); | |
2354 | num_int += INTEGRAL_TYPE_P (fields[i].type); | |
2355 | } | |
2356 | ||
2357 | return num_int == 1 && num_float == 1; | |
2358 | } | |
2359 | ||
/* Return the representation of an argument passed or returned in an FPR
   when the value has mode VALUE_MODE and the type has TYPE_MODE.  The
   two modes may be different for structures like:

   struct __attribute__((packed)) foo { float f; }

   where the SFmode value "f" is passed in REGNO but the struct itself
   has mode BLKmode.  */

static rtx
riscv_pass_fpr_single (machine_mode type_mode, unsigned regno,
		       machine_mode value_mode)
{
  rtx x = gen_rtx_REG (value_mode, regno);

  if (type_mode != value_mode)
    {
      /* Wrap the register in a single-element PARALLEL so that the
	 value keeps VALUE_MODE while the whole has TYPE_MODE; the
	 const0_rtx is the byte offset of the value.  */
      x = gen_rtx_EXPR_LIST (VOIDmode, x, const0_rtx);
      x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x));
    }
  return x;
}
2382 | ||
2383 | /* Pass or return a composite value in the FPR pair REGNO and REGNO + 1. | |
2384 | MODE is the mode of the composite. MODE1 and OFFSET1 are the mode and | |
2385 | byte offset for the first value, likewise MODE2 and OFFSET2 for the | |
2386 | second value. */ | |
2387 | ||
2388 | static rtx | |
b8506a8a RS |
2389 | riscv_pass_fpr_pair (machine_mode mode, unsigned regno1, |
2390 | machine_mode mode1, HOST_WIDE_INT offset1, | |
2391 | unsigned regno2, machine_mode mode2, | |
09cae750 PD |
2392 | HOST_WIDE_INT offset2) |
2393 | { | |
2394 | return gen_rtx_PARALLEL | |
2395 | (mode, | |
2396 | gen_rtvec (2, | |
2397 | gen_rtx_EXPR_LIST (VOIDmode, | |
2398 | gen_rtx_REG (mode1, regno1), | |
2399 | GEN_INT (offset1)), | |
2400 | gen_rtx_EXPR_LIST (VOIDmode, | |
2401 | gen_rtx_REG (mode2, regno2), | |
2402 | GEN_INT (offset2)))); | |
2403 | } | |
2404 | ||
/* Fill INFO with information about a single argument, and return an
   RTL pattern to pass or return the argument.  CUM is the cumulative
   state for earlier arguments.  MODE is the mode of this argument and
   TYPE is its type (if known).  NAMED is true if this is a named
   (fixed) argument rather than a variable one.  RETURN_P is true if
   returning the argument, or false if passing the argument.  */

static rtx
riscv_get_arg_info (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
		    machine_mode mode, const_tree type, bool named,
		    bool return_p)
{
  unsigned num_bytes, num_words;
  unsigned fpr_base = return_p ? FP_RETURN : FP_ARG_FIRST;
  unsigned gpr_base = return_p ? GP_RETURN : GP_ARG_FIRST;
  unsigned alignment = riscv_function_arg_boundary (mode, type);

  memset (info, 0, sizeof (*info));
  info->gpr_offset = cum->num_gprs;
  info->fpr_offset = cum->num_fprs;

  /* Only named arguments are eligible for the FP calling convention;
     anonymous (variadic) arguments always go in GPRs or on the stack.  */
  if (named)
    {
      riscv_aggregate_field fields[2];
      unsigned fregno = fpr_base + info->fpr_offset;
      unsigned gregno = gpr_base + info->gpr_offset;

      /* Pass one- or two-element floating-point aggregates in FPRs.  */
      if ((info->num_fprs = riscv_pass_aggregate_in_fpr_pair_p (type, fields))
	  && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
	switch (info->num_fprs)
	  {
	  case 1:
	    return riscv_pass_fpr_single (mode, fregno,
					  TYPE_MODE (fields[0].type));

	  case 2:
	    return riscv_pass_fpr_pair (mode, fregno,
					TYPE_MODE (fields[0].type),
					fields[0].offset,
					fregno + 1,
					TYPE_MODE (fields[1].type),
					fields[1].offset);

	  default:
	    gcc_unreachable ();
	  }

      /* Pass real and complex floating-point numbers in FPRs.  */
      if ((info->num_fprs = riscv_pass_mode_in_fpr_p (mode))
	  && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
	switch (GET_MODE_CLASS (mode))
	  {
	  case MODE_FLOAT:
	    return gen_rtx_REG (mode, fregno);

	  case MODE_COMPLEX_FLOAT:
	    return riscv_pass_fpr_pair (mode, fregno, GET_MODE_INNER (mode), 0,
					fregno + 1, GET_MODE_INNER (mode),
					GET_MODE_UNIT_SIZE (mode));

	  default:
	    gcc_unreachable ();
	  }

      /* Pass structs with one float and one integer in an FPR and a GPR.  */
      if (riscv_pass_aggregate_in_fpr_and_gpr_p (type, fields)
	  && info->gpr_offset < MAX_ARGS_IN_REGISTERS
	  && info->fpr_offset < MAX_ARGS_IN_REGISTERS)
	{
	  info->num_gprs = 1;
	  info->num_fprs = 1;

	  /* fields[0] gets the first register; swap so the float goes
	     in the FPR and the integer in the GPR regardless of field
	     order.  */
	  if (!SCALAR_FLOAT_TYPE_P (fields[0].type))
	    std::swap (fregno, gregno);

	  return riscv_pass_fpr_pair (mode, fregno, TYPE_MODE (fields[0].type),
				      fields[0].offset,
				      gregno, TYPE_MODE (fields[1].type),
				      fields[1].offset);
	}
    }

  /* Fallback: integer calling convention (GPRs and/or stack).  */

  /* Work out the size of the argument.  */
  num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
  num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Doubleword-aligned varargs start on an even register boundary.  */
  if (!named && num_bytes != 0 && alignment > BITS_PER_WORD)
    info->gpr_offset += info->gpr_offset & 1;

  /* Partition the argument between registers and stack.  */
  info->num_fprs = 0;
  info->num_gprs = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset);
  info->stack_p = (num_words - info->num_gprs) != 0;

  if (info->num_gprs || return_p)
    return gen_rtx_REG (mode, gpr_base + info->gpr_offset);

  return NULL_RTX;
}
2506 | ||
2507 | /* Implement TARGET_FUNCTION_ARG. */ | |
2508 | ||
2509 | static rtx | |
b8506a8a | 2510 | riscv_function_arg (cumulative_args_t cum_v, machine_mode mode, |
09cae750 PD |
2511 | const_tree type, bool named) |
2512 | { | |
2513 | CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); | |
2514 | struct riscv_arg_info info; | |
2515 | ||
2516 | if (mode == VOIDmode) | |
2517 | return NULL; | |
2518 | ||
2519 | return riscv_get_arg_info (&info, cum, mode, type, named, false); | |
2520 | } | |
2521 | ||
/* Implement TARGET_FUNCTION_ARG_ADVANCE.  */

static void
riscv_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
			    const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  struct riscv_arg_info info;

  riscv_get_arg_info (&info, cum, mode, type, named, false);

  /* Advance the register count.  This has the effect of setting
     num_gprs to MAX_ARGS_IN_REGISTERS if a doubleword-aligned
     argument required us to skip the final GPR and pass the whole
     argument on the stack.  */
  cum->num_fprs = info.fpr_offset + info.num_fprs;
  cum->num_gprs = info.gpr_offset + info.num_gprs;
}
2540 | ||
2541 | /* Implement TARGET_ARG_PARTIAL_BYTES. */ | |
2542 | ||
2543 | static int | |
2544 | riscv_arg_partial_bytes (cumulative_args_t cum, | |
b8506a8a | 2545 | machine_mode mode, tree type, bool named) |
09cae750 PD |
2546 | { |
2547 | struct riscv_arg_info arg; | |
2548 | ||
2549 | riscv_get_arg_info (&arg, get_cumulative_args (cum), mode, type, named, false); | |
2550 | return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0; | |
2551 | } | |
2552 | ||
/* Implement FUNCTION_VALUE and LIBCALL_VALUE.  For normal calls,
   VALTYPE is the return type and MODE is VOIDmode.  For libcalls,
   VALTYPE is null and MODE is the mode of the return value.  */

rtx
riscv_function_value (const_tree type, const_tree func, machine_mode mode)
{
  struct riscv_arg_info info;
  CUMULATIVE_ARGS args;

  if (type)
    {
      int unsigned_p = TYPE_UNSIGNED (type);

      mode = TYPE_MODE (type);

      /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes
	 return values, promote the mode here too.  */
      mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
    }

  /* Return values are located as if they were the first named
     argument, so use a zeroed cumulative state.  */
  memset (&args, 0, sizeof args);
  return riscv_get_arg_info (&info, &args, mode, type, true, true);
}
2577 | ||
/* Implement TARGET_PASS_BY_REFERENCE.  */

static bool
riscv_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
			 const_tree type, bool named)
{
  HOST_WIDE_INT size = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
  struct riscv_arg_info info;
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* ??? std_gimplify_va_arg_expr passes NULL for cum.  Fortunately, we
     never pass variadic arguments in floating-point registers, so we can
     avoid the call to riscv_get_arg_info in this case.  */
  if (cum != NULL)
    {
      /* Don't pass by reference if we can use a floating-point register.  */
      riscv_get_arg_info (&info, cum, mode, type, named, false);
      if (info.num_fprs)
	return false;
    }

  /* Pass by reference if the data do not fit in two integer registers.
     (int_size_in_bytes can return -1 for variable-sized types, which
     also falls outside this range and is passed by reference.)  */
  return !IN_RANGE (size, 0, 2 * UNITS_PER_WORD);
}
2602 | ||
2603 | /* Implement TARGET_RETURN_IN_MEMORY. */ | |
2604 | ||
2605 | static bool | |
2606 | riscv_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) | |
2607 | { | |
2608 | CUMULATIVE_ARGS args; | |
2609 | cumulative_args_t cum = pack_cumulative_args (&args); | |
2610 | ||
2611 | /* The rules for returning in memory are the same as for passing the | |
2612 | first named argument by reference. */ | |
2613 | memset (&args, 0, sizeof args); | |
2614 | return riscv_pass_by_reference (cum, TYPE_MODE (type), type, true); | |
2615 | } | |
2616 | ||
/* Implement TARGET_SETUP_INCOMING_VARARGS.  Spill the argument
   registers that would have held further anonymous arguments into the
   varargs save area so that va_arg can find them.  */

static void
riscv_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
			      tree type, int *pretend_size ATTRIBUTE_UNUSED,
			      int no_rtl)
{
  CUMULATIVE_ARGS local_cum;
  int gp_saved;

  /* The caller has advanced CUM up to, but not beyond, the last named
     argument.  Advance a local copy of CUM past the last "real" named
     argument, to find out how many registers are left over.  */
  local_cum = *get_cumulative_args (cum);
  riscv_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, 1);

  /* Find out how many registers we need to save.  */
  gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs;

  if (!no_rtl && gp_saved > 0)
    {
      /* The save area sits just below the incoming arguments.  */
      rtx ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
			       REG_PARM_STACK_SPACE (cfun->decl)
			       - gp_saved * UNITS_PER_WORD);
      rtx mem = gen_frame_mem (BLKmode, ptr);
      set_mem_alias_set (mem, get_varargs_alias_set ());

      move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST,
			   mem, gp_saved);
    }
  if (REG_PARM_STACK_SPACE (cfun->decl) == 0)
    cfun->machine->varargs_size = gp_saved * UNITS_PER_WORD;
}
2650 | ||
2651 | /* Implement TARGET_EXPAND_BUILTIN_VA_START. */ | |
2652 | ||
2653 | static void | |
2654 | riscv_va_start (tree valist, rtx nextarg) | |
2655 | { | |
2656 | nextarg = plus_constant (Pmode, nextarg, -cfun->machine->varargs_size); | |
2657 | std_expand_builtin_va_start (valist, nextarg); | |
2658 | } | |
2659 | ||
2660 | /* Make ADDR suitable for use as a call or sibcall target. */ | |
2661 | ||
2662 | rtx | |
2663 | riscv_legitimize_call_address (rtx addr) | |
2664 | { | |
2665 | if (!call_insn_operand (addr, VOIDmode)) | |
2666 | { | |
2667 | rtx reg = RISCV_PROLOGUE_TEMP (Pmode); | |
2668 | riscv_emit_move (reg, addr); | |
2669 | return reg; | |
2670 | } | |
2671 | return addr; | |
2672 | } | |
2673 | ||
6ed01e6b AW |
2674 | /* Emit straight-line code to move LENGTH bytes from SRC to DEST. |
2675 | Assume that the areas do not overlap. */ | |
2676 | ||
2677 | static void | |
2678 | riscv_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) | |
2679 | { | |
2680 | HOST_WIDE_INT offset, delta; | |
2681 | unsigned HOST_WIDE_INT bits; | |
2682 | int i; | |
2683 | enum machine_mode mode; | |
2684 | rtx *regs; | |
2685 | ||
2686 | bits = MAX (BITS_PER_UNIT, | |
2687 | MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest)))); | |
2688 | ||
fb5621b1 | 2689 | mode = mode_for_size (bits, MODE_INT, 0).require (); |
6ed01e6b AW |
2690 | delta = bits / BITS_PER_UNIT; |
2691 | ||
2692 | /* Allocate a buffer for the temporary registers. */ | |
2693 | regs = XALLOCAVEC (rtx, length / delta); | |
2694 | ||
2695 | /* Load as many BITS-sized chunks as possible. Use a normal load if | |
2696 | the source has enough alignment, otherwise use left/right pairs. */ | |
2697 | for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++) | |
2698 | { | |
2699 | regs[i] = gen_reg_rtx (mode); | |
2700 | riscv_emit_move (regs[i], adjust_address (src, mode, offset)); | |
2701 | } | |
2702 | ||
2703 | /* Copy the chunks to the destination. */ | |
2704 | for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++) | |
2705 | riscv_emit_move (adjust_address (dest, mode, offset), regs[i]); | |
2706 | ||
2707 | /* Mop up any left-over bytes. */ | |
2708 | if (offset < length) | |
2709 | { | |
2710 | src = adjust_address (src, BLKmode, offset); | |
2711 | dest = adjust_address (dest, BLKmode, offset); | |
2712 | move_by_pieces (dest, src, length - offset, | |
2713 | MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), 0); | |
2714 | } | |
2715 | } | |
2716 | ||
/* Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */

static void
riscv_adjust_block_mem (rtx mem, HOST_WIDE_INT length,
			rtx *loop_reg, rtx *loop_mem)
{
  /* Copy the address into a pseudo so the loop can increment it.  */
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
2736 | ||
/* Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */

static void
riscv_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
		       HOST_WIDE_INT bytes_per_iter)
{
  rtx label, src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  /* Round LENGTH down to a multiple of the iteration size; the tail is
     handled by straight-line code after the loop.  */
  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  riscv_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  riscv_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration
     of the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
				   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  riscv_block_move_straight (dest, src, bytes_per_iter);

  /* Move on to the next block.  */
  riscv_emit_move (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  riscv_emit_move (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition: keep iterating until SRC_REG reaches the
     precomputed final address.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  if (Pmode == DImode)
    emit_jump_insn (gen_cbranchdi4 (test, src_reg, final_src, label));
  else
    emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    riscv_block_move_straight (dest, src, leftover);
  else
    /* NOTE(review): upstream GCC emits nothing on this path; this nop
       looks like a local addition (perhaps to keep a label from landing
       on the following insn) — confirm its purpose before removing.  */
    emit_insn(gen_nop ());
}
2784 | ||
/* Expand a movmemsi instruction, which copies LENGTH bytes from
   memory reference SRC to memory reference DEST.  Return true if the
   move was expanded inline, false to fall back on a library call.  */

bool
riscv_expand_block_move (rtx dest, rtx src, rtx length)
{
  /* Only constant lengths are expanded inline.  */
  if (CONST_INT_P (length))
    {
      HOST_WIDE_INT factor, align;

      /* FACTOR is how many aligned accesses it takes to move one word:
	 1 when fully word-aligned, more for lesser alignment.  */
      align = MIN (MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), BITS_PER_WORD);
      factor = BITS_PER_WORD / align;

      /* When optimizing for size, give up if the inline expansion would
	 exceed the move ratio.  */
      if (optimize_function_for_size_p (cfun)
	  && INTVAL (length) * factor * UNITS_PER_WORD > MOVE_RATIO (false))
	return false;

      if (INTVAL (length) <= RISCV_MAX_MOVE_BYTES_STRAIGHT / factor)
	{
	  riscv_block_move_straight (dest, src, INTVAL (length));
	  return true;
	}
      else if (optimize && align >= BITS_PER_WORD)
	{
	  unsigned min_iter_words
	    = RISCV_MAX_MOVE_BYTES_PER_LOOP_ITER / UNITS_PER_WORD;
	  unsigned iter_words = min_iter_words;
	  HOST_WIDE_INT bytes = INTVAL (length), words = bytes / UNITS_PER_WORD;

	  /* Lengthen the loop body if it shortens the tail.
	     Cost model: words moved per iteration plus the straight-line
	     tail left over.  */
	  for (unsigned i = min_iter_words; i < min_iter_words * 2 - 1; i++)
	    {
	      unsigned cur_cost = iter_words + words % iter_words;
	      unsigned new_cost = i + words % i;
	      if (new_cost <= cur_cost)
		iter_words = i;
	    }

	  riscv_block_move_loop (dest, src, bytes, iter_words * UNITS_PER_WORD);
	  return true;
	}
    }
  return false;
}
2829 | ||
09cae750 PD |
2830 | /* Print symbolic operand OP, which is part of a HIGH or LO_SUM |
2831 | in context CONTEXT. HI_RELOC indicates a high-part reloc. */ | |
2832 | ||
2833 | static void | |
2834 | riscv_print_operand_reloc (FILE *file, rtx op, bool hi_reloc) | |
2835 | { | |
2836 | const char *reloc; | |
2837 | ||
2838 | switch (riscv_classify_symbolic_expression (op)) | |
2839 | { | |
2840 | case SYMBOL_ABSOLUTE: | |
2841 | reloc = hi_reloc ? "%hi" : "%lo"; | |
2842 | break; | |
2843 | ||
2844 | case SYMBOL_PCREL: | |
2845 | reloc = hi_reloc ? "%pcrel_hi" : "%pcrel_lo"; | |
2846 | break; | |
2847 | ||
2848 | case SYMBOL_TLS_LE: | |
2849 | reloc = hi_reloc ? "%tprel_hi" : "%tprel_lo"; | |
2850 | break; | |
2851 | ||
2852 | default: | |
2853 | gcc_unreachable (); | |
2854 | } | |
2855 | ||
2856 | fprintf (file, "%s(", reloc); | |
2857 | output_addr_const (file, riscv_strip_unspec_address (op)); | |
2858 | fputc (')', file); | |
2859 | } | |
2860 | ||
2861 | /* Return true if the .AQ suffix should be added to an AMO to implement the | |
2862 | acquire portion of memory model MODEL. */ | |
2863 | ||
2864 | static bool | |
2865 | riscv_memmodel_needs_amo_acquire (enum memmodel model) | |
2866 | { | |
2867 | switch (model) | |
2868 | { | |
2869 | case MEMMODEL_ACQ_REL: | |
2870 | case MEMMODEL_SEQ_CST: | |
2871 | case MEMMODEL_SYNC_SEQ_CST: | |
2872 | case MEMMODEL_ACQUIRE: | |
2873 | case MEMMODEL_CONSUME: | |
2874 | case MEMMODEL_SYNC_ACQUIRE: | |
2875 | return true; | |
2876 | ||
2877 | case MEMMODEL_RELEASE: | |
2878 | case MEMMODEL_SYNC_RELEASE: | |
2879 | case MEMMODEL_RELAXED: | |
2880 | return false; | |
2881 | ||
2882 | default: | |
2883 | gcc_unreachable (); | |
2884 | } | |
2885 | } | |
2886 | ||
2887 | /* Return true if a FENCE should be emitted to before a memory access to | |
2888 | implement the release portion of memory model MODEL. */ | |
2889 | ||
2890 | static bool | |
2891 | riscv_memmodel_needs_release_fence (enum memmodel model) | |
2892 | { | |
2893 | switch (model) | |
2894 | { | |
2895 | case MEMMODEL_ACQ_REL: | |
2896 | case MEMMODEL_SEQ_CST: | |
2897 | case MEMMODEL_SYNC_SEQ_CST: | |
2898 | case MEMMODEL_RELEASE: | |
2899 | case MEMMODEL_SYNC_RELEASE: | |
2900 | return true; | |
2901 | ||
2902 | case MEMMODEL_ACQUIRE: | |
2903 | case MEMMODEL_CONSUME: | |
2904 | case MEMMODEL_SYNC_ACQUIRE: | |
2905 | case MEMMODEL_RELAXED: | |
2906 | return false; | |
2907 | ||
2908 | default: | |
2909 | gcc_unreachable (); | |
2910 | } | |
2911 | } | |
2912 | ||
/* Implement TARGET_PRINT_OPERAND.  The RISCV-specific operand codes are:

   'h'	Print the high-part relocation associated with OP, after stripping
	  any outermost HIGH.
   'R'	Print the low-part relocation associated with OP.
   'C'	Print the integer branch condition for comparison OP.
   'A'	Print the atomic operation suffix for memory model OP.
   'F'	Print a FENCE if the memory model requires a release.
   'z'	Print x0 if OP is zero, otherwise print OP normally.
   'i'	Print i if the operand is not a register.  */

static void
riscv_print_operand (FILE *file, rtx op, int letter)
{
  machine_mode mode = GET_MODE (op);
  enum rtx_code code = GET_CODE (op);

  switch (letter)
    {
    case 'h':
      if (code == HIGH)
	op = XEXP (op, 0);
      riscv_print_operand_reloc (file, op, true);
      break;

    case 'R':
      riscv_print_operand_reloc (file, op, false);
      break;

    case 'C':
      /* The RTL names match the instruction names.  */
      fputs (GET_RTX_NAME (code), file);
      break;

    case 'A':
      /* OP is a memory-model constant; emit .aq when it implies acquire.  */
      if (riscv_memmodel_needs_amo_acquire ((enum memmodel) INTVAL (op)))
	fputs (".aq", file);
      break;

    case 'F':
      /* Emit a preceding fence when the model implies release.  */
      if (riscv_memmodel_needs_release_fence ((enum memmodel) INTVAL (op)))
	fputs ("fence iorw,ow; ", file);
      break;

    case 'i':
      /* Select the immediate form of an instruction mnemonic.  */
      if (code != REG)
	fputs ("i", file);
      break;

    default:
      /* No (or 'z') code: print the operand by its RTL class.  */
      switch (code)
	{
	case REG:
	  if (letter && letter != 'z')
	    output_operand_lossage ("invalid use of '%%%c'", letter);
	  fprintf (file, "%s", reg_names[REGNO (op)]);
	  break;

	case MEM:
	  if (letter && letter != 'z')
	    output_operand_lossage ("invalid use of '%%%c'", letter);
	  else
	    output_address (mode, XEXP (op, 0));
	  break;

	default:
	  /* 'z' prints the zero register for a zero constant.  */
	  if (letter == 'z' && op == CONST0_RTX (GET_MODE (op)))
	    fputs (reg_names[GP_REG_FIRST], file);
	  else if (letter && letter != 'z')
	    output_operand_lossage ("invalid use of '%%%c'", letter);
	  else
	    output_addr_const (file, riscv_strip_unspec_address (op));
	  break;
	}
    }
}
2989 | ||
/* Implement TARGET_PRINT_OPERAND_ADDRESS.  Print memory address X in
   the assembler's offset(base) syntax.  */

static void
riscv_print_operand_address (FILE *file, machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  struct riscv_address_info addr;

  if (riscv_classify_address (&addr, x, word_mode, true))
    switch (addr.type)
      {
      case ADDRESS_REG:
	/* Constant offset from a base register: offset(reg).  */
	riscv_print_operand (file, addr.offset, 0);
	fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]);
	return;

      case ADDRESS_LO_SUM:
	/* Low part of a symbolic address: %lo(sym)(reg).  */
	riscv_print_operand_reloc (file, addr.offset, false);
	fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]);
	return;

      case ADDRESS_CONST_INT:
	/* Absolute small constant, addressed off the zero register.  */
	output_addr_const (file, x);
	fprintf (file, "(%s)", reg_names[GP_REG_FIRST]);
	return;

      case ADDRESS_SYMBOLIC:
	output_addr_const (file, riscv_strip_unspec_address (x));
	return;
      }
  gcc_unreachable ();
}
3021 | ||
3022 | static bool | |
3023 | riscv_size_ok_for_small_data_p (int size) | |
3024 | { | |
3025 | return g_switch_value && IN_RANGE (size, 1, g_switch_value); | |
3026 | } | |
3027 | ||
3028 | /* Return true if EXP should be placed in the small data section. */ | |
3029 | ||
3030 | static bool | |
3031 | riscv_in_small_data_p (const_tree x) | |
3032 | { | |
3033 | if (TREE_CODE (x) == STRING_CST || TREE_CODE (x) == FUNCTION_DECL) | |
3034 | return false; | |
3035 | ||
3036 | if (TREE_CODE (x) == VAR_DECL && DECL_SECTION_NAME (x)) | |
3037 | { | |
3038 | const char *sec = DECL_SECTION_NAME (x); | |
3039 | return strcmp (sec, ".sdata") == 0 || strcmp (sec, ".sbss") == 0; | |
3040 | } | |
3041 | ||
3042 | return riscv_size_ok_for_small_data_p (int_size_in_bytes (TREE_TYPE (x))); | |
3043 | } | |
3044 | ||
/* Return a section for X, handling small data.  Redirect small-data-sized
   constants from the generic choice into their small-data equivalents.  */

static section *
riscv_elf_select_rtx_section (machine_mode mode, rtx x,
			      unsigned HOST_WIDE_INT align)
{
  section *s = default_elf_select_rtx_section (mode, x, align);

  if (riscv_size_ok_for_small_data_p (GET_MODE_SIZE (mode)))
    {
      if (strncmp (s->named.name, ".rodata.cst", strlen (".rodata.cst")) == 0)
	{
	  /* Rename .rodata.cst* to .srodata.cst*.
	     +2: one byte for the inserted 's', one for the NUL.  */
	  char *name = (char *) alloca (strlen (s->named.name) + 2);
	  sprintf (name, ".s%s", s->named.name + 1);
	  return get_section (name, s->named.common.flags, NULL);
	}

      if (s == data_section)
	return sdata_section;
    }

  return s;
}
3069 | ||
3070 | /* Make the last instruction frame-related and note that it performs | |
3071 | the operation described by FRAME_PATTERN. */ | |
3072 | ||
3073 | static void | |
3074 | riscv_set_frame_expr (rtx frame_pattern) | |
3075 | { | |
3076 | rtx insn; | |
3077 | ||
3078 | insn = get_last_insn (); | |
3079 | RTX_FRAME_RELATED_P (insn) = 1; | |
3080 | REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR, | |
3081 | frame_pattern, | |
3082 | REG_NOTES (insn)); | |
3083 | } | |
3084 | ||
3085 | /* Return a frame-related rtx that stores REG at MEM. | |
3086 | REG must be a single register. */ | |
3087 | ||
3088 | static rtx | |
3089 | riscv_frame_set (rtx mem, rtx reg) | |
3090 | { | |
3091 | rtx set = gen_rtx_SET (mem, reg); | |
3092 | RTX_FRAME_RELATED_P (set) = 1; | |
3093 | return set; | |
3094 | } | |
3095 | ||
3096 | /* Return true if the current function must save register REGNO. */ | |
3097 | ||
3098 | static bool | |
3099 | riscv_save_reg_p (unsigned int regno) | |
3100 | { | |
3101 | bool call_saved = !global_regs[regno] && !call_used_regs[regno]; | |
3102 | bool might_clobber = crtl->saves_all_registers | |
3103 | || df_regs_ever_live_p (regno); | |
3104 | ||
3105 | if (call_saved && might_clobber) | |
3106 | return true; | |
3107 | ||
3108 | if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed) | |
3109 | return true; | |
3110 | ||
3111 | if (regno == RETURN_ADDR_REGNUM && crtl->calls_eh_return) | |
3112 | return true; | |
3113 | ||
3114 | return false; | |
3115 | } | |
3116 | ||
3117 | /* Determine whether to call GPR save/restore routines. */ | |
3118 | static bool | |
3119 | riscv_use_save_libcall (const struct riscv_frame_info *frame) | |
3120 | { | |
3121 | if (!TARGET_SAVE_RESTORE || crtl->calls_eh_return || frame_pointer_needed) | |
3122 | return false; | |
3123 | ||
3124 | return frame->save_libcall_adjustment != 0; | |
3125 | } | |
3126 | ||
3127 | /* Determine which GPR save/restore routine to call. */ | |
3128 | ||
3129 | static unsigned | |
3130 | riscv_save_libcall_count (unsigned mask) | |
3131 | { | |
3132 | for (unsigned n = GP_REG_LAST; n > GP_REG_FIRST; n--) | |
3133 | if (BITSET_P (mask, n)) | |
3134 | return CALLEE_SAVED_REG_NUMBER (n) + 1; | |
3135 | abort (); | |
3136 | } | |
3137 | ||
3138 | /* Populate the current function's riscv_frame_info structure. | |
3139 | ||
3140 | RISC-V stack frames grown downward. High addresses are at the top. | |
3141 | ||
3142 | +-------------------------------+ | |
3143 | | | | |
3144 | | incoming stack arguments | | |
3145 | | | | |
3146 | +-------------------------------+ <-- incoming stack pointer | |
3147 | | | | |
3148 | | callee-allocated save area | | |
3149 | | for arguments that are | | |
3150 | | split between registers and | | |
3151 | | the stack | | |
3152 | | | | |
3153 | +-------------------------------+ <-- arg_pointer_rtx | |
3154 | | | | |
3155 | | callee-allocated save area | | |
3156 | | for register varargs | | |
3157 | | | | |
3158 | +-------------------------------+ <-- hard_frame_pointer_rtx; | |
3159 | | | stack_pointer_rtx + gp_sp_offset | |
3160 | | GPR save area | + UNITS_PER_WORD | |
3161 | | | | |
3162 | +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset | |
3163 | | | + UNITS_PER_HWVALUE | |
3164 | | FPR save area | | |
3165 | | | | |
3166 | +-------------------------------+ <-- frame_pointer_rtx (virtual) | |
3167 | | | | |
3168 | | local variables | | |
3169 | | | | |
3170 | P +-------------------------------+ | |
3171 | | | | |
3172 | | outgoing stack arguments | | |
3173 | | | | |
3174 | +-------------------------------+ <-- stack_pointer_rtx | |
3175 | ||
3176 | Dynamic stack allocations such as alloca insert data at point P. | |
3177 | They decrease stack_pointer_rtx but leave frame_pointer_rtx and | |
3178 | hard_frame_pointer_rtx unchanged. */ | |
3179 | ||
static void
riscv_compute_frame_info (void)
{
  struct riscv_frame_info *frame;
  HOST_WIDE_INT offset;
  unsigned int regno, i, num_x_saved = 0, num_f_saved = 0;

  frame = &cfun->machine->frame;
  memset (frame, 0, sizeof (*frame));

  /* Find out which GPRs we need to save.  */
  for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
    if (riscv_save_reg_p (regno))
      frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;

  /* If this function calls eh_return, we must also save and restore the
     EH data registers.  */
  if (crtl->calls_eh_return)
    for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
      frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;

  /* Find out which FPRs we need to save.  This loop must iterate over
     the same space as its companion in riscv_for_each_saved_reg.  */
  if (TARGET_HARD_FLOAT)
    for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
      if (riscv_save_reg_p (regno))
	frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++;

  /* Lay out the frame from the bottom (stack pointer) upwards; see the
     diagram above for the resulting picture.  */

  /* At the bottom of the frame are any outgoing stack arguments.  */
  offset = crtl->outgoing_args_size;
  /* Next are local stack variables.  */
  offset += RISCV_STACK_ALIGN (get_frame_size ());
  /* The virtual frame pointer points above the local variables.  */
  frame->frame_pointer_offset = offset;
  /* Next are the callee-saved FPRs.  */
  if (frame->fmask)
    offset += RISCV_STACK_ALIGN (num_f_saved * UNITS_PER_FP_REG);
  frame->fp_sp_offset = offset - UNITS_PER_FP_REG;
  /* Next are the callee-saved GPRs.  */
  if (frame->mask)
    {
      unsigned x_save_size = RISCV_STACK_ALIGN (num_x_saved * UNITS_PER_WORD);
      unsigned num_save_restore = 1 + riscv_save_libcall_count (frame->mask);

      /* Only use save/restore routines if they don't alter the stack size.  */
      if (RISCV_STACK_ALIGN (num_save_restore * UNITS_PER_WORD) == x_save_size)
	frame->save_libcall_adjustment = x_save_size;

      offset += x_save_size;
    }
  frame->gp_sp_offset = offset - UNITS_PER_WORD;
  /* The hard frame pointer points above the callee-saved GPRs.  */
  frame->hard_frame_pointer_offset = offset;
  /* Above the hard frame pointer is the callee-allocated varargs save
     area.  */
  offset += RISCV_STACK_ALIGN (cfun->machine->varargs_size);
  frame->arg_pointer_offset = offset;
  /* Next is the callee-allocated area for pretend stack arguments.  */
  offset += crtl->args.pretend_args_size;
  frame->total_size = offset;
  /* Next points the incoming stack pointer and any incoming arguments.  */

  /* Only use save/restore routines when the GPRs are atop the frame.  */
  if (frame->hard_frame_pointer_offset != frame->total_size)
    frame->save_libcall_adjustment = 0;
}
3245 | ||
3246 | /* Make sure that we're not trying to eliminate to the wrong hard frame | |
3247 | pointer. */ | |
3248 | ||
3249 | static bool | |
3250 | riscv_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) | |
3251 | { | |
3252 | return (to == HARD_FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM); | |
3253 | } | |
3254 | ||
3255 | /* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame pointer | |
3256 | or argument pointer. TO is either the stack pointer or hard frame | |
3257 | pointer. */ | |
3258 | ||
3259 | HOST_WIDE_INT | |
3260 | riscv_initial_elimination_offset (int from, int to) | |
3261 | { | |
3262 | HOST_WIDE_INT src, dest; | |
3263 | ||
3264 | riscv_compute_frame_info (); | |
3265 | ||
3266 | if (to == HARD_FRAME_POINTER_REGNUM) | |
3267 | dest = cfun->machine->frame.hard_frame_pointer_offset; | |
3268 | else if (to == STACK_POINTER_REGNUM) | |
3269 | dest = 0; /* The stack pointer is the base of all offsets, hence 0. */ | |
3270 | else | |
3271 | gcc_unreachable (); | |
3272 | ||
3273 | if (from == FRAME_POINTER_REGNUM) | |
3274 | src = cfun->machine->frame.frame_pointer_offset; | |
3275 | else if (from == ARG_POINTER_REGNUM) | |
3276 | src = cfun->machine->frame.arg_pointer_offset; | |
3277 | else | |
3278 | gcc_unreachable (); | |
3279 | ||
3280 | return src - dest; | |
3281 | } | |
3282 | ||
3283 | /* Implement RETURN_ADDR_RTX. We do not support moving back to a | |
3284 | previous frame. */ | |
3285 | ||
3286 | rtx | |
3287 | riscv_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) | |
3288 | { | |
3289 | if (count != 0) | |
3290 | return const0_rtx; | |
3291 | ||
3292 | return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM); | |
3293 | } | |
3294 | ||
/* Emit code to change the current function's return address to
   ADDRESS.  SCRATCH is available as a scratch register, if needed.
   ADDRESS and SCRATCH are both word-mode GPRs.  */

void
riscv_set_return_address (rtx address, rtx scratch)
{
  rtx slot_address;

  /* The return address must have a stack slot for us to overwrite.  */
  gcc_assert (BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM));
  slot_address = riscv_add_offset (scratch, stack_pointer_rtx,
				   cfun->machine->frame.gp_sp_offset);
  riscv_emit_move (gen_frame_mem (GET_MODE (address), slot_address), address);
}
3309 | ||
/* A function to save or store a register.  The first argument is the
   register and the second is the stack slot.  */
typedef void (*riscv_save_restore_fn) (rtx, rtx);

/* Use FN to save or restore register REGNO.  MODE is the register's
   mode and OFFSET is the offset of its save slot from the current
   stack pointer.  */

static void
riscv_save_restore_reg (machine_mode mode, int regno,
			HOST_WIDE_INT offset, riscv_save_restore_fn fn)
{
  rtx mem;

  /* Form the stack slot and hand both operands to FN, which decides
     the direction of the move (save vs. restore).  */
  mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, offset));
  fn (gen_rtx_REG (mode, regno), mem);
}
3327 | ||
/* Call FN for each register that is saved by the current function.
   SP_OFFSET is the offset of the current stack pointer from the start
   of the frame.  */

static void
riscv_for_each_saved_reg (HOST_WIDE_INT sp_offset, riscv_save_restore_fn fn)
{
  HOST_WIDE_INT offset;

  /* Save the link register and s-registers.  Slots are assigned from
     gp_sp_offset downwards, in ascending register-number order.
     NOTE(review): the bound is GP_REG_LAST-1, excluding the last GPR;
     upstream iterates through GP_REG_LAST — confirm this exclusion is
     intentional in this port.  */
  offset = cfun->machine->frame.gp_sp_offset - sp_offset;
  for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST-1; regno++)
    if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
      {
	riscv_save_restore_reg (word_mode, regno, offset, fn);
	offset -= UNITS_PER_WORD;
      }

  /* This loop must iterate over the same space as its companion in
     riscv_compute_frame_info.  */
  offset = cfun->machine->frame.fp_sp_offset - sp_offset;
  for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
    if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
      {
	machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;

	riscv_save_restore_reg (mode, regno, offset, fn);
	offset -= GET_MODE_SIZE (mode);
      }
}
3358 | ||
/* Save register REG to MEM.  Make the instruction frame-related.  */

static void
riscv_save_reg (rtx reg, rtx mem)
{
  riscv_emit_move (mem, reg);
  /* Record the save in the CFI notes so the unwinder can find REG.  */
  riscv_set_frame_expr (riscv_frame_set (mem, reg));
}
3367 | ||
3368 | /* Restore register REG from MEM. */ | |
3369 | ||
3370 | static void | |
3371 | riscv_restore_reg (rtx reg, rtx mem) | |
3372 | { | |
3373 | rtx insn = riscv_emit_move (reg, mem); | |
3374 | rtx dwarf = NULL_RTX; | |
3375 | dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); | |
3376 | REG_NOTES (insn) = dwarf; | |
3377 | ||
3378 | RTX_FRAME_RELATED_P (insn) = 1; | |
3379 | } | |
3380 | ||
/* Return the code to invoke the GPR save routine.  The result points at
   a static buffer and is only valid until the next call.  */

const char *
riscv_output_gpr_save (unsigned mask)
{
  static char s[32];
  unsigned n = riscv_save_libcall_count (mask);

  /* __riscv_save_N saves ra and the first N-1 s-registers; it returns
     through t0, hence the link register choice.  */
  ssize_t bytes = snprintf (s, sizeof (s), "call\tt0,__riscv_save_%u", n);
  gcc_assert ((size_t) bytes < sizeof (s));

  return s;
}
3394 | ||
/* For stack frames that can't be allocated with a single ADDI instruction,
   compute the best value to initially allocate.  It must at a minimum
   allocate enough space to spill the callee-saved registers.  */

static HOST_WIDE_INT
riscv_first_stack_step (struct riscv_frame_info *frame)
{
  /* The first step must at least reach down to the FPR save area.  */
  HOST_WIDE_INT min_first_step = frame->total_size - frame->fp_sp_offset;
  /* The largest step a single ADDI can make while keeping the stack
     pointer aligned.  */
  HOST_WIDE_INT max_first_step = IMM_REACH / 2 - STACK_BOUNDARY / 8;

  /* Small frames fit in one ADDI.  */
  if (SMALL_OPERAND (frame->total_size))
    return frame->total_size;

  /* As an optimization, use the least-significant bits of the total frame
     size, so that the second adjustment step is just LUI + ADD.  */
  if (!SMALL_OPERAND (frame->total_size - max_first_step)
      && frame->total_size % IMM_REACH < IMM_REACH / 2
      && frame->total_size % IMM_REACH >= min_first_step)
    return frame->total_size % IMM_REACH;

  gcc_assert (min_first_step <= max_first_step);
  return max_first_step;
}
3418 | ||
/* Build the CFI note list describing what the out-of-line GPR save
   routine does: one REG_CFA_OFFSET per saved register plus a
   REG_CFA_ADJUST_CFA for the stack adjustment it performs.  */

static rtx
riscv_adjust_libcall_cfi_prologue ()
{
  rtx dwarf = NULL_RTX;
  rtx adjust_sp_rtx, reg, mem, insn;
  int saved_size = cfun->machine->frame.save_libcall_adjustment;
  int offset;

  /* NOTE(review): the loop bound GP_REG_LAST-1 mirrors the one in
     riscv_for_each_saved_reg — keep the two in sync.  */
  for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST-1; regno++)
    if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
      {
	/* The save order is ra, s0, s1, s2 to s11.  */
	if (regno == RETURN_ADDR_REGNUM)
	  offset = saved_size - UNITS_PER_WORD;
	else if (regno == S0_REGNUM)
	  offset = saved_size - UNITS_PER_WORD * 2;
	else if (regno == S1_REGNUM)
	  offset = saved_size - UNITS_PER_WORD * 3;
	else
	  offset = saved_size - ((regno - S2_REGNUM + 4) * UNITS_PER_WORD);

	reg = gen_rtx_REG (SImode, regno);
	mem = gen_frame_mem (SImode, plus_constant (Pmode,
						    stack_pointer_rtx,
						    offset));

	insn = gen_rtx_SET (mem, reg);
	dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf);
      }

  /* Debug info for adjust sp.  */
  adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
				 stack_pointer_rtx, GEN_INT (-saved_size));
  dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
			  dwarf);
  return dwarf;
}
3456 | ||
3457 | static void | |
3458 | riscv_emit_stack_tie (void) | |
3459 | { | |
3460 | if (Pmode == SImode) | |
3461 | emit_insn (gen_stack_tiesi (stack_pointer_rtx, hard_frame_pointer_rtx)); | |
3462 | else | |
3463 | emit_insn (gen_stack_tiedi (stack_pointer_rtx, hard_frame_pointer_rtx)); | |
3464 | } | |
3465 | ||
/* Expand the "prologue" pattern.  */

void
riscv_expand_prologue (void)
{
  struct riscv_frame_info *frame = &cfun->machine->frame;
  HOST_WIDE_INT size = frame->total_size;
  unsigned mask = frame->mask;
  rtx insn;

  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* When optimizing for size, call a subroutine to save the registers.  */
  if (riscv_use_save_libcall (frame))
    {
      rtx dwarf = NULL_RTX;
      dwarf = riscv_adjust_libcall_cfi_prologue ();

      frame->mask = 0; /* Temporarily fib that we need not save GPRs.  */
      size -= frame->save_libcall_adjustment;
      insn = emit_insn (gen_gpr_save (GEN_INT (mask)));

      RTX_FRAME_RELATED_P (insn) = 1;
      REG_NOTES (insn) = dwarf;
    }

  /* Save the registers.  */
  if ((frame->mask | frame->fmask) != 0)
    {
      /* Allocate only as much of the frame as keeps the register-save
	 offsets within reach of the store instructions; the remainder
	 of the frame is allocated after the saves, below.  */
      HOST_WIDE_INT step1 = MIN (size, riscv_first_stack_step (frame));

      insn = gen_add3_insn (stack_pointer_rtx,
			    stack_pointer_rtx,
			    GEN_INT (-step1));
      RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
      size -= step1;
      riscv_for_each_saved_reg (size, riscv_save_reg);
    }

  frame->mask = mask; /* Undo the above fib.  */

  /* Set up the frame pointer, if we're using one.  */
  if (frame_pointer_needed)
    {
      insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
			    GEN_INT (frame->hard_frame_pointer_offset - size));
      RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;

      /* Keep the scheduler from moving stack accesses past this point.  */
      riscv_emit_stack_tie ();
    }

  /* Allocate the rest of the frame.  */
  if (size > 0)
    {
      if (SMALL_OPERAND (-size))
	{
	  insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
				GEN_INT (-size));
	  RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
	}
      else
	{
	  /* The adjustment does not fit in an addi immediate; build it
	     in the prologue temporary register first.  */
	  riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), GEN_INT (-size));
	  emit_insn (gen_add3_insn (stack_pointer_rtx,
				    stack_pointer_rtx,
				    RISCV_PROLOGUE_TEMP (Pmode)));

	  /* Describe the effect of the previous instructions.  */
	  insn = plus_constant (Pmode, stack_pointer_rtx, -size);
	  insn = gen_rtx_SET (stack_pointer_rtx, insn);
	  riscv_set_frame_expr (insn);
	}
    }
}
3541 | ||
3542 | static rtx | |
3543 | riscv_adjust_libcall_cfi_epilogue () | |
3544 | { | |
3545 | rtx dwarf = NULL_RTX; | |
3546 | rtx adjust_sp_rtx, reg; | |
3547 | int saved_size = cfun->machine->frame.save_libcall_adjustment; | |
3548 | ||
3549 | /* Debug info for adjust sp. */ | |
3550 | adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx, | |
3551 | stack_pointer_rtx, GEN_INT (saved_size)); | |
3552 | dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, | |
3553 | dwarf); | |
3554 | ||
3555 | for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST-1; regno++) | |
3556 | if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST)) | |
3557 | { | |
3558 | reg = gen_rtx_REG (SImode, regno); | |
3559 | dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); | |
3560 | } | |
3561 | ||
3562 | return dwarf; | |
3563 | } | |
3564 | ||
/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P
   says which.  */

void
riscv_expand_epilogue (bool sibcall_p)
{
  /* Split the frame into two.  STEP1 is the amount of stack we should
     deallocate before restoring the registers.  STEP2 is the amount we
     should deallocate afterwards.

     Start off by assuming that no registers need to be restored.  */
  struct riscv_frame_info *frame = &cfun->machine->frame;
  unsigned mask = frame->mask;
  HOST_WIDE_INT step1 = frame->total_size;
  HOST_WIDE_INT step2 = 0;
  bool use_restore_libcall = !sibcall_p && riscv_use_save_libcall (frame);
  rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
  rtx insn;

  /* We need to add memory barrier to prevent read from deallocated stack.  */
  bool need_barrier_p = (get_frame_size ()
			 + cfun->machine->frame.arg_pointer_offset) != 0;

  /* A zero-size frame needs nothing but a plain "ret".  */
  if (!sibcall_p && riscv_can_use_return_insn ())
    {
      emit_jump_insn (gen_return ());
      return;
    }

  /* Move past any dynamic stack allocations.  */
  if (cfun->calls_alloca)
    {
      /* Emit a barrier to prevent loads from a deallocated stack.  */
      riscv_emit_stack_tie ();
      need_barrier_p = false;

      /* alloca may have moved sp, so recompute it from the frame
	 pointer instead of adjusting it incrementally.  */
      rtx adjust = GEN_INT (-frame->hard_frame_pointer_offset);
      if (!SMALL_OPERAND (INTVAL (adjust)))
	{
	  riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), adjust);
	  adjust = RISCV_PROLOGUE_TEMP (Pmode);
	}

      insn = emit_insn (
	       gen_add3_insn (stack_pointer_rtx, hard_frame_pointer_rtx,
			      adjust));

      /* Tell the unwinder: sp = hard_fp - hard_frame_pointer_offset.  */
      rtx dwarf = NULL_RTX;
      rtx cfa_adjust_value = gen_rtx_PLUS (
			       Pmode, hard_frame_pointer_rtx,
			       GEN_INT (-frame->hard_frame_pointer_offset));
      rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value);
      dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf);
      RTX_FRAME_RELATED_P (insn) = 1;

      REG_NOTES (insn) = dwarf;
    }

  /* If we need to restore registers, deallocate as much stack as
     possible in the second step without going out of range.  */
  if ((frame->mask | frame->fmask) != 0)
    {
      step2 = riscv_first_stack_step (frame);
      step1 -= step2;
    }

  /* Set TARGET to BASE + STEP1.  */
  if (step1 > 0)
    {
      /* Emit a barrier to prevent loads from a deallocated stack.  */
      riscv_emit_stack_tie ();
      need_barrier_p = false;

      /* Get an rtx for STEP1 that we can add to BASE.  */
      rtx adjust = GEN_INT (step1);
      if (!SMALL_OPERAND (step1))
	{
	  riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), adjust);
	  adjust = RISCV_PROLOGUE_TEMP (Pmode);
	}

      insn = emit_insn (
	       gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, adjust));

      /* After this adjustment, the CFA is sp + step2.  */
      rtx dwarf = NULL_RTX;
      rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					 GEN_INT (step2));

      dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
      RTX_FRAME_RELATED_P (insn) = 1;

      REG_NOTES (insn) = dwarf;
    }

  if (use_restore_libcall)
    frame->mask = 0; /* Temporarily fib that we need not save GPRs.  */

  /* Restore the registers.  */
  riscv_for_each_saved_reg (frame->total_size - step2, riscv_restore_reg);

  if (use_restore_libcall)
    {
      frame->mask = mask; /* Undo the above fib.  */
      gcc_assert (step2 >= frame->save_libcall_adjustment);
      step2 -= frame->save_libcall_adjustment;
    }

  if (need_barrier_p)
    riscv_emit_stack_tie ();

  /* Deallocate the final bit of the frame.  */
  if (step2 > 0)
    {
      insn = emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
				       GEN_INT (step2)));

      /* Once sp is fully popped, the CFA is simply sp.  */
      rtx dwarf = NULL_RTX;
      rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					 const0_rtx);
      dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
      RTX_FRAME_RELATED_P (insn) = 1;

      REG_NOTES (insn) = dwarf;
    }

  if (use_restore_libcall)
    {
      /* The gpr_restore routine pops the saved registers and returns on
	 our behalf, so no further epilogue code is needed.  */
      rtx dwarf = riscv_adjust_libcall_cfi_epilogue ();
      insn = emit_insn (gen_gpr_restore (GEN_INT (riscv_save_libcall_count (mask))));
      RTX_FRAME_RELATED_P (insn) = 1;
      REG_NOTES (insn) = dwarf;

      emit_jump_insn (gen_gpr_restore_return (ra));
      return;
    }

  /* Add in the __builtin_eh_return stack adjustment.  */
  if (crtl->calls_eh_return)
    emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
			      EH_RETURN_STACKADJ_RTX));

  if (!sibcall_p)
    emit_jump_insn (gen_simple_return_internal (ra));
}
3709 | ||
3710 | /* Return nonzero if this function is known to have a null epilogue. | |
3711 | This allows the optimizer to omit jumps to jumps if no stack | |
3712 | was created. */ | |
3713 | ||
3714 | bool | |
3715 | riscv_can_use_return_insn (void) | |
3716 | { | |
3717 | return reload_completed && cfun->machine->frame.total_size == 0; | |
3718 | } | |
3719 | ||
f15643d4 RS |
3720 | /* Implement TARGET_SECONDARY_MEMORY_NEEDED. |
3721 | ||
3722 | When floating-point registers are wider than integer ones, moves between | |
3723 | them must go through memory. */ | |
3724 | ||
3725 | static bool | |
3726 | riscv_secondary_memory_needed (machine_mode mode, reg_class_t class1, | |
3727 | reg_class_t class2) | |
3728 | { | |
3729 | return (GET_MODE_SIZE (mode) > UNITS_PER_WORD | |
3730 | && (class1 == FP_REGS) != (class2 == FP_REGS)); | |
3731 | } | |
3732 | ||
09cae750 PD |
3733 | /* Implement TARGET_REGISTER_MOVE_COST. */ |
3734 | ||
3735 | static int | |
b8506a8a | 3736 | riscv_register_move_cost (machine_mode mode, |
09cae750 PD |
3737 | reg_class_t from, reg_class_t to) |
3738 | { | |
f15643d4 | 3739 | return riscv_secondary_memory_needed (mode, from, to) ? 8 : 2; |
09cae750 PD |
3740 | } |
3741 | ||
c43f4279 RS |
3742 | /* Implement TARGET_HARD_REGNO_NREGS. */ |
3743 | ||
3744 | static unsigned int | |
3745 | riscv_hard_regno_nregs (unsigned int regno, machine_mode mode) | |
3746 | { | |
3747 | if (FP_REG_P (regno)) | |
3748 | return (GET_MODE_SIZE (mode) + UNITS_PER_FP_REG - 1) / UNITS_PER_FP_REG; | |
3749 | ||
3750 | /* All other registers are word-sized. */ | |
3751 | return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; | |
3752 | } | |
3753 | ||
/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
riscv_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  unsigned int nregs = riscv_hard_regno_nregs (regno, mode);

  if (GP_REG_P (regno))
    {
      /* A multi-register value must not run off the end of the GPR bank.  */
      if (!GP_REG_P (regno + nregs - 1))
	return false;
    }
  else if (FP_REG_P (regno))
    {
      /* Likewise for the FPR bank.  */
      if (!FP_REG_P (regno + nregs - 1))
	return false;

      /* FPRs only hold floating-point values (scalar or complex).  */
      if (GET_MODE_CLASS (mode) != MODE_FLOAT
	  && GET_MODE_CLASS (mode) != MODE_COMPLEX_FLOAT)
	return false;

      /* Only use callee-saved registers if a potential callee is guaranteed
	 to spill the requisite width.  */
      if (GET_MODE_UNIT_SIZE (mode) > UNITS_PER_FP_REG
	  || (!call_used_regs[regno]
	      && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_FP_ARG))
	return false;
    }
  else
    return false;

  /* Require same callee-savedness for all registers.  */
  for (unsigned i = 1; i < nregs; i++)
    if (call_used_regs[regno] != call_used_regs[regno + i])
      return false;

  return true;
}
3792 | ||
99e1629f RS |
3793 | /* Implement TARGET_MODES_TIEABLE_P. |
3794 | ||
3795 | Don't allow floating-point modes to be tied, since type punning of | |
3796 | single-precision and double-precision is implementation defined. */ | |
3797 | ||
3798 | static bool | |
3799 | riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2) | |
3800 | { | |
3801 | return (mode1 == mode2 | |
3802 | || !(GET_MODE_CLASS (mode1) == MODE_FLOAT | |
3803 | && GET_MODE_CLASS (mode2) == MODE_FLOAT)); | |
3804 | } | |
3805 | ||
09cae750 PD |
3806 | /* Implement CLASS_MAX_NREGS. */ |
3807 | ||
3808 | static unsigned char | |
b8506a8a | 3809 | riscv_class_max_nregs (reg_class_t rclass, machine_mode mode) |
09cae750 PD |
3810 | { |
3811 | if (reg_class_subset_p (FP_REGS, rclass)) | |
3812 | return riscv_hard_regno_nregs (FP_REG_FIRST, mode); | |
3813 | ||
3814 | if (reg_class_subset_p (GR_REGS, rclass)) | |
3815 | return riscv_hard_regno_nregs (GP_REG_FIRST, mode); | |
3816 | ||
3817 | return 0; | |
3818 | } | |
3819 | ||
09cae750 PD |
3820 | /* Implement TARGET_MEMORY_MOVE_COST. */ |
3821 | ||
3822 | static int | |
b8506a8a | 3823 | riscv_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in) |
09cae750 PD |
3824 | { |
3825 | return (tune_info->memory_cost | |
3826 | + memory_move_secondary_cost (mode, rclass, in)); | |
3827 | } | |
3828 | ||
/* Return the number of instructions that can be issued per cycle,
   taken from the active -mtune tuning table.  */

static int
riscv_issue_rate (void)
{
  return tune_info->issue_rate;
}
3836 | ||
3837 | /* Implement TARGET_ASM_FILE_START. */ | |
3838 | ||
3839 | static void | |
3840 | riscv_file_start (void) | |
3841 | { | |
3842 | default_file_start (); | |
3843 | ||
3844 | /* Instruct GAS to generate position-[in]dependent code. */ | |
3845 | fprintf (asm_out_file, "\t.option %spic\n", (flag_pic ? "" : "no")); | |
3846 | } | |
3847 | ||
/* Implement TARGET_ASM_OUTPUT_MI_THUNK.  Generate rtl rather than asm text
   in order to avoid duplicating too much logic from elsewhere.  */

static void
riscv_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
		       HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		       tree function)
{
  rtx this_rtx, temp1, temp2, fnaddr;
  rtx_insn *insn;

  /* Pretend to be a post-reload pass while generating rtl.  */
  reload_completed = 1;

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Determine if we can use a sibcall to call FUNCTION directly.  */
  fnaddr = gen_rtx_MEM (FUNCTION_MODE, XEXP (DECL_RTL (function), 0));

  /* We need two temporary registers in some cases.  */
  temp1 = gen_rtx_REG (Pmode, RISCV_PROLOGUE_TEMP_REGNUM);
  temp2 = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);

  /* Find out which register contains the "this" pointer.  If the
     function returns an aggregate in memory, the hidden return-slot
     pointer occupies the first argument register.  */
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST + 1);
  else
    this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST);

  /* Add DELTA to THIS_RTX.  */
  if (delta != 0)
    {
      rtx offset = GEN_INT (delta);
      if (!SMALL_OPERAND (delta))
	{
	  /* The delta does not fit in an addi immediate; materialize it
	     in a temporary register first.  */
	  riscv_emit_move (temp1, offset);
	  offset = temp1;
	}
      emit_insn (gen_add3_insn (this_rtx, this_rtx, offset));
    }

  /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
  if (vcall_offset != 0)
    {
      rtx addr;

      /* Set TEMP1 to *THIS_RTX.  */
      riscv_emit_move (temp1, gen_rtx_MEM (Pmode, this_rtx));

      /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET.  */
      addr = riscv_add_offset (temp2, temp1, vcall_offset);

      /* Load the offset and add it to THIS_RTX.  */
      riscv_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
      emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1));
    }

  /* Jump to the target function.  */
  insn = emit_call_insn (gen_sibcall (fnaddr, const0_rtx, NULL, const0_rtx));
  SIBLING_CALL_P (insn) = 1;

  /* Run just enough of rest_of_compilation.  This sequence was
     "borrowed" from alpha.c.  */
  insn = get_insns ();
  split_all_insns_noflow ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Clean up the vars set above.  Note that final_end_function resets
     the global pointer for us.  */
  reload_completed = 0;
}
3923 | ||
3924 | /* Allocate a chunk of memory for per-function machine-dependent data. */ | |
3925 | ||
3926 | static struct machine_function * | |
3927 | riscv_init_machine_status (void) | |
3928 | { | |
3929 | return ggc_cleared_alloc<machine_function> (); | |
3930 | } | |
3931 | ||
/* Implement TARGET_OPTION_OVERRIDE.  */

static void
riscv_option_override (void)
{
  const struct riscv_cpu_info *cpu;

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* Always return small structs in registers, regardless of -fpcc-struct-return.  */
  flag_pcc_struct_return = 0;

  /* Disable the small-data section under -fpic; gp-relative addressing
     is not position independent.  */
  if (flag_pic)
    g_switch_value = 0;

  /* The presence of the M extension implies that division instructions
     are present, so include them unless explicitly disabled.  */
  if (TARGET_MUL && (target_flags_explicit & MASK_DIV) == 0)
    target_flags |= MASK_DIV;
  else if (!TARGET_MUL && TARGET_DIV)
    error ("-mdiv requires -march to subsume the %<M%> extension");

  /* Likewise floating-point division and square root.  */
  if (TARGET_HARD_FLOAT && (target_flags_explicit & MASK_FDIV) == 0)
    target_flags |= MASK_FDIV;

  /* Handle -mtune.  */
  cpu = riscv_parse_cpu (riscv_tune_string ? riscv_tune_string :
			 RISCV_TUNE_STRING_DEFAULT);
  tune_info = optimize_size ? &optimize_size_tune_info : cpu->tune_info;

  /* Use -mtune's setting for slow_unaligned_access, even when optimizing
     for size.  For architectures that trap and emulate unaligned accesses,
     the performance cost is too great, even for -Os.  Similarly, if
     -m[no-]strict-align is left unspecified, heed -mtune's advice.  */
  riscv_slow_unaligned_access_p = (cpu->tune_info->slow_unaligned_access
				   || TARGET_STRICT_ALIGN);
  if ((target_flags_explicit & MASK_STRICT_ALIGN) == 0
      && cpu->tune_info->slow_unaligned_access)
    target_flags |= MASK_STRICT_ALIGN;

  /* If the user hasn't specified a branch cost, use the processor's
     default.  */
  if (riscv_branch_cost == 0)
    riscv_branch_cost = tune_info->branch_cost;

  /* Function to allocate machine-dependent function status.  */
  init_machine_status = &riscv_init_machine_status;

  if (flag_pic)
    riscv_cmodel = CM_PIC;

  /* We get better code with explicit relocs for CM_MEDLOW, but
     worse code for the others (for now).  Pick the best default.  */
  if ((target_flags_explicit & MASK_EXPLICIT_RELOCS) == 0)
    if (riscv_cmodel == CM_MEDLOW)
      target_flags |= MASK_EXPLICIT_RELOCS;

  /* Require that the ISA supports the requested floating-point ABI.  */
  if (UNITS_PER_FP_ARG > (TARGET_HARD_FLOAT ? UNITS_PER_FP_REG : 0))
    error ("requested ABI requires -march to subsume the %qc extension",
	   UNITS_PER_FP_ARG > 8 ? 'Q' : (UNITS_PER_FP_ARG > 4 ? 'D' : 'F'));

  /* We do not yet support ILP32 on RV64.  */
  if (BITS_PER_WORD != POINTER_SIZE)
    error ("ABI requires -march=rv%d", POINTER_SIZE);
}
4000 | ||
4001 | /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */ | |
4002 | ||
4003 | static void | |
4004 | riscv_conditional_register_usage (void) | |
4005 | { | |
4006 | if (!TARGET_HARD_FLOAT) | |
4007 | { | |
4008 | for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++) | |
4009 | fixed_regs[regno] = call_used_regs[regno] = 1; | |
4010 | } | |
4011 | } | |
4012 | ||
4013 | /* Return a register priority for hard reg REGNO. */ | |
4014 | ||
4015 | static int | |
4016 | riscv_register_priority (int regno) | |
4017 | { | |
4018 | /* Favor x8-x15/f8-f15 to improve the odds of RVC instruction selection. */ | |
4019 | if (TARGET_RVC && (IN_RANGE (regno, GP_REG_FIRST + 8, GP_REG_FIRST + 15) | |
4020 | || IN_RANGE (regno, FP_REG_FIRST + 8, FP_REG_FIRST + 15))) | |
4021 | return 1; | |
4022 | ||
4023 | return 0; | |
4024 | } | |
4025 | ||
/* Implement TARGET_TRAMPOLINE_INIT.  */

static void
riscv_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx addr, end_addr, mem;
  uint32_t trampoline[4];
  unsigned int i;
  HOST_WIDE_INT static_chain_offset, target_function_offset;

  /* Work out the offsets of the pointers from the start of the
     trampoline code.  */
  gcc_assert (ARRAY_SIZE (trampoline) * 4 == TRAMPOLINE_CODE_SIZE);

  /* Get pointers to the beginning and end of the code block.  */
  addr = force_reg (Pmode, XEXP (m_tramp, 0));
  end_addr = riscv_force_binary (Pmode, PLUS, addr,
				 GEN_INT (TRAMPOLINE_CODE_SIZE));


  if (Pmode == SImode)
    {
      /* On RV32 the chain and target addresses are encoded directly
	 into the instruction immediates, so the trampoline words are
	 computed at runtime from those values.  */
      chain_value = force_reg (Pmode, chain_value);

      rtx target_function = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
      /* lui     t2, hi(chain)
	 lui     t1, hi(func)
	 addi    t2, t2, lo(chain)
	 jr      r1, lo(func)
      */
      unsigned HOST_WIDE_INT lui_hi_chain_code, lui_hi_func_code;
      unsigned HOST_WIDE_INT lo_chain_code, lo_func_code;

      /* Mask selecting the upper 20 bits of a 32-bit value.  */
      rtx uimm_mask = force_reg (SImode, gen_int_mode (-IMM_REACH, SImode));

      /* 0xfff.  */
      rtx imm12_mask = gen_reg_rtx (SImode);
      emit_insn (gen_one_cmplsi2 (imm12_mask, uimm_mask));

      /* Rounding constant that compensates for the sign-extension of the
	 low 12-bit immediate in addi/jalr.  */
      rtx fixup_value = force_reg (SImode, gen_int_mode (IMM_REACH/2, SImode));

      /* Gen lui t2, hi(chain).  */
      rtx hi_chain = riscv_force_binary (SImode, PLUS, chain_value,
					 fixup_value);
      hi_chain = riscv_force_binary (SImode, AND, hi_chain,
				     uimm_mask);
      lui_hi_chain_code = OPCODE_LUI | (STATIC_CHAIN_REGNUM << SHIFT_RD);
      rtx lui_hi_chain = riscv_force_binary (SImode, IOR, hi_chain,
					     gen_int_mode (lui_hi_chain_code, SImode));

      mem = adjust_address (m_tramp, SImode, 0);
      riscv_emit_move (mem, lui_hi_chain);

      /* Gen lui t1, hi(func).  */
      rtx hi_func = riscv_force_binary (SImode, PLUS, target_function,
					fixup_value);
      hi_func = riscv_force_binary (SImode, AND, hi_func,
				    uimm_mask);
      lui_hi_func_code = OPCODE_LUI | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RD);
      rtx lui_hi_func = riscv_force_binary (SImode, IOR, hi_func,
					    gen_int_mode (lui_hi_func_code, SImode));

      mem = adjust_address (m_tramp, SImode, 1 * GET_MODE_SIZE (SImode));
      riscv_emit_move (mem, lui_hi_func);

      /* Gen addi t2, t2, lo(chain).  */
      rtx lo_chain = riscv_force_binary (SImode, AND, chain_value,
					 imm12_mask);
      lo_chain = riscv_force_binary (SImode, ASHIFT, lo_chain, GEN_INT (20));

      lo_chain_code = OPCODE_ADDI
		      | (STATIC_CHAIN_REGNUM << SHIFT_RD)
		      | (STATIC_CHAIN_REGNUM << SHIFT_RS1);

      rtx addi_lo_chain = riscv_force_binary (SImode, IOR, lo_chain,
					      force_reg (SImode, GEN_INT (lo_chain_code)));

      mem = adjust_address (m_tramp, SImode, 2 * GET_MODE_SIZE (SImode));
      riscv_emit_move (mem, addi_lo_chain);

      /* Gen jr r1, lo(func).  */
      rtx lo_func = riscv_force_binary (SImode, AND, target_function,
					imm12_mask);
      lo_func = riscv_force_binary (SImode, ASHIFT, lo_func, GEN_INT (20));

      lo_func_code = OPCODE_JALR | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RS1);

      rtx jr_lo_func = riscv_force_binary (SImode, IOR, lo_func,
					   force_reg (SImode, GEN_INT (lo_func_code)));

      mem = adjust_address (m_tramp, SImode, 3 * GET_MODE_SIZE (SImode));
      riscv_emit_move (mem, jr_lo_func);
    }
  else
    {
      /* On RV64 the chain and target are stored as data words after the
	 code and loaded pc-relative, so the code is a fixed sequence.  */
      static_chain_offset = TRAMPOLINE_CODE_SIZE;
      target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode);

      /* auipc   t2, 0
	 l[wd]   t1, target_function_offset(t2)
	 l[wd]   t2, static_chain_offset(t2)
	 jr      t1
      */
      trampoline[0] = OPCODE_AUIPC | (STATIC_CHAIN_REGNUM << SHIFT_RD);
      trampoline[1] = (Pmode == DImode ? OPCODE_LD : OPCODE_LW)
		      | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RD)
		      | (STATIC_CHAIN_REGNUM << SHIFT_RS1)
		      | (target_function_offset << SHIFT_IMM);
      trampoline[2] = (Pmode == DImode ? OPCODE_LD : OPCODE_LW)
		      | (STATIC_CHAIN_REGNUM << SHIFT_RD)
		      | (STATIC_CHAIN_REGNUM << SHIFT_RS1)
		      | (static_chain_offset << SHIFT_IMM);
      trampoline[3] = OPCODE_JALR | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RS1);

      /* Copy the trampoline code.  */
      for (i = 0; i < ARRAY_SIZE (trampoline); i++)
	{
	  mem = adjust_address (m_tramp, SImode, i * GET_MODE_SIZE (SImode));
	  riscv_emit_move (mem, gen_int_mode (trampoline[i], SImode));
	}

      /* Set up the static chain pointer field.  */
      mem = adjust_address (m_tramp, ptr_mode, static_chain_offset);
      riscv_emit_move (mem, chain_value);

      /* Set up the target function field.  */
      mem = adjust_address (m_tramp, ptr_mode, target_function_offset);
      riscv_emit_move (mem, XEXP (DECL_RTL (fndecl), 0));
    }

  /* Flush the code part of the trampoline.  */
  emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE)));
  emit_insn (gen_clear_cache (addr, end_addr));
}
4160 | ||
4161 | /* Return leaf_function_p () and memoize the result. */ | |
4162 | ||
4163 | static bool | |
4164 | riscv_leaf_function_p (void) | |
4165 | { | |
4166 | if (cfun->machine->is_leaf == 0) | |
4167 | cfun->machine->is_leaf = leaf_function_p () ? 1 : -1; | |
4168 | ||
4169 | return cfun->machine->is_leaf > 0; | |
4170 | } | |
4171 | ||
/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL.  */

static bool
riscv_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
			       tree exp ATTRIBUTE_UNUSED)
{
  /* When optimizing for size, don't use sibcalls in non-leaf routines */
  if (TARGET_SAVE_RESTORE)
    return riscv_leaf_function_p ();

  return true;
}
4184 | ||
4185 | /* Implement TARGET_CANNOT_COPY_INSN_P. */ | |
4186 | ||
4187 | static bool | |
4188 | riscv_cannot_copy_insn_p (rtx_insn *insn) | |
4189 | { | |
4190 | return recog_memoized (insn) >= 0 && get_attr_cannot_copy (insn); | |
4191 | } | |
4192 | ||
e0bd6c9f RS |
/* Implement TARGET_SLOW_UNALIGNED_ACCESS.  Returns the flag computed in
   riscv_option_override from the -mtune table and -mstrict-align,
   independent of mode and alignment.  */

static bool
riscv_slow_unaligned_access (machine_mode, unsigned int)
{
  return riscv_slow_unaligned_access_p;
}
4200 | ||
0d803030 RS |
4201 | /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ |
4202 | ||
4203 | static bool | |
4204 | riscv_can_change_mode_class (machine_mode, machine_mode, reg_class_t rclass) | |
4205 | { | |
4206 | return !reg_classes_intersect_p (FP_REGS, rclass); | |
4207 | } | |
4208 | ||
58e17cf8 RS |
4209 | |
4210 | /* Implement TARGET_CONSTANT_ALIGNMENT. */ | |
4211 | ||
4212 | static HOST_WIDE_INT | |
4213 | riscv_constant_alignment (const_tree exp, HOST_WIDE_INT align) | |
4214 | { | |
4215 | if (TREE_CODE (exp) == STRING_CST || TREE_CODE (exp) == CONSTRUCTOR) | |
4216 | return MAX (align, BITS_PER_WORD); | |
4217 | return align; | |
4218 | } | |
4219 | ||
09cae750 PD |
4220 | /* Initialize the GCC target structure. */ |
4221 | #undef TARGET_ASM_ALIGNED_HI_OP | |
4222 | #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" | |
4223 | #undef TARGET_ASM_ALIGNED_SI_OP | |
4224 | #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" | |
4225 | #undef TARGET_ASM_ALIGNED_DI_OP | |
4226 | #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t" | |
4227 | ||
4228 | #undef TARGET_OPTION_OVERRIDE | |
4229 | #define TARGET_OPTION_OVERRIDE riscv_option_override | |
4230 | ||
4231 | #undef TARGET_LEGITIMIZE_ADDRESS | |
4232 | #define TARGET_LEGITIMIZE_ADDRESS riscv_legitimize_address | |
4233 | ||
4234 | #undef TARGET_SCHED_ISSUE_RATE | |
4235 | #define TARGET_SCHED_ISSUE_RATE riscv_issue_rate | |
4236 | ||
4237 | #undef TARGET_FUNCTION_OK_FOR_SIBCALL | |
4238 | #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall | |
4239 | ||
4240 | #undef TARGET_REGISTER_MOVE_COST | |
4241 | #define TARGET_REGISTER_MOVE_COST riscv_register_move_cost | |
4242 | #undef TARGET_MEMORY_MOVE_COST | |
4243 | #define TARGET_MEMORY_MOVE_COST riscv_memory_move_cost | |
4244 | #undef TARGET_RTX_COSTS | |
4245 | #define TARGET_RTX_COSTS riscv_rtx_costs | |
4246 | #undef TARGET_ADDRESS_COST | |
4247 | #define TARGET_ADDRESS_COST riscv_address_cost | |
4248 | ||
09cae750 PD |
4249 | #undef TARGET_ASM_FILE_START |
4250 | #define TARGET_ASM_FILE_START riscv_file_start | |
4251 | #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE | |
4252 | #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true | |
4253 | ||
4254 | #undef TARGET_EXPAND_BUILTIN_VA_START | |
4255 | #define TARGET_EXPAND_BUILTIN_VA_START riscv_va_start | |
4256 | ||
4257 | #undef TARGET_PROMOTE_FUNCTION_MODE | |
4258 | #define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote | |
4259 | ||
4260 | #undef TARGET_RETURN_IN_MEMORY | |
4261 | #define TARGET_RETURN_IN_MEMORY riscv_return_in_memory | |
4262 | ||
4263 | #undef TARGET_ASM_OUTPUT_MI_THUNK | |
4264 | #define TARGET_ASM_OUTPUT_MI_THUNK riscv_output_mi_thunk | |
4265 | #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK | |
4266 | #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true | |
4267 | ||
4268 | #undef TARGET_PRINT_OPERAND | |
4269 | #define TARGET_PRINT_OPERAND riscv_print_operand | |
4270 | #undef TARGET_PRINT_OPERAND_ADDRESS | |
4271 | #define TARGET_PRINT_OPERAND_ADDRESS riscv_print_operand_address | |
4272 | ||
4273 | #undef TARGET_SETUP_INCOMING_VARARGS | |
4274 | #define TARGET_SETUP_INCOMING_VARARGS riscv_setup_incoming_varargs | |
4275 | #undef TARGET_STRICT_ARGUMENT_NAMING | |
4276 | #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true | |
4277 | #undef TARGET_MUST_PASS_IN_STACK | |
4278 | #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size | |
4279 | #undef TARGET_PASS_BY_REFERENCE | |
4280 | #define TARGET_PASS_BY_REFERENCE riscv_pass_by_reference | |
4281 | #undef TARGET_ARG_PARTIAL_BYTES | |
4282 | #define TARGET_ARG_PARTIAL_BYTES riscv_arg_partial_bytes | |
4283 | #undef TARGET_FUNCTION_ARG | |
4284 | #define TARGET_FUNCTION_ARG riscv_function_arg | |
4285 | #undef TARGET_FUNCTION_ARG_ADVANCE | |
4286 | #define TARGET_FUNCTION_ARG_ADVANCE riscv_function_arg_advance | |
4287 | #undef TARGET_FUNCTION_ARG_BOUNDARY | |
4288 | #define TARGET_FUNCTION_ARG_BOUNDARY riscv_function_arg_boundary | |
4289 | ||
4290 | /* The generic ELF target does not always have TLS support. */ | |
4291 | #ifdef HAVE_AS_TLS | |
4292 | #undef TARGET_HAVE_TLS | |
4293 | #define TARGET_HAVE_TLS true | |
4294 | #endif | |
4295 | ||
4296 | #undef TARGET_CANNOT_FORCE_CONST_MEM | |
4297 | #define TARGET_CANNOT_FORCE_CONST_MEM riscv_cannot_force_const_mem | |
4298 | ||
4299 | #undef TARGET_LEGITIMATE_CONSTANT_P | |
4300 | #define TARGET_LEGITIMATE_CONSTANT_P riscv_legitimate_constant_p | |
4301 | ||
4302 | #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P | |
4303 | #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true | |
4304 | ||
4305 | #undef TARGET_LEGITIMATE_ADDRESS_P | |
4306 | #define TARGET_LEGITIMATE_ADDRESS_P riscv_legitimate_address_p | |
4307 | ||
4308 | #undef TARGET_CAN_ELIMINATE | |
4309 | #define TARGET_CAN_ELIMINATE riscv_can_eliminate | |
4310 | ||
4311 | #undef TARGET_CONDITIONAL_REGISTER_USAGE | |
4312 | #define TARGET_CONDITIONAL_REGISTER_USAGE riscv_conditional_register_usage | |
4313 | ||
4314 | #undef TARGET_CLASS_MAX_NREGS | |
4315 | #define TARGET_CLASS_MAX_NREGS riscv_class_max_nregs | |
4316 | ||
4317 | #undef TARGET_TRAMPOLINE_INIT | |
4318 | #define TARGET_TRAMPOLINE_INIT riscv_trampoline_init | |
4319 | ||
4320 | #undef TARGET_IN_SMALL_DATA_P | |
4321 | #define TARGET_IN_SMALL_DATA_P riscv_in_small_data_p | |
4322 | ||
4323 | #undef TARGET_ASM_SELECT_RTX_SECTION | |
4324 | #define TARGET_ASM_SELECT_RTX_SECTION riscv_elf_select_rtx_section | |
4325 | ||
4326 | #undef TARGET_MIN_ANCHOR_OFFSET | |
4327 | #define TARGET_MIN_ANCHOR_OFFSET (-IMM_REACH/2) | |
4328 | ||
4329 | #undef TARGET_MAX_ANCHOR_OFFSET | |
4330 | #define TARGET_MAX_ANCHOR_OFFSET (IMM_REACH/2-1) | |
4331 | ||
4332 | #undef TARGET_REGISTER_PRIORITY | |
4333 | #define TARGET_REGISTER_PRIORITY riscv_register_priority | |
4334 | ||
4335 | #undef TARGET_CANNOT_COPY_INSN_P | |
4336 | #define TARGET_CANNOT_COPY_INSN_P riscv_cannot_copy_insn_p | |
4337 | ||
4338 | #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV | |
4339 | #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV riscv_atomic_assign_expand_fenv | |
4340 | ||
4341 | #undef TARGET_INIT_BUILTINS | |
4342 | #define TARGET_INIT_BUILTINS riscv_init_builtins | |
4343 | ||
4344 | #undef TARGET_BUILTIN_DECL | |
4345 | #define TARGET_BUILTIN_DECL riscv_builtin_decl | |
4346 | ||
4347 | #undef TARGET_EXPAND_BUILTIN | |
4348 | #define TARGET_EXPAND_BUILTIN riscv_expand_builtin | |
4349 | ||
c43f4279 RS |
4350 | #undef TARGET_HARD_REGNO_NREGS |
4351 | #define TARGET_HARD_REGNO_NREGS riscv_hard_regno_nregs | |
f939c3e6 RS |
4352 | #undef TARGET_HARD_REGNO_MODE_OK |
4353 | #define TARGET_HARD_REGNO_MODE_OK riscv_hard_regno_mode_ok | |
4354 | ||
99e1629f RS |
4355 | #undef TARGET_MODES_TIEABLE_P |
4356 | #define TARGET_MODES_TIEABLE_P riscv_modes_tieable_p | |
4357 | ||
e0bd6c9f RS |
4358 | #undef TARGET_SLOW_UNALIGNED_ACCESS |
4359 | #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access | |
4360 | ||
f15643d4 RS |
4361 | #undef TARGET_SECONDARY_MEMORY_NEEDED |
4362 | #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed | |
4363 | ||
0d803030 RS |
4364 | #undef TARGET_CAN_CHANGE_MODE_CLASS |
4365 | #define TARGET_CAN_CHANGE_MODE_CLASS riscv_can_change_mode_class | |
4366 | ||
58e17cf8 RS |
4367 | #undef TARGET_CONSTANT_ALIGNMENT |
4368 | #define TARGET_CONSTANT_ALIGNMENT riscv_constant_alignment | |
4369 | ||
09cae750 PD |
4370 | struct gcc_target targetm = TARGET_INITIALIZER; |
4371 | ||
4372 | #include "gt-riscv.h" |