/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2013 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "function.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "tm_p.h"
#include "debug.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "gimple.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "df.h"
#include "opts.h"

/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

        if (rs1 < 0)
          highest_bit = highest_clear_bit(rs1);
        else
          highest_bit = highest_set_bit(rs1);
        if (highest_bit < 3)
          highest_bit = 3;
        cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     and not variable.  */
  const int int_mul_bit_factor;

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};
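
/* Illustrative walk-through of the variable-multiply formula above,
   using the ultrasparc_costs entries defined below (int_mul of
   COSTS_N_INSNS (4), int_mul_bit_factor of 2): a positive rs1 whose
   highest set bit is 11 costs

     COSTS_N_INSNS (4) + (11 - 3) / 2  =  COSTS_N_INSNS (4) + 4,

   whereas a processor whose int_mul_bit_factor is 0 (e.g. cypress_costs)
   always charges the fixed int_mul cost regardless of rs1.  */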

static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const struct processor_costs *sparc_costs = &cypress_costs;

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, always
   use "or %o7,%g0,X; call Y; or X,%g0,%o7", so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
   anything branches between the sethi and the jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
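
/* For instance, leaf_reg_remap[24] == 8: in a function that qualifies
   for the leaf optimization, incoming register %i0 (hard reg 24) is
   rewritten as %o0 (hard reg 8), so no register window is needed.
   The -1 at index 30 (%i6, the hard frame pointer) reflects the
   restriction stated in the comment above.  */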

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Some local-dynamic TLS symbol name.  */
  const char *some_ld_name;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size cfun->machine->frame_size
#define sparc_apparent_frame_size cfun->machine->apparent_frame_size
#define sparc_frame_base_reg cfun->machine->frame_base_reg
#define sparc_frame_base_offset cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static void scan_record_type (const_tree, int *, int *, int *);
static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx, rtx, rtx, int);
static int hypersparc_adjust_cost (rtx, rtx, rtx, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (enum machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx);
static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx, rtx, rtx, int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_vis_init_builtins (void);
static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static int sparc_vis_mul8x16 (int, int);
static void sparc_handle_vis_mul8x16 (tree *, int, tree, tree, tree);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static void sparc_reorg (void);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (enum machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static int sparc_register_move_cost (enum machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, int, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (enum machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static enum machine_mode sparc_promote_function_mode (const_tree, enum machine_mode,
						      int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (enum machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     enum machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					enum machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 enum machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       enum machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					enum machine_mode, const_tree, bool);
static unsigned int sparc_function_arg_boundary (enum machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    enum machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static enum machine_mode sparc_preferred_simd_mode (enum machine_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   enum machine_mode,
					   secondary_reload_info *);
\f
#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL, false }
};
#endif
\f
/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sparc_reorg

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

struct gcc_target targetm = TARGET_INITIALIZER;

static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the PROCESSOR_* enumeration.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA, 0 },
    { "cypress", MASK_ISA, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
    /* LEON */
    { "leon", MASK_ISA, MASK_V8|MASK_FPU },
    { "sparclite", MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
      /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
  };
  const struct cpu_table *cpu;
  unsigned int i;
  int fpu;

  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }
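
  /* Illustrative reading of the loop above: -mdebug=all,!options first
     sets every MASK_DEBUG_ALL bit in sparc_debug, then clears
     MASK_DEBUG_OPTIONS again; a leading '!' inverts the named mask.  */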

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags ("Initial target_flags", target_flags);
      dump_target_flags ("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64-bit archs to use 128-bit long double.  */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
	call_used_regs [i] = 1;
      }

  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      fprintf (stderr, "sparc_cpu: %s\n",
	       cpu_table[(int) sparc_cpu].name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
		   );

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (target_flags_explicit & MASK_FPU)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* -mvis2 implies -mvis */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is
     disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit */
  if (TARGET_VIS && ! TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0
      && (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4))
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara4_costs;
      break;
    case PROCESSOR_NATIVE:
      gcc_unreachable ();
    };

  if (sparc_memory_model == SMM_DEFAULT)
    {
      /* Choose the memory model for the operating system.  */
      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
      if (os_default != SMM_DEFAULT)
	sparc_memory_model = os_default;
      /* Choose the most relaxed model for the processor.  */
      else if (TARGET_V9)
	sparc_memory_model = SMM_RMO;
      else if (TARGET_V8)
	sparc_memory_model = SMM_PSO;
      else
	sparc_memory_model = SMM_SC;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (TARGET_DEBUG_OPTIONS)
    dump_target_flags ("Final target_flags", target_flags);

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 2
			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
			     ? 8 : 3)),
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_ULTRASPARC3
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 64 : 32),
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Disable save slot sharing for call-clobbered registers by default.
     The IRA sharing algorithm works on single registers only and this
     pessimizes for double floating-point registers.  */
  if (!global_options_set.x_flag_ira_share_save_slots)
    flag_ira_share_save_slots = 0;
}
\f
/* Miscellaneous utilities.  */

/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
   or branch on register contents instructions.  */

int
v9_regcmp_p (enum rtx_code code)
{
  return (code == EQ || code == NE || code == GE || code == LT
	  || code == LE || code == GT);
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   sethi instruction.  */

int
fp_sethi_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   mov instruction.  */

int
fp_mov_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return SPARC_SIMM13_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a high/losum
   instruction sequence.  */

int
fp_high_losum_p (rtx op)
{
  /* The constraints calling this should only be in
     SFmode move insns, so any constant which cannot
     be moved using a single insn will do.  */
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
    }

  return 0;
}
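
/* Taken together, the three predicates above partition SFmode constants
   by the cheapest way to build their bit pattern in an integer register
   (illustrative summary): a simm13 image needs a single mov (fp_mov_p);
   otherwise, an image accepted by SPARC_SETHI_P needs a single sethi
   (fp_sethi_p); anything else needs the two-insn high/losum sequence
   (fp_high_losum_p).  */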

/* Return true if the address of LABEL can be loaded by means of the
   mov{si,di}_pic_label_ref patterns in PIC mode.  */

static bool
can_use_mov_pic_label_ref (rtx label)
{
  /* VxWorks does not impose a fixed gap between segments; the run-time
     gap can be different from the object-file gap.  We therefore can't
     assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
     are absolutely sure that X is in the same segment as the GOT.
     Unfortunately, the flexibility of linker scripts means that we
     can't be sure of that in general, so assume that GOT-relative
     accesses are never valid on VxWorks.  */
  if (TARGET_VXWORKS_RTP)
    return false;

  /* Similarly, if the label is non-local, it might end up being placed
     in a different section than the current one; now mov_pic_label_ref
     requires the label and the code to be in the same section.  */
  if (LABEL_REF_NONLOCAL_P (label))
    return false;

  /* Finally, if we are reordering basic blocks and partition into hot
     and cold sections, this might happen for any label.  */
  if (flag_reorder_blocks_and_partition)
    return false;

  return true;
}

/* Expand a move instruction.  Return true if all work is done.  */

bool
sparc_expand_move (enum machine_mode mode, rtx *operands)
{
  /* Handle sets of MEM first.  */
  if (GET_CODE (operands[0]) == MEM)
    {
      /* 0 is a register (or a pair of registers) on SPARC.  */
      if (register_or_zero_operand (operands[1], mode))
	return false;

      if (!reload_in_progress)
	{
	  operands[0] = validize_mem (operands[0]);
	  operands[1] = force_reg (mode, operands[1]);
	}
    }

  /* Fixup TLS cases.  */
  if (TARGET_HAVE_TLS
      && CONSTANT_P (operands[1])
      && sparc_tls_referenced_p (operands [1]))
    {
      operands[1] = sparc_legitimize_tls_address (operands[1]);
      return false;
    }

  /* Fixup PIC cases.  */
  if (flag_pic && CONSTANT_P (operands[1]))
    {
      if (pic_address_needs_scratch (operands[1]))
	operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);

      /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.  */
      if (GET_CODE (operands[1]) == LABEL_REF
	  && can_use_mov_pic_label_ref (operands[1]))
	{
	  if (mode == SImode)
	    {
	      emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }

	  if (mode == DImode)
	    {
	      gcc_assert (TARGET_ARCH64);
	      emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }
	}

      if (symbolic_operand (operands[1], mode))
	{
	  operands[1]
	    = sparc_legitimize_pic_address (operands[1],
					    reload_in_progress
					    ? operands[0] : NULL_RTX);
	  return false;
	}
    }

  /* If we are trying to toss an integer constant into FP registers,
     or loading a FP or vector constant, force it into memory.  */
  if (CONSTANT_P (operands[1])
      && REG_P (operands[0])
      && (SPARC_FP_REG_P (REGNO (operands[0]))
	  || SCALAR_FLOAT_MODE_P (mode)
	  || VECTOR_MODE_P (mode)))
    {
      /* emit_group_store will send such bogosity to us when it is
	 not storing directly into memory.  So fix this up to avoid
	 crashes in output_constant_pool.  */
      if (operands [1] == const0_rtx)
	operands[1] = CONST0_RTX (mode);

      /* We can clear or set to all-ones FP registers if TARGET_VIS, and
	 always other regs.  */
      if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
	  && (const_zero_operand (operands[1], mode)
	      || const_all_ones_operand (operands[1], mode)))
	return false;

      if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
	  /* We are able to build any SF constant in integer registers
	     with at most 2 instructions.  */
	  && (mode == SFmode
	      /* And any DF constant in integer registers.  */
	      || (mode == DFmode
		  && ! can_create_pseudo_p ())))
	return false;

      operands[1] = force_const_mem (mode, operands[1]);
      if (!reload_in_progress)
	operands[1] = validize_mem (operands[1]);
      return false;
    }

  /* Accept non-constants and valid constants unmodified.  */
  if (!CONSTANT_P (operands[1])
      || GET_CODE (operands[1]) == HIGH
      || input_operand (operands[1], mode))
    return false;

  switch (mode)
    {
    case QImode:
      /* All QImode constants require only one insn, so proceed.  */
      break;

    case HImode:
    case SImode:
      sparc_emit_set_const32 (operands[0], operands[1]);
      return true;

    case DImode:
      /* input_operand should have filtered out 32-bit mode.  */
      sparc_emit_set_const64 (operands[0], operands[1]);
      return true;

    case TImode:
      {
	rtx high, low;
	/* TImode isn't available in 32-bit mode.  */
	split_double (operands[1], &high, &low);
	emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
			      high));
	emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
			      low));
      }
      return true;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Load OP1, a 32-bit constant, into OP0, a register.
   We know it can't be done in one insn when we get
   here, the move expander guarantees this.  */

static void
sparc_emit_set_const32 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx temp = op0;

  if (can_create_pseudo_p ())
    temp = gen_reg_rtx (mode);

  if (GET_CODE (op1) == CONST_INT)
    {
      gcc_assert (!small_int_operand (op1, mode)
		  && !const_high_operand (op1, mode));

      /* Emit them as real moves instead of a HIGH/LO_SUM,
	 this way CSE can see everything and reuse intermediate
	 values if it wants.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      GEN_INT (INTVAL (op1)
				       & ~(HOST_WIDE_INT)0x3ff)));

      emit_insn (gen_rtx_SET (VOIDmode,
			      op0,
			      gen_rtx_IOR (mode, temp,
					   GEN_INT (INTVAL (op1) & 0x3ff))));
    }
  else
    {
      /* A symbol, emit in the traditional way.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      gen_rtx_HIGH (mode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode,
			      op0, gen_rtx_LO_SUM (mode, temp, op1)));
    }
}
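
/* For example (hypothetical value), loading 0x12345678 via the CONST_INT
   path above emits

     (set temp (const_int 0x12345400))      ; 0x12345678 & ~0x3ff
     (set op0 (ior temp (const_int 0x278))) ; 0x12345678 & 0x3ff

   i.e. the sethi/or pair, but with both intermediate values visible to
   CSE as ordinary integer constants.  */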
1529
1530 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1531 If TEMP is nonzero, we are forbidden to use any other scratch
1532 registers. Otherwise, we are allowed to generate them as needed.
1533
1534 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1535 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
1536
1537 void
1538 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
1539 {
1540 rtx temp1, temp2, temp3, temp4, temp5;
1541 rtx ti_temp = 0;
1542
1543 if (temp && GET_MODE (temp) == TImode)
1544 {
1545 ti_temp = temp;
1546 temp = gen_rtx_REG (DImode, REGNO (temp));
1547 }
1548
1549 /* SPARC-V9 code-model support. */
1550 switch (sparc_cmodel)
1551 {
1552 case CM_MEDLOW:
1553 /* The range spanned by all instructions in the object is less
1554 than 2^31 bytes (2GB) and the distance from any instruction
1555 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1556 than 2^31 bytes (2GB).
1557
1558 The executable must be in the low 4TB of the virtual address
1559 space.
1560
1561 sethi %hi(symbol), %temp1
1562 or %temp1, %lo(symbol), %reg */
1563 if (temp)
1564 temp1 = temp; /* op0 is allowed. */
1565 else
1566 temp1 = gen_reg_rtx (DImode);
1567
1568 emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
1569 emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1570 break;
1571
1572 case CM_MEDMID:
1573 /* The range spanned by all instructions in the object is less
1574 than 2^31 bytes (2GB) and the distance from any instruction
1575 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1576 than 2^31 bytes (2GB).
1577
1578 The executable must be in the low 16TB of the virtual address
1579 space.
1580
1581 sethi %h44(symbol), %temp1
1582 or %temp1, %m44(symbol), %temp2
1583 sllx %temp2, 12, %temp3
1584 or %temp3, %l44(symbol), %reg */
1585 if (temp)
1586 {
1587 temp1 = op0;
1588 temp2 = op0;
1589 temp3 = temp; /* op0 is allowed. */
1590 }
1591 else
1592 {
1593 temp1 = gen_reg_rtx (DImode);
1594 temp2 = gen_reg_rtx (DImode);
1595 temp3 = gen_reg_rtx (DImode);
1596 }
1597
1598 emit_insn (gen_seth44 (temp1, op1));
1599 emit_insn (gen_setm44 (temp2, temp1, op1));
1600 emit_insn (gen_rtx_SET (VOIDmode, temp3,
1601 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
1602 emit_insn (gen_setl44 (op0, temp3, op1));
1603 break;
1604
1605 case CM_MEDANY:
1606 /* The range spanned by all instructions in the object is less
1607 than 2^31 bytes (2GB) and the distance from any instruction
1608 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1609 than 2^31 bytes (2GB).
1610
1611 The executable can be placed anywhere in the virtual address
1612 space.
1613
1614 sethi %hh(symbol), %temp1
1615 sethi %lm(symbol), %temp2
1616 or %temp1, %hm(symbol), %temp3
1617 sllx %temp3, 32, %temp4
1618 or %temp4, %temp2, %temp5
1619 or %temp5, %lo(symbol), %reg */
1620 if (temp)
1621 {
1622 /* It is possible that one of the registers we got for operands[2]
1623 might coincide with that of operands[0] (which is why we made
1624 it TImode). Pick the other one to use as our scratch. */
1625 if (rtx_equal_p (temp, op0))
1626 {
1627 gcc_assert (ti_temp);
1628 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1629 }
1630 temp1 = op0;
1631 temp2 = temp; /* op0 is _not_ allowed, see above. */
1632 temp3 = op0;
1633 temp4 = op0;
1634 temp5 = op0;
1635 }
1636 else
1637 {
1638 temp1 = gen_reg_rtx (DImode);
1639 temp2 = gen_reg_rtx (DImode);
1640 temp3 = gen_reg_rtx (DImode);
1641 temp4 = gen_reg_rtx (DImode);
1642 temp5 = gen_reg_rtx (DImode);
1643 }
1644
1645 emit_insn (gen_sethh (temp1, op1));
1646 emit_insn (gen_setlm (temp2, op1));
1647 emit_insn (gen_sethm (temp3, temp1, op1));
1648 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1649 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1650 emit_insn (gen_rtx_SET (VOIDmode, temp5,
1651 gen_rtx_PLUS (DImode, temp4, temp2)));
1652 emit_insn (gen_setlo (op0, temp5, op1));
1653 break;
1654
1655 case CM_EMBMEDANY:
1656 /* Old old old backwards compatibility kruft here.
1657 Essentially it is MEDLOW with a fixed 64-bit
1658 virtual base added to all data segment addresses.
1659 Text-segment stuff is computed like MEDANY, we can't
1660 reuse the code above because the relocation knobs
1661 look different.
1662
1663 Data segment: sethi %hi(symbol), %temp1
1664 add %temp1, EMBMEDANY_BASE_REG, %temp2
1665 or %temp2, %lo(symbol), %reg */
1666 if (data_segment_operand (op1, GET_MODE (op1)))
1667 {
1668 if (temp)
1669 {
1670 temp1 = temp; /* op0 is allowed. */
1671 temp2 = op0;
1672 }
1673 else
1674 {
1675 temp1 = gen_reg_rtx (DImode);
1676 temp2 = gen_reg_rtx (DImode);
1677 }
1678
1679 emit_insn (gen_embmedany_sethi (temp1, op1));
1680 emit_insn (gen_embmedany_brsum (temp2, temp1));
1681 emit_insn (gen_embmedany_losum (op0, temp2, op1));
1682 }
1683
1684 /* Text segment: sethi %uhi(symbol), %temp1
1685 sethi %hi(symbol), %temp2
1686 or %temp1, %ulo(symbol), %temp3
1687 sllx %temp3, 32, %temp4
1688 or %temp4, %temp2, %temp5
1689 or %temp5, %lo(symbol), %reg */
1690 else
1691 {
1692 if (temp)
1693 {
1694 /* It is possible that one of the registers we got for operands[2]
1695 might coincide with that of operands[0] (which is why we made
1696 it TImode). Pick the other one to use as our scratch. */
1697 if (rtx_equal_p (temp, op0))
1698 {
1699 gcc_assert (ti_temp);
1700 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1701 }
1702 temp1 = op0;
1703 temp2 = temp; /* op0 is _not_ allowed, see above. */
1704 temp3 = op0;
1705 temp4 = op0;
1706 temp5 = op0;
1707 }
1708 else
1709 {
1710 temp1 = gen_reg_rtx (DImode);
1711 temp2 = gen_reg_rtx (DImode);
1712 temp3 = gen_reg_rtx (DImode);
1713 temp4 = gen_reg_rtx (DImode);
1714 temp5 = gen_reg_rtx (DImode);
1715 }
1716
1717 emit_insn (gen_embmedany_textuhi (temp1, op1));
1718 emit_insn (gen_embmedany_texthi (temp2, op1));
1719 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
1720 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1721 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1722 emit_insn (gen_rtx_SET (VOIDmode, temp5,
1723 gen_rtx_PLUS (DImode, temp4, temp2)));
1724 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
1725 }
1726 break;
1727
1728 default:
1729 gcc_unreachable ();
1730 }
1731 }
1732
1733 #if HOST_BITS_PER_WIDE_INT == 32
1734 static void
1735 sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
1736 {
1737 gcc_unreachable ();
1738 }
1739 #else
1740 /* These avoid problems when cross compiling.  If we do not
1741 go through all this hair, then the optimizer will see
1742 invalid REG_EQUAL notes or in some cases none at all.  */
1743 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
1744 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
1745 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
1746 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
1747
1748 /* The optimizer is not allowed to assume anything about exactly
1749 which bits are set for a HIGH; they are unspecified.
1750 Unfortunately this leads to many missed optimizations
1751 during CSE.  We mask out the non-HIGH bits so that the result
1752 matches a plain movdi, to alleviate this problem.  */
1753 static rtx
1754 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
1755 {
1756 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
1757 }
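/* For example, with VAL = 0x12345678 the insn above is a plain move
   of 0x12345400 into DEST: only bits 31:10 survive the masking,
   which is exactly what a sethi of %hi(0x12345678) is guaranteed
   to set.  */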
1758
1759 static rtx
1760 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
1761 {
1762 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
1763 }
1764
1765 static rtx
1766 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
1767 {
1768 return gen_rtx_IOR (DImode, src, GEN_INT (val));
1769 }
1770
1771 static rtx
1772 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
1773 {
1774 return gen_rtx_XOR (DImode, src, GEN_INT (val));
1775 }
1776
1777 /* Worker routines for 64-bit constant formation on arch64.
1778 One of the key things to do in these emissions is
1779 to create as many temp REGs as possible.  This makes it
1780 possible for half-built constants to be reused later
1781 when similar values are required again.
1782 Without doing this, the optimizer cannot see such
1783 opportunities.  */
1784
1785 static void sparc_emit_set_const64_quick1 (rtx, rtx,
1786 unsigned HOST_WIDE_INT, int);
1787
1788 static void
1789 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
1790 unsigned HOST_WIDE_INT low_bits, int is_neg)
1791 {
1792 unsigned HOST_WIDE_INT high_bits;
1793
1794 if (is_neg)
1795 high_bits = (~low_bits) & 0xffffffff;
1796 else
1797 high_bits = low_bits;
1798
1799 emit_insn (gen_safe_HIGH64 (temp, high_bits));
1800 if (!is_neg)
1801 {
1802 emit_insn (gen_rtx_SET (VOIDmode, op0,
1803 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1804 }
1805 else
1806 {
1807 /* If we are XOR'ing with -1, then we should emit a one's complement
1808 instead. This way the combiner will notice logical operations
1809 such as ANDN later on and substitute. */
1810 if ((low_bits & 0x3ff) == 0x3ff)
1811 {
1812 emit_insn (gen_rtx_SET (VOIDmode, op0,
1813 gen_rtx_NOT (DImode, temp)));
1814 }
1815 else
1816 {
1817 emit_insn (gen_rtx_SET (VOIDmode, op0,
1818 gen_safe_XOR64 (temp,
1819 (-(HOST_WIDE_INT)0x400
1820 | (low_bits & 0x3ff)))));
1821 }
1822 }
1823 }
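/* Worked example for the negated case: to load 0xffffffff12345678
   we get low_bits = 0x12345678 and is_neg = 1, hence
   high_bits = ~low_bits = 0xedcba987, and the sequence is

	sethi	%hi(0xedcba987), %temp	! %temp = 0xedcba800
	xor	%temp, -0x188, %op0	! -0x400 | 0x278

   where the xor with the sign-extended immediate flips bits 63:10
   and leaves 0xffffffff12345678 in %op0.  */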
1824
1825 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
1826 unsigned HOST_WIDE_INT, int);
1827
1828 static void
1829 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
1830 unsigned HOST_WIDE_INT high_bits,
1831 unsigned HOST_WIDE_INT low_immediate,
1832 int shift_count)
1833 {
1834 rtx temp2 = op0;
1835
1836 if ((high_bits & 0xfffffc00) != 0)
1837 {
1838 emit_insn (gen_safe_HIGH64 (temp, high_bits));
1839 if ((high_bits & ~0xfffffc00) != 0)
1840 emit_insn (gen_rtx_SET (VOIDmode, op0,
1841 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1842 else
1843 temp2 = temp;
1844 }
1845 else
1846 {
1847 emit_insn (gen_safe_SET64 (temp, high_bits));
1848 temp2 = temp;
1849 }
1850
1851 /* Now shift it up into place. */
1852 emit_insn (gen_rtx_SET (VOIDmode, op0,
1853 gen_rtx_ASHIFT (DImode, temp2,
1854 GEN_INT (shift_count))));
1855
1856 /* If there is a low immediate piece, finish up by
1857 putting that in as well.  */
1858 if (low_immediate != 0)
1859 emit_insn (gen_rtx_SET (VOIDmode, op0,
1860 gen_safe_OR64 (op0, low_immediate)));
1861 }
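/* Worked example: high_bits = 0x12345678, low_immediate = 0x234 and
   shift_count = 32 produce

	sethi	%hi(0x12345678), %temp	! %temp = 0x12345400
	or	%temp, 0x278, %op0	! %op0 = 0x12345678
	sllx	%op0, 32, %op0		! %op0 = 0x1234567800000000
	or	%op0, 0x234, %op0	! %op0 = 0x1234567800000234  */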
1862
1863 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
1864 unsigned HOST_WIDE_INT);
1865
1866 /* Full 64-bit constant decomposition. Even though this is the
1867 'worst' case, we still optimize a few things away. */
1868 static void
1869 sparc_emit_set_const64_longway (rtx op0, rtx temp,
1870 unsigned HOST_WIDE_INT high_bits,
1871 unsigned HOST_WIDE_INT low_bits)
1872 {
1873 rtx sub_temp = op0;
1874
1875 if (can_create_pseudo_p ())
1876 sub_temp = gen_reg_rtx (DImode);
1877
1878 if ((high_bits & 0xfffffc00) != 0)
1879 {
1880 emit_insn (gen_safe_HIGH64 (temp, high_bits));
1881 if ((high_bits & ~0xfffffc00) != 0)
1882 emit_insn (gen_rtx_SET (VOIDmode,
1883 sub_temp,
1884 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1885 else
1886 sub_temp = temp;
1887 }
1888 else
1889 {
1890 emit_insn (gen_safe_SET64 (temp, high_bits));
1891 sub_temp = temp;
1892 }
1893
1894 if (can_create_pseudo_p ())
1895 {
1896 rtx temp2 = gen_reg_rtx (DImode);
1897 rtx temp3 = gen_reg_rtx (DImode);
1898 rtx temp4 = gen_reg_rtx (DImode);
1899
1900 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1901 gen_rtx_ASHIFT (DImode, sub_temp,
1902 GEN_INT (32))));
1903
1904 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
1905 if ((low_bits & ~0xfffffc00) != 0)
1906 {
1907 emit_insn (gen_rtx_SET (VOIDmode, temp3,
1908 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
1909 emit_insn (gen_rtx_SET (VOIDmode, op0,
1910 gen_rtx_PLUS (DImode, temp4, temp3)));
1911 }
1912 else
1913 {
1914 emit_insn (gen_rtx_SET (VOIDmode, op0,
1915 gen_rtx_PLUS (DImode, temp4, temp2)));
1916 }
1917 }
1918 else
1919 {
1920 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
1921 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
1922 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
1923 int to_shift = 12;
1924
1925 /* We are in the middle of reload, so this is really
1926 painful.  However, we still make an attempt to
1927 avoid emitting truly stupid code.  */
1928 if (low1 != const0_rtx)
1929 {
1930 emit_insn (gen_rtx_SET (VOIDmode, op0,
1931 gen_rtx_ASHIFT (DImode, sub_temp,
1932 GEN_INT (to_shift))));
1933 emit_insn (gen_rtx_SET (VOIDmode, op0,
1934 gen_rtx_IOR (DImode, op0, low1)));
1935 sub_temp = op0;
1936 to_shift = 12;
1937 }
1938 else
1939 {
1940 to_shift += 12;
1941 }
1942 if (low2 != const0_rtx)
1943 {
1944 emit_insn (gen_rtx_SET (VOIDmode, op0,
1945 gen_rtx_ASHIFT (DImode, sub_temp,
1946 GEN_INT (to_shift))));
1947 emit_insn (gen_rtx_SET (VOIDmode, op0,
1948 gen_rtx_IOR (DImode, op0, low2)));
1949 sub_temp = op0;
1950 to_shift = 8;
1951 }
1952 else
1953 {
1954 to_shift += 8;
1955 }
1956 emit_insn (gen_rtx_SET (VOIDmode, op0,
1957 gen_rtx_ASHIFT (DImode, sub_temp,
1958 GEN_INT (to_shift))));
1959 if (low3 != const0_rtx)
1960 emit_insn (gen_rtx_SET (VOIDmode, op0,
1961 gen_rtx_IOR (DImode, op0, low3)));
1962 /* phew... */
1963 }
1964 }
1965
1966 /* Analyze a 64-bit constant for certain properties. */
1967 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
1968 unsigned HOST_WIDE_INT,
1969 int *, int *, int *);
1970
1971 static void
1972 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
1973 unsigned HOST_WIDE_INT low_bits,
1974 int *hbsp, int *lbsp, int *abbasp)
1975 {
1976 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
1977 int i;
1978
1979 lowest_bit_set = highest_bit_set = -1;
1980 i = 0;
1981 do
1982 {
1983 if ((lowest_bit_set == -1)
1984 && ((low_bits >> i) & 1))
1985 lowest_bit_set = i;
1986 if ((highest_bit_set == -1)
1987 && ((high_bits >> (32 - i - 1)) & 1))
1988 highest_bit_set = (64 - i - 1);
1989 }
1990 while (++i < 32
1991 && ((highest_bit_set == -1)
1992 || (lowest_bit_set == -1)));
1993 if (i == 32)
1994 {
1995 i = 0;
1996 do
1997 {
1998 if ((lowest_bit_set == -1)
1999 && ((high_bits >> i) & 1))
2000 lowest_bit_set = i + 32;
2001 if ((highest_bit_set == -1)
2002 && ((low_bits >> (32 - i - 1)) & 1))
2003 highest_bit_set = 32 - i - 1;
2004 }
2005 while (++i < 32
2006 && ((highest_bit_set == -1)
2007 || (lowest_bit_set == -1)));
2008 }
2009 /* If there are no bits set, this should have gone out
2010 as one instruction!  */
2011 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2012 all_bits_between_are_set = 1;
2013 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2014 {
2015 if (i < 32)
2016 {
2017 if ((low_bits & (1 << i)) != 0)
2018 continue;
2019 }
2020 else
2021 {
2022 if ((high_bits & (1 << (i - 32))) != 0)
2023 continue;
2024 }
2025 all_bits_between_are_set = 0;
2026 break;
2027 }
2028 *hbsp = highest_bit_set;
2029 *lbsp = lowest_bit_set;
2030 *abbasp = all_bits_between_are_set;
2031 }
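/* For example, high_bits = 0x00000003 and low_bits = 0xfffffffc,
   i.e. the constant 0x00000003fffffffc, yield *lbsp = 2, *hbsp = 33
   and *abbasp = 1, since bits 2 through 33 form one contiguous
   run.  */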
2032
2033 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2034
2035 static int
2036 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2037 unsigned HOST_WIDE_INT low_bits)
2038 {
2039 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2040
2041 if (high_bits == 0
2042 || high_bits == 0xffffffff)
2043 return 1;
2044
2045 analyze_64bit_constant (high_bits, low_bits,
2046 &highest_bit_set, &lowest_bit_set,
2047 &all_bits_between_are_set);
2048
2049 if ((highest_bit_set == 63
2050 || lowest_bit_set == 0)
2051 && all_bits_between_are_set != 0)
2052 return 1;
2053
2054 if ((highest_bit_set - lowest_bit_set) < 21)
2055 return 1;
2056
2057 return 0;
2058 }
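/* For example, 0x000003ffffc00000 (bits 22..41 set) is 2 insns via
   the span test (41 - 22 < 21: sethi plus a shift), whereas
   0x00000003fffffffc (bits 2..33 set) fails all three tests: its
   span is 31 and the run of set bits touches neither bit 0 nor
   bit 63.  */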
2059
2060 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2061 unsigned HOST_WIDE_INT,
2062 int, int);
2063
2064 static unsigned HOST_WIDE_INT
2065 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2066 unsigned HOST_WIDE_INT low_bits,
2067 int lowest_bit_set, int shift)
2068 {
2069 HOST_WIDE_INT hi, lo;
2070
2071 if (lowest_bit_set < 32)
2072 {
2073 lo = (low_bits >> lowest_bit_set) << shift;
2074 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2075 }
2076 else
2077 {
2078 lo = 0;
2079 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2080 }
2081 gcc_assert (! (hi & lo));
2082 return (hi | lo);
2083 }
2084
2085 /* Here we are sure to be arch64 and this is an integer constant
2086 being loaded into a register. Emit the most efficient
2087 insn sequence possible. Detection of all the 1-insn cases
2088 has been done already. */
2089 static void
2090 sparc_emit_set_const64 (rtx op0, rtx op1)
2091 {
2092 unsigned HOST_WIDE_INT high_bits, low_bits;
2093 int lowest_bit_set, highest_bit_set;
2094 int all_bits_between_are_set;
2095 rtx temp = 0;
2096
2097 /* Sanity check that we know what we are working with. */
2098 gcc_assert (TARGET_ARCH64
2099 && (GET_CODE (op0) == SUBREG
2100 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2101
2102 if (! can_create_pseudo_p ())
2103 temp = op0;
2104
2105 if (GET_CODE (op1) != CONST_INT)
2106 {
2107 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2108 return;
2109 }
2110
2111 if (! temp)
2112 temp = gen_reg_rtx (DImode);
2113
2114 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2115 low_bits = (INTVAL (op1) & 0xffffffff);
2116
2117 /* low_bits bits 0 --> 31
2118 high_bits bits 32 --> 63 */
2119
2120 analyze_64bit_constant (high_bits, low_bits,
2121 &highest_bit_set, &lowest_bit_set,
2122 &all_bits_between_are_set);
2123
2124 /* First try for a 2-insn sequence. */
2125
2126 /* These situations are preferred because the optimizer can
2127 * do more things with them:
2128 * 1) mov -1, %reg
2129 * sllx %reg, shift, %reg
2130 * 2) mov -1, %reg
2131 * srlx %reg, shift, %reg
2132 * 3) mov some_small_const, %reg
2133 * sllx %reg, shift, %reg
2134 */
2135 if (((highest_bit_set == 63
2136 || lowest_bit_set == 0)
2137 && all_bits_between_are_set != 0)
2138 || ((highest_bit_set - lowest_bit_set) < 12))
2139 {
2140 HOST_WIDE_INT the_const = -1;
2141 int shift = lowest_bit_set;
2142
2143 if ((highest_bit_set != 63
2144 && lowest_bit_set != 0)
2145 || all_bits_between_are_set == 0)
2146 {
2147 the_const =
2148 create_simple_focus_bits (high_bits, low_bits,
2149 lowest_bit_set, 0);
2150 }
2151 else if (lowest_bit_set == 0)
2152 shift = -(63 - highest_bit_set);
2153
2154 gcc_assert (SPARC_SIMM13_P (the_const));
2155 gcc_assert (shift != 0);
2156
2157 emit_insn (gen_safe_SET64 (temp, the_const));
2158 if (shift > 0)
2159 emit_insn (gen_rtx_SET (VOIDmode,
2160 op0,
2161 gen_rtx_ASHIFT (DImode,
2162 temp,
2163 GEN_INT (shift))));
2164 else if (shift < 0)
2165 emit_insn (gen_rtx_SET (VOIDmode,
2166 op0,
2167 gen_rtx_LSHIFTRT (DImode,
2168 temp,
2169 GEN_INT (-shift))));
2170 return;
2171 }
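/* For example, 0xfffffffffffff000 (-4096) has lowest_bit_set == 12,
   highest_bit_set == 63 and every bit in between set, so it becomes

	mov	-1, %temp
	sllx	%temp, 12, %op0

   while 0x00000000ffffffff (lowest 0, highest 31) takes the
   negative-shift branch and becomes mov -1; srlx %temp, 32, %op0.  */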
2172
2173 /* Now handle a range of 22 or fewer bits set somewhere.
2174 * 1) sethi %hi(focus_bits), %reg
2175 * sllx %reg, shift, %reg
2176 * 2) sethi %hi(focus_bits), %reg
2177 * srlx %reg, shift, %reg
2178 */
2179 if ((highest_bit_set - lowest_bit_set) < 21)
2180 {
2181 unsigned HOST_WIDE_INT focus_bits =
2182 create_simple_focus_bits (high_bits, low_bits,
2183 lowest_bit_set, 10);
2184
2185 gcc_assert (SPARC_SETHI_P (focus_bits));
2186 gcc_assert (lowest_bit_set != 10);
2187
2188 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2189
2190 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2191 if (lowest_bit_set < 10)
2192 emit_insn (gen_rtx_SET (VOIDmode,
2193 op0,
2194 gen_rtx_LSHIFTRT (DImode, temp,
2195 GEN_INT (10 - lowest_bit_set))));
2196 else if (lowest_bit_set > 10)
2197 emit_insn (gen_rtx_SET (VOIDmode,
2198 op0,
2199 gen_rtx_ASHIFT (DImode, temp,
2200 GEN_INT (lowest_bit_set - 10))));
2201 return;
2202 }
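/* For example, 0x000003ffffc00000 (bits 22..41 set) gives
   focus_bits = 0x3ffffc00, so we emit

	sethi	%hi(0x3ffffc00), %temp
	sllx	%temp, 12, %op0		! 22 - 10 = 12

   which reconstructs the constant exactly.  */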
2203
2204 /* 1) sethi %hi(low_bits), %reg
2205 * or %reg, %lo(low_bits), %reg
2206 * 2) sethi %hi(~low_bits), %reg
2207 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2208 */
2209 if (high_bits == 0
2210 || high_bits == 0xffffffff)
2211 {
2212 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2213 (high_bits == 0xffffffff));
2214 return;
2215 }
2216
2217 /* Now, try 3-insn sequences. */
2218
2219 /* 1) sethi %hi(high_bits), %reg
2220 * or %reg, %lo(high_bits), %reg
2221 * sllx %reg, 32, %reg
2222 */
2223 if (low_bits == 0)
2224 {
2225 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2226 return;
2227 }
2228
2229 /* We may be able to do something quick
2230 when the constant is negated, so try that. */
2231 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2232 (~low_bits) & 0xfffffc00))
2233 {
2234 /* NOTE: The trailing bits get XOR'd so we need the
2235 non-negated bits, not the negated ones. */
2236 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2237
2238 if ((((~high_bits) & 0xffffffff) == 0
2239 && ((~low_bits) & 0x80000000) == 0)
2240 || (((~high_bits) & 0xffffffff) == 0xffffffff
2241 && ((~low_bits) & 0x80000000) != 0))
2242 {
2243 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2244
2245 if ((SPARC_SETHI_P (fast_int)
2246 && (~high_bits & 0xffffffff) == 0)
2247 || SPARC_SIMM13_P (fast_int))
2248 emit_insn (gen_safe_SET64 (temp, fast_int));
2249 else
2250 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2251 }
2252 else
2253 {
2254 rtx negated_const;
2255 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2256 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2257 sparc_emit_set_const64 (temp, negated_const);
2258 }
2259
2260 /* If we are XOR'ing with -1, then we should emit a one's complement
2261 instead. This way the combiner will notice logical operations
2262 such as ANDN later on and substitute. */
2263 if (trailing_bits == 0x3ff)
2264 {
2265 emit_insn (gen_rtx_SET (VOIDmode, op0,
2266 gen_rtx_NOT (DImode, temp)));
2267 }
2268 else
2269 {
2270 emit_insn (gen_rtx_SET (VOIDmode,
2271 op0,
2272 gen_safe_XOR64 (temp,
2273 (-0x400 | trailing_bits))));
2274 }
2275 return;
2276 }
2277
2278 /* 1) sethi %hi(xxx), %reg
2279 * or %reg, %lo(xxx), %reg
2280 * sllx %reg, yyy, %reg
2281 *
2282 * ??? This is just a generalized version of the low_bits==0
2283 * thing above, FIXME...
2284 */
2285 if ((highest_bit_set - lowest_bit_set) < 32)
2286 {
2287 unsigned HOST_WIDE_INT focus_bits =
2288 create_simple_focus_bits (high_bits, low_bits,
2289 lowest_bit_set, 0);
2290
2291 /* We can't get here in this state. */
2292 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2293
2294 /* So what we know is that the set bits straddle the
2295 middle of the 64-bit word. */
2296 sparc_emit_set_const64_quick2 (op0, temp,
2297 focus_bits, 0,
2298 lowest_bit_set);
2299 return;
2300 }
2301
2302 /* 1) sethi %hi(high_bits), %reg
2303 * or %reg, %lo(high_bits), %reg
2304 * sllx %reg, 32, %reg
2305 * or %reg, low_bits, %reg
2306 */
2307 if (SPARC_SIMM13_P(low_bits)
2308 && ((int)low_bits > 0))
2309 {
2310 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2311 return;
2312 }
2313
2314 /* The easiest way, when all else fails, is full decomposition.  */
2315 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2316 }
2317 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
2318
2319 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2320 return the mode to be used for the comparison. For floating-point,
2321 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
2322 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2323 processing is needed. */
2324
2325 enum machine_mode
2326 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2327 {
2328 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2329 {
2330 switch (op)
2331 {
2332 case EQ:
2333 case NE:
2334 case UNORDERED:
2335 case ORDERED:
2336 case UNLT:
2337 case UNLE:
2338 case UNGT:
2339 case UNGE:
2340 case UNEQ:
2341 case LTGT:
2342 return CCFPmode;
2343
2344 case LT:
2345 case LE:
2346 case GT:
2347 case GE:
2348 return CCFPEmode;
2349
2350 default:
2351 gcc_unreachable ();
2352 }
2353 }
2354 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2355 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2356 {
2357 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2358 return CCX_NOOVmode;
2359 else
2360 return CC_NOOVmode;
2361 }
2362 else
2363 {
2364 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2365 return CCXmode;
2366 else
2367 return CCmode;
2368 }
2369 }
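/* For example, comparing (plus:SI a b) against zero selects
   CC_NOOVmode: the comparison is then expected to be carried out by
   an addcc-style insn whose overflow bit reflects the addition
   itself, so only conditions that do not rely on the V bit are
   valid in that mode.  */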
2370
2371 /* Emit the compare insn and return the CC reg for a CODE comparison
2372 with operands X and Y. */
2373
2374 static rtx
2375 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2376 {
2377 enum machine_mode mode;
2378 rtx cc_reg;
2379
2380 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2381 return x;
2382
2383 mode = SELECT_CC_MODE (code, x, y);
2384
2385 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2386 fcc regs (cse can't tell they're really call clobbered regs and will
2387 remove a duplicate comparison even if there is an intervening function
2388 call - it will then try to reload the cc reg via an int reg which is why
2389 we need the movcc patterns). It is possible to provide the movcc
2390 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2391 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2392 to tell cse that CCFPE mode registers (even pseudos) are call
2393 clobbered. */
2394
2395 /* ??? This is an experiment. Rather than making changes to cse which may
2396 or may not be easy/clean, we do our own cse. This is possible because
2397 we will generate hard registers. Cse knows they're call clobbered (it
2398 doesn't know the same thing about pseudos). If we guess wrong, no big
2399 deal, but if we win, great! */
2400
2401 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2402 #if 1 /* experiment */
2403 {
2404 int reg;
2405 /* We cycle through the registers to ensure they're all exercised. */
2406 static int next_fcc_reg = 0;
2407 /* Previous x,y for each fcc reg. */
2408 static rtx prev_args[4][2];
2409
2410 /* Scan prev_args for x,y. */
2411 for (reg = 0; reg < 4; reg++)
2412 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2413 break;
2414 if (reg == 4)
2415 {
2416 reg = next_fcc_reg;
2417 prev_args[reg][0] = x;
2418 prev_args[reg][1] = y;
2419 next_fcc_reg = (next_fcc_reg + 1) & 3;
2420 }
2421 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2422 }
2423 #else
2424 cc_reg = gen_reg_rtx (mode);
2425 #endif /* ! experiment */
2426 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2427 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2428 else
2429 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2430
2431 /* We should not get here for TFmode if !TARGET_HARD_QUAD.  If we do, this
2432 will only result in an unrecognizable insn, so no point in asserting.  */
2433 emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
2434
2435 return cc_reg;
2436 }
2437
2438
2439 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2440
2441 rtx
2442 gen_compare_reg (rtx cmp)
2443 {
2444 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2445 }
2446
2447 /* This function is used for v9 only.
2448 DEST is the target of the Scc insn.
2449 CODE is the code for an Scc's comparison.
2450 X and Y are the values we compare.
2451
2452 This function is needed to turn
2453
2454 (set (reg:SI 110)
2455 (gt (reg:CCX 100 %icc)
2456 (const_int 0)))
2457 into
2458 (set (reg:SI 110)
2459 (gt:DI (reg:CCX 100 %icc)
2460 (const_int 0)))
2461
2462 I.e., the instruction recognizer needs to see the mode of the comparison to
2463 find the right instruction.  We could use "gt:DI" right in the
2464 define_expand, but leaving it out allows us to handle DI, SI, etc.  */
2465
2466 static int
2467 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2468 {
2469 if (! TARGET_ARCH64
2470 && (GET_MODE (x) == DImode
2471 || GET_MODE (dest) == DImode))
2472 return 0;
2473
2474 /* Try to use the movrCC insns. */
2475 if (TARGET_ARCH64
2476 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2477 && y == const0_rtx
2478 && v9_regcmp_p (compare_code))
2479 {
2480 rtx op0 = x;
2481 rtx temp;
2482
2483 /* Special case for op0 != 0. This can be done with one instruction if
2484 dest == x. */
2485
2486 if (compare_code == NE
2487 && GET_MODE (dest) == DImode
2488 && rtx_equal_p (op0, dest))
2489 {
2490 emit_insn (gen_rtx_SET (VOIDmode, dest,
2491 gen_rtx_IF_THEN_ELSE (DImode,
2492 gen_rtx_fmt_ee (compare_code, DImode,
2493 op0, const0_rtx),
2494 const1_rtx,
2495 dest)));
2496 return 1;
2497 }
2498
2499 if (reg_overlap_mentioned_p (dest, op0))
2500 {
2501 /* Handle the case where dest == x.
2502 We "early clobber" the result. */
2503 op0 = gen_reg_rtx (GET_MODE (x));
2504 emit_move_insn (op0, x);
2505 }
2506
2507 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2508 if (GET_MODE (op0) != DImode)
2509 {
2510 temp = gen_reg_rtx (DImode);
2511 convert_move (temp, op0, 0);
2512 }
2513 else
2514 temp = op0;
2515 emit_insn (gen_rtx_SET (VOIDmode, dest,
2516 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2517 gen_rtx_fmt_ee (compare_code, DImode,
2518 temp, const0_rtx),
2519 const1_rtx,
2520 dest)));
2521 return 1;
2522 }
2523 else
2524 {
2525 x = gen_compare_reg_1 (compare_code, x, y);
2526 y = const0_rtx;
2527
2528 gcc_assert (GET_MODE (x) != CC_NOOVmode
2529 && GET_MODE (x) != CCX_NOOVmode);
2530
2531 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2532 emit_insn (gen_rtx_SET (VOIDmode, dest,
2533 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2534 gen_rtx_fmt_ee (compare_code,
2535 GET_MODE (x), x, y),
2536 const1_rtx, dest)));
2537 return 1;
2538 }
2539 }
2540
2541
2542 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2543 without jumps using the addx/subx instructions. */
2544
2545 bool
2546 emit_scc_insn (rtx operands[])
2547 {
2548 rtx tem;
2549 rtx x;
2550 rtx y;
2551 enum rtx_code code;
2552
2553 /* The quad-word fp compare library routines all return nonzero to indicate
2554 true, which is different from the equivalent libgcc routines, so we must
2555 handle them specially here. */
2556 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2557 {
2558 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2559 GET_CODE (operands[1]));
2560 operands[2] = XEXP (operands[1], 0);
2561 operands[3] = XEXP (operands[1], 1);
2562 }
2563
2564 code = GET_CODE (operands[1]);
2565 x = operands[2];
2566 y = operands[3];
2567
2568 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2569 more applications). The exception to this is "reg != 0" which can
2570 be done in one instruction on v9 (so we do it). */
2571 if (code == EQ)
2572 {
2573 if (GET_MODE (x) == SImode)
2574 {
2575 rtx pat = gen_seqsi_special (operands[0], x, y);
2576 emit_insn (pat);
2577 return true;
2578 }
2579 else if (GET_MODE (x) == DImode)
2580 {
2581 rtx pat = gen_seqdi_special (operands[0], x, y);
2582 emit_insn (pat);
2583 return true;
2584 }
2585 }
2586
2587 if (code == NE)
2588 {
2589 if (GET_MODE (x) == SImode)
2590 {
2591 rtx pat = gen_snesi_special (operands[0], x, y);
2592 emit_insn (pat);
2593 return true;
2594 }
2595 else if (GET_MODE (x) == DImode)
2596 {
2597 rtx pat;
2598 if (TARGET_VIS3)
2599 pat = gen_snedi_special_vis3 (operands[0], x, y);
2600 else
2601 pat = gen_snedi_special (operands[0], x, y);
2602 emit_insn (pat);
2603 return true;
2604 }
2605 }
2606
2607 if (TARGET_V9
2608 && TARGET_ARCH64
2609 && GET_MODE (x) == DImode
2610 && !(TARGET_VIS3
2611 && (code == GTU || code == LTU))
2612 && gen_v9_scc (operands[0], code, x, y))
2613 return true;
2614
2615 /* We can do LTU and GEU using the addx/subx instructions too.  And
2616 for GTU/LEU, if both operands are registers, swap them and fall
2617 back to the easy case.  */
2618 if (code == GTU || code == LEU)
2619 {
2620 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
2621 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
2622 {
2623 tem = x;
2624 x = y;
2625 y = tem;
2626 code = swap_condition (code);
2627 }
2628 }
2629
2630 if (code == LTU
2631 || (!TARGET_VIS3 && code == GEU))
2632 {
2633 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2634 gen_rtx_fmt_ee (code, SImode,
2635 gen_compare_reg_1 (code, x, y),
2636 const0_rtx)));
2637 return true;
2638 }
2639
2640 /* All the possibilities to use addx/subx-based sequences have been
2641 exhausted; try for a 3-instruction sequence using v9 conditional
2642 moves.  */
2643 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
2644 return true;
2645
2646 /* Nope, do branches. */
2647 return false;
2648 }
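/* Roughly, the branch-free addx/subx sequences referred to above
   look like

	subcc	%x, %y, %g0	! carry set iff x < y (unsigned)
	addx	%g0, 0, %dest	! sltu: %dest = carry

   and, for sne,

	xor	%x, %y, %tmp
	subcc	%g0, %tmp, %g0	! carry set iff %tmp != 0
	addx	%g0, 0, %dest

   These are illustrative sketches; the exact patterns live in
   sparc.md.  */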
2649
2650 /* Emit a conditional jump insn for the v9 architecture using comparison code
2651 CODE and jump target LABEL.
2652 This function exists to take advantage of the v9 brxx insns. */
2653
2654 static void
2655 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
2656 {
2657 emit_jump_insn (gen_rtx_SET (VOIDmode,
2658 pc_rtx,
2659 gen_rtx_IF_THEN_ELSE (VOIDmode,
2660 gen_rtx_fmt_ee (code, GET_MODE (op0),
2661 op0, const0_rtx),
2662 gen_rtx_LABEL_REF (VOIDmode, label),
2663 pc_rtx)));
2664 }
2665
2666 /* Emit a conditional jump insn for the UA2011 architecture using
2667 comparison code CODE and jump target LABEL. This function exists
2668 to take advantage of the UA2011 Compare and Branch insns. */
2669
2670 static void
2671 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
2672 {
2673 rtx if_then_else;
2674
2675 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
2676 gen_rtx_fmt_ee(code, GET_MODE(op0),
2677 op0, op1),
2678 gen_rtx_LABEL_REF (VOIDmode, label),
2679 pc_rtx);
2680
2681 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, if_then_else));
2682 }
2683
2684 void
2685 emit_conditional_branch_insn (rtx operands[])
2686 {
2687 /* The quad-word fp compare library routines all return nonzero to indicate
2688 true, which is different from the equivalent libgcc routines, so we must
2689 handle them specially here. */
2690 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
2691 {
2692 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
2693 GET_CODE (operands[0]));
2694 operands[1] = XEXP (operands[0], 0);
2695 operands[2] = XEXP (operands[0], 1);
2696 }
2697
2698 /* If we can tell early on that the comparison is against a constant
2699 that won't fit in the 5-bit signed immediate field of a cbcond,
2700 use one of the other v9 conditional branch sequences. */
2701 if (TARGET_CBCOND
2702 && GET_CODE (operands[1]) == REG
2703 && (GET_MODE (operands[1]) == SImode
2704 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
2705 && (GET_CODE (operands[2]) != CONST_INT
2706 || SPARC_SIMM5_P (INTVAL (operands[2]))))
2707 {
2708 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
2709 return;
2710 }
2711
2712 if (TARGET_ARCH64 && operands[2] == const0_rtx
2713 && GET_CODE (operands[1]) == REG
2714 && GET_MODE (operands[1]) == DImode)
2715 {
2716 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
2717 return;
2718 }
2719
2720 operands[1] = gen_compare_reg (operands[0]);
2721 operands[2] = const0_rtx;
2722 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
2723 operands[1], operands[2]);
2724 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
2725 operands[3]));
2726 }
2727
2728
2729 /* Generate a DFmode part of a hard TFmode register.
2730 REG is the TFmode hard register, LOW is 1 for the
2731 low 64 bits of the register and 0 otherwise.
2732 */
2733 rtx
2734 gen_df_reg (rtx reg, int low)
2735 {
2736 int regno = REGNO (reg);
2737
2738 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
2739 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
2740 return gen_rtx_REG (DFmode, regno);
2741 }
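/* For example, on big-endian 64-bit SPARC a TFmode value living in
   %f0 has its high DFmode word returned by gen_df_reg (reg, 0) as
   %f0 and its low word by gen_df_reg (reg, 1) as %f2.  */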
2742 \f
2743 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
2744 Unlike normal calls, TFmode operands are passed by reference. It is
2745 assumed that no more than 3 operands are required. */
2746
2747 static void
2748 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
2749 {
2750 rtx ret_slot = NULL, arg[3], func_sym;
2751 int i;
2752
2753 /* We only expect to be called for conversions, unary, and binary ops. */
2754 gcc_assert (nargs == 2 || nargs == 3);
2755
2756 for (i = 0; i < nargs; ++i)
2757 {
2758 rtx this_arg = operands[i];
2759 rtx this_slot;
2760
2761 /* TFmode arguments and return values are passed by reference. */
2762 if (GET_MODE (this_arg) == TFmode)
2763 {
2764 int force_stack_temp;
2765
2766 force_stack_temp = 0;
2767 if (TARGET_BUGGY_QP_LIB && i == 0)
2768 force_stack_temp = 1;
2769
2770 if (GET_CODE (this_arg) == MEM
2771 && ! force_stack_temp)
2772 {
2773 tree expr = MEM_EXPR (this_arg);
2774 if (expr)
2775 mark_addressable (expr);
2776 this_arg = XEXP (this_arg, 0);
2777 }
2778 else if (CONSTANT_P (this_arg)
2779 && ! force_stack_temp)
2780 {
2781 this_slot = force_const_mem (TFmode, this_arg);
2782 this_arg = XEXP (this_slot, 0);
2783 }
2784 else
2785 {
2786 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
2787
2788 /* Operand 0 is the return value. We'll copy it out later. */
2789 if (i > 0)
2790 emit_move_insn (this_slot, this_arg);
2791 else
2792 ret_slot = this_slot;
2793
2794 this_arg = XEXP (this_slot, 0);
2795 }
2796 }
2797
2798 arg[i] = this_arg;
2799 }
2800
2801 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
2802
2803 if (GET_MODE (operands[0]) == TFmode)
2804 {
2805 if (nargs == 2)
2806 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
2807 arg[0], GET_MODE (arg[0]),
2808 arg[1], GET_MODE (arg[1]));
2809 else
2810 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
2811 arg[0], GET_MODE (arg[0]),
2812 arg[1], GET_MODE (arg[1]),
2813 arg[2], GET_MODE (arg[2]));
2814
2815 if (ret_slot)
2816 emit_move_insn (operands[0], ret_slot);
2817 }
2818 else
2819 {
2820 rtx ret;
2821
2822 gcc_assert (nargs == 2);
2823
2824 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
2825 GET_MODE (operands[0]), 1,
2826 arg[1], GET_MODE (arg[1]));
2827
2828 if (ret != operands[0])
2829 emit_move_insn (operands[0], ret);
2830 }
2831 }
2832
2833 /* Expand soft-float TFmode calls to the SPARC ABI library routines.  */
2834
2835 static void
2836 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
2837 {
2838 const char *func;
2839
2840 switch (code)
2841 {
2842 case PLUS:
2843 func = "_Qp_add";
2844 break;
2845 case MINUS:
2846 func = "_Qp_sub";
2847 break;
2848 case MULT:
2849 func = "_Qp_mul";
2850 break;
2851 case DIV:
2852 func = "_Qp_div";
2853 break;
2854 default:
2855 gcc_unreachable ();
2856 }
2857
2858 emit_soft_tfmode_libcall (func, 3, operands);
2859 }
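/* For reference, the _Qp_* routines follow the SPARC V9 ABI
   convention of passing and returning TFmode by pointer, e.g.
   (roughly)

	void _Qp_add (long double *res, const long double *a,
		      const long double *b);  */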
2860
2861 static void
2862 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
2863 {
2864 const char *func;
2865
2866 gcc_assert (code == SQRT);
2867 func = "_Qp_sqrt";
2868
2869 emit_soft_tfmode_libcall (func, 2, operands);
2870 }
2871
2872 static void
2873 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
2874 {
2875 const char *func;
2876
2877 switch (code)
2878 {
2879 case FLOAT_EXTEND:
2880 switch (GET_MODE (operands[1]))
2881 {
2882 case SFmode:
2883 func = "_Qp_stoq";
2884 break;
2885 case DFmode:
2886 func = "_Qp_dtoq";
2887 break;
2888 default:
2889 gcc_unreachable ();
2890 }
2891 break;
2892
2893 case FLOAT_TRUNCATE:
2894 switch (GET_MODE (operands[0]))
2895 {
2896 case SFmode:
2897 func = "_Qp_qtos";
2898 break;
2899 case DFmode:
2900 func = "_Qp_qtod";
2901 break;
2902 default:
2903 gcc_unreachable ();
2904 }
2905 break;
2906
2907 case FLOAT:
2908 switch (GET_MODE (operands[1]))
2909 {
2910 case SImode:
2911 func = "_Qp_itoq";
2912 if (TARGET_ARCH64)
2913 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
2914 break;
2915 case DImode:
2916 func = "_Qp_xtoq";
2917 break;
2918 default:
2919 gcc_unreachable ();
2920 }
2921 break;
2922
2923 case UNSIGNED_FLOAT:
2924 switch (GET_MODE (operands[1]))
2925 {
2926 case SImode:
2927 func = "_Qp_uitoq";
2928 if (TARGET_ARCH64)
2929 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
2930 break;
2931 case DImode:
2932 func = "_Qp_uxtoq";
2933 break;
2934 default:
2935 gcc_unreachable ();
2936 }
2937 break;
2938
2939 case FIX:
2940 switch (GET_MODE (operands[0]))
2941 {
2942 case SImode:
2943 func = "_Qp_qtoi";
2944 break;
2945 case DImode:
2946 func = "_Qp_qtox";
2947 break;
2948 default:
2949 gcc_unreachable ();
2950 }
2951 break;
2952
2953 case UNSIGNED_FIX:
2954 switch (GET_MODE (operands[0]))
2955 {
2956 case SImode:
2957 func = "_Qp_qtoui";
2958 break;
2959 case DImode:
2960 func = "_Qp_qtoux";
2961 break;
2962 default:
2963 gcc_unreachable ();
2964 }
2965 break;
2966
2967 default:
2968 gcc_unreachable ();
2969 }
2970
2971 emit_soft_tfmode_libcall (func, 2, operands);
2972 }
2973
2974 /* Expand a hard-float TFmode operation.  All arguments must be in
2975 registers.  */
2976
2977 static void
2978 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
2979 {
2980 rtx op, dest;
2981
2982 if (GET_RTX_CLASS (code) == RTX_UNARY)
2983 {
2984 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2985 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
2986 }
2987 else
2988 {
2989 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
2990 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
2991 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
2992 operands[1], operands[2]);
2993 }
2994
2995 if (register_operand (operands[0], VOIDmode))
2996 dest = operands[0];
2997 else
2998 dest = gen_reg_rtx (GET_MODE (operands[0]));
2999
3000 emit_insn (gen_rtx_SET (VOIDmode, dest, op));
3001
3002 if (dest != operands[0])
3003 emit_move_insn (operands[0], dest);
3004 }
3005
3006 void
3007 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3008 {
3009 if (TARGET_HARD_QUAD)
3010 emit_hard_tfmode_operation (code, operands);
3011 else
3012 emit_soft_tfmode_binop (code, operands);
3013 }
3014
3015 void
3016 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3017 {
3018 if (TARGET_HARD_QUAD)
3019 emit_hard_tfmode_operation (code, operands);
3020 else
3021 emit_soft_tfmode_unop (code, operands);
3022 }
3023
3024 void
3025 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3026 {
3027 if (TARGET_HARD_QUAD)
3028 emit_hard_tfmode_operation (code, operands);
3029 else
3030 emit_soft_tfmode_cvt (code, operands);
3031 }
3032 \f
3033 /* Return nonzero if a branch/jump/call instruction will be emitting
3034 a nop into its delay slot.  */
3035
3036 int
3037 empty_delay_slot (rtx insn)
3038 {
3039 rtx seq;
3040
3041 /* If no previous instruction (should not happen), return true. */
3042 if (PREV_INSN (insn) == NULL)
3043 return 1;
3044
3045 seq = NEXT_INSN (PREV_INSN (insn));
3046 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3047 return 0;
3048
3049 return 1;
3050 }
3051
3052 /* Return nonzero if we should emit a nop after a cbcond instruction.
3053 The cbcond instruction does not have a delay slot; however, there is
3054 a severe performance penalty if a control transfer appears right
3055 after a cbcond.  Therefore we emit a nop when we detect this
3056 situation.  */
3057
3058 int
3059 emit_cbcond_nop (rtx insn)
3060 {
3061 rtx next = next_active_insn (insn);
3062
3063 if (!next)
3064 return 1;
3065
3066 if (GET_CODE (next) == INSN
3067 && GET_CODE (PATTERN (next)) == SEQUENCE)
3068 next = XVECEXP (PATTERN (next), 0, 0);
3069 else if (GET_CODE (next) == CALL_INSN
3070 && GET_CODE (PATTERN (next)) == PARALLEL)
3071 {
3072 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3073
3074 if (GET_CODE (delay) == RETURN)
3075 {
3076 /* It's a sibling call. Do not emit the nop if we're going
3077 to emit something other than the jump itself as the first
3078 instruction of the sibcall sequence. */
3079 if (sparc_leaf_function_p || TARGET_FLAT)
3080 return 0;
3081 }
3082 }
3083
3084 if (NONJUMP_INSN_P (next))
3085 return 0;
3086
3087 return 1;
3088 }
3089
3090 /* Return nonzero if TRIAL can go into the call delay slot. */
3091
3092 int
3093 tls_call_delay (rtx trial)
3094 {
3095 rtx pat;
3096
3097 /* Binutils allows
3098 call __tls_get_addr, %tgd_call (foo)
3099 add %l7, %o0, %o0, %tgd_add (foo)
3100 while Sun as/ld does not. */
3101 if (TARGET_GNU_TLS || !TARGET_TLS)
3102 return 1;
3103
3104 pat = PATTERN (trial);
3105
3106 /* We must reject tgd_add{32|64}, i.e.
3107 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3108 and tldm_add{32|64}, i.e.
3109 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3110 for Sun as/ld. */
3111 if (GET_CODE (pat) == SET
3112 && GET_CODE (SET_SRC (pat)) == PLUS)
3113 {
3114 rtx unspec = XEXP (SET_SRC (pat), 1);
3115
3116 if (GET_CODE (unspec) == UNSPEC
3117 && (XINT (unspec, 1) == UNSPEC_TLSGD
3118 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3119 return 0;
3120 }
3121
3122 return 1;
3123 }
3124
3125 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3126 instruction. RETURN_P is true if the v9 variant 'return' is to be
3127 considered in the test too.
3128
3129 TRIAL must be a SET whose destination is a REG appropriate for the
3130 'restore' instruction or, if RETURN_P is true, for the 'return'
3131 instruction. */
3132
3133 static int
3134 eligible_for_restore_insn (rtx trial, bool return_p)
3135 {
3136 rtx pat = PATTERN (trial);
3137 rtx src = SET_SRC (pat);
3138 bool src_is_freg = false;
3139 rtx src_reg;
3140
3141 /* Since we now can do moves between float and integer registers when
3142 VIS3 is enabled, we have to catch this case.  We can, however, allow
3143 such moves when doing a 'return'.  */
3144 src_reg = src;
3145 if (GET_CODE (src_reg) == SUBREG)
3146 src_reg = SUBREG_REG (src_reg);
3147 if (GET_CODE (src_reg) == REG
3148 && SPARC_FP_REG_P (REGNO (src_reg)))
3149 src_is_freg = true;
3150
3151 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3152 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3153 && arith_operand (src, GET_MODE (src))
3154 && ! src_is_freg)
3155 {
3156 if (TARGET_ARCH64)
3157 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3158 else
3159 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3160 }
3161
3162 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3163 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3164 && arith_double_operand (src, GET_MODE (src))
3165 && ! src_is_freg)
3166 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3167
3168 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3169 else if (! TARGET_FPU && register_operand (src, SFmode))
3170 return 1;
3171
3172 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3173 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3174 return 1;
3175
3176 /* If we have the 'return' instruction, anything that does not use
3177 local or output registers and can go into a delay slot wins. */
3178 else if (return_p
3179 && TARGET_V9
3180 && !epilogue_renumber (&pat, 1)
3181 && get_attr_in_uncond_branch_delay (trial)
3182 == IN_UNCOND_BRANCH_DELAY_TRUE)
3183 return 1;
3184
3185 /* The 'restore src1,src2,dest' pattern for SImode. */
3186 else if (GET_CODE (src) == PLUS
3187 && register_operand (XEXP (src, 0), SImode)
3188 && arith_operand (XEXP (src, 1), SImode))
3189 return 1;
3190
3191 /* The 'restore src1,src2,dest' pattern for DImode. */
3192 else if (GET_CODE (src) == PLUS
3193 && register_operand (XEXP (src, 0), DImode)
3194 && arith_double_operand (XEXP (src, 1), DImode))
3195 return 1;
3196
3197 /* The 'restore src1,%lo(src2),dest' pattern. */
3198 else if (GET_CODE (src) == LO_SUM
3199 && ! TARGET_CM_MEDMID
3200 && ((register_operand (XEXP (src, 0), SImode)
3201 && immediate_operand (XEXP (src, 1), SImode))
3202 || (TARGET_ARCH64
3203 && register_operand (XEXP (src, 0), DImode)
3204 && immediate_operand (XEXP (src, 1), DImode))))
3205 return 1;
3206
3207 /* The 'restore src,src,dest' pattern. */
3208 else if (GET_CODE (src) == ASHIFT
3209 && (register_operand (XEXP (src, 0), SImode)
3210 || register_operand (XEXP (src, 0), DImode))
3211 && XEXP (src, 1) == const1_rtx)
3212 return 1;
3213
3214 return 0;
3215 }
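/* For example, the classic 'return 5' epilogue

	ret
	restore	%g0, 5, %o0

   comes from accepting a 'mov 5, %i0' for the delay slot through
   the arith_operand case above.  */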
3216
3217 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3218
3219 int
3220 eligible_for_return_delay (rtx trial)
3221 {
3222 int regno;
3223 rtx pat;
3224
3225 if (GET_CODE (trial) != INSN)
3226 return 0;
3227
3228 if (get_attr_length (trial) != 1)
3229 return 0;
3230
3231 /* If the function uses __builtin_eh_return, the eh_return machinery
3232 occupies the delay slot. */
3233 if (crtl->calls_eh_return)
3234 return 0;
3235
3236 /* In the case of a leaf or flat function, anything can go into the slot. */
3237 if (sparc_leaf_function_p || TARGET_FLAT)
3238 return
3239 get_attr_in_uncond_branch_delay (trial) == IN_UNCOND_BRANCH_DELAY_TRUE;
3240
3241 pat = PATTERN (trial);
3242 if (GET_CODE (pat) == PARALLEL)
3243 {
3244 int i;
3245
3246 if (! TARGET_V9)
3247 return 0;
3248 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3249 {
3250 rtx expr = XVECEXP (pat, 0, i);
3251 if (GET_CODE (expr) != SET)
3252 return 0;
3253 if (GET_CODE (SET_DEST (expr)) != REG)
3254 return 0;
3255 regno = REGNO (SET_DEST (expr));
3256 if (regno >= 8 && regno < 24)
3257 return 0;
3258 }
3259 return !epilogue_renumber (&pat, 1)
3260 && (get_attr_in_uncond_branch_delay (trial)
3261 == IN_UNCOND_BRANCH_DELAY_TRUE);
3262 }
3263
3264 if (GET_CODE (pat) != SET)
3265 return 0;
3266
3267 if (GET_CODE (SET_DEST (pat)) != REG)
3268 return 0;
3269
3270 regno = REGNO (SET_DEST (pat));
3271
3272 /* Otherwise, only operations which can be done in tandem with
3273 a `restore' or `return' insn can go into the delay slot. */
3274 if (regno >= 8 && regno < 24)
3275 return 0;
3276
3277 /* If this instruction sets up a floating-point register and we have a
3278 return instruction, it can probably go in.  But restore will not work
3279 with FP_REGS.  */
3280 if (! SPARC_INT_REG_P (regno))
3281 return (TARGET_V9
3282 && !epilogue_renumber (&pat, 1)
3283 && get_attr_in_uncond_branch_delay (trial)
3284 == IN_UNCOND_BRANCH_DELAY_TRUE);
3285
3286 return eligible_for_restore_insn (trial, true);
3287 }
3288
3289 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3290
3291 int
3292 eligible_for_sibcall_delay (rtx trial)
3293 {
3294 rtx pat;
3295
3296 if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
3297 return 0;
3298
3299 if (get_attr_length (trial) != 1)
3300 return 0;
3301
3302 pat = PATTERN (trial);
3303
3304 if (sparc_leaf_function_p || TARGET_FLAT)
3305 {
3306 /* If the tail call is done using the call instruction,
3307 we have to restore %o7 in the delay slot. */
3308 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3309 return 0;
3310
3311 /* %g1 is used to build the function address */
3312 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3313 return 0;
3314
3315 return 1;
3316 }
3317
3318 /* Otherwise, only operations which can be done in tandem with
3319 a `restore' insn can go into the delay slot. */
3320 if (GET_CODE (SET_DEST (pat)) != REG
3321 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3322 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3323 return 0;
3324
3325 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3326 in most cases. */
3327 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3328 return 0;
3329
3330 return eligible_for_restore_insn (trial, false);
3331 }
3332 \f
3333 /* Determine if it's legal to put X into the constant pool. This
3334 is not possible if X contains the address of a symbol that is
3335 not constant (TLS) or not known at final link time (PIC). */
3336
3337 static bool
3338 sparc_cannot_force_const_mem (enum machine_mode mode, rtx x)
3339 {
3340 switch (GET_CODE (x))
3341 {
3342 case CONST_INT:
3343 case CONST_DOUBLE:
3344 case CONST_VECTOR:
3345 /* Accept all non-symbolic constants. */
3346 return false;
3347
3348 case LABEL_REF:
3349 /* Labels are OK iff we are non-PIC. */
3350 return flag_pic != 0;
3351
3352 case SYMBOL_REF:
3353 /* 'Naked' TLS symbol references are never OK;
3354 non-TLS symbols are OK iff we are non-PIC.  */
3355 if (SYMBOL_REF_TLS_MODEL (x))
3356 return true;
3357 else
3358 return flag_pic != 0;
3359
3360 case CONST:
3361 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3362 case PLUS:
3363 case MINUS:
3364 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3365 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3366 case UNSPEC:
3367 return true;
3368 default:
3369 gcc_unreachable ();
3370 }
3371 }
3372 \f
3373 /* Global Offset Table support. */
3374 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3375 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3376
3377 /* Return the SYMBOL_REF for the Global Offset Table. */
3378
3379 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3380
3381 static rtx
3382 sparc_got (void)
3383 {
3384 if (!sparc_got_symbol)
3385 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3386
3387 return sparc_got_symbol;
3388 }
3389
3390 /* Ensure that we are not using patterns that are not OK with PIC. */
3391
3392 int
3393 check_pic (int i)
3394 {
3395 rtx op;
3396
3397 switch (flag_pic)
3398 {
3399 case 1:
3400 op = recog_data.operand[i];
3401 gcc_assert (GET_CODE (op) != SYMBOL_REF
3402 && (GET_CODE (op) != CONST
3403 || (GET_CODE (XEXP (op, 0)) == MINUS
3404 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3405 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
3406 case 2:
3407 default:
3408 return 1;
3409 }
3410 }
3411
3412 /* Return true if X is an address which needs a temporary register when
3413 reloaded while generating PIC code. */
3414
3415 int
3416 pic_address_needs_scratch (rtx x)
3417 {
3418 /* An address which is a symbol plus a non-SMALL_INT needs a temp reg.  */
3419 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3420 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3421 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3422 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3423 return 1;
3424
3425 return 0;
3426 }
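/* For example, (const (plus (symbol_ref "foo") (const_int 0x2000)))
   needs a scratch register, because 0x2000 does not fit in the
   13-bit signed immediate field, while a small offset such as 64
   would not.  */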
3427
3428 /* Determine if a given RTX is a valid constant. We already know this
3429 satisfies CONSTANT_P. */
3430
3431 static bool
3432 sparc_legitimate_constant_p (enum machine_mode mode, rtx x)
3433 {
3434 switch (GET_CODE (x))
3435 {
3436 case CONST:
3437 case SYMBOL_REF:
3438 if (sparc_tls_referenced_p (x))
3439 return false;
3440 break;
3441
3442 case CONST_DOUBLE:
3443 if (GET_MODE (x) == VOIDmode)
3444 return true;
3445
3446 /* Floating point constants are generally not ok.
3447 The only exception is 0.0 and all-ones in VIS. */
3448 if (TARGET_VIS
3449 && SCALAR_FLOAT_MODE_P (mode)
3450 && (const_zero_operand (x, mode)
3451 || const_all_ones_operand (x, mode)))
3452 return true;
3453
3454 return false;
3455
3456 case CONST_VECTOR:
3457 /* Vector constants are generally not ok.
3458 The only exception is 0 or -1 in VIS. */
3459 if (TARGET_VIS
3460 && (const_zero_operand (x, mode)
3461 || const_all_ones_operand (x, mode)))
3462 return true;
3463
3464 return false;
3465
3466 default:
3467 break;
3468 }
3469
3470 return true;
3471 }
3472
3473 /* Determine if a given RTX is a valid constant address. */
3474
3475 bool
3476 constant_address_p (rtx x)
3477 {
3478 switch (GET_CODE (x))
3479 {
3480 case LABEL_REF:
3481 case CONST_INT:
3482 case HIGH:
3483 return true;
3484
3485 case CONST:
3486 if (flag_pic && pic_address_needs_scratch (x))
3487 return false;
3488 return sparc_legitimate_constant_p (Pmode, x);
3489
3490 case SYMBOL_REF:
3491 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3492
3493 default:
3494 return false;
3495 }
3496 }
3497
3498 /* Nonzero if the constant value X is a legitimate general operand
3499 when generating PIC code. It is given that flag_pic is on and
3500 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
3501
3502 bool
3503 legitimate_pic_operand_p (rtx x)
3504 {
3505 if (pic_address_needs_scratch (x))
3506 return false;
3507 if (sparc_tls_referenced_p (x))
3508 return false;
3509 return true;
3510 }
3511
3512 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3513 (CONST_INT_P (X) \
3514 && INTVAL (X) >= -0x1000 \
3515 && INTVAL (X) < (0x1000 - GET_MODE_SIZE (MODE)))
3516
3517 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3518 (CONST_INT_P (X) \
3519 && INTVAL (X) >= -0x1000 \
3520 && INTVAL (X) < (0xc00 - GET_MODE_SIZE (MODE)))
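/* Subtracting GET_MODE_SIZE keeps the whole datum addressable: for a
   DFmode access, e.g., the offset must stay below 0x1000 - 8 so that
   the second word at offset + 4 still has a valid displacement if
   the access is split into word loads.  */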
3521
3522 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3523
3524 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3525 ordinarily. This changes a bit when generating PIC. */
3526
3527 static bool
3528 sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3529 {
3530 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3531
3532 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3533 rs1 = addr;
3534 else if (GET_CODE (addr) == PLUS)
3535 {
3536 rs1 = XEXP (addr, 0);
3537 rs2 = XEXP (addr, 1);
3538
3539 /* Canonicalize.  REG comes first; if there are no regs,
3540 LO_SUM comes first.  */
3541 if (!REG_P (rs1)
3542 && GET_CODE (rs1) != SUBREG
3543 && (REG_P (rs2)
3544 || GET_CODE (rs2) == SUBREG
3545 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3546 {
3547 rs1 = XEXP (addr, 1);
3548 rs2 = XEXP (addr, 0);
3549 }
3550
3551 if ((flag_pic == 1
3552 && rs1 == pic_offset_table_rtx
3553 && !REG_P (rs2)
3554 && GET_CODE (rs2) != SUBREG
3555 && GET_CODE (rs2) != LO_SUM
3556 && GET_CODE (rs2) != MEM
3557 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3558 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3559 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3560 || ((REG_P (rs1)
3561 || GET_CODE (rs1) == SUBREG)
3562 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
3563 {
3564 imm1 = rs2;
3565 rs2 = NULL;
3566 }
3567 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3568 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3569 {
3570 /* We prohibit REG + REG for TFmode when there are no quad move insns
3571 and we consequently need to split. We do this because REG+REG
3572 is not an offsettable address. If we get the situation in reload
3573 where source and destination of a movtf pattern are both MEMs with
3574 REG+REG address, then only one of them gets converted to an
3575 offsettable address. */
3576 if (mode == TFmode
3577 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
3578 return 0;
3579
3580 /* Likewise for TImode, but in all cases. */
3581 if (mode == TImode)
3582 return 0;
3583
3584 /* We prohibit REG + REG on ARCH32 if not optimizing for
3585 DFmode/DImode because then mem_min_alignment is likely to be zero
3586 after reload and the forced split would lack a matching splitter
3587 pattern. */
3588 if (TARGET_ARCH32 && !optimize
3589 && (mode == DFmode || mode == DImode))
3590 return 0;
3591 }
3592 else if (USE_AS_OFFSETABLE_LO10
3593 && GET_CODE (rs1) == LO_SUM
3594 && TARGET_ARCH64
3595 && ! TARGET_CM_MEDMID
3596 && RTX_OK_FOR_OLO10_P (rs2, mode))
3597 {
3598 rs2 = NULL;
3599 imm1 = XEXP (rs1, 1);
3600 rs1 = XEXP (rs1, 0);
3601 if (!CONSTANT_P (imm1)
3602 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3603 return 0;
3604 }
3605 }
3606 else if (GET_CODE (addr) == LO_SUM)
3607 {
3608 rs1 = XEXP (addr, 0);
3609 imm1 = XEXP (addr, 1);
3610
3611 if (!CONSTANT_P (imm1)
3612 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3613 return 0;
3614
3615 /* We can't allow TFmode in 32-bit mode, because an offset greater
3616 than the alignment (8) may cause the LO_SUM to overflow. */
3617 if (mode == TFmode && TARGET_ARCH32)
3618 return 0;
3619 }
3620 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
3621 return 1;
3622 else
3623 return 0;
3624
3625 if (GET_CODE (rs1) == SUBREG)
3626 rs1 = SUBREG_REG (rs1);
3627 if (!REG_P (rs1))
3628 return 0;
3629
3630 if (rs2)
3631 {
3632 if (GET_CODE (rs2) == SUBREG)
3633 rs2 = SUBREG_REG (rs2);
3634 if (!REG_P (rs2))
3635 return 0;
3636 }
3637
3638 if (strict)
3639 {
3640 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
3641 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
3642 return 0;
3643 }
3644 else
3645 {
3646 if ((! SPARC_INT_REG_P (REGNO (rs1))
3647 && REGNO (rs1) != FRAME_POINTER_REGNUM
3648 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
3649 || (rs2
3650 && (! SPARC_INT_REG_P (REGNO (rs2))
3651 && REGNO (rs2) != FRAME_POINTER_REGNUM
3652 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
3653 return 0;
3654 }
3655 return 1;
3656 }
3657
3658 /* Return the SYMBOL_REF for the tls_get_addr function. */
3659
3660 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
3661
3662 static rtx
3663 sparc_tls_get_addr (void)
3664 {
3665 if (!sparc_tls_symbol)
3666 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
3667
3668 return sparc_tls_symbol;
3669 }
3670
3671 /* Return the Global Offset Table to be used in TLS mode. */
3672
3673 static rtx
3674 sparc_tls_got (void)
3675 {
3676 /* In PIC mode, this is just the PIC offset table. */
3677 if (flag_pic)
3678 {
3679 crtl->uses_pic_offset_table = 1;
3680 return pic_offset_table_rtx;
3681 }
3682
3683 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
3684 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
3685 if (TARGET_SUN_TLS && TARGET_ARCH32)
3686 {
3687 load_got_register ();
3688 return global_offset_table_rtx;
3689 }
3690
3691 /* In all other cases, we load a new pseudo with the GOT symbol. */
3692 return copy_to_reg (sparc_got ());
3693 }
3694
3695 /* Return true if X contains a thread-local symbol. */
3696
3697 static bool
3698 sparc_tls_referenced_p (rtx x)
3699 {
3700 if (!TARGET_HAVE_TLS)
3701 return false;
3702
3703 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3704 x = XEXP (XEXP (x, 0), 0);
3705
3706 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
3707 return true;
3708
3709 /* That's all we handle in sparc_legitimize_tls_address for now. */
3710 return false;
3711 }
3712
3713 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
3714 this (thread-local) address. */
3715
3716 static rtx
3717 sparc_legitimize_tls_address (rtx addr)
3718 {
3719 rtx temp1, temp2, temp3, ret, o0, got, insn;
3720
3721 gcc_assert (can_create_pseudo_p ());
3722
3723 if (GET_CODE (addr) == SYMBOL_REF)
3724 switch (SYMBOL_REF_TLS_MODEL (addr))
3725 {
3726 case TLS_MODEL_GLOBAL_DYNAMIC:
3727 start_sequence ();
3728 temp1 = gen_reg_rtx (SImode);
3729 temp2 = gen_reg_rtx (SImode);
3730 ret = gen_reg_rtx (Pmode);
3731 o0 = gen_rtx_REG (Pmode, 8);
3732 got = sparc_tls_got ();
3733 emit_insn (gen_tgd_hi22 (temp1, addr));
3734 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
3735 if (TARGET_ARCH32)
3736 {
3737 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
3738 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
3739 addr, const1_rtx));
3740 }
3741 else
3742 {
3743 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
3744 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
3745 addr, const1_rtx));
3746 }
3747 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
3748 insn = get_insns ();
3749 end_sequence ();
3750 emit_libcall_block (insn, ret, o0, addr);
3751 break;
3752
3753 case TLS_MODEL_LOCAL_DYNAMIC:
3754 start_sequence ();
3755 temp1 = gen_reg_rtx (SImode);
3756 temp2 = gen_reg_rtx (SImode);
3757 temp3 = gen_reg_rtx (Pmode);
3758 ret = gen_reg_rtx (Pmode);
3759 o0 = gen_rtx_REG (Pmode, 8);
3760 got = sparc_tls_got ();
3761 emit_insn (gen_tldm_hi22 (temp1));
3762 emit_insn (gen_tldm_lo10 (temp2, temp1));
3763 if (TARGET_ARCH32)
3764 {
3765 emit_insn (gen_tldm_add32 (o0, got, temp2));
3766 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
3767 const1_rtx));
3768 }
3769 else
3770 {
3771 emit_insn (gen_tldm_add64 (o0, got, temp2));
3772 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
3773 const1_rtx));
3774 }
3775 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
3776 insn = get_insns ();
3777 end_sequence ();
3778 emit_libcall_block (insn, temp3, o0,
3779 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
3780 UNSPEC_TLSLD_BASE));
3781 temp1 = gen_reg_rtx (SImode);
3782 temp2 = gen_reg_rtx (SImode);
3783 emit_insn (gen_tldo_hix22 (temp1, addr));
3784 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
3785 if (TARGET_ARCH32)
3786 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
3787 else
3788 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
3789 break;
3790
3791 case TLS_MODEL_INITIAL_EXEC:
3792 temp1 = gen_reg_rtx (SImode);
3793 temp2 = gen_reg_rtx (SImode);
3794 temp3 = gen_reg_rtx (Pmode);
3795 got = sparc_tls_got ();
3796 emit_insn (gen_tie_hi22 (temp1, addr));
3797 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
3798 if (TARGET_ARCH32)
3799 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
3800 else
3801 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
3802 if (TARGET_SUN_TLS)
3803 {
3804 ret = gen_reg_rtx (Pmode);
3805 if (TARGET_ARCH32)
3806 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
3807 temp3, addr));
3808 else
3809 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
3810 temp3, addr));
3811 }
3812 else
3813 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
3814 break;
3815
3816 case TLS_MODEL_LOCAL_EXEC:
3817 temp1 = gen_reg_rtx (Pmode);
3818 temp2 = gen_reg_rtx (Pmode);
3819 if (TARGET_ARCH32)
3820 {
3821 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
3822 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
3823 }
3824 else
3825 {
3826 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
3827 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
3828 }
3829 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
3830 break;
3831
3832 default:
3833 gcc_unreachable ();
3834 }
3835
3836 else if (GET_CODE (addr) == CONST)
3837 {
3838 rtx base, offset;
3839
3840 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
3841
3842 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
3843 offset = XEXP (XEXP (addr, 0), 1);
3844
3845 base = force_operand (base, NULL_RTX);
3846 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
3847 offset = force_reg (Pmode, offset);
3848 ret = gen_rtx_PLUS (Pmode, base, offset);
3849 }
3850
3851 else
3852 gcc_unreachable (); /* for now ... */
3853
3854 return ret;
3855 }
3856
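/* As an illustration, in 32-bit mode the global-dynamic case above
   expands to a sequence along these lines, assuming the standard SPARC
   TLS operand syntax (`sym' stands for the thread-local symbol, %l7
   holds the GOT pointer, and the temporaries are really pseudos):

       sethi  %tgd_hi22(sym), %o1
       add    %o1, %tgd_lo10(sym), %o1
       add    %l7, %o1, %o0, %tgd_add(sym)
       call   __tls_get_addr, %tgd_call(sym)
        nop

   The address comes back in %o0, and emit_libcall_block records the
   equivalence with ADDR so the call can be optimized away when the
   result is already available.  */
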
3857 /* Legitimize PIC addresses. If the address is already position-independent,
3858 we return ORIG. Newly generated position-independent addresses go into a
3859 reg. This is REG if nonzero, otherwise we allocate register(s) as
3860 necessary. */
3861
3862 static rtx
3863 sparc_legitimize_pic_address (rtx orig, rtx reg)
3864 {
3865 bool gotdata_op = false;
3866
3867 if (GET_CODE (orig) == SYMBOL_REF
3868 /* See the comment in sparc_expand_move. */
3869 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
3870 {
3871 rtx pic_ref, address;
3872 rtx insn;
3873
3874 if (reg == 0)
3875 {
3876 gcc_assert (can_create_pseudo_p ());
3877 reg = gen_reg_rtx (Pmode);
3878 }
3879
3880 if (flag_pic == 2)
3881 {
3882 /* If not during reload, allocate another temp reg here for loading
3883 in the address, so that these instructions can be optimized
3884 properly. */
3885 rtx temp_reg = (! can_create_pseudo_p ()
3886 ? reg : gen_reg_rtx (Pmode));
3887
3888 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
3889 won't get confused into thinking that these two instructions
3890 are loading in the true address of the symbol. If in the
3891 future a PIC rtx exists, that should be used instead. */
3892 if (TARGET_ARCH64)
3893 {
3894 emit_insn (gen_movdi_high_pic (temp_reg, orig));
3895 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
3896 }
3897 else
3898 {
3899 emit_insn (gen_movsi_high_pic (temp_reg, orig));
3900 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
3901 }
3902 address = temp_reg;
3903 gotdata_op = true;
3904 }
3905 else
3906 address = orig;
3907
3908 crtl->uses_pic_offset_table = 1;
3909 if (gotdata_op)
3910 {
3911 if (TARGET_ARCH64)
3912 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
3913 pic_offset_table_rtx,
3914 address, orig));
3915 else
3916 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
3917 pic_offset_table_rtx,
3918 address, orig));
3919 }
3920 else
3921 {
3922 pic_ref
3923 = gen_const_mem (Pmode,
3924 gen_rtx_PLUS (Pmode,
3925 pic_offset_table_rtx, address));
3926 insn = emit_move_insn (reg, pic_ref);
3927 }
3928
3929 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3930 by the loop optimizer. */
3931 set_unique_reg_note (insn, REG_EQUAL, orig);
3932 return reg;
3933 }
3934 else if (GET_CODE (orig) == CONST)
3935 {
3936 rtx base, offset;
3937
3938 if (GET_CODE (XEXP (orig, 0)) == PLUS
3939 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
3940 return orig;
3941
3942 if (reg == 0)
3943 {
3944 gcc_assert (can_create_pseudo_p ());
3945 reg = gen_reg_rtx (Pmode);
3946 }
3947
3948 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3949 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
3950 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
3951 base == reg ? NULL_RTX : reg);
3952
3953 if (GET_CODE (offset) == CONST_INT)
3954 {
3955 if (SMALL_INT (offset))
3956 return plus_constant (Pmode, base, INTVAL (offset));
3957 else if (can_create_pseudo_p ())
3958 offset = force_reg (Pmode, offset);
3959 else
3960 /* If we reach here, then something is seriously wrong. */
3961 gcc_unreachable ();
3962 }
3963 return gen_rtx_PLUS (Pmode, base, offset);
3964 }
3965 else if (GET_CODE (orig) == LABEL_REF)
3966 /* ??? We ought to be checking that the register is live instead, in case
3967 it is eliminated. */
3968 crtl->uses_pic_offset_table = 1;
3969
3970 return orig;
3971 }
3972
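/* For reference, when the assembler supports GOTDATA operators
   (HAVE_AS_SPARC_GOTDATA_OP), the flag_pic == 2 path above produces
   roughly the following 32-bit sequence for a symbol `sym' (sketch;
   the registers are pseudos in reality):

       sethi  %gdop_hix22(sym), %g1
       xor    %g1, %gdop_lox10(sym), %g1
       ld     [%l7 + %g1], %g2, %gdop(sym)

   The %gdop annotations let the linker relax the GOT load into a
   direct address computation when the symbol ends up being local.  */
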
3973 /* Try machine-dependent ways of modifying an illegitimate address X
3974 to be legitimate. If we find one, return the new, valid address.
3975
3976 OLDX is the address as it was before break_out_memory_refs was called.
3977 In some cases it is useful to look at this to decide what needs to be done.
3978
3979 MODE is the mode of the operand pointed to by X.
3980
3981 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
3982
3983 static rtx
3984 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3985 enum machine_mode mode)
3986 {
3987 rtx orig_x = x;
3988
3989 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
3990 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
3991 force_operand (XEXP (x, 0), NULL_RTX));
3992 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
3993 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
3994 force_operand (XEXP (x, 1), NULL_RTX));
3995 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
3996 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
3997 XEXP (x, 1));
3998 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
3999 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4000 force_operand (XEXP (x, 1), NULL_RTX));
4001
4002 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4003 return x;
4004
4005 if (sparc_tls_referenced_p (x))
4006 x = sparc_legitimize_tls_address (x);
4007 else if (flag_pic)
4008 x = sparc_legitimize_pic_address (x, NULL_RTX);
4009 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4010 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4011 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4012 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4013 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4014 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4015 else if (GET_CODE (x) == SYMBOL_REF
4016 || GET_CODE (x) == CONST
4017 || GET_CODE (x) == LABEL_REF)
4018 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4019
4020 return x;
4021 }
4022
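/* Example: for an array access a[i] with 4-byte elements the incoming
   address may be (plus:SI (mult:SI (reg i) (const_int 4)) (reg a)).
   The first transformation above forces the MULT into a fresh pseudo T
   and returns the legitimate REG+REG form (plus:SI (reg a) (reg T)).
   (Illustrative only; the exact RTL depends on earlier passes.)  */
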
4023 /* Delegitimize an address that was legitimized by the above function. */
4024
4025 static rtx
4026 sparc_delegitimize_address (rtx x)
4027 {
4028 x = delegitimize_mem_from_attrs (x);
4029
4030 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4031 switch (XINT (XEXP (x, 1), 1))
4032 {
4033 case UNSPEC_MOVE_PIC:
4034 case UNSPEC_TLSLE:
4035 x = XVECEXP (XEXP (x, 1), 0, 0);
4036 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4037 break;
4038 default:
4039 break;
4040 }
4041
4042 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4043 if (GET_CODE (x) == MINUS
4044 && REG_P (XEXP (x, 0))
4045 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4046 && GET_CODE (XEXP (x, 1)) == LO_SUM
4047 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4048 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4049 {
4050 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4051 gcc_assert (GET_CODE (x) == LABEL_REF);
4052 }
4053
4054 return x;
4055 }
4056
4057 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4058 replace the input X, or the original X if no replacement is called for.
4059 The output parameter *WIN is 1 if the calling macro should goto WIN,
4060 0 if it should not.
4061
4062 For SPARC, we wish to handle addresses by splitting them into
4063 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4064 This cuts the number of extra insns by one.
4065
4066 Do nothing when generating PIC code and the address is a symbolic
4067 operand or requires a scratch register. */
4068
4069 rtx
4070 sparc_legitimize_reload_address (rtx x, enum machine_mode mode,
4071 int opnum, int type,
4072 int ind_levels ATTRIBUTE_UNUSED, int *win)
4073 {
4074 /* Decompose SImode constants into HIGH+LO_SUM. */
4075 if (CONSTANT_P (x)
4076 && (mode != TFmode || TARGET_ARCH64)
4077 && GET_MODE (x) == SImode
4078 && GET_CODE (x) != LO_SUM
4079 && GET_CODE (x) != HIGH
4080 && sparc_cmodel <= CM_MEDLOW
4081 && !(flag_pic
4082 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4083 {
4084 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4085 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4086 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4087 opnum, (enum reload_type)type);
4088 *win = 1;
4089 return x;
4090 }
4091
4092 /* We have to recognize what we have already generated above. */
4093 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4094 {
4095 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4096 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4097 opnum, (enum reload_type)type);
4098 *win = 1;
4099 return x;
4100 }
4101
4102 *win = 0;
4103 return x;
4104 }
4105
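/* Example: reloading the address of a static variable `var' with
   -mcmodel=medlow rewrites it as

       (lo_sum:SI (high:SI (symbol_ref "var")) (symbol_ref "var"))

   and pushes a reload for the HIGH part only, so the LO_SUM stays
   inside the memory reference and one extra add insn is saved.
   (Illustrative; `var' is a hypothetical symbol.)  */
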
4106 /* Return true if ADDR (a legitimate address expression)
4107 has an effect that depends on the machine mode it is used for.
4108
4109 In PIC mode,
4110
4111 (mem:HI [%l7+a])
4112
4113 is not equivalent to
4114
4115 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4116
4117 because [%l7+a+1] is interpreted as the address of (a+1). */
4118
4119
4120 static bool
4121 sparc_mode_dependent_address_p (const_rtx addr,
4122 addr_space_t as ATTRIBUTE_UNUSED)
4123 {
4124 if (flag_pic && GET_CODE (addr) == PLUS)
4125 {
4126 rtx op0 = XEXP (addr, 0);
4127 rtx op1 = XEXP (addr, 1);
4128 if (op0 == pic_offset_table_rtx
4129 && symbolic_operand (op1, VOIDmode))
4130 return true;
4131 }
4132
4133 return false;
4134 }
4135
4136 #ifdef HAVE_GAS_HIDDEN
4137 # define USE_HIDDEN_LINKONCE 1
4138 #else
4139 # define USE_HIDDEN_LINKONCE 0
4140 #endif
4141
4142 static void
4143 get_pc_thunk_name (char name[32], unsigned int regno)
4144 {
4145 const char *reg_name = reg_names[regno];
4146
4147 /* Skip the leading '%' as that cannot be used in a
4148 symbol name. */
4149 reg_name += 1;
4150
4151 if (USE_HIDDEN_LINKONCE)
4152 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4153 else
4154 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4155 }
4156
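/* For example, for the GOT register %l7 this produces the comdat name
   "__sparc_get_pc_thunk.l7" when hidden linkonce symbols are usable,
   and an internal `LADDPC' label keyed to the register number
   otherwise.  */
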
4157 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4158
4159 static rtx
4160 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4161 {
4162 int orig_flag_pic = flag_pic;
4163 rtx insn;
4164
4165 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4166 flag_pic = 0;
4167 if (TARGET_ARCH64)
4168 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4169 else
4170 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4171 flag_pic = orig_flag_pic;
4172
4173 return insn;
4174 }
4175
4176 /* Emit code to load the GOT register. */
4177
4178 void
4179 load_got_register (void)
4180 {
4181 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4182 if (!global_offset_table_rtx)
4183 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4184
4185 if (TARGET_VXWORKS_RTP)
4186 emit_insn (gen_vxworks_load_got ());
4187 else
4188 {
4189 /* The GOT symbol is subject to a PC-relative relocation so we need a
4190 helper function to add the PC value and thus get the final value. */
4191 if (!got_helper_rtx)
4192 {
4193 char name[32];
4194 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4195 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4196 }
4197
4198 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4199 got_helper_rtx,
4200 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4201 }
4202
4203 /* Need to emit this whether or not we obey regdecls,
4204 since setjmp/longjmp can cause the liveness info to be wrong.
4205 ??? In the case where we don't obey regdecls, this is not sufficient
4206 since we may not fall out the bottom. */
4207 emit_use (global_offset_table_rtx);
4208 }
4209
4210 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4211 address of the call target. */
4212
4213 void
4214 sparc_emit_call_insn (rtx pat, rtx addr)
4215 {
4216 rtx insn;
4217
4218 insn = emit_call_insn (pat);
4219
4220 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4221 if (TARGET_VXWORKS_RTP
4222 && flag_pic
4223 && GET_CODE (addr) == SYMBOL_REF
4224 && (SYMBOL_REF_DECL (addr)
4225 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4226 : !SYMBOL_REF_LOCAL_P (addr)))
4227 {
4228 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4229 crtl->uses_pic_offset_table = 1;
4230 }
4231 }
4232 \f
4233 /* Return 1 if RTX is a MEM which is known to be aligned to at
4234 least a DESIRED byte boundary. */
4235
4236 int
4237 mem_min_alignment (rtx mem, int desired)
4238 {
4239 rtx addr, base, offset;
4240
4241 /* If it's not a MEM we can't accept it. */
4242 if (GET_CODE (mem) != MEM)
4243 return 0;
4244
4245 /* Obviously... */
4246 if (!TARGET_UNALIGNED_DOUBLES
4247 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4248 return 1;
4249
4250 /* ??? The rest of the function predates MEM_ALIGN so
4251 there is probably a bit of redundancy. */
4252 addr = XEXP (mem, 0);
4253 base = offset = NULL_RTX;
4254 if (GET_CODE (addr) == PLUS)
4255 {
4256 if (GET_CODE (XEXP (addr, 0)) == REG)
4257 {
4258 base = XEXP (addr, 0);
4259
4260 /* What we are saying here is that if the base
4261 REG is aligned properly, the compiler will make
4262 sure any REG based index upon it will be so
4263 as well. */
4264 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4265 offset = XEXP (addr, 1);
4266 else
4267 offset = const0_rtx;
4268 }
4269 }
4270 else if (GET_CODE (addr) == REG)
4271 {
4272 base = addr;
4273 offset = const0_rtx;
4274 }
4275
4276 if (base != NULL_RTX)
4277 {
4278 int regno = REGNO (base);
4279
4280 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4281 {
4282 /* Check if the compiler has recorded some information
4283 about the alignment of the base REG. If reload has
4284 completed, we already matched with proper alignments.
4285 If not running global_alloc, reload might give us
4286 an unaligned pointer to the local stack, though. */
4287 if (((cfun != 0
4288 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4289 || (optimize && reload_completed))
4290 && (INTVAL (offset) & (desired - 1)) == 0)
4291 return 1;
4292 }
4293 else
4294 {
4295 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4296 return 1;
4297 }
4298 }
4299 else if (! TARGET_UNALIGNED_DOUBLES
4300 || CONSTANT_P (addr)
4301 || GET_CODE (addr) == LO_SUM)
4302 {
4303 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4304 is true, in which case we can only assume that an access is aligned if
4305 it is to a constant address, or the address involves a LO_SUM. */
4306 return 1;
4307 }
4308
4309 /* An obviously unaligned address. */
4310 return 0;
4311 }
4312
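/* Example: for (mem:DF (plus (reg %fp) (const_int -16))) in 32-bit
   mode (SPARC_STACK_BIAS == 0), mem_min_alignment (mem, 8) returns 1
   since the frame pointer is assumed 8-byte aligned and -16 is a
   multiple of 8; with a displacement of -12 it would return 0.
   (Illustrative sketch.)  */
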
4313 \f
4314 /* Vectors to keep interesting information about registers where it can easily
4315 be found. We used to use the actual mode value as the bit number, but there
4316 are more than 32 modes now. Instead we use two tables: one indexed by
4317 hard register number, and one indexed by mode. */
4318
4319 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4320 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4321 mapped into one sparc_mode_class mode. */
4322
4323 enum sparc_mode_class {
4324 S_MODE, D_MODE, T_MODE, O_MODE,
4325 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4326 CC_MODE, CCFP_MODE
4327 };
4328
4329 /* Modes for single-word and smaller quantities. */
4330 #define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4331
4332 /* Modes for double-word and smaller quantities. */
4333 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4334
4335 /* Modes for quad-word and smaller quantities. */
4336 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4337
4338 /* Modes for 8-word and smaller quantities. */
4339 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4340
4341 /* Modes for single-float quantities. We must allow any single word or
4342 smaller quantity. This is because the fix/float conversion instructions
4343 take integer inputs/outputs from the float registers. */
4344 #define SF_MODES (S_MODES)
4345
4346 /* Modes for double-float and smaller quantities. */
4347 #define DF_MODES (D_MODES)
4348
4349 /* Modes for quad-float and smaller quantities. */
4350 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4351
4352 /* Modes for quad-float pairs and smaller quantities. */
4353 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4354
4355 /* Modes for double-float only quantities. */
4356 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4357
4358 /* Modes for quad-float and double-float only quantities. */
4359 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4360
4361 /* Modes for quad-float pairs and double-float only quantities. */
4362 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4363
4364 /* Modes for condition codes. */
4365 #define CC_MODES (1 << (int) CC_MODE)
4366 #define CCFP_MODES (1 << (int) CCFP_MODE)
4367
4368 /* Value is 1 if register/mode pair is acceptable on sparc.
4369 The funny mixture of D and T modes is because integer operations
4370 do not operate specially on tetra (128-bit) quantities, so non-quad-aligned
4371 registers can hold quadword quantities (except %o4 and %i4 because
4372 they cross fixed registers). */
4373
4374 /* This points to either the 32 bit or the 64 bit version. */
4375 const int *hard_regno_mode_classes;
4376
4377 static const int hard_32bit_mode_classes[] = {
4378 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4379 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4380 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4381 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4382
4383 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4384 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4385 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4386 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4387
4388 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4389 and none can hold SFmode/SImode values. */
4390 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4391 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4392 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4393 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4394
4395 /* %fcc[0123] */
4396 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4397
4398 /* %icc, %sfp, %gsr */
4399 CC_MODES, 0, D_MODES
4400 };
4401
4402 static const int hard_64bit_mode_classes[] = {
4403 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4404 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4405 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4406 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4407
4408 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4409 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4410 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4411 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4412
4413 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4414 and none can hold SFmode/SImode values. */
4415 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4416 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4417 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4418 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4419
4420 /* %fcc[0123] */
4421 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4422
4423 /* %icc, %sfp, %gsr */
4424 CC_MODES, 0, D_MODES
4425 };
4426
4427 int sparc_mode_class [NUM_MACHINE_MODES];
4428
4429 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4430
4431 static void
4432 sparc_init_modes (void)
4433 {
4434 int i;
4435
4436 for (i = 0; i < NUM_MACHINE_MODES; i++)
4437 {
4438 switch (GET_MODE_CLASS (i))
4439 {
4440 case MODE_INT:
4441 case MODE_PARTIAL_INT:
4442 case MODE_COMPLEX_INT:
4443 if (GET_MODE_SIZE (i) <= 4)
4444 sparc_mode_class[i] = 1 << (int) S_MODE;
4445 else if (GET_MODE_SIZE (i) == 8)
4446 sparc_mode_class[i] = 1 << (int) D_MODE;
4447 else if (GET_MODE_SIZE (i) == 16)
4448 sparc_mode_class[i] = 1 << (int) T_MODE;
4449 else if (GET_MODE_SIZE (i) == 32)
4450 sparc_mode_class[i] = 1 << (int) O_MODE;
4451 else
4452 sparc_mode_class[i] = 0;
4453 break;
4454 case MODE_VECTOR_INT:
4455 if (GET_MODE_SIZE (i) <= 4)
4456 sparc_mode_class[i] = 1 << (int)SF_MODE;
4457 else if (GET_MODE_SIZE (i) == 8)
4458 sparc_mode_class[i] = 1 << (int)DF_MODE;
4459 break;
4460 case MODE_FLOAT:
4461 case MODE_COMPLEX_FLOAT:
4462 if (GET_MODE_SIZE (i) <= 4)
4463 sparc_mode_class[i] = 1 << (int) SF_MODE;
4464 else if (GET_MODE_SIZE (i) == 8)
4465 sparc_mode_class[i] = 1 << (int) DF_MODE;
4466 else if (GET_MODE_SIZE (i) == 16)
4467 sparc_mode_class[i] = 1 << (int) TF_MODE;
4468 else if (GET_MODE_SIZE (i) == 32)
4469 sparc_mode_class[i] = 1 << (int) OF_MODE;
4470 else
4471 sparc_mode_class[i] = 0;
4472 break;
4473 case MODE_CC:
4474 if (i == (int) CCFPmode || i == (int) CCFPEmode)
4475 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4476 else
4477 sparc_mode_class[i] = 1 << (int) CC_MODE;
4478 break;
4479 default:
4480 sparc_mode_class[i] = 0;
4481 break;
4482 }
4483 }
4484
4485 if (TARGET_ARCH64)
4486 hard_regno_mode_classes = hard_64bit_mode_classes;
4487 else
4488 hard_regno_mode_classes = hard_32bit_mode_classes;
4489
4490 /* Initialize the array used by REGNO_REG_CLASS. */
4491 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4492 {
4493 if (i < 16 && TARGET_V8PLUS)
4494 sparc_regno_reg_class[i] = I64_REGS;
4495 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4496 sparc_regno_reg_class[i] = GENERAL_REGS;
4497 else if (i < 64)
4498 sparc_regno_reg_class[i] = FP_REGS;
4499 else if (i < 96)
4500 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4501 else if (i < 100)
4502 sparc_regno_reg_class[i] = FPCC_REGS;
4503 else
4504 sparc_regno_reg_class[i] = NO_REGS;
4505 }
4506 }
4507 \f
4508 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4509
4510 static inline bool
4511 save_global_or_fp_reg_p (unsigned int regno,
4512 int leaf_function ATTRIBUTE_UNUSED)
4513 {
4514 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4515 }
4516
4517 /* Return whether the return address register (%i7) is needed. */
4518
4519 static inline bool
4520 return_addr_reg_needed_p (int leaf_function)
4521 {
4522 /* If it is live, for example because of __builtin_return_address (0). */
4523 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4524 return true;
4525
4526 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4527 if (!leaf_function
4528 /* Loading the GOT register clobbers %o7. */
4529 || crtl->uses_pic_offset_table
4530 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4531 return true;
4532
4533 return false;
4534 }
4535
4536 /* Return whether REGNO, a local or in register, must be saved/restored. */
4537
4538 static bool
4539 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4540 {
4541 /* General case: call-saved registers live at some point. */
4542 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4543 return true;
4544
4545 /* Frame pointer register (%fp) if needed. */
4546 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4547 return true;
4548
4549 /* Return address register (%i7) if needed. */
4550 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4551 return true;
4552
4553 /* GOT register (%l7) if needed. */
4554 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4555 return true;
4556
4557 /* If the function accesses prior frames, the frame pointer and the return
4558 address of the previous frame must be saved on the stack. */
4559 if (crtl->accesses_prior_frames
4560 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
4561 return true;
4562
4563 return false;
4564 }
4565
4566 /* Compute the frame size required by the function. This function is called
4567 during the reload pass and also by sparc_expand_prologue. */
4568
4569 HOST_WIDE_INT
4570 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
4571 {
4572 HOST_WIDE_INT frame_size, apparent_frame_size;
4573 int args_size, n_global_fp_regs = 0;
4574 bool save_local_in_regs_p = false;
4575 unsigned int i;
4576
4577 /* If the function allocates dynamic stack space, the dynamic offset is
4578 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
4579 if (leaf_function && !cfun->calls_alloca)
4580 args_size = 0;
4581 else
4582 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
4583
4584 /* Calculate space needed for global registers. */
4585 if (TARGET_ARCH64)
4586 for (i = 0; i < 8; i++)
4587 if (save_global_or_fp_reg_p (i, 0))
4588 n_global_fp_regs += 2;
4589 else
4590 for (i = 0; i < 8; i += 2)
4591 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4592 n_global_fp_regs += 2;
4593
4594 /* In the flat window model, find out which local and in registers need to
4595 be saved. We don't reserve space in the current frame for them as they
4596 will be spilled into the register window save area of the caller's frame.
4597 However, as soon as we use this register window save area, we must create
4598 that of the current frame to make it the live one. */
4599 if (TARGET_FLAT)
4600 for (i = 16; i < 32; i++)
4601 if (save_local_or_in_reg_p (i, leaf_function))
4602 {
4603 save_local_in_regs_p = true;
4604 break;
4605 }
4606
4607 /* Calculate space needed for FP registers. */
4608 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
4609 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4610 n_global_fp_regs += 2;
4611
4612 if (size == 0
4613 && n_global_fp_regs == 0
4614 && args_size == 0
4615 && !save_local_in_regs_p)
4616 frame_size = apparent_frame_size = 0;
4617 else
4618 {
4619 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
4620 apparent_frame_size = (size - STARTING_FRAME_OFFSET + 7) & -8;
4621 apparent_frame_size += n_global_fp_regs * 4;
4622
4623 /* We need to add the size of the outgoing argument area. */
4624 frame_size = apparent_frame_size + ((args_size + 7) & -8);
4625
4626 /* And that of the register window save area. */
4627 frame_size += FIRST_PARM_OFFSET (cfun->decl);
4628
4629 /* Finally, bump to the appropriate alignment. */
4630 frame_size = SPARC_STACK_ALIGN (frame_size);
4631 }
4632
4633 /* Set up values for use in prologue and epilogue. */
4634 sparc_frame_size = frame_size;
4635 sparc_apparent_frame_size = apparent_frame_size;
4636 sparc_n_global_fp_regs = n_global_fp_regs;
4637 sparc_save_local_in_regs_p = save_local_in_regs_p;
4638
4639 return frame_size;
4640 }
4641
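/* Worked sketch (hypothetical numbers): in a 32-bit non-flat function
   with 20 bytes of locals, no call-saved global/FP registers and 8
   bytes of outgoing arguments, the apparent frame size is the locals
   rounded up to a multiple of 8; the full frame size additionally
   includes the rounded argument area plus the fixed register window
   save area (FIRST_PARM_OFFSET), and the total is then rounded up
   again via SPARC_STACK_ALIGN.  Only the apparent size lives in the
   current frame; the rest is ABI-mandated overhead.  */
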
4642 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
4643
4644 int
4645 sparc_initial_elimination_offset (int to)
4646 {
4647 int offset;
4648
4649 if (to == STACK_POINTER_REGNUM)
4650 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
4651 else
4652 offset = 0;
4653
4654 offset += SPARC_STACK_BIAS;
4655 return offset;
4656 }
4657
4658 /* Output any necessary .register pseudo-ops. */
4659
4660 void
4661 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
4662 {
4663 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
4664 int i;
4665
4666 if (TARGET_ARCH32)
4667 return;
4668
4669 /* Check if %g[2367] were used without
4670 .register being printed for them already. */
4671 for (i = 2; i < 8; i++)
4672 {
4673 if (df_regs_ever_live_p (i)
4674 && ! sparc_hard_reg_printed [i])
4675 {
4676 sparc_hard_reg_printed [i] = 1;
4677 /* %g7 is used as TLS base register, use #ignore
4678 for it instead of #scratch. */
4679 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
4680 i == 7 ? "ignore" : "scratch");
4681 }
4682 if (i == 3) i = 5;
4683 }
4684 #endif
4685 }
4686
4687 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
4688
4689 #if PROBE_INTERVAL > 4096
4690 #error Cannot use indexed addressing mode for stack probing
4691 #endif
4692
4693 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
4694 inclusive. These are offsets from the current stack pointer.
4695
4696 Note that we don't use the REG+REG addressing mode for the probes because
4697 of the stack bias in 64-bit mode. And it doesn't really buy us anything,
4698 so the advantage of having a single code path wins here. */
4699
4700 static void
4701 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
4702 {
4703 rtx g1 = gen_rtx_REG (Pmode, 1);
4704
4705 /* See if we have a constant small number of probes to generate. If so,
4706 that's the easy case. */
4707 if (size <= PROBE_INTERVAL)
4708 {
4709 emit_move_insn (g1, GEN_INT (first));
4710 emit_insn (gen_rtx_SET (VOIDmode, g1,
4711 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4712 emit_stack_probe (plus_constant (Pmode, g1, -size));
4713 }
4714
4715 /* The run-time loop is made up of 10 insns in the generic case while the
4716 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
4717 else if (size <= 5 * PROBE_INTERVAL)
4718 {
4719 HOST_WIDE_INT i;
4720
4721 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
4722 emit_insn (gen_rtx_SET (VOIDmode, g1,
4723 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4724 emit_stack_probe (g1);
4725
4726 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
4727 it exceeds SIZE. If only two probes are needed, this will not
4728 generate any code. Then probe at FIRST + SIZE. */
4729 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
4730 {
4731 emit_insn (gen_rtx_SET (VOIDmode, g1,
4732 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
4733 emit_stack_probe (g1);
4734 }
4735
4736 emit_stack_probe (plus_constant (Pmode, g1,
4737 (i - PROBE_INTERVAL) - size));
4738 }
4739
4740 /* Otherwise, do the same as above, but in a loop. Note that we must be
4741 extra careful with variables wrapping around because we might be at
4742 the very top (or the very bottom) of the address space and we have
4743 to be able to handle this case properly; in particular, we use an
4744 equality test for the loop condition. */
4745 else
4746 {
4747 HOST_WIDE_INT rounded_size;
4748 rtx g4 = gen_rtx_REG (Pmode, 4);
4749
4750 emit_move_insn (g1, GEN_INT (first));
4751
4752
4753 /* Step 1: round SIZE to the previous multiple of the interval. */
4754
4755 rounded_size = size & -PROBE_INTERVAL;
4756 emit_move_insn (g4, GEN_INT (rounded_size));
4757
4758
4759 /* Step 2: compute initial and final value of the loop counter. */
4760
4761 /* TEST_ADDR = SP + FIRST. */
4762 emit_insn (gen_rtx_SET (VOIDmode, g1,
4763 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4764
4765 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
4766 emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4)));
4767
4768
4769 /* Step 3: the loop
4770
4771 while (TEST_ADDR != LAST_ADDR)
4772 {
4773 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
4774 probe at TEST_ADDR
4775 }
4776
4777 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
4778 until it is equal to ROUNDED_SIZE. */
4779
4780 if (TARGET_ARCH64)
4781 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
4782 else
4783 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
4784
4785
4786 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
4787 that SIZE is equal to ROUNDED_SIZE. */
4788
4789 if (size != rounded_size)
4790 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
4791 }
4792
4793 /* Make sure nothing is scheduled before we are done. */
4794 emit_insn (gen_blockage ());
4795 }
4796
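/* Example: with the default PROBE_INTERVAL of 4096, the call
   sparc_emit_probe_stack_range (4096, 12288) takes the second branch
   above (12288 <= 5 * 4096) and emits probes at SP - 8192, SP - 12288
   and SP - 16384, i.e. at FIRST + PROBE_INTERVAL, FIRST + 2 *
   PROBE_INTERVAL and FIRST + SIZE below the incoming stack pointer.
   (Illustrative; any FIRST/SIZE pair in that range behaves the same
   way.)  */
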
4797 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
4798 absolute addresses. */
4799
4800 const char *
4801 output_probe_stack_range (rtx reg1, rtx reg2)
4802 {
4803 static int labelno = 0;
4804 char loop_lab[32], end_lab[32];
4805 rtx xops[2];
4806
4807 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
4808 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
4809
4810 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
4811
4812 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
4813 xops[0] = reg1;
4814 xops[1] = reg2;
4815 output_asm_insn ("cmp\t%0, %1", xops);
4816 if (TARGET_ARCH64)
4817 fputs ("\tbe,pn\t%xcc,", asm_out_file);
4818 else
4819 fputs ("\tbe\t", asm_out_file);
4820 assemble_name_raw (asm_out_file, end_lab);
4821 fputc ('\n', asm_out_file);
4822
4823 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
4824 xops[1] = GEN_INT (-PROBE_INTERVAL);
4825 output_asm_insn (" add\t%0, %1, %0", xops);
4826
4827 /* Probe at TEST_ADDR and branch. */
4828 if (TARGET_ARCH64)
4829 fputs ("\tba,pt\t%xcc,", asm_out_file);
4830 else
4831 fputs ("\tba\t", asm_out_file);
4832 assemble_name_raw (asm_out_file, loop_lab);
4833 fputc ('\n', asm_out_file);
4834 xops[1] = GEN_INT (SPARC_STACK_BIAS);
4835 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
4836
4837 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
4838
4839 return "";
4840 }
4841
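/* In 64-bit mode the emitted loop looks roughly like this, with
   REG1 = %g1 and REG2 = %g4 as set up by the caller, the default
   PROBE_INTERVAL of 4096 and the stack bias of 2047 (label spelling
   is target-dependent):

   .LPSRL0:
           cmp     %g1, %g4
           be,pn   %xcc, .LPSRE0
            add    %g1, -4096, %g1
           ba,pt   %xcc, .LPSRL0
            st     %g0, [%g1+2047]
   .LPSRE0:

   The add and the store sit in the delay slots of the branches.  */
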
4842 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
4843 needed. LOW is supposed to be double-word aligned for 32-bit registers.
4844 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
4845 is the action to be performed if SAVE_P returns true and ACTION_FALSE
4846 the action to be performed if it returns false. Return the new offset. */
4847
4848 typedef bool (*sorr_pred_t) (unsigned int, int);
4849 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
4850
4851 static int
4852 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
4853 int offset, int leaf_function, sorr_pred_t save_p,
4854 sorr_act_t action_true, sorr_act_t action_false)
4855 {
4856 unsigned int i;
4857 rtx mem, insn;
4858
4859 if (TARGET_ARCH64 && high <= 32)
4860 {
4861 int fp_offset = -1;
4862
4863 for (i = low; i < high; i++)
4864 {
4865 if (save_p (i, leaf_function))
4866 {
4867 mem = gen_frame_mem (DImode, plus_constant (Pmode,
4868 base, offset));
4869 if (action_true == SORR_SAVE)
4870 {
4871 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
4872 RTX_FRAME_RELATED_P (insn) = 1;
4873 }
4874 else /* action_true == SORR_RESTORE */
4875 {
4876 /* The frame pointer must be restored last since its old
4877 value may be used as the base address for the frame. This
4878 is problematic in 64-bit mode only because of the lack
4879 of a double-word load instruction. */
4880 if (i == HARD_FRAME_POINTER_REGNUM)
4881 fp_offset = offset;
4882 else
4883 emit_move_insn (gen_rtx_REG (DImode, i), mem);
4884 }
4885 offset += 8;
4886 }
4887 else if (action_false == SORR_ADVANCE)
4888 offset += 8;
4889 }
4890
4891 if (fp_offset >= 0)
4892 {
4893 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
4894 emit_move_insn (hard_frame_pointer_rtx, mem);
4895 }
4896 }
4897 else
4898 {
4899 for (i = low; i < high; i += 2)
4900 {
4901 bool reg0 = save_p (i, leaf_function);
4902 bool reg1 = save_p (i + 1, leaf_function);
4903 enum machine_mode mode;
4904 int regno;
4905
4906 if (reg0 && reg1)
4907 {
4908 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
4909 regno = i;
4910 }
4911 else if (reg0)
4912 {
4913 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
4914 regno = i;
4915 }
4916 else if (reg1)
4917 {
4918 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
4919 regno = i + 1;
4920 offset += 4;
4921 }
4922 else
4923 {
4924 if (action_false == SORR_ADVANCE)
4925 offset += 8;
4926 continue;
4927 }
4928
4929 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
4930 if (action_true == SORR_SAVE)
4931 {
4932 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
4933 RTX_FRAME_RELATED_P (insn) = 1;
4934 if (mode == DImode)
4935 {
4936 rtx set1, set2;
4937 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
4938 offset));
4939 set1 = gen_rtx_SET (VOIDmode, mem,
4940 gen_rtx_REG (SImode, regno));
4941 RTX_FRAME_RELATED_P (set1) = 1;
4942 mem
4943 = gen_frame_mem (SImode, plus_constant (Pmode, base,
4944 offset + 4));
4945 set2 = gen_rtx_SET (VOIDmode, mem,
4946 gen_rtx_REG (SImode, regno + 1));
4947 RTX_FRAME_RELATED_P (set2) = 1;
4948 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4949 gen_rtx_PARALLEL (VOIDmode,
4950 gen_rtvec (2, set1, set2)));
4951 }
4952 }
4953 else /* action_true == SORR_RESTORE */
4954 emit_move_insn (gen_rtx_REG (mode, regno), mem);
4955
4956 /* Always preserve double-word alignment. */
4957 offset = (offset + 8) & -8;
4958 }
4959 }
4960
4961 return offset;
4962 }
4963
4964 /* Emit code to adjust BASE to OFFSET. Return the new base. */
4965
4966 static rtx
4967 emit_adjust_base_to_offset (rtx base, int offset)
4968 {
4969 /* ??? This might be optimized a little as %g1 might already have a
4970 value close enough that a single add insn will do. */
4971 /* ??? Although, all of this is probably only a temporary fix because
4972 if %g1 can hold a function result, then sparc_expand_epilogue will
4973 lose (the result will be clobbered). */
4974 rtx new_base = gen_rtx_REG (Pmode, 1);
4975 emit_move_insn (new_base, GEN_INT (offset));
4976 emit_insn (gen_rtx_SET (VOIDmode,
4977 new_base, gen_rtx_PLUS (Pmode, base, new_base)));
4978 return new_base;
4979 }
4980
4981 /* Emit code to save/restore call-saved global and FP registers. */
4982
4983 static void
4984 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
4985 {
4986 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
4987 {
4988 base = emit_adjust_base_to_offset (base, offset);
4989 offset = 0;
4990 }
4991
4992 offset
4993 = emit_save_or_restore_regs (0, 8, base, offset, 0,
4994 save_global_or_fp_reg_p, action, SORR_NONE);
4995 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
4996 save_global_or_fp_reg_p, action, SORR_NONE);
4997 }
4998
4999 /* Emit code to save/restore call-saved local and in registers. */
5000
5001 static void
5002 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5003 {
5004 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5005 {
5006 base = emit_adjust_base_to_offset (base, offset);
5007 offset = 0;
5008 }
5009
5010 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5011 save_local_or_in_reg_p, action, SORR_ADVANCE);
5012 }
5013
5014 /* Emit a window_save insn. */
5015
5016 static rtx
5017 emit_window_save (rtx increment)
5018 {
5019 rtx insn = emit_insn (gen_window_save (increment));
5020 RTX_FRAME_RELATED_P (insn) = 1;
5021
5022 /* The incoming return address (%o7) is saved in %i7. */
5023 add_reg_note (insn, REG_CFA_REGISTER,
5024 gen_rtx_SET (VOIDmode,
5025 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5026 gen_rtx_REG (Pmode,
5027 INCOMING_RETURN_ADDR_REGNUM)));
5028
5029 /* The window save event. */
5030 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5031
5032 /* The CFA is %fp, the hard frame pointer. */
5033 add_reg_note (insn, REG_CFA_DEF_CFA,
5034 plus_constant (Pmode, hard_frame_pointer_rtx,
5035 INCOMING_FRAME_SP_OFFSET));
5036
5037 return insn;
5038 }
5039
5040 /* Generate an increment for the stack pointer. */
5041
5042 static rtx
5043 gen_stack_pointer_inc (rtx increment)
5044 {
5045 return gen_rtx_SET (VOIDmode,
5046 stack_pointer_rtx,
5047 gen_rtx_PLUS (Pmode,
5048 stack_pointer_rtx,
5049 increment));
5050 }
5051
5052 /* Expand the function prologue. The prologue is responsible for reserving
5053 storage for the frame, saving the call-saved registers and loading the
5054 GOT register if needed. */
5055
5056 void
5057 sparc_expand_prologue (void)
5058 {
5059 HOST_WIDE_INT size;
5060 rtx insn;
5061
5062 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5063 on the final value of the flag means deferring the prologue/epilogue
5064 expansion until just before the second scheduling pass, which is too
5065 late to emit multiple epilogues or return insns.
5066
5067 Of course we are making the assumption that the value of the flag
5068 will not change between now and its final value. Of the three parts
5069 of the formula, only the last one can reasonably vary. Let's take a
5070 closer look, after assuming that the first two are set to true
5071 (otherwise the last value is effectively silenced).
5072
5073 If only_leaf_regs_used returns false, the global predicate will also
5074 be false so the actual frame size calculated below will be positive.
5075 As a consequence, the save_register_window insn will be emitted in
5076 the instruction stream; now this insn explicitly references %fp
5077 which is not a leaf register so only_leaf_regs_used will always
5078 return false subsequently.
5079
5080 If only_leaf_regs_used returns true, we hope that the subsequent
5081 optimization passes won't cause non-leaf registers to pop up. For
5082 example, the regrename pass has special provisions to not rename to
5083 non-leaf registers in a leaf function. */
5084 sparc_leaf_function_p
5085 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5086
5087 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5088
5089 if (flag_stack_usage_info)
5090 current_function_static_stack_size = size;
5091
5092 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
5093 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5094
5095 if (size == 0)
5096 ; /* do nothing. */
5097 else if (sparc_leaf_function_p)
5098 {
5099 rtx size_int_rtx = GEN_INT (-size);
5100
5101 if (size <= 4096)
5102 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5103 else if (size <= 8192)
5104 {
5105 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5106 RTX_FRAME_RELATED_P (insn) = 1;
5107
5108 /* %sp is still the CFA register. */
5109 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5110 }
5111 else
5112 {
5113 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5114 emit_move_insn (size_rtx, size_int_rtx);
5115 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5116 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5117 gen_stack_pointer_inc (size_int_rtx));
5118 }
5119
5120 RTX_FRAME_RELATED_P (insn) = 1;
5121 }
5122 else
5123 {
5124 rtx size_int_rtx = GEN_INT (-size);
5125
5126 if (size <= 4096)
5127 emit_window_save (size_int_rtx);
5128 else if (size <= 8192)
5129 {
5130 emit_window_save (GEN_INT (-4096));
5131
5132 /* %sp is not the CFA register anymore. */
5133 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5134
5135 /* Make sure no %fp-based store is issued until after the frame is
5136 established. The offset between the frame pointer and the stack
5137 pointer is calculated relative to the value of the stack pointer
5138 at the end of the function prologue, and moving instructions that
5139 access the stack via the frame pointer between the instructions
5140 that decrement the stack pointer could result in accessing the
5141 register window save area, which is volatile. */
5142 emit_insn (gen_frame_blockage ());
5143 }
5144 else
5145 {
5146 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5147 emit_move_insn (size_rtx, size_int_rtx);
5148 emit_window_save (size_rtx);
5149 }
5150 }
5151
5152 if (sparc_leaf_function_p)
5153 {
5154 sparc_frame_base_reg = stack_pointer_rtx;
5155 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5156 }
5157 else
5158 {
5159 sparc_frame_base_reg = hard_frame_pointer_rtx;
5160 sparc_frame_base_offset = SPARC_STACK_BIAS;
5161 }
5162
5163 if (sparc_n_global_fp_regs > 0)
5164 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5165 sparc_frame_base_offset
5166 - sparc_apparent_frame_size,
5167 SORR_SAVE);
5168
5169 /* Load the GOT register if needed. */
5170 if (crtl->uses_pic_offset_table)
5171 load_got_register ();
5172
5173 /* Advertise that the data calculated just above are now valid. */
5174 sparc_prologue_data_valid_p = true;
5175 }
5176
5177 /* Expand the function prologue for the flat register window model. The
5178 prologue is responsible for reserving storage for the frame, saving the
5179 call-saved registers and loading the GOT register if needed. */
5180
5181 void
5182 sparc_flat_expand_prologue (void)
5183 {
5184 HOST_WIDE_INT size;
5185 rtx insn;
5186
5187 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5188
5189 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5190
5191 if (flag_stack_usage_info)
5192 current_function_static_stack_size = size;
5193
5194 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
5195 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5196
5197 if (sparc_save_local_in_regs_p)
5198 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5199 SORR_SAVE);
5200
5201 if (size == 0)
5202 ; /* do nothing. */
5203 else
5204 {
5205 rtx size_int_rtx, size_rtx;
5206
5207 size_rtx = size_int_rtx = GEN_INT (-size);
5208
5209 /* We establish the frame (i.e. decrement the stack pointer) first, even
5210 if we use a frame pointer, because we cannot clobber any call-saved
5211 registers, including the frame pointer, if we haven't created a new
5212 register save area, for the sake of compatibility with the ABI. */
5213 if (size <= 4096)
5214 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5215 else if (size <= 8192 && !frame_pointer_needed)
5216 {
5217 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5218 RTX_FRAME_RELATED_P (insn) = 1;
5219 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5220 }
5221 else
5222 {
5223 size_rtx = gen_rtx_REG (Pmode, 1);
5224 emit_move_insn (size_rtx, size_int_rtx);
5225 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5226 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5227 gen_stack_pointer_inc (size_int_rtx));
5228 }
5229 RTX_FRAME_RELATED_P (insn) = 1;
5230
5231 /* Ensure nothing is scheduled until after the frame is established. */
5232 emit_insn (gen_blockage ());
5233
5234 if (frame_pointer_needed)
5235 {
5236 insn = emit_insn (gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5237 gen_rtx_MINUS (Pmode,
5238 stack_pointer_rtx,
5239 size_rtx)));
5240 RTX_FRAME_RELATED_P (insn) = 1;
5241
5242 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5243 gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5244 plus_constant (Pmode, stack_pointer_rtx,
5245 size)));
5246 }
5247
5248 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5249 {
5250 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5251 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5252
5253 insn = emit_move_insn (i7, o7);
5254 RTX_FRAME_RELATED_P (insn) = 1;
5255
5256 add_reg_note (insn, REG_CFA_REGISTER,
5257 gen_rtx_SET (VOIDmode, i7, o7));
5258
5259 /* Prevent this instruction from ever being considered dead,
5260 even if this function has no epilogue. */
5261 emit_use (i7);
5262 }
5263 }
5264
5265 if (frame_pointer_needed)
5266 {
5267 sparc_frame_base_reg = hard_frame_pointer_rtx;
5268 sparc_frame_base_offset = SPARC_STACK_BIAS;
5269 }
5270 else
5271 {
5272 sparc_frame_base_reg = stack_pointer_rtx;
5273 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5274 }
5275
5276 if (sparc_n_global_fp_regs > 0)
5277 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5278 sparc_frame_base_offset
5279 - sparc_apparent_frame_size,
5280 SORR_SAVE);
5281
5282 /* Load the GOT register if needed. */
5283 if (crtl->uses_pic_offset_table)
5284 load_got_register ();
5285
5286 /* Advertise that the data calculated just above are now valid. */
5287 sparc_prologue_data_valid_p = true;
5288 }
5289
5290 /* This function generates the assembly code for function entry, which boils
5291 down to emitting the necessary .register directives. */
5292
5293 static void
5294 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5295 {
5296 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5297 if (!TARGET_FLAT)
5298 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5299
5300 sparc_output_scratch_registers (file);
5301 }
5302
5303 /* Expand the function epilogue, either normal or part of a sibcall.
5304 We emit all the instructions except the return or the call. */
5305
5306 void
5307 sparc_expand_epilogue (bool for_eh)
5308 {
5309 HOST_WIDE_INT size = sparc_frame_size;
5310
5311 if (sparc_n_global_fp_regs > 0)
5312 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5313 sparc_frame_base_offset
5314 - sparc_apparent_frame_size,
5315 SORR_RESTORE);
5316
5317 if (size == 0 || for_eh)
5318 ; /* do nothing. */
5319 else if (sparc_leaf_function_p)
5320 {
5321 if (size <= 4096)
5322 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5323 else if (size <= 8192)
5324 {
5325 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5326 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5327 }
5328 else
5329 {
5330 rtx reg = gen_rtx_REG (Pmode, 1);
5331 emit_move_insn (reg, GEN_INT (size));
5332 emit_insn (gen_stack_pointer_inc (reg));
5333 }
5334 }
5335 }
5336
5337 /* Expand the flat-model function epilogue, either normal or part of a
5338 sibcall. We emit all the instructions except the return or the call. */
5339
5340 void
5341 sparc_flat_expand_epilogue (bool for_eh)
5342 {
5343 HOST_WIDE_INT size = sparc_frame_size;
5344
5345 if (sparc_n_global_fp_regs > 0)
5346 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5347 sparc_frame_base_offset
5348 - sparc_apparent_frame_size,
5349 SORR_RESTORE);
5350
5351 /* If we have a frame pointer, we'll need both to restore it before the
5352 frame is destroyed and to use its current value in destroying the frame.
5353 Since we don't have an atomic way to do that in the flat window model,
5354 we save the current value into a temporary register (%g1). */
5355 if (frame_pointer_needed && !for_eh)
5356 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5357
5358 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5359 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5360 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5361
5362 if (sparc_save_local_in_regs_p)
5363 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5364 sparc_frame_base_offset,
5365 SORR_RESTORE);
5366
5367 if (size == 0 || for_eh)
5368 ; /* do nothing. */
5369 else if (frame_pointer_needed)
5370 {
5371 /* Make sure the frame is destroyed after everything else is done. */
5372 emit_insn (gen_blockage ());
5373
5374 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5375 }
5376 else
5377 {
5378 /* Likewise. */
5379 emit_insn (gen_blockage ());
5380
5381 if (size <= 4096)
5382 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5383 else if (size <= 8192)
5384 {
5385 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5386 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5387 }
5388 else
5389 {
5390 rtx reg = gen_rtx_REG (Pmode, 1);
5391 emit_move_insn (reg, GEN_INT (size));
5392 emit_insn (gen_stack_pointer_inc (reg));
5393 }
5394 }
5395 }
5396
5397 /* Return true if it is appropriate to emit `return' instructions in the
5398 body of a function. */
5399
5400 bool
5401 sparc_can_use_return_insn_p (void)
5402 {
5403 return sparc_prologue_data_valid_p
5404 && sparc_n_global_fp_regs == 0
5405 && (TARGET_FLAT
5406 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5407 : (sparc_frame_size == 0 || !sparc_leaf_function_p));
5408 }
5409
5410 /* This function generates the assembly code for function exit. */
5411
5412 static void
5413 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5414 {
5415 /* If the last two instructions of a function are "call foo; dslot;"
5416 the return address might point to the first instruction in the next
5417 function and we have to output a dummy nop for the sake of sane
5418 backtraces in such cases. This is pointless for sibling calls since
5419 the return address is explicitly adjusted. */
5420
5421 rtx insn, last_real_insn;
5422
5423 insn = get_last_insn ();
5424
5425 last_real_insn = prev_real_insn (insn);
5426 if (last_real_insn
5427 && GET_CODE (last_real_insn) == INSN
5428 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5429 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5430
5431 if (last_real_insn
5432 && CALL_P (last_real_insn)
5433 && !SIBLING_CALL_P (last_real_insn))
5434 fputs("\tnop\n", file);
5435
5436 sparc_output_deferred_case_vectors ();
5437 }
5438
5439 /* Output a 'restore' instruction. */
5440
5441 static void
5442 output_restore (rtx pat)
5443 {
5444 rtx operands[3];
5445
5446 if (! pat)
5447 {
5448 fputs ("\t restore\n", asm_out_file);
5449 return;
5450 }
5451
5452 gcc_assert (GET_CODE (pat) == SET);
5453
5454 operands[0] = SET_DEST (pat);
5455 pat = SET_SRC (pat);
5456
5457 switch (GET_CODE (pat))
5458 {
5459 case PLUS:
5460 operands[1] = XEXP (pat, 0);
5461 operands[2] = XEXP (pat, 1);
5462 output_asm_insn (" restore %r1, %2, %Y0", operands);
5463 break;
5464 case LO_SUM:
5465 operands[1] = XEXP (pat, 0);
5466 operands[2] = XEXP (pat, 1);
5467 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5468 break;
5469 case ASHIFT:
5470 operands[1] = XEXP (pat, 0);
5471 gcc_assert (XEXP (pat, 1) == const1_rtx);
5472 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5473 break;
5474 default:
5475 operands[1] = pat;
5476 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5477 break;
5478 }
5479 }
5480
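/* Example: a delay-slot insn of the form
     (set (reg A) (plus (reg B) (const_int 1)))
   hits the PLUS case above and is folded into the restore itself,
   printing " restore B, 1, A" (modulo the %r/%Y operand-printing
   rules).  A null PAT simply prints a lone " restore".  */
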
5481 /* Output a return. */
5482
5483 const char *
5484 output_return (rtx insn)
5485 {
5486 if (crtl->calls_eh_return)
5487 {
5488 /* If the function uses __builtin_eh_return, the eh_return
5489 machinery occupies the delay slot. */
5490 gcc_assert (!final_sequence);
5491
5492 if (flag_delayed_branch)
5493 {
5494 if (!TARGET_FLAT && TARGET_V9)
5495 fputs ("\treturn\t%i7+8\n", asm_out_file);
5496 else
5497 {
5498 if (!TARGET_FLAT)
5499 fputs ("\trestore\n", asm_out_file);
5500
5501 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5502 }
5503
5504 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5505 }
5506 else
5507 {
5508 if (!TARGET_FLAT)
5509 fputs ("\trestore\n", asm_out_file);
5510
5511 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5512 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5513 }
5514 }
5515 else if (sparc_leaf_function_p || TARGET_FLAT)
5516 {
5517 /* This is a leaf or flat function so we don't have to bother restoring
5518 the register window, which frees us from dealing with the convoluted
5519 semantics of restore/return. We simply output the jump to the
5520 return address and the insn in the delay slot (if any). */
5521
5522 return "jmp\t%%o7+%)%#";
5523 }
5524 else
5525 {
5526 /* This is a regular function so we have to restore the register window.
5527 We may have a pending insn for the delay slot, which will be either
5528 combined with the 'restore' instruction or put in the delay slot of
5529 the 'return' instruction. */
5530
5531 if (final_sequence)
5532 {
5533 rtx delay, pat;
5534
5535 delay = NEXT_INSN (insn);
5536 gcc_assert (delay);
5537
5538 pat = PATTERN (delay);
5539
5540 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
5541 {
5542 epilogue_renumber (&pat, 0);
5543 return "return\t%%i7+%)%#";
5544 }
5545 else
5546 {
5547 output_asm_insn ("jmp\t%%i7+%)", NULL);
5548 output_restore (pat);
5549 PATTERN (delay) = gen_blockage ();
5550 INSN_CODE (delay) = -1;
5551 }
5552 }
5553 else
5554 {
5555 /* The delay slot is empty. */
5556 if (TARGET_V9)
5557 return "return\t%%i7+%)\n\t nop";
5558 else if (flag_delayed_branch)
5559 return "jmp\t%%i7+%)\n\t restore";
5560 else
5561 return "restore\n\tjmp\t%%o7+%)\n\t nop";
5562 }
5563 }
5564
5565 return "";
5566 }
5567
5568 /* Output a sibling call. */
5569
5570 const char *
5571 output_sibcall (rtx insn, rtx call_operand)
5572 {
5573 rtx operands[1];
5574
5575 gcc_assert (flag_delayed_branch);
5576
5577 operands[0] = call_operand;
5578
5579 if (sparc_leaf_function_p || TARGET_FLAT)
5580 {
5581 /* This is a leaf or flat function so we don't have to bother restoring
5582 the register window. We simply output the jump to the function and
5583 the insn in the delay slot (if any). */
5584
5585 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
5586
5587 if (final_sequence)
5588 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
5589 operands);
5590 else
5591 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
5592 it into a branch if possible. */
5593 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
5594 operands);
5595 }
5596 else
5597 {
5598 /* This is a regular function so we have to restore the register window.
5599 We may have a pending insn for the delay slot, which will be combined
5600 with the 'restore' instruction. */
5601
5602 output_asm_insn ("call\t%a0, 0", operands);
5603
5604 if (final_sequence)
5605 {
5606 rtx delay = NEXT_INSN (insn);
5607 gcc_assert (delay);
5608
5609 output_restore (PATTERN (delay));
5610
5611 PATTERN (delay) = gen_blockage ();
5612 INSN_CODE (delay) = -1;
5613 }
5614 else
5615 output_restore (NULL_RTX);
5616 }
5617
5618 return "";
5619 }
5620 \f
5621 /* Functions for handling argument passing.
5622
5623 For 32-bit, the first 6 args are normally in registers and the rest are
5624 pushed. Any arg that starts within the first 6 words is at least
5625 partially passed in a register unless its data type forbids.
5626
5627 For 64-bit, the argument registers are laid out as an array of 16 elements
5628 and arguments are added sequentially. The first 6 int args and up to the
5629 first 16 fp args (depending on size) are passed in regs.
5630
5631 Slot Stack Integral Float Float in structure Double Long Double
5632 ---- ----- -------- ----- ------------------ ------ -----------
5633 15 [SP+248] %f31 %f30,%f31 %d30
5634 14 [SP+240] %f29 %f28,%f29 %d28 %q28
5635 13 [SP+232] %f27 %f26,%f27 %d26
5636 12 [SP+224] %f25 %f24,%f25 %d24 %q24
5637 11 [SP+216] %f23 %f22,%f23 %d22
5638 10 [SP+208] %f21 %f20,%f21 %d20 %q20
5639 9 [SP+200] %f19 %f18,%f19 %d18
5640 8 [SP+192] %f17 %f16,%f17 %d16 %q16
5641 7 [SP+184] %f15 %f14,%f15 %d14
5642 6 [SP+176] %f13 %f12,%f13 %d12 %q12
5643 5 [SP+168] %o5 %f11 %f10,%f11 %d10
5644 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
5645 3 [SP+152] %o3 %f7 %f6,%f7 %d6
5646 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
5647 1 [SP+136] %o1 %f3 %f2,%f3 %d2
5648 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
5649
5650 Here SP = %sp with -mno-stack-bias, and %sp+stack_bias otherwise.
5651
5652 Integral arguments are always passed as 64-bit quantities appropriately
5653 extended.
5654
5655 Passing of floating point values is handled as follows.
5656 If a prototype is in scope:
5657 If the value is a named argument (i.e. the function is not a stdarg
5658 function, or the value is not part of the `...') then the value is
5659 passed in the appropriate fp reg.
5660 If the value is part of the `...' and is passed in one of the first 6
5661 slots then the value is passed in the appropriate int reg.
5662 If the value is part of the `...' and is not passed in one of the first 6
5663 slots then the value is passed in memory.
5664 If a prototype is not in scope:
5665 If the value is one of the first 6 arguments the value is passed in the
5666 appropriate integer reg and the appropriate fp reg.
5667 If the value is not one of the first 6 arguments the value is passed in
5668 the appropriate fp reg and in memory.
5669
5670
5671 Summary of the calling conventions implemented by GCC on the SPARC:
5672
5673 32-bit ABI:
5674 size argument return value
5675
5676 small integer <4 int. reg. int. reg.
5677 word 4 int. reg. int. reg.
5678 double word 8 int. reg. int. reg.
5679
5680 _Complex small integer <8 int. reg. int. reg.
5681 _Complex word 8 int. reg. int. reg.
5682 _Complex double word 16 memory int. reg.
5683
5684 vector integer <=8 int. reg. FP reg.
5685 vector integer >8 memory memory
5686
5687 float 4 int. reg. FP reg.
5688 double 8 int. reg. FP reg.
5689 long double 16 memory memory
5690
5691 _Complex float 8 memory FP reg.
5692 _Complex double 16 memory FP reg.
5693 _Complex long double 32 memory FP reg.
5694
5695 vector float any memory memory
5696
5697 aggregate any memory memory
5698
5699
5700
5701 64-bit ABI:
5702 size argument return value
5703
5704 small integer <8 int. reg. int. reg.
5705 word 8 int. reg. int. reg.
5706 double word 16 int. reg. int. reg.
5707
5708 _Complex small integer <16 int. reg. int. reg.
5709 _Complex word 16 int. reg. int. reg.
5710 _Complex double word 32 memory int. reg.
5711
5712 vector integer <=16 FP reg. FP reg.
5713 vector integer 16<s<=32 memory FP reg.
5714 vector integer >32 memory memory
5715
5716 float 4 FP reg. FP reg.
5717 double 8 FP reg. FP reg.
5718 long double 16 FP reg. FP reg.
5719
5720 _Complex float 8 FP reg. FP reg.
5721 _Complex double 16 FP reg. FP reg.
5722 _Complex long double 32 memory FP reg.
5723
5724 vector float <=16 FP reg. FP reg.
5725 vector float 16<s<=32 memory FP reg.
5726 vector float >32 memory memory
5727
5728 aggregate <=16 reg. reg.
5729 aggregate 16<s<=32 memory reg.
5730 aggregate >32 memory memory
5731
5732
5733
5734 Note #1: complex floating-point types follow the extended SPARC ABIs as
5735 implemented by the Sun compiler.
5736
5737 Note #2: integral vector types follow the scalar floating-point types
5738 conventions to match what is implemented by the Sun VIS SDK.
5739
5740 Note #3: floating-point vector types follow the aggregate types
5741 conventions. */
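
/* A worked example of the 64-bit conventions above, for the hypothetical
   prototyped declaration (illustrative only, not used by the code below)

	struct two_floats { float x, y; };
	void f (int a, double b, struct two_floats s);

	a  ->  slot 0, %o0 (extended to 64 bits)
	b  ->  slot 1, %d2
	s  ->  slot 2, %f4 and %f5 (float members are promoted to fp regs)

   Had b been part of a `...' instead, it would have been passed in the
   slot 1 integer register %o1, per the rules above.  */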
5742
5743
5744 /* Maximum number of int regs for args. */
5745 #define SPARC_INT_ARG_MAX 6
5746 /* Maximum number of fp regs for args. */
5747 #define SPARC_FP_ARG_MAX 16
5748
5749 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
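
/* For example, with UNITS_PER_WORD == 8 (64-bit), ROUND_ADVANCE (4) == 1,
   ROUND_ADVANCE (8) == 1 and ROUND_ADVANCE (12) == 2: sizes are rounded
   up to a whole number of argument slots.  */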
5750
5751 /* Handle the INIT_CUMULATIVE_ARGS macro.
5752 Initialize a variable CUM of type CUMULATIVE_ARGS
5753 for a call to a function whose data type is FNTYPE.
5754 For a library call, FNTYPE is 0. */
5755
5756 void
5757 init_cumulative_args (struct sparc_args *cum, tree fntype,
5758 rtx libname ATTRIBUTE_UNUSED,
5759 tree fndecl ATTRIBUTE_UNUSED)
5760 {
5761 cum->words = 0;
5762 cum->prototype_p = fntype && prototype_p (fntype);
5763 cum->libcall_p = fntype == 0;
5764 }
5765
5766 /* Handle promotion of pointer and integer arguments. */
5767
5768 static enum machine_mode
5769 sparc_promote_function_mode (const_tree type,
5770 enum machine_mode mode,
5771 int *punsignedp,
5772 const_tree fntype ATTRIBUTE_UNUSED,
5773 int for_return ATTRIBUTE_UNUSED)
5774 {
5775 if (type != NULL_TREE && POINTER_TYPE_P (type))
5776 {
5777 *punsignedp = POINTERS_EXTEND_UNSIGNED;
5778 return Pmode;
5779 }
5780
5781 /* Integral arguments are passed as full words, as per the ABI. */
5782 if (GET_MODE_CLASS (mode) == MODE_INT
5783 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5784 return word_mode;
5785
5786 return mode;
5787 }
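
/* A sketch of the effect (illustrative only): with TARGET_ARCH64, for

	short g (short x);

   both the argument and the return value are promoted to DImode
   (word_mode), so x arrives as a full sign-extended 64-bit word; only
   pointer and full-word integer modes survive the promotion.  */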
5788
5789 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
5790
5791 static bool
5792 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
5793 {
5794 return TARGET_ARCH64 ? true : false;
5795 }
5796
5797 /* Scan the record type TYPE and return the following predicates:
5798 - INTREGS_P: the record contains at least one field or sub-field
5799 that is eligible for promotion in integer registers.
5800 - FPREGS_P: the record contains at least one field or sub-field
5801 that is eligible for promotion in floating-point registers.
5802 - PACKED_P: the record contains at least one field that is packed.
5803
5804 Sub-fields are not taken into account for the PACKED_P predicate. */
5805
5806 static void
5807 scan_record_type (const_tree type, int *intregs_p, int *fpregs_p,
5808 int *packed_p)
5809 {
5810 tree field;
5811
5812 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5813 {
5814 if (TREE_CODE (field) == FIELD_DECL)
5815 {
5816 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
5817 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
5818 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
5819 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
5820 && TARGET_FPU)
5821 *fpregs_p = 1;
5822 else
5823 *intregs_p = 1;
5824
5825 if (packed_p && DECL_PACKED (field))
5826 *packed_p = 1;
5827 }
5828 }
5829 }
5830
5831 /* Compute the slot number to pass an argument in.
5832 Return the slot number or -1 if passing on the stack.
5833
5834 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5835 the preceding args and about the function being called.
5836 MODE is the argument's machine mode.
5837 TYPE is the data type of the argument (as a tree).
5838 This is null for libcalls where that information may
5839 not be available.
5840 NAMED is nonzero if this argument is a named parameter
5841 (otherwise it is an extra parameter matching an ellipsis).
5842 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
5843 *PREGNO records the register number to use if scalar type.
5844 *PPADDING records the amount of padding needed in words. */
5845
5846 static int
5847 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
5848 const_tree type, bool named, bool incoming_p,
5849 int *pregno, int *ppadding)
5850 {
5851 int regbase = (incoming_p
5852 ? SPARC_INCOMING_INT_ARG_FIRST
5853 : SPARC_OUTGOING_INT_ARG_FIRST);
5854 int slotno = cum->words;
5855 enum mode_class mclass;
5856 int regno;
5857
5858 *ppadding = 0;
5859
5860 if (type && TREE_ADDRESSABLE (type))
5861 return -1;
5862
5863 if (TARGET_ARCH32
5864 && mode == BLKmode
5865 && type
5866 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
5867 return -1;
5868
5869 /* For SPARC64, objects requiring 16-byte alignment get it. */
5870 if (TARGET_ARCH64
5871 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
5872 && (slotno & 1) != 0)
5873 slotno++, *ppadding = 1;
5874
5875 mclass = GET_MODE_CLASS (mode);
5876 if (type && TREE_CODE (type) == VECTOR_TYPE)
5877 {
5878 /* Vector types deserve special treatment because they are
5879 polymorphic wrt their mode, depending upon whether VIS
5880 instructions are enabled. */
5881 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5882 {
5883 /* The SPARC port defines no floating-point vector modes. */
5884 gcc_assert (mode == BLKmode);
5885 }
5886 else
5887 {
5888 /* Integral vector types should either have a vector
5889 mode or an integral mode, because we are guaranteed
5890 by pass_by_reference that their size is not greater
5891 than 16 bytes and TImode is 16-byte wide. */
5892 gcc_assert (mode != BLKmode);
5893
5894 /* Vector integers are handled like floats according to
5895 the Sun VIS SDK. */
5896 mclass = MODE_FLOAT;
5897 }
5898 }
5899
5900 switch (mclass)
5901 {
5902 case MODE_FLOAT:
5903 case MODE_COMPLEX_FLOAT:
5904 case MODE_VECTOR_INT:
5905 if (TARGET_ARCH64 && TARGET_FPU && named)
5906 {
5907 if (slotno >= SPARC_FP_ARG_MAX)
5908 return -1;
5909 regno = SPARC_FP_ARG_FIRST + slotno * 2;
5910 /* Arguments filling only a single FP register are
5911 right-justified in the outer double FP register. */
5912 if (GET_MODE_SIZE (mode) <= 4)
5913 regno++;
5914 break;
5915 }
5916 /* fallthrough */
5917
5918 case MODE_INT:
5919 case MODE_COMPLEX_INT:
5920 if (slotno >= SPARC_INT_ARG_MAX)
5921 return -1;
5922 regno = regbase + slotno;
5923 break;
5924
5925 case MODE_RANDOM:
5926 if (mode == VOIDmode)
5927 /* MODE is VOIDmode when generating the actual call. */
5928 return -1;
5929
5930 gcc_assert (mode == BLKmode);
5931
5932 if (TARGET_ARCH32
5933 || !type
5934 || (TREE_CODE (type) != VECTOR_TYPE
5935 && TREE_CODE (type) != RECORD_TYPE))
5936 {
5937 if (slotno >= SPARC_INT_ARG_MAX)
5938 return -1;
5939 regno = regbase + slotno;
5940 }
5941 else /* TARGET_ARCH64 && type */
5942 {
5943 int intregs_p = 0, fpregs_p = 0, packed_p = 0;
5944
5945 /* First see what kinds of registers we would need. */
5946 if (TREE_CODE (type) == VECTOR_TYPE)
5947 fpregs_p = 1;
5948 else
5949 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
5950
5951 /* The ABI obviously doesn't specify how packed structures
5952 are passed. These are defined to be passed in int regs
5953 if possible, otherwise memory. */
5954 if (packed_p || !named)
5955 fpregs_p = 0, intregs_p = 1;
5956
5957 /* If all arg slots are filled, the argument must be passed on the stack. */
5958 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
5959 return -1;
5960
5961 /* If there are only int args and all int arg slots are filled,
5962 the argument must be passed on the stack. */
5963 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
5964 return -1;
5965
5966 /* Note that even if all int arg slots are filled, fp members may
5967 still be passed in regs if such regs are available.
5968 *PREGNO isn't set because there may be more than one; it's up
5969 to the caller to compute them. */
5970 return slotno;
5971 }
5972 break;
5973
5974 default :
5975 gcc_unreachable ();
5976 }
5977
5978 *pregno = regno;
5979 return slotno;
5980 }
5981
5982 /* Handle recursive register counting for structure field layout. */
5983
5984 struct function_arg_record_value_parms
5985 {
5986 rtx ret; /* return expression being built. */
5987 int slotno; /* slot number of the argument. */
5988 int named; /* whether the argument is named. */
5989 int regbase; /* regno of the base register. */
5990 int stack; /* 1 if part of the argument is on the stack. */
5991 int intoffset; /* offset of the first pending integer field. */
5992 unsigned int nregs; /* number of words passed in registers. */
5993 };
5994
5995 static void function_arg_record_value_3
5996 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
5997 static void function_arg_record_value_2
5998 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
5999 static void function_arg_record_value_1
6000 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6001 static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
6002 static rtx function_arg_union_value (int, enum machine_mode, int, int);
6003
6004 /* A subroutine of function_arg_record_value. Traverse the structure
6005 recursively and determine how many registers will be required. */
6006
6007 static void
6008 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
6009 struct function_arg_record_value_parms *parms,
6010 bool packed_p)
6011 {
6012 tree field;
6013
6014 /* We need to compute how many registers are needed so we can
6015 allocate the PARALLEL but before we can do that we need to know
6016 whether there are any packed fields. The ABI obviously doesn't
6017 specify how structures are passed in this case, so they are
6018 defined to be passed in int regs if possible, otherwise memory,
6019 regardless of whether there are fp values present. */
6020
6021 if (! packed_p)
6022 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6023 {
6024 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6025 {
6026 packed_p = true;
6027 break;
6028 }
6029 }
6030
6031 /* Compute how many registers we need. */
6032 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6033 {
6034 if (TREE_CODE (field) == FIELD_DECL)
6035 {
6036 HOST_WIDE_INT bitpos = startbitpos;
6037
6038 if (DECL_SIZE (field) != 0)
6039 {
6040 if (integer_zerop (DECL_SIZE (field)))
6041 continue;
6042
6043 if (host_integerp (bit_position (field), 1))
6044 bitpos += int_bit_position (field);
6045 }
6046
6047 /* ??? FIXME: else assume zero offset. */
6048
6049 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6050 function_arg_record_value_1 (TREE_TYPE (field),
6051 bitpos,
6052 parms,
6053 packed_p);
6054 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6055 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6056 && TARGET_FPU
6057 && parms->named
6058 && ! packed_p)
6059 {
6060 if (parms->intoffset != -1)
6061 {
6062 unsigned int startbit, endbit;
6063 int intslots, this_slotno;
6064
6065 startbit = parms->intoffset & -BITS_PER_WORD;
6066 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6067
6068 intslots = (endbit - startbit) / BITS_PER_WORD;
6069 this_slotno = parms->slotno + parms->intoffset
6070 / BITS_PER_WORD;
6071
6072 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6073 {
6074 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6075 /* We need to pass this field on the stack. */
6076 parms->stack = 1;
6077 }
6078
6079 parms->nregs += intslots;
6080 parms->intoffset = -1;
6081 }
6082
6083 /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
6084 If it weren't true we wouldn't be here. */
6085 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6086 && DECL_MODE (field) == BLKmode)
6087 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6088 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6089 parms->nregs += 2;
6090 else
6091 parms->nregs += 1;
6092 }
6093 else
6094 {
6095 if (parms->intoffset == -1)
6096 parms->intoffset = bitpos;
6097 }
6098 }
6099 }
6100 }
6101
6102 /* A subroutine of function_arg_record_value. Assign the bits of the
6103 structure between parms->intoffset and bitpos to integer registers. */
6104
6105 static void
6106 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
6107 struct function_arg_record_value_parms *parms)
6108 {
6109 enum machine_mode mode;
6110 unsigned int regno;
6111 unsigned int startbit, endbit;
6112 int this_slotno, intslots, intoffset;
6113 rtx reg;
6114
6115 if (parms->intoffset == -1)
6116 return;
6117
6118 intoffset = parms->intoffset;
6119 parms->intoffset = -1;
6120
6121 startbit = intoffset & -BITS_PER_WORD;
6122 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6123 intslots = (endbit - startbit) / BITS_PER_WORD;
6124 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
6125
6126 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
6127 if (intslots <= 0)
6128 return;
6129
6130 /* If this is the trailing part of a word, only load that much into
6131 the register. Otherwise load the whole register. Note that in
6132 the latter case we may pick up unwanted bits. It's not a problem
6133 at the moment but we may wish to revisit it. */
6134
6135 if (intoffset % BITS_PER_WORD != 0)
6136 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6137 MODE_INT);
6138 else
6139 mode = word_mode;
6140
6141 intoffset /= BITS_PER_UNIT;
6142 do
6143 {
6144 regno = parms->regbase + this_slotno;
6145 reg = gen_rtx_REG (mode, regno);
6146 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6147 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6148
6149 this_slotno += 1;
6150 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
6151 mode = word_mode;
6152 parms->nregs += 1;
6153 intslots -= 1;
6154 }
6155 while (intslots > 0);
6156 }
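
/* A worked instance of the bit arithmetic above (illustrative only):
   with BITS_PER_WORD == 64, intoffset == 0 and bitpos == 64 we get

	startbit == 0, endbit == 64, intslots == 1

   so a single word register is allocated for the pending integer data,
   and since intoffset % BITS_PER_WORD == 0 it is loaded in word_mode.  */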
6157
6158 /* A subroutine of function_arg_record_value. Traverse the structure
6159 recursively and assign bits to floating point registers. Track which
6160 bits in between need integer registers; invoke function_arg_record_value_3
6161 to make that happen. */
6162
6163 static void
6164 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
6165 struct function_arg_record_value_parms *parms,
6166 bool packed_p)
6167 {
6168 tree field;
6169
6170 if (! packed_p)
6171 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6172 {
6173 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6174 {
6175 packed_p = true;
6176 break;
6177 }
6178 }
6179
6180 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6181 {
6182 if (TREE_CODE (field) == FIELD_DECL)
6183 {
6184 HOST_WIDE_INT bitpos = startbitpos;
6185
6186 if (DECL_SIZE (field) != 0)
6187 {
6188 if (integer_zerop (DECL_SIZE (field)))
6189 continue;
6190
6191 if (host_integerp (bit_position (field), 1))
6192 bitpos += int_bit_position (field);
6193 }
6194
6195 /* ??? FIXME: else assume zero offset. */
6196
6197 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6198 function_arg_record_value_2 (TREE_TYPE (field),
6199 bitpos,
6200 parms,
6201 packed_p);
6202 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6203 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6204 && TARGET_FPU
6205 && parms->named
6206 && ! packed_p)
6207 {
6208 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
6209 int regno, nregs, pos;
6210 enum machine_mode mode = DECL_MODE (field);
6211 rtx reg;
6212
6213 function_arg_record_value_3 (bitpos, parms);
6214
6215 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6216 && mode == BLKmode)
6217 {
6218 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6219 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6220 }
6221 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6222 {
6223 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6224 nregs = 2;
6225 }
6226 else
6227 nregs = 1;
6228
6229 regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6230 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6231 regno++;
6232 reg = gen_rtx_REG (mode, regno);
6233 pos = bitpos / BITS_PER_UNIT;
6234 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6235 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6236 parms->nregs += 1;
6237 while (--nregs > 0)
6238 {
6239 regno += GET_MODE_SIZE (mode) / 4;
6240 reg = gen_rtx_REG (mode, regno);
6241 pos += GET_MODE_SIZE (mode);
6242 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6243 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6244 parms->nregs += 1;
6245 }
6246 }
6247 else
6248 {
6249 if (parms->intoffset == -1)
6250 parms->intoffset = bitpos;
6251 }
6252 }
6253 }
6254 }
6255
6256 /* Used by function_arg and sparc_function_value_1 to implement the complex
6257 conventions of the 64-bit ABI for passing and returning structures.
6258 Return an expression valid as a return value for the FUNCTION_ARG
6259 and TARGET_FUNCTION_VALUE.
6260
6261 TYPE is the data type of the argument (as a tree).
6262 This is null for libcalls where that information may
6263 not be available.
6264 MODE is the argument's machine mode.
6265 SLOTNO is the index number of the argument's slot in the parameter array.
6266 NAMED is nonzero if this argument is a named parameter
6267 (otherwise it is an extra parameter matching an ellipsis).
6268 REGBASE is the regno of the base register for the parameter array. */
6269
6270 static rtx
6271 function_arg_record_value (const_tree type, enum machine_mode mode,
6272 int slotno, int named, int regbase)
6273 {
6274 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6275 struct function_arg_record_value_parms parms;
6276 unsigned int nregs;
6277
6278 parms.ret = NULL_RTX;
6279 parms.slotno = slotno;
6280 parms.named = named;
6281 parms.regbase = regbase;
6282 parms.stack = 0;
6283
6284 /* Compute how many registers we need. */
6285 parms.nregs = 0;
6286 parms.intoffset = 0;
6287 function_arg_record_value_1 (type, 0, &parms, false);
6288
6289 /* Take into account pending integer fields. */
6290 if (parms.intoffset != -1)
6291 {
6292 unsigned int startbit, endbit;
6293 int intslots, this_slotno;
6294
6295 startbit = parms.intoffset & -BITS_PER_WORD;
6296 endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6297 intslots = (endbit - startbit) / BITS_PER_WORD;
6298 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
6299
6300 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6301 {
6302 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6303 /* We need to pass this field on the stack. */
6304 parms.stack = 1;
6305 }
6306
6307 parms.nregs += intslots;
6308 }
6309 nregs = parms.nregs;
6310
6311 /* Allocate the vector and handle some annoying special cases. */
6312 if (nregs == 0)
6313 {
6314 /* ??? Empty structure has no value? Duh? */
6315 if (typesize <= 0)
6316 {
6317 /* Though there's nothing really to store, return a word register
6318 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6319 leads to breakage due to the fact that there are zero bytes to
6320 load. */
6321 return gen_rtx_REG (mode, regbase);
6322 }
6323 else
6324 {
6325 /* ??? C++ has structures with no fields, and yet a size. Give up
6326 for now and pass everything back in integer registers. */
6327 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6328 }
6329 if (nregs + slotno > SPARC_INT_ARG_MAX)
6330 nregs = SPARC_INT_ARG_MAX - slotno;
6331 }
6332 gcc_assert (nregs != 0);
6333
6334 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
6335
6336 /* If at least one field must be passed on the stack, generate
6337 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6338 also be passed on the stack. We can't do much better because the
6339 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6340 of structures for which the fields passed exclusively in registers
6341 are not at the beginning of the structure. */
6342 if (parms.stack)
6343 XVECEXP (parms.ret, 0, 0)
6344 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6345
6346 /* Fill in the entries. */
6347 parms.nregs = 0;
6348 parms.intoffset = 0;
6349 function_arg_record_value_2 (type, 0, &parms, false);
6350 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
6351
6352 gcc_assert (parms.nregs == nregs);
6353
6354 return parms.ret;
6355 }
6356
6357 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6358 of the 64-bit ABI for passing and returning unions.
6359 Return an expression valid as a return value for the FUNCTION_ARG
6360 and TARGET_FUNCTION_VALUE.
6361
6362 SIZE is the size in bytes of the union. SLOTNO is the index number
6363 of the union's slot in the parameter array. MODE is the argument's
6364 machine mode. REGNO is the hard register the union will be passed in. */
6365
6366 static rtx
6367 function_arg_union_value (int size, enum machine_mode mode, int slotno,
6368 int regno)
6369 {
6370 int nwords = ROUND_ADVANCE (size), i;
6371 rtx regs;
6372
6373 /* See comment in previous function for empty structures. */
6374 if (nwords == 0)
6375 return gen_rtx_REG (mode, regno);
6376
6377 if (slotno == SPARC_INT_ARG_MAX - 1)
6378 nwords = 1;
6379
6380 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6381
6382 for (i = 0; i < nwords; i++)
6383 {
6384 /* Unions are passed left-justified. */
6385 XVECEXP (regs, 0, i)
6386 = gen_rtx_EXPR_LIST (VOIDmode,
6387 gen_rtx_REG (word_mode, regno),
6388 GEN_INT (UNITS_PER_WORD * i));
6389 regno++;
6390 }
6391
6392 return regs;
6393 }
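
/* For example (illustrative only), a 12-byte union passed in slot 0
   on 64-bit gives nwords == ROUND_ADVANCE (12) == 2, so the PARALLEL
   maps two word registers at byte offsets 0 and 8:

	(parallel [(expr_list (reg %o0) 0) (expr_list (reg %o1) 8)])

   Had the union started in the last slot (slotno == 5), only one word
   would have been passed in a register, the rest going on the stack.  */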
6394
6395 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6396 for passing and returning large (BLKmode) vectors.
6397 Return an expression valid as a return value for the FUNCTION_ARG
6398 and TARGET_FUNCTION_VALUE.
6399
6400 SIZE is the size in bytes of the vector (at least 8 bytes).
6401 REGNO is the FP hard register the vector will be passed in. */
6402
6403 static rtx
6404 function_arg_vector_value (int size, int regno)
6405 {
6406 int i, nregs = size / 8;
6407 rtx regs;
6408
6409 regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6410
6411 for (i = 0; i < nregs; i++)
6412 {
6413 XVECEXP (regs, 0, i)
6414 = gen_rtx_EXPR_LIST (VOIDmode,
6415 gen_rtx_REG (DImode, regno + 2*i),
6416 GEN_INT (i*8));
6417 }
6418
6419 return regs;
6420 }
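
/* For example (illustrative only), a 16-byte vector with REGNO == 40,
   i.e. %f8 assuming SPARC_FP_ARG_FIRST == 32, yields nregs == 2 and

	(parallel [(expr_list (reg:DI f8) 0) (expr_list (reg:DI f10) 8)])

   i.e. consecutive even-numbered fp register pairs cover the vector
   8 bytes at a time.  */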
6421
6422 /* Determine where to put an argument to a function.
6423 Value is zero to push the argument on the stack,
6424 or a hard register in which to store the argument.
6425
6426 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6427 the preceding args and about the function being called.
6428 MODE is the argument's machine mode.
6429 TYPE is the data type of the argument (as a tree).
6430 This is null for libcalls where that information may
6431 not be available.
6432 NAMED is true if this argument is a named parameter
6433 (otherwise it is an extra parameter matching an ellipsis).
6434 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6435 TARGET_FUNCTION_INCOMING_ARG. */
6436
6437 static rtx
6438 sparc_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode,
6439 const_tree type, bool named, bool incoming_p)
6440 {
6441 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6442
6443 int regbase = (incoming_p
6444 ? SPARC_INCOMING_INT_ARG_FIRST
6445 : SPARC_OUTGOING_INT_ARG_FIRST);
6446 int slotno, regno, padding;
6447 enum mode_class mclass = GET_MODE_CLASS (mode);
6448
6449 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
6450 &regno, &padding);
6451 if (slotno == -1)
6452 return 0;
6453
6454 /* Vector types deserve special treatment because they are polymorphic wrt
6455 their mode, depending upon whether VIS instructions are enabled. */
6456 if (type && TREE_CODE (type) == VECTOR_TYPE)
6457 {
6458 HOST_WIDE_INT size = int_size_in_bytes (type);
6459 gcc_assert ((TARGET_ARCH32 && size <= 8)
6460 || (TARGET_ARCH64 && size <= 16));
6461
6462 if (mode == BLKmode)
6463 return function_arg_vector_value (size,
6464 SPARC_FP_ARG_FIRST + 2*slotno);
6465 else
6466 mclass = MODE_FLOAT;
6467 }
6468
6469 if (TARGET_ARCH32)
6470 return gen_rtx_REG (mode, regno);
6471
6472 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6473 and are promoted to registers if possible. */
6474 if (type && TREE_CODE (type) == RECORD_TYPE)
6475 {
6476 HOST_WIDE_INT size = int_size_in_bytes (type);
6477 gcc_assert (size <= 16);
6478
6479 return function_arg_record_value (type, mode, slotno, named, regbase);
6480 }
6481
6482 /* Unions up to 16 bytes in size are passed in integer registers. */
6483 else if (type && TREE_CODE (type) == UNION_TYPE)
6484 {
6485 HOST_WIDE_INT size = int_size_in_bytes (type);
6486 gcc_assert (size <= 16);
6487
6488 return function_arg_union_value (size, mode, slotno, regno);
6489 }
6490
6491 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6492 but also have the slot allocated for them.
6493 If no prototype is in scope fp values in register slots get passed
6494 in two places, either fp regs and int regs or fp regs and memory. */
6495 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6496 && SPARC_FP_REG_P (regno))
6497 {
6498 rtx reg = gen_rtx_REG (mode, regno);
6499 if (cum->prototype_p || cum->libcall_p)
6500 {
6501 /* "* 2" because fp reg numbers are recorded in 4 byte
6502 quantities. */
6503 #if 0
6504 /* ??? This will cause the value to be passed in the fp reg and
6505 in the stack. When a prototype exists we want to pass the
6506 value in the reg but reserve space on the stack. That's an
6507 optimization, and is deferred [for a bit]. */
6508 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
6509 return gen_rtx_PARALLEL (mode,
6510 gen_rtvec (2,
6511 gen_rtx_EXPR_LIST (VOIDmode,
6512 NULL_RTX, const0_rtx),
6513 gen_rtx_EXPR_LIST (VOIDmode,
6514 reg, const0_rtx)));
6515 else
6516 #else
6517 /* ??? It seems that passing back a register even when past
6518 the area declared by REG_PARM_STACK_SPACE will allocate
6519 space appropriately, and will not copy the data onto the
6520 stack, exactly as we desire.
6521
6522 This is due to locate_and_pad_parm being called in
6523 expand_call whenever reg_parm_stack_space > 0, which
6524 while beneficial to our example here, would seem to be
6525 in error from what had been intended. Ho hum... -- r~ */
6526 #endif
6527 return reg;
6528 }
6529 else
6530 {
6531 rtx v0, v1;
6532
6533 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6534 {
6535 int intreg;
6536
6537 /* On incoming, we don't need to know that the value
6538 is passed in %f0 and %i0, and it confuses other parts,
6539 causing needless spillage even in the simplest cases. */
6540 if (incoming_p)
6541 return reg;
6542
6543 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6544 + (regno - SPARC_FP_ARG_FIRST) / 2);
6545
6546 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6547 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6548 const0_rtx);
6549 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6550 }
6551 else
6552 {
6553 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6554 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6555 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6556 }
6557 }
6558 }
6559
6560 /* All other aggregate types are passed in an integer register in a mode
6561 corresponding to the size of the type. */
6562 else if (type && AGGREGATE_TYPE_P (type))
6563 {
6564 HOST_WIDE_INT size = int_size_in_bytes (type);
6565 gcc_assert (size <= 16);
6566
6567 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6568 }
6569
6570 return gen_rtx_REG (mode, regno);
6571 }
6572
6573 /* Handle the TARGET_FUNCTION_ARG target hook. */
6574
6575 static rtx
6576 sparc_function_arg (cumulative_args_t cum, enum machine_mode mode,
6577 const_tree type, bool named)
6578 {
6579 return sparc_function_arg_1 (cum, mode, type, named, false);
6580 }
6581
6582 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6583
6584 static rtx
6585 sparc_function_incoming_arg (cumulative_args_t cum, enum machine_mode mode,
6586 const_tree type, bool named)
6587 {
6588 return sparc_function_arg_1 (cum, mode, type, named, true);
6589 }
6590
6591 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
6592
6593 static unsigned int
6594 sparc_function_arg_boundary (enum machine_mode mode, const_tree type)
6595 {
6596 return ((TARGET_ARCH64
6597 && (GET_MODE_ALIGNMENT (mode) == 128
6598 || (type && TYPE_ALIGN (type) == 128)))
6599 ? 128
6600 : PARM_BOUNDARY);
6601 }
6602
6603 /* For an arg passed partly in registers and partly in memory,
6604 this is the number of bytes of registers used.
6605 For args passed entirely in registers or entirely in memory, zero.
6606
6607 Any arg that starts in the first 6 regs but won't entirely fit in them
6608 needs partial registers on v8. On v9, structures with integer
6609 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
6610 values that begin in the last fp reg [where "last fp reg" varies with the
6611 mode] will be split between that reg and memory. */
6612
6613 static int
6614 sparc_arg_partial_bytes (cumulative_args_t cum, enum machine_mode mode,
6615 tree type, bool named)
6616 {
6617 int slotno, regno, padding;
6618
6619 /* We pass false for incoming_p here; it doesn't matter. */
6620 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
6621 false, &regno, &padding);
6622
6623 if (slotno == -1)
6624 return 0;
6625
6626 if (TARGET_ARCH32)
6627 {
6628 if ((slotno + (mode == BLKmode
6629 ? ROUND_ADVANCE (int_size_in_bytes (type))
6630 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
6631 > SPARC_INT_ARG_MAX)
6632 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
6633 }
6634 else
6635 {
6636 /* We are guaranteed by pass_by_reference that the size of the
6637 argument is not greater than 16 bytes, so we only need to return
6638 one word if the argument is partially passed in registers. */
6639
6640 if (type && AGGREGATE_TYPE_P (type))
6641 {
6642 int size = int_size_in_bytes (type);
6643
6644 if (size > UNITS_PER_WORD
6645 && slotno == SPARC_INT_ARG_MAX - 1)
6646 return UNITS_PER_WORD;
6647 }
6648 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
6649 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6650 && ! (TARGET_FPU && named)))
6651 {
6652 /* The complex types are passed as packed types. */
6653 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6654 && slotno == SPARC_INT_ARG_MAX - 1)
6655 return UNITS_PER_WORD;
6656 }
6657 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6658 {
6659 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
6660 > SPARC_FP_ARG_MAX)
6661 return UNITS_PER_WORD;
6662 }
6663 }
6664
6665 return 0;
6666 }
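
/* A worked example (illustrative only): on 32-bit, a DImode argument
   whose first word lands in slot 5 has slotno + ROUND_ADVANCE (8)
   == 5 + 2 > SPARC_INT_ARG_MAX, so this returns
   (6 - 5) * UNITS_PER_WORD == 4 bytes: the first half is passed in
   %o5 and the second half on the stack.  */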
6667
6668 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
6669 Specify whether to pass the argument by reference. */
6670
6671 static bool
6672 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6673 enum machine_mode mode, const_tree type,
6674 bool named ATTRIBUTE_UNUSED)
6675 {
6676 if (TARGET_ARCH32)
6677 /* Original SPARC 32-bit ABI says that structures and unions,
6678 and quad-precision floats are passed by reference. For Pascal,
6679 also pass arrays by reference. All other base types are passed
6680 in registers.
6681
6682 Extended ABI (as implemented by the Sun compiler) says that all
6683 complex floats are passed by reference. Pass complex integers
6684 in registers up to 8 bytes. More generally, enforce the 2-word
6685 cap for passing arguments in registers.
6686
6687 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6688 integers are passed like floats of the same size, that is in
6689 registers up to 8 bytes. Pass all vector floats by reference
6690 like structures and unions. */
6691 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
6692 || mode == SCmode
6693 /* Catch CDImode, TFmode, DCmode and TCmode. */
6694 || GET_MODE_SIZE (mode) > 8
6695 || (type
6696 && TREE_CODE (type) == VECTOR_TYPE
6697 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6698 else
6699 /* Original SPARC 64-bit ABI says that structures and unions
6700 of at most 16 bytes are passed in registers, as well as
6701 all other base types.
6702
6703 Extended ABI (as implemented by the Sun compiler) says that
6704 complex floats are passed in registers up to 16 bytes. Pass
6705 all complex integers in registers up to 16 bytes. More generally,
6706 enforce the 2-word cap for passing arguments in registers.
6707
6708 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6709 integers are passed like floats of the same size, that is in
6710 registers (up to 16 bytes). Pass all vector floats like structures
6711 and unions. */
6712 return ((type
6713 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
6714 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
6715 /* Catch CTImode and TCmode. */
6716 || GET_MODE_SIZE (mode) > 16);
6717 }
6718
6719 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
6720 Update the data in CUM to advance over an argument
6721 of mode MODE and data type TYPE.
6722 TYPE is null for libcalls where that information may not be available. */
6723
6724 static void
6725 sparc_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6726 const_tree type, bool named)
6727 {
6728 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6729 int regno, padding;
6730
6731 /* We pass false for incoming_p here; it doesn't matter. */
6732 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
6733
6734 /* If argument requires leading padding, add it. */
6735 cum->words += padding;
6736
6737 if (TARGET_ARCH32)
6738 {
6739 cum->words += (mode != BLKmode
6740 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
6741 : ROUND_ADVANCE (int_size_in_bytes (type)));
6742 }
6743 else
6744 {
6745 if (type && AGGREGATE_TYPE_P (type))
6746 {
6747 int size = int_size_in_bytes (type);
6748
6749 if (size <= 8)
6750 ++cum->words;
6751 else if (size <= 16)
6752 cum->words += 2;
6753 else /* passed by reference */
6754 ++cum->words;
6755 }
6756 else
6757 {
6758 cum->words += (mode != BLKmode
6759 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
6760 : ROUND_ADVANCE (int_size_in_bytes (type)));
6761 }
6762 }
6763 }
6764
6765 /* Handle the FUNCTION_ARG_PADDING macro.
6766 For the 64-bit ABI, structs are always stored left-shifted in their
6767 argument slot. */
6768
6769 enum direction
6770 function_arg_padding (enum machine_mode mode, const_tree type)
6771 {
6772 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
6773 return upward;
6774
6775 /* Fall back to the default. */
6776 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
6777 }
6778
6779 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
6780 Specify whether to return the return value in memory. */
6781
6782 static bool
6783 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6784 {
6785 if (TARGET_ARCH32)
6786 /* Original SPARC 32-bit ABI says that structures and unions,
6787 and quad-precision floats are returned in memory. All other
6788 base types are returned in registers.
6789
6790 Extended ABI (as implemented by the Sun compiler) says that
6791 all complex floats are returned in registers (8 FP registers
6792 at most for '_Complex long double'). Return all complex integers
6793 in registers (4 at most for '_Complex long long').
6794
6795 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6796 integers are returned like floats of the same size, that is in
6797 registers up to 8 bytes and in memory otherwise. Return all
6798 vector floats in memory like structures and unions; note that
6799 they always have BLKmode like the latter. */
6800 return (TYPE_MODE (type) == BLKmode
6801 || TYPE_MODE (type) == TFmode
6802 || (TREE_CODE (type) == VECTOR_TYPE
6803 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6804 else
6805 /* Original SPARC 64-bit ABI says that structures and unions
6806 of at most 32 bytes are returned in registers, as well as
6807 all other base types.
6808
6809 Extended ABI (as implemented by the Sun compiler) says that all
6810 complex floats are returned in registers (8 FP registers at most
6811 for '_Complex long double'). Return all complex integers in
6812 registers (4 at most for '_Complex TItype').
6813
6814 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6815 integers are returned like floats of the same size, that is in
6816 registers. Return all vector floats like structures and unions;
6817 note that they always have BLKmode like the latter. */
6818 return (TYPE_MODE (type) == BLKmode
6819 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
6820 }
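
/* For example (illustrative only), a 20-byte struct always has BLKmode,
   so it is returned in memory on 32-bit; on 64-bit its size does not
   exceed 32 bytes, so it is returned in registers per the rules above.  */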
6821
6822 /* Handle the TARGET_STRUCT_VALUE target hook.
6823 Return where to find the structure return value address. */
6824
6825 static rtx
6826 sparc_struct_value_rtx (tree fndecl, int incoming)
6827 {
6828 if (TARGET_ARCH64)
6829 return 0;
6830 else
6831 {
6832 rtx mem;
6833
6834 if (incoming)
6835 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
6836 STRUCT_VALUE_OFFSET));
6837 else
6838 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
6839 STRUCT_VALUE_OFFSET));
6840
6841 /* Only follow the SPARC ABI for fixed-size structure returns.
6842 Variable-size structure returns are handled per the normal
6843 procedures in GCC. This is enabled by -mstd-struct-return. */
6844 if (incoming == 2
6845 && sparc_std_struct_return
6846 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
6847 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
6848 {
6849 /* We must check and adjust the return address, as it is
6850 optional as to whether the return object is really
6851 provided. */
6852 rtx ret_reg = gen_rtx_REG (Pmode, 31);
6853 rtx scratch = gen_reg_rtx (SImode);
6854 rtx endlab = gen_label_rtx ();
6855
6856 /* Calculate the return object size. */
6857 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
6858 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
6859 /* Construct a temporary return value. */
6860 rtx temp_val
6861 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
6862
6863 /* Implement SPARC 32-bit psABI callee return struct checking:
6864
6865 Fetch the instruction where we will return to and see if
6866 it's an unimp instruction (the most significant 10 bits
6867 will be zero). */
6868 emit_move_insn (scratch, gen_rtx_MEM (SImode,
6869 plus_constant (Pmode,
6870 ret_reg, 8)));
6871 /* Assume the size is valid and pre-adjust. */
6872 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
6873 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
6874 0, endlab);
6875 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
6876 /* Write the address of the memory pointed to by temp_val into
6877 the memory pointed to by mem. */
6878 emit_move_insn (mem, XEXP (temp_val, 0));
6879 emit_label (endlab);
6880 }
6881
6882 return mem;
6883 }
6884 }
6885
6886 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
6887 For v9, function return values are subject to the same rules as arguments,
6888 except that up to 32 bytes may be returned in registers. */
6889
6890 static rtx
6891 sparc_function_value_1 (const_tree type, enum machine_mode mode,
6892 bool outgoing)
6893 {
6894 /* Beware that the two values are swapped here wrt function_arg. */
6895 int regbase = (outgoing
6896 ? SPARC_INCOMING_INT_ARG_FIRST
6897 : SPARC_OUTGOING_INT_ARG_FIRST);
6898 enum mode_class mclass = GET_MODE_CLASS (mode);
6899 int regno;
6900
6901 /* Vector types deserve special treatment because they are polymorphic wrt
6902 their mode, depending upon whether VIS instructions are enabled. */
6903 if (type && TREE_CODE (type) == VECTOR_TYPE)
6904 {
6905 HOST_WIDE_INT size = int_size_in_bytes (type);
6906 gcc_assert ((TARGET_ARCH32 && size <= 8)
6907 || (TARGET_ARCH64 && size <= 32));
6908
6909 if (mode == BLKmode)
6910 return function_arg_vector_value (size,
6911 SPARC_FP_ARG_FIRST);
6912 else
6913 mclass = MODE_FLOAT;
6914 }
6915
6916 if (TARGET_ARCH64 && type)
6917 {
6918 /* Structures up to 32 bytes in size are returned in registers. */
6919 if (TREE_CODE (type) == RECORD_TYPE)
6920 {
6921 HOST_WIDE_INT size = int_size_in_bytes (type);
6922 gcc_assert (size <= 32);
6923
6924 return function_arg_record_value (type, mode, 0, 1, regbase);
6925 }
6926
6927 /* Unions up to 32 bytes in size are returned in integer registers. */
6928 else if (TREE_CODE (type) == UNION_TYPE)
6929 {
6930 HOST_WIDE_INT size = int_size_in_bytes (type);
6931 gcc_assert (size <= 32);
6932
6933 return function_arg_union_value (size, mode, 0, regbase);
6934 }
6935
6936 /* Objects that require it are returned in FP registers. */
6937 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6938 ;
6939
6940 /* All other aggregate types are returned in an integer register in a
6941 mode corresponding to the size of the type. */
6942 else if (AGGREGATE_TYPE_P (type))
6943 {
6944 /* All other aggregate types are passed in an integer register
6945 in a mode corresponding to the size of the type. */
6946 HOST_WIDE_INT size = int_size_in_bytes (type);
6947 gcc_assert (size <= 32);
6948
6949 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6950
6951 /* ??? We probably should have made the same ABI change in
6952 3.4.0 as the one we made for unions. The latter was
6953 required by the SCD though, while the former is not
6954 specified, so we favored compatibility and efficiency.
6955
6956 Now we're stuck for aggregates larger than 16 bytes,
6957 because OImode vanished in the meantime. Let's not
6958 try to be unduly clever, and simply follow the ABI
6959 for unions in that case. */
6960 if (mode == BLKmode)
6961 return function_arg_union_value (size, mode, 0, regbase);
6962 else
6963 mclass = MODE_INT;
6964 }
6965
6966 /* We should only have pointer and integer types at this point. This
6967 must match sparc_promote_function_mode. */
6968 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6969 mode = word_mode;
6970 }
6971
6972 /* We should only have pointer and integer types at this point. This must
6973 match sparc_promote_function_mode. */
6974 else if (TARGET_ARCH32
6975 && mclass == MODE_INT
6976 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6977 mode = word_mode;
6978
6979 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
6980 regno = SPARC_FP_ARG_FIRST;
6981 else
6982 regno = regbase;
6983
6984 return gen_rtx_REG (mode, regno);
6985 }
6986
6987 /* Handle TARGET_FUNCTION_VALUE.
6988 On the SPARC, the value is found in the first "output" register, but the
6989 called function leaves it in the first "input" register. */
6990
6991 static rtx
6992 sparc_function_value (const_tree valtype,
6993 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
6994 bool outgoing)
6995 {
6996 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
6997 }
6998
6999 /* Handle TARGET_LIBCALL_VALUE. */
7000
7001 static rtx
7002 sparc_libcall_value (enum machine_mode mode,
7003 const_rtx fun ATTRIBUTE_UNUSED)
7004 {
7005 return sparc_function_value_1 (NULL_TREE, mode, false);
7006 }
7007
7008 /* Handle FUNCTION_VALUE_REGNO_P.
7009 On the SPARC, the first "output" reg is used for integer values, and the
7010 first floating point register is used for floating point values. */
7011
7012 static bool
7013 sparc_function_value_regno_p (const unsigned int regno)
7014 {
7015 return (regno == 8 || regno == 32);
7016 }
7017
7018 /* Do what is necessary for `va_start'. We look at the current function
7019 to determine if stdarg or varargs is used and return the address of
7020 the first unnamed parameter. */
7021
7022 static rtx
7023 sparc_builtin_saveregs (void)
7024 {
7025 int first_reg = crtl->args.info.words;
7026 rtx address;
7027 int regno;
7028
7029 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7030 emit_move_insn (gen_rtx_MEM (word_mode,
7031 gen_rtx_PLUS (Pmode,
7032 frame_pointer_rtx,
7033 GEN_INT (FIRST_PARM_OFFSET (0)
7034 + (UNITS_PER_WORD
7035 * regno)))),
7036 gen_rtx_REG (word_mode,
7037 SPARC_INCOMING_INT_ARG_FIRST + regno));
7038
7039 address = gen_rtx_PLUS (Pmode,
7040 frame_pointer_rtx,
7041 GEN_INT (FIRST_PARM_OFFSET (0)
7042 + UNITS_PER_WORD * first_reg));
7043
7044 return address;
7045 }
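
/* A sketch of the effect (illustrative only): for

	int f (int a, ...);

   crtl->args.info.words is 1 after the named `a', so the loop above
   spills the incoming registers for slots 1..5 to their stack slots and
   the returned address points at the first of them, where va_arg will
   look for the first unnamed argument.  */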
7046
7047 /* Implement `va_start' for stdarg. */
7048
7049 static void
7050 sparc_va_start (tree valist, rtx nextarg)
7051 {
7052 nextarg = expand_builtin_saveregs ();
7053 std_expand_builtin_va_start (valist, nextarg);
7054 }
7055
7056 /* Implement `va_arg' for stdarg. */
7057
7058 static tree
7059 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7060 gimple_seq *post_p)
7061 {
7062 HOST_WIDE_INT size, rsize, align;
7063 tree addr, incr;
7064 bool indirect;
7065 tree ptrtype = build_pointer_type (type);
7066
7067 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7068 {
7069 indirect = true;
7070 size = rsize = UNITS_PER_WORD;
7071 align = 0;
7072 }
7073 else
7074 {
7075 indirect = false;
7076 size = int_size_in_bytes (type);
7077 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7078 align = 0;
7079
7080 if (TARGET_ARCH64)
7081 {
7082 /* For SPARC64, objects requiring 16-byte alignment get it. */
7083 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7084 align = 2 * UNITS_PER_WORD;
7085
7086 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7087 are left-justified in their slots. */
7088 if (AGGREGATE_TYPE_P (type))
7089 {
7090 if (size == 0)
7091 size = rsize = UNITS_PER_WORD;
7092 else
7093 size = rsize;
7094 }
7095 }
7096 }
7097
7098 incr = valist;
7099 if (align)
7100 {
7101 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7102 incr = fold_convert (sizetype, incr);
7103 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7104 size_int (-align));
7105 incr = fold_convert (ptr_type_node, incr);
7106 }
7107
7108 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7109 addr = incr;
7110
7111 if (BYTES_BIG_ENDIAN && size < rsize)
7112 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7113
7114 if (indirect)
7115 {
7116 addr = fold_convert (build_pointer_type (ptrtype), addr);
7117 addr = build_va_arg_indirect_ref (addr);
7118 }
7119
7120 /* If the address isn't aligned properly for the type, we need a temporary.
7121 FIXME: This is inefficient; usually we can do this in registers. */
7122 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7123 {
7124 tree tmp = create_tmp_var (type, "va_arg_tmp");
7125 tree dest_addr = build_fold_addr_expr (tmp);
7126 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7127 3, dest_addr, addr, size_int (rsize));
7128 TREE_ADDRESSABLE (tmp) = 1;
7129 gimplify_and_add (copy, pre_p);
7130 addr = dest_addr;
7131 }
7132
7133 else
7134 addr = fold_convert (ptrtype, addr);
7135
7136 incr = fold_build_pointer_plus_hwi (incr, rsize);
7137 gimplify_assign (valist, incr, post_p);
7138
7139 return build_va_arg_indirect_ref (addr);
7140 }
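
/* For example (illustrative only), fetching a 16-byte-aligned type such
   as `long double' with va_arg on 64-bit first rounds the running
   pointer up to a multiple of 16 via the BIT_AND_EXPR above, then
   advances it by rsize == 16 past the value just fetched.  */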
7141 \f
7142 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7143 Specify whether the vector mode is supported by the hardware. */
7144
7145 static bool
7146 sparc_vector_mode_supported_p (enum machine_mode mode)
7147 {
7148 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
7149 }
7150 \f
7151 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7152
7153 static enum machine_mode
7154 sparc_preferred_simd_mode (enum machine_mode mode)
7155 {
7156 if (TARGET_VIS)
7157 switch (mode)
7158 {
7159 case SImode:
7160 return V2SImode;
7161 case HImode:
7162 return V4HImode;
7163 case QImode:
7164 return V8QImode;
7165
7166 default:;
7167 }
7168
7169 return word_mode;
7170 }
7171 \f
7172 /* Return the string to output an unconditional branch; the label is
7173 always operand 0 of the branch pattern.
7174
7175 DEST is the destination insn (i.e. the label), INSN is the source. */
7176
7177 const char *
7178 output_ubranch (rtx dest, rtx insn)
7179 {
7180 static char string[64];
7181 bool v9_form = false;
7182 int delta;
7183 char *p;
7184
7185 /* Even if we are trying to use cbcond for this, evaluate
7186 whether we can use V9 branches as our backup plan. */
7187
7188 delta = 5000000;
7189 if (INSN_ADDRESSES_SET_P ())
7190 delta = (INSN_ADDRESSES (INSN_UID (dest))
7191 - INSN_ADDRESSES (INSN_UID (insn)));
7192
7193 /* Leave some instructions for "slop". */
7194 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7195 v9_form = true;
7196
7197 if (TARGET_CBCOND)
7198 {
7199 bool emit_nop = emit_cbcond_nop (insn);
7200 bool far = false;
7201 const char *rval;
7202
7203 if (delta < -500 || delta > 500)
7204 far = true;
7205
7206 if (far)
7207 {
7208 if (v9_form)
7209 rval = "ba,a,pt\t%%xcc, %l0";
7210 else
7211 rval = "b,a\t%l0";
7212 }
7213 else
7214 {
7215 if (emit_nop)
7216 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7217 else
7218 rval = "cwbe\t%%g0, %%g0, %l0";
7219 }
7220 return rval;
7221 }
7222
7223 if (v9_form)
7224 strcpy (string, "ba%*,pt\t%%xcc, ");
7225 else
7226 strcpy (string, "b%*\t");
7227
7228 p = strchr (string, '\0');
7229 *p++ = '%';
7230 *p++ = 'l';
7231 *p++ = '0';
7232 *p++ = '%';
7233 *p++ = '(';
7234 *p = '\0';
7235
7236 return string;
7237 }
7238
7239 /* Return the string to output a conditional branch to LABEL, which is
7240 the operand number of the label. OP is the conditional expression.
7241 XEXP (OP, 0) is assumed to be a condition code register (integer or
7242 floating point) and its mode specifies what kind of comparison we made.
7243
7244 DEST is the destination insn (i.e. the label), INSN is the source.
7245
7246 REVERSED is nonzero if we should reverse the sense of the comparison.
7247
7248 ANNUL is nonzero if we should generate an annulling branch. */
7249
7250 const char *
7251 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7252 rtx insn)
7253 {
7254 static char string[64];
7255 enum rtx_code code = GET_CODE (op);
7256 rtx cc_reg = XEXP (op, 0);
7257 enum machine_mode mode = GET_MODE (cc_reg);
7258 const char *labelno, *branch;
7259 int spaces = 8, far;
7260 char *p;
7261
7262 /* v9 branches are limited to +-1MB. If it is too far away,
7263 change
7264
7265 bne,pt %xcc, .LC30
7266
7267 to
7268
7269 be,pn %xcc, .+12
7270 nop
7271 ba .LC30
7272
7273 and
7274
7275 fbne,a,pn %fcc2, .LC29
7276
7277 to
7278
7279 fbe,pt %fcc2, .+16
7280 nop
7281 ba .LC29 */
7282
7283 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7284 if (reversed ^ far)
7285 {
7286 /* Reversal of FP compares needs care -- an ordered compare
7287 becomes an unordered compare and vice versa.  */
7288 if (mode == CCFPmode || mode == CCFPEmode)
7289 code = reverse_condition_maybe_unordered (code);
7290 else
7291 code = reverse_condition (code);
7292 }
7293
7294 /* Start by writing the branch condition. */
7295 if (mode == CCFPmode || mode == CCFPEmode)
7296 {
7297 switch (code)
7298 {
7299 case NE:
7300 branch = "fbne";
7301 break;
7302 case EQ:
7303 branch = "fbe";
7304 break;
7305 case GE:
7306 branch = "fbge";
7307 break;
7308 case GT:
7309 branch = "fbg";
7310 break;
7311 case LE:
7312 branch = "fble";
7313 break;
7314 case LT:
7315 branch = "fbl";
7316 break;
7317 case UNORDERED:
7318 branch = "fbu";
7319 break;
7320 case ORDERED:
7321 branch = "fbo";
7322 break;
7323 case UNGT:
7324 branch = "fbug";
7325 break;
7326 case UNLT:
7327 branch = "fbul";
7328 break;
7329 case UNEQ:
7330 branch = "fbue";
7331 break;
7332 case UNGE:
7333 branch = "fbuge";
7334 break;
7335 case UNLE:
7336 branch = "fbule";
7337 break;
7338 case LTGT:
7339 branch = "fblg";
7340 break;
7341
7342 default:
7343 gcc_unreachable ();
7344 }
7345
7346 /* ??? !v9: FP branches cannot be preceded by another floating point
7347 insn. Because there is currently no concept of pre-delay slots,
7348 we can fix this only by always emitting a nop before a floating
7349 point branch. */
7350
7351 string[0] = '\0';
7352 if (! TARGET_V9)
7353 strcpy (string, "nop\n\t");
7354 strcat (string, branch);
7355 }
7356 else
7357 {
7358 switch (code)
7359 {
7360 case NE:
7361 branch = "bne";
7362 break;
7363 case EQ:
7364 branch = "be";
7365 break;
7366 case GE:
7367 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7368 branch = "bpos";
7369 else
7370 branch = "bge";
7371 break;
7372 case GT:
7373 branch = "bg";
7374 break;
7375 case LE:
7376 branch = "ble";
7377 break;
7378 case LT:
7379 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7380 branch = "bneg";
7381 else
7382 branch = "bl";
7383 break;
7384 case GEU:
7385 branch = "bgeu";
7386 break;
7387 case GTU:
7388 branch = "bgu";
7389 break;
7390 case LEU:
7391 branch = "bleu";
7392 break;
7393 case LTU:
7394 branch = "blu";
7395 break;
7396
7397 default:
7398 gcc_unreachable ();
7399 }
7400 strcpy (string, branch);
7401 }
7402 spaces -= strlen (branch);
7403 p = strchr (string, '\0');
7404
7405 /* Now add the annulling, the label, and a possible noop. */
7406 if (annul && ! far)
7407 {
7408 strcpy (p, ",a");
7409 p += 2;
7410 spaces -= 2;
7411 }
7412
7413 if (TARGET_V9)
7414 {
7415 rtx note;
7416 int v8 = 0;
7417
7418 if (! far && insn && INSN_ADDRESSES_SET_P ())
7419 {
7420 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7421 - INSN_ADDRESSES (INSN_UID (insn)));
7422 /* Leave some instructions for "slop". */
7423 if (delta < -260000 || delta >= 260000)
7424 v8 = 1;
7425 }
7426
7427 if (mode == CCFPmode || mode == CCFPEmode)
7428 {
7429 static char v9_fcc_labelno[] = "%%fccX, ";
7430 /* Set the char indicating the number of the fcc reg to use. */
7431 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7432 labelno = v9_fcc_labelno;
7433 if (v8)
7434 {
7435 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7436 labelno = "";
7437 }
7438 }
7439 else if (mode == CCXmode || mode == CCX_NOOVmode)
7440 {
7441 labelno = "%%xcc, ";
7442 gcc_assert (! v8);
7443 }
7444 else
7445 {
7446 labelno = "%%icc, ";
7447 if (v8)
7448 labelno = "";
7449 }
7450
7451 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7452 {
7453 strcpy (p,
7454 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
7455 ? ",pt" : ",pn");
7456 p += 3;
7457 spaces -= 3;
7458 }
7459 }
7460 else
7461 labelno = "";
7462
7463 if (spaces > 0)
7464 *p++ = '\t';
7465 else
7466 *p++ = ' ';
7467 strcpy (p, labelno);
7468 p = strchr (p, '\0');
7469 if (far)
7470 {
7471 strcpy (p, ".+12\n\t nop\n\tb\t");
7472 /* Skip the next insn if requested or if we know that it will
7473 be a nop, by patching the ".+12" offset to ".+16".  */
7474 if (annul || ! final_sequence)
7475 p[3] = '6';
7476 p += 14;
7477 }
7478 *p++ = '%';
7479 *p++ = 'l';
7480 *p++ = label + '0';
7481 *p++ = '%';
7482 *p++ = '#';
7483 *p = '\0';
7484
7485 return string;
7486 }
7487
7488 /* Emit a library call comparison between floating point X and Y.
7489 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7490 Return the new operator to be used in the comparison sequence.
7491
7492 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7493 values as arguments instead of the TFmode registers themselves;
7494 that is why we cannot call emit_float_lib_cmp.  */
7495
7496 rtx
7497 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7498 {
7499 const char *qpfunc;
7500 rtx slot0, slot1, result, tem, tem2, libfunc;
7501 enum machine_mode mode;
7502 enum rtx_code new_comparison;
7503
7504 switch (comparison)
7505 {
7506 case EQ:
7507 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7508 break;
7509
7510 case NE:
7511 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7512 break;
7513
7514 case GT:
7515 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7516 break;
7517
7518 case GE:
7519 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7520 break;
7521
7522 case LT:
7523 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7524 break;
7525
7526 case LE:
7527 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7528 break;
7529
7530 case ORDERED:
7531 case UNORDERED:
7532 case UNGT:
7533 case UNLT:
7534 case UNEQ:
7535 case UNGE:
7536 case UNLE:
7537 case LTGT:
7538 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7539 break;
7540
7541 default:
7542 gcc_unreachable ();
7543 }
7544
7545 if (TARGET_ARCH64)
7546 {
7547 if (MEM_P (x))
7548 {
7549 tree expr = MEM_EXPR (x);
7550 if (expr)
7551 mark_addressable (expr);
7552 slot0 = x;
7553 }
7554 else
7555 {
7556 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
7557 emit_move_insn (slot0, x);
7558 }
7559
7560 if (MEM_P (y))
7561 {
7562 tree expr = MEM_EXPR (y);
7563 if (expr)
7564 mark_addressable (expr);
7565 slot1 = y;
7566 }
7567 else
7568 {
7569 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
7570 emit_move_insn (slot1, y);
7571 }
7572
7573 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7574 emit_library_call (libfunc, LCT_NORMAL,
7575 DImode, 2,
7576 XEXP (slot0, 0), Pmode,
7577 XEXP (slot1, 0), Pmode);
7578 mode = DImode;
7579 }
7580 else
7581 {
7582 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7583 emit_library_call (libfunc, LCT_NORMAL,
7584 SImode, 2,
7585 x, TFmode, y, TFmode);
7586 mode = SImode;
7587 }
7588
7589
7590 /* Immediately move the result of the libcall into a pseudo
7591 register so reload doesn't clobber the value if it needs
7592 the return register for a spill reg. */
7593 result = gen_reg_rtx (mode);
7594 emit_move_insn (result, hard_libcall_value (mode, libfunc));
7595
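  /* The _Q_cmp/_Qp_cmp routines return 0 for equal, 1 for less, 2 for
     greater and 3 for unordered; the bit tests below rely on this
     encoding.  */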
7596 switch (comparison)
7597 {
7598 default:
7599 return gen_rtx_NE (VOIDmode, result, const0_rtx);
7600 case ORDERED:
7601 case UNORDERED:
7602 new_comparison = (comparison == UNORDERED ? EQ : NE);
7603 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT (3));
7604 case UNGT:
7605 case UNGE:
7606 new_comparison = (comparison == UNGT ? GT : NE);
7607 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
7608 case UNLE:
7609 return gen_rtx_NE (VOIDmode, result, const2_rtx);
7610 case UNLT:
7611 tem = gen_reg_rtx (mode);
7612 if (TARGET_ARCH32)
7613 emit_insn (gen_andsi3 (tem, result, const1_rtx));
7614 else
7615 emit_insn (gen_anddi3 (tem, result, const1_rtx));
7616 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
7617 case UNEQ:
7618 case LTGT:
7619 tem = gen_reg_rtx (mode);
7620 if (TARGET_ARCH32)
7621 emit_insn (gen_addsi3 (tem, result, const1_rtx));
7622 else
7623 emit_insn (gen_adddi3 (tem, result, const1_rtx));
7624 tem2 = gen_reg_rtx (mode);
7625 if (TARGET_ARCH32)
7626 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
7627 else
7628 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
7629 new_comparison = (comparison == UNEQ ? EQ : NE);
7630 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
7631 }
7632
7633 gcc_unreachable ();
7634 }
7635
7636 /* Generate an unsigned DImode to FP conversion. This is the same code
7637 optabs would emit if we didn't have TFmode patterns. */
7638
7639 void
7640 sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
7641 {
7642 rtx neglab, donelab, i0, i1, f0, in, out;
7643
7644 out = operands[0];
7645 in = force_reg (DImode, operands[1]);
7646 neglab = gen_label_rtx ();
7647 donelab = gen_label_rtx ();
7648 i0 = gen_reg_rtx (DImode);
7649 i1 = gen_reg_rtx (DImode);
7650 f0 = gen_reg_rtx (mode);
7651
7652 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
7653
7654 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
7655 emit_jump_insn (gen_jump (donelab));
7656 emit_barrier ();
7657
7658 emit_label (neglab);
7659
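  /* IN is >= 2^63 and thus out of signed range.  Halve it, folding the
     discarded low bit back in as a sticky rounding bit, convert, and
     double the result.  */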
7660 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
7661 emit_insn (gen_anddi3 (i1, in, const1_rtx));
7662 emit_insn (gen_iordi3 (i0, i0, i1));
7663 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
7664 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
7665
7666 emit_label (donelab);
7667 }
7668
7669 /* Generate an FP to unsigned DImode conversion. This is the same code
7670 optabs would emit if we didn't have TFmode patterns. */
7671
7672 void
7673 sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
7674 {
7675 rtx neglab, donelab, i0, i1, f0, in, out, limit;
7676
7677 out = operands[0];
7678 in = force_reg (mode, operands[1]);
7679 neglab = gen_label_rtx ();
7680 donelab = gen_label_rtx ();
7681 i0 = gen_reg_rtx (DImode);
7682 i1 = gen_reg_rtx (DImode);
7683 limit = gen_reg_rtx (mode);
7684 f0 = gen_reg_rtx (mode);
7685
7686 emit_move_insn (limit,
7687 CONST_DOUBLE_FROM_REAL_VALUE (
7688 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
7689 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
7690
7691 emit_insn (gen_rtx_SET (VOIDmode,
7692 out,
7693 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
7694 emit_jump_insn (gen_jump (donelab));
7695 emit_barrier ();
7696
7697 emit_label (neglab);
7698
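  /* IN is >= 2^63.  Subtract 2^63, convert with the signed pattern,
     then add 2^63 back by flipping the sign bit of the integer
     result.  */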
7699 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
7700 emit_insn (gen_rtx_SET (VOIDmode,
7701 i0,
7702 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
7703 emit_insn (gen_movdi (i1, const1_rtx));
7704 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
7705 emit_insn (gen_xordi3 (out, i0, i1));
7706
7707 emit_label (donelab);
7708 }
7709
7710 /* Return the string to output a compare and branch instruction to DEST.
7711 DEST is the destination insn (i.e. the label), INSN is the source,
7712 and OP is the conditional expression. */
7713
7714 const char *
7715 output_cbcond (rtx op, rtx dest, rtx insn)
7716 {
7717 enum machine_mode mode = GET_MODE (XEXP (op, 0));
7718 enum rtx_code code = GET_CODE (op);
7719 const char *cond_str, *tmpl;
7720 int far, emit_nop, len;
7721 static char string[64];
7722 char size_char;
7723
7724 /* Compare and Branch is limited to +-2KB. If it is too far away,
7725 change
7726
7727 cxbne X, Y, .LC30
7728
7729 to
7730
7731 cxbe X, Y, .+16
7732 nop
7733 ba,pt xcc, .LC30
7734 nop */
7735
7736 len = get_attr_length (insn);
7737
7738 far = len == 4;
7739 emit_nop = len == 2;
7740
7741 if (far)
7742 code = reverse_condition (code);
7743
7744 size_char = ((mode == SImode) ? 'w' : 'x');
7745
7746 switch (code)
7747 {
7748 case NE:
7749 cond_str = "ne";
7750 break;
7751
7752 case EQ:
7753 cond_str = "e";
7754 break;
7755
7756 case GE:
7757 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7758 cond_str = "pos";
7759 else
7760 cond_str = "ge";
7761 break;
7762
7763 case GT:
7764 cond_str = "g";
7765 break;
7766
7767 case LE:
7768 cond_str = "le";
7769 break;
7770
7771 case LT:
7772 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7773 cond_str = "neg";
7774 else
7775 cond_str = "l";
7776 break;
7777
7778 case GEU:
7779 cond_str = "cc";
7780 break;
7781
7782 case GTU:
7783 cond_str = "gu";
7784 break;
7785
7786 case LEU:
7787 cond_str = "leu";
7788 break;
7789
7790 case LTU:
7791 cond_str = "cs";
7792 break;
7793
7794 default:
7795 gcc_unreachable ();
7796 }
7797
7798 if (far)
7799 {
7800 int veryfar = 1, delta;
7801
7802 if (INSN_ADDRESSES_SET_P ())
7803 {
7804 delta = (INSN_ADDRESSES (INSN_UID (dest))
7805 - INSN_ADDRESSES (INSN_UID (insn)));
7806 /* Leave some instructions for "slop". */
7807 if (delta >= -260000 && delta < 260000)
7808 veryfar = 0;
7809 }
7810
7811 if (veryfar)
7812 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
7813 else
7814 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
7815 }
7816 else
7817 {
7818 if (emit_nop)
7819 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
7820 else
7821 tmpl = "c%cb%s\t%%1, %%2, %%3";
7822 }
7823
7824 snprintf (string, sizeof (string), tmpl, size_char, cond_str);
7825
7826 return string;
7827 }
7828
7829 /* Return the string to output a conditional branch to LABEL, testing
7830 register REG. LABEL is the operand number of the label; REG is the
7831 operand number of the reg. OP is the conditional expression. The mode
7832 of REG says what kind of comparison we made.
7833
7834 DEST is the destination insn (i.e. the label), INSN is the source.
7835
7836 REVERSED is nonzero if we should reverse the sense of the comparison.
7837
7838 ANNUL is nonzero if we should generate an annulling branch. */
7839
7840 const char *
7841 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
7842 int annul, rtx insn)
7843 {
7844 static char string[64];
7845 enum rtx_code code = GET_CODE (op);
7846 enum machine_mode mode = GET_MODE (XEXP (op, 0));
7847 rtx note;
7848 int far;
7849 char *p;
7850
7851 /* Branch-on-register insns are limited to +-128KB.  If it is too far away,
7852 change
7853
7854 brnz,pt %g1, .LC30
7855
7856 to
7857
7858 brz,pn %g1, .+12
7859 nop
7860 ba,pt %xcc, .LC30
7861
7862 and
7863
7864 brgez,a,pn %o1, .LC29
7865
7866 to
7867
7868 brlz,pt %o1, .+16
7869 nop
7870 ba,pt %xcc, .LC29 */
7871
7872 far = get_attr_length (insn) >= 3;
7873
7874 /* If not floating-point or if EQ or NE, we can just reverse the code. */
7875 if (reversed ^ far)
7876 code = reverse_condition (code);
7877
7878 /* Only 64 bit versions of these instructions exist. */
7879 gcc_assert (mode == DImode);
7880
7881 /* Start by writing the branch condition. */
7882
7883 switch (code)
7884 {
7885 case NE:
7886 strcpy (string, "brnz");
7887 break;
7888
7889 case EQ:
7890 strcpy (string, "brz");
7891 break;
7892
7893 case GE:
7894 strcpy (string, "brgez");
7895 break;
7896
7897 case LT:
7898 strcpy (string, "brlz");
7899 break;
7900
7901 case LE:
7902 strcpy (string, "brlez");
7903 break;
7904
7905 case GT:
7906 strcpy (string, "brgz");
7907 break;
7908
7909 default:
7910 gcc_unreachable ();
7911 }
7912
7913 p = strchr (string, '\0');
7914
7915 /* Now add the annulling, reg, label, and nop. */
7916 if (annul && ! far)
7917 {
7918 strcpy (p, ",a");
7919 p += 2;
7920 }
7921
7922 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7923 {
7924 strcpy (p,
7925 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
7926 ? ",pt" : ",pn");
7927 p += 3;
7928 }
7929
7930 *p = p < string + 8 ? '\t' : ' ';
7931 p++;
7932 *p++ = '%';
7933 *p++ = '0' + reg;
7934 *p++ = ',';
7935 *p++ = ' ';
7936 if (far)
7937 {
7938 int veryfar = 1, delta;
7939
7940 if (INSN_ADDRESSES_SET_P ())
7941 {
7942 delta = (INSN_ADDRESSES (INSN_UID (dest))
7943 - INSN_ADDRESSES (INSN_UID (insn)));
7944 /* Leave some instructions for "slop". */
7945 if (delta >= -260000 && delta < 260000)
7946 veryfar = 0;
7947 }
7948
7949 strcpy (p, ".+12\n\t nop\n\t");
7950 /* Skip the next insn if requested or if we know that it will
7951 be a nop, by patching the ".+12" offset to ".+16".  */
7952 if (annul || ! final_sequence)
7953 p[3] = '6';
7954 p += 12;
7955 if (veryfar)
7956 {
7957 strcpy (p, "b\t");
7958 p += 2;
7959 }
7960 else
7961 {
7962 strcpy (p, "ba,pt\t%%xcc, ");
7963 p += 13;
7964 }
7965 }
7966 *p++ = '%';
7967 *p++ = 'l';
7968 *p++ = '0' + label;
7969 *p++ = '%';
7970 *p++ = '#';
7971 *p = '\0';
7972
7973 return string;
7974 }
7975
7976 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
7977 Such instructions cannot be used in the delay slot of a return insn on v9.
7978 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7]
7979 counterparts.  */
7980
7981 static int
7982 epilogue_renumber (register rtx *where, int test)
7983 {
7984 register const char *fmt;
7985 register int i;
7986 register enum rtx_code code;
7987
7988 if (*where == 0)
7989 return 0;
7990
7991 code = GET_CODE (*where);
7992
7993 switch (code)
7994 {
7995 case REG:
7996 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
7997 return 1;
7998 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
7999 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
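      /* Fall through.  */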
8000 case SCRATCH:
8001 case CC0:
8002 case PC:
8003 case CONST_INT:
8004 case CONST_DOUBLE:
8005 return 0;
8006
8007 /* Do not replace the frame pointer with the stack pointer because
8008 it can cause the delayed instruction to load below the stack.
8009 This occurs when instructions like:
8010
8011 (set (reg/i:SI 24 %i0)
8012 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8013 (const_int -20 [0xffffffec])) 0))
8014
8015 are in the return delayed slot. */
8016 case PLUS:
8017 if (GET_CODE (XEXP (*where, 0)) == REG
8018 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8019 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8020 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8021 return 1;
8022 break;
8023
8024 case MEM:
8025 if (SPARC_STACK_BIAS
8026 && GET_CODE (XEXP (*where, 0)) == REG
8027 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8028 return 1;
8029 break;
8030
8031 default:
8032 break;
8033 }
8034
8035 fmt = GET_RTX_FORMAT (code);
8036
8037 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8038 {
8039 if (fmt[i] == 'E')
8040 {
8041 register int j;
8042 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8043 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8044 return 1;
8045 }
8046 else if (fmt[i] == 'e'
8047 && epilogue_renumber (&(XEXP (*where, i)), test))
8048 return 1;
8049 }
8050 return 0;
8051 }
8052 \f
8053 /* Leaf functions and non-leaf functions have different needs. */
8054
8055 static const int
8056 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8057
8058 static const int
8059 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8060
8061 static const int *const reg_alloc_orders[] = {
8062 reg_leaf_alloc_order,
8063 reg_nonleaf_alloc_order};
8064
8065 void
8066 order_regs_for_local_alloc (void)
8067 {
8068 static int last_order_nonleaf = 1;
8069
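  /* Hard register 15 is %o7, which call instructions clobber with the
     return address, so its liveness distinguishes non-leaf from leaf
     functions.  */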
8070 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8071 {
8072 last_order_nonleaf = !last_order_nonleaf;
8073 memcpy ((char *) reg_alloc_order,
8074 (const char *) reg_alloc_orders[last_order_nonleaf],
8075 FIRST_PSEUDO_REGISTER * sizeof (int));
8076 }
8077 }
8078 \f
8079 /* Return 1 if REG and MEM are legitimate enough to allow the various
8080 mem<-->reg splits to be run. */
8081
8082 int
8083 sparc_splitdi_legitimate (rtx reg, rtx mem)
8084 {
8085 /* Punt if we are here by mistake. */
8086 gcc_assert (reload_completed);
8087
8088 /* We must have an offsettable memory reference. */
8089 if (! offsettable_memref_p (mem))
8090 return 0;
8091
8092 /* If we have legitimate args for ldd/std, we do not want
8093 the split to happen. */
8094 if ((REGNO (reg) % 2) == 0
8095 && mem_min_alignment (mem, 8))
8096 return 0;
8097
8098 /* Success. */
8099 return 1;
8100 }
8101
8102 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8103
8104 int
8105 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8106 {
8107 int regno1, regno2;
8108
8109 if (GET_CODE (reg1) == SUBREG)
8110 reg1 = SUBREG_REG (reg1);
8111 if (GET_CODE (reg1) != REG)
8112 return 0;
8113 regno1 = REGNO (reg1);
8114
8115 if (GET_CODE (reg2) == SUBREG)
8116 reg2 = SUBREG_REG (reg2);
8117 if (GET_CODE (reg2) != REG)
8118 return 0;
8119 regno2 = REGNO (reg2);
8120
8121 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8122 return 1;
8123
8124 if (TARGET_VIS3)
8125 {
8126 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8127 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8128 return 1;
8129 }
8130
8131 return 0;
8132 }
8133
8134 /* Return 1 if x and y are some kind of REG and they refer to
8135 different hard registers. This test is guaranteed to be
8136 run after reload. */
8137
8138 int
8139 sparc_absnegfloat_split_legitimate (rtx x, rtx y)
8140 {
8141 if (GET_CODE (x) != REG)
8142 return 0;
8143 if (GET_CODE (y) != REG)
8144 return 0;
8145 if (REGNO (x) == REGNO (y))
8146 return 0;
8147 return 1;
8148 }
8149
8150 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8151 This makes them candidates for using ldd and std insns.
8152
8153 Note reg1 and reg2 *must* be hard registers. */
8154
8155 int
8156 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8157 {
8158 /* We might have been passed a SUBREG. */
8159 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8160 return 0;
8161
8162 if (REGNO (reg1) % 2 != 0)
8163 return 0;
8164
8165 /* Integer ldd is deprecated in SPARC V9.  */
8166 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8167 return 0;
8168
8169 return (REGNO (reg1) == REGNO (reg2) - 1);
8170 }
8171
8172 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8173 an ldd or std insn.
8174
8175 This can only happen when addr1 and addr2, the addresses in mem1
8176 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8177 addr1 must also be aligned on a 64-bit boundary.
8178
8179 Also, if dependent_reg_rtx is not null, it should not be used to
8180 compute the address for mem1, i.e. we cannot optimize a sequence
8181 like:
8182 ld [%o0], %o0
8183 ld [%o0 + 4], %o1
8184 to
8185 ldd [%o0], %o0
8186 nor:
8187 ld [%g3 + 4], %g3
8188 ld [%g3], %g2
8189 to
8190 ldd [%g3], %g2
8191
8192 But, note that the transformation from:
8193 ld [%g2 + 4], %g3
8194 ld [%g2], %g2
8195 to
8196 ldd [%g2], %g2
8197 is perfectly fine. Thus, the peephole2 patterns always pass us
8198 the destination register of the first load, never the second one.
8199
8200 For stores we don't have a similar problem, so dependent_reg_rtx is
8201 NULL_RTX. */
8202
8203 int
8204 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8205 {
8206 rtx addr1, addr2;
8207 unsigned int reg1;
8208 HOST_WIDE_INT offset1;
8209
8210 /* The mems cannot be volatile. */
8211 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8212 return 0;
8213
8214 /* MEM1 should be aligned on a 64-bit boundary. */
8215 if (MEM_ALIGN (mem1) < 64)
8216 return 0;
8217
8218 addr1 = XEXP (mem1, 0);
8219 addr2 = XEXP (mem2, 0);
8220
8221 /* Extract a register number and offset (if used) from the first addr. */
8222 if (GET_CODE (addr1) == PLUS)
8223 {
8224 /* If not a REG, return zero. */
8225 if (GET_CODE (XEXP (addr1, 0)) != REG)
8226 return 0;
8227 else
8228 {
8229 reg1 = REGNO (XEXP (addr1, 0));
8230 /* The offset must be constant! */
8231 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8232 return 0;
8233 offset1 = INTVAL (XEXP (addr1, 1));
8234 }
8235 }
8236 else if (GET_CODE (addr1) != REG)
8237 return 0;
8238 else
8239 {
8240 reg1 = REGNO (addr1);
8241 /* This was a simple (mem (reg)) expression. Offset is 0. */
8242 offset1 = 0;
8243 }
8244
8245 /* Make sure the second address is a (mem (plus (reg) (const_int))).  */
8246 if (GET_CODE (addr2) != PLUS)
8247 return 0;
8248
8249 if (GET_CODE (XEXP (addr2, 0)) != REG
8250 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8251 return 0;
8252
8253 if (reg1 != REGNO (XEXP (addr2, 0)))
8254 return 0;
8255
8256 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8257 return 0;
8258
8259 /* The first offset must be evenly divisible by 8 to ensure the
8260 address is 64 bit aligned. */
8261 if (offset1 % 8 != 0)
8262 return 0;
8263
8264 /* The offset for the second addr must be 4 more than the first addr. */
8265 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8266 return 0;
8267
8268 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8269 instructions. */
8270 return 1;
8271 }
8272
8273 /* Return 1 if reg is a pseudo, or is the first register in
8274 a hard register pair. This makes it suitable for use in
8275 ldd and std insns. */
8276
8277 int
8278 register_ok_for_ldd (rtx reg)
8279 {
8280 /* We might have been passed a SUBREG. */
8281 if (!REG_P (reg))
8282 return 0;
8283
8284 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8285 return (REGNO (reg) % 2 == 0);
8286
8287 return 1;
8288 }
8289
8290 /* Return 1 if OP, a MEM, has an address which is known to be
8291 aligned to an 8-byte boundary. */
8292
8293 int
8294 memory_ok_for_ldd (rtx op)
8295 {
8296 /* In 64-bit mode, we assume that the address is word-aligned. */
8297 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8298 return 0;
8299
8300 if (! can_create_pseudo_p ()
8301 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8302 return 0;
8303
8304 return 1;
8305 }
8306 \f
8307 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8308
8309 static bool
8310 sparc_print_operand_punct_valid_p (unsigned char code)
8311 {
8312 if (code == '#'
8313 || code == '*'
8314 || code == '('
8315 || code == ')'
8316 || code == '_'
8317 || code == '&')
8318 return true;
8319
8320 return false;
8321 }
8322
8323 /* Implement TARGET_PRINT_OPERAND.
8324 Print operand X (an rtx) in assembler syntax to file FILE.
8325 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8326 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8327
8328 static void
8329 sparc_print_operand (FILE *file, rtx x, int code)
8330 {
8331 switch (code)
8332 {
8333 case '#':
8334 /* Output an insn in a delay slot. */
8335 if (final_sequence)
8336 sparc_indent_opcode = 1;
8337 else
8338 fputs ("\n\t nop", file);
8339 return;
8340 case '*':
8341 /* Output an annul flag if there's nothing for the delay slot and we
8342 are optimizing. This is always used with '(' below.
8343 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8344 this is a dbx bug. So, we only do this when optimizing.
8345 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8346 Always emit a nop in case the next instruction is a branch. */
8347 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8348 fputs (",a", file);
8349 return;
8350 case '(':
8351 /* Output a 'nop' if there's nothing for the delay slot and we are
8352 not optimizing. This is always used with '*' above. */
8353 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8354 fputs ("\n\t nop", file);
8355 else if (final_sequence)
8356 sparc_indent_opcode = 1;
8357 return;
8358 case ')':
8359 /* Output the right displacement from the saved PC on function return.
8360 The caller may have placed an "unimp" insn immediately after the call
8361 so we have to account for it. This insn is used in the 32-bit ABI
8362 when calling a function that returns a non zero-sized structure. The
8363 64-bit ABI doesn't have it. Be careful to have this test be the same
8364 as that for the call. The exception is when sparc_std_struct_return
8365 is enabled, the psABI is followed exactly and the adjustment is made
8366 by the code in sparc_struct_value_rtx. The call emitted is the same
8367 when sparc_std_struct_return is enabled. */
8368 if (!TARGET_ARCH64
8369 && cfun->returns_struct
8370 && !sparc_std_struct_return
8371 && DECL_SIZE (DECL_RESULT (current_function_decl))
8372 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8373 == INTEGER_CST
8374 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8375 fputs ("12", file);
8376 else
8377 fputc ('8', file);
8378 return;
8379 case '_':
8380 /* Output the Embedded Medium/Anywhere code model base register. */
8381 fputs (EMBMEDANY_BASE_REG, file);
8382 return;
8383 case '&':
8384 /* Print some local dynamic TLS name. */
8385 assemble_name (file, get_some_local_dynamic_name ());
8386 return;
8387
8388 case 'Y':
8389 /* Adjust the operand to take into account a RESTORE operation. */
8390 if (GET_CODE (x) == CONST_INT)
8391 break;
8392 else if (GET_CODE (x) != REG)
8393 output_operand_lossage ("invalid %%Y operand");
8394 else if (REGNO (x) < 8)
8395 fputs (reg_names[REGNO (x)], file);
8396 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8397 fputs (reg_names[REGNO (x)-16], file);
8398 else
8399 output_operand_lossage ("invalid %%Y operand");
8400 return;
8401 case 'L':
8402 /* Print out the low order register name of a register pair. */
8403 if (WORDS_BIG_ENDIAN)
8404 fputs (reg_names[REGNO (x)+1], file);
8405 else
8406 fputs (reg_names[REGNO (x)], file);
8407 return;
8408 case 'H':
8409 /* Print out the high order register name of a register pair. */
8410 if (WORDS_BIG_ENDIAN)
8411 fputs (reg_names[REGNO (x)], file);
8412 else
8413 fputs (reg_names[REGNO (x)+1], file);
8414 return;
8415 case 'R':
8416 /* Print out the second register name of a register pair or quad.
8417 I.e., R (%o0) => %o1. */
8418 fputs (reg_names[REGNO (x)+1], file);
8419 return;
8420 case 'S':
8421 /* Print out the third register name of a register quad.
8422 I.e., S (%o0) => %o2. */
8423 fputs (reg_names[REGNO (x)+2], file);
8424 return;
8425 case 'T':
8426 /* Print out the fourth register name of a register quad.
8427 I.e., T (%o0) => %o3. */
8428 fputs (reg_names[REGNO (x)+3], file);
8429 return;
8430 case 'x':
8431 /* Print a condition code register. */
8432 if (REGNO (x) == SPARC_ICC_REG)
8433 {
8434 /* We don't handle CC[X]_NOOVmode because they're not supposed
8435 to occur here. */
8436 if (GET_MODE (x) == CCmode)
8437 fputs ("%icc", file);
8438 else if (GET_MODE (x) == CCXmode)
8439 fputs ("%xcc", file);
8440 else
8441 gcc_unreachable ();
8442 }
8443 else
8444 /* %fccN register */
8445 fputs (reg_names[REGNO (x)], file);
8446 return;
8447 case 'm':
8448 /* Print the operand's address only. */
8449 output_address (XEXP (x, 0));
8450 return;
8451 case 'r':
8452 /* In this case we need a register. Use %g0 if the
8453 operand is const0_rtx. */
8454 if (x == const0_rtx
8455 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8456 {
8457 fputs ("%g0", file);
8458 return;
8459 }
8460 else
8461 break;
8462
8463 case 'A':
8464 switch (GET_CODE (x))
8465 {
8466 case IOR: fputs ("or", file); break;
8467 case AND: fputs ("and", file); break;
8468 case XOR: fputs ("xor", file); break;
8469 default: output_operand_lossage ("invalid %%A operand");
8470 }
8471 return;
8472
8473 case 'B':
8474 switch (GET_CODE (x))
8475 {
8476 case IOR: fputs ("orn", file); break;
8477 case AND: fputs ("andn", file); break;
8478 case XOR: fputs ("xnor", file); break;
8479 default: output_operand_lossage ("invalid %%B operand");
8480 }
8481 return;
8482
8483 /* This is used by the conditional move instructions. */
8484 case 'C':
8485 {
8486 enum rtx_code rc = GET_CODE (x);
8487
8488 switch (rc)
8489 {
8490 case NE: fputs ("ne", file); break;
8491 case EQ: fputs ("e", file); break;
8492 case GE: fputs ("ge", file); break;
8493 case GT: fputs ("g", file); break;
8494 case LE: fputs ("le", file); break;
8495 case LT: fputs ("l", file); break;
8496 case GEU: fputs ("geu", file); break;
8497 case GTU: fputs ("gu", file); break;
8498 case LEU: fputs ("leu", file); break;
8499 case LTU: fputs ("lu", file); break;
8500 case LTGT: fputs ("lg", file); break;
8501 case UNORDERED: fputs ("u", file); break;
8502 case ORDERED: fputs ("o", file); break;
8503 case UNLT: fputs ("ul", file); break;
8504 case UNLE: fputs ("ule", file); break;
8505 case UNGT: fputs ("ug", file); break;
8506 case UNGE: fputs ("uge", file); break;
8507 case UNEQ: fputs ("ue", file); break;
8508 default: output_operand_lossage ("invalid %%C operand");
8509 }
8510 return;
8511 }
8512
8513 /* These are used by the movr instruction pattern.  */
8514 case 'D':
8515 {
8516 enum rtx_code rc = GET_CODE (x);
8517 switch (rc)
8518 {
8519 case NE: fputs ("ne", file); break;
8520 case EQ: fputs ("e", file); break;
8521 case GE: fputs ("gez", file); break;
8522 case LT: fputs ("lz", file); break;
8523 case LE: fputs ("lez", file); break;
8524 case GT: fputs ("gz", file); break;
8525 default: output_operand_lossage ("invalid %%D operand");
8526 }
8527 return;
8528 }
8529
8530 case 'b':
8531 {
8532 /* Print a sign-extended character. */
8533 int i = trunc_int_for_mode (INTVAL (x), QImode);
8534 fprintf (file, "%d", i);
8535 return;
8536 }
8537
8538 case 'f':
8539 /* Operand must be a MEM; write its address. */
8540 if (GET_CODE (x) != MEM)
8541 output_operand_lossage ("invalid %%f operand");
8542 output_address (XEXP (x, 0));
8543 return;
8544
8545 case 's':
8546 {
8547 /* Print a sign-extended 32-bit value. */
8548 HOST_WIDE_INT i;
8549 if (GET_CODE (x) == CONST_INT)
8550 i = INTVAL (x);
8551 else if (GET_CODE (x) == CONST_DOUBLE)
8552 i = CONST_DOUBLE_LOW (x);
8553 else
8554 {
8555 output_operand_lossage ("invalid %%s operand");
8556 return;
8557 }
8558 i = trunc_int_for_mode (i, SImode);
8559 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
8560 return;
8561 }
8562
8563 case 0:
8564 /* Do nothing special. */
8565 break;
8566
8567 default:
8568 /* Undocumented flag. */
8569 output_operand_lossage ("invalid operand output code");
8570 }
8571
8572 if (GET_CODE (x) == REG)
8573 fputs (reg_names[REGNO (x)], file);
8574 else if (GET_CODE (x) == MEM)
8575 {
8576 fputc ('[', file);
8577 /* Poor Sun assembler doesn't understand absolute addressing. */
8578 if (CONSTANT_P (XEXP (x, 0)))
8579 fputs ("%g0+", file);
8580 output_address (XEXP (x, 0));
8581 fputc (']', file);
8582 }
8583 else if (GET_CODE (x) == HIGH)
8584 {
8585 fputs ("%hi(", file);
8586 output_addr_const (file, XEXP (x, 0));
8587 fputc (')', file);
8588 }
8589 else if (GET_CODE (x) == LO_SUM)
8590 {
8591 sparc_print_operand (file, XEXP (x, 0), 0);
8592 if (TARGET_CM_MEDMID)
8593 fputs ("+%l44(", file);
8594 else
8595 fputs ("+%lo(", file);
8596 output_addr_const (file, XEXP (x, 1));
8597 fputc (')', file);
8598 }
8599 else if (GET_CODE (x) == CONST_DOUBLE
8600 && (GET_MODE (x) == VOIDmode
8601 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
8602 {
8603 if (CONST_DOUBLE_HIGH (x) == 0)
8604 fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
8605 else if (CONST_DOUBLE_HIGH (x) == -1
8606 && CONST_DOUBLE_LOW (x) < 0)
8607 fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
8608 else
8609 output_operand_lossage ("long long constant not a valid immediate operand");
8610 }
8611 else if (GET_CODE (x) == CONST_DOUBLE)
8612 output_operand_lossage ("floating point constant not a valid immediate operand");
8613 else { output_addr_const (file, x); }
8614 }
8615
8616 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
8617
8618 static void
8619 sparc_print_operand_address (FILE *file, rtx x)
8620 {
8621 register rtx base, index = 0;
8622 int offset = 0;
8623 register rtx addr = x;
8624
8625 if (REG_P (addr))
8626 fputs (reg_names[REGNO (addr)], file);
8627 else if (GET_CODE (addr) == PLUS)
8628 {
8629 if (CONST_INT_P (XEXP (addr, 0)))
8630 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
8631 else if (CONST_INT_P (XEXP (addr, 1)))
8632 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
8633 else
8634 base = XEXP (addr, 0), index = XEXP (addr, 1);
8635 if (GET_CODE (base) == LO_SUM)
8636 {
8637 gcc_assert (USE_AS_OFFSETABLE_LO10
8638 && TARGET_ARCH64
8639 && ! TARGET_CM_MEDMID);
8640 output_operand (XEXP (base, 0), 0);
8641 fputs ("+%lo(", file);
8642 output_address (XEXP (base, 1));
8643 fprintf (file, ")+%d", offset);
8644 }
8645 else
8646 {
8647 fputs (reg_names[REGNO (base)], file);
8648 if (index == 0)
8649 fprintf (file, "%+d", offset);
8650 else if (REG_P (index))
8651 fprintf (file, "+%s", reg_names[REGNO (index)]);
8652 else if (GET_CODE (index) == SYMBOL_REF
8653 || GET_CODE (index) == LABEL_REF
8654 || GET_CODE (index) == CONST)
8655 fputc ('+', file), output_addr_const (file, index);
8656 else gcc_unreachable ();
8657 }
8658 }
8659 else if (GET_CODE (addr) == MINUS
8660 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
8661 {
8662 output_addr_const (file, XEXP (addr, 0));
8663 fputs ("-(", file);
8664 output_addr_const (file, XEXP (addr, 1));
8665 fputs ("-.)", file);
8666 }
8667 else if (GET_CODE (addr) == LO_SUM)
8668 {
8669 output_operand (XEXP (addr, 0), 0);
8670 if (TARGET_CM_MEDMID)
8671 fputs ("+%l44(", file);
8672 else
8673 fputs ("+%lo(", file);
8674 output_address (XEXP (addr, 1));
8675 fputc (')', file);
8676 }
8677 else if (flag_pic
8678 && GET_CODE (addr) == CONST
8679 && GET_CODE (XEXP (addr, 0)) == MINUS
8680 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
8681 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
8682 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
8683 {
8684 addr = XEXP (addr, 0);
8685 output_addr_const (file, XEXP (addr, 0));
8686 /* Group the args of the second CONST in parentheses.  */
8687 fputs ("-(", file);
8688 /* Skip past the second CONST--it does nothing for us. */
8689 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
8690 /* Close the parenthesis. */
8691 fputc (')', file);
8692 }
8693 else
8694 {
8695 output_addr_const (file, addr);
8696 }
8697 }
8698 \f
8699 /* Target hook for assembling integer objects. The sparc version has
8700 special handling for aligned DI-mode objects. */
8701
8702 static bool
8703 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
8704 {
8705 /* ??? We only output .xword's for symbols and only then in environments
8706 where the assembler can handle them. */
8707 if (aligned_p && size == 8
8708 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
8709 {
8710 if (TARGET_V9)
8711 {
8712 assemble_integer_with_op ("\t.xword\t", x);
8713 return true;
8714 }
8715 else
8716 {
8717 assemble_aligned_integer (4, const0_rtx);
8718 assemble_aligned_integer (4, x);
8719 return true;
8720 }
8721 }
8722 return default_assemble_integer (x, size, aligned_p);
8723 }
8724 \f
8725 /* Return the value of a code used in the .proc pseudo-op that says
8726 what kind of result this function returns. For non-C types, we pick
8727 the closest C type. */
8728
8729 #ifndef SHORT_TYPE_SIZE
8730 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
8731 #endif
8732
8733 #ifndef INT_TYPE_SIZE
8734 #define INT_TYPE_SIZE BITS_PER_WORD
8735 #endif
8736
8737 #ifndef LONG_TYPE_SIZE
8738 #define LONG_TYPE_SIZE BITS_PER_WORD
8739 #endif
8740
8741 #ifndef LONG_LONG_TYPE_SIZE
8742 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
8743 #endif
8744
8745 #ifndef FLOAT_TYPE_SIZE
8746 #define FLOAT_TYPE_SIZE BITS_PER_WORD
8747 #endif
8748
8749 #ifndef DOUBLE_TYPE_SIZE
8750 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
8751 #endif
8752
8753 #ifndef LONG_DOUBLE_TYPE_SIZE
8754 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
8755 #endif
8756
8757 unsigned long
8758 sparc_type_code (register tree type)
8759 {
8760 register unsigned long qualifiers = 0;
8761 register unsigned shift;
8762
8763 /* Only the first 30 bits of the qualifier are valid. We must refrain from
8764 setting more, since some assemblers will give an error for this. Also,
8765 we must be careful to avoid shifts of 32 bits or more to avoid getting
8766 unpredictable results. */
8767
8768 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
8769 {
8770 switch (TREE_CODE (type))
8771 {
8772 case ERROR_MARK:
8773 return qualifiers;
8774
8775 case ARRAY_TYPE:
8776 qualifiers |= (3 << shift);
8777 break;
8778
8779 case FUNCTION_TYPE:
8780 case METHOD_TYPE:
8781 qualifiers |= (2 << shift);
8782 break;
8783
8784 case POINTER_TYPE:
8785 case REFERENCE_TYPE:
8786 case OFFSET_TYPE:
8787 qualifiers |= (1 << shift);
8788 break;
8789
8790 case RECORD_TYPE:
8791 return (qualifiers | 8);
8792
8793 case UNION_TYPE:
8794 case QUAL_UNION_TYPE:
8795 return (qualifiers | 9);
8796
8797 case ENUMERAL_TYPE:
8798 return (qualifiers | 10);
8799
8800 case VOID_TYPE:
8801 return (qualifiers | 16);
8802
8803 case INTEGER_TYPE:
8804 /* If this is a range type, consider it to be the underlying
8805 type. */
8806 if (TREE_TYPE (type) != 0)
8807 break;
8808
8809 /* Carefully distinguish all the standard types of C,
8810 without messing up if the language is not C. We do this by
8811 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
8812 look at both the names and the above fields, but that's redundant.
8813 Any type whose size is between two C types will be considered
8814 to be the wider of the two types. Also, we do not have a
8815 special code to use for "long long", so anything wider than
8816 long is treated the same. Note that we can't distinguish
8817 between "int" and "long" in this code if they are the same
8818 size, but that's fine, since neither can the assembler. */
8819
8820 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
8821 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
8822
8823 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
8824 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
8825
8826 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
8827 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
8828
8829 else
8830 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
8831
8832 case REAL_TYPE:
8833 /* If this is a range type, consider it to be the underlying
8834 type. */
8835 if (TREE_TYPE (type) != 0)
8836 break;
8837
8838 /* Carefully distinguish all the standard types of C,
8839 without messing up if the language is not C. */
8840
8841 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
8842 return (qualifiers | 6);
8843
8844 else
8845 return (qualifiers | 7);
8846
8847 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
8848 /* ??? We need to distinguish between double and float complex types,
8849 but I don't know how yet because I can't reach this code from
8850 existing front-ends. */
8851 return (qualifiers | 7); /* Who knows? */
8852
8853 case VECTOR_TYPE:
8854 case BOOLEAN_TYPE: /* Boolean truth value type. */
8855 case LANG_TYPE:
8856 case NULLPTR_TYPE:
8857 return qualifiers;
8858
8859 default:
8860 gcc_unreachable (); /* Not a type! */
8861 }
8862 }
8863
8864 return qualifiers;
8865 }
8866 \f
8867 /* Nested function support. */
8868
8869 /* Emit RTL insns to initialize the variable parts of a trampoline.
8870 FNADDR is an RTX for the address of the function's pure code.
8871 CXT is an RTX for the static chain value for the function.
8872
8873 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
8874 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
8875 (to store insns). This is a bit excessive. Perhaps a different
8876 mechanism would be better here.
8877
8878 Emit enough FLUSH insns to synchronize the data and instruction caches. */
8879
8880 static void
8881 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
8882 {
8883 /* SPARC 32-bit trampoline:
8884
8885 sethi %hi(fn), %g1
8886 sethi %hi(static), %g2
8887 jmp %g1+%lo(fn)
8888 or %g2, %lo(static), %g2
8889
8890 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
8891 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
8892 */
8893
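  /* Each word stored below is a pre-encoded instruction whose address
     field is zero; the high or low address bits are ORed in:
       0x03000000  sethi  %hi(0), %g1
       0x05000000  sethi  %hi(0), %g2
       0x81c06000  jmp    %g1 + %lo(0)
       0x8410a000  or     %g2, %lo(0), %g2  */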
8894 emit_move_insn
8895 (adjust_address (m_tramp, SImode, 0),
8896 expand_binop (SImode, ior_optab,
8897 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
8898 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
8899 NULL_RTX, 1, OPTAB_DIRECT));
8900
8901 emit_move_insn
8902 (adjust_address (m_tramp, SImode, 4),
8903 expand_binop (SImode, ior_optab,
8904 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
8905 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
8906 NULL_RTX, 1, OPTAB_DIRECT));
8907
8908 emit_move_insn
8909 (adjust_address (m_tramp, SImode, 8),
8910 expand_binop (SImode, ior_optab,
8911 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
8912 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
8913 NULL_RTX, 1, OPTAB_DIRECT));
8914
8915 emit_move_insn
8916 (adjust_address (m_tramp, SImode, 12),
8917 expand_binop (SImode, ior_optab,
8918 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
8919 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
8920 NULL_RTX, 1, OPTAB_DIRECT));
8921
8922 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
8923 aligned on a 16 byte boundary so one flush clears it all. */
8924 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 0))));
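  /* A flush is only guaranteed to cover a doubleword, so on other CPUs
     flush the second half of the trampoline as well.  */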
8925 if (sparc_cpu != PROCESSOR_ULTRASPARC
8926 && sparc_cpu != PROCESSOR_ULTRASPARC3
8927 && sparc_cpu != PROCESSOR_NIAGARA
8928 && sparc_cpu != PROCESSOR_NIAGARA2
8929 && sparc_cpu != PROCESSOR_NIAGARA3
8930 && sparc_cpu != PROCESSOR_NIAGARA4)
8931 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 8))));
8932
8933 /* Call __enable_execute_stack after writing onto the stack to make sure
8934 the stack address is accessible. */
8935 #ifdef HAVE_ENABLE_EXECUTE_STACK
8936 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
8937 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
8938 #endif
8939
8940 }
8941
8942 /* The 64-bit version is simpler because it makes more sense to load the
8943 values as "immediate" data out of the trampoline. It's also easier since
8944 we can read the PC without clobbering a register. */
8945
8946 static void
8947 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
8948 {
8949 /* SPARC 64-bit trampoline:
8950
8951 rd %pc, %g1
8952 ldx [%g1+24], %g5
8953 jmp %g5
8954 ldx [%g1+16], %g5
8955 +16 bytes data
8956 */
8957
8958 emit_move_insn (adjust_address (m_tramp, SImode, 0),
8959 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
8960 emit_move_insn (adjust_address (m_tramp, SImode, 4),
8961 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
8962 emit_move_insn (adjust_address (m_tramp, SImode, 8),
8963 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
8964 emit_move_insn (adjust_address (m_tramp, SImode, 12),
8965 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
8966 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
8967 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
8968 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
8969
8970 if (sparc_cpu != PROCESSOR_ULTRASPARC
8971 && sparc_cpu != PROCESSOR_ULTRASPARC3
8972 && sparc_cpu != PROCESSOR_NIAGARA
8973 && sparc_cpu != PROCESSOR_NIAGARA2
8974 && sparc_cpu != PROCESSOR_NIAGARA3
8975 && sparc_cpu != PROCESSOR_NIAGARA4)
8976 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
8977
8978 /* Call __enable_execute_stack after writing onto the stack to make sure
8979 the stack address is accessible. */
8980 #ifdef HAVE_ENABLE_EXECUTE_STACK
8981 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
8982 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
8983 #endif
8984 }
8985
8986 /* Worker for TARGET_TRAMPOLINE_INIT. */
8987
8988 static void
8989 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
8990 {
8991 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
8992 cxt = force_reg (Pmode, cxt);
8993 if (TARGET_ARCH64)
8994 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
8995 else
8996 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
8997 }
8998 \f
8999 /* Adjust the cost of a scheduling dependency. Return the new cost of
9000 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9001
9002 static int
9003 supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
9004 {
9005 enum attr_type insn_type;
9006
9007 if (! recog_memoized (insn))
9008 return 0;
9009
9010 insn_type = get_attr_type (insn);
9011
9012 if (REG_NOTE_KIND (link) == 0)
9013 {
9014 /* Data dependency; DEP_INSN writes a register that INSN reads some
9015 cycles later. */
9016
9017 /* If a load, then the dependence must be on the memory address;
9018 add an extra "cycle".  Note that the cost could be two cycles
9019 if the reg was written late in an instruction group; we cannot tell
9020 here.  */
9021 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9022 return cost + 3;
9023
9024 /* Get the delay only if the address of the store is the dependence. */
9025 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9026 {
9027 rtx pat = PATTERN (insn);
9028 rtx dep_pat = PATTERN (dep_insn);
9029
9030 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9031 return cost; /* This should not happen! */
9032
9033 /* The dependency between the two instructions was on the data that
9034 is being stored. Assume that this implies that the address of the
9035 store is not dependent. */
9036 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9037 return cost;
9038
9039 return cost + 3; /* An approximation. */
9040 }
9041
9042 /* A shift instruction cannot receive its data from an instruction
9043 in the same cycle; add a one cycle penalty. */
9044 if (insn_type == TYPE_SHIFT)
9045 return cost + 3; /* Split before cascade into shift. */
9046 }
9047 else
9048 {
9049 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9050 INSN writes some cycles later. */
9051
9052 /* These are only significant for the fpu unit; writing a fp reg before
9053 the fpu has finished with it stalls the processor. */
9054
9055 /* Reusing an integer register causes no problems. */
9056 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9057 return 0;
9058 }
9059
9060 return cost;
9061 }
9062
9063 static int
9064 hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
9065 {
9066 enum attr_type insn_type, dep_type;
9067 rtx pat = PATTERN (insn);
9068 rtx dep_pat = PATTERN (dep_insn);
9069
9070 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9071 return cost;
9072
9073 insn_type = get_attr_type (insn);
9074 dep_type = get_attr_type (dep_insn);
9075
9076 switch (REG_NOTE_KIND (link))
9077 {
9078 case 0:
9079 /* Data dependency; DEP_INSN writes a register that INSN reads some
9080 cycles later. */
9081
9082 switch (insn_type)
9083 {
9084 case TYPE_STORE:
9085 case TYPE_FPSTORE:
9086 /* Get the delay iff the address of the store is the dependence. */
9087 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9088 return cost;
9089
9090 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9091 return cost;
9092 return cost + 3;
9093
9094 case TYPE_LOAD:
9095 case TYPE_SLOAD:
9096 case TYPE_FPLOAD:
9097 /* If a load, then the dependence must be on the memory address. If
9098 the addresses aren't equal, then it might be a false dependency.  */
9099 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9100 {
9101 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9102 || GET_CODE (SET_DEST (dep_pat)) != MEM
9103 || GET_CODE (SET_SRC (pat)) != MEM
9104 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9105 XEXP (SET_SRC (pat), 0)))
9106 return cost + 2;
9107
9108 return cost + 8;
9109 }
9110 break;
9111
9112 case TYPE_BRANCH:
9113 /* Compare to branch latency is 0. There is no benefit from
9114 separating compare and branch. */
9115 if (dep_type == TYPE_COMPARE)
9116 return 0;
9117 /* Floating point compare to branch latency is less than
9118 compare to conditional move. */
9119 if (dep_type == TYPE_FPCMP)
9120 return cost - 1;
9121 break;
9122 default:
9123 break;
9124 }
9125 break;
9126
9127 case REG_DEP_ANTI:
9128 /* Anti-dependencies only penalize the fpu unit. */
9129 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9130 return 0;
9131 break;
9132
9133 default:
9134 break;
9135 }
9136
9137 return cost;
9138 }
9139
9140 static int
9141 sparc_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
9142 {
9143 switch (sparc_cpu)
9144 {
9145 case PROCESSOR_SUPERSPARC:
9146 cost = supersparc_adjust_cost (insn, link, dep, cost);
9147 break;
9148 case PROCESSOR_HYPERSPARC:
9149 case PROCESSOR_SPARCLITE86X:
9150 cost = hypersparc_adjust_cost (insn, link, dep, cost);
9151 break;
9152 default:
9153 break;
9154 }
9155 return cost;
9156 }
9157
9158 static void
9159 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9160 int sched_verbose ATTRIBUTE_UNUSED,
9161 int max_ready ATTRIBUTE_UNUSED)
9162 {}
9163
9164 static int
9165 sparc_use_sched_lookahead (void)
9166 {
9167 if (sparc_cpu == PROCESSOR_NIAGARA
9168 || sparc_cpu == PROCESSOR_NIAGARA2
9169 || sparc_cpu == PROCESSOR_NIAGARA3)
9170 return 0;
9171 if (sparc_cpu == PROCESSOR_NIAGARA4)
9172 return 2;
9173 if (sparc_cpu == PROCESSOR_ULTRASPARC
9174 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9175 return 4;
9176 if ((1 << sparc_cpu) &
9177 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9178 (1 << PROCESSOR_SPARCLITE86X)))
9179 return 3;
9180 return 0;
9181 }
9182
9183 static int
9184 sparc_issue_rate (void)
9185 {
9186 switch (sparc_cpu)
9187 {
9188 case PROCESSOR_NIAGARA:
9189 case PROCESSOR_NIAGARA2:
9190 case PROCESSOR_NIAGARA3:
9191 default:
9192 return 1;
9193 case PROCESSOR_NIAGARA4:
9194 case PROCESSOR_V9:
9195 /* Assume V9 processors are capable of at least dual-issue. */
9196 return 2;
9197 case PROCESSOR_SUPERSPARC:
9198 return 3;
9199 case PROCESSOR_HYPERSPARC:
9200 case PROCESSOR_SPARCLITE86X:
9201 return 2;
9202 case PROCESSOR_ULTRASPARC:
9203 case PROCESSOR_ULTRASPARC3:
9204 return 4;
9205 }
9206 }
9207
9208 static int
9209 set_extends (rtx insn)
9210 {
9211 register rtx pat = PATTERN (insn);
9212
9213 switch (GET_CODE (SET_SRC (pat)))
9214 {
9215 /* Load and some shift instructions zero extend. */
9216 case MEM:
9217 case ZERO_EXTEND:
9218 /* sethi clears the high bits.  */
9219 case HIGH:
9220 /* LO_SUM is used with sethi; sethi clears the high
9221 bits and the values used with lo_sum are positive.  */
9222 case LO_SUM:
9223 /* Store-flag insns store 0 or 1.  */
9224 case LT: case LTU:
9225 case GT: case GTU:
9226 case LE: case LEU:
9227 case GE: case GEU:
9228 case EQ:
9229 case NE:
9230 return 1;
9231 case AND:
9232 {
9233 rtx op0 = XEXP (SET_SRC (pat), 0);
9234 rtx op1 = XEXP (SET_SRC (pat), 1);
9235 if (GET_CODE (op1) == CONST_INT)
9236 return INTVAL (op1) >= 0;
9237 if (GET_CODE (op0) != REG)
9238 return 0;
9239 if (sparc_check_64 (op0, insn) == 1)
9240 return 1;
9241 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9242 }
9243 case IOR:
9244 case XOR:
9245 {
9246 rtx op0 = XEXP (SET_SRC (pat), 0);
9247 rtx op1 = XEXP (SET_SRC (pat), 1);
9248 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9249 return 0;
9250 if (GET_CODE (op1) == CONST_INT)
9251 return INTVAL (op1) >= 0;
9252 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9253 }
9254 case LSHIFTRT:
9255 return GET_MODE (SET_SRC (pat)) == SImode;
9256 /* Positive integers leave the high bits zero. */
9257 case CONST_DOUBLE:
9258 return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
9259 case CONST_INT:
9260 return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
9261 case ASHIFTRT:
9262 case SIGN_EXTEND:
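      /* These sign-extend: return -1 for SImode, 0 (unknown) otherwise.  */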
9263 return - (GET_MODE (SET_SRC (pat)) == SImode);
9264 case REG:
9265 return sparc_check_64 (SET_SRC (pat), insn);
9266 default:
9267 return 0;
9268 }
9269 }
9270
9271 /* We _ought_ to have only one kind per function, but... */
9272 static GTY(()) rtx sparc_addr_diff_list;
9273 static GTY(()) rtx sparc_addr_list;
9274
9275 void
9276 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9277 {
9278 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9279 if (diff)
9280 sparc_addr_diff_list
9281 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9282 else
9283 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9284 }
9285
9286 static void
9287 sparc_output_addr_vec (rtx vec)
9288 {
9289 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9290 int idx, vlen = XVECLEN (body, 0);
9291
9292 #ifdef ASM_OUTPUT_ADDR_VEC_START
9293 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9294 #endif
9295
9296 #ifdef ASM_OUTPUT_CASE_LABEL
9297 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9298 NEXT_INSN (lab));
9299 #else
9300 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9301 #endif
9302
9303 for (idx = 0; idx < vlen; idx++)
9304 {
9305 ASM_OUTPUT_ADDR_VEC_ELT
9306 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9307 }
9308
9309 #ifdef ASM_OUTPUT_ADDR_VEC_END
9310 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9311 #endif
9312 }
9313
9314 static void
9315 sparc_output_addr_diff_vec (rtx vec)
9316 {
9317 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9318 rtx base = XEXP (XEXP (body, 0), 0);
9319 int idx, vlen = XVECLEN (body, 1);
9320
9321 #ifdef ASM_OUTPUT_ADDR_VEC_START
9322 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9323 #endif
9324
9325 #ifdef ASM_OUTPUT_CASE_LABEL
9326 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9327 NEXT_INSN (lab));
9328 #else
9329 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9330 #endif
9331
9332 for (idx = 0; idx < vlen; idx++)
9333 {
9334 ASM_OUTPUT_ADDR_DIFF_ELT
9335 (asm_out_file,
9336 body,
9337 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9338 CODE_LABEL_NUMBER (base));
9339 }
9340
9341 #ifdef ASM_OUTPUT_ADDR_VEC_END
9342 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9343 #endif
9344 }
9345
9346 static void
9347 sparc_output_deferred_case_vectors (void)
9348 {
9349 rtx t;
9350 int align;
9351
9352 if (sparc_addr_list == NULL_RTX
9353 && sparc_addr_diff_list == NULL_RTX)
9354 return;
9355
9356 /* Align to cache line in the function's code section. */
9357 switch_to_section (current_function_section ());
9358
9359 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9360 if (align > 0)
9361 ASM_OUTPUT_ALIGN (asm_out_file, align);
9362
9363 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9364 sparc_output_addr_vec (XEXP (t, 0));
9365 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9366 sparc_output_addr_diff_vec (XEXP (t, 0));
9367
9368 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9369 }
9370
9371 /* Return 0 if the high 32 bits of X (of the low word of X, if X is
9372 DImode) are unknown. Return 1 if the high bits are zero, -1 if the
9373 register is sign extended. */
9374 int
9375 sparc_check_64 (rtx x, rtx insn)
9376 {
9377 /* If a register is set only once it is safe to ignore insns this
9378 code does not know how to handle. The loop will either recognize
9379 the single set and return the correct value or fail to recognize
9380 it and return 0. */
9381 int set_once = 0;
9382 rtx y = x;
9383
9384 gcc_assert (GET_CODE (x) == REG);
9385
9386 if (GET_MODE (x) == DImode)
9387 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9388
9389 if (flag_expensive_optimizations
9390 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9391 set_once = 1;
9392
9393 if (insn == 0)
9394 {
9395 if (set_once)
9396 insn = get_last_insn_anywhere ();
9397 else
9398 return 0;
9399 }
9400
9401 while ((insn = PREV_INSN (insn)))
9402 {
9403 switch (GET_CODE (insn))
9404 {
9405 case JUMP_INSN:
9406 case NOTE:
9407 break;
9408 case CODE_LABEL:
9409 case CALL_INSN:
9410 default:
9411 if (! set_once)
9412 return 0;
9413 break;
9414 case INSN:
9415 {
9416 rtx pat = PATTERN (insn);
9417 if (GET_CODE (pat) != SET)
9418 return 0;
9419 if (rtx_equal_p (x, SET_DEST (pat)))
9420 return set_extends (insn);
9421 if (y && rtx_equal_p (y, SET_DEST (pat)))
9422 return set_extends (insn);
9423 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9424 return 0;
9425 }
9426 }
9427 }
9428 return 0;
9429 }
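/* For exposition, a minimal scenario (register names hypothetical):

     (set (reg:SI %o0) (zero_extend:SI (mem:QI ...)))
     ...
     sparc_check_64 (%o0, insn)

   walks backwards from INSN, finds the single SET of %o0, classifies it
   via set_extends, and returns 1 (high bits known zero).  A caller such
   as output_v8plus_shift can then skip an explicit zero-extension.  */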
9430
9431 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9432 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
9433
9434 const char *
9435 output_v8plus_shift (rtx insn, rtx *operands, const char *opcode)
9436 {
9437 static char asm_code[60];
9438
9439 /* The scratch register is only required when the destination
9440 register is not a 64-bit global or out register. */
9441 if (which_alternative != 2)
9442 operands[3] = operands[0];
9443
9444 /* We can only shift by constants <= 63. */
9445 if (GET_CODE (operands[2]) == CONST_INT)
9446 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9447
9448 if (GET_CODE (operands[1]) == CONST_INT)
9449 {
9450 output_asm_insn ("mov\t%1, %3", operands);
9451 }
9452 else
9453 {
9454 output_asm_insn ("sllx\t%H1, 32, %3", operands);
9455 if (sparc_check_64 (operands[1], insn) <= 0)
9456 output_asm_insn ("srl\t%L1, 0, %L1", operands);
9457 output_asm_insn ("or\t%L1, %3, %3", operands);
9458 }
9459
9460 strcpy (asm_code, opcode);
9461
9462 if (which_alternative != 2)
9463 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9464 else
9465 return
9466 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
9467 }
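/* For exposition only: with OPCODE == "sllx" and a low/high register pair
   for operand 1 (%L1/%H1), the routine above emits a sequence of the form

     sllx  %H1, 32, %3      ! rebuild the 64-bit value in the scratch
     srl   %L1, 0, %L1      ! zero-extend %L1, omitted when sparc_check_64
     or    %L1, %3, %3      ! proves the high bits are already zero
     sllx  %3, %2, %L0      ! perform the shift
     srlx  %L0, 32, %H0     ! split the result back into the pair  */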
9468 \f
9469 /* Output rtl to increment the profiler label LABELNO
9470 for profiling a function entry. */
9471
9472 void
9473 sparc_profile_hook (int labelno)
9474 {
9475 char buf[32];
9476 rtx lab, fun;
9477
9478 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9479 if (NO_PROFILE_COUNTERS)
9480 {
9481 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9482 }
9483 else
9484 {
9485 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9486 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9487 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9488 }
9489 }
9490 \f
9491 #ifdef TARGET_SOLARIS
9492 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
9493
9494 static void
9495 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9496 tree decl ATTRIBUTE_UNUSED)
9497 {
9498 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9499 {
9500 solaris_elf_asm_comdat_section (name, flags, decl);
9501 return;
9502 }
9503
9504 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9505
9506 if (!(flags & SECTION_DEBUG))
9507 fputs (",#alloc", asm_out_file);
9508 if (flags & SECTION_WRITE)
9509 fputs (",#write", asm_out_file);
9510 if (flags & SECTION_TLS)
9511 fputs (",#tls", asm_out_file);
9512 if (flags & SECTION_CODE)
9513 fputs (",#execinstr", asm_out_file);
9514
9515 /* Sun as supports #nobits/#progbits only since Solaris 10. */
9516 if (HAVE_AS_SPARC_NOBITS)
9517 {
9518 if (flags & SECTION_BSS)
9519 fputs (",#nobits", asm_out_file);
9520 else
9521 fputs (",#progbits", asm_out_file);
9522 }
9523
9524 fputc ('\n', asm_out_file);
9525 }
9526 #endif /* TARGET_SOLARIS */
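/* For example (illustrative), a writable TLS data section would be
   emitted by the routine above as

     .section  ".tdata",#alloc,#write,#tls,#progbits

   assuming the assembler is recent enough to accept #progbits.  */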
9527
9528 /* We do not allow indirect calls to be optimized into sibling calls.
9529
9530 We cannot use sibling calls when delayed branches are disabled
9531 because they will likely require the call delay slot to be filled.
9532
9533 Also, on SPARC 32-bit we cannot emit a sibling call when the
9534 current function returns a structure. This is because the "unimp
9535 after call" convention would cause the callee to return to the
9536 wrong place. The generic code already disallows cases where the
9537 function being called returns a structure.
9538
9539 It may seem strange that this last case could occur. Usually there
9540 is code after the call which jumps to epilogue code which dumps the
9541 return value into the struct return area, and that ought to invalidate
9542 the sibling call, right? Well, in the C++ case we can end up passing
9543 the pointer to the struct return area to a constructor (which returns
9544 void) and then nothing else happens. Such a sibling call would look
9545 valid without the added check here.
9546
9547 VxWorks PIC PLT entries require the global pointer to be initialized
9548 on entry. We therefore can't emit sibling calls to them. */
9549 static bool
9550 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9551 {
9552 return (decl
9553 && flag_delayed_branch
9554 && (TARGET_ARCH64 || ! cfun->returns_struct)
9555 && !(TARGET_VXWORKS_RTP
9556 && flag_pic
9557 && !targetm.binds_local_p (decl)));
9558 }
9559 \f
9560 /* libfunc renaming. */
9561
9562 static void
9563 sparc_init_libfuncs (void)
9564 {
9565 if (TARGET_ARCH32)
9566 {
9567 /* Use the subroutines that Sun's library provides for integer
9568 multiply and divide. The `*' prevents an underscore from
9569 being prepended by the compiler. .umul is a little faster
9570 than .mul. */
9571 set_optab_libfunc (smul_optab, SImode, "*.umul");
9572 set_optab_libfunc (sdiv_optab, SImode, "*.div");
9573 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
9574 set_optab_libfunc (smod_optab, SImode, "*.rem");
9575 set_optab_libfunc (umod_optab, SImode, "*.urem");
9576
9577 /* TFmode arithmetic. These names are part of the SPARC 32-bit ABI. */
9578 set_optab_libfunc (add_optab, TFmode, "_Q_add");
9579 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
9580 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
9581 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
9582 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
9583
9584 /* We can define the TFmode sqrt optab only if TARGET_FPU. With
9585 soft-float, the SFmode and DFmode sqrt instructions are absent,
9586 so the compiler would notice and try to use the TFmode sqrt
9587 instruction for calls to the built-in function sqrt, which
9588 fails. */
9589 if (TARGET_FPU)
9590 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
9591
9592 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
9593 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
9594 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
9595 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
9596 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
9597 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
9598
9599 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
9600 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
9601 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
9602 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
9603
9604 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
9605 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
9606 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
9607 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
9608
9609 if (DITF_CONVERSION_LIBFUNCS)
9610 {
9611 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
9612 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
9613 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
9614 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
9615 }
9616
9617 if (SUN_CONVERSION_LIBFUNCS)
9618 {
9619 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
9620 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
9621 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
9622 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
9623 }
9624 }
9625 if (TARGET_ARCH64)
9626 {
9627 /* In the SPARC 64-bit ABI, SImode multiply and divide functions
9628 do not exist in the library. Make sure the compiler does not
9629 emit calls to them by accident. (It should always use the
9630 hardware instructions.) */
9631 set_optab_libfunc (smul_optab, SImode, 0);
9632 set_optab_libfunc (sdiv_optab, SImode, 0);
9633 set_optab_libfunc (udiv_optab, SImode, 0);
9634 set_optab_libfunc (smod_optab, SImode, 0);
9635 set_optab_libfunc (umod_optab, SImode, 0);
9636
9637 if (SUN_INTEGER_MULTIPLY_64)
9638 {
9639 set_optab_libfunc (smul_optab, DImode, "__mul64");
9640 set_optab_libfunc (sdiv_optab, DImode, "__div64");
9641 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
9642 set_optab_libfunc (smod_optab, DImode, "__rem64");
9643 set_optab_libfunc (umod_optab, DImode, "__urem64");
9644 }
9645
9646 if (SUN_CONVERSION_LIBFUNCS)
9647 {
9648 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
9649 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
9650 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
9651 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
9652 }
9653 }
9654 }
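/* A minimal sketch of the effect on user code, assuming -m32 (the
   function name is hypothetical): TFmode arithmetic becomes _Q_* calls.

     long double
     qmul (long double a, long double b)
     {
       return a * b;    // lowered to a call to _Q_mul
     }
*/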
9655 \f
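/* Helper for sparc_vis_init_builtins below: register builtin NAME with
   insn code CODE and function type TYPE.  */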
9656 static tree def_builtin (const char *name, int code, tree type)
9657 {
9658 return add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
9659 NULL_TREE);
9660 }
9661
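/* Like def_builtin, but also mark the builtin TREE_READONLY so that
   calls to it may be CSEd or folded when their arguments are constant.  */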
9662 static tree def_builtin_const (const char *name, int code, tree type)
9663 {
9664 tree t = def_builtin (name, code, type);
9665
9666 if (t)
9667 TREE_READONLY (t) = 1;
9668
9669 return t;
9670 }
9671
9672 /* Implement the TARGET_INIT_BUILTINS target hook.
9673 Create builtin functions for special SPARC instructions. */
9674
9675 static void
9676 sparc_init_builtins (void)
9677 {
9678 if (TARGET_VIS)
9679 sparc_vis_init_builtins ();
9680 }
9681
9682 /* Create builtin functions for VIS 1.0 instructions. */
9683
9684 static void
9685 sparc_vis_init_builtins (void)
9686 {
9687 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
9688 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
9689 tree v4hi = build_vector_type (intHI_type_node, 4);
9690 tree v2hi = build_vector_type (intHI_type_node, 2);
9691 tree v2si = build_vector_type (intSI_type_node, 2);
9692 tree v1si = build_vector_type (intSI_type_node, 1);
9693
9694 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
9695 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
9696 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
9697 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
9698 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
9699 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
9700 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
9701 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
9702 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
9703 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
9704 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
9705 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
9706 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
9707 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
9708 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
9709 v8qi, v8qi,
9710 intDI_type_node, 0);
9711 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
9712 v8qi, v8qi, 0);
9713 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
9714 v8qi, v8qi, 0);
9715 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
9716 intDI_type_node,
9717 intDI_type_node, 0);
9718 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
9719 intSI_type_node,
9720 intSI_type_node, 0);
9721 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
9722 ptr_type_node,
9723 intSI_type_node, 0);
9724 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
9725 ptr_type_node,
9726 intDI_type_node, 0);
9727 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
9728 ptr_type_node,
9729 ptr_type_node, 0);
9730 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
9731 ptr_type_node,
9732 ptr_type_node, 0);
9733 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
9734 v4hi, v4hi, 0);
9735 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
9736 v2si, v2si, 0);
9737 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
9738 v4hi, v4hi, 0);
9739 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
9740 v2si, v2si, 0);
9741 tree void_ftype_di = build_function_type_list (void_type_node,
9742 intDI_type_node, 0);
9743 tree di_ftype_void = build_function_type_list (intDI_type_node,
9744 void_type_node, 0);
9745 tree void_ftype_si = build_function_type_list (void_type_node,
9746 intSI_type_node, 0);
9747 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
9748 float_type_node,
9749 float_type_node, 0);
9750 tree df_ftype_df_df = build_function_type_list (double_type_node,
9751 double_type_node,
9752 double_type_node, 0);
9753
9754 /* Packing and expanding vectors. */
9755 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
9756 v4qi_ftype_v4hi);
9757 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
9758 v8qi_ftype_v2si_v8qi);
9759 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
9760 v2hi_ftype_v2si);
9761 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
9762 v4hi_ftype_v4qi);
9763 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
9764 v8qi_ftype_v4qi_v4qi);
9765
9766 /* Multiplications. */
9767 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
9768 v4hi_ftype_v4qi_v4hi);
9769 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
9770 v4hi_ftype_v4qi_v2hi);
9771 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
9772 v4hi_ftype_v4qi_v2hi);
9773 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
9774 v4hi_ftype_v8qi_v4hi);
9775 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
9776 v4hi_ftype_v8qi_v4hi);
9777 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
9778 v2si_ftype_v4qi_v2hi);
9779 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
9780 v2si_ftype_v4qi_v2hi);
9781
9782 /* Data aligning. */
9783 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
9784 v4hi_ftype_v4hi_v4hi);
9785 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
9786 v8qi_ftype_v8qi_v8qi);
9787 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
9788 v2si_ftype_v2si_v2si);
9789 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
9790 di_ftype_di_di);
9791
9792 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
9793 void_ftype_di);
9794 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
9795 di_ftype_void);
9796
9797 if (TARGET_ARCH64)
9798 {
9799 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
9800 ptr_ftype_ptr_di);
9801 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
9802 ptr_ftype_ptr_di);
9803 }
9804 else
9805 {
9806 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
9807 ptr_ftype_ptr_si);
9808 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
9809 ptr_ftype_ptr_si);
9810 }
9811
9812 /* Pixel distance. */
9813 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
9814 di_ftype_v8qi_v8qi_di);
9815
9816 /* Edge handling. */
9817 if (TARGET_ARCH64)
9818 {
9819 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
9820 di_ftype_ptr_ptr);
9821 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
9822 di_ftype_ptr_ptr);
9823 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
9824 di_ftype_ptr_ptr);
9825 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
9826 di_ftype_ptr_ptr);
9827 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
9828 di_ftype_ptr_ptr);
9829 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
9830 di_ftype_ptr_ptr);
9831 if (TARGET_VIS2)
9832 {
9833 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
9834 di_ftype_ptr_ptr);
9835 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
9836 di_ftype_ptr_ptr);
9837 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
9838 di_ftype_ptr_ptr);
9839 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
9840 di_ftype_ptr_ptr);
9841 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
9842 di_ftype_ptr_ptr);
9843 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
9844 di_ftype_ptr_ptr);
9845 }
9846 }
9847 else
9848 {
9849 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
9850 si_ftype_ptr_ptr);
9851 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
9852 si_ftype_ptr_ptr);
9853 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
9854 si_ftype_ptr_ptr);
9855 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
9856 si_ftype_ptr_ptr);
9857 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
9858 si_ftype_ptr_ptr);
9859 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
9860 si_ftype_ptr_ptr);
9861 if (TARGET_VIS2)
9862 {
9863 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
9864 si_ftype_ptr_ptr);
9865 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
9866 si_ftype_ptr_ptr);
9867 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
9868 si_ftype_ptr_ptr);
9869 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
9870 si_ftype_ptr_ptr);
9871 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
9872 si_ftype_ptr_ptr);
9873 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
9874 si_ftype_ptr_ptr);
9875 }
9876 }
9877
9878 /* Pixel compare. */
9879 if (TARGET_ARCH64)
9880 {
9881 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
9882 di_ftype_v4hi_v4hi);
9883 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
9884 di_ftype_v2si_v2si);
9885 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
9886 di_ftype_v4hi_v4hi);
9887 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
9888 di_ftype_v2si_v2si);
9889 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
9890 di_ftype_v4hi_v4hi);
9891 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
9892 di_ftype_v2si_v2si);
9893 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
9894 di_ftype_v4hi_v4hi);
9895 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
9896 di_ftype_v2si_v2si);
9897 }
9898 else
9899 {
9900 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
9901 si_ftype_v4hi_v4hi);
9902 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
9903 si_ftype_v2si_v2si);
9904 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
9905 si_ftype_v4hi_v4hi);
9906 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
9907 si_ftype_v2si_v2si);
9908 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
9909 si_ftype_v4hi_v4hi);
9910 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
9911 si_ftype_v2si_v2si);
9912 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
9913 si_ftype_v4hi_v4hi);
9914 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
9915 si_ftype_v2si_v2si);
9916 }
9917
9918 /* Addition and subtraction. */
9919 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
9920 v4hi_ftype_v4hi_v4hi);
9921 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
9922 v2hi_ftype_v2hi_v2hi);
9923 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
9924 v2si_ftype_v2si_v2si);
9925 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
9926 v1si_ftype_v1si_v1si);
9927 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
9928 v4hi_ftype_v4hi_v4hi);
9929 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
9930 v2hi_ftype_v2hi_v2hi);
9931 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
9932 v2si_ftype_v2si_v2si);
9933 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
9934 v1si_ftype_v1si_v1si);
9935
9936 /* Three-dimensional array addressing. */
9937 if (TARGET_ARCH64)
9938 {
9939 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
9940 di_ftype_di_di);
9941 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
9942 di_ftype_di_di);
9943 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
9944 di_ftype_di_di);
9945 }
9946 else
9947 {
9948 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
9949 si_ftype_si_si);
9950 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
9951 si_ftype_si_si);
9952 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
9953 si_ftype_si_si);
9954 }
9955
9956 if (TARGET_VIS2)
9957 {
9958 /* Byte mask and shuffle */
9959 if (TARGET_ARCH64)
9960 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
9961 di_ftype_di_di);
9962 else
9963 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
9964 si_ftype_si_si);
9965 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
9966 v4hi_ftype_v4hi_v4hi);
9967 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
9968 v8qi_ftype_v8qi_v8qi);
9969 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
9970 v2si_ftype_v2si_v2si);
9971 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
9972 di_ftype_di_di);
9973 }
9974
9975 if (TARGET_VIS3)
9976 {
9977 if (TARGET_ARCH64)
9978 {
9979 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
9980 void_ftype_di);
9981 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
9982 void_ftype_di);
9983 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
9984 void_ftype_di);
9985 }
9986 else
9987 {
9988 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
9989 void_ftype_si);
9990 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
9991 void_ftype_si);
9992 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
9993 void_ftype_si);
9994 }
9995
9996 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
9997 v4hi_ftype_v4hi_v4hi);
9998
9999 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10000 v4hi_ftype_v4hi_v4hi);
10001 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10002 v4hi_ftype_v4hi_v4hi);
10003 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10004 v4hi_ftype_v4hi_v4hi);
10005 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10006 v4hi_ftype_v4hi_v4hi);
10007 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10008 v2si_ftype_v2si_v2si);
10009 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10010 v2si_ftype_v2si_v2si);
10011 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10012 v2si_ftype_v2si_v2si);
10013 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10014 v2si_ftype_v2si_v2si);
10015
10016 if (TARGET_ARCH64)
10017 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10018 di_ftype_v8qi_v8qi);
10019 else
10020 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10021 si_ftype_v8qi_v8qi);
10022
10023 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10024 v4hi_ftype_v4hi_v4hi);
10025 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10026 di_ftype_di_di);
10027 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10028 di_ftype_di_di);
10029
10030 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10031 v4hi_ftype_v4hi_v4hi);
10032 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10033 v2hi_ftype_v2hi_v2hi);
10034 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10035 v4hi_ftype_v4hi_v4hi);
10036 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10037 v2hi_ftype_v2hi_v2hi);
10038 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10039 v2si_ftype_v2si_v2si);
10040 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10041 v1si_ftype_v1si_v1si);
10042 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10043 v2si_ftype_v2si_v2si);
10044 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10045 v1si_ftype_v1si_v1si);
10046
10047 if (TARGET_ARCH64)
10048 {
10049 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10050 di_ftype_v8qi_v8qi);
10051 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10052 di_ftype_v8qi_v8qi);
10053 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10054 di_ftype_v8qi_v8qi);
10055 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10056 di_ftype_v8qi_v8qi);
10057 }
10058 else
10059 {
10060 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10061 si_ftype_v8qi_v8qi);
10062 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10063 si_ftype_v8qi_v8qi);
10064 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10065 si_ftype_v8qi_v8qi);
10066 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10067 si_ftype_v8qi_v8qi);
10068 }
10069
10070 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10071 sf_ftype_sf_sf);
10072 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10073 df_ftype_df_df);
10074 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10075 sf_ftype_sf_sf);
10076 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10077 df_ftype_df_df);
10078 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10079 sf_ftype_sf_sf);
10080 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10081 df_ftype_df_df);
10082
10083 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10084 di_ftype_di_di);
10085 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10086 di_ftype_di_di);
10087 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10088 di_ftype_di_di);
10089 }
10090 }
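/* Usage sketch (illustrative; compile with -mvis, names hypothetical):
   the builtins operate on the vector types built above, e.g. the
   partitioned 4 x 16-bit add:

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi
     vadd (v4hi a, v4hi b)
     {
       return __builtin_vis_fpadd16 (a, b);   // single fpadd16 insn
     }
*/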
10091
10092 /* Handle TARGET_EXPAND_BUILTIN target hook.
10093 Expand builtin functions for SPARC intrinsics. */
10094
10095 static rtx
10096 sparc_expand_builtin (tree exp, rtx target,
10097 rtx subtarget ATTRIBUTE_UNUSED,
10098 enum machine_mode tmode ATTRIBUTE_UNUSED,
10099 int ignore ATTRIBUTE_UNUSED)
10100 {
10101 tree arg;
10102 call_expr_arg_iterator iter;
10103 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10104 unsigned int icode = DECL_FUNCTION_CODE (fndecl);
10105 rtx pat, op[4];
10106 int arg_count = 0;
10107 bool nonvoid;
10108
10109 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10110
10111 if (nonvoid)
10112 {
10113 enum machine_mode tmode = insn_data[icode].operand[0].mode;
10114 if (!target
10115 || GET_MODE (target) != tmode
10116 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10117 op[0] = gen_reg_rtx (tmode);
10118 else
10119 op[0] = target;
10120 }
10121 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10122 {
10123 const struct insn_operand_data *insn_op;
10124 int idx;
10125
10126 if (arg == error_mark_node)
10127 return NULL_RTX;
10128
10129 arg_count++;
10130 idx = arg_count - !nonvoid;
10131 insn_op = &insn_data[icode].operand[idx];
10132 op[arg_count] = expand_normal (arg);
10133
10134 if (insn_op->mode == V1DImode
10135 && GET_MODE (op[arg_count]) == DImode)
10136 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10137 else if (insn_op->mode == V1SImode
10138 && GET_MODE (op[arg_count]) == SImode)
10139 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10140
10141 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10142 insn_op->mode))
10143 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10144 }
10145
10146 switch (arg_count)
10147 {
10148 case 0:
10149 pat = GEN_FCN (icode) (op[0]);
10150 break;
10151 case 1:
10152 if (nonvoid)
10153 pat = GEN_FCN (icode) (op[0], op[1]);
10154 else
10155 pat = GEN_FCN (icode) (op[1]);
10156 break;
10157 case 2:
10158 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10159 break;
10160 case 3:
10161 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10162 break;
10163 default:
10164 gcc_unreachable ();
10165 }
10166
10167 if (!pat)
10168 return NULL_RTX;
10169
10170 emit_insn (pat);
10171
10172 if (nonvoid)
10173 return op[0];
10174 else
10175 return const0_rtx;
10176 }
10177
10178 static int
10179 sparc_vis_mul8x16 (int e8, int e16)
10180 {
10181 return (e8 * e16 + 128) / 256;
10182 }
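/* Worked example: e8 == 100 and e16 == 300 gives
   (100 * 300 + 128) / 256 == 30128 / 256 == 117, i.e. the 8-bit operand
   acts as a fixed-point fraction scaled by 1/256, with the +128 term
   rounding the discarded low byte to nearest.  */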
10183
10184 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10185 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10186
10187 static void
10188 sparc_handle_vis_mul8x16 (tree *n_elts, int fncode, tree inner_type,
10189 tree cst0, tree cst1)
10190 {
10191 unsigned i, num = VECTOR_CST_NELTS (cst0);
10192 int scale;
10193
10194 switch (fncode)
10195 {
10196 case CODE_FOR_fmul8x16_vis:
10197 for (i = 0; i < num; ++i)
10198 {
10199 int val
10200 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10201 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10202 n_elts[i] = build_int_cst (inner_type, val);
10203 }
10204 break;
10205
10206 case CODE_FOR_fmul8x16au_vis:
10207 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10208
10209 for (i = 0; i < num; ++i)
10210 {
10211 int val
10212 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10213 scale);
10214 n_elts[i] = build_int_cst (inner_type, val);
10215 }
10216 break;
10217
10218 case CODE_FOR_fmul8x16al_vis:
10219 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10220
10221 for (i = 0; i < num; ++i)
10222 {
10223 int val
10224 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10225 scale);
10226 n_elts[i] = build_int_cst (inner_type, val);
10227 }
10228 break;
10229
10230 default:
10231 gcc_unreachable ();
10232 }
10233 }
10234
10235 /* Handle TARGET_FOLD_BUILTIN target hook.
10236 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
10237 result of the function call is ignored. NULL_TREE is returned if the
10238 function could not be folded. */
10239
10240 static tree
10241 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10242 tree *args, bool ignore)
10243 {
10244 tree arg0, arg1, arg2;
10245 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10246 enum insn_code icode = (enum insn_code) DECL_FUNCTION_CODE (fndecl);
10247
10248 if (ignore)
10249 {
10250 /* Note that a switch statement instead of the sequence of tests would
10251 be incorrect, as many of the CODE_FOR values could be CODE_FOR_nothing,
10252 which would yield multiple case alternatives with identical values. */
10253 if (icode == CODE_FOR_alignaddrsi_vis
10254 || icode == CODE_FOR_alignaddrdi_vis
10255 || icode == CODE_FOR_wrgsr_vis
10256 || icode == CODE_FOR_bmasksi_vis
10257 || icode == CODE_FOR_bmaskdi_vis
10258 || icode == CODE_FOR_cmask8si_vis
10259 || icode == CODE_FOR_cmask8di_vis
10260 || icode == CODE_FOR_cmask16si_vis
10261 || icode == CODE_FOR_cmask16di_vis
10262 || icode == CODE_FOR_cmask32si_vis
10263 || icode == CODE_FOR_cmask32di_vis)
10264 ;
10265 else
10266 return build_zero_cst (rtype);
10267 }
10268
10269 switch (icode)
10270 {
10271 case CODE_FOR_fexpand_vis:
10272 arg0 = args[0];
10273 STRIP_NOPS (arg0);
10274
10275 if (TREE_CODE (arg0) == VECTOR_CST)
10276 {
10277 tree inner_type = TREE_TYPE (rtype);
10278 tree *n_elts;
10279 unsigned i;
10280
10281 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10282 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10283 n_elts[i] = build_int_cst (inner_type,
10284 TREE_INT_CST_LOW
10285 (VECTOR_CST_ELT (arg0, i)) << 4);
10286 return build_vector (rtype, n_elts);
10287 }
10288 break;
10289
10290 case CODE_FOR_fmul8x16_vis:
10291 case CODE_FOR_fmul8x16au_vis:
10292 case CODE_FOR_fmul8x16al_vis:
10293 arg0 = args[0];
10294 arg1 = args[1];
10295 STRIP_NOPS (arg0);
10296 STRIP_NOPS (arg1);
10297
10298 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10299 {
10300 tree inner_type = TREE_TYPE (rtype);
10301 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10302 sparc_handle_vis_mul8x16 (n_elts, icode, inner_type, arg0, arg1);
10303 return build_vector (rtype, n_elts);
10304 }
10305 break;
10306
10307 case CODE_FOR_fpmerge_vis:
10308 arg0 = args[0];
10309 arg1 = args[1];
10310 STRIP_NOPS (arg0);
10311 STRIP_NOPS (arg1);
10312
10313 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10314 {
10315 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
10316 unsigned i;
10317 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10318 {
10319 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
10320 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
10321 }
10322
10323 return build_vector (rtype, n_elts);
10324 }
10325 break;
10326
10327 case CODE_FOR_pdist_vis:
10328 arg0 = args[0];
10329 arg1 = args[1];
10330 arg2 = args[2];
10331 STRIP_NOPS (arg0);
10332 STRIP_NOPS (arg1);
10333 STRIP_NOPS (arg2);
10334
10335 if (TREE_CODE (arg0) == VECTOR_CST
10336 && TREE_CODE (arg1) == VECTOR_CST
10337 && TREE_CODE (arg2) == INTEGER_CST)
10338 {
10339 bool overflow = false;
10340 double_int result = TREE_INT_CST (arg2);
10341 double_int tmp;
10342 unsigned i;
10343
10344 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10345 {
10346 double_int e0 = TREE_INT_CST (VECTOR_CST_ELT (arg0, i));
10347 double_int e1 = TREE_INT_CST (VECTOR_CST_ELT (arg1, i));
10348
10349 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
10350
10351 tmp = e1.neg_with_overflow (&neg1_ovf);
10352 tmp = e0.add_with_sign (tmp, false, &add1_ovf);
10353 if (tmp.is_negative ())
10354 tmp = tmp.neg_with_overflow (&neg2_ovf);
10355
10356 result = result.add_with_sign (tmp, false, &add2_ovf);
10357 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
10358 }
10359
10360 gcc_assert (!overflow);
10361
10362 return build_int_cst_wide (rtype, result.low, result.high);
10363 }
10364 /* FALLTHRU */
10365 default:
10366 break;
10367 }
10368
10369 return NULL_TREE;
10370 }
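/* Folding example (illustrative): the CODE_FOR_fexpand_vis case above
   turns the constant call

     __builtin_vis_fexpand ((v4qi) {1, 2, 3, 4})

   into the vector constant {16, 32, 48, 64} at compile time, each
   element being shifted left by 4.  */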
10371 \f
10372 /* ??? This duplicates information provided to the compiler by the
10373 ??? scheduler description. Some day, teach genautomata to output
10374 ??? the latencies and then CSE will just use that. */
10375
10376 static bool
10377 sparc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10378 int *total, bool speed ATTRIBUTE_UNUSED)
10379 {
10380 enum machine_mode mode = GET_MODE (x);
10381 bool float_mode_p = FLOAT_MODE_P (mode);
10382
10383 switch (code)
10384 {
10385 case CONST_INT:
10386 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
10387 {
10388 *total = 0;
10389 return true;
10390 }
10391 /* FALLTHRU */
10392
10393 case HIGH:
10394 *total = 2;
10395 return true;
10396
10397 case CONST:
10398 case LABEL_REF:
10399 case SYMBOL_REF:
10400 *total = 4;
10401 return true;
10402
10403 case CONST_DOUBLE:
10404 if (GET_MODE (x) == VOIDmode
10405 && ((CONST_DOUBLE_HIGH (x) == 0
10406 && CONST_DOUBLE_LOW (x) < 0x1000)
10407 || (CONST_DOUBLE_HIGH (x) == -1
10408 && CONST_DOUBLE_LOW (x) < 0
10409 && CONST_DOUBLE_LOW (x) >= -0x1000)))
10410 *total = 0;
10411 else
10412 *total = 8;
10413 return true;
10414
10415 case MEM:
10416 /* If outer-code was a sign or zero extension, a cost
10417 of COSTS_N_INSNS (1) was already added in. This is
10418 why we are subtracting it back out. */
10419 if (outer_code == ZERO_EXTEND)
10420 {
10421 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
10422 }
10423 else if (outer_code == SIGN_EXTEND)
10424 {
10425 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
10426 }
10427 else if (float_mode_p)
10428 {
10429 *total = sparc_costs->float_load;
10430 }
10431 else
10432 {
10433 *total = sparc_costs->int_load;
10434 }
10435
10436 return true;
10437
10438 case PLUS:
10439 case MINUS:
10440 if (float_mode_p)
10441 *total = sparc_costs->float_plusminus;
10442 else
10443 *total = COSTS_N_INSNS (1);
10444 return false;
10445
10446 case FMA:
10447 {
10448 rtx sub;
10449
10450 gcc_assert (float_mode_p);
10451 *total = sparc_costs->float_mul;
10452
10453 sub = XEXP (x, 0);
10454 if (GET_CODE (sub) == NEG)
10455 sub = XEXP (sub, 0);
10456 *total += rtx_cost (sub, FMA, 0, speed);
10457
10458 sub = XEXP (x, 2);
10459 if (GET_CODE (sub) == NEG)
10460 sub = XEXP (sub, 0);
10461 *total += rtx_cost (sub, FMA, 2, speed);
10462 return true;
10463 }
10464
10465 case MULT:
10466 if (float_mode_p)
10467 *total = sparc_costs->float_mul;
10468 else if (! TARGET_HARD_MUL)
10469 *total = COSTS_N_INSNS (25);
10470 else
10471 {
10472 int bit_cost;
10473
10474 bit_cost = 0;
10475 if (sparc_costs->int_mul_bit_factor)
10476 {
10477 int nbits;
10478
10479 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
10480 {
10481 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
10482 for (nbits = 0; value != 0; value &= value - 1)
10483 nbits++;
10484 }
10485 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
10486 && GET_MODE (XEXP (x, 1)) == VOIDmode)
10487 {
10488 rtx x1 = XEXP (x, 1);
10489 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
10490 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
10491
10492 for (nbits = 0; value1 != 0; value1 &= value1 - 1)
10493 nbits++;
10494 for (; value2 != 0; value2 &= value2 - 1)
10495 nbits++;
10496 }
10497 else
10498 nbits = 7;
10499
10500 if (nbits < 3)
10501 nbits = 3;
10502 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
10503 bit_cost = COSTS_N_INSNS (bit_cost);
10504 }
10505
10506 if (mode == DImode)
10507 *total = sparc_costs->int_mulX + bit_cost;
10508 else
10509 *total = sparc_costs->int_mul + bit_cost;
10510 }
10511 return false;
10512
10513 case ASHIFT:
10514 case ASHIFTRT:
10515 case LSHIFTRT:
10516 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
10517 return false;
10518
10519 case DIV:
10520 case UDIV:
10521 case MOD:
10522 case UMOD:
10523 if (float_mode_p)
10524 {
10525 if (mode == DFmode)
10526 *total = sparc_costs->float_div_df;
10527 else
10528 *total = sparc_costs->float_div_sf;
10529 }
10530 else
10531 {
10532 if (mode == DImode)
10533 *total = sparc_costs->int_divX;
10534 else
10535 *total = sparc_costs->int_div;
10536 }
10537 return false;
10538
10539 case NEG:
10540 if (! float_mode_p)
10541 {
10542 *total = COSTS_N_INSNS (1);
10543 return false;
10544 }
10545 /* FALLTHRU */
10546
10547 case ABS:
10548 case FLOAT:
10549 case UNSIGNED_FLOAT:
10550 case FIX:
10551 case UNSIGNED_FIX:
10552 case FLOAT_EXTEND:
10553 case FLOAT_TRUNCATE:
10554 *total = sparc_costs->float_move;
10555 return false;
10556
10557 case SQRT:
10558 if (mode == DFmode)
10559 *total = sparc_costs->float_sqrt_df;
10560 else
10561 *total = sparc_costs->float_sqrt_sf;
10562 return false;
10563
10564 case COMPARE:
10565 if (float_mode_p)
10566 *total = sparc_costs->float_cmp;
10567 else
10568 *total = COSTS_N_INSNS (1);
10569 return false;
10570
10571 case IF_THEN_ELSE:
10572 if (float_mode_p)
10573 *total = sparc_costs->float_cmove;
10574 else
10575 *total = sparc_costs->int_cmove;
10576 return false;
10577
10578 case IOR:
10579 /* Handle the NAND vector patterns. */
10580 if (sparc_vector_mode_supported_p (GET_MODE (x))
10581 && GET_CODE (XEXP (x, 0)) == NOT
10582 && GET_CODE (XEXP (x, 1)) == NOT)
10583 {
10584 *total = COSTS_N_INSNS (1);
10585 return true;
10586 }
10587 else
10588 return false;
10589
10590 default:
10591 return false;
10592 }
10593 }
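/* Cost model example: multiplying by the constant 0xff sets nbits to 8,
   so on a hypothetical CPU with int_mul_bit_factor == 2 the MULT case
   above charges

     bit_cost = COSTS_N_INSNS ((8 - 3) / 2) == COSTS_N_INSNS (2)

   on top of int_mul, modelling multipliers whose latency grows with the
   number of significant bits in the operand.  */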
10594
10595 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
10596
10597 static inline bool
10598 general_or_i64_p (reg_class_t rclass)
10599 {
10600 return (rclass == GENERAL_REGS || rclass == I64_REGS);
10601 }
10602
10603 /* Implement TARGET_REGISTER_MOVE_COST. */
10604
10605 static int
10606 sparc_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
10607 reg_class_t from, reg_class_t to)
10608 {
10609 bool need_memory = false;
10610
10611 if (from == FPCC_REGS || to == FPCC_REGS)
10612 need_memory = true;
10613 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
10614 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
10615 {
10616 if (TARGET_VIS3)
10617 {
10618 int size = GET_MODE_SIZE (mode);
10619 if (size == 8 || size == 4)
10620 {
10621 if (! TARGET_ARCH32 || size == 4)
10622 return 4;
10623 else
10624 return 6;
10625 }
10626 }
10627 need_memory = true;
10628 }
10629
10630 if (need_memory)
10631 {
10632 if (sparc_cpu == PROCESSOR_ULTRASPARC
10633 || sparc_cpu == PROCESSOR_ULTRASPARC3
10634 || sparc_cpu == PROCESSOR_NIAGARA
10635 || sparc_cpu == PROCESSOR_NIAGARA2
10636 || sparc_cpu == PROCESSOR_NIAGARA3
10637 || sparc_cpu == PROCESSOR_NIAGARA4)
10638 return 12;
10639
10640 return 6;
10641 }
10642
10643 return 2;
10644 }
10645
10646 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
10647 This is achieved by means of a manual dynamic stack space allocation in
10648 the current frame. We make the assumption that SEQ doesn't contain any
10649 function calls, with the possible exception of calls to the GOT helper. */
10650
10651 static void
10652 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
10653 {
10654 /* We must preserve the lowest 16 words for the register save area. */
10655 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
10656 /* We really need only 2 words of fresh stack space. */
10657 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
10658
10659 rtx slot
10660 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
10661 SPARC_STACK_BIAS + offset));
10662
10663 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
10664 emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
10665 if (reg2)
10666 emit_insn (gen_rtx_SET (VOIDmode,
10667 adjust_address (slot, word_mode, UNITS_PER_WORD),
10668 reg2));
10669 emit_insn (seq);
10670 if (reg2)
10671 emit_insn (gen_rtx_SET (VOIDmode,
10672 reg2,
10673 adjust_address (slot, word_mode, UNITS_PER_WORD)));
10674 emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
10675 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
10676 }
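/* Sketch of the stack manipulation above (stack bias omitted):

     %sp -= size                        allocate fresh space
     [%sp + 16*UNITS_PER_WORD] = reg    save REG (REG2 one word higher)
     ... emit SEQ ...
     reg = [%sp + 16*UNITS_PER_WORD]    restore
     %sp += size                        release the space

   The bottom 16 words are left untouched because a register window
   spill may store into them at any time.  */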
10677
10678 /* Output the assembler code for a thunk function. THUNK_DECL is the
10679 declaration for the thunk function itself, FUNCTION is the decl for
10680 the target function. DELTA is an immediate constant offset to be
10681 added to THIS. If VCALL_OFFSET is nonzero, the word at address
10682 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
10683
10684 static void
10685 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10686 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10687 tree function)
10688 {
10689 rtx this_rtx, insn, funexp;
10690 unsigned int int_arg_first;
10691
10692 reload_completed = 1;
10693 epilogue_completed = 1;
10694
10695 emit_note (NOTE_INSN_PROLOGUE_END);
10696
10697 if (TARGET_FLAT)
10698 {
10699 sparc_leaf_function_p = 1;
10700
10701 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
10702 }
10703 else if (flag_delayed_branch)
10704 {
10705 /* We will emit a regular sibcall below, so we need to instruct
10706 output_sibcall that we are in a leaf function. */
10707 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
10708
10709 /* This will cause final.c to invoke leaf_renumber_regs so we
10710 must behave as if we were in a not-yet-leafified function. */
10711 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
10712 }
10713 else
10714 {
10715 /* We will emit the sibcall manually below, so we will need to
10716 manually spill non-leaf registers. */
10717 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
10718
10719 /* We really are in a leaf function. */
10720 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
10721 }
10722
10723 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
10724 returns a structure, the structure return pointer is there instead. */
10725 if (TARGET_ARCH64
10726 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10727 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
10728 else
10729 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
10730
10731 /* Add DELTA. When possible use a plain add, otherwise load it into
10732 a register first. */
10733 if (delta)
10734 {
10735 rtx delta_rtx = GEN_INT (delta);
10736
10737 if (! SPARC_SIMM13_P (delta))
10738 {
10739 rtx scratch = gen_rtx_REG (Pmode, 1);
10740 emit_move_insn (scratch, delta_rtx);
10741 delta_rtx = scratch;
10742 }
10743
10744 /* THIS_RTX += DELTA. */
10745 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
10746 }
10747
10748 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
10749 if (vcall_offset)
10750 {
10751 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10752 rtx scratch = gen_rtx_REG (Pmode, 1);
10753
10754 gcc_assert (vcall_offset < 0);
10755
10756 /* SCRATCH = *THIS_RTX. */
10757 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
10758
10759 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
10760 may not have any available scratch register at this point. */
10761 if (SPARC_SIMM13_P (vcall_offset))
10762 ;
10763 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
10764 else if (! fixed_regs[5]
10765 /* The below sequence is made up of at least 2 insns,
10766 while the default method may need only one. */
10767 && vcall_offset < -8192)
10768 {
10769 rtx scratch2 = gen_rtx_REG (Pmode, 5);
10770 emit_move_insn (scratch2, vcall_offset_rtx);
10771 vcall_offset_rtx = scratch2;
10772 }
10773 else
10774 {
10775 rtx increment = GEN_INT (-4096);
10776
10777 /* VCALL_OFFSET is a negative number whose typical range can be
10778 estimated as -32768..0 in 32-bit mode. In almost all cases
10779 it is therefore cheaper to emit multiple add insns than
10780 spilling and loading the constant into a register (at least
10781 6 insns). */
10782 while (! SPARC_SIMM13_P (vcall_offset))
10783 {
10784 emit_insn (gen_add2_insn (scratch, increment));
10785 vcall_offset += 4096;
10786 }
10787 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
10788 }
10789
10790 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
10791 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
10792 gen_rtx_PLUS (Pmode,
10793 scratch,
10794 vcall_offset_rtx)));
10795
10796 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
10797 emit_insn (gen_add2_insn (this_rtx, scratch));
10798 }
10799
10800 /* Generate a tail call to the target function. */
10801 if (! TREE_USED (function))
10802 {
10803 assemble_external (function);
10804 TREE_USED (function) = 1;
10805 }
10806 funexp = XEXP (DECL_RTL (function), 0);
10807
10808 if (flag_delayed_branch)
10809 {
10810 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10811 insn = emit_call_insn (gen_sibcall (funexp));
10812 SIBLING_CALL_P (insn) = 1;
10813 }
10814 else
10815 {
10816 /* The hoops we have to jump through in order to generate a sibcall
10817 without using delay slots... */
10818 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
10819
10820 if (flag_pic)
10821 {
10822 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
10823 start_sequence ();
10824 load_got_register (); /* clobbers %o7 */
10825 scratch = sparc_legitimize_pic_address (funexp, scratch);
10826 seq = get_insns ();
10827 end_sequence ();
10828 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
10829 }
10830 else if (TARGET_ARCH32)
10831 {
10832 emit_insn (gen_rtx_SET (VOIDmode,
10833 scratch,
10834 gen_rtx_HIGH (SImode, funexp)));
10835 emit_insn (gen_rtx_SET (VOIDmode,
10836 scratch,
10837 gen_rtx_LO_SUM (SImode, scratch, funexp)));
10838 }
10839 else /* TARGET_ARCH64 */
10840 {
10841 switch (sparc_cmodel)
10842 {
10843 case CM_MEDLOW:
10844 case CM_MEDMID:
10845 /* The destination can serve as a temporary. */
10846 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
10847 break;
10848
10849 case CM_MEDANY:
10850 case CM_EMBMEDANY:
10851 /* The destination cannot serve as a temporary. */
10852 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
10853 start_sequence ();
10854 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
10855 seq = get_insns ();
10856 end_sequence ();
10857 emit_and_preserve (seq, spill_reg, 0);
10858 break;
10859
10860 default:
10861 gcc_unreachable ();
10862 }
10863 }
10864
10865 emit_jump_insn (gen_indirect_jump (scratch));
10866 }
10867
10868 emit_barrier ();
10869
10870 /* Run just enough of rest_of_compilation to get the insns emitted.
10871 There's not really enough bulk here to make other passes such as
10872 instruction scheduling worthwhile. Note that use_thunk calls
10873 assemble_start_function and assemble_end_function. */
10874 insn = get_insns ();
10875 shorten_branches (insn);
10876 final_start_function (insn, file, 1);
10877 final (insn, file, 1);
10878 final_end_function ();
10879
10880 reload_completed = 0;
10881 epilogue_completed = 0;
10882 }
10883
10884 /* Return true if sparc_output_mi_thunk would be able to output the
10885 assembler code for the thunk function specified by the arguments
10886 it is passed, and false otherwise. */
10887 static bool
10888 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
10889 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
10890 HOST_WIDE_INT vcall_offset,
10891 const_tree function ATTRIBUTE_UNUSED)
10892 {
10893 /* Bound the loop used in the default method above. */
10894 return (vcall_offset >= -32768 || ! fixed_regs[5]);
10895 }
10896
10897 /* We use the machine-specific reorg pass to enable workarounds for errata. */
10898
10899 static void
10900 sparc_reorg (void)
10901 {
10902 rtx insn, next;
10903
10904 /* The only erratum we handle for now is that of the AT697F processor. */
10905 if (!sparc_fix_at697f)
10906 return;
10907
10908 /* We need to have the (essentially) final form of the insn stream in order
10909 to properly detect the various hazards. Run delay slot scheduling. */
10910 if (optimize > 0 && flag_delayed_branch)
10911 {
10912 cleanup_barriers ();
10913 dbr_schedule (get_insns ());
10914 }
10915
10916 /* Now look for specific patterns in the insn stream. */
10917 for (insn = get_insns (); insn; insn = next)
10918 {
10919 bool insert_nop = false;
10920 rtx set;
10921
10922 /* Look for a single-word load into an odd-numbered FP register. */
10923 if (NONJUMP_INSN_P (insn)
10924 && (set = single_set (insn)) != NULL_RTX
10925 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
10926 && MEM_P (SET_SRC (set))
10927 && REG_P (SET_DEST (set))
10928 && REGNO (SET_DEST (set)) > 31
10929 && REGNO (SET_DEST (set)) % 2 != 0)
10930 {
10931 /* The wrong dependency is on the enclosing double register. */
10932 unsigned int x = REGNO (SET_DEST (set)) - 1;
10933 unsigned int src1, src2, dest;
10934 int code;
10935
10936 /* If the insn has a delay slot, then it cannot be problematic. */
10937 next = next_active_insn (insn);
10938 if (NONJUMP_INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE)
10939 code = -1;
10940 else
10941 {
10942 extract_insn (next);
10943 code = INSN_CODE (next);
10944 }
10945
10946 switch (code)
10947 {
10948 case CODE_FOR_adddf3:
10949 case CODE_FOR_subdf3:
10950 case CODE_FOR_muldf3:
10951 case CODE_FOR_divdf3:
10952 dest = REGNO (recog_data.operand[0]);
10953 src1 = REGNO (recog_data.operand[1]);
10954 src2 = REGNO (recog_data.operand[2]);
10955 if (src1 != src2)
10956 {
10957 /* Case [1-4]:
10958 ld [address], %fx+1
10959 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
10960 if ((src1 == x || src2 == x)
10961 && (dest == src1 || dest == src2))
10962 insert_nop = true;
10963 }
10964 else
10965 {
10966 /* Case 5:
10967 ld [address], %fx+1
10968 FPOPd %fx, %fx, %fx */
10969 if (src1 == x
10970 && dest == src1
10971 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
10972 insert_nop = true;
10973 }
10974 break;
10975
10976 case CODE_FOR_sqrtdf2:
10977 dest = REGNO (recog_data.operand[0]);
10978 src1 = REGNO (recog_data.operand[1]);
10979 /* Case 6:
10980 ld [address], %fx+1
10981 fsqrtd %fx, %fx */
10982 if (src1 == x && dest == src1)
10983 insert_nop = true;
10984 break;
10985
10986 default:
10987 break;
10988 }
10989 }
10990 else
10991 next = NEXT_INSN (insn);
10992
10993 if (insert_nop)
10994 emit_insn_after (gen_nop (), insn);
10995 }
10996 }
10997
10998 /* How to allocate a 'struct machine_function'. */
10999
11000 static struct machine_function *
11001 sparc_init_machine_status (void)
11002 {
11003 return ggc_alloc_cleared_machine_function ();
11004 }
11005
11006 /* Locate some local-dynamic symbol still in use by this function
11007 so that we can print its name in local-dynamic base patterns. */
11008
11009 static const char *
11010 get_some_local_dynamic_name (void)
11011 {
11012 rtx insn;
11013
11014 if (cfun->machine->some_ld_name)
11015 return cfun->machine->some_ld_name;
11016
11017 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11018 if (INSN_P (insn)
11019 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11020 return cfun->machine->some_ld_name;
11021
11022 gcc_unreachable ();
11023 }
11024
11025 static int
11026 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11027 {
11028 rtx x = *px;
11029
11030 if (x
11031 && GET_CODE (x) == SYMBOL_REF
11032 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11033 {
11034 cfun->machine->some_ld_name = XSTR (x, 0);
11035 return 1;
11036 }
11037
11038 return 0;
11039 }
11040
11041 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11042 We need to emit DTP-relative relocations. */
11043
11044 static void
11045 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11046 {
11047 switch (size)
11048 {
11049 case 4:
11050 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11051 break;
11052 case 8:
11053 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11054 break;
11055 default:
11056 gcc_unreachable ();
11057 }
11058 output_addr_const (file, x);
11059 fputs (")", file);
11060 }
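/* For instance, with SIZE == 4 and X the symbol "x", this emits

     .word %r_tls_dtpoff32(x)

   which the linker resolves to x's offset from the dynamic TLS block.  */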
11061
/* Do whatever processing is required at the end of a file.  */

static void
sparc_file_end (void)
{
  /* If we need to emit the special GOT helper function, do so now.  */
  if (got_helper_rtx)
    {
      const char *name = XSTR (got_helper_rtx, 0);
      const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
#ifdef DWARF2_UNWIND_INFO
      bool do_cfi;
#endif

      if (USE_HIDDEN_LINKONCE)
        {
          tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                                  get_identifier (name),
                                  build_function_type_list (void_type_node,
                                                            NULL_TREE));
          DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
                                           NULL_TREE, void_type_node);
          TREE_PUBLIC (decl) = 1;
          TREE_STATIC (decl) = 1;
          make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
          DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
          DECL_VISIBILITY_SPECIFIED (decl) = 1;
          resolve_unique_section (decl, 0, flag_function_sections);
          allocate_struct_function (decl, true);
          cfun->is_thunk = 1;
          current_function_decl = decl;
          init_varasm_status ();
          assemble_start_function (decl, name);
        }
      else
        {
          const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
          switch_to_section (text_section);
          if (align > 0)
            ASM_OUTPUT_ALIGN (asm_out_file, align);
          ASM_OUTPUT_LABEL (asm_out_file, name);
        }

#ifdef DWARF2_UNWIND_INFO
      do_cfi = dwarf2out_do_cfi_asm ();
      if (do_cfi)
        fprintf (asm_out_file, "\t.cfi_startproc\n");
#endif
      if (flag_delayed_branch)
        fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
                 reg_name, reg_name);
      else
        fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
                 reg_name, reg_name);
#ifdef DWARF2_UNWIND_INFO
      if (do_cfi)
        fprintf (asm_out_file, "\t.cfi_endproc\n");
#endif
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();

#ifdef TARGET_SOLARIS
  solaris_file_end ();
#endif
}

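/* For reference, with -fdelayed-branch the helper emitted above looks
   roughly like this (assuming the GOT register is %l7 and eliding the
   CFI directives):

       __sparc_get_pc_thunk.l7:
	       jmp	%o7+8
	        add	%o7, %l7, %l7

   The caller reaches it via a call instruction, so %o7 holds the
   caller's PC and the add in the delay slot materializes the GOT
   pointer.  */
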
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
/* Implement TARGET_MANGLE_TYPE.  */

static const char *
sparc_mangle_type (const_tree type)
{
  if (!TARGET_64BIT
      && TYPE_MAIN_VARIANT (type) == long_double_type_node
      && TARGET_LONG_DOUBLE_128)
    return "g";

  /* For all other types, use normal C++ mangling.  */
  return NULL;
}
#endif

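/* As an illustration: with a hypothetical declaration `void f (long
   double);' compiled for 32-bit SPARC with 128-bit long double, the hook
   above makes the symbol mangle as _Z1fg instead of the default _Z1fe,
   i.e. the type is mangled like __float128.  */
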
/* Expand a membar instruction for various use cases.  Both the LOAD_STORE
   and BEFORE_AFTER arguments are of the form X_Y.  They are two-bit masks
   where bit 0 indicates that X is true, and bit 1 indicates that Y is
   true.  */

void
sparc_emit_membar_for_model (enum memmodel model,
                             int load_store, int before_after)
{
  /* Bits for the MEMBAR mmask field.  */
  const int LoadLoad = 1;
  const int StoreLoad = 2;
  const int LoadStore = 4;
  const int StoreStore = 8;

  int mm = 0, implied = 0;

  switch (sparc_memory_model)
    {
    case SMM_SC:
      /* Sequential Consistency.  All memory transactions are immediately
         visible in sequential execution order.  No barriers needed.  */
      implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
      break;

    case SMM_TSO:
      /* Total Store Ordering: all memory transactions with store semantics
         are followed by an implied StoreStore.  */
      implied |= StoreStore;
      /* FALLTHRU */

    case SMM_PSO:
      /* Partial Store Ordering: all memory transactions with load semantics
         are followed by an implied LoadLoad | LoadStore.  */
      implied |= LoadLoad | LoadStore;

      /* If we're not looking for a raw barrier (before+after), then atomic
         operations get the benefit of being both load and store.  */
      if (load_store == 3 && before_after == 2)
        implied |= StoreLoad | StoreStore;
      /* FALLTHRU */

    case SMM_RMO:
      /* Relaxed Memory Ordering: no implicit bits.  */
      break;

    default:
      gcc_unreachable ();
    }

  if (before_after & 1)
    {
      if (model == MEMMODEL_RELEASE
          || model == MEMMODEL_ACQ_REL
          || model == MEMMODEL_SEQ_CST)
        {
          if (load_store & 1)
            mm |= LoadLoad | StoreLoad;
          if (load_store & 2)
            mm |= LoadStore | StoreStore;
        }
    }
  if (before_after & 2)
    {
      if (model == MEMMODEL_ACQUIRE
          || model == MEMMODEL_ACQ_REL
          || model == MEMMODEL_SEQ_CST)
        {
          if (load_store & 1)
            mm |= LoadLoad | LoadStore;
          if (load_store & 2)
            mm |= StoreLoad | StoreStore;
        }
    }

  /* Remove the bits implied by the system memory model.  */
  mm &= ~implied;

  /* For raw barriers (before+after), always emit a barrier.
     This will become a compile-time barrier if needed.  */
  if (mm || before_after == 3)
    emit_insn (gen_membar (GEN_INT (mm)));
}

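/* A worked example (illustrative): suppose a SEQ_CST store is expanded
   under TSO with load_store == 2.  A "before" call (before_after == 1)
   computes

       mm      = LoadStore | StoreStore
       implied = LoadLoad | LoadStore | StoreStore
       mm &= ~implied  -->  0

   so no barrier is emitted before the store, while an "after" call
   (before_after == 2) leaves mm == StoreLoad, which TSO does not imply,
   so a single "membar #StoreLoad" is emitted.  */
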
/* Expand code to perform an 8-bit or 16-bit compare and swap by doing
   a 32-bit compare and swap on the word containing the byte or
   half-word.  */

static void
sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
                                  rtx oldval, rtx newval)
{
  rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
  rtx addr = gen_reg_rtx (Pmode);
  rtx off = gen_reg_rtx (SImode);
  rtx oldv = gen_reg_rtx (SImode);
  rtx newv = gen_reg_rtx (SImode);
  rtx oldvalue = gen_reg_rtx (SImode);
  rtx newvalue = gen_reg_rtx (SImode);
  rtx res = gen_reg_rtx (SImode);
  rtx resv = gen_reg_rtx (SImode);
  rtx memsi, val, mask, end_label, loop_label, cc;

  emit_insn (gen_rtx_SET (VOIDmode, addr,
                          gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));

  if (Pmode != SImode)
    addr1 = gen_lowpart (SImode, addr1);
  emit_insn (gen_rtx_SET (VOIDmode, off,
                          gen_rtx_AND (SImode, addr1, GEN_INT (3))));

  memsi = gen_rtx_MEM (SImode, addr);
  set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
  MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);

  val = copy_to_reg (memsi);

  emit_insn (gen_rtx_SET (VOIDmode, off,
                          gen_rtx_XOR (SImode, off,
                                       GEN_INT (GET_MODE (mem) == QImode
                                                ? 3 : 2))));

  emit_insn (gen_rtx_SET (VOIDmode, off,
                          gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));

  if (GET_MODE (mem) == QImode)
    mask = force_reg (SImode, GEN_INT (0xff));
  else
    mask = force_reg (SImode, GEN_INT (0xffff));

  emit_insn (gen_rtx_SET (VOIDmode, mask,
                          gen_rtx_ASHIFT (SImode, mask, off)));

  emit_insn (gen_rtx_SET (VOIDmode, val,
                          gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
                                       val)));

  oldval = gen_lowpart (SImode, oldval);
  emit_insn (gen_rtx_SET (VOIDmode, oldv,
                          gen_rtx_ASHIFT (SImode, oldval, off)));

  newval = gen_lowpart_common (SImode, newval);
  emit_insn (gen_rtx_SET (VOIDmode, newv,
                          gen_rtx_ASHIFT (SImode, newval, off)));

  emit_insn (gen_rtx_SET (VOIDmode, oldv,
                          gen_rtx_AND (SImode, oldv, mask)));

  emit_insn (gen_rtx_SET (VOIDmode, newv,
                          gen_rtx_AND (SImode, newv, mask)));

  end_label = gen_label_rtx ();
  loop_label = gen_label_rtx ();
  emit_label (loop_label);

  emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
                          gen_rtx_IOR (SImode, oldv, val)));

  emit_insn (gen_rtx_SET (VOIDmode, newvalue,
                          gen_rtx_IOR (SImode, newv, val)));

  emit_move_insn (bool_result, const1_rtx);

  emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));

  emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);

  emit_insn (gen_rtx_SET (VOIDmode, resv,
                          gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
                                       res)));

  emit_move_insn (bool_result, const0_rtx);

  cc = gen_compare_reg_1 (NE, resv, val);
  emit_insn (gen_rtx_SET (VOIDmode, val, resv));

  /* Use cbranchcc4 to separate the compare and branch!  */
  emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
                                  cc, const0_rtx, loop_label));

  emit_label (end_label);

  emit_insn (gen_rtx_SET (VOIDmode, res,
                          gen_rtx_AND (SImode, res, mask)));

  emit_insn (gen_rtx_SET (VOIDmode, res,
                          gen_rtx_LSHIFTRT (SImode, res, off)));

  emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
}

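/* In C-like pseudocode, the expansion above does roughly the following
   (illustrative sketch of the emitted RTL, with k == 3 for QImode and
   k == 2 for HImode on this big-endian target):

     uint32_t *p = (uint32_t *) (addr & -4);	    // containing word
     int shift = ((addr & 3) ^ k) << 3;		    // bit offset of the field
     uint32_t mask = (0xff or 0xffff) << shift;
     uint32_t bg = *p & ~mask;			    // surrounding bytes
     uint32_t o = (oldval << shift) & mask;
     uint32_t n = (newval << shift) & mask;
     for (;;)
       {
	 uint32_t res = CAS (p, bg | o, bg | n);    // 32-bit compare-and-swap
	 if (res == (bg | o))
	   break;				    // success
	 if ((res & ~mask) == bg)
	   break;				    // our field differed: failure
	 bg = res & ~mask;			    // background changed: retry
       }
   */
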
/* Expand code to perform a compare-and-swap.  */

void
sparc_expand_compare_and_swap (rtx operands[])
{
  rtx bval, retval, mem, oldval, newval;
  enum machine_mode mode;
  enum memmodel model;

  bval = operands[0];
  retval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  model = (enum memmodel) INTVAL (operands[6]);
  mode = GET_MODE (mem);

  sparc_emit_membar_for_model (model, 3, 1);

  if (reg_overlap_mentioned_p (retval, oldval))
    oldval = copy_to_reg (oldval);

  if (mode == QImode || mode == HImode)
    sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
  else
    {
      rtx (*gen) (rtx, rtx, rtx, rtx);
      rtx x;

      if (mode == SImode)
        gen = gen_atomic_compare_and_swapsi_1;
      else
        gen = gen_atomic_compare_and_swapdi_1;
      emit_insn (gen (retval, mem, oldval, newval));

      x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
      if (x != bval)
        convert_move (bval, x, 1);
    }

  sparc_emit_membar_for_model (model, 3, 2);
}

void
sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel)
{
  rtx t_1, t_2, t_3;

  sel = gen_lowpart (DImode, sel);
  switch (vmode)
    {
    case V2SImode:
      /* inp = xxxxxxxAxxxxxxxB */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ....xxxxxxxAxxx. */
      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
                                 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
                                 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .......B */
      /* t_1 = ...A.... */
      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
      /* sel = ...A...B */
      sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
      /* sel = AAAABBBB * 4 */
      t_1 = force_reg (SImode, GEN_INT (0x01230123));
      /* sel = { A*4, A*4+1, A*4+2, ... } */
      break;

    case V4HImode:
      /* inp = xxxAxxxBxxxCxxxD */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ..xxxAxxxBxxxCxx */
      /* t_2 = ....xxxAxxxBxxxC */
      /* t_3 = ......xxxAxxxBxx */
      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
                                 GEN_INT (0x07),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
                                 GEN_INT (0x0700),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
                                 GEN_INT (0x070000),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
                                 GEN_INT (0x07000000),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .......D */
      /* t_1 = .....C.. */
      /* t_2 = ...B.... */
      /* t_3 = .A...... */
      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
      sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
      /* sel = .A.B.C.D */
      sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
      /* sel = AABBCCDD * 2 */
      t_1 = force_reg (SImode, GEN_INT (0x01010101));
      /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
      break;

    case V8QImode:
      /* input = xAxBxCxDxExFxGxH */
      sel = expand_simple_binop (DImode, AND, sel,
                                 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
                                          | 0x0f0f0f0f),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .A.B.C.D.E.F.G.H */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ..A.B.C.D.E.F.G. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .AABBCCDDEEFFGGH */
      sel = expand_simple_binop (DImode, AND, sel,
                                 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
                                          | 0xff00ff),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..AB..CD..EF..GH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ....AB..CD..EF.. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..ABABCDCDEFEFGH */
      sel = expand_simple_binop (DImode, AND, sel,
                                 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ....ABCD....EFGH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ........ABCD.... */
      sel = gen_lowpart (SImode, sel);
      t_1 = gen_lowpart (SImode, t_1);
      break;

    default:
      gcc_unreachable ();
    }

  /* Always perform the final addition/merge within the bmask insn.  */
  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
}

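/* A worked example (illustrative): for V4HImode with selector elements
   (A,B,C,D) = (0,1,2,3), the steps above compute

       sel		= 0x00010203	(.A.B.C.D)
       sel * 0x22	= 0x00224466	(AABBCCDD * 2)
       sel + 0x01010101 = 0x01234567	(added inside the bmask insn)

   i.e. the byte indices { 0, 1, 2, 3, 4, 5, 6, 7 }, the identity
   permutation, as expected for an identity element selector.  */
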
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
sparc_frame_pointer_required (void)
{
  /* If the stack pointer is dynamically modified in the function, it cannot
     serve as the frame pointer.  */
  if (cfun->calls_alloca)
    return true;

  /* If the function receives nonlocal gotos, it needs to save the frame
     pointer in the nonlocal_goto_save_area object.  */
  if (cfun->has_nonlocal_label)
    return true;

  /* In flat mode, that's it.  */
  if (TARGET_FLAT)
    return false;

  /* Otherwise, the frame pointer is required if the function isn't leaf.  */
  return !(crtl->is_leaf && only_leaf_regs_used ());
}

/* The way this is structured, we can't eliminate SFP in favor of SP
   if the frame pointer is required: we want to use the SFP->HFP elimination
   in that case.  But the test in update_eliminables doesn't know we are
   assuming below that we only do the former elimination.  */

static bool
sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
  return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
}

/* Return the hard frame pointer directly to bypass the stack bias.  */

static rtx
sparc_builtin_setjmp_frame_value (void)
{
  return hard_frame_pointer_rtx;
}

/* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
   they won't be allocated.  */

static void
sparc_conditional_register_usage (void)
{
  if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g5, then honor it.  */
  if (TARGET_ARCH32 && fixed_regs[5])
    fixed_regs[5] = 1;
  else if (TARGET_ARCH64 && fixed_regs[5] == 2)
    fixed_regs[5] = 0;
  if (! TARGET_V9)
    {
      int regno;
      for (regno = SPARC_FIRST_V9_FP_REG;
           regno <= SPARC_LAST_V9_FP_REG;
           regno++)
        fixed_regs[regno] = 1;
      /* %fcc0 is used by v8 and v9.  */
      for (regno = SPARC_FIRST_V9_FCC_REG + 1;
           regno <= SPARC_LAST_V9_FCC_REG;
           regno++)
        fixed_regs[regno] = 1;
    }
  if (! TARGET_FPU)
    {
      int regno;
      for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
        fixed_regs[regno] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g2, then honor it.
     Likewise with g3 and g4.  */
  if (fixed_regs[2] == 2)
    fixed_regs[2] = ! TARGET_APP_REGS;
  if (fixed_regs[3] == 2)
    fixed_regs[3] = ! TARGET_APP_REGS;
  if (TARGET_ARCH32 && fixed_regs[4] == 2)
    fixed_regs[4] = ! TARGET_APP_REGS;
  else if (TARGET_CM_EMBMEDANY)
    fixed_regs[4] = 1;
  else if (fixed_regs[4] == 2)
    fixed_regs[4] = 0;
  if (TARGET_FLAT)
    {
      int regno;
      /* Disable leaf functions.  */
      memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
        leaf_reg_remap [regno] = regno;
    }
  if (TARGET_VIS)
    global_regs[SPARC_GSR_REG] = 1;
}

/* Implement TARGET_PREFERRED_RELOAD_CLASS:

   - We can't load constants into FP registers.
   - We can't load FP constants into integer registers when soft-float,
     because there is no soft-float pattern with a r/F constraint.
   - We can't load FP constants into integer registers for TFmode unless
     it is 0.0L, because there is no movtf pattern with a r/F constraint.
   - Try to reload integer constants (symbolic or otherwise) back into
     registers directly, rather than having them dumped to memory.  */

static reg_class_t
sparc_preferred_reload_class (rtx x, reg_class_t rclass)
{
  enum machine_mode mode = GET_MODE (x);
  if (CONSTANT_P (x))
    {
      if (FP_REG_CLASS_P (rclass)
          || rclass == GENERAL_OR_FP_REGS
          || rclass == GENERAL_OR_EXTRA_FP_REGS
          || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
          || (mode == TFmode && ! const_zero_operand (x, mode)))
        return NO_REGS;

      if (GET_MODE_CLASS (mode) == MODE_INT)
        return GENERAL_REGS;

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          if (! FP_REG_CLASS_P (rclass)
              || !(const_zero_operand (x, mode)
                   || const_all_ones_operand (x, mode)))
            return NO_REGS;
        }
    }

  if (TARGET_VIS3
      && ! TARGET_ARCH64
      && (rclass == EXTRA_FP_REGS
          || rclass == GENERAL_OR_EXTRA_FP_REGS))
    {
      int regno = true_regnum (x);

      if (SPARC_INT_REG_P (regno))
        return (rclass == EXTRA_FP_REGS
                ? FP_REGS : GENERAL_OR_FP_REGS);
    }

  return rclass;
}

/* Output a wide multiply instruction in V8+ mode.  INSN is the instruction,
   OPERANDS are its operands and OPCODE is the mnemonic to be used.  */

const char *
output_v8plus_mult (rtx insn, rtx *operands, const char *opcode)
{
  char mulstr[32];

  gcc_assert (! TARGET_ARCH64);

  if (sparc_check_64 (operands[1], insn) <= 0)
    output_asm_insn ("srl\t%L1, 0, %L1", operands);
  if (which_alternative == 1)
    output_asm_insn ("sllx\t%H1, 32, %H1", operands);
  if (GET_CODE (operands[2]) == CONST_INT)
    {
      if (which_alternative == 1)
        {
          output_asm_insn ("or\t%L1, %H1, %H1", operands);
          sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
          output_asm_insn (mulstr, operands);
          return "srlx\t%L0, 32, %H0";
        }
      else
        {
          output_asm_insn ("sllx\t%H1, 32, %3", operands);
          output_asm_insn ("or\t%L1, %3, %3", operands);
          sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
          output_asm_insn (mulstr, operands);
          output_asm_insn ("srlx\t%3, 32, %H0", operands);
          return "mov\t%3, %L0";
        }
    }
  else if (rtx_equal_p (operands[1], operands[2]))
    {
      if (which_alternative == 1)
        {
          output_asm_insn ("or\t%L1, %H1, %H1", operands);
          sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
          output_asm_insn (mulstr, operands);
          return "srlx\t%L0, 32, %H0";
        }
      else
        {
          output_asm_insn ("sllx\t%H1, 32, %3", operands);
          output_asm_insn ("or\t%L1, %3, %3", operands);
          sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
          output_asm_insn (mulstr, operands);
          output_asm_insn ("srlx\t%3, 32, %H0", operands);
          return "mov\t%3, %L0";
        }
    }
  if (sparc_check_64 (operands[2], insn) <= 0)
    output_asm_insn ("srl\t%L2, 0, %L2", operands);
  if (which_alternative == 1)
    {
      output_asm_insn ("or\t%L1, %H1, %H1", operands);
      output_asm_insn ("sllx\t%H2, 32, %L1", operands);
      output_asm_insn ("or\t%L2, %L1, %L1", operands);
      sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
      output_asm_insn (mulstr, operands);
      return "srlx\t%L0, 32, %H0";
    }
  else
    {
      output_asm_insn ("sllx\t%H1, 32, %3", operands);
      output_asm_insn ("sllx\t%H2, 32, %4", operands);
      output_asm_insn ("or\t%L1, %3, %3", operands);
      output_asm_insn ("or\t%L2, %4, %4", operands);
      sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
      output_asm_insn (mulstr, operands);
      output_asm_insn ("srlx\t%3, 32, %H0", operands);
      return "mov\t%3, %L0";
    }
}

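/* For reference, the register/register case of alternative 0 above emits
   a sequence of this shape (hypothetical operands, with mulx standing in
   for OPCODE):

       sllx	%o0, 32, %g2	! %3 = high word of op1, shifted
       sllx	%o2, 32, %g3	! %4 = high word of op2, shifted
       or	%o1, %g2, %g2	! glue in the low word of op1
       or	%o3, %g3, %g3	! glue in the low word of op2
       mulx	%g2, %g3, %g2
       srlx	%g2, 32, %o4	! high word of the result
       mov	%g2, %o5	! low word of the result

   preceded by srl zero-extensions whenever sparc_check_64 cannot prove
   that the low parts are already clean.  */
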
/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn.  MODE
   and INNER_MODE are the modes describing TARGET.  */

static void
vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode,
                      enum machine_mode inner_mode)
{
  rtx t1, final_insn;
  int bmask;

  t1 = gen_reg_rtx (mode);

  elt = convert_modes (SImode, inner_mode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  switch (mode)
    {
    case V2SImode:
      final_insn = gen_bshufflev2si_vis (target, t1, t1);
      bmask = 0x45674567;
      break;
    case V4HImode:
      final_insn = gen_bshufflev4hi_vis (target, t1, t1);
      bmask = 0x67676767;
      break;
    case V8QImode:
      final_insn = gen_bshufflev8qi_vis (target, t1, t1);
      bmask = 0x77777777;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), CONST0_RTX (SImode),
                              force_reg (SImode, GEN_INT (bmask))));
  emit_insn (final_insn);
}

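/* An explanatory note on the constants above: each nibble of the GSR
   bmask selects one byte from the 16-byte concatenation of the two
   BSHUFFLE source operands.  Both sources are T1 and ELT was placed in
   its low SImode half, so e.g. 0x77777777 for V8QImode replicates byte 7
   (where the element landed on this big-endian target) into all eight
   result bytes.  */
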
/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn.  */

static void
vector_init_fpmerge (rtx target, rtx elt)
{
  rtx t1, t2, t2_low, t3, t3_low;

  t1 = gen_reg_rtx (V4QImode);
  elt = convert_modes (SImode, QImode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  t2 = gen_reg_rtx (V8QImode);
  t2_low = gen_lowpart (V4QImode, t2);
  emit_insn (gen_fpmerge_vis (t2, t1, t1));

  t3 = gen_reg_rtx (V8QImode);
  t3_low = gen_lowpart (V4QImode, t3);
  emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));

  emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
}

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn.  */

static void
vector_init_faligndata (rtx target, rtx elt)
{
  rtx t1 = gen_reg_rtx (V4HImode);
  int i;

  elt = convert_modes (SImode, HImode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
                                  force_reg (SImode, GEN_INT (6)),
                                  const0_rtx));

  for (i = 0; i < 4; i++)
    emit_insn (gen_faligndatav4hi_vis (target, t1, target));
}

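/* How the loop above works (explanatory note): the alignaddr records an
   alignment offset of 6, so each faligndata extracts the 8 bytes starting
   at byte 6 of the 16-byte concatenation { T1, TARGET }.  That prepends
   the halfword sitting in bytes 6..7 of T1 while shifting the previous
   contents of TARGET down; after four iterations every halfword of
   TARGET holds ELT.  */
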
/* Emit code to initialize TARGET to values for individual fields VALS.  */

void
sparc_expand_vector_init (rtx target, rtx vals)
{
  const enum machine_mode mode = GET_MODE (target);
  const enum machine_mode inner_mode = GET_MODE_INNER (mode);
  const int n_elts = GET_MODE_NUNITS (mode);
  int i, n_var = 0;
  bool all_same;
  rtx mem;

  all_same = true;
  for (i = 0; i < n_elts; i++)
    {
      rtx x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
        n_var++;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
    {
      if (GET_MODE_SIZE (inner_mode) == 4)
        {
          emit_move_insn (gen_lowpart (SImode, target),
                          gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
          return;
        }
      else if (GET_MODE_SIZE (inner_mode) == 8)
        {
          emit_move_insn (gen_lowpart (DImode, target),
                          gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
          return;
        }
    }
  else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
           && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
    {
      emit_move_insn (gen_highpart (word_mode, target),
                      gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
      emit_move_insn (gen_lowpart (word_mode, target),
                      gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
      return;
    }

  if (all_same && GET_MODE_SIZE (mode) == 8)
    {
      if (TARGET_VIS2)
        {
          vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
          return;
        }
      if (mode == V8QImode)
        {
          vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
          return;
        }
      if (mode == V4HImode)
        {
          vector_init_faligndata (target, XVECEXP (vals, 0, 0));
          return;
        }
    }

  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
                                       i * GET_MODE_SIZE (inner_mode)),
                    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}

/* Implement TARGET_SECONDARY_RELOAD.  */

static reg_class_t
sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
                        enum machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  sri->icode = CODE_FOR_nothing;
  sri->extra_cost = 0;

  /* We need a temporary when loading/storing a HImode/QImode value
     between memory and the FPU registers.  This can happen when combine puts
     a paradoxical subreg in a float/fix conversion insn.  */
  if (FP_REG_CLASS_P (rclass)
      && (mode == HImode || mode == QImode)
      && (GET_CODE (x) == MEM
          || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
              && true_regnum (x) == -1)))
    return GENERAL_REGS;

  /* On 32-bit we need a temporary when loading/storing a DFmode value
     between unaligned memory and the upper FPU registers.  */
  if (TARGET_ARCH32
      && rclass == EXTRA_FP_REGS
      && mode == DFmode
      && GET_CODE (x) == MEM
      && ! mem_min_alignment (x, 8))
    return FP_REGS;

  if (((TARGET_CM_MEDANY
        && symbolic_operand (x, mode))
       || (TARGET_CM_EMBMEDANY
           && text_segment_operand (x, mode)))
      && ! flag_pic)
    {
      if (in_p)
        sri->icode = direct_optab_handler (reload_in_optab, mode);
      else
        sri->icode = direct_optab_handler (reload_out_optab, mode);
      return NO_REGS;
    }

  if (TARGET_VIS3 && TARGET_ARCH32)
    {
      int regno = true_regnum (x);

      /* When using VIS3 fp<-->int register moves, on 32-bit we have
         to move 8-byte values in 4-byte pieces.  This only works via
         FP_REGS, and not via EXTRA_FP_REGS.  Therefore if we try to
         move between EXTRA_FP_REGS and GENERAL_REGS, we will need
         an FP_REGS intermediate move.  */
      if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
          || ((general_or_i64_p (rclass)
               || rclass == GENERAL_OR_FP_REGS)
              && SPARC_FP_REG_P (regno)))
        {
          sri->extra_cost = 2;
          return FP_REGS;
        }
    }

  return NO_REGS;
}

/* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
   OPERANDS[0] in MODE.  OPERANDS[1] is the operator of the condition.  */

bool
sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
{
  enum rtx_code rc = GET_CODE (operands[1]);
  enum machine_mode cmp_mode;
  rtx cc_reg, dst, cmp;

  cmp = operands[1];
  if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
    return false;

  if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
    cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);

  cmp_mode = GET_MODE (XEXP (cmp, 0));
  rc = GET_CODE (cmp);

  dst = operands[0];
  if (! rtx_equal_p (operands[2], dst)
      && ! rtx_equal_p (operands[3], dst))
    {
      if (reg_overlap_mentioned_p (dst, cmp))
        dst = gen_reg_rtx (mode);

      emit_move_insn (dst, operands[3]);
    }
  else if (operands[2] == dst)
    {
      operands[2] = operands[3];

      if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
        rc = reverse_condition_maybe_unordered (rc);
      else
        rc = reverse_condition (rc);
    }

  if (XEXP (cmp, 1) == const0_rtx
      && GET_CODE (XEXP (cmp, 0)) == REG
      && cmp_mode == DImode
      && v9_regcmp_p (rc))
    cc_reg = XEXP (cmp, 0);
  else
    cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));

  cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);

  emit_insn (gen_rtx_SET (VOIDmode, dst,
                          gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));

  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  return true;
}

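/* As a concrete illustration (hypothetical operands): for something like

     d = (a > b) ? c : d;

   on V9 the expansion above yields a compare followed by a single
   conditional move, e.g.

       cmp	%o0, %o1
       movg	%icc, %o2, %o3

   with the condition reversed first when the destination already holds
   the "true" operand.  */
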
/* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
   into OPERANDS[0] in MODE, depending on the outcome of the comparison of
   OPERANDS[4] and OPERANDS[5].  OPERANDS[3] is the operator of the condition.
   FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
   code to be used for the condition mask.  */

void
sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode)
{
  rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
  enum rtx_code code = GET_CODE (operands[3]);

  mask = gen_reg_rtx (Pmode);
  cop0 = operands[4];
  cop1 = operands[5];
  if (code == LT || code == GE)
    {
      rtx t;

      code = swap_condition (code);
      t = cop0; cop0 = cop1; cop1 = t;
    }

  gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);

  fcmp = gen_rtx_UNSPEC (Pmode,
                         gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
                         fcode);

  cmask = gen_rtx_UNSPEC (DImode,
                          gen_rtvec (2, mask, gsr),
                          ccode);

  bshuf = gen_rtx_UNSPEC (mode,
                          gen_rtvec (3, operands[1], operands[2], gsr),
                          UNSPEC_BSHUFFLE);

  emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
  emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
}

/* On SPARC, any mode which naturally allocates into the float
   registers should return 4 here.  */

unsigned int
sparc_regmode_natural_size (enum machine_mode mode)
{
  int size = UNITS_PER_WORD;

  if (TARGET_ARCH64)
    {
      enum mode_class mclass = GET_MODE_CLASS (mode);

      if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
        size = 4;
    }

  return size;
}

/* Return TRUE if it is a good idea to tie two pseudo registers
   when one has mode MODE1 and one has mode MODE2.
   If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
   for any hard reg, then this must be FALSE for correct output.

   For V9 we have to deal with the fact that only the lower 32 floating
   point registers are 32-bit addressable.  */

bool
sparc_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  enum mode_class mclass1, mclass2;
  unsigned short size1, size2;

  if (mode1 == mode2)
    return true;

  mclass1 = GET_MODE_CLASS (mode1);
  mclass2 = GET_MODE_CLASS (mode2);
  if (mclass1 != mclass2)
    return false;

  if (! TARGET_V9)
    return true;

  /* Classes are the same and we are V9 so we have to deal with upper
     vs. lower floating point registers.  If one of the modes is a
     4-byte mode, and the other is not, we have to mark them as not
     tieable because only the lower 32 floating point registers are
     addressable 32 bits at a time.

     We can't just test explicitly for SFmode, otherwise we won't
     cover the vector mode cases properly.  */

  if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
    return true;

  size1 = GET_MODE_SIZE (mode1);
  size2 = GET_MODE_SIZE (mode2);
  if ((size1 > 4 && size2 == 4)
      || (size2 > 4 && size1 == 4))
    return false;

  return true;
}

#include "gt-sparc.h"