/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2015 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "explow.h"
#include "expr.h"
#include "debug.h"
#include "common/common-target.h"
#include "gimplify.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     not variable.  */
  const int int_mul_bit_factor;

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};
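
/* As a worked example of the formula above (for illustration only): with
   the UltraSPARC settings below, int_mul of COSTS_N_INSNS (4) and an
   int_mul_bit_factor of 2, a multiply whose operand has its highest set
   bit at position 11 is costed as COSTS_N_INSNS (4) + (11 - 3) / 2.  */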

static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const struct processor_costs *sparc_costs = &cypress_costs;

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
   somebody branches between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
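
/* Reading the table above: incoming register %i0 (hard reg 24) is
   remapped to outgoing register %o0 (hard reg 8), while %fp (hard reg 30)
   and the locals %l0-%l7 (hard regs 16-23) map to -1, i.e. they have no
   replacement.  */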

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};
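
/* Note the correspondence with leaf_reg_remap above: the %i registers
   are allowable here because they can be remapped onto %o registers,
   whereas the %l registers, the %o registers and %fp are not.  */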

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size cfun->machine->frame_size
#define sparc_apparent_frame_size cfun->machine->apparent_frame_size
#define sparc_frame_base_reg cfun->machine->frame_base_reg
#define sparc_frame_base_offset cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static void scan_record_type (const_tree, int *, int *, int *);
static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int hypersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx_insn *);
static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static int sparc_register_move_cost (machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
						 int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					machine_mode, const_tree, bool);
static unsigned int sparc_function_arg_boundary (machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static machine_mode sparc_preferred_simd_mode (machine_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, machine_mode, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   machine_mode,
					   secondary_reload_info *);
static machine_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
\f
#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL, false }
};
#endif
\f
/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}
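
/* So, for instance, mem_ref of (zero_extend:SI (mem:HI ...)) returns the
   inner MEM, while mem_ref of a bare (reg:SI ...) returns NULL_RTX.  */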

/* We use a machine specific pass to enable workarounds for errata.
   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  The pass is inserted in the pass pipeline
   at the end of sparc_option_override.  */

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn))
	if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
	  insn = seq->insn (1);

      /* Look for a single-word load into an odd-numbered FP register.  */
      if (sparc_fix_at697f
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	  && MEM_P (SET_SRC (set))
	  && REG_P (SET_DEST (set))
	  && REGNO (SET_DEST (set)) > 31
	  && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
		       ld [address], %fx+1
		       FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
		       ld [address], %fx+1
		       FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
		   ld [address], %fx+1
		   fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && mem_ref (SET_SRC (set)) != NULL_RTX
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
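	  /* E.g. a load into %f9 (hard reg 41) pairs with its sibling
	     %f8 (hard reg 40), and vice versa.  */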
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (rtx_sequence *seq =
			 dyn_cast <rtx_sequence *> (PATTERN (after)))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = seq->insn (1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}

namespace {

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* The only errata we handle are those of the AT697F and UT699.  */
      return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }

}; // class pass_work_around_errata

} // anon namespace

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}

/* Helpers for TARGET_DEBUG_OPTIONS.  */
static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf(stderr, "]\n");
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA, 0 },
    { "cypress", MASK_ISA, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
    { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
    { "leon3v7", MASK_ISA, MASK_LEON3|MASK_FPU },
    { "sparclite", MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
      /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
  };
  const struct cpu_table *cpu;
  unsigned int i;
  int fpu;

  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags("Initial target_flags", target_flags);
      dump_target_flags("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64-bit archs to use 128-bit long double.  */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
	call_used_regs [i] = 1;
      }

  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      fprintf (stderr, "sparc_cpu: %s\n",
	       cpu_table[(int) sparc_cpu].name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
#ifndef HAVE_AS_LEON
		   & ~(MASK_LEON | MASK_LEON3)
#endif
		   );

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (target_flags_explicit & MASK_FPU)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* -mvis2 implies -mvis.  */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis.  */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is
     disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit.  */
  if (TARGET_VIS && ! TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0
      && (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4))
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_LEON3:
    case PROCESSOR_LEON3V7:
      sparc_costs = &leon3_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara4_costs;
      break;
    case PROCESSOR_NATIVE:
      gcc_unreachable ();
    };

  if (sparc_memory_model == SMM_DEFAULT)
    {
      /* Choose the memory model for the operating system.  */
      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
      if (os_default != SMM_DEFAULT)
	sparc_memory_model = os_default;
      /* Choose the most relaxed model for the processor.  */
      else if (TARGET_V9)
	sparc_memory_model = SMM_RMO;
      else if (TARGET_LEON3)
	sparc_memory_model = SMM_TSO;
      else if (TARGET_LEON)
	sparc_memory_model = SMM_SC;
      else if (TARGET_V8)
	sparc_memory_model = SMM_PSO;
      else
	sparc_memory_model = SMM_SC;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (TARGET_DEBUG_OPTIONS)
    dump_target_flags ("Final target_flags", target_flags);

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 2
			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
			     ? 8 : 3)),
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_ULTRASPARC3
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 64 : 32),
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Disable save slot sharing for call-clobbered registers by default.
     The IRA sharing algorithm works on single registers only and this
     pessimizes for double floating-point registers.  */
  if (!global_options_set.x_flag_ira_share_save_slots)
    flag_ira_share_save_slots = 0;

  /* We register a machine specific pass to work around errata, if any.
     The pass must be scheduled as late as possible so that we have the
     (essentially) final form of the insn stream to work on.
     Registering the pass must be done at start up.  It's convenient to
     do it here.  */
  opt_pass *errata_pass = make_pass_work_around_errata (g);
  struct register_pass_info insert_pass_work_around_errata =
    {
      errata_pass, /* pass */
      "dbr", /* reference_pass_name */
      1, /* ref_pass_instance_number */
      PASS_POS_INSERT_AFTER /* po_op */
    };
  register_pass (&insert_pass_work_around_errata);
}
\f
/* Miscellaneous utilities.  */

/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
   or branch on register contents instructions.  */

int
v9_regcmp_p (enum rtx_code code)
{
  return (code == EQ || code == NE || code == GE || code == LT
	  || code == LE || code == GT);
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   sethi instruction.  */

int
fp_sethi_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      long i;

      REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   mov instruction.  */

int
fp_mov_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      long i;

      REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
      return SPARC_SIMM13_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a high/losum
   instruction sequence.  */

int
fp_high_losum_p (rtx op)
{
  /* The constraints calling this should only be in
     SFmode move insns, so any constant which cannot
     be moved using a single insn will do.  */
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      long i;

      REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
    }

  return 0;
}
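
/* To illustrate the three predicates above: 1.0f has the bit pattern
   0x3f800000, which is too wide for a SIMM13 but has its low 10 bits
   clear, so it satisfies fp_sethi_p; 0.0f (bit pattern 0) satisfies
   fp_mov_p; and a pattern such as 0x3f8ccccd (1.1f), with low-order
   bits set, needs the high/losum sequence and satisfies
   fp_high_losum_p.  */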

/* Return true if the address of LABEL can be loaded by means of the
   mov{si,di}_pic_label_ref patterns in PIC mode.  */

static bool
can_use_mov_pic_label_ref (rtx label)
{
  /* VxWorks does not impose a fixed gap between segments; the run-time
     gap can be different from the object-file gap.  We therefore can't
     assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
     are absolutely sure that X is in the same segment as the GOT.
     Unfortunately, the flexibility of linker scripts means that we
     can't be sure of that in general, so assume that GOT-relative
     accesses are never valid on VxWorks.  */
  if (TARGET_VXWORKS_RTP)
    return false;

  /* Similarly, if the label is non-local, it might end up being placed
     in a different section than the current one; now mov_pic_label_ref
     requires the label and the code to be in the same section.  */
  if (LABEL_REF_NONLOCAL_P (label))
    return false;

  /* Finally, if we are reordering basic blocks and partitioning into hot
     and cold sections, this might happen for any label.  */
  if (flag_reorder_blocks_and_partition)
    return false;
1709
1710 return true;
1711 }
1712
1713 /* Expand a move instruction. Return true if all work is done. */
1714
1715 bool
1716 sparc_expand_move (machine_mode mode, rtx *operands)
1717 {
1718 /* Handle sets of MEM first. */
1719 if (GET_CODE (operands[0]) == MEM)
1720 {
1721 /* 0 is a register (or a pair of registers) on SPARC. */
1722 if (register_or_zero_operand (operands[1], mode))
1723 return false;
1724
1725 if (!reload_in_progress)
1726 {
1727 operands[0] = validize_mem (operands[0]);
1728 operands[1] = force_reg (mode, operands[1]);
1729 }
1730 }
1731
1732 /* Fixup TLS cases. */
1733 if (TARGET_HAVE_TLS
1734 && CONSTANT_P (operands[1])
1735 && sparc_tls_referenced_p (operands[1]))
1736 {
1737 operands[1] = sparc_legitimize_tls_address (operands[1]);
1738 return false;
1739 }
1740
1741 /* Fixup PIC cases. */
1742 if (flag_pic && CONSTANT_P (operands[1]))
1743 {
1744 if (pic_address_needs_scratch (operands[1]))
1745 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1746
1747 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
1748 if (GET_CODE (operands[1]) == LABEL_REF
1749 && can_use_mov_pic_label_ref (operands[1]))
1750 {
1751 if (mode == SImode)
1752 {
1753 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1754 return true;
1755 }
1756
1757 if (mode == DImode)
1758 {
1759 gcc_assert (TARGET_ARCH64);
1760 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1761 return true;
1762 }
1763 }
1764
1765 if (symbolic_operand (operands[1], mode))
1766 {
1767 operands[1]
1768 = sparc_legitimize_pic_address (operands[1],
1769 reload_in_progress
1770 ? operands[0] : NULL_RTX);
1771 return false;
1772 }
1773 }
1774
1775 /* If we are trying to toss an integer constant into FP registers,
1776 or loading a FP or vector constant, force it into memory. */
1777 if (CONSTANT_P (operands[1])
1778 && REG_P (operands[0])
1779 && (SPARC_FP_REG_P (REGNO (operands[0]))
1780 || SCALAR_FLOAT_MODE_P (mode)
1781 || VECTOR_MODE_P (mode)))
1782 {
1783 /* emit_group_store will send such bogosity to us when it is
1784 not storing directly into memory. So fix this up to avoid
1785 crashes in output_constant_pool. */
1786 if (operands[1] == const0_rtx)
1787 operands[1] = CONST0_RTX (mode);
1788
1789 /* We can clear FP registers or set them to all-ones if TARGET_VIS,
1790 and can always do so for integer registers. */
1791 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1792 && (const_zero_operand (operands[1], mode)
1793 || const_all_ones_operand (operands[1], mode)))
1794 return false;
1795
1796 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1797 /* We are able to build any SF constant in integer registers
1798 with at most 2 instructions. */
1799 && (mode == SFmode
1800 /* And any DF constant in integer registers. */
1801 || (mode == DFmode
1802 && ! can_create_pseudo_p ())))
1803 return false;
1804
1805 operands[1] = force_const_mem (mode, operands[1]);
1806 if (!reload_in_progress)
1807 operands[1] = validize_mem (operands[1]);
1808 return false;
1809 }
1810
1811 /* Accept non-constants and valid constants unmodified. */
1812 if (!CONSTANT_P (operands[1])
1813 || GET_CODE (operands[1]) == HIGH
1814 || input_operand (operands[1], mode))
1815 return false;
1816
1817 switch (mode)
1818 {
1819 case QImode:
1820 /* All QImode constants require only one insn, so proceed. */
1821 break;
1822
1823 case HImode:
1824 case SImode:
1825 sparc_emit_set_const32 (operands[0], operands[1]);
1826 return true;
1827
1828 case DImode:
1829 /* input_operand should have filtered out 32-bit mode. */
1830 sparc_emit_set_const64 (operands[0], operands[1]);
1831 return true;
1832
1833 case TImode:
1834 {
1835 rtx high, low;
1836 /* TImode isn't available in 32-bit mode. */
1837 split_double (operands[1], &high, &low);
1838 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
1839 high));
1840 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
1841 low));
1842 }
1843 return true;
1844
1845 default:
1846 gcc_unreachable ();
1847 }
1848
1849 return false;
1850 }
1851
1852 /* Load OP1, a 32-bit constant, into OP0, a register.
1853 We know it can't be done in one insn when we get
1854 here; the move expander guarantees this. */
1855
1856 static void
1857 sparc_emit_set_const32 (rtx op0, rtx op1)
1858 {
1859 machine_mode mode = GET_MODE (op0);
1860 rtx temp = op0;
1861
1862 if (can_create_pseudo_p ())
1863 temp = gen_reg_rtx (mode);
1864
1865 if (GET_CODE (op1) == CONST_INT)
1866 {
1867 gcc_assert (!small_int_operand (op1, mode)
1868 && !const_high_operand (op1, mode));
1869
1870 /* Emit them as real moves instead of a HIGH/LO_SUM,
1871 this way CSE can see everything and reuse intermediate
1872 values if it wants. */
1873 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
1874 & ~(HOST_WIDE_INT) 0x3ff)));
1875
1876 emit_insn (gen_rtx_SET (op0,
1877 gen_rtx_IOR (mode, temp,
1878 GEN_INT (INTVAL (op1) & 0x3ff))));
1879 }
1880 else
1881 {
1882 /* A symbol, emit in the traditional way. */
1883 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
1884 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
1885 }
1886 }
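
/* For illustration: loading the constant 0x12345678 through the
   CONST_INT path above first sets TEMP to 0x12345678 & ~0x3ff
   = 0x12345400 (whose low 10 bits are clear, so the set matches the
   sethi pattern) and then IORs in 0x12345678 & 0x3ff = 0x278,
   i.e. roughly:

	sethi	%hi(0x12345678), %temp
	or	%temp, 0x278, %reg  */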
1887
1888 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1889 If TEMP is nonzero, we are forbidden to use any other scratch
1890 registers. Otherwise, we are allowed to generate them as needed.
1891
1892 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1893 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
1894
1895 void
1896 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
1897 {
1898 rtx temp1, temp2, temp3, temp4, temp5;
1899 rtx ti_temp = 0;
1900
1901 if (temp && GET_MODE (temp) == TImode)
1902 {
1903 ti_temp = temp;
1904 temp = gen_rtx_REG (DImode, REGNO (temp));
1905 }
1906
1907 /* SPARC-V9 code-model support. */
1908 switch (sparc_cmodel)
1909 {
1910 case CM_MEDLOW:
1911 /* The range spanned by all instructions in the object is less
1912 than 2^31 bytes (2GB) and the distance from any instruction
1913 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1914 than 2^31 bytes (2GB).
1915
1916 The executable must be in the low 4TB of the virtual address
1917 space.
1918
1919 sethi %hi(symbol), %temp1
1920 or %temp1, %lo(symbol), %reg */
1921 if (temp)
1922 temp1 = temp; /* op0 is allowed. */
1923 else
1924 temp1 = gen_reg_rtx (DImode);
1925
1926 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
1927 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1928 break;
1929
1930 case CM_MEDMID:
1931 /* The range spanned by all instructions in the object is less
1932 than 2^31 bytes (2GB) and the distance from any instruction
1933 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1934 than 2^31 bytes (2GB).
1935
1936 The executable must be in the low 16TB of the virtual address
1937 space.
1938
1939 sethi %h44(symbol), %temp1
1940 or %temp1, %m44(symbol), %temp2
1941 sllx %temp2, 12, %temp3
1942 or %temp3, %l44(symbol), %reg */
1943 if (temp)
1944 {
1945 temp1 = op0;
1946 temp2 = op0;
1947 temp3 = temp; /* op0 is allowed. */
1948 }
1949 else
1950 {
1951 temp1 = gen_reg_rtx (DImode);
1952 temp2 = gen_reg_rtx (DImode);
1953 temp3 = gen_reg_rtx (DImode);
1954 }
1955
1956 emit_insn (gen_seth44 (temp1, op1));
1957 emit_insn (gen_setm44 (temp2, temp1, op1));
1958 emit_insn (gen_rtx_SET (temp3,
1959 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
1960 emit_insn (gen_setl44 (op0, temp3, op1));
1961 break;
1962
1963 case CM_MEDANY:
1964 /* The range spanned by all instructions in the object is less
1965 than 2^31 bytes (2GB) and the distance from any instruction
1966 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1967 than 2^31 bytes (2GB).
1968
1969 The executable can be placed anywhere in the virtual address
1970 space.
1971
1972 sethi %hh(symbol), %temp1
1973 sethi %lm(symbol), %temp2
1974 or %temp1, %hm(symbol), %temp3
1975 sllx %temp3, 32, %temp4
1976 or %temp4, %temp2, %temp5
1977 or %temp5, %lo(symbol), %reg */
1978 if (temp)
1979 {
1980 /* It is possible that one of the registers we got for operands[2]
1981 might coincide with that of operands[0] (which is why we made
1982 it TImode). Pick the other one to use as our scratch. */
1983 if (rtx_equal_p (temp, op0))
1984 {
1985 gcc_assert (ti_temp);
1986 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1987 }
1988 temp1 = op0;
1989 temp2 = temp; /* op0 is _not_ allowed, see above. */
1990 temp3 = op0;
1991 temp4 = op0;
1992 temp5 = op0;
1993 }
1994 else
1995 {
1996 temp1 = gen_reg_rtx (DImode);
1997 temp2 = gen_reg_rtx (DImode);
1998 temp3 = gen_reg_rtx (DImode);
1999 temp4 = gen_reg_rtx (DImode);
2000 temp5 = gen_reg_rtx (DImode);
2001 }
2002
2003 emit_insn (gen_sethh (temp1, op1));
2004 emit_insn (gen_setlm (temp2, op1));
2005 emit_insn (gen_sethm (temp3, temp1, op1));
2006 emit_insn (gen_rtx_SET (temp4,
2007 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2008 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2009 emit_insn (gen_setlo (op0, temp5, op1));
2010 break;
2011
2012 case CM_EMBMEDANY:
2013 /* Old old old backwards compatibility cruft here.
2014 Essentially it is MEDLOW with a fixed 64-bit
2015 virtual base added to all data segment addresses.
2016 Text-segment stuff is computed like MEDANY; we can't
2017 reuse the code above because the relocation knobs
2018 look different.
2019
2020 Data segment: sethi %hi(symbol), %temp1
2021 add %temp1, EMBMEDANY_BASE_REG, %temp2
2022 or %temp2, %lo(symbol), %reg */
2023 if (data_segment_operand (op1, GET_MODE (op1)))
2024 {
2025 if (temp)
2026 {
2027 temp1 = temp; /* op0 is allowed. */
2028 temp2 = op0;
2029 }
2030 else
2031 {
2032 temp1 = gen_reg_rtx (DImode);
2033 temp2 = gen_reg_rtx (DImode);
2034 }
2035
2036 emit_insn (gen_embmedany_sethi (temp1, op1));
2037 emit_insn (gen_embmedany_brsum (temp2, temp1));
2038 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2039 }
2040
2041 /* Text segment: sethi %uhi(symbol), %temp1
2042 sethi %hi(symbol), %temp2
2043 or %temp1, %ulo(symbol), %temp3
2044 sllx %temp3, 32, %temp4
2045 or %temp4, %temp2, %temp5
2046 or %temp5, %lo(symbol), %reg */
2047 else
2048 {
2049 if (temp)
2050 {
2051 /* It is possible that one of the registers we got for operands[2]
2052 might coincide with that of operands[0] (which is why we made
2053 it TImode). Pick the other one to use as our scratch. */
2054 if (rtx_equal_p (temp, op0))
2055 {
2056 gcc_assert (ti_temp);
2057 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2058 }
2059 temp1 = op0;
2060 temp2 = temp; /* op0 is _not_ allowed, see above. */
2061 temp3 = op0;
2062 temp4 = op0;
2063 temp5 = op0;
2064 }
2065 else
2066 {
2067 temp1 = gen_reg_rtx (DImode);
2068 temp2 = gen_reg_rtx (DImode);
2069 temp3 = gen_reg_rtx (DImode);
2070 temp4 = gen_reg_rtx (DImode);
2071 temp5 = gen_reg_rtx (DImode);
2072 }
2073
2074 emit_insn (gen_embmedany_textuhi (temp1, op1));
2075 emit_insn (gen_embmedany_texthi (temp2, op1));
2076 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2077 emit_insn (gen_rtx_SET (temp4,
2078 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2079 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2080 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2081 }
2082 break;
2083
2084 default:
2085 gcc_unreachable ();
2086 }
2087 }
2088
2089 #if HOST_BITS_PER_WIDE_INT == 32
2090 static void
2091 sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
2092 {
2093 gcc_unreachable ();
2094 }
2095 #else
2096 /* These avoid problems when cross compiling. If we do not
2097 go through all this hair then the optimizer will see
2098 invalid REG_EQUAL notes or in some cases none at all. */
2099 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2100 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2101 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2102 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2103
2104 /* The optimizer is not allowed to assume anything about exactly
2105 which bits are set for a HIGH; they are unspecified.
2106 Unfortunately this leads to many missed optimizations
2107 during CSE. We mask out the non-HIGH bits so that the result
2108 matches a plain movdi, to alleviate this problem. */
2109 static rtx
2110 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2111 {
2112 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2113 }
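
/* E.g. gen_safe_HIGH64 (dest, 0xdeadbeef) sets DEST to 0xdeadbc00:
   the low 10 bits are masked out, so the SET matches a plain movdi
   while still computing exactly what the sethi will produce.  */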
2114
2115 static rtx
2116 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2117 {
2118 return gen_rtx_SET (dest, GEN_INT (val));
2119 }
2120
2121 static rtx
2122 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2123 {
2124 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2125 }
2126
2127 static rtx
2128 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2129 {
2130 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2131 }
2132
2133 /* Worker routines for 64-bit constant formation on arch64.
2134 One of the key things to do in these emissions is
2135 to create as many temporary REGs as possible. This makes it
2136 possible for half-built constants to be reused later when
2137 such values are similar to something required later on.
2138 Without doing this, the optimizer cannot see such
2139 opportunities. */
2140
2141 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2142 unsigned HOST_WIDE_INT, int);
2143
2144 static void
2145 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2146 unsigned HOST_WIDE_INT low_bits, int is_neg)
2147 {
2148 unsigned HOST_WIDE_INT high_bits;
2149
2150 if (is_neg)
2151 high_bits = (~low_bits) & 0xffffffff;
2152 else
2153 high_bits = low_bits;
2154
2155 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2156 if (!is_neg)
2157 {
2158 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2159 }
2160 else
2161 {
2162 /* If we are XOR'ing with -1, then we should emit a one's complement
2163 instead. This way the combiner will notice logical operations
2164 such as ANDN later on and substitute. */
2165 if ((low_bits & 0x3ff) == 0x3ff)
2166 {
2167 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2168 }
2169 else
2170 {
2171 emit_insn (gen_rtx_SET (op0,
2172 gen_safe_XOR64 (temp,
2173 (-(HOST_WIDE_INT)0x400
2174 | (low_bits & 0x3ff)))));
2175 }
2176 }
2177 }
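
/* A worked example, for illustration: for the constant -4097
   (0xffffffffffffefff), low_bits is 0xffffefff and IS_NEG is set, so
   high_bits becomes ~low_bits & 0xffffffff = 0x00001000.  The sethi
   loads 0x1000 into TEMP, and since the low 10 bits of low_bits are
   all ones, the one's-complement path is taken, i.e. roughly:

	sethi	%hi(0x1000), %temp
	not	%temp, %reg  */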
2178
2179 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2180 unsigned HOST_WIDE_INT, int);
2181
2182 static void
2183 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2184 unsigned HOST_WIDE_INT high_bits,
2185 unsigned HOST_WIDE_INT low_immediate,
2186 int shift_count)
2187 {
2188 rtx temp2 = op0;
2189
2190 if ((high_bits & 0xfffffc00) != 0)
2191 {
2192 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2193 if ((high_bits & ~0xfffffc00) != 0)
2194 emit_insn (gen_rtx_SET (op0,
2195 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2196 else
2197 temp2 = temp;
2198 }
2199 else
2200 {
2201 emit_insn (gen_safe_SET64 (temp, high_bits));
2202 temp2 = temp;
2203 }
2204
2205 /* Now shift it up into place. */
2206 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2207 GEN_INT (shift_count))));
2208
2209 /* If there is a low immediate part piece, finish up by
2210 putting that in as well. */
2211 if (low_immediate != 0)
2212 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2213 }
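
/* For illustration: the caller uses this for 0x1234567800000000 via
   the low_bits == 0 case, as sparc_emit_set_const64_quick2 (op0, temp,
   0x12345678, 0, 32), which expands to roughly:

	sethi	%hi(0x12345678), %temp
	or	%temp, 0x278, %reg
	sllx	%reg, 32, %reg  */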
2214
2215 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2216 unsigned HOST_WIDE_INT);
2217
2218 /* Full 64-bit constant decomposition. Even though this is the
2219 'worst' case, we still optimize a few things away. */
2220 static void
2221 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2222 unsigned HOST_WIDE_INT high_bits,
2223 unsigned HOST_WIDE_INT low_bits)
2224 {
2225 rtx sub_temp = op0;
2226
2227 if (can_create_pseudo_p ())
2228 sub_temp = gen_reg_rtx (DImode);
2229
2230 if ((high_bits & 0xfffffc00) != 0)
2231 {
2232 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2233 if ((high_bits & ~0xfffffc00) != 0)
2234 emit_insn (gen_rtx_SET (sub_temp,
2235 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2236 else
2237 sub_temp = temp;
2238 }
2239 else
2240 {
2241 emit_insn (gen_safe_SET64 (temp, high_bits));
2242 sub_temp = temp;
2243 }
2244
2245 if (can_create_pseudo_p ())
2246 {
2247 rtx temp2 = gen_reg_rtx (DImode);
2248 rtx temp3 = gen_reg_rtx (DImode);
2249 rtx temp4 = gen_reg_rtx (DImode);
2250
2251 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2252 GEN_INT (32))));
2253
2254 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2255 if ((low_bits & ~0xfffffc00) != 0)
2256 {
2257 emit_insn (gen_rtx_SET (temp3,
2258 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2259 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2260 }
2261 else
2262 {
2263 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2264 }
2265 }
2266 else
2267 {
2268 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2269 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2270 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2271 int to_shift = 12;
2272
2273 /* We are in the middle of reload, so this is really
2274 painful. However we do still make an attempt to
2275 avoid emitting truly stupid code. */
2276 if (low1 != const0_rtx)
2277 {
2278 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2279 GEN_INT (to_shift))));
2280 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2281 sub_temp = op0;
2282 to_shift = 12;
2283 }
2284 else
2285 {
2286 to_shift += 12;
2287 }
2288 if (low2 != const0_rtx)
2289 {
2290 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2291 GEN_INT (to_shift))));
2292 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2293 sub_temp = op0;
2294 to_shift = 8;
2295 }
2296 else
2297 {
2298 to_shift += 8;
2299 }
2300 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2301 GEN_INT (to_shift))));
2302 if (low3 != const0_rtx)
2303 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2304 /* phew... */
2305 }
2306 }
2307
2308 /* Analyze a 64-bit constant for certain properties. */
2309 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2310 unsigned HOST_WIDE_INT,
2311 int *, int *, int *);
2312
2313 static void
2314 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2315 unsigned HOST_WIDE_INT low_bits,
2316 int *hbsp, int *lbsp, int *abbasp)
2317 {
2318 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2319 int i;
2320
2321 lowest_bit_set = highest_bit_set = -1;
2322 i = 0;
2323 do
2324 {
2325 if ((lowest_bit_set == -1)
2326 && ((low_bits >> i) & 1))
2327 lowest_bit_set = i;
2328 if ((highest_bit_set == -1)
2329 && ((high_bits >> (32 - i - 1)) & 1))
2330 highest_bit_set = (64 - i - 1);
2331 }
2332 while (++i < 32
2333 && ((highest_bit_set == -1)
2334 || (lowest_bit_set == -1)));
2335 if (i == 32)
2336 {
2337 i = 0;
2338 do
2339 {
2340 if ((lowest_bit_set == -1)
2341 && ((high_bits >> i) & 1))
2342 lowest_bit_set = i + 32;
2343 if ((highest_bit_set == -1)
2344 && ((low_bits >> (32 - i - 1)) & 1))
2345 highest_bit_set = 32 - i - 1;
2346 }
2347 while (++i < 32
2348 && ((highest_bit_set == -1)
2349 || (lowest_bit_set == -1)));
2350 }
2351 /* If there are no bits set, this should have gone out
2352 as one instruction! */
2353 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2354 all_bits_between_are_set = 1;
2355 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2356 {
2357 if (i < 32)
2358 {
2359 if ((low_bits & (1 << i)) != 0)
2360 continue;
2361 }
2362 else
2363 {
2364 if ((high_bits & (1 << (i - 32))) != 0)
2365 continue;
2366 }
2367 all_bits_between_are_set = 0;
2368 break;
2369 }
2370 *hbsp = highest_bit_set;
2371 *lbsp = lowest_bit_set;
2372 *abbasp = all_bits_between_are_set;
2373 }
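
/* For illustration: for the constant 0x0000000ff0000000 (high_bits
   0x0000000f, low_bits 0xf0000000), the scan above finds
   lowest_bit_set = 28 and highest_bit_set = 35, and since bits 28..35
   are all ones, all_bits_between_are_set = 1.  The caller can then
   build the constant as `mov 0xff, %reg; sllx %reg, 28, %reg'.  */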
2374
2375 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2376
2377 static int
2378 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2379 unsigned HOST_WIDE_INT low_bits)
2380 {
2381 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2382
2383 if (high_bits == 0
2384 || high_bits == 0xffffffff)
2385 return 1;
2386
2387 analyze_64bit_constant (high_bits, low_bits,
2388 &highest_bit_set, &lowest_bit_set,
2389 &all_bits_between_are_set);
2390
2391 if ((highest_bit_set == 63
2392 || lowest_bit_set == 0)
2393 && all_bits_between_are_set != 0)
2394 return 1;
2395
2396 if ((highest_bit_set - lowest_bit_set) < 21)
2397 return 1;
2398
2399 return 0;
2400 }
2401
2402 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2403 unsigned HOST_WIDE_INT,
2404 int, int);
2405
2406 static unsigned HOST_WIDE_INT
2407 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2408 unsigned HOST_WIDE_INT low_bits,
2409 int lowest_bit_set, int shift)
2410 {
2411 HOST_WIDE_INT hi, lo;
2412
2413 if (lowest_bit_set < 32)
2414 {
2415 lo = (low_bits >> lowest_bit_set) << shift;
2416 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2417 }
2418 else
2419 {
2420 lo = 0;
2421 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2422 }
2423 gcc_assert (! (hi & lo));
2424 return (hi | lo);
2425 }
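
/* Continuing the example above: create_simple_focus_bits (0xf,
   0xf0000000, 28, 0) computes lo = 0xf0000000 >> 28 = 0xf and
   hi = 0xf << 4 = 0xf0, returning 0xff -- the set bits shifted down
   to bit 0, ready to be loaded and shifted back into place.  */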
2426
2427 /* Here we are sure to be arch64 and this is an integer constant
2428 being loaded into a register. Emit the most efficient
2429 insn sequence possible. Detection of all the 1-insn cases
2430 has been done already. */
2431 static void
2432 sparc_emit_set_const64 (rtx op0, rtx op1)
2433 {
2434 unsigned HOST_WIDE_INT high_bits, low_bits;
2435 int lowest_bit_set, highest_bit_set;
2436 int all_bits_between_are_set;
2437 rtx temp = 0;
2438
2439 /* Sanity check that we know what we are working with. */
2440 gcc_assert (TARGET_ARCH64
2441 && (GET_CODE (op0) == SUBREG
2442 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2443
2444 if (! can_create_pseudo_p ())
2445 temp = op0;
2446
2447 if (GET_CODE (op1) != CONST_INT)
2448 {
2449 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2450 return;
2451 }
2452
2453 if (! temp)
2454 temp = gen_reg_rtx (DImode);
2455
2456 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2457 low_bits = (INTVAL (op1) & 0xffffffff);
2458
2459 /* low_bits bits 0 --> 31
2460 high_bits bits 32 --> 63 */
2461
2462 analyze_64bit_constant (high_bits, low_bits,
2463 &highest_bit_set, &lowest_bit_set,
2464 &all_bits_between_are_set);
2465
2466 /* First try for a 2-insn sequence. */
2467
2468 /* These situations are preferred because the optimizer can
2469 * do more things with them:
2470 * 1) mov -1, %reg
2471 * sllx %reg, shift, %reg
2472 * 2) mov -1, %reg
2473 * srlx %reg, shift, %reg
2474 * 3) mov some_small_const, %reg
2475 * sllx %reg, shift, %reg
2476 */
2477 if (((highest_bit_set == 63
2478 || lowest_bit_set == 0)
2479 && all_bits_between_are_set != 0)
2480 || ((highest_bit_set - lowest_bit_set) < 12))
2481 {
2482 HOST_WIDE_INT the_const = -1;
2483 int shift = lowest_bit_set;
2484
2485 if ((highest_bit_set != 63
2486 && lowest_bit_set != 0)
2487 || all_bits_between_are_set == 0)
2488 {
2489 the_const =
2490 create_simple_focus_bits (high_bits, low_bits,
2491 lowest_bit_set, 0);
2492 }
2493 else if (lowest_bit_set == 0)
2494 shift = -(63 - highest_bit_set);
2495
2496 gcc_assert (SPARC_SIMM13_P (the_const));
2497 gcc_assert (shift != 0);
2498
2499 emit_insn (gen_safe_SET64 (temp, the_const));
2500 if (shift > 0)
2501 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
2502 GEN_INT (shift))));
2503 else if (shift < 0)
2504 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
2505 GEN_INT (-shift))));
2506 return;
2507 }
2508
2509 /* Now a range of 22 or fewer bits set somewhere.
2510 * 1) sethi %hi(focus_bits), %reg
2511 * sllx %reg, shift, %reg
2512 * 2) sethi %hi(focus_bits), %reg
2513 * srlx %reg, shift, %reg
2514 */
2515 if ((highest_bit_set - lowest_bit_set) < 21)
2516 {
2517 unsigned HOST_WIDE_INT focus_bits =
2518 create_simple_focus_bits (high_bits, low_bits,
2519 lowest_bit_set, 10);
2520
2521 gcc_assert (SPARC_SETHI_P (focus_bits));
2522 gcc_assert (lowest_bit_set != 10);
2523
2524 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2525
2526 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2527 if (lowest_bit_set < 10)
2528 emit_insn (gen_rtx_SET (op0,
2529 gen_rtx_LSHIFTRT (DImode, temp,
2530 GEN_INT (10 - lowest_bit_set))));
2531 else if (lowest_bit_set > 10)
2532 emit_insn (gen_rtx_SET (op0,
2533 gen_rtx_ASHIFT (DImode, temp,
2534 GEN_INT (lowest_bit_set - 10))));
2535 return;
2536 }
2537
2538 /* 1) sethi %hi(low_bits), %reg
2539 * or %reg, %lo(low_bits), %reg
2540 * 2) sethi %hi(~low_bits), %reg
2541 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2542 */
2543 if (high_bits == 0
2544 || high_bits == 0xffffffff)
2545 {
2546 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2547 (high_bits == 0xffffffff));
2548 return;
2549 }
2550
2551 /* Now, try 3-insn sequences. */
2552
2553 /* 1) sethi %hi(high_bits), %reg
2554 * or %reg, %lo(high_bits), %reg
2555 * sllx %reg, 32, %reg
2556 */
2557 if (low_bits == 0)
2558 {
2559 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2560 return;
2561 }
2562
2563 /* We may be able to do something quick
2564 when the constant is negated, so try that. */
2565 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2566 (~low_bits) & 0xfffffc00))
2567 {
2568 /* NOTE: The trailing bits get XOR'd so we need the
2569 non-negated bits, not the negated ones. */
2570 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2571
2572 if ((((~high_bits) & 0xffffffff) == 0
2573 && ((~low_bits) & 0x80000000) == 0)
2574 || (((~high_bits) & 0xffffffff) == 0xffffffff
2575 && ((~low_bits) & 0x80000000) != 0))
2576 {
2577 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2578
2579 if ((SPARC_SETHI_P (fast_int)
2580 && (~high_bits & 0xffffffff) == 0)
2581 || SPARC_SIMM13_P (fast_int))
2582 emit_insn (gen_safe_SET64 (temp, fast_int));
2583 else
2584 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2585 }
2586 else
2587 {
2588 rtx negated_const;
2589 negated_const = GEN_INT (((~low_bits) & 0xfffffc00)
2590 | (((HOST_WIDE_INT)((~high_bits) & 0xffffffff)) << 32));
2591 sparc_emit_set_const64 (temp, negated_const);
2592 }
2593
2594 /* If we are XOR'ing with -1, then we should emit a one's complement
2595 instead. This way the combiner will notice logical operations
2596 such as ANDN later on and substitute. */
2597 if (trailing_bits == 0x3ff)
2598 {
2599 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2600 }
2601 else
2602 {
2603 emit_insn (gen_rtx_SET (op0,
2604 gen_safe_XOR64 (temp,
2605 (-0x400 | trailing_bits))));
2606 }
2607 return;
2608 }
2609
2610 /* 1) sethi %hi(xxx), %reg
2611 * or %reg, %lo(xxx), %reg
2612 * sllx %reg, yyy, %reg
2613 *
2614 * ??? This is just a generalized version of the low_bits==0
2615 * thing above, FIXME...
2616 */
2617 if ((highest_bit_set - lowest_bit_set) < 32)
2618 {
2619 unsigned HOST_WIDE_INT focus_bits =
2620 create_simple_focus_bits (high_bits, low_bits,
2621 lowest_bit_set, 0);
2622
2623 /* We can't get here in this state. */
2624 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2625
2626 /* So what we know is that the set bits straddle the
2627 middle of the 64-bit word. */
2628 sparc_emit_set_const64_quick2 (op0, temp,
2629 focus_bits, 0,
2630 lowest_bit_set);
2631 return;
2632 }
2633
2634 /* 1) sethi %hi(high_bits), %reg
2635 * or %reg, %lo(high_bits), %reg
2636 * sllx %reg, 32, %reg
2637 * or %reg, low_bits, %reg
2638 */
2639 if (SPARC_SIMM13_P (low_bits)
2640 && ((int) low_bits > 0))
2641 {
2642 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2643 return;
2644 }
2645
2646 /* The easiest way when all else fails, is full decomposition. */
2647 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2648 }
2649 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
2650
2651 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2652 return the mode to be used for the comparison. For floating-point,
2653 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
2654 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2655 processing is needed. */
2656
2657 machine_mode
2658 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2659 {
2660 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2661 {
2662 switch (op)
2663 {
2664 case EQ:
2665 case NE:
2666 case UNORDERED:
2667 case ORDERED:
2668 case UNLT:
2669 case UNLE:
2670 case UNGT:
2671 case UNGE:
2672 case UNEQ:
2673 case LTGT:
2674 return CCFPmode;
2675
2676 case LT:
2677 case LE:
2678 case GT:
2679 case GE:
2680 return CCFPEmode;
2681
2682 default:
2683 gcc_unreachable ();
2684 }
2685 }
2686 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2687 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2688 {
2689 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2690 return CCX_NOOVmode;
2691 else
2692 return CC_NOOVmode;
2693 }
2694 else
2695 {
2696 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2697 return CCXmode;
2698 else
2699 return CCmode;
2700 }
2701 }
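
/* For illustration: a comparison such as (compare (plus a b)
   (const_int 0)) in SImode selects CC_NOOVmode above, signalling that
   the flags will be set by an addcc whose overflow bit must not be
   relied upon, whereas a plain (compare a b) selects CCmode, or
   CCXmode for DImode operands on 64-bit.  */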
2702
2703 /* Emit the compare insn and return the CC reg for a CODE comparison
2704 with operands X and Y. */
2705
2706 static rtx
2707 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2708 {
2709 machine_mode mode;
2710 rtx cc_reg;
2711
2712 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2713 return x;
2714
2715 mode = SELECT_CC_MODE (code, x, y);
2716
2717 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2718 fcc regs (cse can't tell they're really call clobbered regs and will
2719 remove a duplicate comparison even if there is an intervening function
2720 call - it will then try to reload the cc reg via an int reg which is why
2721 we need the movcc patterns). It is possible to provide the movcc
2722 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2723 registers (say %g1, %g5) and it takes about 6 insns. A better fix would be
2724 to tell cse that CCFPE mode registers (even pseudos) are call
2725 clobbered. */
2726
2727 /* ??? This is an experiment. Rather than making changes to cse which may
2728 or may not be easy/clean, we do our own cse. This is possible because
2729 we will generate hard registers. Cse knows they're call clobbered (it
2730 doesn't know the same thing about pseudos). If we guess wrong, no big
2731 deal, but if we win, great! */
2732
2733 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2734 #if 1 /* experiment */
2735 {
2736 int reg;
2737 /* We cycle through the registers to ensure they're all exercised. */
2738 static int next_fcc_reg = 0;
2739 /* Previous x,y for each fcc reg. */
2740 static rtx prev_args[4][2];
2741
2742 /* Scan prev_args for x,y. */
2743 for (reg = 0; reg < 4; reg++)
2744 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2745 break;
2746 if (reg == 4)
2747 {
2748 reg = next_fcc_reg;
2749 prev_args[reg][0] = x;
2750 prev_args[reg][1] = y;
2751 next_fcc_reg = (next_fcc_reg + 1) & 3;
2752 }
2753 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2754 }
2755 #else
2756 cc_reg = gen_reg_rtx (mode);
2757 #endif /* ! experiment */
2758 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2759 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2760 else
2761 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2762
2763 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
2764 will only result in an unrecognizable insn so no point in asserting. */
2765 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
2766
2767 return cc_reg;
2768 }
2769
2770
2771 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2772
2773 rtx
2774 gen_compare_reg (rtx cmp)
2775 {
2776 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2777 }
2778
2779 /* This function is used for v9 only.
2780 DEST is the target of the Scc insn.
2781 CODE is the code for an Scc's comparison.
2782 X and Y are the values we compare.
2783
2784 This function is needed to turn
2785
2786 (set (reg:SI 110)
2787 (gt (reg:CCX 100 %icc)
2788 (const_int 0)))
2789 into
2790 (set (reg:SI 110)
2791 (gt:DI (reg:CCX 100 %icc)
2792 (const_int 0)))
2793
2794 I.e., the instruction recognizer needs to see the mode of the comparison to
2795 find the right instruction. We could use "gt:DI" right in the
2796 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2797
2798 static int
2799 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2800 {
2801 if (! TARGET_ARCH64
2802 && (GET_MODE (x) == DImode
2803 || GET_MODE (dest) == DImode))
2804 return 0;
2805
2806 /* Try to use the movrCC insns. */
2807 if (TARGET_ARCH64
2808 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2809 && y == const0_rtx
2810 && v9_regcmp_p (compare_code))
2811 {
2812 rtx op0 = x;
2813 rtx temp;
2814
2815 /* Special case for op0 != 0. This can be done with one instruction if
2816 dest == x. */
2817
2818 if (compare_code == NE
2819 && GET_MODE (dest) == DImode
2820 && rtx_equal_p (op0, dest))
2821 {
2822 emit_insn (gen_rtx_SET (dest,
2823 gen_rtx_IF_THEN_ELSE (DImode,
2824 gen_rtx_fmt_ee (compare_code, DImode,
2825 op0, const0_rtx),
2826 const1_rtx,
2827 dest)));
2828 return 1;
2829 }
2830
2831 if (reg_overlap_mentioned_p (dest, op0))
2832 {
2833 /* Handle the case where dest == x.
2834 We "early clobber" the result. */
2835 op0 = gen_reg_rtx (GET_MODE (x));
2836 emit_move_insn (op0, x);
2837 }
2838
2839 emit_insn (gen_rtx_SET (dest, const0_rtx));
2840 if (GET_MODE (op0) != DImode)
2841 {
2842 temp = gen_reg_rtx (DImode);
2843 convert_move (temp, op0, 0);
2844 }
2845 else
2846 temp = op0;
2847 emit_insn (gen_rtx_SET (dest,
2848 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2849 gen_rtx_fmt_ee (compare_code, DImode,
2850 temp, const0_rtx),
2851 const1_rtx,
2852 dest)));
2853 return 1;
2854 }
2855 else
2856 {
2857 x = gen_compare_reg_1 (compare_code, x, y);
2858 y = const0_rtx;
2859
2860 gcc_assert (GET_MODE (x) != CC_NOOVmode
2861 && GET_MODE (x) != CCX_NOOVmode);
2862
2863 emit_insn (gen_rtx_SET (dest, const0_rtx));
2864 emit_insn (gen_rtx_SET (dest,
2865 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2866 gen_rtx_fmt_ee (compare_code,
2867 GET_MODE (x), x, y),
2868 const1_rtx, dest)));
2869 return 1;
2870 }
2871 }
2872
2873
2874 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2875 without jumps using the addx/subx instructions. */
2876
2877 bool
2878 emit_scc_insn (rtx operands[])
2879 {
2880 rtx tem;
2881 rtx x;
2882 rtx y;
2883 enum rtx_code code;
2884
2885 /* The quad-word fp compare library routines all return nonzero to indicate
2886 true, which is different from the equivalent libgcc routines, so we must
2887 handle them specially here. */
2888 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2889 {
2890 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2891 GET_CODE (operands[1]));
2892 operands[2] = XEXP (operands[1], 0);
2893 operands[3] = XEXP (operands[1], 1);
2894 }
2895
2896 code = GET_CODE (operands[1]);
2897 x = operands[2];
2898 y = operands[3];
2899
2900 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2901 more applications). The exception to this is "reg != 0" which can
2902 be done in one instruction on v9 (so we do it). */
2903 if (code == EQ)
2904 {
2905 if (GET_MODE (x) == SImode)
2906 {
2907 rtx pat;
2908 if (TARGET_ARCH64)
2909 pat = gen_seqsidi_special (operands[0], x, y);
2910 else
2911 pat = gen_seqsisi_special (operands[0], x, y);
2912 emit_insn (pat);
2913 return true;
2914 }
2915 else if (GET_MODE (x) == DImode)
2916 {
2917 rtx pat = gen_seqdi_special (operands[0], x, y);
2918 emit_insn (pat);
2919 return true;
2920 }
2921 }
2922
2923 if (code == NE)
2924 {
2925 if (GET_MODE (x) == SImode)
2926 {
2927 rtx pat;
2928 if (TARGET_ARCH64)
2929 pat = gen_snesidi_special (operands[0], x, y);
2930 else
2931 pat = gen_snesisi_special (operands[0], x, y);
2932 emit_insn (pat);
2933 return true;
2934 }
2935 else if (GET_MODE (x) == DImode)
2936 {
2937 rtx pat;
2938 if (TARGET_VIS3)
2939 pat = gen_snedi_special_vis3 (operands[0], x, y);
2940 else
2941 pat = gen_snedi_special (operands[0], x, y);
2942 emit_insn (pat);
2943 return true;
2944 }
2945 }
2946
2947 if (TARGET_V9
2948 && TARGET_ARCH64
2949 && GET_MODE (x) == DImode
2950 && !(TARGET_VIS3
2951 && (code == GTU || code == LTU))
2952 && gen_v9_scc (operands[0], code, x, y))
2953 return true;
2954
2955 /* We can do LTU and GEU using the addx/subx instructions too. And
2956 for GTU/LEU, if both operands are registers, swap them and fall
2957 back to the easy case. */
2958 if (code == GTU || code == LEU)
2959 {
2960 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
2961 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
2962 {
2963 tem = x;
2964 x = y;
2965 y = tem;
2966 code = swap_condition (code);
2967 }
2968 }
2969
2970 if (code == LTU
2971 || (!TARGET_VIS3 && code == GEU))
2972 {
2973 emit_insn (gen_rtx_SET (operands[0],
2974 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
2975 gen_compare_reg_1 (code, x, y),
2976 const0_rtx)));
2977 return true;
2978 }
2979
2980 /* All the possibilities to use addx/subx-based sequences have been
2981 exhausted, so try for a 3-instruction sequence using v9 conditional
2982 moves. */
2983 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
2984 return true;
2985
2986 /* Nope, do branches. */
2987 return false;
2988 }
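
/* For illustration: on v8, `dest = (x < y)' for unsigned SImode
   operands goes through the LTU path above and ends up as roughly

	subcc	%x, %y, %g0	! set the carry bit if x < y
	addx	%g0, %g0, %dest	! dest = 0 + 0 + carry

   with no branches at all.  */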
2989
2990 /* Emit a conditional jump insn for the v9 architecture using comparison code
2991 CODE and jump target LABEL.
2992 This function exists to take advantage of the v9 brxx insns. */
2993
2994 static void
2995 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
2996 {
2997 emit_jump_insn (gen_rtx_SET (pc_rtx,
2998 gen_rtx_IF_THEN_ELSE (VOIDmode,
2999 gen_rtx_fmt_ee (code, GET_MODE (op0),
3000 op0, const0_rtx),
3001 gen_rtx_LABEL_REF (VOIDmode, label),
3002 pc_rtx)));
3003 }
3004
3005 /* Emit a conditional jump insn for the UA2011 architecture using
3006 comparison code CODE and jump target LABEL. This function exists
3007 to take advantage of the UA2011 Compare and Branch insns. */
3008
3009 static void
3010 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3011 {
3012 rtx if_then_else;
3013
3014 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3015 gen_rtx_fmt_ee (code, GET_MODE (op0),
3016 op0, op1),
3017 gen_rtx_LABEL_REF (VOIDmode, label),
3018 pc_rtx);
3019
3020 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3021 }
3022
3023 void
3024 emit_conditional_branch_insn (rtx operands[])
3025 {
3026 /* The quad-word fp compare library routines all return nonzero to indicate
3027 true, which is different from the equivalent libgcc routines, so we must
3028 handle them specially here. */
3029 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3030 {
3031 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3032 GET_CODE (operands[0]));
3033 operands[1] = XEXP (operands[0], 0);
3034 operands[2] = XEXP (operands[0], 1);
3035 }
3036
3037 /* If we can tell early on that the comparison is against a constant
3038 that won't fit in the 5-bit signed immediate field of a cbcond,
3039 use one of the other v9 conditional branch sequences. */
3040 if (TARGET_CBCOND
3041 && GET_CODE (operands[1]) == REG
3042 && (GET_MODE (operands[1]) == SImode
3043 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3044 && (GET_CODE (operands[2]) != CONST_INT
3045 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3046 {
3047 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3048 return;
3049 }
3050
3051 if (TARGET_ARCH64 && operands[2] == const0_rtx
3052 && GET_CODE (operands[1]) == REG
3053 && GET_MODE (operands[1]) == DImode)
3054 {
3055 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3056 return;
3057 }
3058
3059 operands[1] = gen_compare_reg (operands[0]);
3060 operands[2] = const0_rtx;
3061 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3062 operands[1], operands[2]);
3063 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3064 operands[3]));
3065 }
3066
3067
3068 /* Generate a DFmode part of a hard TFmode register.
3069 REG is the TFmode hard register, LOW is 1 for the
3070 low 64 bits of the register and 0 otherwise.
3071 */
3072 rtx
3073 gen_df_reg (rtx reg, int low)
3074 {
3075 int regno = REGNO (reg);
3076
3077 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3078 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3079 return gen_rtx_REG (DFmode, regno);
3080 }
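
/* For illustration: SPARC is big-endian (WORDS_BIG_ENDIAN is 1), so
   for a TFmode value living in %f4 this returns %f4 for the high-order
   DFmode half (LOW == 0) and %f6 for the low-order half (LOW == 1).
   For integer registers in 64-bit mode, the step between the two
   halves is a single register rather than two.  */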
3081 \f
3082 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3083 Unlike normal calls, TFmode operands are passed by reference. It is
3084 assumed that no more than 3 operands are required. */
3085
3086 static void
3087 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3088 {
3089 rtx ret_slot = NULL, arg[3], func_sym;
3090 int i;
3091
3092 /* We only expect to be called for conversions, unary, and binary ops. */
3093 gcc_assert (nargs == 2 || nargs == 3);
3094
3095 for (i = 0; i < nargs; ++i)
3096 {
3097 rtx this_arg = operands[i];
3098 rtx this_slot;
3099
3100 /* TFmode arguments and return values are passed by reference. */
3101 if (GET_MODE (this_arg) == TFmode)
3102 {
3103 int force_stack_temp;
3104
3105 force_stack_temp = 0;
3106 if (TARGET_BUGGY_QP_LIB && i == 0)
3107 force_stack_temp = 1;
3108
3109 if (GET_CODE (this_arg) == MEM
3110 && ! force_stack_temp)
3111 {
3112 tree expr = MEM_EXPR (this_arg);
3113 if (expr)
3114 mark_addressable (expr);
3115 this_arg = XEXP (this_arg, 0);
3116 }
3117 else if (CONSTANT_P (this_arg)
3118 && ! force_stack_temp)
3119 {
3120 this_slot = force_const_mem (TFmode, this_arg);
3121 this_arg = XEXP (this_slot, 0);
3122 }
3123 else
3124 {
3125 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3126
3127 /* Operand 0 is the return value. We'll copy it out later. */
3128 if (i > 0)
3129 emit_move_insn (this_slot, this_arg);
3130 else
3131 ret_slot = this_slot;
3132
3133 this_arg = XEXP (this_slot, 0);
3134 }
3135 }
3136
3137 arg[i] = this_arg;
3138 }
3139
3140 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3141
3142 if (GET_MODE (operands[0]) == TFmode)
3143 {
3144 if (nargs == 2)
3145 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
3146 arg[0], GET_MODE (arg[0]),
3147 arg[1], GET_MODE (arg[1]));
3148 else
3149 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
3150 arg[0], GET_MODE (arg[0]),
3151 arg[1], GET_MODE (arg[1]),
3152 arg[2], GET_MODE (arg[2]));
3153
3154 if (ret_slot)
3155 emit_move_insn (operands[0], ret_slot);
3156 }
3157 else
3158 {
3159 rtx ret;
3160
3161 gcc_assert (nargs == 2);
3162
3163 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3164 GET_MODE (operands[0]), 1,
3165 arg[1], GET_MODE (arg[1]));
3166
3167 if (ret != operands[0])
3168 emit_move_insn (operands[0], ret);
3169 }
3170 }
3171
3172 /* Expand soft-float TFmode calls to SPARC ABI routines. */
3173
3174 static void
3175 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3176 {
3177 const char *func;
3178
3179 switch (code)
3180 {
3181 case PLUS:
3182 func = "_Qp_add";
3183 break;
3184 case MINUS:
3185 func = "_Qp_sub";
3186 break;
3187 case MULT:
3188 func = "_Qp_mul";
3189 break;
3190 case DIV:
3191 func = "_Qp_div";
3192 break;
3193 default:
3194 gcc_unreachable ();
3195 }
3196
3197 emit_soft_tfmode_libcall (func, 3, operands);
3198 }
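
/* For illustration: with -msoft-quad-float, a TFmode addition
   `c = a + b' expands through the helper above into a call roughly
   equivalent to _Qp_add (&c, &a, &b), all three TFmode values being
   passed by reference.  */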
3199
3200 static void
3201 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3202 {
3203 const char *func;
3204
3205 gcc_assert (code == SQRT);
3206 func = "_Qp_sqrt";
3207
3208 emit_soft_tfmode_libcall (func, 2, operands);
3209 }
3210
3211 static void
3212 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3213 {
3214 const char *func;
3215
3216 switch (code)
3217 {
3218 case FLOAT_EXTEND:
3219 switch (GET_MODE (operands[1]))
3220 {
3221 case SFmode:
3222 func = "_Qp_stoq";
3223 break;
3224 case DFmode:
3225 func = "_Qp_dtoq";
3226 break;
3227 default:
3228 gcc_unreachable ();
3229 }
3230 break;
3231
3232 case FLOAT_TRUNCATE:
3233 switch (GET_MODE (operands[0]))
3234 {
3235 case SFmode:
3236 func = "_Qp_qtos";
3237 break;
3238 case DFmode:
3239 func = "_Qp_qtod";
3240 break;
3241 default:
3242 gcc_unreachable ();
3243 }
3244 break;
3245
3246 case FLOAT:
3247 switch (GET_MODE (operands[1]))
3248 {
3249 case SImode:
3250 func = "_Qp_itoq";
3251 if (TARGET_ARCH64)
3252 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3253 break;
3254 case DImode:
3255 func = "_Qp_xtoq";
3256 break;
3257 default:
3258 gcc_unreachable ();
3259 }
3260 break;
3261
3262 case UNSIGNED_FLOAT:
3263 switch (GET_MODE (operands[1]))
3264 {
3265 case SImode:
3266 func = "_Qp_uitoq";
3267 if (TARGET_ARCH64)
3268 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3269 break;
3270 case DImode:
3271 func = "_Qp_uxtoq";
3272 break;
3273 default:
3274 gcc_unreachable ();
3275 }
3276 break;
3277
3278 case FIX:
3279 switch (GET_MODE (operands[0]))
3280 {
3281 case SImode:
3282 func = "_Qp_qtoi";
3283 break;
3284 case DImode:
3285 func = "_Qp_qtox";
3286 break;
3287 default:
3288 gcc_unreachable ();
3289 }
3290 break;
3291
3292 case UNSIGNED_FIX:
3293 switch (GET_MODE (operands[0]))
3294 {
3295 case SImode:
3296 func = "_Qp_qtoui";
3297 break;
3298 case DImode:
3299 func = "_Qp_qtoux";
3300 break;
3301 default:
3302 gcc_unreachable ();
3303 }
3304 break;
3305
3306 default:
3307 gcc_unreachable ();
3308 }
3309
3310 emit_soft_tfmode_libcall (func, 2, operands);
3311 }
3312
3313 /* Expand a hard-float TFmode operation. All arguments must be in
3314 registers. */
3315
3316 static void
3317 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3318 {
3319 rtx op, dest;
3320
3321 if (GET_RTX_CLASS (code) == RTX_UNARY)
3322 {
3323 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3324 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3325 }
3326 else
3327 {
3328 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3329 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3330 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3331 operands[1], operands[2]);
3332 }
3333
3334 if (register_operand (operands[0], VOIDmode))
3335 dest = operands[0];
3336 else
3337 dest = gen_reg_rtx (GET_MODE (operands[0]));
3338
3339 emit_insn (gen_rtx_SET (dest, op));
3340
3341 if (dest != operands[0])
3342 emit_move_insn (operands[0], dest);
3343 }
3344
3345 void
3346 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3347 {
3348 if (TARGET_HARD_QUAD)
3349 emit_hard_tfmode_operation (code, operands);
3350 else
3351 emit_soft_tfmode_binop (code, operands);
3352 }
3353
3354 void
3355 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3356 {
3357 if (TARGET_HARD_QUAD)
3358 emit_hard_tfmode_operation (code, operands);
3359 else
3360 emit_soft_tfmode_unop (code, operands);
3361 }
3362
3363 void
3364 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3365 {
3366 if (TARGET_HARD_QUAD)
3367 emit_hard_tfmode_operation (code, operands);
3368 else
3369 emit_soft_tfmode_cvt (code, operands);
3370 }
3371 \f
3372 /* Return nonzero if a branch/jump/call instruction will be emitting
3373 a nop into its delay slot. */
3374
3375 int
3376 empty_delay_slot (rtx_insn *insn)
3377 {
3378 rtx seq;
3379
3380 /* If no previous instruction (should not happen), return true. */
3381 if (PREV_INSN (insn) == NULL)
3382 return 1;
3383
3384 seq = NEXT_INSN (PREV_INSN (insn));
3385 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3386 return 0;
3387
3388 return 1;
3389 }
3390
3391 /* Return nonzero if we should emit a nop after a cbcond instruction.
3392 The cbcond instruction does not have a delay slot; however, there is
3393 a severe performance penalty if a control transfer appears right
3394 after a cbcond. Therefore we emit a nop when we detect this
3395 situation. */
3396
3397 int
3398 emit_cbcond_nop (rtx insn)
3399 {
3400 rtx next = next_active_insn (insn);
3401
3402 if (!next)
3403 return 1;
3404
3405 if (NONJUMP_INSN_P (next)
3406 && GET_CODE (PATTERN (next)) == SEQUENCE)
3407 next = XVECEXP (PATTERN (next), 0, 0);
3408 else if (CALL_P (next)
3409 && GET_CODE (PATTERN (next)) == PARALLEL)
3410 {
3411 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3412
3413 if (GET_CODE (delay) == RETURN)
3414 {
3415 /* It's a sibling call. Do not emit the nop if we're going
3416 to emit something other than the jump itself as the first
3417 instruction of the sibcall sequence. */
3418 if (sparc_leaf_function_p || TARGET_FLAT)
3419 return 0;
3420 }
3421 }
3422
3423 if (NONJUMP_INSN_P (next))
3424 return 0;
3425
3426 return 1;
3427 }
3428
3429 /* Return nonzero if TRIAL can go into the call delay slot. */
3430
3431 int
3432 eligible_for_call_delay (rtx_insn *trial)
3433 {
3434 rtx pat;
3435
3436 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3437 return 0;
3438
3439 /* Binutils allows
3440 call __tls_get_addr, %tgd_call (foo)
3441 add %l7, %o0, %o0, %tgd_add (foo)
3442 while Sun as/ld does not. */
3443 if (TARGET_GNU_TLS || !TARGET_TLS)
3444 return 1;
3445
3446 pat = PATTERN (trial);
3447
3448 /* We must reject tgd_add{32|64}, i.e.
3449 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3450 and tldm_add{32|64}, i.e.
3451 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3452 for Sun as/ld. */
3453 if (GET_CODE (pat) == SET
3454 && GET_CODE (SET_SRC (pat)) == PLUS)
3455 {
3456 rtx unspec = XEXP (SET_SRC (pat), 1);
3457
3458 if (GET_CODE (unspec) == UNSPEC
3459 && (XINT (unspec, 1) == UNSPEC_TLSGD
3460 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3461 return 0;
3462 }
3463
3464 return 1;
3465 }
3466
3467 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3468 instruction. RETURN_P is true if the v9 variant 'return' is to be
3469 considered in the test too.
3470
3471 TRIAL must be a SET whose destination is a REG appropriate for the
3472 'restore' instruction or, if RETURN_P is true, for the 'return'
3473 instruction. */
3474
3475 static int
3476 eligible_for_restore_insn (rtx trial, bool return_p)
3477 {
3478 rtx pat = PATTERN (trial);
3479 rtx src = SET_SRC (pat);
3480 bool src_is_freg = false;
3481 rtx src_reg;
3482
3483 /* Since we now can do moves between float and integer registers when
3484 VIS3 is enabled, we have to catch this case. We can allow such
3485 moves when doing a 'return' however. */
3486 src_reg = src;
3487 if (GET_CODE (src_reg) == SUBREG)
3488 src_reg = SUBREG_REG (src_reg);
3489 if (GET_CODE (src_reg) == REG
3490 && SPARC_FP_REG_P (REGNO (src_reg)))
3491 src_is_freg = true;
3492
3493 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3494 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3495 && arith_operand (src, GET_MODE (src))
3496 && ! src_is_freg)
3497 {
3498 if (TARGET_ARCH64)
3499 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3500 else
3501 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3502 }
3503
3504 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3505 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3506 && arith_double_operand (src, GET_MODE (src))
3507 && ! src_is_freg)
3508 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3509
3510 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3511 else if (! TARGET_FPU && register_operand (src, SFmode))
3512 return 1;
3513
3514 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3515 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3516 return 1;
3517
3518 /* If we have the 'return' instruction, anything that does not use
3519 local or output registers and can go into a delay slot wins. */
3520 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3521 return 1;
3522
3523 /* The 'restore src1,src2,dest' pattern for SImode. */
3524 else if (GET_CODE (src) == PLUS
3525 && register_operand (XEXP (src, 0), SImode)
3526 && arith_operand (XEXP (src, 1), SImode))
3527 return 1;
3528
3529 /* The 'restore src1,src2,dest' pattern for DImode. */
3530 else if (GET_CODE (src) == PLUS
3531 && register_operand (XEXP (src, 0), DImode)
3532 && arith_double_operand (XEXP (src, 1), DImode))
3533 return 1;
3534
3535 /* The 'restore src1,%lo(src2),dest' pattern. */
3536 else if (GET_CODE (src) == LO_SUM
3537 && ! TARGET_CM_MEDMID
3538 && ((register_operand (XEXP (src, 0), SImode)
3539 && immediate_operand (XEXP (src, 1), SImode))
3540 || (TARGET_ARCH64
3541 && register_operand (XEXP (src, 0), DImode)
3542 && immediate_operand (XEXP (src, 1), DImode))))
3543 return 1;
3544
3545 /* The 'restore src,src,dest' pattern. */
3546 else if (GET_CODE (src) == ASHIFT
3547 && (register_operand (XEXP (src, 0), SImode)
3548 || register_operand (XEXP (src, 0), DImode))
3549 && XEXP (src, 1) == const1_rtx)
3550 return 1;
3551
3552 return 0;
3553 }
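
/* For illustration (assuming the usual register-window mapping): a
   delay-slot candidate such as (set (reg %i0) (plus (reg %i1)
   (const_int 1))) can be folded into the epilogue as
   `restore %i1, 1, %o0', performing the addition while the register
   window is being restored.  */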
3554
3555 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3556
3557 int
3558 eligible_for_return_delay (rtx_insn *trial)
3559 {
3560 int regno;
3561 rtx pat;
3562
3563 /* If the function uses __builtin_eh_return, the eh_return machinery
3564 occupies the delay slot. */
3565 if (crtl->calls_eh_return)
3566 return 0;
3567
3568 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3569 return 0;
3570
3571 /* In the case of a leaf or flat function, anything can go into the slot. */
3572 if (sparc_leaf_function_p || TARGET_FLAT)
3573 return 1;
3574
3575 if (!NONJUMP_INSN_P (trial))
3576 return 0;
3577
3578 pat = PATTERN (trial);
3579 if (GET_CODE (pat) == PARALLEL)
3580 {
3581 int i;
3582
3583 if (! TARGET_V9)
3584 return 0;
3585 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3586 {
3587 rtx expr = XVECEXP (pat, 0, i);
3588 if (GET_CODE (expr) != SET)
3589 return 0;
3590 if (GET_CODE (SET_DEST (expr)) != REG)
3591 return 0;
3592 regno = REGNO (SET_DEST (expr));
3593 if (regno >= 8 && regno < 24)
3594 return 0;
3595 }
3596 return !epilogue_renumber (&pat, 1);
3597 }
3598
3599 if (GET_CODE (pat) != SET)
3600 return 0;
3601
3602 if (GET_CODE (SET_DEST (pat)) != REG)
3603 return 0;
3604
3605 regno = REGNO (SET_DEST (pat));
3606
3607 /* Otherwise, only operations which can be done in tandem with
3608 a `restore' or `return' insn can go into the delay slot. */
3609 if (regno >= 8 && regno < 24)
3610 return 0;
3611
3612 /* If this instruction sets up a floating point register and we have a return
3613 instruction, it can probably go in. But restore will not work
3614 with FP_REGS. */
3615 if (! SPARC_INT_REG_P (regno))
3616 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3617
3618 return eligible_for_restore_insn (trial, true);
3619 }
3620
3621 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3622
3623 int
3624 eligible_for_sibcall_delay (rtx_insn *trial)
3625 {
3626 rtx pat;
3627
3628 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3629 return 0;
3630
3631 if (!NONJUMP_INSN_P (trial))
3632 return 0;
3633
3634 pat = PATTERN (trial);
3635
3636 if (sparc_leaf_function_p || TARGET_FLAT)
3637 {
3638 /* If the tail call is done using the call instruction,
3639 we have to restore %o7 in the delay slot. */
3640 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3641 return 0;
3642
3643 /* %g1 is used to build the function address. */
3644 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3645 return 0;
3646
3647 return 1;
3648 }
3649
3650 if (GET_CODE (pat) != SET)
3651 return 0;
3652
3653 /* Otherwise, only operations which can be done in tandem with
3654 a `restore' insn can go into the delay slot. */
3655 if (GET_CODE (SET_DEST (pat)) != REG
3656 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3657 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3658 return 0;
3659
3660 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3661 in most cases. */
3662 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3663 return 0;
3664
3665 return eligible_for_restore_insn (trial, false);
3666 }
3667 \f
3668 /* Determine if it's legal to put X into the constant pool. This
3669 is not possible if X contains the address of a symbol that is
3670 not constant (TLS) or not known at final link time (PIC). */
3671
3672 static bool
3673 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
3674 {
3675 switch (GET_CODE (x))
3676 {
3677 case CONST_INT:
3678 case CONST_DOUBLE:
3679 case CONST_VECTOR:
3680 /* Accept all non-symbolic constants. */
3681 return false;
3682
3683 case LABEL_REF:
3684 /* Labels are OK iff we are non-PIC. */
3685 return flag_pic != 0;
3686
3687 case SYMBOL_REF:
3688 /* 'Naked' TLS symbol references are never OK,
3689 non-TLS symbols are OK iff we are non-PIC. */
3690 if (SYMBOL_REF_TLS_MODEL (x))
3691 return true;
3692 else
3693 return flag_pic != 0;
3694
3695 case CONST:
3696 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3697 case PLUS:
3698 case MINUS:
3699 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3700 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3701 case UNSPEC:
3702 return true;
3703 default:
3704 gcc_unreachable ();
3705 }
3706 }
3707 \f
3708 /* Global Offset Table support. */
3709 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3710 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3711
3712 /* Return the SYMBOL_REF for the Global Offset Table. */
3713
3714 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3715
3716 static rtx
3717 sparc_got (void)
3718 {
3719 if (!sparc_got_symbol)
3720 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3721
3722 return sparc_got_symbol;
3723 }
3724
3725 /* Ensure that we are not using patterns that are not OK with PIC. */
3726
3727 int
3728 check_pic (int i)
3729 {
3730 rtx op;
3731
3732 switch (flag_pic)
3733 {
3734 case 1:
3735 op = recog_data.operand[i];
3736 gcc_assert (GET_CODE (op) != SYMBOL_REF
3737 && (GET_CODE (op) != CONST
3738 || (GET_CODE (XEXP (op, 0)) == MINUS
3739 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3740 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
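/* FALLTHRU */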
3741 case 2:
3742 default:
3743 return 1;
3744 }
3745 }
3746
3747 /* Return true if X is an address which needs a temporary register when
3748 reloaded while generating PIC code. */
3749
3750 int
3751 pic_address_needs_scratch (rtx x)
3752 {
3753 /* An address which is a symbolic operand plus a non-SMALL_INT constant needs a temp reg. */
3754 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3755 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3756 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3757 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3758 return 1;
3759
3760 return 0;
3761 }
3762
3763 /* Determine if a given RTX is a valid constant. We already know this
3764 satisfies CONSTANT_P. */
3765
3766 static bool
3767 sparc_legitimate_constant_p (machine_mode mode, rtx x)
3768 {
3769 switch (GET_CODE (x))
3770 {
3771 case CONST:
3772 case SYMBOL_REF:
3773 if (sparc_tls_referenced_p (x))
3774 return false;
3775 break;
3776
3777 case CONST_DOUBLE:
3778 if (GET_MODE (x) == VOIDmode)
3779 return true;
3780
3781 /* Floating point constants are generally not ok.
3782 The only exception is 0.0 and all-ones in VIS. */
3783 if (TARGET_VIS
3784 && SCALAR_FLOAT_MODE_P (mode)
3785 && (const_zero_operand (x, mode)
3786 || const_all_ones_operand (x, mode)))
3787 return true;
3788
3789 return false;
3790
3791 case CONST_VECTOR:
3792 /* Vector constants are generally not ok.
3793 The only exception is 0 or -1 in VIS. */
3794 if (TARGET_VIS
3795 && (const_zero_operand (x, mode)
3796 || const_all_ones_operand (x, mode)))
3797 return true;
3798
3799 return false;
3800
3801 default:
3802 break;
3803 }
3804
3805 return true;
3806 }
3807
3808 /* Determine if a given RTX is a valid constant address. */
3809
3810 bool
3811 constant_address_p (rtx x)
3812 {
3813 switch (GET_CODE (x))
3814 {
3815 case LABEL_REF:
3816 case CONST_INT:
3817 case HIGH:
3818 return true;
3819
3820 case CONST:
3821 if (flag_pic && pic_address_needs_scratch (x))
3822 return false;
3823 return sparc_legitimate_constant_p (Pmode, x);
3824
3825 case SYMBOL_REF:
3826 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3827
3828 default:
3829 return false;
3830 }
3831 }
3832
3833 /* Nonzero if the constant value X is a legitimate general operand
3834 when generating PIC code. It is given that flag_pic is on and
3835 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
3836
3837 bool
3838 legitimate_pic_operand_p (rtx x)
3839 {
3840 if (pic_address_needs_scratch (x))
3841 return false;
3842 if (sparc_tls_referenced_p (x))
3843 return false;
3844 return true;
3845 }
3846
3847 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3848 (CONST_INT_P (X) \
3849 && INTVAL (X) >= -0x1000 \
3850 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
3851
3852 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3853 (CONST_INT_P (X) \
3854 && INTVAL (X) >= -0x1000 \
3855 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
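/* A worked example of the two bounds above (a sketch): the SPARC memory
   immediate is a 13-bit signed field, i.e. [-0x1000, 0xfff].  For an
   8-byte DImode access, RTX_OK_FOR_OFFSET_P thus accepts offsets up to
   0xff8 so that the last byte accessed is still in range.  The OLO10
   bound is tighter (0xc00 - size), presumably because the %lo() value
   folded into the same field can contribute up to 0x3ff on its own.  */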
3856
3857 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3858
3859 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3860 ordinarily. This changes a bit when generating PIC. */
3861
3862 static bool
3863 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
3864 {
3865 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3866
3867 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3868 rs1 = addr;
3869 else if (GET_CODE (addr) == PLUS)
3870 {
3871 rs1 = XEXP (addr, 0);
3872 rs2 = XEXP (addr, 1);
3873
3874 /* Canonicalize: REG comes first; if there are no regs,
3875 LO_SUM comes first. */
3876 if (!REG_P (rs1)
3877 && GET_CODE (rs1) != SUBREG
3878 && (REG_P (rs2)
3879 || GET_CODE (rs2) == SUBREG
3880 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3881 {
3882 rs1 = XEXP (addr, 1);
3883 rs2 = XEXP (addr, 0);
3884 }
3885
3886 if ((flag_pic == 1
3887 && rs1 == pic_offset_table_rtx
3888 && !REG_P (rs2)
3889 && GET_CODE (rs2) != SUBREG
3890 && GET_CODE (rs2) != LO_SUM
3891 && GET_CODE (rs2) != MEM
3892 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3893 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3894 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3895 || ((REG_P (rs1)
3896 || GET_CODE (rs1) == SUBREG)
3897 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
3898 {
3899 imm1 = rs2;
3900 rs2 = NULL;
3901 }
3902 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3903 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3904 {
3905 /* We prohibit REG + REG for TFmode when there are no quad move insns
3906 and we consequently need to split. We do this because REG+REG
3907 is not an offsettable address. If we get the situation in reload
3908 where source and destination of a movtf pattern are both MEMs with
3909 REG+REG address, then only one of them gets converted to an
3910 offsettable address. */
3911 if (mode == TFmode
3912 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
3913 return 0;
3914
3915 /* Likewise for TImode, but in all cases. */
3916 if (mode == TImode)
3917 return 0;
3918
3919 /* We prohibit REG + REG on ARCH32 if not optimizing for
3920 DFmode/DImode because then mem_min_alignment is likely to be zero
3921 after reload and the forced split would lack a matching splitter
3922 pattern. */
3923 if (TARGET_ARCH32 && !optimize
3924 && (mode == DFmode || mode == DImode))
3925 return 0;
3926 }
3927 else if (USE_AS_OFFSETABLE_LO10
3928 && GET_CODE (rs1) == LO_SUM
3929 && TARGET_ARCH64
3930 && ! TARGET_CM_MEDMID
3931 && RTX_OK_FOR_OLO10_P (rs2, mode))
3932 {
3933 rs2 = NULL;
3934 imm1 = XEXP (rs1, 1);
3935 rs1 = XEXP (rs1, 0);
3936 if (!CONSTANT_P (imm1)
3937 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3938 return 0;
3939 }
3940 }
3941 else if (GET_CODE (addr) == LO_SUM)
3942 {
3943 rs1 = XEXP (addr, 0);
3944 imm1 = XEXP (addr, 1);
3945
3946 if (!CONSTANT_P (imm1)
3947 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3948 return 0;
3949
3950 /* We can't allow TFmode in 32-bit mode, because an offset greater
3951 than the alignment (8) may cause the LO_SUM to overflow. */
3952 if (mode == TFmode && TARGET_ARCH32)
3953 return 0;
3954 }
3955 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
3956 return 1;
3957 else
3958 return 0;
3959
3960 if (GET_CODE (rs1) == SUBREG)
3961 rs1 = SUBREG_REG (rs1);
3962 if (!REG_P (rs1))
3963 return 0;
3964
3965 if (rs2)
3966 {
3967 if (GET_CODE (rs2) == SUBREG)
3968 rs2 = SUBREG_REG (rs2);
3969 if (!REG_P (rs2))
3970 return 0;
3971 }
3972
3973 if (strict)
3974 {
3975 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
3976 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
3977 return 0;
3978 }
3979 else
3980 {
3981 if ((! SPARC_INT_REG_P (REGNO (rs1))
3982 && REGNO (rs1) != FRAME_POINTER_REGNUM
3983 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
3984 || (rs2
3985 && (! SPARC_INT_REG_P (REGNO (rs2))
3986 && REGNO (rs2) != FRAME_POINTER_REGNUM
3987 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
3988 return 0;
3989 }
3990 return 1;
3991 }
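/* Illustrative address forms for the function above (a sketch):

     (reg %o0)                            accepted: REG
     (plus (reg %o0) (reg %o1))           accepted: REG+REG, except for
                                          TFmode without quad moves, TImode,
                                          and unoptimized DFmode/DImode on
                                          32-bit
     (plus (reg %o0) (const_int 64))      accepted: REG+SIMM13
     (lo_sum (reg %o0) (symbol_ref "x"))  accepted unless "x" is TLS
     (plus (reg %o0) (const_int 8192))    rejected: outside the 13-bit
                                          signed immediate range.  */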
3992
3993 /* Return the SYMBOL_REF for the tls_get_addr function. */
3994
3995 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
3996
3997 static rtx
3998 sparc_tls_get_addr (void)
3999 {
4000 if (!sparc_tls_symbol)
4001 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4002
4003 return sparc_tls_symbol;
4004 }
4005
4006 /* Return the Global Offset Table to be used in TLS mode. */
4007
4008 static rtx
4009 sparc_tls_got (void)
4010 {
4011 /* In PIC mode, this is just the PIC offset table. */
4012 if (flag_pic)
4013 {
4014 crtl->uses_pic_offset_table = 1;
4015 return pic_offset_table_rtx;
4016 }
4017
4018 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4019 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4020 if (TARGET_SUN_TLS && TARGET_ARCH32)
4021 {
4022 load_got_register ();
4023 return global_offset_table_rtx;
4024 }
4025
4026 /* In all other cases, we load a new pseudo with the GOT symbol. */
4027 return copy_to_reg (sparc_got ());
4028 }
4029
4030 /* Return true if X contains a thread-local symbol. */
4031
4032 static bool
4033 sparc_tls_referenced_p (rtx x)
4034 {
4035 if (!TARGET_HAVE_TLS)
4036 return false;
4037
4038 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4039 x = XEXP (XEXP (x, 0), 0);
4040
4041 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4042 return true;
4043
4044 /* That's all we handle in sparc_legitimize_tls_address for now. */
4045 return false;
4046 }
4047
4048 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4049 this (thread-local) address. */
4050
4051 static rtx
4052 sparc_legitimize_tls_address (rtx addr)
4053 {
4054 rtx temp1, temp2, temp3, ret, o0, got;
4055 rtx_insn *insn;
4056
4057 gcc_assert (can_create_pseudo_p ());
4058
4059 if (GET_CODE (addr) == SYMBOL_REF)
4060 switch (SYMBOL_REF_TLS_MODEL (addr))
4061 {
4062 case TLS_MODEL_GLOBAL_DYNAMIC:
4063 start_sequence ();
4064 temp1 = gen_reg_rtx (SImode);
4065 temp2 = gen_reg_rtx (SImode);
4066 ret = gen_reg_rtx (Pmode);
4067 o0 = gen_rtx_REG (Pmode, 8);
4068 got = sparc_tls_got ();
4069 emit_insn (gen_tgd_hi22 (temp1, addr));
4070 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4071 if (TARGET_ARCH32)
4072 {
4073 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4074 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4075 addr, const1_rtx));
4076 }
4077 else
4078 {
4079 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4080 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4081 addr, const1_rtx));
4082 }
4083 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4084 insn = get_insns ();
4085 end_sequence ();
4086 emit_libcall_block (insn, ret, o0, addr);
4087 break;
4088
4089 case TLS_MODEL_LOCAL_DYNAMIC:
4090 start_sequence ();
4091 temp1 = gen_reg_rtx (SImode);
4092 temp2 = gen_reg_rtx (SImode);
4093 temp3 = gen_reg_rtx (Pmode);
4094 ret = gen_reg_rtx (Pmode);
4095 o0 = gen_rtx_REG (Pmode, 8);
4096 got = sparc_tls_got ();
4097 emit_insn (gen_tldm_hi22 (temp1));
4098 emit_insn (gen_tldm_lo10 (temp2, temp1));
4099 if (TARGET_ARCH32)
4100 {
4101 emit_insn (gen_tldm_add32 (o0, got, temp2));
4102 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4103 const1_rtx));
4104 }
4105 else
4106 {
4107 emit_insn (gen_tldm_add64 (o0, got, temp2));
4108 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4109 const1_rtx));
4110 }
4111 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4112 insn = get_insns ();
4113 end_sequence ();
4114 emit_libcall_block (insn, temp3, o0,
4115 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4116 UNSPEC_TLSLD_BASE));
4117 temp1 = gen_reg_rtx (SImode);
4118 temp2 = gen_reg_rtx (SImode);
4119 emit_insn (gen_tldo_hix22 (temp1, addr));
4120 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4121 if (TARGET_ARCH32)
4122 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4123 else
4124 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4125 break;
4126
4127 case TLS_MODEL_INITIAL_EXEC:
4128 temp1 = gen_reg_rtx (SImode);
4129 temp2 = gen_reg_rtx (SImode);
4130 temp3 = gen_reg_rtx (Pmode);
4131 got = sparc_tls_got ();
4132 emit_insn (gen_tie_hi22 (temp1, addr));
4133 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4134 if (TARGET_ARCH32)
4135 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4136 else
4137 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4138 if (TARGET_SUN_TLS)
4139 {
4140 ret = gen_reg_rtx (Pmode);
4141 if (TARGET_ARCH32)
4142 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4143 temp3, addr));
4144 else
4145 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4146 temp3, addr));
4147 }
4148 else
4149 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4150 break;
4151
4152 case TLS_MODEL_LOCAL_EXEC:
4153 temp1 = gen_reg_rtx (Pmode);
4154 temp2 = gen_reg_rtx (Pmode);
4155 if (TARGET_ARCH32)
4156 {
4157 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4158 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4159 }
4160 else
4161 {
4162 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4163 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4164 }
4165 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4166 break;
4167
4168 default:
4169 gcc_unreachable ();
4170 }
4171
4172 else if (GET_CODE (addr) == CONST)
4173 {
4174 rtx base, offset;
4175
4176 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4177
4178 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4179 offset = XEXP (XEXP (addr, 0), 1);
4180
4181 base = force_operand (base, NULL_RTX);
4182 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4183 offset = force_reg (Pmode, offset);
4184 ret = gen_rtx_PLUS (Pmode, base, offset);
4185 }
4186
4187 else
4188 gcc_unreachable (); /* for now ... */
4189
4190 return ret;
4191 }
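/* For reference, the 32-bit global-dynamic sequence built above roughly
   assembles to (a sketch; operator names per the SPARC ELF TLS ABI):

       sethi  %tgd_hi22(sym), %t1
       add    %t1, %tgd_lo10(sym), %t2
       add    %l7, %t2, %o0, %tgd_add(sym)
       call   __tls_get_addr, %tgd_call(sym)
        nop

   with the address returned in %o0.  Register 7 (%g7) used by the IE
   and LE models above is the thread pointer.  */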
4192
4193 /* Legitimize PIC addresses. If the address is already position-independent,
4194 we return ORIG. Newly generated position-independent addresses go into a
4195 reg. This is REG if nonzero, otherwise we allocate register(s) as
4196 necessary. */
4197
4198 static rtx
4199 sparc_legitimize_pic_address (rtx orig, rtx reg)
4200 {
4201 bool gotdata_op = false;
4202
4203 if (GET_CODE (orig) == SYMBOL_REF
4204 /* See the comment in sparc_expand_move. */
4205 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4206 {
4207 rtx pic_ref, address;
4208 rtx_insn *insn;
4209
4210 if (reg == 0)
4211 {
4212 gcc_assert (can_create_pseudo_p ());
4213 reg = gen_reg_rtx (Pmode);
4214 }
4215
4216 if (flag_pic == 2)
4217 {
4218 /* If not during reload, allocate another temp reg here for loading
4219 in the address, so that these instructions can be optimized
4220 properly. */
4221 rtx temp_reg = (! can_create_pseudo_p ()
4222 ? reg : gen_reg_rtx (Pmode));
4223
4224 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4225 won't get confused into thinking that these two instructions
4226 are loading in the true address of the symbol. If in the
4227 future a PIC rtx exists, that should be used instead. */
4228 if (TARGET_ARCH64)
4229 {
4230 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4231 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4232 }
4233 else
4234 {
4235 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4236 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4237 }
4238 address = temp_reg;
4239 gotdata_op = true;
4240 }
4241 else
4242 address = orig;
4243
4244 crtl->uses_pic_offset_table = 1;
4245 if (gotdata_op)
4246 {
4247 if (TARGET_ARCH64)
4248 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4249 pic_offset_table_rtx,
4250 address, orig));
4251 else
4252 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4253 pic_offset_table_rtx,
4254 address, orig));
4255 }
4256 else
4257 {
4258 pic_ref
4259 = gen_const_mem (Pmode,
4260 gen_rtx_PLUS (Pmode,
4261 pic_offset_table_rtx, address));
4262 insn = emit_move_insn (reg, pic_ref);
4263 }
4264
4265 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4266 by loop. */
4267 set_unique_reg_note (insn, REG_EQUAL, orig);
4268 return reg;
4269 }
4270 else if (GET_CODE (orig) == CONST)
4271 {
4272 rtx base, offset;
4273
4274 if (GET_CODE (XEXP (orig, 0)) == PLUS
4275 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4276 return orig;
4277
4278 if (reg == 0)
4279 {
4280 gcc_assert (can_create_pseudo_p ());
4281 reg = gen_reg_rtx (Pmode);
4282 }
4283
4284 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4285 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4286 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4287 base == reg ? NULL_RTX : reg);
4288
4289 if (GET_CODE (offset) == CONST_INT)
4290 {
4291 if (SMALL_INT (offset))
4292 return plus_constant (Pmode, base, INTVAL (offset));
4293 else if (can_create_pseudo_p ())
4294 offset = force_reg (Pmode, offset);
4295 else
4296 /* If we reach here, then something is seriously wrong. */
4297 gcc_unreachable ();
4298 }
4299 return gen_rtx_PLUS (Pmode, base, offset);
4300 }
4301 else if (GET_CODE (orig) == LABEL_REF)
4302 /* ??? We ought to be checking that the register is live instead, in case
4303 it is eliminated. */
4304 crtl->uses_pic_offset_table = 1;
4305
4306 return orig;
4307 }
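/* Shape of the code generated above for a global symbol (a sketch):

     flag_pic == 1:  a single load:  ld [%l7 + slot], %reg
     flag_pic == 2:  sethi/or to build the GOT slot offset in a temp,
                     then:           ld [%l7 + %tmp], %reg

   where %l7 holds the GOT pointer.  The gotdata_op variant annotates
   the sequence so that the linker may relax the GOT load into a direct
   address computation.  */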
4308
4309 /* Try machine-dependent ways of modifying an illegitimate address X
4310 to be legitimate. If we find one, return the new, valid address.
4311
4312 OLDX is the address as it was before break_out_memory_refs was called.
4313 In some cases it is useful to look at this to decide what needs to be done.
4314
4315 MODE is the mode of the operand pointed to by X.
4316
4317 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4318
4319 static rtx
4320 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4321 machine_mode mode)
4322 {
4323 rtx orig_x = x;
4324
4325 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4326 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4327 force_operand (XEXP (x, 0), NULL_RTX));
4328 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4329 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4330 force_operand (XEXP (x, 1), NULL_RTX));
4331 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4332 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4333 XEXP (x, 1));
4334 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4335 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4336 force_operand (XEXP (x, 1), NULL_RTX));
4337
4338 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4339 return x;
4340
4341 if (sparc_tls_referenced_p (x))
4342 x = sparc_legitimize_tls_address (x);
4343 else if (flag_pic)
4344 x = sparc_legitimize_pic_address (x, NULL_RTX);
4345 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4346 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4347 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4348 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4349 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4350 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4351 else if (GET_CODE (x) == SYMBOL_REF
4352 || GET_CODE (x) == CONST
4353 || GET_CODE (x) == LABEL_REF)
4354 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4355
4356 return x;
4357 }
4358
4359 /* Delegitimize an address that was legitimized by the above function. */
4360
4361 static rtx
4362 sparc_delegitimize_address (rtx x)
4363 {
4364 x = delegitimize_mem_from_attrs (x);
4365
4366 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4367 switch (XINT (XEXP (x, 1), 1))
4368 {
4369 case UNSPEC_MOVE_PIC:
4370 case UNSPEC_TLSLE:
4371 x = XVECEXP (XEXP (x, 1), 0, 0);
4372 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4373 break;
4374 default:
4375 break;
4376 }
4377
4378 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4379 if (GET_CODE (x) == MINUS
4380 && REG_P (XEXP (x, 0))
4381 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4382 && GET_CODE (XEXP (x, 1)) == LO_SUM
4383 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4384 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4385 {
4386 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4387 gcc_assert (GET_CODE (x) == LABEL_REF);
4388 }
4389
4390 return x;
4391 }
4392
4393 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4394 replace the input X, or the original X if no replacement is called for.
4395 The output parameter *WIN is 1 if the calling macro should goto WIN,
4396 0 if it should not.
4397
4398 For SPARC, we wish to handle addresses by splitting them into
4399 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4400 This cuts the number of extra insns by one.
4401
4402 Do nothing when generating PIC code and the address is a symbolic
4403 operand or requires a scratch register. */
4404
4405 rtx
4406 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4407 int opnum, int type,
4408 int ind_levels ATTRIBUTE_UNUSED, int *win)
4409 {
4410 /* Decompose SImode constants into HIGH+LO_SUM. */
4411 if (CONSTANT_P (x)
4412 && (mode != TFmode || TARGET_ARCH64)
4413 && GET_MODE (x) == SImode
4414 && GET_CODE (x) != LO_SUM
4415 && GET_CODE (x) != HIGH
4416 && sparc_cmodel <= CM_MEDLOW
4417 && !(flag_pic
4418 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4419 {
4420 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4421 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4422 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4423 opnum, (enum reload_type)type);
4424 *win = 1;
4425 return x;
4426 }
4427
4428 /* We have to recognize what we have already generated above. */
4429 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4430 {
4431 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4432 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4433 opnum, (enum reload_type)type);
4434 *win = 1;
4435 return x;
4436 }
4437
4438 *win = 0;
4439 return x;
4440 }
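/* Example of the splitting described above (a sketch, non-PIC):
   reloading the symbolic address `sym' for an SImode access yields

       sethi  %hi(sym), %tmp            ! the HIGH part gets a reload reg
       ld     [%tmp + %lo(sym)], %reg   ! the LO_SUM stays in the MEM

   instead of materializing the complete address first, which saves one
   insn.  */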
4441
4442 /* Return true if ADDR (a legitimate address expression)
4443 has an effect that depends on the machine mode it is used for.
4444
4445 In PIC mode,
4446
4447 (mem:HI [%l7+a])
4448
4449 is not equivalent to
4450
4451 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4452
4453 because [%l7+a+1] is interpreted as the address of (a+1). */
4454
4455
4456 static bool
4457 sparc_mode_dependent_address_p (const_rtx addr,
4458 addr_space_t as ATTRIBUTE_UNUSED)
4459 {
4460 if (flag_pic && GET_CODE (addr) == PLUS)
4461 {
4462 rtx op0 = XEXP (addr, 0);
4463 rtx op1 = XEXP (addr, 1);
4464 if (op0 == pic_offset_table_rtx
4465 && symbolic_operand (op1, VOIDmode))
4466 return true;
4467 }
4468
4469 return false;
4470 }
4471
4472 #ifdef HAVE_GAS_HIDDEN
4473 # define USE_HIDDEN_LINKONCE 1
4474 #else
4475 # define USE_HIDDEN_LINKONCE 0
4476 #endif
4477
4478 static void
4479 get_pc_thunk_name (char name[32], unsigned int regno)
4480 {
4481 const char *reg_name = reg_names[regno];
4482
4483 /* Skip the leading '%' as that cannot be used in a
4484 symbol name. */
4485 reg_name += 1;
4486
4487 if (USE_HIDDEN_LINKONCE)
4488 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4489 else
4490 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4491 }
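/* Example: for GLOBAL_OFFSET_TABLE_REGNUM (%l7) this produces
   "__sparc_get_pc_thunk.l7" when hidden linkonce symbols are available,
   and otherwise an internal label derived from "LADDPC" and the
   register number.  */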
4492
4493 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4494
4495 static rtx
4496 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4497 {
4498 int orig_flag_pic = flag_pic;
4499 rtx insn;
4500
4501 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4502 flag_pic = 0;
4503 if (TARGET_ARCH64)
4504 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4505 else
4506 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4507 flag_pic = orig_flag_pic;
4508
4509 return insn;
4510 }
4511
4512 /* Emit code to load the GOT register. */
4513
4514 void
4515 load_got_register (void)
4516 {
4517 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4518 if (!global_offset_table_rtx)
4519 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4520
4521 if (TARGET_VXWORKS_RTP)
4522 emit_insn (gen_vxworks_load_got ());
4523 else
4524 {
4525 /* The GOT symbol is subject to a PC-relative relocation so we need a
4526 helper function to add the PC value and thus get the final value. */
4527 if (!got_helper_rtx)
4528 {
4529 char name[32];
4530 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4531 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4532 }
4533
4534 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4535 got_helper_rtx,
4536 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4537 }
4538
4539 /* Need to emit this whether or not we obey regdecls,
4540 since setjmp/longjmp can cause life info to screw up.
4541 ??? In the case where we don't obey regdecls, this is not sufficient
4542 since we may not fall out the bottom. */
4543 emit_use (global_offset_table_rtx);
4544 }
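/* The PC-relative sequence emitted above looks roughly like this
   (a sketch, 32-bit, GOT register %l7):

       sethi  %hi(_GLOBAL_OFFSET_TABLE_-4), %l7
       call   __sparc_get_pc_thunk.l7
        add   %l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk adds %o7, the PC of the call, into %l7.  */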
4545
4546 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4547 address of the call target. */
4548
4549 void
4550 sparc_emit_call_insn (rtx pat, rtx addr)
4551 {
4552 rtx_insn *insn;
4553
4554 insn = emit_call_insn (pat);
4555
4556 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4557 if (TARGET_VXWORKS_RTP
4558 && flag_pic
4559 && GET_CODE (addr) == SYMBOL_REF
4560 && (SYMBOL_REF_DECL (addr)
4561 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4562 : !SYMBOL_REF_LOCAL_P (addr)))
4563 {
4564 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4565 crtl->uses_pic_offset_table = 1;
4566 }
4567 }
4568 \f
4569 /* Return 1 if RTX is a MEM which is known to be aligned to at
4570 least a DESIRED byte boundary. */
4571
4572 int
4573 mem_min_alignment (rtx mem, int desired)
4574 {
4575 rtx addr, base, offset;
4576
4577 /* If it's not a MEM we can't accept it. */
4578 if (GET_CODE (mem) != MEM)
4579 return 0;
4580
4581 /* Obviously... */
4582 if (!TARGET_UNALIGNED_DOUBLES
4583 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4584 return 1;
4585
4586 /* ??? The rest of the function predates MEM_ALIGN so
4587 there is probably a bit of redundancy. */
4588 addr = XEXP (mem, 0);
4589 base = offset = NULL_RTX;
4590 if (GET_CODE (addr) == PLUS)
4591 {
4592 if (GET_CODE (XEXP (addr, 0)) == REG)
4593 {
4594 base = XEXP (addr, 0);
4595
4596 /* What we are saying here is that if the base
4597 REG is aligned properly, the compiler will make
4598 sure any REG-based index upon it will be so
4599 as well. */
4600 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4601 offset = XEXP (addr, 1);
4602 else
4603 offset = const0_rtx;
4604 }
4605 }
4606 else if (GET_CODE (addr) == REG)
4607 {
4608 base = addr;
4609 offset = const0_rtx;
4610 }
4611
4612 if (base != NULL_RTX)
4613 {
4614 int regno = REGNO (base);
4615
4616 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4617 {
4618 /* Check if the compiler has recorded some information
4619 about the alignment of the base REG. If reload has
4620 completed, we already matched with proper alignments.
4621 If not running global_alloc, reload might give us an
4622 unaligned pointer to the local stack, though. */
4623 if (((cfun != 0
4624 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4625 || (optimize && reload_completed))
4626 && (INTVAL (offset) & (desired - 1)) == 0)
4627 return 1;
4628 }
4629 else
4630 {
4631 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4632 return 1;
4633 }
4634 }
4635 else if (! TARGET_UNALIGNED_DOUBLES
4636 || CONSTANT_P (addr)
4637 || GET_CODE (addr) == LO_SUM)
4638 {
4639 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4640 is true, in which case we can only assume that an access is aligned if
4641 it is to a constant address, or the address involves a LO_SUM. */
4642 return 1;
4643 }
4644
4645 /* An obviously unaligned address. */
4646 return 0;
4647 }
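/* Example (a sketch, 32-bit, so SPARC_STACK_BIAS == 0):
   (mem:DF (plus (reg %fp) (const_int -16))) passes for DESIRED == 8 via
   the frame-pointer branch above, since (-16 - 0) & 7 == 0, whereas an
   offset of -12 would fail the same test.  */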
4648
4649 \f
4650 /* Vectors to keep interesting information about registers where it can easily
4651 be found. We used to use the actual mode value as the bit number, but there
4652 are more than 32 modes now. Instead we use two tables: one indexed by
4653 hard register number, and one indexed by mode. */
4654
4655 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4656 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4657 mapped into one sparc_mode_class mode. */
4658
4659 enum sparc_mode_class {
4660 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4661 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4662 CC_MODE, CCFP_MODE
4663 };
4664
4665 /* Modes for single-word and smaller quantities. */
4666 #define S_MODES \
4667 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4668
4669 /* Modes for double-word and smaller quantities. */
4670 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4671
4672 /* Modes for quad-word and smaller quantities. */
4673 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4674
4675 /* Modes for 8-word and smaller quantities. */
4676 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4677
4678 /* Modes for single-float quantities. */
4679 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4680
4681 /* Modes for double-float and smaller quantities. */
4682 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4683
4684 /* Modes for quad-float and smaller quantities. */
4685 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4686
4687 /* Modes for quad-float pairs and smaller quantities. */
4688 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4689
4690 /* Modes for double-float only quantities. */
4691 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4692
4693 /* Modes for quad-float and double-float only quantities. */
4694 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4695
4696 /* Modes for quad-float pairs and double-float only quantities. */
4697 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4698
4699 /* Modes for condition codes. */
4700 #define CC_MODES (1 << (int) CC_MODE)
4701 #define CCFP_MODES (1 << (int) CCFP_MODE)
4702
4703 /* Value is 1 if register/mode pair is acceptable on sparc.
4704
4705 The funny mixture of D and T modes is because integer operations
4706 do not specially operate on tetra quantities, so non-quad-aligned
4707 registers can hold quadword quantities (except %o4 and %i4 because
4708 they cross fixed registers).
4709
4710 ??? Note that, despite the settings, non-double-aligned parameter
4711 registers can hold double-word quantities in 32-bit mode. */
4712
4713 /* This points to either the 32 bit or the 64 bit version. */
4714 const int *hard_regno_mode_classes;
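/* The tables reduce the register/mode check to a single bit test; see
   HARD_REGNO_MODE_OK in sparc.h, which is essentially (a sketch):

     (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0  */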
4715
4716 static const int hard_32bit_mode_classes[] = {
4717 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4718 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4719 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4720 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4721
4722 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4723 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4724 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4725 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4726
4727 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4728 and none can hold SFmode/SImode values. */
4729 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4730 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4731 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4732 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4733
4734 /* %fcc[0123] */
4735 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4736
4737 /* %icc, %sfp, %gsr */
4738 CC_MODES, 0, D_MODES
4739 };
4740
4741 static const int hard_64bit_mode_classes[] = {
4742 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4743 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4744 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4745 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4746
4747 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4748 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4749 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4750 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4751
4752 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4753 and none can hold SFmode/SImode values. */
4754 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4755 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4756 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4757 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4758
4759 /* %fcc[0123] */
4760 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4761
4762 /* %icc, %sfp, %gsr */
4763 CC_MODES, 0, D_MODES
4764 };
4765
4766 int sparc_mode_class [NUM_MACHINE_MODES];
4767
4768 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4769
4770 static void
4771 sparc_init_modes (void)
4772 {
4773 int i;
4774
4775 for (i = 0; i < NUM_MACHINE_MODES; i++)
4776 {
4777 machine_mode m = (machine_mode) i;
4778 unsigned int size = GET_MODE_SIZE (m);
4779
4780 switch (GET_MODE_CLASS (m))
4781 {
4782 case MODE_INT:
4783 case MODE_PARTIAL_INT:
4784 case MODE_COMPLEX_INT:
4785 if (size < 4)
4786 sparc_mode_class[i] = 1 << (int) H_MODE;
4787 else if (size == 4)
4788 sparc_mode_class[i] = 1 << (int) S_MODE;
4789 else if (size == 8)
4790 sparc_mode_class[i] = 1 << (int) D_MODE;
4791 else if (size == 16)
4792 sparc_mode_class[i] = 1 << (int) T_MODE;
4793 else if (size == 32)
4794 sparc_mode_class[i] = 1 << (int) O_MODE;
4795 else
4796 sparc_mode_class[i] = 0;
4797 break;
4798 case MODE_VECTOR_INT:
4799 if (size == 4)
4800 sparc_mode_class[i] = 1 << (int) SF_MODE;
4801 else if (size == 8)
4802 sparc_mode_class[i] = 1 << (int) DF_MODE;
4803 else
4804 sparc_mode_class[i] = 0;
4805 break;
4806 case MODE_FLOAT:
4807 case MODE_COMPLEX_FLOAT:
4808 if (size == 4)
4809 sparc_mode_class[i] = 1 << (int) SF_MODE;
4810 else if (size == 8)
4811 sparc_mode_class[i] = 1 << (int) DF_MODE;
4812 else if (size == 16)
4813 sparc_mode_class[i] = 1 << (int) TF_MODE;
4814 else if (size == 32)
4815 sparc_mode_class[i] = 1 << (int) OF_MODE;
4816 else
4817 sparc_mode_class[i] = 0;
4818 break;
4819 case MODE_CC:
4820 if (m == CCFPmode || m == CCFPEmode)
4821 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4822 else
4823 sparc_mode_class[i] = 1 << (int) CC_MODE;
4824 break;
4825 default:
4826 sparc_mode_class[i] = 0;
4827 break;
4828 }
4829 }
4830
4831 if (TARGET_ARCH64)
4832 hard_regno_mode_classes = hard_64bit_mode_classes;
4833 else
4834 hard_regno_mode_classes = hard_32bit_mode_classes;
4835
4836 /* Initialize the array used by REGNO_REG_CLASS. */
4837 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4838 {
4839 if (i < 16 && TARGET_V8PLUS)
4840 sparc_regno_reg_class[i] = I64_REGS;
4841 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4842 sparc_regno_reg_class[i] = GENERAL_REGS;
4843 else if (i < 64)
4844 sparc_regno_reg_class[i] = FP_REGS;
4845 else if (i < 96)
4846 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4847 else if (i < 100)
4848 sparc_regno_reg_class[i] = FPCC_REGS;
4849 else
4850 sparc_regno_reg_class[i] = NO_REGS;
4851 }
4852 }
4853 \f
4854 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4855
4856 static inline bool
4857 save_global_or_fp_reg_p (unsigned int regno,
4858 int leaf_function ATTRIBUTE_UNUSED)
4859 {
4860 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4861 }
4862
4863 /* Return whether the return address register (%i7) is needed. */
4864
4865 static inline bool
4866 return_addr_reg_needed_p (int leaf_function)
4867 {
4868 /* If it is live, for example because of __builtin_return_address (0). */
4869 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4870 return true;
4871
4872 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4873 if (!leaf_function
4874 /* Loading the GOT register clobbers %o7. */
4875 || crtl->uses_pic_offset_table
4876 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4877 return true;
4878
4879 return false;
4880 }
4881
4882 /* Return whether REGNO, a local or in register, must be saved/restored. */
4883
4884 static bool
4885 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4886 {
4887 /* General case: call-saved registers live at some point. */
4888 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4889 return true;
4890
4891 /* Frame pointer register (%fp) if needed. */
4892 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4893 return true;
4894
4895 /* Return address register (%i7) if needed. */
4896 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4897 return true;
4898
4899 /* GOT register (%l7) if needed. */
4900 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4901 return true;
4902
4903 /* If the function accesses prior frames, the frame pointer and the return
4904 address of the previous frame must be saved on the stack. */
4905 if (crtl->accesses_prior_frames
4906 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
4907 return true;
4908
4909 return false;
4910 }
4911
4912 /* Compute the frame size required by the function. This function is called
4913 during the reload pass and also by sparc_expand_prologue. */
4914
4915 HOST_WIDE_INT
4916 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
4917 {
4918 HOST_WIDE_INT frame_size, apparent_frame_size;
4919 int args_size, n_global_fp_regs = 0;
4920 bool save_local_in_regs_p = false;
4921 unsigned int i;
4922
4923 /* If the function allocates dynamic stack space, the dynamic offset is
4924 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
4925 if (leaf_function && !cfun->calls_alloca)
4926 args_size = 0;
4927 else
4928 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
4929
4930 /* Calculate space needed for global registers. */
4931 if (TARGET_ARCH64)
4932 { for (i = 0; i < 8; i++)
4933 if (save_global_or_fp_reg_p (i, 0))
4934 n_global_fp_regs += 2; }
4935 else /* TARGET_ARCH32: braces keep this else bound to TARGET_ARCH64. */
4936 { for (i = 0; i < 8; i += 2)
4937 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4938 n_global_fp_regs += 2; }
4939
4940 /* In the flat window model, find out which local and in registers need to
4941 be saved. We don't reserve space in the current frame for them as they
4942 will be spilled into the register window save area of the caller's frame.
4943 However, as soon as we use this register window save area, we must create
4944 that of the current frame to make it the live one. */
4945 if (TARGET_FLAT)
4946 for (i = 16; i < 32; i++)
4947 if (save_local_or_in_reg_p (i, leaf_function))
4948 {
4949 save_local_in_regs_p = true;
4950 break;
4951 }
4952
4953 /* Calculate space needed for FP registers. */
4954 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
4955 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4956 n_global_fp_regs += 2;
4957
4958 if (size == 0
4959 && n_global_fp_regs == 0
4960 && args_size == 0
4961 && !save_local_in_regs_p)
4962 frame_size = apparent_frame_size = 0;
4963 else
4964 {
4965 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
4966 apparent_frame_size = ROUND_UP (size - STARTING_FRAME_OFFSET, 8);
4967 apparent_frame_size += n_global_fp_regs * 4;
4968
4969 /* We need to add the size of the outgoing argument area. */
4970 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
4971
4972 /* And that of the register window save area. */
4973 frame_size += FIRST_PARM_OFFSET (cfun->decl);
4974
4975 /* Finally, bump to the appropriate alignment. */
4976 frame_size = SPARC_STACK_ALIGN (frame_size);
4977 }
4978
4979 /* Set up values for use in prologue and epilogue. */
4980 sparc_frame_size = frame_size;
4981 sparc_apparent_frame_size = apparent_frame_size;
4982 sparc_n_global_fp_regs = n_global_fp_regs;
4983 sparc_save_local_in_regs_p = save_local_in_regs_p;
4984
4985 return frame_size;
4986 }
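/* Putting the computation above into a single formula (a sketch):

     frame_size
       = SPARC_STACK_ALIGN (ROUND_UP (size - STARTING_FRAME_OFFSET, 8)
                            + n_global_fp_regs * 4
                            + ROUND_UP (args_size, 8)
                            + FIRST_PARM_OFFSET (cfun->decl));

   i.e. locals, saved global/FP registers, outgoing arguments and the
   register window save area, rounded up to the stack alignment.  */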
4987
4988 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
4989
4990 int
4991 sparc_initial_elimination_offset (int to)
4992 {
4993 int offset;
4994
4995 if (to == STACK_POINTER_REGNUM)
4996 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
4997 else
4998 offset = 0;
4999
5000 offset += SPARC_STACK_BIAS;
5001 return offset;
5002 }
5003
5004 /* Output any necessary .register pseudo-ops. */
5005
5006 void
5007 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5008 {
5009 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5010 int i;
5011
5012 if (TARGET_ARCH32)
5013 return;
5014
5015 /* Check whether %g[2367] were used without a
5016 .register directive having been printed for them already. */
5017 for (i = 2; i < 8; i++)
5018 {
5019 if (df_regs_ever_live_p (i)
5020 && ! sparc_hard_reg_printed [i])
5021 {
5022 sparc_hard_reg_printed [i] = 1;
5023 /* %g7 is used as TLS base register, use #ignore
5024 for it instead of #scratch. */
5025 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5026 i == 7 ? "ignore" : "scratch");
5027 }
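/* Skip %g4 and %g5: the .register directive only applies to the
   application registers %g2/%g3 and the system registers %g6/%g7.  */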
5028 if (i == 3) i = 5;
5029 }
5030 #endif
5031 }
5032
5033 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5034
5035 #if PROBE_INTERVAL > 4096
5036 #error Cannot use indexed addressing mode for stack probing
5037 #endif
5038
5039 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5040 inclusive. These are offsets from the current stack pointer.
5041
5042 Note that we don't use the REG+REG addressing mode for the probes because
5043 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5044 so the advantages of having a single code path win here. */
5045
5046 static void
5047 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5048 {
5049 rtx g1 = gen_rtx_REG (Pmode, 1);
5050
5051 /* See if we have a constant small number of probes to generate. If so,
5052 that's the easy case. */
5053 if (size <= PROBE_INTERVAL)
5054 {
5055 emit_move_insn (g1, GEN_INT (first));
5056 emit_insn (gen_rtx_SET (g1,
5057 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5058 emit_stack_probe (plus_constant (Pmode, g1, -size));
5059 }
5060
5061 /* The run-time loop is made up of 9 insns in the generic case while the
5062 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
5063 else if (size <= 4 * PROBE_INTERVAL)
5064 {
5065 HOST_WIDE_INT i;
5066
5067 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5068 emit_insn (gen_rtx_SET (g1,
5069 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5070 emit_stack_probe (g1);
5071
5072 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5073 it exceeds SIZE. If only two probes are needed, this will not
5074 generate any code. Then probe at FIRST + SIZE. */
5075 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5076 {
5077 emit_insn (gen_rtx_SET (g1,
5078 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5079 emit_stack_probe (g1);
5080 }
5081
5082 emit_stack_probe (plus_constant (Pmode, g1,
5083 (i - PROBE_INTERVAL) - size));
5084 }
5085
5086 /* Otherwise, do the same as above, but in a loop. Note that we must be
5087 extra careful with variables wrapping around because we might be at
5088 the very top (or the very bottom) of the address space and we have
5089 to be able to handle this case properly; in particular, we use an
5090 equality test for the loop condition. */
5091 else
5092 {
5093 HOST_WIDE_INT rounded_size;
5094 rtx g4 = gen_rtx_REG (Pmode, 4);
5095
5096 emit_move_insn (g1, GEN_INT (first));
5097
5098
5099 /* Step 1: round SIZE to the previous multiple of the interval. */
5100
5101 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5102 emit_move_insn (g4, GEN_INT (rounded_size));
5103
5104
5105 /* Step 2: compute initial and final value of the loop counter. */
5106
5107 /* TEST_ADDR = SP + FIRST. */
5108 emit_insn (gen_rtx_SET (g1,
5109 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5110
5111 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5112 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5113
5114
5115 /* Step 3: the loop
5116
5117 while (TEST_ADDR != LAST_ADDR)
5118 {
5119 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5120 probe at TEST_ADDR
5121 }
5122
5123 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5124 until it is equal to ROUNDED_SIZE. */
5125
5126 if (TARGET_ARCH64)
5127 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5128 else
5129 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5130
5131
5132 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5133 that SIZE is equal to ROUNDED_SIZE. */
5134
5135 if (size != rounded_size)
5136 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5137 }
5138
5139 /* Make sure nothing is scheduled before we are done. */
5140 emit_insn (gen_blockage ());
5141 }
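/* Example (a sketch, PROBE_INTERVAL == 4096): for FIRST == 4096 and
   SIZE == 12288, the middle branch above probes at sp-8192 and sp-12288
   in the unrolled sequence, then at sp-16384, i.e. FIRST + SIZE below
   the incoming stack pointer.  */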
5142
5143 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5144 absolute addresses. */
5145
5146 const char *
5147 output_probe_stack_range (rtx reg1, rtx reg2)
5148 {
5149 static int labelno = 0;
5150 char loop_lab[32];
5151 rtx xops[2];
5152
5153 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5154
5155 /* Loop. */
5156 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5157
5158 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5159 xops[0] = reg1;
5160 xops[1] = GEN_INT (-PROBE_INTERVAL);
5161 output_asm_insn ("add\t%0, %1, %0", xops);
5162
5163 /* Test if TEST_ADDR == LAST_ADDR. */
5164 xops[1] = reg2;
5165 output_asm_insn ("cmp\t%0, %1", xops);
5166
5167 /* Probe at TEST_ADDR and branch. */
5168 if (TARGET_ARCH64)
5169 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5170 else
5171 fputs ("\tbne\t", asm_out_file);
5172 assemble_name_raw (asm_out_file, loop_lab);
5173 fputc ('\n', asm_out_file);
5174 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5175 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5176
5177 return "";
5178 }
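/* With the default 4096-byte interval, the loop printed above reads
   (a sketch, 32-bit, so the bias is 0):

   .LPSRL0:
       add   %g1, -4096, %g1
       cmp   %g1, %g4
       bne   .LPSRL0
        st   %g0, [%g1+0]        ! the probe sits in the delay slot

   hence the leading space before "st" in the template.  */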
5179
5180 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5181 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5182 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5183 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5184 the action to be performed if it returns false. Return the new offset. */
5185
5186 typedef bool (*sorr_pred_t) (unsigned int, int);
5187 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5188
5189 static int
5190 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5191 int offset, int leaf_function, sorr_pred_t save_p,
5192 sorr_act_t action_true, sorr_act_t action_false)
5193 {
5194 unsigned int i;
5195 rtx mem;
5196 rtx_insn *insn;
5197
5198 if (TARGET_ARCH64 && high <= 32)
5199 {
5200 int fp_offset = -1;
5201
5202 for (i = low; i < high; i++)
5203 {
5204 if (save_p (i, leaf_function))
5205 {
5206 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5207 base, offset));
5208 if (action_true == SORR_SAVE)
5209 {
5210 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5211 RTX_FRAME_RELATED_P (insn) = 1;
5212 }
5213 else /* action_true == SORR_RESTORE */
5214 {
5215 /* The frame pointer must be restored last since its old
5216 value may be used as base address for the frame. This
5217 is problematic in 64-bit mode only because of the lack
5218 of a double-word load instruction. */
5219 if (i == HARD_FRAME_POINTER_REGNUM)
5220 fp_offset = offset;
5221 else
5222 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5223 }
5224 offset += 8;
5225 }
5226 else if (action_false == SORR_ADVANCE)
5227 offset += 8;
5228 }
5229
5230 if (fp_offset >= 0)
5231 {
5232 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5233 emit_move_insn (hard_frame_pointer_rtx, mem);
5234 }
5235 }
5236 else
5237 {
5238 for (i = low; i < high; i += 2)
5239 {
5240 bool reg0 = save_p (i, leaf_function);
5241 bool reg1 = save_p (i + 1, leaf_function);
5242 machine_mode mode;
5243 int regno;
5244
5245 if (reg0 && reg1)
5246 {
5247 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5248 regno = i;
5249 }
5250 else if (reg0)
5251 {
5252 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5253 regno = i;
5254 }
5255 else if (reg1)
5256 {
5257 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5258 regno = i + 1;
5259 offset += 4;
5260 }
5261 else
5262 {
5263 if (action_false == SORR_ADVANCE)
5264 offset += 8;
5265 continue;
5266 }
5267
5268 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5269 if (action_true == SORR_SAVE)
5270 {
5271 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5272 RTX_FRAME_RELATED_P (insn) = 1;
5273 if (mode == DImode)
5274 {
5275 rtx set1, set2;
5276 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5277 offset));
5278 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5279 RTX_FRAME_RELATED_P (set1) = 1;
5280 mem
5281 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5282 offset + 4));
5283 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5284 RTX_FRAME_RELATED_P (set2) = 1;
5285 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5286 gen_rtx_PARALLEL (VOIDmode,
5287 gen_rtvec (2, set1, set2)));
5288 }
5289 }
5290 else /* action_true == SORR_RESTORE */
5291 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5292
5293 /* Bump and round down to double word
5294 in case we already bumped by 4. */
5295 offset = ROUND_DOWN (offset + 8, 8);
5296 }
5297 }
5298
5299 return offset;
5300 }
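/* Example of the pairing logic above (a sketch, 32-bit): if both %l0
   and %l1 must be saved, a single DImode (std) move covers the pair; if
   only %l1 is live, it is saved alone as SImode at OFFSET + 4; either
   way the offset advances by a full double word.  */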
5301
5302 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5303
5304 static rtx
5305 emit_adjust_base_to_offset (rtx base, int offset)
5306 {
5307 /* ??? This might be optimized a little as %g1 might already have a
5308 value close enough that a single add insn will do. */
5309 /* ??? Although, all of this is probably only a temporary fix because
5310 if %g1 can hold a function result, then sparc_expand_epilogue will
5311 lose (the result will be clobbered). */
5312 rtx new_base = gen_rtx_REG (Pmode, 1);
5313 emit_move_insn (new_base, GEN_INT (offset));
5314 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5315 return new_base;
5316 }
5317
5318 /* Emit code to save/restore call-saved global and FP registers. */
5319
5320 static void
5321 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5322 {
5323 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5324 {
5325 base = emit_adjust_base_to_offset (base, offset);
5326 offset = 0;
5327 }
5328
5329 offset
5330 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5331 save_global_or_fp_reg_p, action, SORR_NONE);
5332 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5333 save_global_or_fp_reg_p, action, SORR_NONE);
5334 }
5335
5336 /* Emit code to save/restore call-saved local and in registers. */
5337
5338 static void
5339 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5340 {
5341 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5342 {
5343 base = emit_adjust_base_to_offset (base, offset);
5344 offset = 0;
5345 }
5346
5347 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5348 save_local_or_in_reg_p, action, SORR_ADVANCE);
5349 }
5350
5351 /* Emit a window_save insn. */
5352
5353 static rtx_insn *
5354 emit_window_save (rtx increment)
5355 {
5356 rtx_insn *insn = emit_insn (gen_window_save (increment));
5357 RTX_FRAME_RELATED_P (insn) = 1;
5358
5359 /* The incoming return address (%o7) is saved in %i7. */
5360 add_reg_note (insn, REG_CFA_REGISTER,
5361 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5362 gen_rtx_REG (Pmode,
5363 INCOMING_RETURN_ADDR_REGNUM)));
5364
5365 /* The window save event. */
5366 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5367
5368 /* The CFA is %fp, the hard frame pointer. */
5369 add_reg_note (insn, REG_CFA_DEF_CFA,
5370 plus_constant (Pmode, hard_frame_pointer_rtx,
5371 INCOMING_FRAME_SP_OFFSET));
5372
5373 return insn;
5374 }
5375
5376 /* Generate an increment for the stack pointer. */
5377
5378 static rtx
5379 gen_stack_pointer_inc (rtx increment)
5380 {
5381 return gen_rtx_SET (stack_pointer_rtx,
5382 gen_rtx_PLUS (Pmode,
5383 stack_pointer_rtx,
5384 increment));
5385 }
5386
5387 /* Expand the function prologue. The prologue is responsible for reserving
5388 storage for the frame, saving the call-saved registers and loading the
5389 GOT register if needed. */
5390
5391 void
5392 sparc_expand_prologue (void)
5393 {
5394 HOST_WIDE_INT size;
5395 rtx_insn *insn;
5396
5397 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5398 on the final value of the flag means deferring the prologue/epilogue
5399 expansion until just before the second scheduling pass, which is too
5400 late to emit multiple epilogues or return insns.
5401
5402 Of course we are making the assumption that the value of the flag
5403 will not change between now and its final value. Of the three parts
5404 of the formula, only the last one can reasonably vary. Let's take a
5405 closer look, after assuming that the first two ones are set to true
5406 (otherwise the last value is effectively silenced).
5407
5408 If only_leaf_regs_used returns false, the global predicate will also
5409 be false so the actual frame size calculated below will be positive.
5410 As a consequence, the save_register_window insn will be emitted in
5411 the instruction stream; now this insn explicitly references %fp
5412 which is not a leaf register so only_leaf_regs_used will always
5413 return false subsequently.
5414
5415 If only_leaf_regs_used returns true, we hope that the subsequent
5416 optimization passes won't cause non-leaf registers to pop up. For
5417 example, the regrename pass has special provisions to not rename to
5418 non-leaf registers in a leaf function. */
5419 sparc_leaf_function_p
5420 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5421
5422 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5423
5424 if (flag_stack_usage_info)
5425 current_function_static_stack_size = size;
5426
5427 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5428 {
5429 if (crtl->is_leaf && !cfun->calls_alloca)
5430 {
5431 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5432 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5433 size - STACK_CHECK_PROTECT);
5434 }
5435 else if (size > 0)
5436 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5437 }
5438
5439 if (size == 0)
5440 ; /* do nothing. */
5441 else if (sparc_leaf_function_p)
5442 {
5443 rtx size_int_rtx = GEN_INT (-size);
5444
5445 if (size <= 4096)
5446 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5447 else if (size <= 8192)
5448 {
5449 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5450 RTX_FRAME_RELATED_P (insn) = 1;
5451
5452 /* %sp is still the CFA register. */
5453 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5454 }
5455 else
5456 {
5457 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5458 emit_move_insn (size_rtx, size_int_rtx);
5459 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5460 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5461 gen_stack_pointer_inc (size_int_rtx));
5462 }
5463
5464 RTX_FRAME_RELATED_P (insn) = 1;
5465 }
5466 else
5467 {
5468 rtx size_int_rtx = GEN_INT (-size);
5469
5470 if (size <= 4096)
5471 emit_window_save (size_int_rtx);
5472 else if (size <= 8192)
5473 {
5474 emit_window_save (GEN_INT (-4096));
5475
5476 /* %sp is not the CFA register anymore. */
5477 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5478
5479 /* Make sure no %fp-based store is issued until after the frame is
5480 established. The offset between the frame pointer and the stack
5481 pointer is calculated relative to the value of the stack pointer
5482 at the end of the function prologue, and moving instructions that
5483 access the stack via the frame pointer between the instructions
5484 that decrement the stack pointer could result in accessing the
5485 register window save area, which is volatile. */
5486 emit_insn (gen_frame_blockage ());
5487 }
5488 else
5489 {
5490 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5491 emit_move_insn (size_rtx, size_int_rtx);
5492 emit_window_save (size_rtx);
5493 }
5494 }
5495
5496 if (sparc_leaf_function_p)
5497 {
5498 sparc_frame_base_reg = stack_pointer_rtx;
5499 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5500 }
5501 else
5502 {
5503 sparc_frame_base_reg = hard_frame_pointer_rtx;
5504 sparc_frame_base_offset = SPARC_STACK_BIAS;
5505 }
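/* Illustrative note: on 64-bit, where SPARC_STACK_BIAS is 2047, a leaf
   function with a 176-byte frame thus addresses its locals at
   %sp + 176 + 2047, while a non-leaf function addresses them at
   %fp + 2047. */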
5506
5507 if (sparc_n_global_fp_regs > 0)
5508 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5509 sparc_frame_base_offset
5510 - sparc_apparent_frame_size,
5511 SORR_SAVE);
5512
5513 /* Load the GOT register if needed. */
5514 if (crtl->uses_pic_offset_table)
5515 load_got_register ();
5516
5517 /* Advertise that the data calculated just above are now valid. */
5518 sparc_prologue_data_valid_p = true;
5519 }
5520
5521 /* Expand the function prologue. The prologue is responsible for reserving
5522 storage for the frame, saving the call-saved registers and loading the
5523 GOT register if needed. */
5524
5525 void
5526 sparc_flat_expand_prologue (void)
5527 {
5528 HOST_WIDE_INT size;
5529 rtx_insn *insn;
5530
5531 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5532
5533 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5534
5535 if (flag_stack_usage_info)
5536 current_function_static_stack_size = size;
5537
5538 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5539 {
5540 if (crtl->is_leaf && !cfun->calls_alloca)
5541 {
5542 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5543 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5544 size - STACK_CHECK_PROTECT);
5545 }
5546 else if (size > 0)
5547 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5548 }
5549
5550 if (sparc_save_local_in_regs_p)
5551 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5552 SORR_SAVE);
5553
5554 if (size == 0)
5555 ; /* do nothing. */
5556 else
5557 {
5558 rtx size_int_rtx, size_rtx;
5559
5560 size_rtx = size_int_rtx = GEN_INT (-size);
5561
5562 /* We establish the frame (i.e. decrement the stack pointer) first, even
5563 if we use a frame pointer, because we cannot clobber any call-saved
5564 registers, including the frame pointer, if we haven't created a new
5565 register save area, for the sake of compatibility with the ABI. */
5566 if (size <= 4096)
5567 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5568 else if (size <= 8192 && !frame_pointer_needed)
5569 {
5570 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5571 RTX_FRAME_RELATED_P (insn) = 1;
5572 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5573 }
5574 else
5575 {
5576 size_rtx = gen_rtx_REG (Pmode, 1);
5577 emit_move_insn (size_rtx, size_int_rtx);
5578 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5579 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5580 gen_stack_pointer_inc (size_int_rtx));
5581 }
5582 RTX_FRAME_RELATED_P (insn) = 1;
5583
5584 /* Ensure nothing is scheduled until after the frame is established. */
5585 emit_insn (gen_blockage ());
5586
5587 if (frame_pointer_needed)
5588 {
5589 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
5590 gen_rtx_MINUS (Pmode,
5591 stack_pointer_rtx,
5592 size_rtx)));
5593 RTX_FRAME_RELATED_P (insn) = 1;
5594
5595 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5596 gen_rtx_SET (hard_frame_pointer_rtx,
5597 plus_constant (Pmode, stack_pointer_rtx,
5598 size)));
5599 }
5600
5601 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5602 {
5603 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5604 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5605
5606 insn = emit_move_insn (i7, o7);
5607 RTX_FRAME_RELATED_P (insn) = 1;
5608
5609 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
5610
5611 /* Prevent this instruction from ever being considered dead,
5612 even if this function has no epilogue. */
5613 emit_use (i7);
5614 }
5615 }
5616
5617 if (frame_pointer_needed)
5618 {
5619 sparc_frame_base_reg = hard_frame_pointer_rtx;
5620 sparc_frame_base_offset = SPARC_STACK_BIAS;
5621 }
5622 else
5623 {
5624 sparc_frame_base_reg = stack_pointer_rtx;
5625 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5626 }
5627
5628 if (sparc_n_global_fp_regs > 0)
5629 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5630 sparc_frame_base_offset
5631 - sparc_apparent_frame_size,
5632 SORR_SAVE);
5633
5634 /* Load the GOT register if needed. */
5635 if (crtl->uses_pic_offset_table)
5636 load_got_register ();
5637
5638 /* Advertise that the data calculated just above are now valid. */
5639 sparc_prologue_data_valid_p = true;
5640 }
5641
5642 /* This function generates the assembly code for function entry, which boils
5643 down to emitting the necessary .register directives. */
5644
5645 static void
5646 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5647 {
5648 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5649 if (!TARGET_FLAT)
5650 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5651
5652 sparc_output_scratch_registers (file);
5653 }
5654
5655 /* Expand the function epilogue, either normal or part of a sibcall.
5656 We emit all the instructions except the return or the call. */
5657
5658 void
5659 sparc_expand_epilogue (bool for_eh)
5660 {
5661 HOST_WIDE_INT size = sparc_frame_size;
5662
5663 if (sparc_n_global_fp_regs > 0)
5664 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5665 sparc_frame_base_offset
5666 - sparc_apparent_frame_size,
5667 SORR_RESTORE);
5668
5669 if (size == 0 || for_eh)
5670 ; /* do nothing. */
5671 else if (sparc_leaf_function_p)
5672 {
5673 if (size <= 4096)
5674 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5675 else if (size <= 8192)
5676 {
5677 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5678 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5679 }
5680 else
5681 {
5682 rtx reg = gen_rtx_REG (Pmode, 1);
5683 emit_move_insn (reg, GEN_INT (size));
5684 emit_insn (gen_stack_pointer_inc (reg));
5685 }
5686 }
5687 }
5688
5689 /* Expand the function epilogue, either normal or part of a sibcall.
5690 We emit all the instructions except the return or the call. */
5691
5692 void
5693 sparc_flat_expand_epilogue (bool for_eh)
5694 {
5695 HOST_WIDE_INT size = sparc_frame_size;
5696
5697 if (sparc_n_global_fp_regs > 0)
5698 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5699 sparc_frame_base_offset
5700 - sparc_apparent_frame_size,
5701 SORR_RESTORE);
5702
5703 /* If we have a frame pointer, we'll need both to restore it before the
5704 frame is destroyed and to use its current value in destroying the frame.
5705 Since we don't have an atomic way to do that in the flat window model,
5706 we save the current value into a temporary register (%g1). */
5707 if (frame_pointer_needed && !for_eh)
5708 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5709
5710 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5711 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5712 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5713
5714 if (sparc_save_local_in_regs_p)
5715 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5716 sparc_frame_base_offset,
5717 SORR_RESTORE);
5718
5719 if (size == 0 || for_eh)
5720 ; /* do nothing. */
5721 else if (frame_pointer_needed)
5722 {
5723 /* Make sure the frame is destroyed after everything else is done. */
5724 emit_insn (gen_blockage ());
5725
5726 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5727 }
5728 else
5729 {
5730 /* Likewise. */
5731 emit_insn (gen_blockage ());
5732
5733 if (size <= 4096)
5734 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5735 else if (size <= 8192)
5736 {
5737 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5738 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5739 }
5740 else
5741 {
5742 rtx reg = gen_rtx_REG (Pmode, 1);
5743 emit_move_insn (reg, GEN_INT (size));
5744 emit_insn (gen_stack_pointer_inc (reg));
5745 }
5746 }
5747 }
5748
5749 /* Return true if it is appropriate to emit `return' instructions in the
5750 body of a function. */
5751
5752 bool
5753 sparc_can_use_return_insn_p (void)
5754 {
5755 return sparc_prologue_data_valid_p
5756 && sparc_n_global_fp_regs == 0
5757 && TARGET_FLAT
5758 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5759 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
5760 }
5761
5762 /* This function generates the assembly code for function exit. */
5763
5764 static void
5765 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5766 {
5767 /* If the last two instructions of a function are "call foo; dslot;"
5768 the return address might point to the first instruction in the next
5769 function and we have to output a dummy nop for the sake of sane
5770 backtraces in such cases. This is pointless for sibling calls since
5771 the return address is explicitly adjusted. */
5772
5773 rtx insn, last_real_insn;
5774
5775 insn = get_last_insn ();
5776
5777 last_real_insn = prev_real_insn (insn);
5778 if (last_real_insn
5779 && NONJUMP_INSN_P (last_real_insn)
5780 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5781 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5782
5783 if (last_real_insn
5784 && CALL_P (last_real_insn)
5785 && !SIBLING_CALL_P (last_real_insn))
5786 fputs ("\tnop\n", file);
5787
5788 sparc_output_deferred_case_vectors ();
5789 }
5790
5791 /* Output a 'restore' instruction. */
5792
5793 static void
5794 output_restore (rtx pat)
5795 {
5796 rtx operands[3];
5797
5798 if (! pat)
5799 {
5800 fputs ("\t restore\n", asm_out_file);
5801 return;
5802 }
5803
5804 gcc_assert (GET_CODE (pat) == SET);
5805
5806 operands[0] = SET_DEST (pat);
5807 pat = SET_SRC (pat);
5808
5809 switch (GET_CODE (pat))
5810 {
5811 case PLUS:
5812 operands[1] = XEXP (pat, 0);
5813 operands[2] = XEXP (pat, 1);
5814 output_asm_insn (" restore %r1, %2, %Y0", operands);
5815 break;
5816 case LO_SUM:
5817 operands[1] = XEXP (pat, 0);
5818 operands[2] = XEXP (pat, 1);
5819 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5820 break;
5821 case ASHIFT:
5822 operands[1] = XEXP (pat, 0);
5823 gcc_assert (XEXP (pat, 1) == const1_rtx);
5824 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5825 break;
5826 default:
5827 operands[1] = pat;
5828 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5829 break;
5830 }
5831 }
5832
5833 /* Output a return. */
5834
5835 const char *
5836 output_return (rtx_insn *insn)
5837 {
5838 if (crtl->calls_eh_return)
5839 {
5840 /* If the function uses __builtin_eh_return, the eh_return
5841 machinery occupies the delay slot. */
5842 gcc_assert (!final_sequence);
5843
5844 if (flag_delayed_branch)
5845 {
5846 if (!TARGET_FLAT && TARGET_V9)
5847 fputs ("\treturn\t%i7+8\n", asm_out_file);
5848 else
5849 {
5850 if (!TARGET_FLAT)
5851 fputs ("\trestore\n", asm_out_file);
5852
5853 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5854 }
5855
5856 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5857 }
5858 else
5859 {
5860 if (!TARGET_FLAT)
5861 fputs ("\trestore\n", asm_out_file);
5862
5863 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5864 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5865 }
5866 }
5867 else if (sparc_leaf_function_p || TARGET_FLAT)
5868 {
5869 /* This is a leaf or flat function so we don't have to bother restoring
5870 the register window, which frees us from dealing with the convoluted
5871 semantics of restore/return. We simply output the jump to the
5872 return address and the insn in the delay slot (if any). */
5873
5874 return "jmp\t%%o7+%)%#";
5875 }
5876 else
5877 {
5878 /* This is a regular function so we have to restore the register window.
5879 We may have a pending insn for the delay slot, which will be either
5880 combined with the 'restore' instruction or put in the delay slot of
5881 the 'return' instruction. */
5882
5883 if (final_sequence)
5884 {
5885 rtx delay, pat;
5886
5887 delay = NEXT_INSN (insn);
5888 gcc_assert (delay);
5889
5890 pat = PATTERN (delay);
5891
5892 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
5893 {
5894 epilogue_renumber (&pat, 0);
5895 return "return\t%%i7+%)%#";
5896 }
5897 else
5898 {
5899 output_asm_insn ("jmp\t%%i7+%)", NULL);
5900 output_restore (pat);
5901 PATTERN (delay) = gen_blockage ();
5902 INSN_CODE (delay) = -1;
5903 }
5904 }
5905 else
5906 {
5907 /* The delay slot is empty. */
5908 if (TARGET_V9)
5909 return "return\t%%i7+%)\n\t nop";
5910 else if (flag_delayed_branch)
5911 return "jmp\t%%i7+%)\n\t restore";
5912 else
5913 return "restore\n\tjmp\t%%o7+%)\n\t nop";
5914 }
5915 }
5916
5917 return "";
5918 }
5919
5920 /* Output a sibling call. */
5921
5922 const char *
5923 output_sibcall (rtx_insn *insn, rtx call_operand)
5924 {
5925 rtx operands[1];
5926
5927 gcc_assert (flag_delayed_branch);
5928
5929 operands[0] = call_operand;
5930
5931 if (sparc_leaf_function_p || TARGET_FLAT)
5932 {
5933 /* This is a leaf or flat function so we don't have to bother restoring
5934 the register window. We simply output the jump to the function and
5935 the insn in the delay slot (if any). */
5936
5937 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
5938
5939 if (final_sequence)
5940 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
5941 operands);
5942 else
5943 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
5944 it into a branch if possible. */
5945 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
5946 operands);
5947 }
5948 else
5949 {
5950 /* This is a regular function so we have to restore the register window.
5951 We may have a pending insn for the delay slot, which will be combined
5952 with the 'restore' instruction. */
5953
5954 output_asm_insn ("call\t%a0, 0", operands);
5955
5956 if (final_sequence)
5957 {
5958 rtx_insn *delay = NEXT_INSN (insn);
5959 gcc_assert (delay);
5960
5961 output_restore (PATTERN (delay));
5962
5963 PATTERN (delay) = gen_blockage ();
5964 INSN_CODE (delay) = -1;
5965 }
5966 else
5967 output_restore (NULL_RTX);
5968 }
5969
5970 return "";
5971 }
5972 \f
5973 /* Functions for handling argument passing.
5974
5975 For 32-bit, the first 6 args are normally in registers and the rest are
5976 pushed. Any arg that starts within the first 6 words is at least
5977 partially passed in a register unless its data type forbids it.
5978
5979 For 64-bit, the argument registers are laid out as an array of 16 elements
5980 and arguments are added sequentially. The first 6 int args and up to the
5981 first 16 fp args (depending on size) are passed in regs.
5982
5983 Slot Stack Integral Float Float in structure Double Long Double
5984 ---- ----- -------- ----- ------------------ ------ -----------
5985 15 [SP+248] %f31 %f30,%f31 %d30
5986 14 [SP+240] %f29 %f28,%f29 %d28 %q28
5987 13 [SP+232] %f27 %f26,%f27 %d26
5988 12 [SP+224] %f25 %f24,%f25 %d24 %q24
5989 11 [SP+216] %f23 %f22,%f23 %d22
5990 10 [SP+208] %f21 %f20,%f21 %d20 %q20
5991 9 [SP+200] %f19 %f18,%f19 %d18
5992 8 [SP+192] %f17 %f16,%f17 %d16 %q16
5993 7 [SP+184] %f15 %f14,%f15 %d14
5994 6 [SP+176] %f13 %f12,%f13 %d12 %q12
5995 5 [SP+168] %o5 %f11 %f10,%f11 %d10
5996 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
5997 3 [SP+152] %o3 %f7 %f6,%f7 %d6
5998 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
5999 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6000 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6001
6002 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6003
6004 Integral arguments are always passed as 64-bit quantities appropriately
6005 extended.
6006
6007 Passing of floating point values is handled as follows.
6008 If a prototype is in scope:
6009 If the value is in a named argument (i.e. not a stdarg function or a
6010 value not part of the `...') then the value is passed in the appropriate
6011 fp reg.
6012 If the value is part of the `...' and is passed in one of the first 6
6013 slots then the value is passed in the appropriate int reg.
6014 If the value is part of the `...' and is not passed in one of the first 6
6015 slots then the value is passed in memory.
6016 If a prototype is not in scope:
6017 If the value is one of the first 6 arguments the value is passed in the
6018 appropriate integer reg and the appropriate fp reg.
6019 If the value is not one of the first 6 arguments the value is passed in
6020 the appropriate fp reg and in memory.
6021
6022
6023 Summary of the calling conventions implemented by GCC on the SPARC:
6024
6025 32-bit ABI:
6026 size argument return value
6027
6028 small integer <4 int. reg. int. reg.
6029 word 4 int. reg. int. reg.
6030 double word 8 int. reg. int. reg.
6031
6032 _Complex small integer <8 int. reg. int. reg.
6033 _Complex word 8 int. reg. int. reg.
6034 _Complex double word 16 memory int. reg.
6035
6036 vector integer <=8 int. reg. FP reg.
6037 vector integer >8 memory memory
6038
6039 float 4 int. reg. FP reg.
6040 double 8 int. reg. FP reg.
6041 long double 16 memory memory
6042
6043 _Complex float 8 memory FP reg.
6044 _Complex double 16 memory FP reg.
6045 _Complex long double 32 memory FP reg.
6046
6047 vector float any memory memory
6048
6049 aggregate any memory memory
6050
6051
6052
6053 64-bit ABI:
6054 size argument return value
6055
6056 small integer <8 int. reg. int. reg.
6057 word 8 int. reg. int. reg.
6058 double word 16 int. reg. int. reg.
6059
6060 _Complex small integer <16 int. reg. int. reg.
6061 _Complex word 16 int. reg. int. reg.
6062 _Complex double word 32 memory int. reg.
6063
6064 vector integer <=16 FP reg. FP reg.
6065 vector integer 16<s<=32 memory FP reg.
6066 vector integer >32 memory memory
6067
6068 float 4 FP reg. FP reg.
6069 double 8 FP reg. FP reg.
6070 long double 16 FP reg. FP reg.
6071
6072 _Complex float 8 FP reg. FP reg.
6073 _Complex double 16 FP reg. FP reg.
6074 _Complex long double 32 memory FP reg.
6075
6076 vector float <=16 FP reg. FP reg.
6077 vector float 16<s<=32 memory FP reg.
6078 vector float >32 memory memory
6079
6080 aggregate <=16 reg. reg.
6081 aggregate 16<s<=32 memory reg.
6082 aggregate >32 memory memory
6083
6084
6085
6086 Note #1: complex floating-point types follow the extended SPARC ABIs as
6087 implemented by the Sun compiler.
6088
6089 Note #2: integral vector types follow the scalar floating-point types
6090 conventions to match what is implemented by the Sun VIS SDK.
6091
6092 Note #3: floating-point vector types follow the aggregate types
6093 conventions. */
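/* Illustrative example (not part of the implementation): under the
   64-bit ABI described above, a prototyped call to

   void f (int a, double b, struct { float x; float y; } s);

   passes A in %o0 (slot 0), B in %d2 (slot 1) and S in the FP pair
   %f4/%f5 (slot 2), with the corresponding stack slots merely
   reserved. */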
6094
6095
6096 /* Maximum number of int regs for args. */
6097 #define SPARC_INT_ARG_MAX 6
6098 /* Maximum number of fp regs for args. */
6099 #define SPARC_FP_ARG_MAX 16
6100
6101 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
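/* For example, with UNITS_PER_WORD == 8 (64-bit), ROUND_ADVANCE (12)
   yields 2: a 12-byte argument consumes two parameter slots. */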
6102
6103 /* Handle the INIT_CUMULATIVE_ARGS macro.
6104 Initialize a variable CUM of type CUMULATIVE_ARGS
6105 for a call to a function whose data type is FNTYPE.
6106 For a library call, FNTYPE is 0. */
6107
6108 void
6109 init_cumulative_args (struct sparc_args *cum, tree fntype,
6110 rtx libname ATTRIBUTE_UNUSED,
6111 tree fndecl ATTRIBUTE_UNUSED)
6112 {
6113 cum->words = 0;
6114 cum->prototype_p = fntype && prototype_p (fntype);
6115 cum->libcall_p = fntype == 0;
6116 }
6117
6118 /* Handle promotion of pointer and integer arguments. */
6119
6120 static machine_mode
6121 sparc_promote_function_mode (const_tree type,
6122 machine_mode mode,
6123 int *punsignedp,
6124 const_tree fntype ATTRIBUTE_UNUSED,
6125 int for_return ATTRIBUTE_UNUSED)
6126 {
6127 if (type != NULL_TREE && POINTER_TYPE_P (type))
6128 {
6129 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6130 return Pmode;
6131 }
6132
6133 /* Integral arguments are passed as full words, as per the ABI. */
6134 if (GET_MODE_CLASS (mode) == MODE_INT
6135 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6136 return word_mode;
6137
6138 return mode;
6139 }
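/* For instance, a 'short' argument (HImode) is promoted to word_mode,
   i.e. SImode on 32-bit and DImode on 64-bit, while pointers are
   promoted to Pmode with unsigned extension. */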
6140
6141 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6142
6143 static bool
6144 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6145 {
6146 return TARGET_ARCH64;
6147 }
6148
6149 /* Scan the record type TYPE and return the following predicates:
6150 - INTREGS_P: the record contains at least one field or sub-field
6151 that is eligible for promotion in integer registers.
6152 - FP_REGS_P: the record contains at least one field or sub-field
6153 that is eligible for promotion in floating-point registers.
6154 - PACKED_P: the record contains at least one field that is packed.
6155
6156 Sub-fields are not taken into account for the PACKED_P predicate. */
6157
6158 static void
6159 scan_record_type (const_tree type, int *intregs_p, int *fpregs_p,
6160 int *packed_p)
6161 {
6162 tree field;
6163
6164 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6165 {
6166 if (TREE_CODE (field) == FIELD_DECL)
6167 {
6168 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6169 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
6170 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6171 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6172 && TARGET_FPU)
6173 *fpregs_p = 1;
6174 else
6175 *intregs_p = 1;
6176
6177 if (packed_p && DECL_PACKED (field))
6178 *packed_p = 1;
6179 }
6180 }
6181 }
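/* For example, scanning 'struct { float f; int i; }' sets both
   *FPREGS_P (for the float field, provided TARGET_FPU) and
   *INTREGS_P (for the int field); a field declared with
   __attribute__((packed)) would additionally set *PACKED_P. */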
6182
6183 /* Compute the slot number to pass an argument in.
6184 Return the slot number or -1 if passing on the stack.
6185
6186 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6187 the preceding args and about the function being called.
6188 MODE is the argument's machine mode.
6189 TYPE is the data type of the argument (as a tree).
6190 This is null for libcalls where that information may
6191 not be available.
6192 NAMED is nonzero if this argument is a named parameter
6193 (otherwise it is an extra parameter matching an ellipsis).
6194 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6195 *PREGNO records the register number to use if scalar type.
6196 *PPADDING records the amount of padding needed in words. */
6197
6198 static int
6199 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6200 const_tree type, bool named, bool incoming_p,
6201 int *pregno, int *ppadding)
6202 {
6203 int regbase = (incoming_p
6204 ? SPARC_INCOMING_INT_ARG_FIRST
6205 : SPARC_OUTGOING_INT_ARG_FIRST);
6206 int slotno = cum->words;
6207 enum mode_class mclass;
6208 int regno;
6209
6210 *ppadding = 0;
6211
6212 if (type && TREE_ADDRESSABLE (type))
6213 return -1;
6214
6215 if (TARGET_ARCH32
6216 && mode == BLKmode
6217 && type
6218 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6219 return -1;
6220
6221 /* For SPARC64, objects requiring 16-byte alignment get it. */
6222 if (TARGET_ARCH64
6223 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6224 && (slotno & 1) != 0)
6225 slotno++, *ppadding = 1;
6226
6227 mclass = GET_MODE_CLASS (mode);
6228 if (type && TREE_CODE (type) == VECTOR_TYPE)
6229 {
6230 /* Vector types deserve special treatment because they are
6231 polymorphic wrt their mode, depending upon whether VIS
6232 instructions are enabled. */
6233 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6234 {
6235 /* The SPARC port defines no floating-point vector modes. */
6236 gcc_assert (mode == BLKmode);
6237 }
6238 else
6239 {
6240 /* Integral vector types should either have a vector
6241 mode or an integral mode, because we are guaranteed
6242 by pass_by_reference that their size is not greater
6243 than 16 bytes and TImode is 16-byte wide. */
6244 gcc_assert (mode != BLKmode);
6245
6246 /* Vector integers are handled like floats according to
6247 the Sun VIS SDK. */
6248 mclass = MODE_FLOAT;
6249 }
6250 }
6251
6252 switch (mclass)
6253 {
6254 case MODE_FLOAT:
6255 case MODE_COMPLEX_FLOAT:
6256 case MODE_VECTOR_INT:
6257 if (TARGET_ARCH64 && TARGET_FPU && named)
6258 {
6259 if (slotno >= SPARC_FP_ARG_MAX)
6260 return -1;
6261 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6262 /* Arguments filling only a single FP register are
6263 right-justified in the outer double FP register. */
6264 if (GET_MODE_SIZE (mode) <= 4)
6265 regno++;
6266 break;
6267 }
6268 /* fallthrough */
6269
6270 case MODE_INT:
6271 case MODE_COMPLEX_INT:
6272 if (slotno >= SPARC_INT_ARG_MAX)
6273 return -1;
6274 regno = regbase + slotno;
6275 break;
6276
6277 case MODE_RANDOM:
6278 if (mode == VOIDmode)
6279 /* MODE is VOIDmode when generating the actual call. */
6280 return -1;
6281
6282 gcc_assert (mode == BLKmode);
6283
6284 if (TARGET_ARCH32
6285 || !type
6286 || (TREE_CODE (type) != VECTOR_TYPE
6287 && TREE_CODE (type) != RECORD_TYPE))
6288 {
6289 if (slotno >= SPARC_INT_ARG_MAX)
6290 return -1;
6291 regno = regbase + slotno;
6292 }
6293 else /* TARGET_ARCH64 && type */
6294 {
6295 int intregs_p = 0, fpregs_p = 0, packed_p = 0;
6296
6297 /* First see what kinds of registers we would need. */
6298 if (TREE_CODE (type) == VECTOR_TYPE)
6299 fpregs_p = 1;
6300 else
6301 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
6302
6303 /* The ABI obviously doesn't specify how packed structures
6304 are passed. These are defined to be passed in int regs
6305 if possible, otherwise memory. */
6306 if (packed_p || !named)
6307 fpregs_p = 0, intregs_p = 1;
6308
6309 /* If all arg slots are filled, then must pass on stack. */
6310 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
6311 return -1;
6312
6313 /* If there are only int args and all int arg slots are filled,
6314 then must pass on stack. */
6315 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
6316 return -1;
6317
6318 /* Note that even if all int arg slots are filled, fp members may
6319 still be passed in regs if such regs are available.
6320 *PREGNO isn't set because there may be more than one, it's up
6321 to the caller to compute them. */
6322 return slotno;
6323 }
6324 break;
6325
6326 default:
6327 gcc_unreachable ();
6328 }
6329
6330 *pregno = regno;
6331 return slotno;
6332 }
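/* For instance, on 64-bit a named 'double' argument occupying the third
   parameter slot gets slotno == 2 and *PREGNO == SPARC_FP_ARG_FIRST + 4,
   i.e. the %d4 register per the table above. */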
6333
6334 /* Handle recursive register counting for structure field layout. */
6335
6336 struct function_arg_record_value_parms
6337 {
6338 rtx ret; /* return expression being built. */
6339 int slotno; /* slot number of the argument. */
6340 int named; /* whether the argument is named. */
6341 int regbase; /* regno of the base register. */
6342 int stack; /* 1 if part of the argument is on the stack. */
6343 int intoffset; /* offset of the first pending integer field. */
6344 unsigned int nregs; /* number of words passed in registers. */
6345 };
6346
6347 static void function_arg_record_value_3
6348 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
6349 static void function_arg_record_value_2
6350 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6351 static void function_arg_record_value_1
6352 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6353 static rtx function_arg_record_value (const_tree, machine_mode, int, int, int);
6354 static rtx function_arg_union_value (int, machine_mode, int, int);
6355
6356 /* A subroutine of function_arg_record_value. Traverse the structure
6357 recursively and determine how many registers will be required. */
6358
6359 static void
6360 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
6361 struct function_arg_record_value_parms *parms,
6362 bool packed_p)
6363 {
6364 tree field;
6365
6366 /* We need to compute how many registers are needed so we can
6367 allocate the PARALLEL but before we can do that we need to know
6368 whether there are any packed fields. The ABI obviously doesn't
6369 specify how structures are passed in this case, so they are
6370 defined to be passed in int regs if possible, otherwise memory,
6371 regardless of whether there are fp values present. */
6372
6373 if (! packed_p)
6374 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6375 {
6376 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6377 {
6378 packed_p = true;
6379 break;
6380 }
6381 }
6382
6383 /* Compute how many registers we need. */
6384 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6385 {
6386 if (TREE_CODE (field) == FIELD_DECL)
6387 {
6388 HOST_WIDE_INT bitpos = startbitpos;
6389
6390 if (DECL_SIZE (field) != 0)
6391 {
6392 if (integer_zerop (DECL_SIZE (field)))
6393 continue;
6394
6395 if (tree_fits_uhwi_p (bit_position (field)))
6396 bitpos += int_bit_position (field);
6397 }
6398
6399 /* ??? FIXME: else assume zero offset. */
6400
6401 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6402 function_arg_record_value_1 (TREE_TYPE (field),
6403 bitpos,
6404 parms,
6405 packed_p);
6406 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6407 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6408 && TARGET_FPU
6409 && parms->named
6410 && ! packed_p)
6411 {
6412 if (parms->intoffset != -1)
6413 {
6414 unsigned int startbit, endbit;
6415 int intslots, this_slotno;
6416
6417 startbit = ROUND_DOWN (parms->intoffset, BITS_PER_WORD);
6418 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6419
6420 intslots = (endbit - startbit) / BITS_PER_WORD;
6421 this_slotno = parms->slotno + parms->intoffset
6422 / BITS_PER_WORD;
6423
6424 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6425 {
6426 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6427 /* We need to pass this field on the stack. */
6428 parms->stack = 1;
6429 }
6430
6431 parms->nregs += intslots;
6432 parms->intoffset = -1;
6433 }
6434
6435 /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
6436 If it wasn't true we wouldn't be here. */
6437 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6438 && DECL_MODE (field) == BLKmode)
6439 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6440 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6441 parms->nregs += 2;
6442 else
6443 parms->nregs += 1;
6444 }
6445 else
6446 {
6447 if (parms->intoffset == -1)
6448 parms->intoffset = bitpos;
6449 }
6450 }
6451 }
6452 }
6453
6454 /* A subroutine of function_arg_record_value. Assign the bits of the
6455 structure between parms->intoffset and bitpos to integer registers. */
6456
6457 static void
6458 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
6459 struct function_arg_record_value_parms *parms)
6460 {
6461 machine_mode mode;
6462 unsigned int regno;
6463 unsigned int startbit, endbit;
6464 int this_slotno, intslots, intoffset;
6465 rtx reg;
6466
6467 if (parms->intoffset == -1)
6468 return;
6469
6470 intoffset = parms->intoffset;
6471 parms->intoffset = -1;
6472
6473 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6474 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6475 intslots = (endbit - startbit) / BITS_PER_WORD;
6476 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
6477
6478 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
6479 if (intslots <= 0)
6480 return;
6481
6482 /* If this is the trailing part of a word, only load that much into
6483 the register. Otherwise load the whole register. Note that in
6484 the latter case we may pick up unwanted bits. It's not a problem
6485 at the moment but we may wish to revisit this. */
6486
6487 if (intoffset % BITS_PER_WORD != 0)
6488 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6489 MODE_INT);
6490 else
6491 mode = word_mode;
6492
6493 intoffset /= BITS_PER_UNIT;
6494 do
6495 {
6496 regno = parms->regbase + this_slotno;
6497 reg = gen_rtx_REG (mode, regno);
6498 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6499 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6500
6501 this_slotno += 1;
6502 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
6503 mode = word_mode;
6504 parms->nregs += 1;
6505 intslots -= 1;
6506 }
6507 while (intslots > 0);
6508 }
6509
6510 /* A subroutine of function_arg_record_value. Traverse the structure
6511 recursively and assign bits to floating point registers. Track which
6512 bits in between need integer registers; invoke function_arg_record_value_3
6513 to make that happen. */
6514
6515 static void
6516 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
6517 struct function_arg_record_value_parms *parms,
6518 bool packed_p)
6519 {
6520 tree field;
6521
6522 if (! packed_p)
6523 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6524 {
6525 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6526 {
6527 packed_p = true;
6528 break;
6529 }
6530 }
6531
6532 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6533 {
6534 if (TREE_CODE (field) == FIELD_DECL)
6535 {
6536 HOST_WIDE_INT bitpos = startbitpos;
6537
6538 if (DECL_SIZE (field) != 0)
6539 {
6540 if (integer_zerop (DECL_SIZE (field)))
6541 continue;
6542
6543 if (tree_fits_uhwi_p (bit_position (field)))
6544 bitpos += int_bit_position (field);
6545 }
6546
6547 /* ??? FIXME: else assume zero offset. */
6548
6549 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6550 function_arg_record_value_2 (TREE_TYPE (field),
6551 bitpos,
6552 parms,
6553 packed_p);
6554 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6555 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6556 && TARGET_FPU
6557 && parms->named
6558 && ! packed_p)
6559 {
6560 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
6561 int regno, nregs, pos;
6562 machine_mode mode = DECL_MODE (field);
6563 rtx reg;
6564
6565 function_arg_record_value_3 (bitpos, parms);
6566
6567 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6568 && mode == BLKmode)
6569 {
6570 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6571 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6572 }
6573 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6574 {
6575 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6576 nregs = 2;
6577 }
6578 else
6579 nregs = 1;
6580
6581 regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6582 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6583 regno++;
6584 reg = gen_rtx_REG (mode, regno);
6585 pos = bitpos / BITS_PER_UNIT;
6586 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6587 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6588 parms->nregs += 1;
6589 while (--nregs > 0)
6590 {
6591 regno += GET_MODE_SIZE (mode) / 4;
6592 reg = gen_rtx_REG (mode, regno);
6593 pos += GET_MODE_SIZE (mode);
6594 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6595 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6596 parms->nregs += 1;
6597 }
6598 }
6599 else
6600 {
6601 if (parms->intoffset == -1)
6602 parms->intoffset = bitpos;
6603 }
6604 }
6605 }
6606 }
6607
6608 /* Used by function_arg and sparc_function_value_1 to implement the complex
6609 conventions of the 64-bit ABI for passing and returning structures.
6610 Return an expression valid as a return value for the FUNCTION_ARG
6611 and TARGET_FUNCTION_VALUE.
6612
6613 TYPE is the data type of the argument (as a tree).
6614 This is null for libcalls where that information may
6615 not be available.
6616 MODE is the argument's machine mode.
6617 SLOTNO is the index number of the argument's slot in the parameter array.
6618 NAMED is nonzero if this argument is a named parameter
6619 (otherwise it is an extra parameter matching an ellipsis).
6620 REGBASE is the regno of the base register for the parameter array. */
6621
6622 static rtx
6623 function_arg_record_value (const_tree type, machine_mode mode,
6624 int slotno, int named, int regbase)
6625 {
6626 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6627 struct function_arg_record_value_parms parms;
6628 unsigned int nregs;
6629
6630 parms.ret = NULL_RTX;
6631 parms.slotno = slotno;
6632 parms.named = named;
6633 parms.regbase = regbase;
6634 parms.stack = 0;
6635
6636 /* Compute how many registers we need. */
6637 parms.nregs = 0;
6638 parms.intoffset = 0;
6639 function_arg_record_value_1 (type, 0, &parms, false);
6640
6641 /* Take into account pending integer fields. */
6642 if (parms.intoffset != -1)
6643 {
6644 unsigned int startbit, endbit;
6645 int intslots, this_slotno;
6646
6647 startbit = ROUND_DOWN (parms.intoffset, BITS_PER_WORD);
6648 endbit = ROUND_UP (typesize * BITS_PER_UNIT, BITS_PER_WORD);
6649 intslots = (endbit - startbit) / BITS_PER_WORD;
6650 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
6651
6652 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6653 {
6654 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6655 /* We need to pass this field on the stack. */
6656 parms.stack = 1;
6657 }
6658
6659 parms.nregs += intslots;
6660 }
6661 nregs = parms.nregs;
6662
6663 /* Allocate the vector and handle some annoying special cases. */
6664 if (nregs == 0)
6665 {
6666 /* ??? Empty structure has no value? Duh? */
6667 if (typesize <= 0)
6668 {
6669 /* Though there's nothing really to store, return a word register
6670 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6671 leads to breakage due to the fact that there are zero bytes to
6672 load. */
6673 return gen_rtx_REG (mode, regbase);
6674 }
6675 else
6676 {
6677 /* ??? C++ has structures with no fields, and yet a size. Give up
6678 for now and pass everything back in integer registers. */
6679 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6680 }
6681 if (nregs + slotno > SPARC_INT_ARG_MAX)
6682 nregs = SPARC_INT_ARG_MAX - slotno;
6683 }
6684 gcc_assert (nregs != 0);
6685
6686 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
6687
6688 /* If at least one field must be passed on the stack, generate
6689 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6690 also be passed on the stack. We can't do much better because the
6691 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6692 of structures for which the fields passed exclusively in registers
6693 are not at the beginning of the structure. */
6694 if (parms.stack)
6695 XVECEXP (parms.ret, 0, 0)
6696 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6697
6698 /* Fill in the entries. */
6699 parms.nregs = 0;
6700 parms.intoffset = 0;
6701 function_arg_record_value_2 (type, 0, &parms, false);
6702 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
6703
6704 gcc_assert (parms.nregs == nregs);
6705
6706 return parms.ret;
6707 }
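/* Illustrative example: for an outgoing, named argument of type
   'struct { double d; long l; }' starting in slot 0, this builds
   (parallel [(expr_list (reg:DF %f0) 0) (expr_list (reg:DI %o1) 8)]),
   i.e. the FP field goes in %d0 and the integer field in the second
   integer argument register. */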
6708
6709 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6710 of the 64-bit ABI for passing and returning unions.
6711 Return an expression valid as a return value for the FUNCTION_ARG
6712 and TARGET_FUNCTION_VALUE.
6713
6714 SIZE is the size in bytes of the union.
6715 MODE is the argument's machine mode.
SLOTNO is the index number of the argument's slot in the parameter array.
6716 REGNO is the hard register the union will be passed in. */
6717
6718 static rtx
6719 function_arg_union_value (int size, machine_mode mode, int slotno,
6720 int regno)
6721 {
6722 int nwords = ROUND_ADVANCE (size), i;
6723 rtx regs;
6724
6725 /* See comment in previous function for empty structures. */
6726 if (nwords == 0)
6727 return gen_rtx_REG (mode, regno);
6728
6729 if (slotno == SPARC_INT_ARG_MAX - 1)
6730 nwords = 1;
6731
6732 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6733
6734 for (i = 0; i < nwords; i++)
6735 {
6736 /* Unions are passed left-justified. */
6737 XVECEXP (regs, 0, i)
6738 = gen_rtx_EXPR_LIST (VOIDmode,
6739 gen_rtx_REG (word_mode, regno),
6740 GEN_INT (UNITS_PER_WORD * i));
6741 regno++;
6742 }
6743
6744 return regs;
6745 }
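/* For example, a 16-byte union starting in slot 1 yields a PARALLEL of
   two word registers at offsets 0 and 8, while the same union starting
   in the last slot (slot 5) gets a single register, the remainder
   being passed on the stack. */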
6746
6747 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6748 for passing and returning BLKmode vectors.
6749 Return an expression valid as a return value for the FUNCTION_ARG
6750 and TARGET_FUNCTION_VALUE.
6751
6752 SIZE is the size in bytes of the vector.
6753 REGNO is the FP hard register the vector will be passed in. */
6754
6755 static rtx
6756 function_arg_vector_value (int size, int regno)
6757 {
6758 const int nregs = MAX (1, size / 8);
6759 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6760
6761 if (size < 8)
6762 XVECEXP (regs, 0, 0)
6763 = gen_rtx_EXPR_LIST (VOIDmode,
6764 gen_rtx_REG (SImode, regno),
6765 const0_rtx);
6766 else
6767 for (int i = 0; i < nregs; i++)
6768 XVECEXP (regs, 0, i)
6769 = gen_rtx_EXPR_LIST (VOIDmode,
6770 gen_rtx_REG (DImode, regno + 2*i),
6771 GEN_INT (i*8));
6772
6773 return regs;
6774 }
6775
6776 /* Determine where to put an argument to a function.
6777 Value is zero to push the argument on the stack,
6778 or a hard register in which to store the argument.
6779
6780 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6781 the preceding args and about the function being called.
6782 MODE is the argument's machine mode.
6783 TYPE is the data type of the argument (as a tree).
6784 This is null for libcalls where that information may
6785 not be available.
6786 NAMED is true if this argument is a named parameter
6787 (otherwise it is an extra parameter matching an ellipsis).
6788 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6789 TARGET_FUNCTION_INCOMING_ARG. */
6790
6791 static rtx
6792 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
6793 const_tree type, bool named, bool incoming_p)
6794 {
6795 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6796
6797 int regbase = (incoming_p
6798 ? SPARC_INCOMING_INT_ARG_FIRST
6799 : SPARC_OUTGOING_INT_ARG_FIRST);
6800 int slotno, regno, padding;
6801 enum mode_class mclass = GET_MODE_CLASS (mode);
6802
6803 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
6804 &regno, &padding);
6805 if (slotno == -1)
6806 return 0;
6807
6808 /* Vector types deserve special treatment because they are polymorphic wrt
6809 their mode, depending upon whether VIS instructions are enabled. */
6810 if (type && TREE_CODE (type) == VECTOR_TYPE)
6811 {
6812 HOST_WIDE_INT size = int_size_in_bytes (type);
6813 gcc_assert ((TARGET_ARCH32 && size <= 8)
6814 || (TARGET_ARCH64 && size <= 16));
6815
6816 if (mode == BLKmode)
6817 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
6818
6819 mclass = MODE_FLOAT;
6820 }
6821
6822 if (TARGET_ARCH32)
6823 return gen_rtx_REG (mode, regno);
6824
6825 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6826 and are promoted to registers if possible. */
6827 if (type && TREE_CODE (type) == RECORD_TYPE)
6828 {
6829 HOST_WIDE_INT size = int_size_in_bytes (type);
6830 gcc_assert (size <= 16);
6831
6832 return function_arg_record_value (type, mode, slotno, named, regbase);
6833 }
6834
6835 /* Unions up to 16 bytes in size are passed in integer registers. */
6836 else if (type && TREE_CODE (type) == UNION_TYPE)
6837 {
6838 HOST_WIDE_INT size = int_size_in_bytes (type);
6839 gcc_assert (size <= 16);
6840
6841 return function_arg_union_value (size, mode, slotno, regno);
6842 }
6843
6844 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6845 but also have the slot allocated for them.
6846 If no prototype is in scope fp values in register slots get passed
6847 in two places, either fp regs and int regs or fp regs and memory. */
6848 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6849 && SPARC_FP_REG_P (regno))
6850 {
6851 rtx reg = gen_rtx_REG (mode, regno);
6852 if (cum->prototype_p || cum->libcall_p)
6853 {
6854 /* "* 2" because fp reg numbers are recorded in 4 byte
6855 quantities. */
6856 #if 0
6857 /* ??? This will cause the value to be passed in the fp reg and
6858 in the stack. When a prototype exists we want to pass the
6859 value in the reg but reserve space on the stack. That's an
6860 optimization, and is deferred [for a bit]. */
6861 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
6862 return gen_rtx_PARALLEL (mode,
6863 gen_rtvec (2,
6864 gen_rtx_EXPR_LIST (VOIDmode,
6865 NULL_RTX, const0_rtx),
6866 gen_rtx_EXPR_LIST (VOIDmode,
6867 reg, const0_rtx)));
6868 else
6869 #else
6870 /* ??? It seems that passing back a register even when past
6871 the area declared by REG_PARM_STACK_SPACE will allocate
6872 space appropriately, and will not copy the data onto the
6873 stack, exactly as we desire.
6874
6875 This is due to locate_and_pad_parm being called in
6876 expand_call whenever reg_parm_stack_space > 0, which
6877 while beneficial to our example here, would seem to be
6878 in error from what had been intended. Ho hum... -- r~ */
6879 #endif
6880 return reg;
6881 }
6882 else
6883 {
6884 rtx v0, v1;
6885
6886 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6887 {
6888 int intreg;
6889
6890 /* On incoming, we don't need to know that the value
6891 is passed in %f0 and %i0, and it confuses other parts
6892 causing needless spillage even on the simplest cases. */
6893 if (incoming_p)
6894 return reg;
6895
6896 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6897 + (regno - SPARC_FP_ARG_FIRST) / 2);
6898
6899 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6900 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6901 const0_rtx);
6902 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6903 }
6904 else
6905 {
6906 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6907 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6908 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6909 }
6910 }
6911 }
6912
6913 /* All other aggregate types are passed in an integer register in a mode
6914 corresponding to the size of the type. */
6915 else if (type && AGGREGATE_TYPE_P (type))
6916 {
6917 HOST_WIDE_INT size = int_size_in_bytes (type);
6918 gcc_assert (size <= 16);
6919
6920 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6921 }
6922
6923 return gen_rtx_REG (mode, regno);
6924 }
6925
6926 /* Handle the TARGET_FUNCTION_ARG target hook. */
6927
6928 static rtx
6929 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
6930 const_tree type, bool named)
6931 {
6932 return sparc_function_arg_1 (cum, mode, type, named, false);
6933 }
6934
6935 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6936
6937 static rtx
6938 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
6939 const_tree type, bool named)
6940 {
6941 return sparc_function_arg_1 (cum, mode, type, named, true);
6942 }
6943
6944 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
6945
6946 static unsigned int
6947 sparc_function_arg_boundary (machine_mode mode, const_tree type)
6948 {
6949 return ((TARGET_ARCH64
6950 && (GET_MODE_ALIGNMENT (mode) == 128
6951 || (type && TYPE_ALIGN (type) == 128)))
6952 ? 128
6953 : PARM_BOUNDARY);
6954 }
6955
6956 /* For an arg passed partly in registers and partly in memory,
6957 this is the number of bytes of registers used.
6958 For args passed entirely in registers or entirely in memory, zero.
6959
6960 Any arg that starts in the first 6 regs but won't entirely fit in them
6961 needs partial registers on v8. On v9, structures with integer
6962 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
6963 values that begin in the last fp reg [where "last fp reg" varies with the
6964 mode] will be split between that reg and memory. */
6965
6966 static int
6967 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
6968 tree type, bool named)
6969 {
6970 int slotno, regno, padding;
6971
6972 /* We pass false for incoming_p here; it doesn't matter. */
6973 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
6974 false, &regno, &padding);
6975
6976 if (slotno == -1)
6977 return 0;
6978
6979 if (TARGET_ARCH32)
6980 {
6981 if ((slotno + (mode == BLKmode
6982 ? ROUND_ADVANCE (int_size_in_bytes (type))
6983 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
6984 > SPARC_INT_ARG_MAX)
6985 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
6986 }
6987 else
6988 {
6989 /* We are guaranteed by pass_by_reference that the size of the
6990 argument is not greater than 16 bytes, so we only need to return
6991 one word if the argument is partially passed in registers. */
6992
6993 if (type && AGGREGATE_TYPE_P (type))
6994 {
6995 int size = int_size_in_bytes (type);
6996
6997 if (size > UNITS_PER_WORD
6998 && slotno == SPARC_INT_ARG_MAX - 1)
6999 return UNITS_PER_WORD;
7000 }
7001 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7002 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7003 && ! (TARGET_FPU && named)))
7004 {
7005 /* The complex types are passed as packed types. */
7006 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7007 && slotno == SPARC_INT_ARG_MAX - 1)
7008 return UNITS_PER_WORD;
7009 }
7010 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7011 {
7012 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7013 > SPARC_FP_ARG_MAX)
7014 return UNITS_PER_WORD;
7015 }
7016 }
7017
7018 return 0;
7019 }
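/* For instance, on 32-bit an 8-byte argument starting in slot 5
   straddles the register/memory boundary: one word goes in %o5 and
   this function reports UNITS_PER_WORD (4) bytes passed in
   registers. */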
7020
7021 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7022 Specify whether to pass the argument by reference. */
7023
7024 static bool
7025 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7026 machine_mode mode, const_tree type,
7027 bool named ATTRIBUTE_UNUSED)
7028 {
7029 if (TARGET_ARCH32)
7030 /* Original SPARC 32-bit ABI says that structures and unions,
7031 and quad-precision floats are passed by reference. For Pascal,
7032 also pass arrays by reference. All other base types are passed
7033 in registers.
7034
7035 Extended ABI (as implemented by the Sun compiler) says that all
7036 complex floats are passed by reference. Pass complex integers
7037 in registers up to 8 bytes. More generally, enforce the 2-word
7038 cap for passing arguments in registers.
7039
7040 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7041 integers are passed like floats of the same size, that is in
7042 registers up to 8 bytes. Pass all vector floats by reference
7043 like structure and unions. */
7044 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7045 || mode == SCmode
7046 /* Catch CDImode, TFmode, DCmode and TCmode. */
7047 || GET_MODE_SIZE (mode) > 8
7048 || (type
7049 && TREE_CODE (type) == VECTOR_TYPE
7050 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7051 else
7052 /* Original SPARC 64-bit ABI says that structures and unions
7053 smaller than 16 bytes are passed in registers, as well as
7054 all other base types.
7055
7056 Extended ABI (as implemented by the Sun compiler) says that
7057 complex floats are passed in registers up to 16 bytes. Pass
7058 all complex integers in registers up to 16 bytes. More generally,
7059 enforce the 2-word cap for passing arguments in registers.
7060
7061 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7062 integers are passed like floats of the same size, that is in
7063 registers (up to 16 bytes). Pass all vector floats like structure
7064 and unions. */
7065 return ((type
7066 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7067 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7068 /* Catch CTImode and TCmode. */
7069 || GET_MODE_SIZE (mode) > 16);
7070 }
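/* For example, on 32-bit a 'long double' (TFmode, 16 bytes) or any
   aggregate is passed by reference, whereas on 64-bit a 'long double'
   is passed by value in FP registers and only aggregates larger than
   16 bytes go by reference. */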
7071
7072 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7073 Update the data in CUM to advance over an argument
7074 of mode MODE and data type TYPE.
7075 TYPE is null for libcalls where that information may not be available. */
7076
7077 static void
7078 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7079 const_tree type, bool named)
7080 {
7081 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7082 int regno, padding;
7083
7084 /* We pass false for incoming_p here; it doesn't matter. */
7085 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7086
7087 /* If argument requires leading padding, add it. */
7088 cum->words += padding;
7089
7090 if (TARGET_ARCH32)
7091 {
7092 cum->words += (mode != BLKmode
7093 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7094 : ROUND_ADVANCE (int_size_in_bytes (type)));
7095 }
7096 else
7097 {
7098 if (type && AGGREGATE_TYPE_P (type))
7099 {
7100 int size = int_size_in_bytes (type);
7101
7102 if (size <= 8)
7103 ++cum->words;
7104 else if (size <= 16)
7105 cum->words += 2;
7106 else /* passed by reference */
7107 ++cum->words;
7108 }
7109 else
7110 {
7111 cum->words += (mode != BLKmode
7112 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7113 : ROUND_ADVANCE (int_size_in_bytes (type)));
7114 }
7115 }
7116 }
7117
7118 /* Handle the FUNCTION_ARG_PADDING macro.
7119 For the 64 bit ABI structs are always stored left shifted in their
7120 argument slot. */
7121
7122 enum direction
7123 function_arg_padding (machine_mode mode, const_tree type)
7124 {
7125 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
7126 return upward;
7127
7128 /* Fall back to the default. */
7129 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7130 }
7131
7132 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7133 Specify whether to return the return value in memory. */
7134
7135 static bool
7136 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7137 {
7138 if (TARGET_ARCH32)
7139 /* Original SPARC 32-bit ABI says that structures and unions,
7140 and quad-precision floats are returned in memory. All other
7141 base types are returned in registers.
7142
7143 Extended ABI (as implemented by the Sun compiler) says that
7144 all complex floats are returned in registers (8 FP registers
7145 at most for '_Complex long double'). Return all complex integers
7146 in registers (4 at most for '_Complex long long').
7147
7148 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7149 integers are returned like floats of the same size, that is in
7150 registers up to 8 bytes and in memory otherwise. Return all
7151 vector floats in memory like structure and unions; note that
7152 they always have BLKmode like the latter. */
7153 return (TYPE_MODE (type) == BLKmode
7154 || TYPE_MODE (type) == TFmode
7155 || (TREE_CODE (type) == VECTOR_TYPE
7156 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7157 else
7158 /* Original SPARC 64-bit ABI says that structures and unions
7159 smaller than 32 bytes are returned in registers, as well as
7160 all other base types.
7161
7162 Extended ABI (as implemented by the Sun compiler) says that all
7163 complex floats are returned in registers (8 FP registers at most
7164 for '_Complex long double'). Return all complex integers in
7165 registers (4 at most for '_Complex TItype').
7166
7167 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7168 integers are returned like floats of the same size, that is in
7169 registers. Return all vector floats like structure and unions;
7170 note that they always have BLKmode like the latter. */
7171 return (TYPE_MODE (type) == BLKmode
7172 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7173 }
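/* For example, on 32-bit a 'long double' (TFmode) and any BLKmode
   aggregate are returned in memory, whereas on 64-bit a 24-byte
   struct is returned in registers and only BLKmode aggregates larger
   than 32 bytes go through memory. */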
7174
7175 /* Handle the TARGET_STRUCT_VALUE target hook.
7176 Return where to find the structure return value address. */
7177
7178 static rtx
7179 sparc_struct_value_rtx (tree fndecl, int incoming)
7180 {
7181 if (TARGET_ARCH64)
7182 return 0;
7183 else
7184 {
7185 rtx mem;
7186
7187 if (incoming)
7188 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7189 STRUCT_VALUE_OFFSET));
7190 else
7191 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7192 STRUCT_VALUE_OFFSET));
7193
7194 /* Only follow the SPARC ABI for fixed-size structure returns.
7195 Variable size structure returns are handled per the normal
7196 procedures in GCC. This is enabled by -mstd-struct-return. */
7197 if (incoming == 2
7198 && sparc_std_struct_return
7199 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7200 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7201 {
7202 /* We must check and adjust the return address, as it is
7203 optional as to whether the return object is really
7204 provided. */
7205 rtx ret_reg = gen_rtx_REG (Pmode, 31);
7206 rtx scratch = gen_reg_rtx (SImode);
7207 rtx_code_label *endlab = gen_label_rtx ();
7208
7209 /* Calculate the return object size. */
7210 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7211 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7212 /* Construct a temporary return value. */
7213 rtx temp_val
7214 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7215
7216 /* Implement SPARC 32-bit psABI callee return struct checking:
7217
7218 Fetch the instruction where we will return to and see if
7219 it's an unimp instruction (the most significant 10 bits
7220 will be zero). */
7221 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7222 plus_constant (Pmode,
7223 ret_reg, 8)));
7224 /* Assume the size is valid and pre-adjust. */
7225 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7226 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7227 0, endlab);
7228 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7229 /* Write the address of the memory pointed to by temp_val into
7230 the memory pointed to by mem. */
7231 emit_move_insn (mem, XEXP (temp_val, 0));
7232 emit_label (endlab);
7233 }
7234
7235 return mem;
7236 }
7237 }
7238
7239 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7240 For v9, function return values are subject to the same rules as arguments,
7241 except that up to 32 bytes may be returned in registers. */
7242
7243 static rtx
7244 sparc_function_value_1 (const_tree type, machine_mode mode,
7245 bool outgoing)
7246 {
7247 /* Beware that the two values are swapped here wrt function_arg. */
7248 int regbase = (outgoing
7249 ? SPARC_INCOMING_INT_ARG_FIRST
7250 : SPARC_OUTGOING_INT_ARG_FIRST);
7251 enum mode_class mclass = GET_MODE_CLASS (mode);
7252 int regno;
7253
7254 /* Vector types deserve special treatment because they are polymorphic wrt
7255 their mode, depending upon whether VIS instructions are enabled. */
7256 if (type && TREE_CODE (type) == VECTOR_TYPE)
7257 {
7258 HOST_WIDE_INT size = int_size_in_bytes (type);
7259 gcc_assert ((TARGET_ARCH32 && size <= 8)
7260 || (TARGET_ARCH64 && size <= 32));
7261
7262 if (mode == BLKmode)
7263 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7264
7265 mclass = MODE_FLOAT;
7266 }
7267
7268 if (TARGET_ARCH64 && type)
7269 {
7270 /* Structures up to 32 bytes in size are returned in registers. */
7271 if (TREE_CODE (type) == RECORD_TYPE)
7272 {
7273 HOST_WIDE_INT size = int_size_in_bytes (type);
7274 gcc_assert (size <= 32);
7275
7276 return function_arg_record_value (type, mode, 0, 1, regbase);
7277 }
7278
7279 /* Unions up to 32 bytes in size are returned in integer registers. */
7280 else if (TREE_CODE (type) == UNION_TYPE)
7281 {
7282 HOST_WIDE_INT size = int_size_in_bytes (type);
7283 gcc_assert (size <= 32);
7284
7285 return function_arg_union_value (size, mode, 0, regbase);
7286 }
7287
7288 /* Objects that require it are returned in FP registers. */
7289 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7290 ;
7291
7292 /* All other aggregate types are returned in an integer register in a
7293 mode corresponding to the size of the type. */
7294 else if (AGGREGATE_TYPE_P (type))
7295 {
7296 /* All other aggregate types are passed in an integer register
7297 in a mode corresponding to the size of the type. */
7298 HOST_WIDE_INT size = int_size_in_bytes (type);
7299 gcc_assert (size <= 32);
7300
7301 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7302
7303 /* ??? We probably should have made the same ABI change in
7304 3.4.0 as the one we made for unions. The latter was
7305 required by the SCD though, while the former is not
7306 specified, so we favored compatibility and efficiency.
7307
7308 Now we're stuck for aggregates larger than 16 bytes,
7309 because OImode vanished in the meantime. Let's not
7310 try to be unduly clever, and simply follow the ABI
7311 for unions in that case. */
7312 if (mode == BLKmode)
7313 return function_arg_union_value (size, mode, 0, regbase);
7314 else
7315 mclass = MODE_INT;
7316 }
7317
7318 /* We should only have pointer and integer types at this point. This
7319 must match sparc_promote_function_mode. */
7320 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7321 mode = word_mode;
7322 }
7323
7324 /* We should only have pointer and integer types at this point, except with
7325 -freg-struct-return. This must match sparc_promote_function_mode. */
7326 else if (TARGET_ARCH32
7327 && !(type && AGGREGATE_TYPE_P (type))
7328 && mclass == MODE_INT
7329 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7330 mode = word_mode;
7331
7332 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7333 regno = SPARC_FP_ARG_FIRST;
7334 else
7335 regno = regbase;
7336
7337 return gen_rtx_REG (mode, regno);
7338 }
7339
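/* Illustrative examples of the mapping above (schematic; the precise FP
   register assignment for records is decided by function_arg_record_value):

     TARGET_ARCH64:
       struct { double x, y; } f (void);       16 bytes, FP registers
       union { int i; char c[6]; } g (void);   one integer register (DImode)
       short h (void);                         promoted to word_mode in %o0
     TARGET_ARCH32:
       float f (void);                         %f0 when TARGET_FPU
*/
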
7340 /* Handle TARGET_FUNCTION_VALUE.
7341 On the SPARC, the value is found in the first "output" register, but the
7342 called function leaves it in the first "input" register. */
7343
7344 static rtx
7345 sparc_function_value (const_tree valtype,
7346 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7347 bool outgoing)
7348 {
7349 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7350 }
7351
7352 /* Handle TARGET_LIBCALL_VALUE. */
7353
7354 static rtx
7355 sparc_libcall_value (machine_mode mode,
7356 const_rtx fun ATTRIBUTE_UNUSED)
7357 {
7358 return sparc_function_value_1 (NULL_TREE, mode, false);
7359 }
7360
7361 /* Handle FUNCTION_VALUE_REGNO_P.
7362 On the SPARC, the first "output" reg is used for integer values, and the
7363 first floating point register is used for floating point values. */
7364
7365 static bool
7366 sparc_function_value_regno_p (const unsigned int regno)
7367 {
7368 return (regno == 8 || (TARGET_FPU && regno == 32));
7369 }
7370
7371 /* Do what is necessary for `va_start'. We look at the current function
7372 to determine if stdarg or varargs is used and return the address of
7373 the first unnamed parameter. */
7374
7375 static rtx
7376 sparc_builtin_saveregs (void)
7377 {
7378 int first_reg = crtl->args.info.words;
7379 rtx address;
7380 int regno;
7381
7382 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7383 emit_move_insn (gen_rtx_MEM (word_mode,
7384 gen_rtx_PLUS (Pmode,
7385 frame_pointer_rtx,
7386 GEN_INT (FIRST_PARM_OFFSET (0)
7387 + (UNITS_PER_WORD
7388 * regno)))),
7389 gen_rtx_REG (word_mode,
7390 SPARC_INCOMING_INT_ARG_FIRST + regno));
7391
7392 address = gen_rtx_PLUS (Pmode,
7393 frame_pointer_rtx,
7394 GEN_INT (FIRST_PARM_OFFSET (0)
7395 + UNITS_PER_WORD * first_reg));
7396
7397 return address;
7398 }
7399
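/* For example (illustrative), for  int f (int a, ...)  on 32-bit,
   crtl->args.info.words is 1, so the loop above dumps %i1..%i5 into the
   parameter save area starting at %fp + FIRST_PARM_OFFSET (0) + 4, and
   the returned address points at that first slot.  */
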
7400 /* Implement `va_start' for stdarg. */
7401
7402 static void
7403 sparc_va_start (tree valist, rtx nextarg)
7404 {
7405 nextarg = expand_builtin_saveregs ();
7406 std_expand_builtin_va_start (valist, nextarg);
7407 }
7408
7409 /* Implement `va_arg' for stdarg. */
7410
7411 static tree
7412 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7413 gimple_seq *post_p)
7414 {
7415 HOST_WIDE_INT size, rsize, align;
7416 tree addr, incr;
7417 bool indirect;
7418 tree ptrtype = build_pointer_type (type);
7419
7420 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7421 {
7422 indirect = true;
7423 size = rsize = UNITS_PER_WORD;
7424 align = 0;
7425 }
7426 else
7427 {
7428 indirect = false;
7429 size = int_size_in_bytes (type);
7430 rsize = ROUND_UP (size, UNITS_PER_WORD);
7431 align = 0;
7432
7433 if (TARGET_ARCH64)
7434 {
7435 /* For SPARC64, objects requiring 16-byte alignment get it. */
7436 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7437 align = 2 * UNITS_PER_WORD;
7438
7439 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7440 are left-justified in their slots. */
7441 if (AGGREGATE_TYPE_P (type))
7442 {
7443 if (size == 0)
7444 size = rsize = UNITS_PER_WORD;
7445 else
7446 size = rsize;
7447 }
7448 }
7449 }
7450
7451 incr = valist;
7452 if (align)
7453 {
7454 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7455 incr = fold_convert (sizetype, incr);
7456 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7457 size_int (-align));
7458 incr = fold_convert (ptr_type_node, incr);
7459 }
7460
7461 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7462 addr = incr;
7463
7464 if (BYTES_BIG_ENDIAN && size < rsize)
7465 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7466
7467 if (indirect)
7468 {
7469 addr = fold_convert (build_pointer_type (ptrtype), addr);
7470 addr = build_va_arg_indirect_ref (addr);
7471 }
7472
7473 /* If the address isn't aligned properly for the type, we need a temporary.
7474 FIXME: This is inefficient, usually we can do this in registers. */
7475 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7476 {
7477 tree tmp = create_tmp_var (type, "va_arg_tmp");
7478 tree dest_addr = build_fold_addr_expr (tmp);
7479 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7480 3, dest_addr, addr, size_int (rsize));
7481 TREE_ADDRESSABLE (tmp) = 1;
7482 gimplify_and_add (copy, pre_p);
7483 addr = dest_addr;
7484 }
7485
7486 else
7487 addr = fold_convert (ptrtype, addr);
7488
7489 incr = fold_build_pointer_plus_hwi (incr, rsize);
7490 gimplify_assign (valist, incr, post_p);
7491
7492 return build_va_arg_indirect_ref (addr);
7493 }
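
/* Schematically (illustrative pseudo-C, not the emitted GIMPLE verbatim),
   the sequence built above for  x = va_arg (ap, T)  is:

     incr = ap;
     if (align)                            // 16-byte-aligned T, TARGET_ARCH64
       incr = (incr + align - 1) & -align;
     addr = incr;
     if (BYTES_BIG_ENDIAN && size < rsize)
       addr += rsize - size;               // small values are right-justified
     x = *(T *) addr;                      // or **(T **) addr when indirect
     ap = incr + rsize;
*/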
7494 \f
7495 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7496 Specify whether the vector mode is supported by the hardware. */
7497
7498 static bool
7499 sparc_vector_mode_supported_p (machine_mode mode)
7500 {
7501 return TARGET_VIS && VECTOR_MODE_P (mode);
7502 }
7503 \f
7504 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7505
7506 static machine_mode
7507 sparc_preferred_simd_mode (machine_mode mode)
7508 {
7509 if (TARGET_VIS)
7510 switch (mode)
7511 {
7512 case SImode:
7513 return V2SImode;
7514 case HImode:
7515 return V4HImode;
7516 case QImode:
7517 return V8QImode;
7518
7519 default:;
7520 }
7521
7522 return word_mode;
7523 }
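
/* E.g. (illustrative) with -mvis a loop over 16-bit elements is
   vectorized in V4HImode; without VIS the word_mode fallback effectively
   disables SIMD vectorization.  */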
7524 \f
7525 /* Return the string to output an unconditional branch to LABEL, which is
7526 the operand number of the label.
7527
7528 DEST is the destination insn (i.e. the label), INSN is the source. */
7529
7530 const char *
7531 output_ubranch (rtx dest, rtx_insn *insn)
7532 {
7533 static char string[64];
7534 bool v9_form = false;
7535 int delta;
7536 char *p;
7537
7538 /* Even if we are trying to use cbcond for this, evaluate
7539 whether we can use V9 branches as our backup plan. */
7540
7541 delta = 5000000;
7542 if (INSN_ADDRESSES_SET_P ())
7543 delta = (INSN_ADDRESSES (INSN_UID (dest))
7544 - INSN_ADDRESSES (INSN_UID (insn)));
7545
7546 /* Leave some instructions for "slop". */
7547 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7548 v9_form = true;
7549
7550 if (TARGET_CBCOND)
7551 {
7552 bool emit_nop = emit_cbcond_nop (insn);
7553 bool far = false;
7554 const char *rval;
7555
7556 if (delta < -500 || delta > 500)
7557 far = true;
7558
7559 if (far)
7560 {
7561 if (v9_form)
7562 rval = "ba,a,pt\t%%xcc, %l0";
7563 else
7564 rval = "b,a\t%l0";
7565 }
7566 else
7567 {
7568 if (emit_nop)
7569 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7570 else
7571 rval = "cwbe\t%%g0, %%g0, %l0";
7572 }
7573 return rval;
7574 }
7575
7576 if (v9_form)
7577 strcpy (string, "ba%*,pt\t%%xcc, ");
7578 else
7579 strcpy (string, "b%*\t");
7580
7581 p = strchr (string, '\0');
7582 *p++ = '%';
7583 *p++ = 'l';
7584 *p++ = '0';
7585 *p++ = '%';
7586 *p++ = '(';
7587 *p = '\0';
7588
7589 return string;
7590 }
7591
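/* Example (illustrative): for a nearby label this composes the template
   "ba%*,pt\t%%xcc, %l0%(" on V9 and "b%*\t%l0%(" otherwise; the %* and
   %( escapes are expanded later by sparc_print_operand.  */
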
7592 /* Return the string to output a conditional branch to LABEL, which is
7593 the operand number of the label. OP is the conditional expression.
7594 XEXP (OP, 0) is assumed to be a condition code register (integer or
7595 floating point) and its mode specifies what kind of comparison we made.
7596
7597 DEST is the destination insn (i.e. the label), INSN is the source.
7598
7599 REVERSED is nonzero if we should reverse the sense of the comparison.
7600
7601 ANNUL is nonzero if we should generate an annulling branch. */
7602
7603 const char *
7604 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7605 rtx_insn *insn)
7606 {
7607 static char string[64];
7608 enum rtx_code code = GET_CODE (op);
7609 rtx cc_reg = XEXP (op, 0);
7610 machine_mode mode = GET_MODE (cc_reg);
7611 const char *labelno, *branch;
7612 int spaces = 8, far;
7613 char *p;
7614
7615 /* v9 branches are limited to +-1MB. If it is too far away,
7616 change
7617
7618 bne,pt %xcc, .LC30
7619
7620 to
7621
7622 be,pn %xcc, .+12
7623 nop
7624 ba .LC30
7625
7626 and
7627
7628 fbne,a,pn %fcc2, .LC29
7629
7630 to
7631
7632 fbe,pt %fcc2, .+16
7633 nop
7634 ba .LC29 */
7635
7636 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7637 if (reversed ^ far)
7638 {
7639 /* Reversal of FP compares takes care -- an ordered compare
7640 becomes an unordered compare and vice versa. */
7641 if (mode == CCFPmode || mode == CCFPEmode)
7642 code = reverse_condition_maybe_unordered (code);
7643 else
7644 code = reverse_condition (code);
7645 }
7646
7647 /* Start by writing the branch condition. */
7648 if (mode == CCFPmode || mode == CCFPEmode)
7649 {
7650 switch (code)
7651 {
7652 case NE:
7653 branch = "fbne";
7654 break;
7655 case EQ:
7656 branch = "fbe";
7657 break;
7658 case GE:
7659 branch = "fbge";
7660 break;
7661 case GT:
7662 branch = "fbg";
7663 break;
7664 case LE:
7665 branch = "fble";
7666 break;
7667 case LT:
7668 branch = "fbl";
7669 break;
7670 case UNORDERED:
7671 branch = "fbu";
7672 break;
7673 case ORDERED:
7674 branch = "fbo";
7675 break;
7676 case UNGT:
7677 branch = "fbug";
7678 break;
7679 case UNLT:
7680 branch = "fbul";
7681 break;
7682 case UNEQ:
7683 branch = "fbue";
7684 break;
7685 case UNGE:
7686 branch = "fbuge";
7687 break;
7688 case UNLE:
7689 branch = "fbule";
7690 break;
7691 case LTGT:
7692 branch = "fblg";
7693 break;
7694
7695 default:
7696 gcc_unreachable ();
7697 }
7698
7699 /* ??? !v9: FP branches cannot be preceded by another floating point
7700 insn. Because there is currently no concept of pre-delay slots,
7701 we can fix this only by always emitting a nop before a floating
7702 point branch. */
7703
7704 string[0] = '\0';
7705 if (! TARGET_V9)
7706 strcpy (string, "nop\n\t");
7707 strcat (string, branch);
7708 }
7709 else
7710 {
7711 switch (code)
7712 {
7713 case NE:
7714 branch = "bne";
7715 break;
7716 case EQ:
7717 branch = "be";
7718 break;
7719 case GE:
7720 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7721 branch = "bpos";
7722 else
7723 branch = "bge";
7724 break;
7725 case GT:
7726 branch = "bg";
7727 break;
7728 case LE:
7729 branch = "ble";
7730 break;
7731 case LT:
7732 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7733 branch = "bneg";
7734 else
7735 branch = "bl";
7736 break;
7737 case GEU:
7738 branch = "bgeu";
7739 break;
7740 case GTU:
7741 branch = "bgu";
7742 break;
7743 case LEU:
7744 branch = "bleu";
7745 break;
7746 case LTU:
7747 branch = "blu";
7748 break;
7749
7750 default:
7751 gcc_unreachable ();
7752 }
7753 strcpy (string, branch);
7754 }
7755 spaces -= strlen (branch);
7756 p = strchr (string, '\0');
7757
7758 /* Now add the annulling, the label, and a possible noop. */
7759 if (annul && ! far)
7760 {
7761 strcpy (p, ",a");
7762 p += 2;
7763 spaces -= 2;
7764 }
7765
7766 if (TARGET_V9)
7767 {
7768 rtx note;
7769 int v8 = 0;
7770
7771 if (! far && insn && INSN_ADDRESSES_SET_P ())
7772 {
7773 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7774 - INSN_ADDRESSES (INSN_UID (insn)));
7775 /* Leave some instructions for "slop". */
7776 if (delta < -260000 || delta >= 260000)
7777 v8 = 1;
7778 }
7779
7780 if (mode == CCFPmode || mode == CCFPEmode)
7781 {
7782 static char v9_fcc_labelno[] = "%%fccX, ";
7783 /* Set the char indicating the number of the fcc reg to use. */
7784 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7785 labelno = v9_fcc_labelno;
7786 if (v8)
7787 {
7788 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7789 labelno = "";
7790 }
7791 }
7792 else if (mode == CCXmode || mode == CCX_NOOVmode)
7793 {
7794 labelno = "%%xcc, ";
7795 gcc_assert (! v8);
7796 }
7797 else
7798 {
7799 labelno = "%%icc, ";
7800 if (v8)
7801 labelno = "";
7802 }
7803
7804 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7805 {
7806 strcpy (p,
7807 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
7808 ? ",pt" : ",pn");
7809 p += 3;
7810 spaces -= 3;
7811 }
7812 }
7813 else
7814 labelno = "";
7815
7816 if (spaces > 0)
7817 *p++ = '\t';
7818 else
7819 *p++ = ' ';
7820 strcpy (p, labelno);
7821 p = strchr (p, '\0');
7822 if (far)
7823 {
7824 strcpy (p, ".+12\n\t nop\n\tb\t");
7825 /* Skip the next insn if requested or
7826 if we know that it will be a nop. */
7827 if (annul || ! final_sequence)
7828 p[3] = '6';
7829 p += 14;
7830 }
7831 *p++ = '%';
7832 *p++ = 'l';
7833 *p++ = label + '0';
7834 *p++ = '%';
7835 *p++ = '#';
7836 *p = '\0';
7837
7838 return string;
7839 }
7840
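/* Example (illustrative): an annulled FP inequality on %fcc2 with a
   taken-prediction note composes into "fbne,a,pt\t%%fcc2, %l1%#"; the
   far-branch path instead inverts the test and emits the ba sequence
   documented above.  */
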
7841 /* Emit a library call comparison between floating point X and Y.
7842 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7843 Return the new operator to be used in the comparison sequence.
7844
7845 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7846 values as arguments instead of the TFmode registers themselves,
7847 that's why we cannot call emit_float_lib_cmp. */
7848
7849 rtx
7850 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7851 {
7852 const char *qpfunc;
7853 rtx slot0, slot1, result, tem, tem2, libfunc;
7854 machine_mode mode;
7855 enum rtx_code new_comparison;
7856
7857 switch (comparison)
7858 {
7859 case EQ:
7860 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7861 break;
7862
7863 case NE:
7864 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7865 break;
7866
7867 case GT:
7868 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7869 break;
7870
7871 case GE:
7872 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7873 break;
7874
7875 case LT:
7876 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7877 break;
7878
7879 case LE:
7880 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7881 break;
7882
7883 case ORDERED:
7884 case UNORDERED:
7885 case UNGT:
7886 case UNLT:
7887 case UNEQ:
7888 case UNGE:
7889 case UNLE:
7890 case LTGT:
7891 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7892 break;
7893
7894 default:
7895 gcc_unreachable ();
7896 }
7897
7898 if (TARGET_ARCH64)
7899 {
7900 if (MEM_P (x))
7901 {
7902 tree expr = MEM_EXPR (x);
7903 if (expr)
7904 mark_addressable (expr);
7905 slot0 = x;
7906 }
7907 else
7908 {
7909 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7910 emit_move_insn (slot0, x);
7911 }
7912
7913 if (MEM_P (y))
7914 {
7915 tree expr = MEM_EXPR (y);
7916 if (expr)
7917 mark_addressable (expr);
7918 slot1 = y;
7919 }
7920 else
7921 {
7922 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7923 emit_move_insn (slot1, y);
7924 }
7925
7926 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7927 emit_library_call (libfunc, LCT_NORMAL,
7928 DImode, 2,
7929 XEXP (slot0, 0), Pmode,
7930 XEXP (slot1, 0), Pmode);
7931 mode = DImode;
7932 }
7933 else
7934 {
7935 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7936 emit_library_call (libfunc, LCT_NORMAL,
7937 SImode, 2,
7938 x, TFmode, y, TFmode);
7939 mode = SImode;
7940 }
7941
7942
7943 /* Immediately move the result of the libcall into a pseudo
7944 register so reload doesn't clobber the value if it needs
7945 the return register for a spill reg. */
7946 result = gen_reg_rtx (mode);
7947 emit_move_insn (result, hard_libcall_value (mode, libfunc));
7948
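/* Decode the result.  The tests below rely on the encoding used by
   _Q_cmp/_Qp_cmp (matching the fcc values): 0 = equal, 1 = less,
   2 = greater, 3 = unordered.  */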
7949 switch (comparison)
7950 {
7951 default:
7952 return gen_rtx_NE (VOIDmode, result, const0_rtx);
7953 case ORDERED:
7954 case UNORDERED:
7955 new_comparison = (comparison == UNORDERED ? EQ : NE);
7956 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
7957 case UNGT:
7958 case UNGE:
7959 new_comparison = (comparison == UNGT ? GT : NE);
7960 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
7961 case UNLE:
7962 return gen_rtx_NE (VOIDmode, result, const2_rtx);
7963 case UNLT:
7964 tem = gen_reg_rtx (mode);
7965 if (TARGET_ARCH32)
7966 emit_insn (gen_andsi3 (tem, result, const1_rtx));
7967 else
7968 emit_insn (gen_anddi3 (tem, result, const1_rtx));
7969 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
7970 case UNEQ:
7971 case LTGT:
7972 tem = gen_reg_rtx (mode);
7973 if (TARGET_ARCH32)
7974 emit_insn (gen_addsi3 (tem, result, const1_rtx));
7975 else
7976 emit_insn (gen_adddi3 (tem, result, const1_rtx));
7977 tem2 = gen_reg_rtx (mode);
7978 if (TARGET_ARCH32)
7979 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
7980 else
7981 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
7982 new_comparison = (comparison == UNEQ ? EQ : NE);
7983 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
7984 }
7985
7986 gcc_unreachable ();
7987 }
7988
7989 /* Generate an unsigned DImode to FP conversion. This is the same code
7990 optabs would emit if we didn't have TFmode patterns. */
7991
7992 void
7993 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
7994 {
7995 rtx i0, i1, f0, in, out;
7996
7997 out = operands[0];
7998 in = force_reg (DImode, operands[1]);
7999 rtx_code_label *neglab = gen_label_rtx ();
8000 rtx_code_label *donelab = gen_label_rtx ();
8001 i0 = gen_reg_rtx (DImode);
8002 i1 = gen_reg_rtx (DImode);
8003 f0 = gen_reg_rtx (mode);
8004
8005 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8006
8007 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8008 emit_jump_insn (gen_jump (donelab));
8009 emit_barrier ();
8010
8011 emit_label (neglab);
8012
8013 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8014 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8015 emit_insn (gen_iordi3 (i0, i0, i1));
8016 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8017 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8018
8019 emit_label (donelab);
8020 }
8021
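/* The negative path above is the classic round-to-odd halving trick:
   i0 = (in >> 1) | (in & 1) keeps the discarded low bit sticky, so
   converting the halved value and then doubling it with f0 + f0 rounds
   exactly as converting the full unsigned value would.  */
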
8022 /* Generate an FP to unsigned DImode conversion. This is the same code
8023 optabs would emit if we didn't have TFmode patterns. */
8024
8025 void
8026 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8027 {
8028 rtx i0, i1, f0, in, out, limit;
8029
8030 out = operands[0];
8031 in = force_reg (mode, operands[1]);
8032 rtx_code_label *neglab = gen_label_rtx ();
8033 rtx_code_label *donelab = gen_label_rtx ();
8034 i0 = gen_reg_rtx (DImode);
8035 i1 = gen_reg_rtx (DImode);
8036 limit = gen_reg_rtx (mode);
8037 f0 = gen_reg_rtx (mode);
8038
8039 emit_move_insn (limit,
8040 const_double_from_real_value (
8041 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8042 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8043
8044 emit_insn (gen_rtx_SET (out,
8045 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8046 emit_jump_insn (gen_jump (donelab));
8047 emit_barrier ();
8048
8049 emit_label (neglab);
8050
8051 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8052 emit_insn (gen_rtx_SET (i0,
8053 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8054 emit_insn (gen_movdi (i1, const1_rtx));
8055 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8056 emit_insn (gen_xordi3 (out, i0, i1));
8057
8058 emit_label (donelab);
8059 }
8060
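/* Conversely, the code above handles inputs at or above 2^63 (the limit
   loaded into LIMIT) by subtracting 2^63 in the FP domain, doing a
   signed fix, and XORing bit 63 back into the result; e.g. an input of
   0x8000000000000400 becomes 1024.0, fixes to 1024, and the XOR yields
   0x8000000000000400 again (illustrative).  */
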
8061 /* Return the string to output a compare and branch instruction to DEST.
8062 DEST is the destination insn (i.e. the label), INSN is the source,
8063 and OP is the conditional expression. */
8064
8065 const char *
8066 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8067 {
8068 machine_mode mode = GET_MODE (XEXP (op, 0));
8069 enum rtx_code code = GET_CODE (op);
8070 const char *cond_str, *tmpl;
8071 int far, emit_nop, len;
8072 static char string[64];
8073 char size_char;
8074
8075 /* Compare and Branch is limited to +-2KB. If it is too far away,
8076 change
8077
8078 cxbne X, Y, .LC30
8079
8080 to
8081
8082 cxbe X, Y, .+16
8083 nop
8084 ba,pt %xcc, .LC30
8085 nop */
8086
8087 len = get_attr_length (insn);
8088
8089 far = len == 4;
8090 emit_nop = len == 2;
8091
8092 if (far)
8093 code = reverse_condition (code);
8094
8095 size_char = ((mode == SImode) ? 'w' : 'x');
8096
8097 switch (code)
8098 {
8099 case NE:
8100 cond_str = "ne";
8101 break;
8102
8103 case EQ:
8104 cond_str = "e";
8105 break;
8106
8107 case GE:
8108 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8109 cond_str = "pos";
8110 else
8111 cond_str = "ge";
8112 break;
8113
8114 case GT:
8115 cond_str = "g";
8116 break;
8117
8118 case LE:
8119 cond_str = "le";
8120 break;
8121
8122 case LT:
8123 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8124 cond_str = "neg";
8125 else
8126 cond_str = "l";
8127 break;
8128
8129 case GEU:
8130 cond_str = "cc";
8131 break;
8132
8133 case GTU:
8134 cond_str = "gu";
8135 break;
8136
8137 case LEU:
8138 cond_str = "leu";
8139 break;
8140
8141 case LTU:
8142 cond_str = "cs";
8143 break;
8144
8145 default:
8146 gcc_unreachable ();
8147 }
8148
8149 if (far)
8150 {
8151 int veryfar = 1, delta;
8152
8153 if (INSN_ADDRESSES_SET_P ())
8154 {
8155 delta = (INSN_ADDRESSES (INSN_UID (dest))
8156 - INSN_ADDRESSES (INSN_UID (insn)));
8157 /* Leave some instructions for "slop". */
8158 if (delta >= -260000 && delta < 260000)
8159 veryfar = 0;
8160 }
8161
8162 if (veryfar)
8163 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8164 else
8165 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8166 }
8167 else
8168 {
8169 if (emit_nop)
8170 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8171 else
8172 tmpl = "c%cb%s\t%%1, %%2, %%3";
8173 }
8174
8175 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8176
8177 return string;
8178 }
8179
8180 /* Return the string to output a conditional branch to LABEL, testing
8181 register REG. LABEL is the operand number of the label; REG is the
8182 operand number of the reg. OP is the conditional expression. The mode
8183 of REG says what kind of comparison we made.
8184
8185 DEST is the destination insn (i.e. the label), INSN is the source.
8186
8187 REVERSED is nonzero if we should reverse the sense of the comparison.
8188
8189 ANNUL is nonzero if we should generate an annulling branch. */
8190
8191 const char *
8192 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8193 int annul, rtx_insn *insn)
8194 {
8195 static char string[64];
8196 enum rtx_code code = GET_CODE (op);
8197 machine_mode mode = GET_MODE (XEXP (op, 0));
8198 rtx note;
8199 int far;
8200 char *p;
8201
8202 /* Branches on a register are limited to +-128KB.  If it is too far away,
8203 change
8204
8205 brnz,pt %g1, .LC30
8206
8207 to
8208
8209 brz,pn %g1, .+12
8210 nop
8211 ba,pt %xcc, .LC30
8212
8213 and
8214
8215 brgez,a,pn %o1, .LC29
8216
8217 to
8218
8219 brlz,pt %o1, .+16
8220 nop
8221 ba,pt %xcc, .LC29 */
8222
8223 far = get_attr_length (insn) >= 3;
8224
8225 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8226 if (reversed ^ far)
8227 code = reverse_condition (code);
8228
8229 /* Only 64 bit versions of these instructions exist. */
8230 gcc_assert (mode == DImode);
8231
8232 /* Start by writing the branch condition. */
8233
8234 switch (code)
8235 {
8236 case NE:
8237 strcpy (string, "brnz");
8238 break;
8239
8240 case EQ:
8241 strcpy (string, "brz");
8242 break;
8243
8244 case GE:
8245 strcpy (string, "brgez");
8246 break;
8247
8248 case LT:
8249 strcpy (string, "brlz");
8250 break;
8251
8252 case LE:
8253 strcpy (string, "brlez");
8254 break;
8255
8256 case GT:
8257 strcpy (string, "brgz");
8258 break;
8259
8260 default:
8261 gcc_unreachable ();
8262 }
8263
8264 p = strchr (string, '\0');
8265
8266 /* Now add the annulling, reg, label, and nop. */
8267 if (annul && ! far)
8268 {
8269 strcpy (p, ",a");
8270 p += 2;
8271 }
8272
8273 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8274 {
8275 strcpy (p,
8276 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
8277 ? ",pt" : ",pn");
8278 p += 3;
8279 }
8280
8281 *p = p < string + 8 ? '\t' : ' ';
8282 p++;
8283 *p++ = '%';
8284 *p++ = '0' + reg;
8285 *p++ = ',';
8286 *p++ = ' ';
8287 if (far)
8288 {
8289 int veryfar = 1, delta;
8290
8291 if (INSN_ADDRESSES_SET_P ())
8292 {
8293 delta = (INSN_ADDRESSES (INSN_UID (dest))
8294 - INSN_ADDRESSES (INSN_UID (insn)));
8295 /* Leave some instructions for "slop". */
8296 if (delta >= -260000 && delta < 260000)
8297 veryfar = 0;
8298 }
8299
8300 strcpy (p, ".+12\n\t nop\n\t");
8301 /* Skip the next insn if requested or
8302 if we know that it will be a nop. */
8303 if (annul || ! final_sequence)
8304 p[3] = '6';
8305 p += 12;
8306 if (veryfar)
8307 {
8308 strcpy (p, "b\t");
8309 p += 2;
8310 }
8311 else
8312 {
8313 strcpy (p, "ba,pt\t%%xcc, ");
8314 p += 13;
8315 }
8316 }
8317 *p++ = '%';
8318 *p++ = 'l';
8319 *p++ = '0' + label;
8320 *p++ = '%';
8321 *p++ = '#';
8322 *p = '\0';
8323
8324 return string;
8325 }
8326
8327 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8328 Such instructions cannot be used in the delay slot of a return insn on V9.
8329 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7]
8330 counterparts.  */
8331
8332 static int
8333 epilogue_renumber (register rtx *where, int test)
8334 {
8335 register const char *fmt;
8336 register int i;
8337 register enum rtx_code code;
8338
8339 if (*where == 0)
8340 return 0;
8341
8342 code = GET_CODE (*where);
8343
8344 switch (code)
8345 {
8346 case REG:
8347 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8348 return 1;
8349 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8350 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
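/* FALLTHROUGH */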
8351 case SCRATCH:
8352 case CC0:
8353 case PC:
8354 case CONST_INT:
8355 case CONST_DOUBLE:
8356 return 0;
8357
8358 /* Do not replace the frame pointer with the stack pointer because
8359 it can cause the delayed instruction to load below the stack.
8360 This occurs when instructions like:
8361
8362 (set (reg/i:SI 24 %i0)
8363 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8364 (const_int -20 [0xffffffec])) 0))
8365
8366 are in the return delayed slot. */
8367 case PLUS:
8368 if (GET_CODE (XEXP (*where, 0)) == REG
8369 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8370 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8371 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8372 return 1;
8373 break;
8374
8375 case MEM:
8376 if (SPARC_STACK_BIAS
8377 && GET_CODE (XEXP (*where, 0)) == REG
8378 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8379 return 1;
8380 break;
8381
8382 default:
8383 break;
8384 }
8385
8386 fmt = GET_RTX_FORMAT (code);
8387
8388 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8389 {
8390 if (fmt[i] == 'E')
8391 {
8392 register int j;
8393 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8394 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8395 return 1;
8396 }
8397 else if (fmt[i] == 'e'
8398 && epilogue_renumber (&(XEXP (*where, i)), test))
8399 return 1;
8400 }
8401 return 0;
8402 }
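
/* For instance (illustrative), with TEST == 0 a return-delay-slot insn
   (set (reg:SI 24 %i0) ...) is rewritten to (set (reg:SI 8 %o0) ...),
   since after the restore the caller sees the value in %o0.  */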
8403 \f
8404 /* Leaf functions and non-leaf functions have different needs. */
8405
8406 static const int
8407 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8408
8409 static const int
8410 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8411
8412 static const int *const reg_alloc_orders[] = {
8413 reg_leaf_alloc_order,
8414 reg_nonleaf_alloc_order};
8415
8416 void
8417 order_regs_for_local_alloc (void)
8418 {
8419 static int last_order_nonleaf = 1;
8420
8421 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8422 {
8423 last_order_nonleaf = !last_order_nonleaf;
8424 memcpy ((char *) reg_alloc_order,
8425 (const char *) reg_alloc_orders[last_order_nonleaf],
8426 FIRST_PSEUDO_REGISTER * sizeof (int));
8427 }
8428 }
8429 \f
8430 /* Return 1 if REG and MEM are legitimate enough to allow the various
8431 mem<-->reg splits to be run. */
8432
8433 int
8434 sparc_splitdi_legitimate (rtx reg, rtx mem)
8435 {
8436 /* Punt if we are here by mistake. */
8437 gcc_assert (reload_completed);
8438
8439 /* We must have an offsettable memory reference. */
8440 if (! offsettable_memref_p (mem))
8441 return 0;
8442
8443 /* If we have legitimate args for ldd/std, we do not want
8444 the split to happen. */
8445 if ((REGNO (reg) % 2) == 0
8446 && mem_min_alignment (mem, 8))
8447 return 0;
8448
8449 /* Success. */
8450 return 1;
8451 }
8452
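/* Example (illustrative): a DImode store from an even register pair to a
   MEM known to be 8-byte aligned is kept whole so the std pattern can be
   used; with an odd starting register or lesser alignment it is split
   into two SImode moves.  */
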
8453 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8454
8455 int
8456 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8457 {
8458 int regno1, regno2;
8459
8460 if (GET_CODE (reg1) == SUBREG)
8461 reg1 = SUBREG_REG (reg1);
8462 if (GET_CODE (reg1) != REG)
8463 return 0;
8464 regno1 = REGNO (reg1);
8465
8466 if (GET_CODE (reg2) == SUBREG)
8467 reg2 = SUBREG_REG (reg2);
8468 if (GET_CODE (reg2) != REG)
8469 return 0;
8470 regno2 = REGNO (reg2);
8471
8472 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8473 return 1;
8474
8475 if (TARGET_VIS3)
8476 {
8477 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8478 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8479 return 1;
8480 }
8481
8482 return 0;
8483 }
8484
8485 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8486 This makes them candidates for using ldd and std insns.
8487
8488 Note reg1 and reg2 *must* be hard registers. */
8489
8490 int
8491 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8492 {
8493 /* We might have been passed a SUBREG. */
8494 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8495 return 0;
8496
8497 if (REGNO (reg1) % 2 != 0)
8498 return 0;
8499
8500 /* Integer ldd is deprecated in SPARC V9.  */
8501 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8502 return 0;
8503
8504 return (REGNO (reg1) == REGNO (reg2) - 1);
8505 }
8506
8507 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8508 an ldd or std insn.
8509
8510 This can only happen when addr1 and addr2, the addresses in mem1
8511 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8512 addr1 must also be aligned on a 64-bit boundary.
8513
8514 Also iff dependent_reg_rtx is not null it should not be used to
8515 compute the address for mem1, i.e. we cannot optimize a sequence
8516 like:
8517 ld [%o0], %o0
8518 ld [%o0 + 4], %o1
8519 to
8520 ldd [%o0], %o0
8521 nor:
8522 ld [%g3 + 4], %g3
8523 ld [%g3], %g2
8524 to
8525 ldd [%g3], %g2
8526
8527 But, note that the transformation from:
8528 ld [%g2 + 4], %g3
8529 ld [%g2], %g2
8530 to
8531 ldd [%g2], %g2
8532 is perfectly fine. Thus, the peephole2 patterns always pass us
8533 the destination register of the first load, never the second one.
8534
8535 For stores we don't have a similar problem, so dependent_reg_rtx is
8536 NULL_RTX. */
8537
8538 int
8539 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8540 {
8541 rtx addr1, addr2;
8542 unsigned int reg1;
8543 HOST_WIDE_INT offset1;
8544
8545 /* The mems cannot be volatile. */
8546 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8547 return 0;
8548
8549 /* MEM1 should be aligned on a 64-bit boundary. */
8550 if (MEM_ALIGN (mem1) < 64)
8551 return 0;
8552
8553 addr1 = XEXP (mem1, 0);
8554 addr2 = XEXP (mem2, 0);
8555
8556 /* Extract a register number and offset (if used) from the first addr. */
8557 if (GET_CODE (addr1) == PLUS)
8558 {
8559 /* If not a REG, return zero. */
8560 if (GET_CODE (XEXP (addr1, 0)) != REG)
8561 return 0;
8562 else
8563 {
8564 reg1 = REGNO (XEXP (addr1, 0));
8565 /* The offset must be constant! */
8566 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8567 return 0;
8568 offset1 = INTVAL (XEXP (addr1, 1));
8569 }
8570 }
8571 else if (GET_CODE (addr1) != REG)
8572 return 0;
8573 else
8574 {
8575 reg1 = REGNO (addr1);
8576 /* This was a simple (mem (reg)) expression. Offset is 0. */
8577 offset1 = 0;
8578 }
8579
8580 /* Make sure the second address is a (mem (plus (reg) (const_int))).  */
8581 if (GET_CODE (addr2) != PLUS)
8582 return 0;
8583
8584 if (GET_CODE (XEXP (addr2, 0)) != REG
8585 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8586 return 0;
8587
8588 if (reg1 != REGNO (XEXP (addr2, 0)))
8589 return 0;
8590
8591 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8592 return 0;
8593
8594 /* The first offset must be evenly divisible by 8 to ensure the
8595 address is 64 bit aligned. */
8596 if (offset1 % 8 != 0)
8597 return 0;
8598
8599 /* The offset for the second addr must be 4 more than the first addr. */
8600 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8601 return 0;
8602
8603 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8604 instructions. */
8605 return 1;
8606 }
8607
8608 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8609
8610 rtx
8611 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8612 {
8613 rtx x = widen_memory_access (mem1, mode, 0);
8614 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8615 return x;
8616 }
8617
8618 /* Return 1 if reg is a pseudo, or is the first register in
8619 a hard register pair. This makes it suitable for use in
8620 ldd and std insns. */
8621
8622 int
8623 register_ok_for_ldd (rtx reg)
8624 {
8625 /* We might have been passed a SUBREG. */
8626 if (!REG_P (reg))
8627 return 0;
8628
8629 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8630 return (REGNO (reg) % 2 == 0);
8631
8632 return 1;
8633 }
8634
8635 /* Return 1 if OP, a MEM, has an address which is known to be
8636 aligned to an 8-byte boundary. */
8637
8638 int
8639 memory_ok_for_ldd (rtx op)
8640 {
8641 /* In 64-bit mode, we assume that the address is word-aligned. */
8642 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8643 return 0;
8644
8645 if (! can_create_pseudo_p ()
8646 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8647 return 0;
8648
8649 return 1;
8650 }
8651 \f
8652 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8653
8654 static bool
8655 sparc_print_operand_punct_valid_p (unsigned char code)
8656 {
8657 if (code == '#'
8658 || code == '*'
8659 || code == '('
8660 || code == ')'
8661 || code == '_'
8662 || code == '&')
8663 return true;
8664
8665 return false;
8666 }
8667
8668 /* Implement TARGET_PRINT_OPERAND.
8669 Print operand X (an rtx) in assembler syntax to file FILE.
8670 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8671 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8672
8673 static void
8674 sparc_print_operand (FILE *file, rtx x, int code)
8675 {
8676 switch (code)
8677 {
8678 case '#':
8679 /* Output an insn in a delay slot. */
8680 if (final_sequence)
8681 sparc_indent_opcode = 1;
8682 else
8683 fputs ("\n\t nop", file);
8684 return;
8685 case '*':
8686 /* Output an annul flag if there's nothing for the delay slot and we
8687 are optimizing. This is always used with '(' below.
8688 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8689 this is a dbx bug. So, we only do this when optimizing.
8690 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8691 Always emit a nop in case the next instruction is a branch. */
8692 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8693 fputs (",a", file);
8694 return;
8695 case '(':
8696 /* Output a 'nop' if there's nothing for the delay slot and we are
8697 not optimizing. This is always used with '*' above. */
8698 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8699 fputs ("\n\t nop", file);
8700 else if (final_sequence)
8701 sparc_indent_opcode = 1;
8702 return;
8703 case ')':
8704 /* Output the right displacement from the saved PC on function return.
8705 The caller may have placed an "unimp" insn immediately after the call
8706 so we have to account for it. This insn is used in the 32-bit ABI
8707 when calling a function that returns a non zero-sized structure. The
8708 64-bit ABI doesn't have it. Be careful to have this test be the same
8709 as that for the call. The exception is when sparc_std_struct_return
8710 is enabled, the psABI is followed exactly and the adjustment is made
8711 by the code in sparc_struct_value_rtx. The call emitted is the same
8712 when sparc_std_struct_return is enabled. */
8713 if (!TARGET_ARCH64
8714 && cfun->returns_struct
8715 && !sparc_std_struct_return
8716 && DECL_SIZE (DECL_RESULT (current_function_decl))
8717 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8718 == INTEGER_CST
8719 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8720 fputs ("12", file);
8721 else
8722 fputc ('8', file);
8723 return;
8724 case '_':
8725 /* Output the Embedded Medium/Anywhere code model base register. */
8726 fputs (EMBMEDANY_BASE_REG, file);
8727 return;
8728 case '&':
8729 /* Print some local dynamic TLS name. */
8730 if (const char *name = get_some_local_dynamic_name ())
8731 assemble_name (file, name);
8732 else
8733 output_operand_lossage ("'%%&' used without any "
8734 "local dynamic TLS references");
8735 return;
8736
8737 case 'Y':
8738 /* Adjust the operand to take into account a RESTORE operation. */
8739 if (GET_CODE (x) == CONST_INT)
8740 break;
8741 else if (GET_CODE (x) != REG)
8742 output_operand_lossage ("invalid %%Y operand");
8743 else if (REGNO (x) < 8)
8744 fputs (reg_names[REGNO (x)], file);
8745 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8746 fputs (reg_names[REGNO (x)-16], file);
8747 else
8748 output_operand_lossage ("invalid %%Y operand");
8749 return;
8750 case 'L':
8751 /* Print out the low order register name of a register pair. */
8752 if (WORDS_BIG_ENDIAN)
8753 fputs (reg_names[REGNO (x)+1], file);
8754 else
8755 fputs (reg_names[REGNO (x)], file);
8756 return;
8757 case 'H':
8758 /* Print out the high order register name of a register pair. */
8759 if (WORDS_BIG_ENDIAN)
8760 fputs (reg_names[REGNO (x)], file);
8761 else
8762 fputs (reg_names[REGNO (x)+1], file);
8763 return;
8764 case 'R':
8765 /* Print out the second register name of a register pair or quad.
8766 I.e., R (%o0) => %o1. */
8767 fputs (reg_names[REGNO (x)+1], file);
8768 return;
8769 case 'S':
8770 /* Print out the third register name of a register quad.
8771 I.e., S (%o0) => %o2. */
8772 fputs (reg_names[REGNO (x)+2], file);
8773 return;
8774 case 'T':
8775 /* Print out the fourth register name of a register quad.
8776 I.e., T (%o0) => %o3. */
8777 fputs (reg_names[REGNO (x)+3], file);
8778 return;
8779 case 'x':
8780 /* Print a condition code register. */
8781 if (REGNO (x) == SPARC_ICC_REG)
8782 {
8783 /* We don't handle CC[X]_NOOVmode because they're not supposed
8784 to occur here. */
8785 if (GET_MODE (x) == CCmode)
8786 fputs ("%icc", file);
8787 else if (GET_MODE (x) == CCXmode)
8788 fputs ("%xcc", file);
8789 else
8790 gcc_unreachable ();
8791 }
8792 else
8793 /* %fccN register */
8794 fputs (reg_names[REGNO (x)], file);
8795 return;
8796 case 'm':
8797 /* Print the operand's address only. */
8798 output_address (GET_MODE (x), XEXP (x, 0));
8799 return;
8800 case 'r':
8801 /* In this case we need a register. Use %g0 if the
8802 operand is const0_rtx. */
8803 if (x == const0_rtx
8804 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8805 {
8806 fputs ("%g0", file);
8807 return;
8808 }
8809 else
8810 break;
8811
8812 case 'A':
8813 switch (GET_CODE (x))
8814 {
8815 case IOR: fputs ("or", file); break;
8816 case AND: fputs ("and", file); break;
8817 case XOR: fputs ("xor", file); break;
8818 default: output_operand_lossage ("invalid %%A operand");
8819 }
8820 return;
8821
8822 case 'B':
8823 switch (GET_CODE (x))
8824 {
8825 case IOR: fputs ("orn", file); break;
8826 case AND: fputs ("andn", file); break;
8827 case XOR: fputs ("xnor", file); break;
8828 default: output_operand_lossage ("invalid %%B operand");
8829 }
8830 return;
8831
8832 /* This is used by the conditional move instructions. */
8833 case 'C':
8834 {
8835 enum rtx_code rc = GET_CODE (x);
8836
8837 switch (rc)
8838 {
8839 case NE: fputs ("ne", file); break;
8840 case EQ: fputs ("e", file); break;
8841 case GE: fputs ("ge", file); break;
8842 case GT: fputs ("g", file); break;
8843 case LE: fputs ("le", file); break;
8844 case LT: fputs ("l", file); break;
8845 case GEU: fputs ("geu", file); break;
8846 case GTU: fputs ("gu", file); break;
8847 case LEU: fputs ("leu", file); break;
8848 case LTU: fputs ("lu", file); break;
8849 case LTGT: fputs ("lg", file); break;
8850 case UNORDERED: fputs ("u", file); break;
8851 case ORDERED: fputs ("o", file); break;
8852 case UNLT: fputs ("ul", file); break;
8853 case UNLE: fputs ("ule", file); break;
8854 case UNGT: fputs ("ug", file); break;
8855 case UNGE: fputs ("uge", file); break;
8856 case UNEQ: fputs ("ue", file); break;
8857 default: output_operand_lossage ("invalid %%C operand");
8858 }
8859 return;
8860 }
8861
8862 /* These are used by the movr instruction pattern.  */
8863 case 'D':
8864 {
8865 enum rtx_code rc = GET_CODE (x);
8866 switch (rc)
8867 {
8868 case NE: fputs ("ne", file); break;
8869 case EQ: fputs ("e", file); break;
8870 case GE: fputs ("gez", file); break;
8871 case LT: fputs ("lz", file); break;
8872 case LE: fputs ("lez", file); break;
8873 case GT: fputs ("gz", file); break;
8874 default: output_operand_lossage ("invalid %%D operand");
8875 }
8876 return;
8877 }
8878
8879 case 'b':
8880 {
8881 /* Print a sign-extended character. */
8882 int i = trunc_int_for_mode (INTVAL (x), QImode);
8883 fprintf (file, "%d", i);
8884 return;
8885 }
8886
8887 case 'f':
8888 /* Operand must be a MEM; write its address. */
8889 if (GET_CODE (x) != MEM)
8890 output_operand_lossage ("invalid %%f operand");
8891 output_address (GET_MODE (x), XEXP (x, 0));
8892 return;
8893
8894 case 's':
8895 {
8896 /* Print a sign-extended 32-bit value. */
8897 HOST_WIDE_INT i;
8898 if (GET_CODE(x) == CONST_INT)
8899 i = INTVAL (x);
8900 else if (GET_CODE(x) == CONST_DOUBLE)
8901 i = CONST_DOUBLE_LOW (x);
8902 else
8903 {
8904 output_operand_lossage ("invalid %%s operand");
8905 return;
8906 }
8907 i = trunc_int_for_mode (i, SImode);
8908 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
8909 return;
8910 }
8911
8912 case 0:
8913 /* Do nothing special. */
8914 break;
8915
8916 default:
8917 /* Undocumented flag. */
8918 output_operand_lossage ("invalid operand output code");
8919 }
8920
8921 if (GET_CODE (x) == REG)
8922 fputs (reg_names[REGNO (x)], file);
8923 else if (GET_CODE (x) == MEM)
8924 {
8925 fputc ('[', file);
8926 /* Poor Sun assembler doesn't understand absolute addressing. */
8927 if (CONSTANT_P (XEXP (x, 0)))
8928 fputs ("%g0+", file);
8929 output_address (GET_MODE (x), XEXP (x, 0));
8930 fputc (']', file);
8931 }
8932 else if (GET_CODE (x) == HIGH)
8933 {
8934 fputs ("%hi(", file);
8935 output_addr_const (file, XEXP (x, 0));
8936 fputc (')', file);
8937 }
8938 else if (GET_CODE (x) == LO_SUM)
8939 {
8940 sparc_print_operand (file, XEXP (x, 0), 0);
8941 if (TARGET_CM_MEDMID)
8942 fputs ("+%l44(", file);
8943 else
8944 fputs ("+%lo(", file);
8945 output_addr_const (file, XEXP (x, 1));
8946 fputc (')', file);
8947 }
8948 else if (GET_CODE (x) == CONST_DOUBLE
8949 && (GET_MODE (x) == VOIDmode
8950 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
8951 {
8952 if (CONST_DOUBLE_HIGH (x) == 0)
8953 fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
8954 else if (CONST_DOUBLE_HIGH (x) == -1
8955 && CONST_DOUBLE_LOW (x) < 0)
8956 fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
8957 else
8958 output_operand_lossage ("long long constant not a valid immediate operand");
8959 }
8960 else if (GET_CODE (x) == CONST_DOUBLE)
8961 output_operand_lossage ("floating point constant not a valid immediate operand");
8962 else { output_addr_const (file, x); }
8963 }
8964
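/* Illustrative uses of the codes above in insn templates: "%r1" prints
   %g0 when operand 1 is zero, "%L0" and "%H0" print the two halves of a
   register pair, and the punctuation characters %(, %* and %# cooperate
   to fill or annul delay slots.  */
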
8965 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
8966
8967 static void
8968 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
8969 {
8970 register rtx base, index = 0;
8971 int offset = 0;
8972 register rtx addr = x;
8973
8974 if (REG_P (addr))
8975 fputs (reg_names[REGNO (addr)], file);
8976 else if (GET_CODE (addr) == PLUS)
8977 {
8978 if (CONST_INT_P (XEXP (addr, 0)))
8979 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
8980 else if (CONST_INT_P (XEXP (addr, 1)))
8981 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
8982 else
8983 base = XEXP (addr, 0), index = XEXP (addr, 1);
8984 if (GET_CODE (base) == LO_SUM)
8985 {
8986 gcc_assert (USE_AS_OFFSETABLE_LO10
8987 && TARGET_ARCH64
8988 && ! TARGET_CM_MEDMID);
8989 output_operand (XEXP (base, 0), 0);
8990 fputs ("+%lo(", file);
8991 output_address (VOIDmode, XEXP (base, 1));
8992 fprintf (file, ")+%d", offset);
8993 }
8994 else
8995 {
8996 fputs (reg_names[REGNO (base)], file);
8997 if (index == 0)
8998 fprintf (file, "%+d", offset);
8999 else if (REG_P (index))
9000 fprintf (file, "+%s", reg_names[REGNO (index)]);
9001 else if (GET_CODE (index) == SYMBOL_REF
9002 || GET_CODE (index) == LABEL_REF
9003 || GET_CODE (index) == CONST)
9004 fputc ('+', file), output_addr_const (file, index);
9005 else gcc_unreachable ();
9006 }
9007 }
9008 else if (GET_CODE (addr) == MINUS
9009 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9010 {
9011 output_addr_const (file, XEXP (addr, 0));
9012 fputs ("-(", file);
9013 output_addr_const (file, XEXP (addr, 1));
9014 fputs ("-.)", file);
9015 }
9016 else if (GET_CODE (addr) == LO_SUM)
9017 {
9018 output_operand (XEXP (addr, 0), 0);
9019 if (TARGET_CM_MEDMID)
9020 fputs ("+%l44(", file);
9021 else
9022 fputs ("+%lo(", file);
9023 output_address (VOIDmode, XEXP (addr, 1));
9024 fputc (')', file);
9025 }
9026 else if (flag_pic
9027 && GET_CODE (addr) == CONST
9028 && GET_CODE (XEXP (addr, 0)) == MINUS
9029 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9030 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9031 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9032 {
9033 addr = XEXP (addr, 0);
9034 output_addr_const (file, XEXP (addr, 0));
9035 /* Group the args of the second CONST in parentheses.  */
9036 fputs ("-(", file);
9037 /* Skip past the second CONST--it does nothing for us. */
9038 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9039 /* Close the parenthesis. */
9040 fputc (')', file);
9041 }
9042 else
9043 {
9044 output_addr_const (file, addr);
9045 }
9046 }
9047 \f
9048 /* Target hook for assembling integer objects. The sparc version has
9049 special handling for aligned DI-mode objects. */
9050
9051 static bool
9052 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9053 {
9054 /* ??? We only output .xword's for symbols and only then in environments
9055 where the assembler can handle them. */
9056 if (aligned_p && size == 8
9057 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
9058 {
9059 if (TARGET_V9)
9060 {
9061 assemble_integer_with_op ("\t.xword\t", x);
9062 return true;
9063 }
9064 else
9065 {
9066 assemble_aligned_integer (4, const0_rtx);
9067 assemble_aligned_integer (4, x);
9068 return true;
9069 }
9070 }
9071 return default_assemble_integer (x, size, aligned_p);
9072 }
9073 \f
9074 /* Return the value of a code used in the .proc pseudo-op that says
9075 what kind of result this function returns. For non-C types, we pick
9076 the closest C type. */
9077
9078 #ifndef SHORT_TYPE_SIZE
9079 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9080 #endif
9081
9082 #ifndef INT_TYPE_SIZE
9083 #define INT_TYPE_SIZE BITS_PER_WORD
9084 #endif
9085
9086 #ifndef LONG_TYPE_SIZE
9087 #define LONG_TYPE_SIZE BITS_PER_WORD
9088 #endif
9089
9090 #ifndef LONG_LONG_TYPE_SIZE
9091 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9092 #endif
9093
9094 #ifndef FLOAT_TYPE_SIZE
9095 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9096 #endif
9097
9098 #ifndef DOUBLE_TYPE_SIZE
9099 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9100 #endif
9101
9102 #ifndef LONG_DOUBLE_TYPE_SIZE
9103 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9104 #endif
9105
9106 unsigned long
9107 sparc_type_code (register tree type)
9108 {
9109 register unsigned long qualifiers = 0;
9110 register unsigned shift;
9111
9112 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9113 setting more, since some assemblers will give an error for this. Also,
9114 we must be careful to avoid shifts of 32 bits or more to avoid getting
9115 unpredictable results. */
9116
9117 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9118 {
9119 switch (TREE_CODE (type))
9120 {
9121 case ERROR_MARK:
9122 return qualifiers;
9123
9124 case ARRAY_TYPE:
9125 qualifiers |= (3 << shift);
9126 break;
9127
9128 case FUNCTION_TYPE:
9129 case METHOD_TYPE:
9130 qualifiers |= (2 << shift);
9131 break;
9132
9133 case POINTER_TYPE:
9134 case REFERENCE_TYPE:
9135 case OFFSET_TYPE:
9136 qualifiers |= (1 << shift);
9137 break;
9138
9139 case RECORD_TYPE:
9140 return (qualifiers | 8);
9141
9142 case UNION_TYPE:
9143 case QUAL_UNION_TYPE:
9144 return (qualifiers | 9);
9145
9146 case ENUMERAL_TYPE:
9147 return (qualifiers | 10);
9148
9149 case VOID_TYPE:
9150 return (qualifiers | 16);
9151
9152 case INTEGER_TYPE:
9153 /* If this is a range type, consider it to be the underlying
9154 type. */
9155 if (TREE_TYPE (type) != 0)
9156 break;
9157
9158 /* Carefully distinguish all the standard types of C,
9159 without messing up if the language is not C. We do this by
9160 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9161 look at both the names and the above fields, but that's redundant.
9162 Any type whose size is between two C types will be considered
9163 to be the wider of the two types. Also, we do not have a
9164 special code to use for "long long", so anything wider than
9165 long is treated the same. Note that we can't distinguish
9166 between "int" and "long" in this code if they are the same
9167 size, but that's fine, since neither can the assembler. */
9168
9169 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9170 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9171
9172 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9173 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9174
9175 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9176 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9177
9178 else
9179 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9180
9181 case REAL_TYPE:
9182 /* If this is a range type, consider it to be the underlying
9183 type. */
9184 if (TREE_TYPE (type) != 0)
9185 break;
9186
9187 /* Carefully distinguish all the standard types of C,
9188 without messing up if the language is not C. */
9189
9190 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9191 return (qualifiers | 6);
9192
9193 else
9194 return (qualifiers | 7);
9195
9196 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9197 /* ??? We need to distinguish between double and float complex types,
9198 but I don't know how yet because I can't reach this code from
9199 existing front-ends. */
9200 return (qualifiers | 7); /* Who knows? */
9201
9202 case VECTOR_TYPE:
9203 case BOOLEAN_TYPE: /* Boolean truth value type. */
9204 case LANG_TYPE:
9205 case NULLPTR_TYPE:
9206 return qualifiers;
9207
9208 default:
9209 gcc_unreachable (); /* Not a type! */
9210 }
9211 }
9212
9213 return qualifiers;
9214 }
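
/* Worked example (derived from the loop above): for the C type "int *"
   the POINTER_TYPE iteration sets (1 << 6) and the INTEGER_TYPE leg
   returns 0x40 | 4 == 0x44; for "unsigned short **" the result is
   (1 << 6) | (1 << 8) | 13 == 0x14d.  */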
9215 \f
9216 /* Nested function support. */
9217
9218 /* Emit RTL insns to initialize the variable parts of a trampoline.
9219 FNADDR is an RTX for the address of the function's pure code.
9220 CXT is an RTX for the static chain value for the function.
9221
9222 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9223 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9224 (to store insns). This is a bit excessive. Perhaps a different
9225 mechanism would be better here.
9226
9227 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9228
9229 static void
9230 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9231 {
9232 /* SPARC 32-bit trampoline:
9233
9234 sethi %hi(fn), %g1
9235 sethi %hi(static), %g2
9236 jmp %g1+%lo(fn)
9237 or %g2, %lo(static), %g2
9238
9239 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9240 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9241 */
9242
9243 emit_move_insn
9244 (adjust_address (m_tramp, SImode, 0),
9245 expand_binop (SImode, ior_optab,
9246 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9247 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9248 NULL_RTX, 1, OPTAB_DIRECT));
9249
9250 emit_move_insn
9251 (adjust_address (m_tramp, SImode, 4),
9252 expand_binop (SImode, ior_optab,
9253 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9254 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9255 NULL_RTX, 1, OPTAB_DIRECT));
9256
9257 emit_move_insn
9258 (adjust_address (m_tramp, SImode, 8),
9259 expand_binop (SImode, ior_optab,
9260 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9261 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9262 NULL_RTX, 1, OPTAB_DIRECT));
9263
9264 emit_move_insn
9265 (adjust_address (m_tramp, SImode, 12),
9266 expand_binop (SImode, ior_optab,
9267 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9268 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9269 NULL_RTX, 1, OPTAB_DIRECT));
9270
9271 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9272 aligned on a 16 byte boundary so one flush clears it all. */
9273 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9274 if (sparc_cpu != PROCESSOR_ULTRASPARC
9275 && sparc_cpu != PROCESSOR_ULTRASPARC3
9276 && sparc_cpu != PROCESSOR_NIAGARA
9277 && sparc_cpu != PROCESSOR_NIAGARA2
9278 && sparc_cpu != PROCESSOR_NIAGARA3
9279 && sparc_cpu != PROCESSOR_NIAGARA4)
9280 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9281
9282 /* Call __enable_execute_stack after writing onto the stack to make sure
9283 the stack address is accessible. */
9284 #ifdef HAVE_ENABLE_EXECUTE_STACK
9285 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9286 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9287 #endif
9288
9289 }
9290
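/* Worked encoding example (illustrative): for fn == 0x12345678 the first
   word stored above is (0x12345678 >> 10) | 0x03000000 == 0x03048d15,
   i.e. "sethi %hi(0x12345678), %g1", and the third word is
   (0x12345678 & 0x3ff) | 0x81c06000 == 0x81c06278, i.e.
   "jmp %g1+%lo(0x12345678)".  */
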
9291 /* The 64-bit version is simpler because it makes more sense to load the
9292 values as "immediate" data out of the trampoline. It's also easier since
9293 we can read the PC without clobbering a register. */
9294
9295 static void
9296 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9297 {
9298 /* SPARC 64-bit trampoline:
9299
9300 rd %pc, %g1
9301 ldx [%g1+24], %g5
9302 jmp %g5
9303 ldx [%g1+16], %g5
9304 +16 bytes data
9305 */
9306
9307 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9308 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9309 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9310 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9311 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9312 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9313 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9314 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9315 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9316 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9317 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9318
9319 if (sparc_cpu != PROCESSOR_ULTRASPARC
9320 && sparc_cpu != PROCESSOR_ULTRASPARC3
9321 && sparc_cpu != PROCESSOR_NIAGARA
9322 && sparc_cpu != PROCESSOR_NIAGARA2
9323 && sparc_cpu != PROCESSOR_NIAGARA3
9324 && sparc_cpu != PROCESSOR_NIAGARA4)
9325 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9326
9327 /* Call __enable_execute_stack after writing onto the stack to make sure
9328 the stack address is accessible. */
9329 #ifdef HAVE_ENABLE_EXECUTE_STACK
9330 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9331 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9332 #endif
9333 }
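/* Illustration only: the 32-byte object built above, described as a
   hypothetical C struct (assuming a 64-bit target).  The "rd %pc" makes
   %g1 point at the trampoline itself, so the ldx offsets 24 and 16
   address the two data words; the jump target is loaded from offset 24
   and the static chain lands in %g5 in the delay slot of the jmp.  */
#if 0
struct sparc64_trampoline
{
  unsigned int insn[4];        /* rd %pc / ldx [+24] / jmp / ldx [+16] */
  unsigned long long cxt;      /* static chain value, at offset 16 */
  unsigned long long fn;       /* function address, at offset 24 */
};
#endif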
9334
9335 /* Worker for TARGET_TRAMPOLINE_INIT. */
9336
9337 static void
9338 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9339 {
9340 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9341 cxt = force_reg (Pmode, cxt);
9342 if (TARGET_ARCH64)
9343 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9344 else
9345 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9346 }
9347 \f
9348 /* Adjust the cost of a scheduling dependency. Return the new cost of
9349 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9350
9351 static int
9352 supersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
9353 {
9354 enum attr_type insn_type;
9355
9356 if (recog_memoized (insn) < 0)
9357 return cost;
9358
9359 insn_type = get_attr_type (insn);
9360
9361 if (REG_NOTE_KIND (link) == 0)
9362 {
9363 /* Data dependency; DEP_INSN writes a register that INSN reads some
9364 cycles later. */
9365
9366 /* If a load, then the dependence must be on the memory address;
9367 add an extra "cycle". Note that the cost could be two cycles
9368 if the reg was written late in an instruction group; we cannot tell
9369 here. */
9370 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9371 return cost + 3;
9372
9373 /* Get the delay only if the address of the store is the dependence. */
9374 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9375 {
9376 rtx pat = PATTERN (insn);
9377 rtx dep_pat = PATTERN (dep_insn);
9378
9379 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9380 return cost; /* This should not happen! */
9381
9382 /* The dependency between the two instructions was on the data that
9383 is being stored. Assume that this implies that the address of the
9384 store is not dependent. */
9385 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9386 return cost;
9387
9388 return cost + 3; /* An approximation. */
9389 }
9390
9391 /* A shift instruction cannot receive its data from an instruction
9392 in the same cycle; penalize such a dependency. */
9393 if (insn_type == TYPE_SHIFT)
9394 return cost + 3; /* Split before cascade into shift. */
9395 }
9396 else
9397 {
9398 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9399 INSN writes some cycles later. */
9400
9401 /* These are only significant for the fpu unit; writing a fp reg before
9402 the fpu has finished with it stalls the processor. */
9403
9404 /* Reusing an integer register causes no problems. */
9405 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9406 return 0;
9407 }
9408
9409 return cost;
9410 }
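/* For example, in the SuperSPARC store case above,

     add %o1, %o2, %o3
     st  %o3, [%o4]

   the dependence is on the stored data (the SET_DEST of the add equals
   the SET_SRC of the store), so the cost is left alone; had %o3 fed the
   address instead, as in "st %g1, [%o3]", the cost would be bumped by 3.  */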
9411
9412 static int
9413 hypersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
9414 {
9415 enum attr_type insn_type, dep_type;
9416 rtx pat = PATTERN (insn);
9417 rtx dep_pat = PATTERN (dep_insn);
9418
9419 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9420 return cost;
9421
9422 insn_type = get_attr_type (insn);
9423 dep_type = get_attr_type (dep_insn);
9424
9425 switch (REG_NOTE_KIND (link))
9426 {
9427 case 0:
9428 /* Data dependency; DEP_INSN writes a register that INSN reads some
9429 cycles later. */
9430
9431 switch (insn_type)
9432 {
9433 case TYPE_STORE:
9434 case TYPE_FPSTORE:
9435 /* Get the delay iff the address of the store is the dependence. */
9436 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9437 return cost;
9438
9439 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9440 return cost;
9441 return cost + 3;
9442
9443 case TYPE_LOAD:
9444 case TYPE_SLOAD:
9445 case TYPE_FPLOAD:
9446 /* If a load, then the dependence must be on the memory address. If
9447 the addresses aren't equal, then it might be a false dependency. */
9448 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9449 {
9450 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9451 || GET_CODE (SET_DEST (dep_pat)) != MEM
9452 || GET_CODE (SET_SRC (pat)) != MEM
9453 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9454 XEXP (SET_SRC (pat), 0)))
9455 return cost + 2;
9456
9457 return cost + 8;
9458 }
9459 break;
9460
9461 case TYPE_BRANCH:
9462 /* Compare to branch latency is 0. There is no benefit from
9463 separating compare and branch. */
9464 if (dep_type == TYPE_COMPARE)
9465 return 0;
9466 /* Floating point compare to branch latency is less than
9467 compare to conditional move. */
9468 if (dep_type == TYPE_FPCMP)
9469 return cost - 1;
9470 break;
9471 default:
9472 break;
9473 }
9474 break;
9475
9476 case REG_DEP_ANTI:
9477 /* Anti-dependencies only penalize the fpu unit. */
9478 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9479 return 0;
9480 break;
9481
9482 default:
9483 break;
9484 }
9485
9486 return cost;
9487 }
9488
9489 static int
9490 sparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
9491 {
9492 switch (sparc_cpu)
9493 {
9494 case PROCESSOR_SUPERSPARC:
9495 cost = supersparc_adjust_cost (insn, link, dep, cost);
9496 break;
9497 case PROCESSOR_HYPERSPARC:
9498 case PROCESSOR_SPARCLITE86X:
9499 cost = hypersparc_adjust_cost (insn, link, dep, cost);
9500 break;
9501 default:
9502 break;
9503 }
9504 return cost;
9505 }
9506
9507 static void
9508 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9509 int sched_verbose ATTRIBUTE_UNUSED,
9510 int max_ready ATTRIBUTE_UNUSED)
9511 {}
9512
9513 static int
9514 sparc_use_sched_lookahead (void)
9515 {
9516 if (sparc_cpu == PROCESSOR_NIAGARA
9517 || sparc_cpu == PROCESSOR_NIAGARA2
9518 || sparc_cpu == PROCESSOR_NIAGARA3)
9519 return 0;
9520 if (sparc_cpu == PROCESSOR_NIAGARA4)
9521 return 2;
9522 if (sparc_cpu == PROCESSOR_ULTRASPARC
9523 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9524 return 4;
9525 if ((1 << sparc_cpu) &
9526 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9527 (1 << PROCESSOR_SPARCLITE86X)))
9528 return 3;
9529 return 0;
9530 }
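/* Illustration only: the bitmask test above is a compact membership
   check over small enum values; it is equivalent to

     if (sparc_cpu == PROCESSOR_SUPERSPARC
         || sparc_cpu == PROCESSOR_HYPERSPARC
         || sparc_cpu == PROCESSOR_SPARCLITE86X)
       return 3;

   and relies on every PROCESSOR_* value being below the bit width of int.  */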
9531
9532 static int
9533 sparc_issue_rate (void)
9534 {
9535 switch (sparc_cpu)
9536 {
9537 case PROCESSOR_NIAGARA:
9538 case PROCESSOR_NIAGARA2:
9539 case PROCESSOR_NIAGARA3:
9540 default:
9541 return 1;
9542 case PROCESSOR_NIAGARA4:
9543 case PROCESSOR_V9:
9544 /* Assume V9 processors are capable of at least dual-issue. */
9545 return 2;
9546 case PROCESSOR_SUPERSPARC:
9547 return 3;
9548 case PROCESSOR_HYPERSPARC:
9549 case PROCESSOR_SPARCLITE86X:
9550 return 2;
9551 case PROCESSOR_ULTRASPARC:
9552 case PROCESSOR_ULTRASPARC3:
9553 return 4;
9554 }
9555 }
9556
9557 static int
9558 set_extends (rtx_insn *insn)
9559 {
9560 register rtx pat = PATTERN (insn);
9561
9562 switch (GET_CODE (SET_SRC (pat)))
9563 {
9564 /* Load and some shift instructions zero extend. */
9565 case MEM:
9566 case ZERO_EXTEND:
9567 /* sethi clears the high bits */
9568 case HIGH:
9569 /* LO_SUM is used with sethi; sethi clears the high
9570 bits and the values used with lo_sum are positive. */
9571 case LO_SUM:
9572 /* Store flag stores 0 or 1 */
9573 case LT: case LTU:
9574 case GT: case GTU:
9575 case LE: case LEU:
9576 case GE: case GEU:
9577 case EQ:
9578 case NE:
9579 return 1;
9580 case AND:
9581 {
9582 rtx op0 = XEXP (SET_SRC (pat), 0);
9583 rtx op1 = XEXP (SET_SRC (pat), 1);
9584 if (GET_CODE (op1) == CONST_INT)
9585 return INTVAL (op1) >= 0;
9586 if (GET_CODE (op0) != REG)
9587 return 0;
9588 if (sparc_check_64 (op0, insn) == 1)
9589 return 1;
9590 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9591 }
9592 case IOR:
9593 case XOR:
9594 {
9595 rtx op0 = XEXP (SET_SRC (pat), 0);
9596 rtx op1 = XEXP (SET_SRC (pat), 1);
9597 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9598 return 0;
9599 if (GET_CODE (op1) == CONST_INT)
9600 return INTVAL (op1) >= 0;
9601 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9602 }
9603 case LSHIFTRT:
9604 return GET_MODE (SET_SRC (pat)) == SImode;
9605 /* Positive integers leave the high bits zero. */
9606 case CONST_DOUBLE:
9607 return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
9608 case CONST_INT:
9609 return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
9610 case ASHIFTRT:
9611 case SIGN_EXTEND:
9612 return - (GET_MODE (SET_SRC (pat)) == SImode);
9613 case REG:
9614 return sparc_check_64 (SET_SRC (pat), insn);
9615 default:
9616 return 0;
9617 }
9618 }
9619
9620 /* We _ought_ to have only one kind per function, but... */
9621 static GTY(()) rtx sparc_addr_diff_list;
9622 static GTY(()) rtx sparc_addr_list;
9623
9624 void
9625 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9626 {
9627 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9628 if (diff)
9629 sparc_addr_diff_list
9630 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9631 else
9632 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9633 }
9634
9635 static void
9636 sparc_output_addr_vec (rtx vec)
9637 {
9638 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9639 int idx, vlen = XVECLEN (body, 0);
9640
9641 #ifdef ASM_OUTPUT_ADDR_VEC_START
9642 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9643 #endif
9644
9645 #ifdef ASM_OUTPUT_CASE_LABEL
9646 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9647 NEXT_INSN (lab));
9648 #else
9649 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9650 #endif
9651
9652 for (idx = 0; idx < vlen; idx++)
9653 {
9654 ASM_OUTPUT_ADDR_VEC_ELT
9655 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9656 }
9657
9658 #ifdef ASM_OUTPUT_ADDR_VEC_END
9659 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9660 #endif
9661 }
9662
9663 static void
9664 sparc_output_addr_diff_vec (rtx vec)
9665 {
9666 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9667 rtx base = XEXP (XEXP (body, 0), 0);
9668 int idx, vlen = XVECLEN (body, 1);
9669
9670 #ifdef ASM_OUTPUT_ADDR_VEC_START
9671 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9672 #endif
9673
9674 #ifdef ASM_OUTPUT_CASE_LABEL
9675 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9676 NEXT_INSN (lab));
9677 #else
9678 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9679 #endif
9680
9681 for (idx = 0; idx < vlen; idx++)
9682 {
9683 ASM_OUTPUT_ADDR_DIFF_ELT
9684 (asm_out_file,
9685 body,
9686 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9687 CODE_LABEL_NUMBER (base));
9688 }
9689
9690 #ifdef ASM_OUTPUT_ADDR_VEC_END
9691 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9692 #endif
9693 }
9694
9695 static void
9696 sparc_output_deferred_case_vectors (void)
9697 {
9698 rtx t;
9699 int align;
9700
9701 if (sparc_addr_list == NULL_RTX
9702 && sparc_addr_diff_list == NULL_RTX)
9703 return;
9704
9705 /* Align to cache line in the function's code section. */
9706 switch_to_section (current_function_section ());
9707
9708 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9709 if (align > 0)
9710 ASM_OUTPUT_ALIGN (asm_out_file, align);
9711
9712 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9713 sparc_output_addr_vec (XEXP (t, 0));
9714 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9715 sparc_output_addr_diff_vec (XEXP (t, 0));
9716
9717 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9718 }
9719
9720 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9721 unknown. Return 1 if the high bits are zero, -1 if the register is
9722 sign extended. */
9723 int
9724 sparc_check_64 (rtx x, rtx_insn *insn)
9725 {
9726 /* If a register is set only once it is safe to ignore insns this
9727 code does not know how to handle. The loop will either recognize
9728 the single set and return the correct value or fail to recognize
9729 it and return 0. */
9730 int set_once = 0;
9731 rtx y = x;
9732
9733 gcc_assert (GET_CODE (x) == REG);
9734
9735 if (GET_MODE (x) == DImode)
9736 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9737
9738 if (flag_expensive_optimizations
9739 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9740 set_once = 1;
9741
9742 if (insn == 0)
9743 {
9744 if (set_once)
9745 insn = get_last_insn_anywhere ();
9746 else
9747 return 0;
9748 }
9749
9750 while ((insn = PREV_INSN (insn)))
9751 {
9752 switch (GET_CODE (insn))
9753 {
9754 case JUMP_INSN:
9755 case NOTE:
9756 break;
9757 case CODE_LABEL:
9758 case CALL_INSN:
9759 default:
9760 if (! set_once)
9761 return 0;
9762 break;
9763 case INSN:
9764 {
9765 rtx pat = PATTERN (insn);
9766 if (GET_CODE (pat) != SET)
9767 return 0;
9768 if (rtx_equal_p (x, SET_DEST (pat)))
9769 return set_extends (insn);
9770 if (y && rtx_equal_p (y, SET_DEST (pat)))
9771 return set_extends (insn);
9772 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9773 return 0;
9774 }
9775 }
9776 }
9777 return 0;
9778 }
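/* For example, after

     (insn (set (reg:SI %o0) (lshiftrt:SI (reg:SI %o1) (const_int 3))))

   set_extends returns 1 for that insn (the LSHIFTRT case above), so a
   later sparc_check_64 (%o0, insn) walking backwards reports the high
   32 bits of %o0 as zero.  */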
9779
9780 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9781 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
9782
9783 const char *
9784 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
9785 {
9786 static char asm_code[60];
9787
9788 /* The scratch register is only required when the destination
9789 register is not a 64-bit global or out register. */
9790 if (which_alternative != 2)
9791 operands[3] = operands[0];
9792
9793 /* We can only shift by constants <= 63. */
9794 if (GET_CODE (operands[2]) == CONST_INT)
9795 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9796
9797 if (GET_CODE (operands[1]) == CONST_INT)
9798 {
9799 output_asm_insn ("mov\t%1, %3", operands);
9800 }
9801 else
9802 {
9803 output_asm_insn ("sllx\t%H1, 32, %3", operands);
9804 if (sparc_check_64 (operands[1], insn) <= 0)
9805 output_asm_insn ("srl\t%L1, 0, %L1", operands);
9806 output_asm_insn ("or\t%L1, %3, %3", operands);
9807 }
9808
9809 strcpy (asm_code, opcode);
9810
9811 if (which_alternative != 2)
9812 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9813 else
9814 return
9815 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
9816 }
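/* For example, for opcode "sllx" in the common alternative (scratch
   folded into the destination, so %3 is %0), the sequence printed
   above for a 64-bit value held in the register pair %H1:%L1 is

     sllx  %H1, 32, %0        ! high word into the upper half
     srl   %L1, 0, %L1        ! clear high bits of low word if unknown
     or    %L1, %0, %0        ! %0 = reassembled 64-bit value
     sllx  %0, %2, %L0        ! the actual shift
     srlx  %L0, 32, %H0       ! split the result back into the pair  */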
9817 \f
9818 /* Output rtl to increment the profiler label LABELNO
9819 for profiling a function entry. */
9820
9821 void
9822 sparc_profile_hook (int labelno)
9823 {
9824 char buf[32];
9825 rtx lab, fun;
9826
9827 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9828 if (NO_PROFILE_COUNTERS)
9829 {
9830 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9831 }
9832 else
9833 {
9834 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9835 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9836 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9837 }
9838 }
9839 \f
9840 #ifdef TARGET_SOLARIS
9841 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
9842
9843 static void
9844 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9845 tree decl ATTRIBUTE_UNUSED)
9846 {
9847 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9848 {
9849 solaris_elf_asm_comdat_section (name, flags, decl);
9850 return;
9851 }
9852
9853 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9854
9855 if (!(flags & SECTION_DEBUG))
9856 fputs (",#alloc", asm_out_file);
9857 if (flags & SECTION_WRITE)
9858 fputs (",#write", asm_out_file);
9859 if (flags & SECTION_TLS)
9860 fputs (",#tls", asm_out_file);
9861 if (flags & SECTION_CODE)
9862 fputs (",#execinstr", asm_out_file);
9863
9864 if (flags & SECTION_NOTYPE)
9865 ;
9866 else if (flags & SECTION_BSS)
9867 fputs (",#nobits", asm_out_file);
9868 else
9869 fputs (",#progbits", asm_out_file);
9870
9871 fputc ('\n', asm_out_file);
9872 }
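/* For example, for a writable data section named "my_sec" (hypothetical
   name) with SECTION_WRITE set and none of the debug/TLS/code/BSS flags,
   the code above emits:

     .section "my_sec",#alloc,#write,#progbits  */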
9873 #endif /* TARGET_SOLARIS */
9874
9875 /* We do not allow indirect calls to be optimized into sibling calls.
9876
9877 We cannot use sibling calls when delayed branches are disabled
9878 because they will likely require the call delay slot to be filled.
9879
9880 Also, on SPARC 32-bit we cannot emit a sibling call when the
9881 current function returns a structure. This is because the "unimp
9882 after call" convention would cause the callee to return to the
9883 wrong place. The generic code already disallows cases where the
9884 function being called returns a structure.
9885
9886 It may seem strange how this last case could occur. Usually there
9887 is code after the call which jumps to epilogue code which dumps the
9888 return value into the struct return area. That ought to invalidate
9889 the sibling call, right? Well, in the C++ case we can end up passing
9890 the pointer to the struct return area to a constructor (which returns
9891 void) and then nothing else happens. Such a sibling call would look
9892 valid without the added check here.
9893
9894 VxWorks PIC PLT entries require the global pointer to be initialized
9895 on entry. We therefore can't emit sibling calls to them. */
9896 static bool
9897 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9898 {
9899 return (decl
9900 && flag_delayed_branch
9901 && (TARGET_ARCH64 || ! cfun->returns_struct)
9902 && !(TARGET_VXWORKS_RTP
9903 && flag_pic
9904 && !targetm.binds_local_p (decl)));
9905 }
9906 \f
9907 /* libfunc renaming. */
9908
9909 static void
9910 sparc_init_libfuncs (void)
9911 {
9912 if (TARGET_ARCH32)
9913 {
9914 /* Use the subroutines that Sun's library provides for integer
9915 multiply and divide. The `*' prevents an underscore from
9916 being prepended by the compiler. .umul is a little faster
9917 than .mul. */
9918 set_optab_libfunc (smul_optab, SImode, "*.umul");
9919 set_optab_libfunc (sdiv_optab, SImode, "*.div");
9920 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
9921 set_optab_libfunc (smod_optab, SImode, "*.rem");
9922 set_optab_libfunc (umod_optab, SImode, "*.urem");
9923
9924 /* TFmode arithmetic. These names are part of the SPARC 32-bit ABI. */
9925 set_optab_libfunc (add_optab, TFmode, "_Q_add");
9926 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
9927 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
9928 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
9929 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
9930
9931 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
9932 is because with soft-float, the SFmode and DFmode sqrt
9933 instructions will be absent, and the compiler will notice and
9934 try to use the TFmode sqrt instruction for calls to the
9935 builtin function sqrt, but this fails. */
9936 if (TARGET_FPU)
9937 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
9938
9939 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
9940 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
9941 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
9942 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
9943 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
9944 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
9945
9946 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
9947 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
9948 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
9949 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
9950
9951 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
9952 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
9953 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
9954 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
9955
9956 if (DITF_CONVERSION_LIBFUNCS)
9957 {
9958 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
9959 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
9960 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
9961 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
9962 }
9963
9964 if (SUN_CONVERSION_LIBFUNCS)
9965 {
9966 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
9967 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
9968 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
9969 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
9970 }
9971 }
9972 if (TARGET_ARCH64)
9973 {
9974 /* In the SPARC 64-bit ABI, SImode multiply and divide functions
9975 do not exist in the library. Make sure the compiler does not
9976 emit calls to them by accident. (It should always use the
9977 hardware instructions.) */
9978 set_optab_libfunc (smul_optab, SImode, 0);
9979 set_optab_libfunc (sdiv_optab, SImode, 0);
9980 set_optab_libfunc (udiv_optab, SImode, 0);
9981 set_optab_libfunc (smod_optab, SImode, 0);
9982 set_optab_libfunc (umod_optab, SImode, 0);
9983
9984 if (SUN_INTEGER_MULTIPLY_64)
9985 {
9986 set_optab_libfunc (smul_optab, DImode, "__mul64");
9987 set_optab_libfunc (sdiv_optab, DImode, "__div64");
9988 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
9989 set_optab_libfunc (smod_optab, DImode, "__rem64");
9990 set_optab_libfunc (umod_optab, DImode, "__urem64");
9991 }
9992
9993 if (SUN_CONVERSION_LIBFUNCS)
9994 {
9995 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
9996 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
9997 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
9998 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
9999 }
10000 }
10001 }
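/* Illustration only: with the 32-bit mappings above in effect, ordinary
   source code reaches the named routines instead of inline code, e.g.
   (assuming TARGET_ARCH32 and SUN_CONVERSION_LIBFUNCS):

     long double q = a * b;          // calls _Q_mul
     long long   n = (long long) d;  // calls __dtoll for double d

   since TFmode is the SPARC 128-bit long double.  */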
10002 \f
10003 /* SPARC builtins. */
10004 enum sparc_builtins
10005 {
10006 /* FPU builtins. */
10007 SPARC_BUILTIN_LDFSR,
10008 SPARC_BUILTIN_STFSR,
10009
10010 /* VIS 1.0 builtins. */
10011 SPARC_BUILTIN_FPACK16,
10012 SPARC_BUILTIN_FPACK32,
10013 SPARC_BUILTIN_FPACKFIX,
10014 SPARC_BUILTIN_FEXPAND,
10015 SPARC_BUILTIN_FPMERGE,
10016 SPARC_BUILTIN_FMUL8X16,
10017 SPARC_BUILTIN_FMUL8X16AU,
10018 SPARC_BUILTIN_FMUL8X16AL,
10019 SPARC_BUILTIN_FMUL8SUX16,
10020 SPARC_BUILTIN_FMUL8ULX16,
10021 SPARC_BUILTIN_FMULD8SUX16,
10022 SPARC_BUILTIN_FMULD8ULX16,
10023 SPARC_BUILTIN_FALIGNDATAV4HI,
10024 SPARC_BUILTIN_FALIGNDATAV8QI,
10025 SPARC_BUILTIN_FALIGNDATAV2SI,
10026 SPARC_BUILTIN_FALIGNDATADI,
10027 SPARC_BUILTIN_WRGSR,
10028 SPARC_BUILTIN_RDGSR,
10029 SPARC_BUILTIN_ALIGNADDR,
10030 SPARC_BUILTIN_ALIGNADDRL,
10031 SPARC_BUILTIN_PDIST,
10032 SPARC_BUILTIN_EDGE8,
10033 SPARC_BUILTIN_EDGE8L,
10034 SPARC_BUILTIN_EDGE16,
10035 SPARC_BUILTIN_EDGE16L,
10036 SPARC_BUILTIN_EDGE32,
10037 SPARC_BUILTIN_EDGE32L,
10038 SPARC_BUILTIN_FCMPLE16,
10039 SPARC_BUILTIN_FCMPLE32,
10040 SPARC_BUILTIN_FCMPNE16,
10041 SPARC_BUILTIN_FCMPNE32,
10042 SPARC_BUILTIN_FCMPGT16,
10043 SPARC_BUILTIN_FCMPGT32,
10044 SPARC_BUILTIN_FCMPEQ16,
10045 SPARC_BUILTIN_FCMPEQ32,
10046 SPARC_BUILTIN_FPADD16,
10047 SPARC_BUILTIN_FPADD16S,
10048 SPARC_BUILTIN_FPADD32,
10049 SPARC_BUILTIN_FPADD32S,
10050 SPARC_BUILTIN_FPSUB16,
10051 SPARC_BUILTIN_FPSUB16S,
10052 SPARC_BUILTIN_FPSUB32,
10053 SPARC_BUILTIN_FPSUB32S,
10054 SPARC_BUILTIN_ARRAY8,
10055 SPARC_BUILTIN_ARRAY16,
10056 SPARC_BUILTIN_ARRAY32,
10057
10058 /* VIS 2.0 builtins. */
10059 SPARC_BUILTIN_EDGE8N,
10060 SPARC_BUILTIN_EDGE8LN,
10061 SPARC_BUILTIN_EDGE16N,
10062 SPARC_BUILTIN_EDGE16LN,
10063 SPARC_BUILTIN_EDGE32N,
10064 SPARC_BUILTIN_EDGE32LN,
10065 SPARC_BUILTIN_BMASK,
10066 SPARC_BUILTIN_BSHUFFLEV4HI,
10067 SPARC_BUILTIN_BSHUFFLEV8QI,
10068 SPARC_BUILTIN_BSHUFFLEV2SI,
10069 SPARC_BUILTIN_BSHUFFLEDI,
10070
10071 /* VIS 3.0 builtins. */
10072 SPARC_BUILTIN_CMASK8,
10073 SPARC_BUILTIN_CMASK16,
10074 SPARC_BUILTIN_CMASK32,
10075 SPARC_BUILTIN_FCHKSM16,
10076 SPARC_BUILTIN_FSLL16,
10077 SPARC_BUILTIN_FSLAS16,
10078 SPARC_BUILTIN_FSRL16,
10079 SPARC_BUILTIN_FSRA16,
10080 SPARC_BUILTIN_FSLL32,
10081 SPARC_BUILTIN_FSLAS32,
10082 SPARC_BUILTIN_FSRL32,
10083 SPARC_BUILTIN_FSRA32,
10084 SPARC_BUILTIN_PDISTN,
10085 SPARC_BUILTIN_FMEAN16,
10086 SPARC_BUILTIN_FPADD64,
10087 SPARC_BUILTIN_FPSUB64,
10088 SPARC_BUILTIN_FPADDS16,
10089 SPARC_BUILTIN_FPADDS16S,
10090 SPARC_BUILTIN_FPSUBS16,
10091 SPARC_BUILTIN_FPSUBS16S,
10092 SPARC_BUILTIN_FPADDS32,
10093 SPARC_BUILTIN_FPADDS32S,
10094 SPARC_BUILTIN_FPSUBS32,
10095 SPARC_BUILTIN_FPSUBS32S,
10096 SPARC_BUILTIN_FUCMPLE8,
10097 SPARC_BUILTIN_FUCMPNE8,
10098 SPARC_BUILTIN_FUCMPGT8,
10099 SPARC_BUILTIN_FUCMPEQ8,
10100 SPARC_BUILTIN_FHADDS,
10101 SPARC_BUILTIN_FHADDD,
10102 SPARC_BUILTIN_FHSUBS,
10103 SPARC_BUILTIN_FHSUBD,
10104 SPARC_BUILTIN_FNHADDS,
10105 SPARC_BUILTIN_FNHADDD,
10106 SPARC_BUILTIN_UMULXHI,
10107 SPARC_BUILTIN_XMULX,
10108 SPARC_BUILTIN_XMULXHI,
10109
10110 SPARC_BUILTIN_MAX
10111 };
10112
10113 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10114 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10115
10116 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10117 function decl or NULL_TREE if the builtin was not added. */
10118
10119 static tree
10120 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10121 tree type)
10122 {
10123 tree t
10124 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10125
10126 if (t)
10127 {
10128 sparc_builtins[code] = t;
10129 sparc_builtins_icode[code] = icode;
10130 }
10131
10132 return t;
10133 }
10134
10135 /* Likewise, but also marks the function as "const". */
10136
10137 static tree
10138 def_builtin_const (const char *name, enum insn_code icode,
10139 enum sparc_builtins code, tree type)
10140 {
10141 tree t = def_builtin (name, icode, code, type);
10142
10143 if (t)
10144 TREE_READONLY (t) = 1;
10145
10146 return t;
10147 }
10148
10149 /* Implement the TARGET_INIT_BUILTINS target hook.
10150 Create builtin functions for special SPARC instructions. */
10151
10152 static void
10153 sparc_init_builtins (void)
10154 {
10155 if (TARGET_FPU)
10156 sparc_fpu_init_builtins ();
10157
10158 if (TARGET_VIS)
10159 sparc_vis_init_builtins ();
10160 }
10161
10162 /* Create builtin functions for FPU instructions. */
10163
10164 static void
10165 sparc_fpu_init_builtins (void)
10166 {
10167 tree ftype
10168 = build_function_type_list (void_type_node,
10169 build_pointer_type (unsigned_type_node), 0);
10170 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10171 SPARC_BUILTIN_LDFSR, ftype);
10172 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10173 SPARC_BUILTIN_STFSR, ftype);
10174 }
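/* Illustration only: both builtins take a pointer to a 32-bit word
   holding the %fsr image, so user code can read-modify-write the FP
   state, e.g. to clear the current-exception (cexc) field in bits 4:0:

     unsigned int fsr;
     __builtin_store_fsr (&fsr);   // stfsr: copy %fsr to memory
     fsr &= ~0x1fu;                // drop the cexc bits
     __builtin_load_fsr (&fsr);    // ldfsr: reload %fsr from memory  */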
10175
10176 /* Create builtin functions for VIS instructions. */
10177
10178 static void
10179 sparc_vis_init_builtins (void)
10180 {
10181 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10182 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10183 tree v4hi = build_vector_type (intHI_type_node, 4);
10184 tree v2hi = build_vector_type (intHI_type_node, 2);
10185 tree v2si = build_vector_type (intSI_type_node, 2);
10186 tree v1si = build_vector_type (intSI_type_node, 1);
10187
10188 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10189 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10190 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10191 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10192 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10193 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10194 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10195 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10196 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10197 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10198 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10199 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10200 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10201 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10202 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10203 v8qi, v8qi,
10204 intDI_type_node, 0);
10205 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10206 v8qi, v8qi, 0);
10207 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10208 v8qi, v8qi, 0);
10209 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10210 intDI_type_node,
10211 intDI_type_node, 0);
10212 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10213 intSI_type_node,
10214 intSI_type_node, 0);
10215 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10216 ptr_type_node,
10217 intSI_type_node, 0);
10218 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10219 ptr_type_node,
10220 intDI_type_node, 0);
10221 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10222 ptr_type_node,
10223 ptr_type_node, 0);
10224 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10225 ptr_type_node,
10226 ptr_type_node, 0);
10227 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10228 v4hi, v4hi, 0);
10229 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10230 v2si, v2si, 0);
10231 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10232 v4hi, v4hi, 0);
10233 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10234 v2si, v2si, 0);
10235 tree void_ftype_di = build_function_type_list (void_type_node,
10236 intDI_type_node, 0);
10237 tree di_ftype_void = build_function_type_list (intDI_type_node,
10238 void_type_node, 0);
10239 tree void_ftype_si = build_function_type_list (void_type_node,
10240 intSI_type_node, 0);
10241 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10242 float_type_node,
10243 float_type_node, 0);
10244 tree df_ftype_df_df = build_function_type_list (double_type_node,
10245 double_type_node,
10246 double_type_node, 0);
10247
10248 /* Packing and expanding vectors. */
10249 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10250 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10251 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10252 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10253 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10254 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10255 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10256 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10257 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10258 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10259
10260 /* Multiplications. */
10261 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10262 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10263 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10264 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10265 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10266 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10267 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10268 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10269 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10270 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10271 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10272 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10273 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10274 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10275
10276 /* Data aligning. */
10277 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10278 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10279 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10280 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10281 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10282 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10283 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10284 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10285
10286 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10287 SPARC_BUILTIN_WRGSR, void_ftype_di);
10288 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10289 SPARC_BUILTIN_RDGSR, di_ftype_void);
10290
10291 if (TARGET_ARCH64)
10292 {
10293 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10294 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10295 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10296 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10297 }
10298 else
10299 {
10300 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10301 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10302 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10303 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10304 }
10305
10306 /* Pixel distance. */
10307 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10308 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10309
10310 /* Edge handling. */
10311 if (TARGET_ARCH64)
10312 {
10313 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10314 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10315 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10316 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10317 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10318 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10319 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10320 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10321 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10322 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10323 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10324 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10325 }
10326 else
10327 {
10328 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10329 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10330 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10331 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10332 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10333 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10334 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10335 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10336 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10337 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10338 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10339 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10340 }
10341
10342 /* Pixel compare. */
10343 if (TARGET_ARCH64)
10344 {
10345 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10346 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10347 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10348 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10349 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10350 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10351 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10352 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10353 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10354 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10355 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10356 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10357 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10358 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10359 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10360 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10361 }
10362 else
10363 {
10364 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10365 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10366 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10367 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10368 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10369 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10370 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10371 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10372 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10373 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10374 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10375 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10376 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10377 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10378 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10379 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10380 }
10381
10382 /* Addition and subtraction. */
10383 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10384 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10385 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10386 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10387 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10388 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10389 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10390 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10391 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10392 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10393 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10394 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10395 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10396 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10397 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10398 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10399
10400 /* Three-dimensional array addressing. */
10401 if (TARGET_ARCH64)
10402 {
10403 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10404 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10405 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10406 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10407 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10408 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10409 }
10410 else
10411 {
10412 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10413 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10414 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10415 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10416 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10417 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10418 }
10419
10420 if (TARGET_VIS2)
10421 {
10422 /* Edge handling. */
10423 if (TARGET_ARCH64)
10424 {
10425 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10426 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10427 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10428 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10429 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10430 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10431 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10432 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10433 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10434 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10435 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10436 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10437 }
10438 else
10439 {
10440 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10441 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10442 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10443 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10444 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10445 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10446 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10447 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10448 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10449 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10450 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10451 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10452 }
10453
10454 /* Byte mask and shuffle. */
10455 if (TARGET_ARCH64)
10456 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10457 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10458 else
10459 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10460 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10461 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10462 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10463 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10464 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10465 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10466 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10467 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10468 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10469 }
10470
10471 if (TARGET_VIS3)
10472 {
10473 if (TARGET_ARCH64)
10474 {
10475 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10476 SPARC_BUILTIN_CMASK8, void_ftype_di);
10477 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10478 SPARC_BUILTIN_CMASK16, void_ftype_di);
10479 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10480 SPARC_BUILTIN_CMASK32, void_ftype_di);
10481 }
10482 else
10483 {
10484 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10485 SPARC_BUILTIN_CMASK8, void_ftype_si);
10486 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10487 SPARC_BUILTIN_CMASK16, void_ftype_si);
10488 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10489 SPARC_BUILTIN_CMASK32, void_ftype_si);
10490 }
10491
10492 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10493 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
10494
10495 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10496 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
10497 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10498 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
10499 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10500 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
10501 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10502 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
10503 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10504 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
10505 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10506 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
10507 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10508 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
10509 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10510 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
10511
10512 if (TARGET_ARCH64)
10513 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10514 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
10515 else
10516 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10517 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
10518
10519 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10520 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
10521 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10522 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
10523 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10524 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
10525
10526 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10527 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
10528 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10529 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
10530 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10531 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
10532 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10533 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
10534 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10535 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
10536 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10537 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
10538 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10539 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
10540 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10541 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
10542
10543 if (TARGET_ARCH64)
10544 {
10545 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10546 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
10547 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10548 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
10549 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10550 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
10551 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10552 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
10553 }
10554 else
10555 {
10556 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10557 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
10558 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10559 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
10560 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10561 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
10562 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10563 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
10564 }
10565
10566 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10567 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
10568 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10569 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
10570 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10571 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
10572 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10573 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
10574 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10575 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
10576 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10577 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
10578
10579 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10580 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
10581 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10582 SPARC_BUILTIN_XMULX, di_ftype_di_di);
10583 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10584 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
10585 }
10586 }
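/* Illustration only: each entry above binds a builtin to a single VIS
   instruction, so (assuming -mvis) a wrapper like

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi add16 (v4hi a, v4hi b)
     {
       return __builtin_vis_fpadd16 (a, b);   // one fpadd16 insn
     }

   compiles down to the partitioned add with no library call.  */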
10587
10588 /* Implement TARGET_BUILTIN_DECL hook. */
10589
10590 static tree
10591 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10592 {
10593 if (code >= SPARC_BUILTIN_MAX)
10594 return error_mark_node;
10595
10596 return sparc_builtins[code];
10597 }
10598
10599 /* Implement TARGET_EXPAND_BUILTIN hook. */
10600
10601 static rtx
10602 sparc_expand_builtin (tree exp, rtx target,
10603 rtx subtarget ATTRIBUTE_UNUSED,
10604 machine_mode tmode ATTRIBUTE_UNUSED,
10605 int ignore ATTRIBUTE_UNUSED)
10606 {
10607 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10608 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10609 enum insn_code icode = sparc_builtins_icode[code];
10610 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10611 call_expr_arg_iterator iter;
10612 int arg_count = 0;
10613 rtx pat, op[4];
10614 tree arg;
10615
10616 if (nonvoid)
10617 {
10618 machine_mode tmode = insn_data[icode].operand[0].mode;
10619 if (!target
10620 || GET_MODE (target) != tmode
10621 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10622 op[0] = gen_reg_rtx (tmode);
10623 else
10624 op[0] = target;
10625 }
10626
10627 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10628 {
10629 const struct insn_operand_data *insn_op;
10630 int idx;
10631
10632 if (arg == error_mark_node)
10633 return NULL_RTX;
10634
10635 arg_count++;
10636 idx = arg_count - !nonvoid;
10637 insn_op = &insn_data[icode].operand[idx];
10638 op[arg_count] = expand_normal (arg);
10639
10640 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
10641 {
10642 if (!address_operand (op[arg_count], SImode))
10643 {
10644 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
10645 op[arg_count] = copy_addr_to_reg (op[arg_count]);
10646 }
10647 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
10648 }
10649
10650 else if (insn_op->mode == V1DImode
10651 && GET_MODE (op[arg_count]) == DImode)
10652 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10653
10654 else if (insn_op->mode == V1SImode
10655 && GET_MODE (op[arg_count]) == SImode)
10656 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10657
10658 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10659 insn_op->mode))
10660 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10661 }
10662
10663 switch (arg_count)
10664 {
10665 case 0:
10666 pat = GEN_FCN (icode) (op[0]);
10667 break;
10668 case 1:
10669 if (nonvoid)
10670 pat = GEN_FCN (icode) (op[0], op[1]);
10671 else
10672 pat = GEN_FCN (icode) (op[1]);
10673 break;
10674 case 2:
10675 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10676 break;
10677 case 3:
10678 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10679 break;
10680 default:
10681 gcc_unreachable ();
10682 }
10683
10684 if (!pat)
10685 return NULL_RTX;
10686
10687 emit_insn (pat);
10688
10689 return (nonvoid ? op[0] : const0_rtx);
10690 }
10691
10692 /* Return the upper 16 bits of the 8x16 multiplication. */
10693
10694 static int
10695 sparc_vis_mul8x16 (int e8, int e16)
10696 {
10697 return (e8 * e16 + 128) / 256;
10698 }
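/* For example, e8 = 200 and e16 = 100 give (200 * 100 + 128) / 256
   = 20128 / 256 = 78, i.e. the 8x16 product scaled down by 256 with
   round-to-nearest supplied by the +128 bias.  */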
10699
10700 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10701 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10702
10703 static void
10704 sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode,
10705 tree inner_type, tree cst0, tree cst1)
10706 {
10707 unsigned i, num = VECTOR_CST_NELTS (cst0);
10708 int scale;
10709
10710 switch (fncode)
10711 {
10712 case SPARC_BUILTIN_FMUL8X16:
10713 for (i = 0; i < num; ++i)
10714 {
10715 int val
10716 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10717 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10718 n_elts[i] = build_int_cst (inner_type, val);
10719 }
10720 break;
10721
10722 case SPARC_BUILTIN_FMUL8X16AU:
10723 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10724
10725 for (i = 0; i < num; ++i)
10726 {
10727 int val
10728 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10729 scale);
10730 n_elts[i] = build_int_cst (inner_type, val);
10731 }
10732 break;
10733
10734 case SPARC_BUILTIN_FMUL8X16AL:
10735 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10736
10737 for (i = 0; i < num; ++i)
10738 {
10739 int val
10740 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10741 scale);
10742 n_elts[i] = build_int_cst (inner_type, val);
10743 }
10744 break;
10745
10746 default:
10747 gcc_unreachable ();
10748 }
10749 }
10750
10751 /* Implement TARGET_FOLD_BUILTIN hook.
10752
10753 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
10754 result of the function call is ignored. NULL_TREE is returned if the
10755 function could not be folded. */
10756
10757 static tree
10758 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10759 tree *args, bool ignore)
10760 {
10761 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10762 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10763 tree arg0, arg1, arg2;
10764
10765 if (ignore)
10766 switch (code)
10767 {
10768 case SPARC_BUILTIN_LDFSR:
10769 case SPARC_BUILTIN_STFSR:
10770 case SPARC_BUILTIN_ALIGNADDR:
10771 case SPARC_BUILTIN_WRGSR:
10772 case SPARC_BUILTIN_BMASK:
10773 case SPARC_BUILTIN_CMASK8:
10774 case SPARC_BUILTIN_CMASK16:
10775 case SPARC_BUILTIN_CMASK32:
10776 break;
10777
10778 default:
10779 return build_zero_cst (rtype);
10780 }
10781
10782 switch (code)
10783 {
10784 case SPARC_BUILTIN_FEXPAND:
10785 arg0 = args[0];
10786 STRIP_NOPS (arg0);
10787
10788 if (TREE_CODE (arg0) == VECTOR_CST)
10789 {
10790 tree inner_type = TREE_TYPE (rtype);
10791 tree *n_elts;
10792 unsigned i;
10793
10794 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10795 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10796 n_elts[i] = build_int_cst (inner_type,
10797 TREE_INT_CST_LOW
10798 (VECTOR_CST_ELT (arg0, i)) << 4);
10799 return build_vector (rtype, n_elts);
10800 }
10801 break;
10802
10803 case SPARC_BUILTIN_FMUL8X16:
10804 case SPARC_BUILTIN_FMUL8X16AU:
10805 case SPARC_BUILTIN_FMUL8X16AL:
10806 arg0 = args[0];
10807 arg1 = args[1];
10808 STRIP_NOPS (arg0);
10809 STRIP_NOPS (arg1);
10810
10811 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10812 {
10813 tree inner_type = TREE_TYPE (rtype);
10814 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10815 sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1);
10816 return build_vector (rtype, n_elts);
10817 }
10818 break;
10819
10820 case SPARC_BUILTIN_FPMERGE:
10821 arg0 = args[0];
10822 arg1 = args[1];
10823 STRIP_NOPS (arg0);
10824 STRIP_NOPS (arg1);
10825
10826 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10827 {
10828 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
10829 unsigned i;
10830 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10831 {
10832 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
10833 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
10834 }
10835
10836 return build_vector (rtype, n_elts);
10837 }
10838 break;
10839
10840 case SPARC_BUILTIN_PDIST:
10841 case SPARC_BUILTIN_PDISTN:
10842 arg0 = args[0];
10843 arg1 = args[1];
10844 STRIP_NOPS (arg0);
10845 STRIP_NOPS (arg1);
10846 if (code == SPARC_BUILTIN_PDIST)
10847 {
10848 arg2 = args[2];
10849 STRIP_NOPS (arg2);
10850 }
10851 else
10852 arg2 = integer_zero_node;
10853
10854 if (TREE_CODE (arg0) == VECTOR_CST
10855 && TREE_CODE (arg1) == VECTOR_CST
10856 && TREE_CODE (arg2) == INTEGER_CST)
10857 {
10858 bool overflow = false;
10859 widest_int result = wi::to_widest (arg2);
10860 widest_int tmp;
10861 unsigned i;
10862
10863 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10864 {
10865 tree e0 = VECTOR_CST_ELT (arg0, i);
10866 tree e1 = VECTOR_CST_ELT (arg1, i);
10867
10868 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
10869
10870 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
10871 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
10872 if (wi::neg_p (tmp))
10873 tmp = wi::neg (tmp, &neg2_ovf);
10874 else
10875 neg2_ovf = false;
10876 result = wi::add (result, tmp, SIGNED, &add2_ovf);
10877 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
10878 }
10879
10880 gcc_assert (!overflow);
10881
10882 return wide_int_to_tree (rtype, result);
10883 }
10884 break;
10885 default:
10886 break;
10887 }
10888
10889 return NULL_TREE;
10890 }
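/* For example, folding __builtin_vis_pdist on the constant vectors
   { 1, 2, 3, 4, 5, 6, 7, 8 } and { 8, 7, 6, 5, 4, 3, 2, 1 } with a zero
   accumulator yields 7+5+3+1+1+3+5+7 = 32, the sum of the absolute
   byte differences.  */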
10891 \f
10892 /* ??? This duplicates information provided to the compiler by the
10893 ??? scheduler description. Some day, teach genautomata to output
10894 ??? the latencies and then CSE will just use that. */
10895
10896 static bool
10897 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
10898 int opno ATTRIBUTE_UNUSED,
10899 int *total, bool speed ATTRIBUTE_UNUSED)
10900 {
10901 int code = GET_CODE (x);
10902 bool float_mode_p = FLOAT_MODE_P (mode);
10903
10904 switch (code)
10905 {
10906 case CONST_INT:
10907 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
10908 {
10909 *total = 0;
10910 return true;
10911 }
10912 /* FALLTHRU */
10913
10914 case HIGH:
10915 *total = 2;
10916 return true;
10917
10918 case CONST:
10919 case LABEL_REF:
10920 case SYMBOL_REF:
10921 *total = 4;
10922 return true;
10923
10924 case CONST_DOUBLE:
10925 if (mode == VOIDmode
10926 && ((CONST_DOUBLE_HIGH (x) == 0
10927 && CONST_DOUBLE_LOW (x) < 0x1000)
10928 || (CONST_DOUBLE_HIGH (x) == -1
10929 && CONST_DOUBLE_LOW (x) < 0
10930 && CONST_DOUBLE_LOW (x) >= -0x1000)))
10931 *total = 0;
10932 else
10933 *total = 8;
10934 return true;
10935
10936 case MEM:
10937 /* If outer-code was a sign or zero extension, a cost
10938 of COSTS_N_INSNS (1) was already added in. This is
10939 why we are subtracting it back out. */
10940 if (outer_code == ZERO_EXTEND)
10941 {
10942 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
10943 }
10944 else if (outer_code == SIGN_EXTEND)
10945 {
10946 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
10947 }
10948 else if (float_mode_p)
10949 {
10950 *total = sparc_costs->float_load;
10951 }
10952 else
10953 {
10954 *total = sparc_costs->int_load;
10955 }
10956
10957 return true;
10958
10959 case PLUS:
10960 case MINUS:
10961 if (float_mode_p)
10962 *total = sparc_costs->float_plusminus;
10963 else
10964 *total = COSTS_N_INSNS (1);
10965 return false;
10966
10967 case FMA:
10968 {
10969 rtx sub;
10970
10971 gcc_assert (float_mode_p);
10972 *total = sparc_costs->float_mul;
10973
10974 sub = XEXP (x, 0);
10975 if (GET_CODE (sub) == NEG)
10976 sub = XEXP (sub, 0);
10977 *total += rtx_cost (sub, mode, FMA, 0, speed);
10978
10979 sub = XEXP (x, 2);
10980 if (GET_CODE (sub) == NEG)
10981 sub = XEXP (sub, 0);
10982 *total += rtx_cost (sub, mode, FMA, 2, speed);
10983 return true;
10984 }
10985
10986 case MULT:
10987 if (float_mode_p)
10988 *total = sparc_costs->float_mul;
10989 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
10990 *total = COSTS_N_INSNS (25);
10991 else
10992 {
10993 int bit_cost;
10994
10995 bit_cost = 0;
10996 if (sparc_costs->int_mul_bit_factor)
10997 {
10998 int nbits;
10999
11000 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11001 {
11002 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11003 for (nbits = 0; value != 0; value &= value - 1)
11004 nbits++;
11005 }
11006 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
11007 && GET_MODE (XEXP (x, 1)) == VOIDmode)
11008 {
11009 rtx x1 = XEXP (x, 1);
11010 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
11011 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
11012
11013 for (nbits = 0; value1 != 0; value1 &= value1 - 1)
11014 nbits++;
11015 for (; value2 != 0; value2 &= value2 - 1)
11016 nbits++;
11017 }
11018 else
11019 nbits = 7;
11020
11021 if (nbits < 3)
11022 nbits = 3;
11023 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11024 bit_cost = COSTS_N_INSNS (bit_cost);
11025 }
11026
11027 if (mode == DImode || !TARGET_HARD_MUL)
11028 *total = sparc_costs->int_mulX + bit_cost;
11029 else
11030 *total = sparc_costs->int_mul + bit_cost;
11031 }
11032 return false;
11033
11034 case ASHIFT:
11035 case ASHIFTRT:
11036 case LSHIFTRT:
11037 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11038 return false;
11039
11040 case DIV:
11041 case UDIV:
11042 case MOD:
11043 case UMOD:
11044 if (float_mode_p)
11045 {
11046 if (mode == DFmode)
11047 *total = sparc_costs->float_div_df;
11048 else
11049 *total = sparc_costs->float_div_sf;
11050 }
11051 else
11052 {
11053 if (mode == DImode)
11054 *total = sparc_costs->int_divX;
11055 else
11056 *total = sparc_costs->int_div;
11057 }
11058 return false;
11059
11060 case NEG:
11061 if (! float_mode_p)
11062 {
11063 *total = COSTS_N_INSNS (1);
11064 return false;
11065 }
11066 /* FALLTHRU */
11067
11068 case ABS:
11069 case FLOAT:
11070 case UNSIGNED_FLOAT:
11071 case FIX:
11072 case UNSIGNED_FIX:
11073 case FLOAT_EXTEND:
11074 case FLOAT_TRUNCATE:
11075 *total = sparc_costs->float_move;
11076 return false;
11077
11078 case SQRT:
11079 if (mode == DFmode)
11080 *total = sparc_costs->float_sqrt_df;
11081 else
11082 *total = sparc_costs->float_sqrt_sf;
11083 return false;
11084
11085 case COMPARE:
11086 if (float_mode_p)
11087 *total = sparc_costs->float_cmp;
11088 else
11089 *total = COSTS_N_INSNS (1);
11090 return false;
11091
11092 case IF_THEN_ELSE:
11093 if (float_mode_p)
11094 *total = sparc_costs->float_cmove;
11095 else
11096 *total = sparc_costs->int_cmove;
11097 return false;
11098
11099 case IOR:
11100 /* Handle the NAND vector patterns. */
11101 if (sparc_vector_mode_supported_p (mode)
11102 && GET_CODE (XEXP (x, 0)) == NOT
11103 && GET_CODE (XEXP (x, 1)) == NOT)
11104 {
11105 *total = COSTS_N_INSNS (1);
11106 return true;
11107 }
11108 else
11109 return false;
11110
11111 default:
11112 return false;
11113 }
11114 }
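/* A worked example for the MULT case above (the cost parameters are
   hypothetical, taken from no particular processor_costs entry): with
   int_mul = COSTS_N_INSNS (5) and int_mul_bit_factor = 2, a 32-bit
   multiply by the constant 0x3e (five set bits, so nbits = 5) is
   costed as

     bit_cost = COSTS_N_INSNS ((5 - 3) / 2) = COSTS_N_INSNS (1)
     *total   = int_mul + bit_cost    = COSTS_N_INSNS (6)

   while a multiply by a power of two has nbits clamped to 3 and thus
   no bit_cost at all.  */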
11115
11116 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
11117
11118 static inline bool
11119 general_or_i64_p (reg_class_t rclass)
11120 {
11121 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11122 }
11123
11124 /* Implement TARGET_REGISTER_MOVE_COST. */
11125
11126 static int
11127 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11128 reg_class_t from, reg_class_t to)
11129 {
11130 bool need_memory = false;
11131
11132 if (from == FPCC_REGS || to == FPCC_REGS)
11133 need_memory = true;
11134 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11135 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11136 {
11137 if (TARGET_VIS3)
11138 {
11139 int size = GET_MODE_SIZE (mode);
11140 if (size == 8 || size == 4)
11141 {
11142 if (! TARGET_ARCH32 || size == 4)
11143 return 4;
11144 else
11145 return 6;
11146 }
11147 }
11148 need_memory = true;
11149 }
11150
11151 if (need_memory)
11152 {
11153 if (sparc_cpu == PROCESSOR_ULTRASPARC
11154 || sparc_cpu == PROCESSOR_ULTRASPARC3
11155 || sparc_cpu == PROCESSOR_NIAGARA
11156 || sparc_cpu == PROCESSOR_NIAGARA2
11157 || sparc_cpu == PROCESSOR_NIAGARA3
11158 || sparc_cpu == PROCESSOR_NIAGARA4)
11159 return 12;
11160
11161 return 6;
11162 }
11163
11164 return 2;
11165 }
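/* To summarize the logic above with concrete numbers: a 4-byte move
   between the integer and FP register files costs 4 when VIS3 can do
   it directly; without VIS3 the value must bounce through memory,
   which is costed as 12 on the UltraSPARC/Niagara families and 6
   elsewhere; and a move within a single register file keeps the
   default cost of 2.  */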
11166
11167 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11168 This is achieved by means of a manual dynamic stack space allocation in
11169 the current frame. We make the assumption that SEQ doesn't contain any
11170 function calls, with the possible exception of calls to the GOT helper. */
11171
11172 static void
11173 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11174 {
11175 /* We must preserve the lowest 16 words for the register save area. */
11176 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11177 /* We really need only 2 words of fresh stack space. */
11178 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11179
11180 rtx slot
11181 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11182 SPARC_STACK_BIAS + offset));
11183
11184 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11185 emit_insn (gen_rtx_SET (slot, reg));
11186 if (reg2)
11187 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
11188 reg2));
11189 emit_insn (seq);
11190 if (reg2)
11191 emit_insn (gen_rtx_SET (reg2,
11192 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11193 emit_insn (gen_rtx_SET (reg, slot));
11194 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11195 }
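/* For a single register, the function above emits a sequence along
   these lines (illustrative SPARC assembly; OFFSET is the 16-word
   register save area, i.e. 16*UNITS_PER_WORD bytes, and stx/ldx
   become st/ld with a zero BIAS in 32-bit mode):

     add  %sp, -SIZE, %sp            ! allocate fresh stack space
     stx  %reg, [%sp+BIAS+OFFSET]    ! spill REG above the save area
     ...SEQ...
     ldx  [%sp+BIAS+OFFSET], %reg    ! restore REG
     add  %sp, SIZE, %sp             ! release the space  */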
11196
11197 /* Output the assembler code for a thunk function. THUNK_DECL is the
11198 declaration for the thunk function itself, FUNCTION is the decl for
11199 the target function. DELTA is an immediate constant offset to be
11200 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11201 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
11202
11203 static void
11204 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11205 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11206 tree function)
11207 {
11208 rtx this_rtx, funexp;
11209 rtx_insn *insn;
11210 unsigned int int_arg_first;
11211
11212 reload_completed = 1;
11213 epilogue_completed = 1;
11214
11215 emit_note (NOTE_INSN_PROLOGUE_END);
11216
11217 if (TARGET_FLAT)
11218 {
11219 sparc_leaf_function_p = 1;
11220
11221 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11222 }
11223 else if (flag_delayed_branch)
11224 {
11225 /* We will emit a regular sibcall below, so we need to instruct
11226 output_sibcall that we are in a leaf function. */
11227 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11228
11229 /* This will cause final.c to invoke leaf_renumber_regs so we
11230 must behave as if we were in a not-yet-leafified function. */
11231 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11232 }
11233 else
11234 {
11235 /* We will emit the sibcall manually below, so we will need to
11236 manually spill non-leaf registers. */
11237 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11238
11239 /* We really are in a leaf function. */
11240 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11241 }
11242
11243 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11244 returns a structure, the structure return pointer is there instead. */
11245 if (TARGET_ARCH64
11246 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11247 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11248 else
11249 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11250
11251 /* Add DELTA. When possible use a plain add, otherwise load it into
11252 a register first. */
11253 if (delta)
11254 {
11255 rtx delta_rtx = GEN_INT (delta);
11256
11257 if (! SPARC_SIMM13_P (delta))
11258 {
11259 rtx scratch = gen_rtx_REG (Pmode, 1);
11260 emit_move_insn (scratch, delta_rtx);
11261 delta_rtx = scratch;
11262 }
11263
11264 /* THIS_RTX += DELTA. */
11265 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11266 }
11267
11268 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
11269 if (vcall_offset)
11270 {
11271 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11272 rtx scratch = gen_rtx_REG (Pmode, 1);
11273
11274 gcc_assert (vcall_offset < 0);
11275
11276 /* SCRATCH = *THIS_RTX. */
11277 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11278
11279 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11280 may not have any available scratch register at this point. */
11281 if (SPARC_SIMM13_P (vcall_offset))
11282 ;
11283 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
11284 else if (! fixed_regs[5]
11285 /* The below sequence is made up of at least 2 insns,
11286 while the default method may need only one. */
11287 && vcall_offset < -8192)
11288 {
11289 rtx scratch2 = gen_rtx_REG (Pmode, 5);
11290 emit_move_insn (scratch2, vcall_offset_rtx);
11291 vcall_offset_rtx = scratch2;
11292 }
11293 else
11294 {
11295 rtx increment = GEN_INT (-4096);
11296
11297 /* VCALL_OFFSET is a negative number whose typical range can be
11298 estimated as -32768..0 in 32-bit mode. In almost all cases
11299 it is therefore cheaper to emit multiple add insns than
11300 spilling and loading the constant into a register (at least
11301 6 insns). */
11302 while (! SPARC_SIMM13_P (vcall_offset))
11303 {
11304 emit_insn (gen_add2_insn (scratch, increment));
11305 vcall_offset += 4096;
11306 }
11307 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11308 }
11309
11310 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
11311 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11312 gen_rtx_PLUS (Pmode,
11313 scratch,
11314 vcall_offset_rtx)));
11315
11316 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
11317 emit_insn (gen_add2_insn (this_rtx, scratch));
11318 }
11319
11320 /* Generate a tail call to the target function. */
11321 if (! TREE_USED (function))
11322 {
11323 assemble_external (function);
11324 TREE_USED (function) = 1;
11325 }
11326 funexp = XEXP (DECL_RTL (function), 0);
11327
11328 if (flag_delayed_branch)
11329 {
11330 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11331 insn = emit_call_insn (gen_sibcall (funexp));
11332 SIBLING_CALL_P (insn) = 1;
11333 }
11334 else
11335 {
11336 /* The hoops we have to jump through in order to generate a sibcall
11337 without using delay slots... */
11338 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11339
11340 if (flag_pic)
11341 {
11342 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
11343 start_sequence ();
11344 load_got_register (); /* clobbers %o7 */
11345 scratch = sparc_legitimize_pic_address (funexp, scratch);
11346 seq = get_insns ();
11347 end_sequence ();
11348 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11349 }
11350 else if (TARGET_ARCH32)
11351 {
11352 emit_insn (gen_rtx_SET (scratch,
11353 gen_rtx_HIGH (SImode, funexp)));
11354 emit_insn (gen_rtx_SET (scratch,
11355 gen_rtx_LO_SUM (SImode, scratch, funexp)));
11356 }
11357 else /* TARGET_ARCH64 */
11358 {
11359 switch (sparc_cmodel)
11360 {
11361 case CM_MEDLOW:
11362 case CM_MEDMID:
11363 /* The destination can serve as a temporary. */
11364 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11365 break;
11366
11367 case CM_MEDANY:
11368 case CM_EMBMEDANY:
11369 /* The destination cannot serve as a temporary. */
11370 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11371 start_sequence ();
11372 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11373 seq = get_insns ();
11374 end_sequence ();
11375 emit_and_preserve (seq, spill_reg, 0);
11376 break;
11377
11378 default:
11379 gcc_unreachable ();
11380 }
11381 }
11382
11383 emit_jump_insn (gen_indirect_jump (scratch));
11384 }
11385
11386 emit_barrier ();
11387
11388 /* Run just enough of rest_of_compilation to get the insns emitted.
11389 There's not really enough bulk here to make other passes such as
11390 instruction scheduling worth while. Note that use_thunk calls
11391 assemble_start_function and assemble_end_function. */
11392 insn = get_insns ();
11393 shorten_branches (insn);
11394 final_start_function (insn, file, 1);
11395 final (insn, file, 1);
11396 final_end_function ();
11397
11398 reload_completed = 0;
11399 epilogue_completed = 0;
11400 }
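/* Conceptually, the thunk emitted above is the assembly counterpart
   of this C sketch (illustrative only):

     thunk (void *this, ...)
     {
       this += DELTA;
       if (VCALL_OFFSET)
         this += *(long *)(*(char **)this + VCALL_OFFSET);
       return function (this, ...);   // emitted as a sibling call
     }

   with DELTA and VCALL_OFFSET used as immediates when they fit in a
   simm13 and otherwise built in the scratch registers %g1/%g5.  */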
11401
11402 /* Return true if sparc_output_mi_thunk would be able to output the
11403 assembler code for the thunk function specified by the arguments
11404 it is passed, and false otherwise. */
11405 static bool
11406 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11407 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11408 HOST_WIDE_INT vcall_offset,
11409 const_tree function ATTRIBUTE_UNUSED)
11410 {
11411 /* Bound the loop used in the default method above. */
11412 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11413 }
11414
11415 /* How to allocate a 'struct machine_function'. */
11416
11417 static struct machine_function *
11418 sparc_init_machine_status (void)
11419 {
11420 return ggc_cleared_alloc<machine_function> ();
11421 }
11422
11423 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11424 We need to emit DTP-relative relocations. */
11425
11426 static void
11427 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11428 {
11429 switch (size)
11430 {
11431 case 4:
11432 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11433 break;
11434 case 8:
11435 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11436 break;
11437 default:
11438 gcc_unreachable ();
11439 }
11440 output_addr_const (file, x);
11441 fputs (")", file);
11442 }
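/* For instance, a 4-byte entry for a hypothetical TLS symbol foo is
   printed as

     .word %r_tls_dtpoff32(foo)

   and an 8-byte entry uses .xword with %r_tls_dtpoff64 instead.  */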
11443
11444 /* Do whatever processing is required at the end of a file. */
11445
11446 static void
11447 sparc_file_end (void)
11448 {
11449 /* If we need to emit the special GOT helper function, do so now. */
11450 if (got_helper_rtx)
11451 {
11452 const char *name = XSTR (got_helper_rtx, 0);
11453 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11454 #ifdef DWARF2_UNWIND_INFO
11455 bool do_cfi;
11456 #endif
11457
11458 if (USE_HIDDEN_LINKONCE)
11459 {
11460 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11461 get_identifier (name),
11462 build_function_type_list (void_type_node,
11463 NULL_TREE));
11464 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11465 NULL_TREE, void_type_node);
11466 TREE_PUBLIC (decl) = 1;
11467 TREE_STATIC (decl) = 1;
11468 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11469 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11470 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11471 resolve_unique_section (decl, 0, flag_function_sections);
11472 allocate_struct_function (decl, true);
11473 cfun->is_thunk = 1;
11474 current_function_decl = decl;
11475 init_varasm_status ();
11476 assemble_start_function (decl, name);
11477 }
11478 else
11479 {
11480 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11481 switch_to_section (text_section);
11482 if (align > 0)
11483 ASM_OUTPUT_ALIGN (asm_out_file, align);
11484 ASM_OUTPUT_LABEL (asm_out_file, name);
11485 }
11486
11487 #ifdef DWARF2_UNWIND_INFO
11488 do_cfi = dwarf2out_do_cfi_asm ();
11489 if (do_cfi)
11490 fprintf (asm_out_file, "\t.cfi_startproc\n");
11491 #endif
11492 if (flag_delayed_branch)
11493 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11494 reg_name, reg_name);
11495 else
11496 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11497 reg_name, reg_name);
11498 #ifdef DWARF2_UNWIND_INFO
11499 if (do_cfi)
11500 fprintf (asm_out_file, "\t.cfi_endproc\n");
11501 #endif
11502 }
11503
11504 if (NEED_INDICATE_EXEC_STACK)
11505 file_end_indicate_exec_stack ();
11506
11507 #ifdef TARGET_SOLARIS
11508 solaris_file_end ();
11509 #endif
11510 }
11511
11512 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11513 /* Implement TARGET_MANGLE_TYPE. */
11514
11515 static const char *
11516 sparc_mangle_type (const_tree type)
11517 {
11518 if (!TARGET_64BIT
11519 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11520 && TARGET_LONG_DOUBLE_128)
11521 return "g";
11522
11523 /* For all other types, use normal C++ mangling. */
11524 return NULL;
11525 }
11526 #endif
11527
11528 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
11529 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
11530 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
11531
11532 void
11533 sparc_emit_membar_for_model (enum memmodel model,
11534 int load_store, int before_after)
11535 {
11536 /* Bits for the MEMBAR mmask field. */
11537 const int LoadLoad = 1;
11538 const int StoreLoad = 2;
11539 const int LoadStore = 4;
11540 const int StoreStore = 8;
11541
11542 int mm = 0, implied = 0;
11543
11544 switch (sparc_memory_model)
11545 {
11546 case SMM_SC:
11547 /* Sequential Consistency. All memory transactions are immediately
11548 visible in sequential execution order. No barriers needed. */
11549 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11550 break;
11551
11552 case SMM_TSO:
11553 /* Total Store Ordering: all memory transactions with store semantics
11554 are followed by an implied StoreStore. */
11555 implied |= StoreStore;
11556
11557 /* If we're not looking for a raw barrier (before+after), then atomic
11558 operations get the benefit of being both load and store. */
11559 if (load_store == 3 && before_after == 1)
11560 implied |= StoreLoad;
11561 /* FALLTHRU */
11562
11563 case SMM_PSO:
11564 /* Partial Store Ordering: all memory transactions with load semantics
11565 are followed by an implied LoadLoad | LoadStore. */
11566 implied |= LoadLoad | LoadStore;
11567
11568 /* If we're not looking for a raw barrier (before+after), then atomic
11569 operations get the benefit of being both load and store. */
11570 if (load_store == 3 && before_after == 2)
11571 implied |= StoreLoad | StoreStore;
11572 /* FALLTHRU */
11573
11574 case SMM_RMO:
11575 /* Relaxed Memory Ordering: no implicit bits. */
11576 break;
11577
11578 default:
11579 gcc_unreachable ();
11580 }
11581
11582 if (before_after & 1)
11583 {
11584 if (is_mm_release (model) || is_mm_acq_rel (model)
11585 || is_mm_seq_cst (model))
11586 {
11587 if (load_store & 1)
11588 mm |= LoadLoad | StoreLoad;
11589 if (load_store & 2)
11590 mm |= LoadStore | StoreStore;
11591 }
11592 }
11593 if (before_after & 2)
11594 {
11595 if (is_mm_acquire (model) || is_mm_acq_rel (model)
11596 || is_mm_seq_cst (model))
11597 {
11598 if (load_store & 1)
11599 mm |= LoadLoad | LoadStore;
11600 if (load_store & 2)
11601 mm |= StoreLoad | StoreStore;
11602 }
11603 }
11604
11605 /* Remove the bits implied by the system memory model. */
11606 mm &= ~implied;
11607
11608 /* For raw barriers (before+after), always emit a barrier.
11609 This will become a compile-time barrier if needed. */
11610 if (mm || before_after == 3)
11611 emit_insn (gen_membar (GEN_INT (mm)));
11612 }
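/* A worked example of the masking above: a seq_cst fence requests a
   raw barrier (load_store == 3, before_after == 3).  Under TSO the
   implied set is StoreStore | LoadLoad | LoadStore, so only

     membar  #StoreLoad

   survives, whereas under RMO nothing is implied and all four bits
   LoadLoad | StoreLoad | LoadStore | StoreStore are emitted.  */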
11613
11614 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing a
11615 32-bit compare and swap on the word containing the byte or half-word. */
11616
11617 static void
11618 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11619 rtx oldval, rtx newval)
11620 {
11621 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11622 rtx addr = gen_reg_rtx (Pmode);
11623 rtx off = gen_reg_rtx (SImode);
11624 rtx oldv = gen_reg_rtx (SImode);
11625 rtx newv = gen_reg_rtx (SImode);
11626 rtx oldvalue = gen_reg_rtx (SImode);
11627 rtx newvalue = gen_reg_rtx (SImode);
11628 rtx res = gen_reg_rtx (SImode);
11629 rtx resv = gen_reg_rtx (SImode);
11630 rtx memsi, val, mask, cc;
11631
11632 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11633
11634 if (Pmode != SImode)
11635 addr1 = gen_lowpart (SImode, addr1);
11636 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11637
11638 memsi = gen_rtx_MEM (SImode, addr);
11639 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11640 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11641
11642 val = copy_to_reg (memsi);
11643
11644 emit_insn (gen_rtx_SET (off,
11645 gen_rtx_XOR (SImode, off,
11646 GEN_INT (GET_MODE (mem) == QImode
11647 ? 3 : 2))));
11648
11649 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11650
11651 if (GET_MODE (mem) == QImode)
11652 mask = force_reg (SImode, GEN_INT (0xff));
11653 else
11654 mask = force_reg (SImode, GEN_INT (0xffff));
11655
11656 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
11657
11658 emit_insn (gen_rtx_SET (val,
11659 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11660 val)));
11661
11662 oldval = gen_lowpart (SImode, oldval);
11663 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
11664
11665 newval = gen_lowpart_common (SImode, newval);
11666 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
11667
11668 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
11669
11670 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
11671
11672 rtx_code_label *end_label = gen_label_rtx ();
11673 rtx_code_label *loop_label = gen_label_rtx ();
11674 emit_label (loop_label);
11675
11676 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
11677
11678 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
11679
11680 emit_move_insn (bool_result, const1_rtx);
11681
11682 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11683
11684 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11685
11686 emit_insn (gen_rtx_SET (resv,
11687 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11688 res)));
11689
11690 emit_move_insn (bool_result, const0_rtx);
11691
11692 cc = gen_compare_reg_1 (NE, resv, val);
11693 emit_insn (gen_rtx_SET (val, resv));
11694
11695 /* Use cbranchcc4 to separate the compare and branch! */
11696 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11697 cc, const0_rtx, loop_label));
11698
11699 emit_label (end_label);
11700
11701 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
11702
11703 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
11704
11705 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
11706 }
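/* The expansion above amounts to this C-level sketch, with cas32
   standing in for the 32-bit compare-and-swap pattern and QI selecting
   the byte variant:

     word  = addr & -4;                           // containing word
     shift = ((addr ^ (QI ? 3 : 2)) & 3) << 3;    // big-endian lane
     mask  = (QI ? 0xff : 0xffff) << shift;
     val   = *word & ~mask;                       // the other bytes
     for (;;)
       {
         res = cas32 (word, ((oldval << shift) & mask) | val,
                            ((newval << shift) & mask) | val);
         if (res == (((oldval << shift) & mask) | val))
           break;                                 // success, bool = 1
         if ((res & ~mask) == val)
           break;                                 // real mismatch, bool = 0
         val = res & ~mask;                       // other bytes changed,
       }                                          // so retry with new VAL
     result = (res & mask) >> shift;  */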
11707
11708 /* Expand code to perform a compare-and-swap. */
11709
11710 void
11711 sparc_expand_compare_and_swap (rtx operands[])
11712 {
11713 rtx bval, retval, mem, oldval, newval;
11714 machine_mode mode;
11715 enum memmodel model;
11716
11717 bval = operands[0];
11718 retval = operands[1];
11719 mem = operands[2];
11720 oldval = operands[3];
11721 newval = operands[4];
11722 model = (enum memmodel) INTVAL (operands[6]);
11723 mode = GET_MODE (mem);
11724
11725 sparc_emit_membar_for_model (model, 3, 1);
11726
11727 if (reg_overlap_mentioned_p (retval, oldval))
11728 oldval = copy_to_reg (oldval);
11729
11730 if (mode == QImode || mode == HImode)
11731 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11732 else
11733 {
11734 rtx (*gen) (rtx, rtx, rtx, rtx);
11735 rtx x;
11736
11737 if (mode == SImode)
11738 gen = gen_atomic_compare_and_swapsi_1;
11739 else
11740 gen = gen_atomic_compare_and_swapdi_1;
11741 emit_insn (gen (retval, mem, oldval, newval));
11742
11743 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11744 if (x != bval)
11745 convert_move (bval, x, 1);
11746 }
11747
11748 sparc_emit_membar_for_model (model, 3, 2);
11749 }
11750
11751 void
11752 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
11753 {
11754 rtx t_1, t_2, t_3;
11755
11756 sel = gen_lowpart (DImode, sel);
11757 switch (vmode)
11758 {
11759 case V2SImode:
11760 /* inp = xxxxxxxAxxxxxxxB */
11761 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11762 NULL_RTX, 1, OPTAB_DIRECT);
11763 /* t_1 = ....xxxxxxxAxxx. */
11764 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11765 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
11766 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11767 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
11768 /* sel = .......B */
11769 /* t_1 = ...A.... */
11770 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11771 /* sel = ...A...B */
11772 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
11773 /* sel = AAAABBBB * 4 */
11774 t_1 = force_reg (SImode, GEN_INT (0x01230123));
11775 /* sel = { A*4, A*4+1, A*4+2, ... } */
11776 break;
11777
11778 case V4HImode:
11779 /* inp = xxxAxxxBxxxCxxxD */
11780 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11781 NULL_RTX, 1, OPTAB_DIRECT);
11782 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11783 NULL_RTX, 1, OPTAB_DIRECT);
11784 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
11785 NULL_RTX, 1, OPTAB_DIRECT);
11786 /* t_1 = ..xxxAxxxBxxxCxx */
11787 /* t_2 = ....xxxAxxxBxxxC */
11788 /* t_3 = ......xxxAxxxBxx */
11789 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11790 GEN_INT (0x07),
11791 NULL_RTX, 1, OPTAB_DIRECT);
11792 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11793 GEN_INT (0x0700),
11794 NULL_RTX, 1, OPTAB_DIRECT);
11795 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
11796 GEN_INT (0x070000),
11797 NULL_RTX, 1, OPTAB_DIRECT);
11798 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
11799 GEN_INT (0x07000000),
11800 NULL_RTX, 1, OPTAB_DIRECT);
11801 /* sel = .......D */
11802 /* t_1 = .....C.. */
11803 /* t_2 = ...B.... */
11804 /* t_3 = .A...... */
11805 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11806 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
11807 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
11808 /* sel = .A.B.C.D */
11809 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
11810 /* sel = AABBCCDD * 2 */
11811 t_1 = force_reg (SImode, GEN_INT (0x01010101));
11812 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
11813 break;
11814
11815 case V8QImode:
11816 /* input = xAxBxCxDxExFxGxH */
11817 sel = expand_simple_binop (DImode, AND, sel,
11818 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
11819 | 0x0f0f0f0f),
11820 NULL_RTX, 1, OPTAB_DIRECT);
11821 /* sel = .A.B.C.D.E.F.G.H */
11822 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
11823 NULL_RTX, 1, OPTAB_DIRECT);
11824 /* t_1 = ..A.B.C.D.E.F.G. */
11825 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11826 NULL_RTX, 1, OPTAB_DIRECT);
11827 /* sel = .AABBCCDDEEFFGGH */
11828 sel = expand_simple_binop (DImode, AND, sel,
11829 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
11830 | 0xff00ff),
11831 NULL_RTX, 1, OPTAB_DIRECT);
11832 /* sel = ..AB..CD..EF..GH */
11833 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11834 NULL_RTX, 1, OPTAB_DIRECT);
11835 /* t_1 = ....AB..CD..EF.. */
11836 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11837 NULL_RTX, 1, OPTAB_DIRECT);
11838 /* sel = ..ABABCDCDEFEFGH */
11839 sel = expand_simple_binop (DImode, AND, sel,
11840 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
11841 NULL_RTX, 1, OPTAB_DIRECT);
11842 /* sel = ....ABCD....EFGH */
11843 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11844 NULL_RTX, 1, OPTAB_DIRECT);
11845 /* t_1 = ........ABCD.... */
11846 sel = gen_lowpart (SImode, sel);
11847 t_1 = gen_lowpart (SImode, t_1);
11848 break;
11849
11850 default:
11851 gcc_unreachable ();
11852 }
11853
11854 /* Always perform the final addition/merge within the bmask insn. */
11855 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, t_1));
11856 }
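/* A worked V4HImode example, in the A/B/C/D notation of the comments
   above: for selector fields A,B,C,D = 0,1,2,3 the code reaches the
   multiply with sel = 0x00010203, the * 0x22 step yields 0x00224466,
   and the bmask addition of t_1 = 0x01010101 produces the GSR mask
   0x01234567, i.e. the byte pairs {0,1, 2,3, 4,5, 6,7}.  */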
11857
11858 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
11859
11860 static bool
11861 sparc_frame_pointer_required (void)
11862 {
11863 /* If the stack pointer is dynamically modified in the function, it cannot
11864 serve as the frame pointer. */
11865 if (cfun->calls_alloca)
11866 return true;
11867
11868 /* If the function receives nonlocal gotos, it needs to save the frame
11869 pointer in the nonlocal_goto_save_area object. */
11870 if (cfun->has_nonlocal_label)
11871 return true;
11872
11873 /* In flat mode, that's it. */
11874 if (TARGET_FLAT)
11875 return false;
11876
11877 /* Otherwise, the frame pointer is required if the function isn't leaf. */
11878 return !(crtl->is_leaf && only_leaf_regs_used ());
11879 }
11880
11881 /* The way this is structured, we can't eliminate SFP in favor of SP
11882 if the frame pointer is required: we want to use the SFP->HFP elimination
11883 in that case. But the test in update_eliminables doesn't know we are
11884 assuming below that we only do the former elimination. */
11885
11886 static bool
11887 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
11888 {
11889 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
11890 }
11891
11892 /* Return the hard frame pointer directly to bypass the stack bias. */
11893
11894 static rtx
11895 sparc_builtin_setjmp_frame_value (void)
11896 {
11897 return hard_frame_pointer_rtx;
11898 }
11899
11900 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
11901 they won't be allocated. */
11902
11903 static void
11904 sparc_conditional_register_usage (void)
11905 {
11906 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
11907 {
11908 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11909 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11910 }
11911 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
11912 then honor it. */
11913 if (TARGET_ARCH32 && fixed_regs[5])
11914 fixed_regs[5] = 1;
11915 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
11916 fixed_regs[5] = 0;
11917 if (! TARGET_V9)
11918 {
11919 int regno;
11920 for (regno = SPARC_FIRST_V9_FP_REG;
11921 regno <= SPARC_LAST_V9_FP_REG;
11922 regno++)
11923 fixed_regs[regno] = 1;
11924 /* %fcc0 is used by v8 and v9. */
11925 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
11926 regno <= SPARC_LAST_V9_FCC_REG;
11927 regno++)
11928 fixed_regs[regno] = 1;
11929 }
11930 if (! TARGET_FPU)
11931 {
11932 int regno;
11933 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
11934 fixed_regs[regno] = 1;
11935 }
11936 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
11937 then honor it. Likewise with g3 and g4. */
11938 if (fixed_regs[2] == 2)
11939 fixed_regs[2] = ! TARGET_APP_REGS;
11940 if (fixed_regs[3] == 2)
11941 fixed_regs[3] = ! TARGET_APP_REGS;
11942 if (TARGET_ARCH32 && fixed_regs[4] == 2)
11943 fixed_regs[4] = ! TARGET_APP_REGS;
11944 else if (TARGET_CM_EMBMEDANY)
11945 fixed_regs[4] = 1;
11946 else if (fixed_regs[4] == 2)
11947 fixed_regs[4] = 0;
11948 if (TARGET_FLAT)
11949 {
11950 int regno;
11951 /* Disable leaf functions. */
11952 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
11953 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11954 leaf_reg_remap [regno] = regno;
11955 }
11956 if (TARGET_VIS)
11957 global_regs[SPARC_GSR_REG] = 1;
11958 }
11959
11960 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
11961
11962 - We can't load constants into FP registers.
11963 - We can't load FP constants into integer registers when soft-float,
11964 because there is no soft-float pattern with a r/F constraint.
11965 - We can't load FP constants into integer registers for TFmode unless
11966 it is 0.0L, because there is no movtf pattern with a r/F constraint.
11967 - Try and reload integer constants (symbolic or otherwise) back into
11968 registers directly, rather than having them dumped to memory. */
11969
11970 static reg_class_t
11971 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
11972 {
11973 machine_mode mode = GET_MODE (x);
11974 if (CONSTANT_P (x))
11975 {
11976 if (FP_REG_CLASS_P (rclass)
11977 || rclass == GENERAL_OR_FP_REGS
11978 || rclass == GENERAL_OR_EXTRA_FP_REGS
11979 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
11980 || (mode == TFmode && ! const_zero_operand (x, mode)))
11981 return NO_REGS;
11982
11983 if (GET_MODE_CLASS (mode) == MODE_INT)
11984 return GENERAL_REGS;
11985
11986 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
11987 {
11988 if (! FP_REG_CLASS_P (rclass)
11989 || !(const_zero_operand (x, mode)
11990 || const_all_ones_operand (x, mode)))
11991 return NO_REGS;
11992 }
11993 }
11994
11995 if (TARGET_VIS3
11996 && ! TARGET_ARCH64
11997 && (rclass == EXTRA_FP_REGS
11998 || rclass == GENERAL_OR_EXTRA_FP_REGS))
11999 {
12000 int regno = true_regnum (x);
12001
12002 if (SPARC_INT_REG_P (regno))
12003 return (rclass == EXTRA_FP_REGS
12004 ? FP_REGS : GENERAL_OR_FP_REGS);
12005 }
12006
12007 return rclass;
12008 }
12009
12010 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12011 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
12012
12013 const char *
12014 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12015 {
12016 char mulstr[32];
12017
12018 gcc_assert (! TARGET_ARCH64);
12019
12020 if (sparc_check_64 (operands[1], insn) <= 0)
12021 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12022 if (which_alternative == 1)
12023 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12024 if (GET_CODE (operands[2]) == CONST_INT)
12025 {
12026 if (which_alternative == 1)
12027 {
12028 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12029 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12030 output_asm_insn (mulstr, operands);
12031 return "srlx\t%L0, 32, %H0";
12032 }
12033 else
12034 {
12035 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12036 output_asm_insn ("or\t%L1, %3, %3", operands);
12037 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12038 output_asm_insn (mulstr, operands);
12039 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12040 return "mov\t%3, %L0";
12041 }
12042 }
12043 else if (rtx_equal_p (operands[1], operands[2]))
12044 {
12045 if (which_alternative == 1)
12046 {
12047 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12048 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12049 output_asm_insn (mulstr, operands);
12050 return "srlx\t%L0, 32, %H0";
12051 }
12052 else
12053 {
12054 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12055 output_asm_insn ("or\t%L1, %3, %3", operands);
12056 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12057 output_asm_insn (mulstr, operands);
12058 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12059 return "mov\t%3, %L0";
12060 }
12061 }
12062 if (sparc_check_64 (operands[2], insn) <= 0)
12063 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12064 if (which_alternative == 1)
12065 {
12066 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12067 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12068 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12069 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12070 output_asm_insn (mulstr, operands);
12071 return "srlx\t%L0, 32, %H0";
12072 }
12073 else
12074 {
12075 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12076 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12077 output_asm_insn ("or\t%L1, %3, %3", operands);
12078 output_asm_insn ("or\t%L2, %4, %4", operands);
12079 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12080 output_asm_insn (mulstr, operands);
12081 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12082 return "mov\t%3, %L0";
12083 }
12084 }
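/* For the general register/register case (the final arm above), the
   output for OPCODE = "mulx" looks like this (illustrative; %3 and %4
   are the scratch operands):

     sllx  %H1, 32, %3    ! glue the two halves of operand 1
     sllx  %H2, 32, %4    ! and of operand 2 into 64-bit scratches
     or    %L1, %3, %3
     or    %L2, %4, %4
     mulx  %3, %4, %3     ! single 64-bit multiply
     srlx  %3, 32, %H0    ! split the product back
     mov   %3, %L0  */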
12085
12086 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12087 all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn. MODE
12088 and INNER_MODE are the modes describing TARGET. */
12089
12090 static void
12091 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
12092 machine_mode inner_mode)
12093 {
12094 rtx t1, final_insn, sel;
12095 int bmask;
12096
12097 t1 = gen_reg_rtx (mode);
12098
12099 elt = convert_modes (SImode, inner_mode, elt, true);
12100 emit_move_insn (gen_lowpart(SImode, t1), elt);
12101
12102 switch (mode)
12103 {
12104 case V2SImode:
12105 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12106 bmask = 0x45674567;
12107 break;
12108 case V4HImode:
12109 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12110 bmask = 0x67676767;
12111 break;
12112 case V8QImode:
12113 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12114 bmask = 0x77777777;
12115 break;
12116 default:
12117 gcc_unreachable ();
12118 }
12119
12120 sel = force_reg (SImode, GEN_INT (bmask));
12121 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, const0_rtx));
12122 emit_insn (final_insn);
12123 }
12124
12125 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12126 all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn. */
12127
12128 static void
12129 vector_init_fpmerge (rtx target, rtx elt)
12130 {
12131 rtx t1, t2, t2_low, t3, t3_low;
12132
12133 t1 = gen_reg_rtx (V4QImode);
12134 elt = convert_modes (SImode, QImode, elt, true);
12135 emit_move_insn (gen_lowpart (SImode, t1), elt);
12136
12137 t2 = gen_reg_rtx (V8QImode);
12138 t2_low = gen_lowpart (V4QImode, t2);
12139 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12140
12141 t3 = gen_reg_rtx (V8QImode);
12142 t3_low = gen_lowpart (V4QImode, t3);
12143 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12144
12145 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12146 }
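/* Each FPMERGE above doubles the number of copies of the byte:
   starting from .......A (one copy in the low byte), the three
   successive merges produce ......AA, ....AAAA and finally AAAAAAAA,
   i.e. ELT replicated into all eight V8QI lanes.  */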
12147
12148 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12149 all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn. */
12150
12151 static void
12152 vector_init_faligndata (rtx target, rtx elt)
12153 {
12154 rtx t1 = gen_reg_rtx (V4HImode);
12155 int i;
12156
12157 elt = convert_modes (SImode, HImode, elt, true);
12158 emit_move_insn (gen_lowpart (SImode, t1), elt);
12159
12160 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12161 force_reg (SImode, GEN_INT (6)),
12162 const0_rtx));
12163
12164 for (i = 0; i < 4; i++)
12165 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12166 }
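/* The ALIGNADDR above sets GSR.align to 6, so each FALIGNDATA
   extracts 8 bytes starting at offset 6 of the 16-byte concatenation
   t1:target, i.e. the ELT halfword of T1 followed by the first six
   bytes of TARGET.  Every iteration thus shifts one more copy of ELT
   in from the left; after four iterations all V4HI lanes hold ELT.  */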
12167
12168 /* Emit code to initialize TARGET to values for individual fields VALS. */
12169
12170 void
12171 sparc_expand_vector_init (rtx target, rtx vals)
12172 {
12173 const machine_mode mode = GET_MODE (target);
12174 const machine_mode inner_mode = GET_MODE_INNER (mode);
12175 const int n_elts = GET_MODE_NUNITS (mode);
12176 int i, n_var = 0;
12177 bool all_same;
12178 rtx mem;
12179
12180 all_same = true;
12181 for (i = 0; i < n_elts; i++)
12182 {
12183 rtx x = XVECEXP (vals, 0, i);
12184 if (!CONSTANT_P (x))
12185 n_var++;
12186
12187 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12188 all_same = false;
12189 }
12190
12191 if (n_var == 0)
12192 {
12193 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12194 return;
12195 }
12196
12197 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12198 {
12199 if (GET_MODE_SIZE (inner_mode) == 4)
12200 {
12201 emit_move_insn (gen_lowpart (SImode, target),
12202 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12203 return;
12204 }
12205 else if (GET_MODE_SIZE (inner_mode) == 8)
12206 {
12207 emit_move_insn (gen_lowpart (DImode, target),
12208 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12209 return;
12210 }
12211 }
12212 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12213 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12214 {
12215 emit_move_insn (gen_highpart (word_mode, target),
12216 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12217 emit_move_insn (gen_lowpart (word_mode, target),
12218 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12219 return;
12220 }
12221
12222 if (all_same && GET_MODE_SIZE (mode) == 8)
12223 {
12224 if (TARGET_VIS2)
12225 {
12226 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12227 return;
12228 }
12229 if (mode == V8QImode)
12230 {
12231 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12232 return;
12233 }
12234 if (mode == V4HImode)
12235 {
12236 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12237 return;
12238 }
12239 }
12240
12241 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12242 for (i = 0; i < n_elts; i++)
12243 emit_move_insn (adjust_address_nv (mem, inner_mode,
12244 i * GET_MODE_SIZE (inner_mode)),
12245 XVECEXP (vals, 0, i));
12246 emit_move_insn (target, mem);
12247 }
12248
12249 /* Implement TARGET_SECONDARY_RELOAD. */
12250
12251 static reg_class_t
12252 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12253 machine_mode mode, secondary_reload_info *sri)
12254 {
12255 enum reg_class rclass = (enum reg_class) rclass_i;
12256
12257 sri->icode = CODE_FOR_nothing;
12258 sri->extra_cost = 0;
12259
12260 /* We need a temporary when loading/storing a HImode/QImode value
12261 between memory and the FPU registers. This can happen when combine puts
12262 a paradoxical subreg in a float/fix conversion insn. */
12263 if (FP_REG_CLASS_P (rclass)
12264 && (mode == HImode || mode == QImode)
12265 && (GET_CODE (x) == MEM
12266 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
12267 && true_regnum (x) == -1)))
12268 return GENERAL_REGS;
12269
12270 /* On 32-bit we need a temporary when loading/storing a DFmode value
12271 between unaligned memory and the upper FPU registers. */
12272 if (TARGET_ARCH32
12273 && rclass == EXTRA_FP_REGS
12274 && mode == DFmode
12275 && GET_CODE (x) == MEM
12276 && ! mem_min_alignment (x, 8))
12277 return FP_REGS;
12278
12279 if (((TARGET_CM_MEDANY
12280 && symbolic_operand (x, mode))
12281 || (TARGET_CM_EMBMEDANY
12282 && text_segment_operand (x, mode)))
12283 && ! flag_pic)
12284 {
12285 if (in_p)
12286 sri->icode = direct_optab_handler (reload_in_optab, mode);
12287 else
12288 sri->icode = direct_optab_handler (reload_out_optab, mode);
12289 return NO_REGS;
12290 }
12291
12292 if (TARGET_VIS3 && TARGET_ARCH32)
12293 {
12294 int regno = true_regnum (x);
12295
12296 /* When using VIS3 fp<-->int register moves, on 32-bit we have
12297 to move 8-byte values in 4-byte pieces. This only works via
12298 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
12299 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
12300 an FP_REGS intermediate move. */
12301 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
12302 || ((general_or_i64_p (rclass)
12303 || rclass == GENERAL_OR_FP_REGS)
12304 && SPARC_FP_REG_P (regno)))
12305 {
12306 sri->extra_cost = 2;
12307 return FP_REGS;
12308 }
12309 }
12310
12311 return NO_REGS;
12312 }
12313
12314 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
12315 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
12316
12317 bool
12318 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
12319 {
12320 enum rtx_code rc = GET_CODE (operands[1]);
12321 machine_mode cmp_mode;
12322 rtx cc_reg, dst, cmp;
12323
12324 cmp = operands[1];
12325 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
12326 return false;
12327
12328 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
12329 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
12330
12331 cmp_mode = GET_MODE (XEXP (cmp, 0));
12332 rc = GET_CODE (cmp);
12333
12334 dst = operands[0];
12335 if (! rtx_equal_p (operands[2], dst)
12336 && ! rtx_equal_p (operands[3], dst))
12337 {
12338 if (reg_overlap_mentioned_p (dst, cmp))
12339 dst = gen_reg_rtx (mode);
12340
12341 emit_move_insn (dst, operands[3]);
12342 }
12343 else if (operands[2] == dst)
12344 {
12345 operands[2] = operands[3];
12346
12347 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
12348 rc = reverse_condition_maybe_unordered (rc);
12349 else
12350 rc = reverse_condition (rc);
12351 }
12352
12353 if (XEXP (cmp, 1) == const0_rtx
12354 && GET_CODE (XEXP (cmp, 0)) == REG
12355 && cmp_mode == DImode
12356 && v9_regcmp_p (rc))
12357 cc_reg = XEXP (cmp, 0);
12358 else
12359 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
12360
12361 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
12362
12363 emit_insn (gen_rtx_SET (dst,
12364 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
12365
12366 if (dst != operands[0])
12367 emit_move_insn (operands[0], dst);
12368
12369 return true;
12370 }
12371
12372 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12373 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12374 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
12375 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12376 code to be used for the condition mask. */
12377
12378 void
12379 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
12380 {
12381 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12382 enum rtx_code code = GET_CODE (operands[3]);
12383
12384 mask = gen_reg_rtx (Pmode);
12385 cop0 = operands[4];
12386 cop1 = operands[5];
12387 if (code == LT || code == GE)
12388 {
12389 rtx t;
12390
12391 code = swap_condition (code);
12392 t = cop0; cop0 = cop1; cop1 = t;
12393 }
12394
12395 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12396
12397 fcmp = gen_rtx_UNSPEC (Pmode,
12398 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12399 fcode);
12400
12401 cmask = gen_rtx_UNSPEC (DImode,
12402 gen_rtvec (2, mask, gsr),
12403 ccode);
12404
12405 bshuf = gen_rtx_UNSPEC (mode,
12406 gen_rtvec (3, operands[1], operands[2], gsr),
12407 UNSPEC_BSHUFFLE);
12408
12409 emit_insn (gen_rtx_SET (mask, fcmp));
12410 emit_insn (gen_rtx_SET (gsr, cmask));
12411
12412 emit_insn (gen_rtx_SET (operands[0], bshuf));
12413 }
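/* The three SETs above chain fcmp -> cmask -> bshuffle: the FCMP
   unspec computes a per-lane predicate into MASK, the CMASK unspec
   converts that predicate into the GSR shuffle mask, and BSHUFFLE
   finally assembles each lane of OPERANDS[0] from either OPERANDS[1]
   or OPERANDS[2].  */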
12414
12415 /* On sparc, any mode which naturally allocates into the float
12416 registers should return 4 here. */
12417
12418 unsigned int
12419 sparc_regmode_natural_size (machine_mode mode)
12420 {
12421 int size = UNITS_PER_WORD;
12422
12423 if (TARGET_ARCH64)
12424 {
12425 enum mode_class mclass = GET_MODE_CLASS (mode);
12426
12427 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12428 size = 4;
12429 }
12430
12431 return size;
12432 }
12433
12434 /* Return TRUE if it is a good idea to tie two pseudo registers
12435 when one has mode MODE1 and one has mode MODE2.
12436 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12437 for any hard reg, then this must be FALSE for correct output.
12438
12439 For V9 we have to deal with the fact that only the lower 32 floating
12440 point registers are 32-bit addressable. */
12441
12442 bool
12443 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
12444 {
12445 enum mode_class mclass1, mclass2;
12446 unsigned short size1, size2;
12447
12448 if (mode1 == mode2)
12449 return true;
12450
12451 mclass1 = GET_MODE_CLASS (mode1);
12452 mclass2 = GET_MODE_CLASS (mode2);
12453 if (mclass1 != mclass2)
12454 return false;
12455
12456 if (! TARGET_V9)
12457 return true;
12458
12459 /* Classes are the same and we are V9 so we have to deal with upper
12460 vs. lower floating point registers. If one of the modes is a
12461 4-byte mode, and the other is not, we have to mark them as not
12462 tieable because only the lower 32 floating point register are
12463 addressable 32-bits at a time.
12464
12465 We can't just test explicitly for SFmode, otherwise we won't
12466 cover the vector mode cases properly. */
12467
12468 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12469 return true;
12470
12471 size1 = GET_MODE_SIZE (mode1);
12472 size2 = GET_MODE_SIZE (mode2);
12473 if ((size1 > 4 && size2 == 4)
12474 || (size2 > 4 && size1 == 4))
12475 return false;
12476
12477 return true;
12478 }
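/* For instance, on V9 SFmode (4 bytes) and DFmode (8 bytes) are not
   tieable under the size test above, DFmode and DImode already fail
   the mode-class test, and two 4-byte modes of the same class remain
   tieable.  */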
12479
12480 /* Implement TARGET_CSTORE_MODE. */
12481
12482 static machine_mode
12483 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12484 {
12485 return (TARGET_ARCH64 ? DImode : SImode);
12486 }
12487
12488 /* Return the compound expression made of T1 and T2. */
12489
12490 static inline tree
12491 compound_expr (tree t1, tree t2)
12492 {
12493 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
12494 }
12495
12496 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
12497
12498 static void
12499 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12500 {
12501 if (!TARGET_FPU)
12502 return;
12503
12504 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
12505 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
12506
12507 /* We generate the equivalent of feholdexcept (&fenv_var):
12508
12509 unsigned int fenv_var;
12510 __builtin_store_fsr (&fenv_var);
12511
12512 unsigned int tmp1_var;
12513 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
12514
12515 __builtin_load_fsr (&tmp1_var); */
12516
12517 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
12518 TREE_ADDRESSABLE (fenv_var) = 1;
12519 tree fenv_addr = build_fold_addr_expr (fenv_var);
12520 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
12521 tree hold_stfsr
12522 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
12523 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
12524
12525 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
12526 TREE_ADDRESSABLE (tmp1_var) = 1;
12527 tree masked_fenv_var
12528 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
12529 build_int_cst (unsigned_type_node,
12530 ~(accrued_exception_mask | trap_enable_mask)));
12531 tree hold_mask
12532 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
12533 NULL_TREE, NULL_TREE);
12534
12535 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
12536 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
12537 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
12538
12539 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
12540
12541 /* We reload the value of tmp1_var to clear the exceptions:
12542
12543 __builtin_load_fsr (&tmp1_var); */
12544
12545 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
12546
12547 /* We generate the equivalent of feupdateenv (&fenv_var):
12548
12549 unsigned int tmp2_var;
12550 __builtin_store_fsr (&tmp2_var);
12551
12552 __builtin_load_fsr (&fenv_var);
12553
12554 if (SPARC_LOW_FE_EXCEPT_VALUES)
12555 tmp2_var >>= 5;
12556 __atomic_feraiseexcept ((int) tmp2_var); */
12557
12558 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
12559 TREE_ADDRESSABLE (tmp2_var) = 1;
12560 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
12561 tree update_stfsr
12562 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
12563 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
12564
12565 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
12566
12567 tree atomic_feraiseexcept
12568 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12569 tree update_call
12570 = build_call_expr (atomic_feraiseexcept, 1,
12571 fold_convert (integer_type_node, tmp2_var));
12572
12573 if (SPARC_LOW_FE_EXCEPT_VALUES)
12574 {
12575 tree shifted_tmp2_var
12576 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
12577 build_int_cst (unsigned_type_node, 5));
12578 tree update_shift
12579 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
12580 update_call = compound_expr (update_shift, update_call);
12581 }
12582
12583 *update
12584 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
12585 }
12586
12587 #include "gt-sparc.h"