1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2014 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "stringpool.h"
29 #include "stor-layout.h"
30 #include "calls.h"
31 #include "varasm.h"
32 #include "rtl.h"
33 #include "regs.h"
34 #include "hard-reg-set.h"
35 #include "insn-config.h"
36 #include "insn-codes.h"
37 #include "conditions.h"
38 #include "output.h"
39 #include "insn-attr.h"
40 #include "flags.h"
41 #include "function.h"
42 #include "except.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "recog.h"
46 #include "diagnostic-core.h"
47 #include "ggc.h"
48 #include "tm_p.h"
49 #include "debug.h"
50 #include "target.h"
51 #include "target-def.h"
52 #include "common/common-target.h"
53 #include "hash-table.h"
54 #include "vec.h"
55 #include "basic-block.h"
56 #include "tree-ssa-alias.h"
57 #include "internal-fn.h"
58 #include "gimple-fold.h"
59 #include "tree-eh.h"
60 #include "gimple-expr.h"
61 #include "is-a.h"
62 #include "gimple.h"
63 #include "gimplify.h"
64 #include "langhooks.h"
65 #include "reload.h"
66 #include "params.h"
67 #include "df.h"
68 #include "opts.h"
69 #include "tree-pass.h"
70 #include "context.h"
71 #include "wide-int.h"
72 #include "builtins.h"
73
74 /* Processor costs */
75
76 struct processor_costs {
77 /* Integer load */
78 const int int_load;
79
80 /* Integer signed load */
81 const int int_sload;
82
83 /* Integer zeroed load */
84 const int int_zload;
85
86 /* Float load */
87 const int float_load;
88
89 /* fmov, fneg, fabs */
90 const int float_move;
91
92 /* fadd, fsub */
93 const int float_plusminus;
94
95 /* fcmp */
96 const int float_cmp;
97
98 /* fmov, fmovr */
99 const int float_cmove;
100
101 /* fmul */
102 const int float_mul;
103
104 /* fdivs */
105 const int float_div_sf;
106
107 /* fdivd */
108 const int float_div_df;
109
110 /* fsqrts */
111 const int float_sqrt_sf;
112
113 /* fsqrtd */
114 const int float_sqrt_df;
115
116 /* umul/smul */
117 const int int_mul;
118
119 /* mulX */
120 const int int_mulX;
121
122 /* integer multiply cost for each bit set past the most
123 significant 3, so the formula for multiply cost becomes:
124
125 if (rs1 < 0)
126 highest_bit = highest_clear_bit(rs1);
127 else
128 highest_bit = highest_set_bit(rs1);
129 if (highest_bit < 3)
130 highest_bit = 3;
131 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
132
133 A value of zero indicates that the multiply cost is fixed,
134 and not variable. */
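  /* For example, with int_mul_bit_factor == 2 (the ultrasparc entry
     below), multiplying by a value whose highest set bit is bit 11
     costs int_mul + (11 - 3) / 2 = int_mul + 4 cost units.  */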
135 const int int_mul_bit_factor;
136
137 /* udiv/sdiv */
138 const int int_div;
139
140 /* divX */
141 const int int_divX;
142
143 /* movcc, movr */
144 const int int_cmove;
145
146 /* penalty for shifts, due to scheduling rules etc. */
147 const int shift_penalty;
148 };
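/* All entries in the cost tables below are scaled with COSTS_N_INSNS,
   so COSTS_N_INSNS (N) stands for roughly the cost of N simple
   single-cycle instructions; e.g. COSTS_N_INSNS (37) for fdivs on the
   Cypress models a divide about 37 times as expensive as a move.  */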
149
150 static const
151 struct processor_costs cypress_costs = {
152 COSTS_N_INSNS (2), /* int load */
153 COSTS_N_INSNS (2), /* int signed load */
154 COSTS_N_INSNS (2), /* int zeroed load */
155 COSTS_N_INSNS (2), /* float load */
156 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
157 COSTS_N_INSNS (5), /* fadd, fsub */
158 COSTS_N_INSNS (1), /* fcmp */
159 COSTS_N_INSNS (1), /* fmov, fmovr */
160 COSTS_N_INSNS (7), /* fmul */
161 COSTS_N_INSNS (37), /* fdivs */
162 COSTS_N_INSNS (37), /* fdivd */
163 COSTS_N_INSNS (63), /* fsqrts */
164 COSTS_N_INSNS (63), /* fsqrtd */
165 COSTS_N_INSNS (1), /* imul */
166 COSTS_N_INSNS (1), /* imulX */
167 0, /* imul bit factor */
168 COSTS_N_INSNS (1), /* idiv */
169 COSTS_N_INSNS (1), /* idivX */
170 COSTS_N_INSNS (1), /* movcc/movr */
171 0, /* shift penalty */
172 };
173
174 static const
175 struct processor_costs supersparc_costs = {
176 COSTS_N_INSNS (1), /* int load */
177 COSTS_N_INSNS (1), /* int signed load */
178 COSTS_N_INSNS (1), /* int zeroed load */
179 COSTS_N_INSNS (0), /* float load */
180 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
181 COSTS_N_INSNS (3), /* fadd, fsub */
182 COSTS_N_INSNS (3), /* fcmp */
183 COSTS_N_INSNS (1), /* fmov, fmovr */
184 COSTS_N_INSNS (3), /* fmul */
185 COSTS_N_INSNS (6), /* fdivs */
186 COSTS_N_INSNS (9), /* fdivd */
187 COSTS_N_INSNS (12), /* fsqrts */
188 COSTS_N_INSNS (12), /* fsqrtd */
189 COSTS_N_INSNS (4), /* imul */
190 COSTS_N_INSNS (4), /* imulX */
191 0, /* imul bit factor */
192 COSTS_N_INSNS (4), /* idiv */
193 COSTS_N_INSNS (4), /* idivX */
194 COSTS_N_INSNS (1), /* movcc/movr */
195 1, /* shift penalty */
196 };
197
198 static const
199 struct processor_costs hypersparc_costs = {
200 COSTS_N_INSNS (1), /* int load */
201 COSTS_N_INSNS (1), /* int signed load */
202 COSTS_N_INSNS (1), /* int zeroed load */
203 COSTS_N_INSNS (1), /* float load */
204 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
205 COSTS_N_INSNS (1), /* fadd, fsub */
206 COSTS_N_INSNS (1), /* fcmp */
207 COSTS_N_INSNS (1), /* fmov, fmovr */
208 COSTS_N_INSNS (1), /* fmul */
209 COSTS_N_INSNS (8), /* fdivs */
210 COSTS_N_INSNS (12), /* fdivd */
211 COSTS_N_INSNS (17), /* fsqrts */
212 COSTS_N_INSNS (17), /* fsqrtd */
213 COSTS_N_INSNS (17), /* imul */
214 COSTS_N_INSNS (17), /* imulX */
215 0, /* imul bit factor */
216 COSTS_N_INSNS (17), /* idiv */
217 COSTS_N_INSNS (17), /* idivX */
218 COSTS_N_INSNS (1), /* movcc/movr */
219 0, /* shift penalty */
220 };
221
222 static const
223 struct processor_costs leon_costs = {
224 COSTS_N_INSNS (1), /* int load */
225 COSTS_N_INSNS (1), /* int signed load */
226 COSTS_N_INSNS (1), /* int zeroed load */
227 COSTS_N_INSNS (1), /* float load */
228 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
229 COSTS_N_INSNS (1), /* fadd, fsub */
230 COSTS_N_INSNS (1), /* fcmp */
231 COSTS_N_INSNS (1), /* fmov, fmovr */
232 COSTS_N_INSNS (1), /* fmul */
233 COSTS_N_INSNS (15), /* fdivs */
234 COSTS_N_INSNS (15), /* fdivd */
235 COSTS_N_INSNS (23), /* fsqrts */
236 COSTS_N_INSNS (23), /* fsqrtd */
237 COSTS_N_INSNS (5), /* imul */
238 COSTS_N_INSNS (5), /* imulX */
239 0, /* imul bit factor */
240 COSTS_N_INSNS (5), /* idiv */
241 COSTS_N_INSNS (5), /* idivX */
242 COSTS_N_INSNS (1), /* movcc/movr */
243 0, /* shift penalty */
244 };
245
246 static const
247 struct processor_costs leon3_costs = {
248 COSTS_N_INSNS (1), /* int load */
249 COSTS_N_INSNS (1), /* int signed load */
250 COSTS_N_INSNS (1), /* int zeroed load */
251 COSTS_N_INSNS (1), /* float load */
252 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
253 COSTS_N_INSNS (1), /* fadd, fsub */
254 COSTS_N_INSNS (1), /* fcmp */
255 COSTS_N_INSNS (1), /* fmov, fmovr */
256 COSTS_N_INSNS (1), /* fmul */
257 COSTS_N_INSNS (14), /* fdivs */
258 COSTS_N_INSNS (15), /* fdivd */
259 COSTS_N_INSNS (22), /* fsqrts */
260 COSTS_N_INSNS (23), /* fsqrtd */
261 COSTS_N_INSNS (5), /* imul */
262 COSTS_N_INSNS (5), /* imulX */
263 0, /* imul bit factor */
264 COSTS_N_INSNS (35), /* idiv */
265 COSTS_N_INSNS (35), /* idivX */
266 COSTS_N_INSNS (1), /* movcc/movr */
267 0, /* shift penalty */
268 };
269
270 static const
271 struct processor_costs sparclet_costs = {
272 COSTS_N_INSNS (3), /* int load */
273 COSTS_N_INSNS (3), /* int signed load */
274 COSTS_N_INSNS (1), /* int zeroed load */
275 COSTS_N_INSNS (1), /* float load */
276 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
277 COSTS_N_INSNS (1), /* fadd, fsub */
278 COSTS_N_INSNS (1), /* fcmp */
279 COSTS_N_INSNS (1), /* fmov, fmovr */
280 COSTS_N_INSNS (1), /* fmul */
281 COSTS_N_INSNS (1), /* fdivs */
282 COSTS_N_INSNS (1), /* fdivd */
283 COSTS_N_INSNS (1), /* fsqrts */
284 COSTS_N_INSNS (1), /* fsqrtd */
285 COSTS_N_INSNS (5), /* imul */
286 COSTS_N_INSNS (5), /* imulX */
287 0, /* imul bit factor */
288 COSTS_N_INSNS (5), /* idiv */
289 COSTS_N_INSNS (5), /* idivX */
290 COSTS_N_INSNS (1), /* movcc/movr */
291 0, /* shift penalty */
292 };
293
294 static const
295 struct processor_costs ultrasparc_costs = {
296 COSTS_N_INSNS (2), /* int load */
297 COSTS_N_INSNS (3), /* int signed load */
298 COSTS_N_INSNS (2), /* int zeroed load */
299 COSTS_N_INSNS (2), /* float load */
300 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
301 COSTS_N_INSNS (4), /* fadd, fsub */
302 COSTS_N_INSNS (1), /* fcmp */
303 COSTS_N_INSNS (2), /* fmov, fmovr */
304 COSTS_N_INSNS (4), /* fmul */
305 COSTS_N_INSNS (13), /* fdivs */
306 COSTS_N_INSNS (23), /* fdivd */
307 COSTS_N_INSNS (13), /* fsqrts */
308 COSTS_N_INSNS (23), /* fsqrtd */
309 COSTS_N_INSNS (4), /* imul */
310 COSTS_N_INSNS (4), /* imulX */
311 2, /* imul bit factor */
312 COSTS_N_INSNS (37), /* idiv */
313 COSTS_N_INSNS (68), /* idivX */
314 COSTS_N_INSNS (2), /* movcc/movr */
315 2, /* shift penalty */
316 };
317
318 static const
319 struct processor_costs ultrasparc3_costs = {
320 COSTS_N_INSNS (2), /* int load */
321 COSTS_N_INSNS (3), /* int signed load */
322 COSTS_N_INSNS (3), /* int zeroed load */
323 COSTS_N_INSNS (2), /* float load */
324 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
325 COSTS_N_INSNS (4), /* fadd, fsub */
326 COSTS_N_INSNS (5), /* fcmp */
327 COSTS_N_INSNS (3), /* fmov, fmovr */
328 COSTS_N_INSNS (4), /* fmul */
329 COSTS_N_INSNS (17), /* fdivs */
330 COSTS_N_INSNS (20), /* fdivd */
331 COSTS_N_INSNS (20), /* fsqrts */
332 COSTS_N_INSNS (29), /* fsqrtd */
333 COSTS_N_INSNS (6), /* imul */
334 COSTS_N_INSNS (6), /* imulX */
335 0, /* imul bit factor */
336 COSTS_N_INSNS (40), /* idiv */
337 COSTS_N_INSNS (71), /* idivX */
338 COSTS_N_INSNS (2), /* movcc/movr */
339 0, /* shift penalty */
340 };
341
342 static const
343 struct processor_costs niagara_costs = {
344 COSTS_N_INSNS (3), /* int load */
345 COSTS_N_INSNS (3), /* int signed load */
346 COSTS_N_INSNS (3), /* int zeroed load */
347 COSTS_N_INSNS (9), /* float load */
348 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
349 COSTS_N_INSNS (8), /* fadd, fsub */
350 COSTS_N_INSNS (26), /* fcmp */
351 COSTS_N_INSNS (8), /* fmov, fmovr */
352 COSTS_N_INSNS (29), /* fmul */
353 COSTS_N_INSNS (54), /* fdivs */
354 COSTS_N_INSNS (83), /* fdivd */
355 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
356 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
357 COSTS_N_INSNS (11), /* imul */
358 COSTS_N_INSNS (11), /* imulX */
359 0, /* imul bit factor */
360 COSTS_N_INSNS (72), /* idiv */
361 COSTS_N_INSNS (72), /* idivX */
362 COSTS_N_INSNS (1), /* movcc/movr */
363 0, /* shift penalty */
364 };
365
366 static const
367 struct processor_costs niagara2_costs = {
368 COSTS_N_INSNS (3), /* int load */
369 COSTS_N_INSNS (3), /* int signed load */
370 COSTS_N_INSNS (3), /* int zeroed load */
371 COSTS_N_INSNS (3), /* float load */
372 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
373 COSTS_N_INSNS (6), /* fadd, fsub */
374 COSTS_N_INSNS (6), /* fcmp */
375 COSTS_N_INSNS (6), /* fmov, fmovr */
376 COSTS_N_INSNS (6), /* fmul */
377 COSTS_N_INSNS (19), /* fdivs */
378 COSTS_N_INSNS (33), /* fdivd */
379 COSTS_N_INSNS (19), /* fsqrts */
380 COSTS_N_INSNS (33), /* fsqrtd */
381 COSTS_N_INSNS (5), /* imul */
382 COSTS_N_INSNS (5), /* imulX */
383 0, /* imul bit factor */
384 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
385 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
386 COSTS_N_INSNS (1), /* movcc/movr */
387 0, /* shift penalty */
388 };
389
390 static const
391 struct processor_costs niagara3_costs = {
392 COSTS_N_INSNS (3), /* int load */
393 COSTS_N_INSNS (3), /* int signed load */
394 COSTS_N_INSNS (3), /* int zeroed load */
395 COSTS_N_INSNS (3), /* float load */
396 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
397 COSTS_N_INSNS (9), /* fadd, fsub */
398 COSTS_N_INSNS (9), /* fcmp */
399 COSTS_N_INSNS (9), /* fmov, fmovr */
400 COSTS_N_INSNS (9), /* fmul */
401 COSTS_N_INSNS (23), /* fdivs */
402 COSTS_N_INSNS (37), /* fdivd */
403 COSTS_N_INSNS (23), /* fsqrts */
404 COSTS_N_INSNS (37), /* fsqrtd */
405 COSTS_N_INSNS (9), /* imul */
406 COSTS_N_INSNS (9), /* imulX */
407 0, /* imul bit factor */
408 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
409 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
410 COSTS_N_INSNS (1), /* movcc/movr */
411 0, /* shift penalty */
412 };
413
414 static const
415 struct processor_costs niagara4_costs = {
416 COSTS_N_INSNS (5), /* int load */
417 COSTS_N_INSNS (5), /* int signed load */
418 COSTS_N_INSNS (5), /* int zeroed load */
419 COSTS_N_INSNS (5), /* float load */
420 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
421 COSTS_N_INSNS (11), /* fadd, fsub */
422 COSTS_N_INSNS (11), /* fcmp */
423 COSTS_N_INSNS (11), /* fmov, fmovr */
424 COSTS_N_INSNS (11), /* fmul */
425 COSTS_N_INSNS (24), /* fdivs */
426 COSTS_N_INSNS (37), /* fdivd */
427 COSTS_N_INSNS (24), /* fsqrts */
428 COSTS_N_INSNS (37), /* fsqrtd */
429 COSTS_N_INSNS (12), /* imul */
430 COSTS_N_INSNS (12), /* imulX */
431 0, /* imul bit factor */
432 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
433 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
434 COSTS_N_INSNS (1), /* movcc/movr */
435 0, /* shift penalty */
436 };
437
438 static const struct processor_costs *sparc_costs = &cypress_costs;
439
440 #ifdef HAVE_AS_RELAX_OPTION
441 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
442 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
443 With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out
444 whether somebody branches between the sethi and jmp. */
445 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
446 #else
447 #define LEAF_SIBCALL_SLOT_RESERVED_P \
448 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
449 #endif
450
451 /* Vector to say how input registers are mapped to output registers.
452 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
453 eliminate it. You must use -fomit-frame-pointer to get that. */
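/* For example, entry 24 below is 8, remapping %i0 to %o0: a leaf
   function executes no save instruction, so it stays in its caller's
   register window and its input registers really are the caller-side
   output registers.  */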
454 char leaf_reg_remap[] =
455 { 0, 1, 2, 3, 4, 5, 6, 7,
456 -1, -1, -1, -1, -1, -1, 14, -1,
457 -1, -1, -1, -1, -1, -1, -1, -1,
458 8, 9, 10, 11, 12, 13, -1, 15,
459
460 32, 33, 34, 35, 36, 37, 38, 39,
461 40, 41, 42, 43, 44, 45, 46, 47,
462 48, 49, 50, 51, 52, 53, 54, 55,
463 56, 57, 58, 59, 60, 61, 62, 63,
464 64, 65, 66, 67, 68, 69, 70, 71,
465 72, 73, 74, 75, 76, 77, 78, 79,
466 80, 81, 82, 83, 84, 85, 86, 87,
467 88, 89, 90, 91, 92, 93, 94, 95,
468 96, 97, 98, 99, 100, 101, 102};
469
470 /* Vector, indexed by hard register number, which contains 1
471 for a register that is allowable in a candidate for leaf
472 function treatment. */
473 char sparc_leaf_regs[] =
474 { 1, 1, 1, 1, 1, 1, 1, 1,
475 0, 0, 0, 0, 0, 0, 1, 0,
476 0, 0, 0, 0, 0, 0, 0, 0,
477 1, 1, 1, 1, 1, 1, 0, 1,
478 1, 1, 1, 1, 1, 1, 1, 1,
479 1, 1, 1, 1, 1, 1, 1, 1,
480 1, 1, 1, 1, 1, 1, 1, 1,
481 1, 1, 1, 1, 1, 1, 1, 1,
482 1, 1, 1, 1, 1, 1, 1, 1,
483 1, 1, 1, 1, 1, 1, 1, 1,
484 1, 1, 1, 1, 1, 1, 1, 1,
485 1, 1, 1, 1, 1, 1, 1, 1,
486 1, 1, 1, 1, 1, 1, 1};
487
488 struct GTY(()) machine_function
489 {
490 /* Size of the frame of the function. */
491 HOST_WIDE_INT frame_size;
492
493 /* Size of the frame of the function minus the register window save area
494 and the outgoing argument area. */
495 HOST_WIDE_INT apparent_frame_size;
496
497 /* Register we pretend the frame pointer is allocated to. Normally, this
498 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
499 record "offset" separately as it may be too big for (reg + disp). */
500 rtx frame_base_reg;
501 HOST_WIDE_INT frame_base_offset;
502
503 /* Some local-dynamic TLS symbol name. */
504 const char *some_ld_name;
505
506 /* Number of global or FP registers to be saved (as 4-byte quantities). */
507 int n_global_fp_regs;
508
509 /* True if the current function is leaf and uses only leaf regs,
510 so that the SPARC leaf function optimization can be applied.
511 Private version of crtl->uses_only_leaf_regs, see
512 sparc_expand_prologue for the rationale. */
513 int leaf_function_p;
514
515 /* True if the prologue saves local or in registers. */
516 bool save_local_in_regs_p;
517
518 /* True if the data calculated by sparc_expand_prologue are valid. */
519 bool prologue_data_valid_p;
520 };
521
522 #define sparc_frame_size cfun->machine->frame_size
523 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
524 #define sparc_frame_base_reg cfun->machine->frame_base_reg
525 #define sparc_frame_base_offset cfun->machine->frame_base_offset
526 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
527 #define sparc_leaf_function_p cfun->machine->leaf_function_p
528 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
529 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
530
531 /* 1 if the next opcode is to be specially indented. */
532 int sparc_indent_opcode = 0;
533
534 static void sparc_option_override (void);
535 static void sparc_init_modes (void);
536 static void scan_record_type (const_tree, int *, int *, int *);
537 static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
538 const_tree, bool, bool, int *, int *);
539
540 static int supersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
541 static int hypersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
542
543 static void sparc_emit_set_const32 (rtx, rtx);
544 static void sparc_emit_set_const64 (rtx, rtx);
545 static void sparc_output_addr_vec (rtx);
546 static void sparc_output_addr_diff_vec (rtx);
547 static void sparc_output_deferred_case_vectors (void);
548 static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
549 static bool sparc_legitimate_constant_p (enum machine_mode, rtx);
550 static rtx sparc_builtin_saveregs (void);
551 static int epilogue_renumber (rtx *, int);
552 static bool sparc_assemble_integer (rtx, unsigned int, int);
553 static int set_extends (rtx_insn *);
554 static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
555 static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
556 #ifdef TARGET_SOLARIS
557 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
558 tree) ATTRIBUTE_UNUSED;
559 #endif
560 static int sparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
561 static int sparc_issue_rate (void);
562 static void sparc_sched_init (FILE *, int, int);
563 static int sparc_use_sched_lookahead (void);
564
565 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
566 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
567 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
568 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
569 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
570
571 static bool sparc_function_ok_for_sibcall (tree, tree);
572 static void sparc_init_libfuncs (void);
573 static void sparc_init_builtins (void);
574 static void sparc_fpu_init_builtins (void);
575 static void sparc_vis_init_builtins (void);
576 static tree sparc_builtin_decl (unsigned, bool);
577 static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
578 static tree sparc_fold_builtin (tree, int, tree *, bool);
579 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
580 HOST_WIDE_INT, tree);
581 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
582 HOST_WIDE_INT, const_tree);
583 static struct machine_function * sparc_init_machine_status (void);
584 static bool sparc_cannot_force_const_mem (enum machine_mode, rtx);
585 static rtx sparc_tls_get_addr (void);
586 static rtx sparc_tls_got (void);
587 static const char *get_some_local_dynamic_name (void);
588 static int get_some_local_dynamic_name_1 (rtx *, void *);
589 static int sparc_register_move_cost (enum machine_mode,
590 reg_class_t, reg_class_t);
591 static bool sparc_rtx_costs (rtx, int, int, int, int *, bool);
592 static rtx sparc_function_value (const_tree, const_tree, bool);
593 static rtx sparc_libcall_value (enum machine_mode, const_rtx);
594 static bool sparc_function_value_regno_p (const unsigned int);
595 static rtx sparc_struct_value_rtx (tree, int);
596 static enum machine_mode sparc_promote_function_mode (const_tree, enum machine_mode,
597 int *, const_tree, int);
598 static bool sparc_return_in_memory (const_tree, const_tree);
599 static bool sparc_strict_argument_naming (cumulative_args_t);
600 static void sparc_va_start (tree, rtx);
601 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
602 static bool sparc_vector_mode_supported_p (enum machine_mode);
603 static bool sparc_tls_referenced_p (rtx);
604 static rtx sparc_legitimize_tls_address (rtx);
605 static rtx sparc_legitimize_pic_address (rtx, rtx);
606 static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
607 static rtx sparc_delegitimize_address (rtx);
608 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
609 static bool sparc_pass_by_reference (cumulative_args_t,
610 enum machine_mode, const_tree, bool);
611 static void sparc_function_arg_advance (cumulative_args_t,
612 enum machine_mode, const_tree, bool);
613 static rtx sparc_function_arg_1 (cumulative_args_t,
614 enum machine_mode, const_tree, bool, bool);
615 static rtx sparc_function_arg (cumulative_args_t,
616 enum machine_mode, const_tree, bool);
617 static rtx sparc_function_incoming_arg (cumulative_args_t,
618 enum machine_mode, const_tree, bool);
619 static unsigned int sparc_function_arg_boundary (enum machine_mode,
620 const_tree);
621 static int sparc_arg_partial_bytes (cumulative_args_t,
622 enum machine_mode, tree, bool);
623 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
624 static void sparc_file_end (void);
625 static bool sparc_frame_pointer_required (void);
626 static bool sparc_can_eliminate (const int, const int);
627 static rtx sparc_builtin_setjmp_frame_value (void);
628 static void sparc_conditional_register_usage (void);
629 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
630 static const char *sparc_mangle_type (const_tree);
631 #endif
632 static void sparc_trampoline_init (rtx, tree, rtx);
633 static enum machine_mode sparc_preferred_simd_mode (enum machine_mode);
634 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
635 static bool sparc_print_operand_punct_valid_p (unsigned char);
636 static void sparc_print_operand (FILE *, rtx, int);
637 static void sparc_print_operand_address (FILE *, rtx);
638 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
639 enum machine_mode,
640 secondary_reload_info *);
641 static enum machine_mode sparc_cstore_mode (enum insn_code icode);
642 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
643 \f
644 #ifdef SUBTARGET_ATTRIBUTE_TABLE
645 /* Table of valid machine attributes. */
646 static const struct attribute_spec sparc_attribute_table[] =
647 {
648 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
649 do_diagnostic } */
650 SUBTARGET_ATTRIBUTE_TABLE,
651 { NULL, 0, 0, false, false, false, NULL, false }
652 };
653 #endif
654 \f
655 /* Option handling. */
656
657 /* Parsed value of the -mcmodel= option. */
658 enum cmodel sparc_cmodel;
659
660 char sparc_hard_reg_printed[8];
661
662 /* Initialize the GCC target structure. */
663
664 /* The default is to use .half rather than .short for aligned HI objects. */
665 #undef TARGET_ASM_ALIGNED_HI_OP
666 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
667
668 #undef TARGET_ASM_UNALIGNED_HI_OP
669 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
670 #undef TARGET_ASM_UNALIGNED_SI_OP
671 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
672 #undef TARGET_ASM_UNALIGNED_DI_OP
673 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
674
675 /* The target hook has to handle DI-mode values. */
676 #undef TARGET_ASM_INTEGER
677 #define TARGET_ASM_INTEGER sparc_assemble_integer
678
679 #undef TARGET_ASM_FUNCTION_PROLOGUE
680 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
681 #undef TARGET_ASM_FUNCTION_EPILOGUE
682 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
683
684 #undef TARGET_SCHED_ADJUST_COST
685 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
686 #undef TARGET_SCHED_ISSUE_RATE
687 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
688 #undef TARGET_SCHED_INIT
689 #define TARGET_SCHED_INIT sparc_sched_init
690 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
691 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
692
693 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
694 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
695
696 #undef TARGET_INIT_LIBFUNCS
697 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
698
699 #undef TARGET_LEGITIMIZE_ADDRESS
700 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
701 #undef TARGET_DELEGITIMIZE_ADDRESS
702 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
703 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
704 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
705
706 #undef TARGET_INIT_BUILTINS
707 #define TARGET_INIT_BUILTINS sparc_init_builtins
708 #undef TARGET_BUILTIN_DECL
709 #define TARGET_BUILTIN_DECL sparc_builtin_decl
710 #undef TARGET_EXPAND_BUILTIN
711 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
712 #undef TARGET_FOLD_BUILTIN
713 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
714
715 #if TARGET_TLS
716 #undef TARGET_HAVE_TLS
717 #define TARGET_HAVE_TLS true
718 #endif
719
720 #undef TARGET_CANNOT_FORCE_CONST_MEM
721 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
722
723 #undef TARGET_ASM_OUTPUT_MI_THUNK
724 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
725 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
726 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
727
728 #undef TARGET_RTX_COSTS
729 #define TARGET_RTX_COSTS sparc_rtx_costs
730 #undef TARGET_ADDRESS_COST
731 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
732 #undef TARGET_REGISTER_MOVE_COST
733 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
734
735 #undef TARGET_PROMOTE_FUNCTION_MODE
736 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
737
738 #undef TARGET_FUNCTION_VALUE
739 #define TARGET_FUNCTION_VALUE sparc_function_value
740 #undef TARGET_LIBCALL_VALUE
741 #define TARGET_LIBCALL_VALUE sparc_libcall_value
742 #undef TARGET_FUNCTION_VALUE_REGNO_P
743 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
744
745 #undef TARGET_STRUCT_VALUE_RTX
746 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
747 #undef TARGET_RETURN_IN_MEMORY
748 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
749 #undef TARGET_MUST_PASS_IN_STACK
750 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
751 #undef TARGET_PASS_BY_REFERENCE
752 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
753 #undef TARGET_ARG_PARTIAL_BYTES
754 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
755 #undef TARGET_FUNCTION_ARG_ADVANCE
756 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
757 #undef TARGET_FUNCTION_ARG
758 #define TARGET_FUNCTION_ARG sparc_function_arg
759 #undef TARGET_FUNCTION_INCOMING_ARG
760 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
761 #undef TARGET_FUNCTION_ARG_BOUNDARY
762 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
763
764 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
765 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
766 #undef TARGET_STRICT_ARGUMENT_NAMING
767 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
768
769 #undef TARGET_EXPAND_BUILTIN_VA_START
770 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
771 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
772 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
773
774 #undef TARGET_VECTOR_MODE_SUPPORTED_P
775 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
776
777 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
778 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
779
780 #ifdef SUBTARGET_INSERT_ATTRIBUTES
781 #undef TARGET_INSERT_ATTRIBUTES
782 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
783 #endif
784
785 #ifdef SUBTARGET_ATTRIBUTE_TABLE
786 #undef TARGET_ATTRIBUTE_TABLE
787 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
788 #endif
789
790 #undef TARGET_RELAXED_ORDERING
791 #define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING
792
793 #undef TARGET_OPTION_OVERRIDE
794 #define TARGET_OPTION_OVERRIDE sparc_option_override
795
796 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
797 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
798 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
799 #endif
800
801 #undef TARGET_ASM_FILE_END
802 #define TARGET_ASM_FILE_END sparc_file_end
803
804 #undef TARGET_FRAME_POINTER_REQUIRED
805 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
806
807 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
808 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
809
810 #undef TARGET_CAN_ELIMINATE
811 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
812
813 #undef TARGET_PREFERRED_RELOAD_CLASS
814 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
815
816 #undef TARGET_SECONDARY_RELOAD
817 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
818
819 #undef TARGET_CONDITIONAL_REGISTER_USAGE
820 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
821
822 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
823 #undef TARGET_MANGLE_TYPE
824 #define TARGET_MANGLE_TYPE sparc_mangle_type
825 #endif
826
827 #undef TARGET_LEGITIMATE_ADDRESS_P
828 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
829
830 #undef TARGET_LEGITIMATE_CONSTANT_P
831 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
832
833 #undef TARGET_TRAMPOLINE_INIT
834 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
835
836 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
837 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
838 #undef TARGET_PRINT_OPERAND
839 #define TARGET_PRINT_OPERAND sparc_print_operand
840 #undef TARGET_PRINT_OPERAND_ADDRESS
841 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
842
843 /* The value stored by LDSTUB. */
844 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
845 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
846
847 #undef TARGET_CSTORE_MODE
848 #define TARGET_CSTORE_MODE sparc_cstore_mode
849
850 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
851 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
852
853 struct gcc_target targetm = TARGET_INITIALIZER;
854
855 /* Return the memory reference contained in X if any, zero otherwise. */
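/* For example, for (zero_extend:SI (mem:HI ...)) this returns the
   inner MEM, while for a (plus ...) it returns NULL_RTX.  */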
856
857 static rtx
858 mem_ref (rtx x)
859 {
860 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
861 x = XEXP (x, 0);
862
863 if (MEM_P (x))
864 return x;
865
866 return NULL_RTX;
867 }
868
869 /* We use a machine specific pass to enable workarounds for errata.
870 We need to have the (essentially) final form of the insn stream in order
871 to properly detect the various hazards. Therefore, this machine specific
872 pass runs as late as possible. The pass is inserted in the pass pipeline
873 at the end of sparc_option_override. */
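/* As an illustration of the AT697F case handled below: a single-word
   load into an odd-numbered FP register followed by a double FP
   operation on the enclosing register pair, e.g.

	ld	[address], %f1
	faddd	%f0, %f2, %f0

   gets a nop inserted between the two instructions.  */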
874
875 static unsigned int
876 sparc_do_work_around_errata (void)
877 {
878 rtx_insn *insn, *next;
879
880 /* Force all instructions to be split into their final form. */
881 split_all_insns_noflow ();
882
883 /* Now look for specific patterns in the insn stream. */
884 for (insn = get_insns (); insn; insn = next)
885 {
886 bool insert_nop = false;
887 rtx set;
888
889 /* Look into the instruction in a delay slot. */
890 if (NONJUMP_INSN_P (insn))
891 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
892 insn = seq->insn (1);
893
894 /* Look for a single-word load into an odd-numbered FP register. */
895 if (sparc_fix_at697f
896 && NONJUMP_INSN_P (insn)
897 && (set = single_set (insn)) != NULL_RTX
898 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
899 && MEM_P (SET_SRC (set))
900 && REG_P (SET_DEST (set))
901 && REGNO (SET_DEST (set)) > 31
902 && REGNO (SET_DEST (set)) % 2 != 0)
903 {
904 /* The wrong dependency is on the enclosing double register. */
905 const unsigned int x = REGNO (SET_DEST (set)) - 1;
906 unsigned int src1, src2, dest;
907 int code;
908
909 next = next_active_insn (insn);
910 if (!next)
911 break;
912 /* If the insn is a branch, then it cannot be problematic. */
913 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
914 continue;
915
916 extract_insn (next);
917 code = INSN_CODE (next);
918
919 switch (code)
920 {
921 case CODE_FOR_adddf3:
922 case CODE_FOR_subdf3:
923 case CODE_FOR_muldf3:
924 case CODE_FOR_divdf3:
925 dest = REGNO (recog_data.operand[0]);
926 src1 = REGNO (recog_data.operand[1]);
927 src2 = REGNO (recog_data.operand[2]);
928 if (src1 != src2)
929 {
930 /* Case [1-4]:
931 ld [address], %fx+1
932 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
933 if ((src1 == x || src2 == x)
934 && (dest == src1 || dest == src2))
935 insert_nop = true;
936 }
937 else
938 {
939 /* Case 5:
940 ld [address], %fx+1
941 FPOPd %fx, %fx, %fx */
942 if (src1 == x
943 && dest == src1
944 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
945 insert_nop = true;
946 }
947 break;
948
949 case CODE_FOR_sqrtdf2:
950 dest = REGNO (recog_data.operand[0]);
951 src1 = REGNO (recog_data.operand[1]);
952 /* Case 6:
953 ld [address], %fx+1
954 fsqrtd %fx, %fx */
955 if (src1 == x && dest == src1)
956 insert_nop = true;
957 break;
958
959 default:
960 break;
961 }
962 }
963
964 /* Look for a single-word load into an integer register. */
965 else if (sparc_fix_ut699
966 && NONJUMP_INSN_P (insn)
967 && (set = single_set (insn)) != NULL_RTX
968 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
969 && mem_ref (SET_SRC (set)) != NULL_RTX
970 && REG_P (SET_DEST (set))
971 && REGNO (SET_DEST (set)) < 32)
972 {
973 /* There is no problem if the second memory access has a data
974 dependency on the first single-cycle load. */
975 rtx x = SET_DEST (set);
976
977 next = next_active_insn (insn);
978 if (!next)
979 break;
980 /* If the insn is a branch, then it cannot be problematic. */
981 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
982 continue;
983
984 /* Look for a second memory access to/from an integer register. */
985 if ((set = single_set (next)) != NULL_RTX)
986 {
987 rtx src = SET_SRC (set);
988 rtx dest = SET_DEST (set);
989 rtx mem;
990
991 /* LDD is affected. */
992 if ((mem = mem_ref (src)) != NULL_RTX
993 && REG_P (dest)
994 && REGNO (dest) < 32
995 && !reg_mentioned_p (x, XEXP (mem, 0)))
996 insert_nop = true;
997
998 /* STD is *not* affected. */
999 else if (MEM_P (dest)
1000 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1001 && (src == CONST0_RTX (GET_MODE (dest))
1002 || (REG_P (src)
1003 && REGNO (src) < 32
1004 && REGNO (src) != REGNO (x)))
1005 && !reg_mentioned_p (x, XEXP (dest, 0)))
1006 insert_nop = true;
1007 }
1008 }
1009
1010 /* Look for a single-word load/operation into an FP register. */
1011 else if (sparc_fix_ut699
1012 && NONJUMP_INSN_P (insn)
1013 && (set = single_set (insn)) != NULL_RTX
1014 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1015 && REG_P (SET_DEST (set))
1016 && REGNO (SET_DEST (set)) > 31)
1017 {
1018 /* Number of instructions in the problematic window. */
1019 const int n_insns = 4;
1020 /* The problematic combination is with the sibling FP register. */
1021 const unsigned int x = REGNO (SET_DEST (set));
1022 const unsigned int y = x ^ 1;
1023 rtx after;
1024 int i;
1025
1026 next = next_active_insn (insn);
1027 if (!next)
1028 break;
1029 /* If the insn is a branch, then it cannot be problematic. */
1030 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1031 continue;
1032
1033 /* Look for a second load/operation into the sibling FP register. */
1034 if (!((set = single_set (next)) != NULL_RTX
1035 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1036 && REG_P (SET_DEST (set))
1037 && REGNO (SET_DEST (set)) == y))
1038 continue;
1039
1040 /* Look for a (possible) store from the FP register in the next N
1041 instructions, but bail out if it is again modified or if there
1042 is a store from the sibling FP register before this store. */
1043 for (after = next, i = 0; i < n_insns; i++)
1044 {
1045 bool branch_p;
1046
1047 after = next_active_insn (after);
1048 if (!after)
1049 break;
1050
1051 /* This is a branch with an empty delay slot. */
1052 if (!NONJUMP_INSN_P (after))
1053 {
1054 if (++i == n_insns)
1055 break;
1056 branch_p = true;
1057 after = NULL_RTX;
1058 }
1059 /* This is a branch with a filled delay slot. */
1060 else if (GET_CODE (PATTERN (after)) == SEQUENCE)
1061 {
1062 if (++i == n_insns)
1063 break;
1064 branch_p = true;
1065 after = XVECEXP (PATTERN (after), 0, 1);
1066 }
1067 /* This is a regular instruction. */
1068 else
1069 branch_p = false;
1070
1071 if (after && (set = single_set (after)) != NULL_RTX)
1072 {
1073 const rtx src = SET_SRC (set);
1074 const rtx dest = SET_DEST (set);
1075 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1076
1077 /* If the FP register is again modified before the store,
1078 then the store isn't affected. */
1079 if (REG_P (dest)
1080 && (REGNO (dest) == x
1081 || (REGNO (dest) == y && size == 8)))
1082 break;
1083
1084 if (MEM_P (dest) && REG_P (src))
1085 {
1086 /* If there is a store from the sibling FP register
1087 before the store, then the store is not affected. */
1088 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1089 break;
1090
1091 /* Otherwise, the store is affected. */
1092 if (REGNO (src) == x && size == 4)
1093 {
1094 insert_nop = true;
1095 break;
1096 }
1097 }
1098 }
1099
1100 /* If we have a branch in the first M instructions, then we
1101 cannot see the (M+2)th instruction so we play safe. */
1102 if (branch_p && i <= (n_insns - 2))
1103 {
1104 insert_nop = true;
1105 break;
1106 }
1107 }
1108 }
1109
1110 else
1111 next = NEXT_INSN (insn);
1112
1113 if (insert_nop)
1114 emit_insn_before (gen_nop (), next);
1115 }
1116
1117 return 0;
1118 }
1119
1120 namespace {
1121
1122 const pass_data pass_data_work_around_errata =
1123 {
1124 RTL_PASS, /* type */
1125 "errata", /* name */
1126 OPTGROUP_NONE, /* optinfo_flags */
1127 TV_MACH_DEP, /* tv_id */
1128 0, /* properties_required */
1129 0, /* properties_provided */
1130 0, /* properties_destroyed */
1131 0, /* todo_flags_start */
1132 0, /* todo_flags_finish */
1133 };
1134
1135 class pass_work_around_errata : public rtl_opt_pass
1136 {
1137 public:
1138 pass_work_around_errata(gcc::context *ctxt)
1139 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1140 {}
1141
1142 /* opt_pass methods: */
1143 virtual bool gate (function *)
1144 {
1145 /* The only errata we handle are those of the AT697F and UT699. */
1146 return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
1147 }
1148
1149 virtual unsigned int execute (function *)
1150 {
1151 return sparc_do_work_around_errata ();
1152 }
1153
1154 }; // class pass_work_around_errata
1155
1156 } // anon namespace
1157
1158 rtl_opt_pass *
1159 make_pass_work_around_errata (gcc::context *ctxt)
1160 {
1161 return new pass_work_around_errata (ctxt);
1162 }
1163
1164 /* Helpers for TARGET_DEBUG_OPTIONS. */
1165 static void
1166 dump_target_flag_bits (const int flags)
1167 {
1168 if (flags & MASK_64BIT)
1169 fprintf (stderr, "64BIT ");
1170 if (flags & MASK_APP_REGS)
1171 fprintf (stderr, "APP_REGS ");
1172 if (flags & MASK_FASTER_STRUCTS)
1173 fprintf (stderr, "FASTER_STRUCTS ");
1174 if (flags & MASK_FLAT)
1175 fprintf (stderr, "FLAT ");
1176 if (flags & MASK_FMAF)
1177 fprintf (stderr, "FMAF ");
1178 if (flags & MASK_FPU)
1179 fprintf (stderr, "FPU ");
1180 if (flags & MASK_HARD_QUAD)
1181 fprintf (stderr, "HARD_QUAD ");
1182 if (flags & MASK_POPC)
1183 fprintf (stderr, "POPC ");
1184 if (flags & MASK_PTR64)
1185 fprintf (stderr, "PTR64 ");
1186 if (flags & MASK_STACK_BIAS)
1187 fprintf (stderr, "STACK_BIAS ");
1188 if (flags & MASK_UNALIGNED_DOUBLES)
1189 fprintf (stderr, "UNALIGNED_DOUBLES ");
1190 if (flags & MASK_V8PLUS)
1191 fprintf (stderr, "V8PLUS ");
1192 if (flags & MASK_VIS)
1193 fprintf (stderr, "VIS ");
1194 if (flags & MASK_VIS2)
1195 fprintf (stderr, "VIS2 ");
1196 if (flags & MASK_VIS3)
1197 fprintf (stderr, "VIS3 ");
1198 if (flags & MASK_CBCOND)
1199 fprintf (stderr, "CBCOND ");
1200 if (flags & MASK_DEPRECATED_V8_INSNS)
1201 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1202 if (flags & MASK_SPARCLET)
1203 fprintf (stderr, "SPARCLET ");
1204 if (flags & MASK_SPARCLITE)
1205 fprintf (stderr, "SPARCLITE ");
1206 if (flags & MASK_V8)
1207 fprintf (stderr, "V8 ");
1208 if (flags & MASK_V9)
1209 fprintf (stderr, "V9 ");
1210 }
1211
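/* For illustration, assuming FLAGS has only MASK_FPU and MASK_V8 set,
   dump_target_flags ("Final target_flags", flags) would print
   something like:

	Final target_flags: (xxxxxxxx) [ FPU V8 ]

   with xxxxxxxx standing for the hexadecimal value of FLAGS.  */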
1212 static void
1213 dump_target_flags (const char *prefix, const int flags)
1214 {
1215 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1216 dump_target_flag_bits (flags);
1217 fprintf(stderr, "]\n");
1218 }
1219
1220 /* Validate and override various options, and do some machine dependent
1221 initialization. */
1222
1223 static void
1224 sparc_option_override (void)
1225 {
1226 static struct code_model {
1227 const char *const name;
1228 const enum cmodel value;
1229 } const cmodels[] = {
1230 { "32", CM_32 },
1231 { "medlow", CM_MEDLOW },
1232 { "medmid", CM_MEDMID },
1233 { "medany", CM_MEDANY },
1234 { "embmedany", CM_EMBMEDANY },
1235 { NULL, (enum cmodel) 0 }
1236 };
1237 const struct code_model *cmodel;
1238 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1239 static struct cpu_default {
1240 const int cpu;
1241 const enum processor_type processor;
1242 } const cpu_default[] = {
1243 /* There must be one entry here for each TARGET_CPU value. */
1244 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1245 { TARGET_CPU_v8, PROCESSOR_V8 },
1246 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1247 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1248 { TARGET_CPU_leon, PROCESSOR_LEON },
1249 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1250 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1251 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1252 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1253 { TARGET_CPU_v9, PROCESSOR_V9 },
1254 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1255 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1256 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1257 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1258 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1259 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1260 { -1, PROCESSOR_V7 }
1261 };
1262 const struct cpu_default *def;
1263 /* Table of values for -m{cpu,tune}=. This must match the order of
1264 the enum processor_type in sparc-opts.h. */
1265 static struct cpu_table {
1266 const char *const name;
1267 const int disable;
1268 const int enable;
1269 } const cpu_table[] = {
1270 { "v7", MASK_ISA, 0 },
1271 { "cypress", MASK_ISA, 0 },
1272 { "v8", MASK_ISA, MASK_V8 },
1273 /* TI TMS390Z55 supersparc */
1274 { "supersparc", MASK_ISA, MASK_V8 },
1275 { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
1276 { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
1277 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
1278 { "sparclite", MASK_ISA, MASK_SPARCLITE },
1279 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1280 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1281 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1282 { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
1283 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1284 { "sparclet", MASK_ISA, MASK_SPARCLET },
1285 /* TEMIC sparclet */
1286 { "tsc701", MASK_ISA, MASK_SPARCLET },
1287 { "v9", MASK_ISA, MASK_V9 },
1288 /* UltraSPARC I, II, IIi */
1289 { "ultrasparc", MASK_ISA,
1290 /* Although insns using %y are deprecated, it is a clear win. */
1291 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1292 /* UltraSPARC III */
1293 /* ??? Check if %y issue still holds true. */
1294 { "ultrasparc3", MASK_ISA,
1295 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1296 /* UltraSPARC T1 */
1297 { "niagara", MASK_ISA,
1298 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1299 /* UltraSPARC T2 */
1300 { "niagara2", MASK_ISA,
1301 MASK_V9|MASK_POPC|MASK_VIS2 },
1302 /* UltraSPARC T3 */
1303 { "niagara3", MASK_ISA,
1304 MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
1305 /* UltraSPARC T4 */
1306 { "niagara4", MASK_ISA,
1307 MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1308 };
1309 const struct cpu_table *cpu;
1310 unsigned int i;
1311 int fpu;
1312
1313 if (sparc_debug_string != NULL)
1314 {
1315 const char *q;
1316 char *p;
1317
1318 p = ASTRDUP (sparc_debug_string);
1319 while ((q = strtok (p, ",")) != NULL)
1320 {
1321 bool invert;
1322 int mask;
1323
1324 p = NULL;
1325 if (*q == '!')
1326 {
1327 invert = true;
1328 q++;
1329 }
1330 else
1331 invert = false;
1332
1333 if (! strcmp (q, "all"))
1334 mask = MASK_DEBUG_ALL;
1335 else if (! strcmp (q, "options"))
1336 mask = MASK_DEBUG_OPTIONS;
1337 else
1338 error ("unknown -mdebug-%s switch", q);
1339
1340 if (invert)
1341 sparc_debug &= ~mask;
1342 else
1343 sparc_debug |= mask;
1344 }
1345 }
1346
1347 if (TARGET_DEBUG_OPTIONS)
1348 {
1349 dump_target_flags("Initial target_flags", target_flags);
1350 dump_target_flags("target_flags_explicit", target_flags_explicit);
1351 }
1352
1353 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1354 SUBTARGET_OVERRIDE_OPTIONS;
1355 #endif
1356
1357 #ifndef SPARC_BI_ARCH
1358 /* Check for unsupported architecture size. */
1359 if (! TARGET_64BIT != DEFAULT_ARCH32_P)
1360 error ("%s is not supported by this configuration",
1361 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1362 #endif
1363
1364 /* We force all 64-bit archs to use 128-bit long double. */
1365 if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
1366 {
1367 error ("-mlong-double-64 not allowed with -m64");
1368 target_flags |= MASK_LONG_DOUBLE_128;
1369 }
1370
1371 /* Code model selection. */
1372 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1373
1374 #ifdef SPARC_BI_ARCH
1375 if (TARGET_ARCH32)
1376 sparc_cmodel = CM_32;
1377 #endif
1378
1379 if (sparc_cmodel_string != NULL)
1380 {
1381 if (TARGET_ARCH64)
1382 {
1383 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1384 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1385 break;
1386 if (cmodel->name == NULL)
1387 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1388 else
1389 sparc_cmodel = cmodel->value;
1390 }
1391 else
1392 error ("-mcmodel= is not supported on 32 bit systems");
1393 }
1394
1395 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1396 for (i = 8; i < 16; i++)
1397 if (!call_used_regs [i])
1398 {
1399 error ("-fcall-saved-REG is not supported for out registers");
1400 call_used_regs [i] = 1;
1401 }
1402
1403 fpu = target_flags & MASK_FPU; /* save current -mfpu status */
1404
1405 /* Set the default CPU. */
1406 if (!global_options_set.x_sparc_cpu_and_features)
1407 {
1408 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1409 if (def->cpu == TARGET_CPU_DEFAULT)
1410 break;
1411 gcc_assert (def->cpu != -1);
1412 sparc_cpu_and_features = def->processor;
1413 }
1414
1415 if (!global_options_set.x_sparc_cpu)
1416 sparc_cpu = sparc_cpu_and_features;
1417
1418 cpu = &cpu_table[(int) sparc_cpu_and_features];
1419
1420 if (TARGET_DEBUG_OPTIONS)
1421 {
1422 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1423 fprintf (stderr, "sparc_cpu: %s\n",
1424 cpu_table[(int) sparc_cpu].name);
1425 dump_target_flags ("cpu->disable", cpu->disable);
1426 dump_target_flags ("cpu->enable", cpu->enable);
1427 }
1428
1429 target_flags &= ~cpu->disable;
1430 target_flags |= (cpu->enable
1431 #ifndef HAVE_AS_FMAF_HPC_VIS3
1432 & ~(MASK_FMAF | MASK_VIS3)
1433 #endif
1434 #ifndef HAVE_AS_SPARC4
1435 & ~MASK_CBCOND
1436 #endif
1437 #ifndef HAVE_AS_LEON
1438 & ~(MASK_LEON | MASK_LEON3)
1439 #endif
1440 );
1441
1442 /* If -mfpu or -mno-fpu was explicitly used, don't override with
1443 the processor default. */
1444 if (target_flags_explicit & MASK_FPU)
1445 target_flags = (target_flags & ~MASK_FPU) | fpu;
1446
1447 /* -mvis2 implies -mvis */
1448 if (TARGET_VIS2)
1449 target_flags |= MASK_VIS;
1450
1451 /* -mvis3 implies -mvis2 and -mvis */
1452 if (TARGET_VIS3)
1453 target_flags |= MASK_VIS2 | MASK_VIS;
1454
1455 /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is
1456 disabled. */
1457 if (! TARGET_FPU)
1458 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);
1459
1460 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1461 are available.
1462 -m64 also implies v9. */
1463 if (TARGET_VIS || TARGET_ARCH64)
1464 {
1465 target_flags |= MASK_V9;
1466 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1467 }
1468
1469 /* -mvis also implies -mv8plus on 32-bit */
1470 if (TARGET_VIS && ! TARGET_ARCH64)
1471 target_flags |= MASK_V8PLUS;
1472
1473 /* Use the deprecated v8 insns for sparc64 in 32 bit mode. */
1474 if (TARGET_V9 && TARGET_ARCH32)
1475 target_flags |= MASK_DEPRECATED_V8_INSNS;
1476
1477 /* V8PLUS requires V9, makes no sense in 64 bit mode. */
1478 if (! TARGET_V9 || TARGET_ARCH64)
1479 target_flags &= ~MASK_V8PLUS;
1480
1481 /* Don't use stack biasing in 32 bit mode. */
1482 if (TARGET_ARCH32)
1483 target_flags &= ~MASK_STACK_BIAS;
1484
1485 /* Supply a default value for align_functions. */
1486 if (align_functions == 0
1487 && (sparc_cpu == PROCESSOR_ULTRASPARC
1488 || sparc_cpu == PROCESSOR_ULTRASPARC3
1489 || sparc_cpu == PROCESSOR_NIAGARA
1490 || sparc_cpu == PROCESSOR_NIAGARA2
1491 || sparc_cpu == PROCESSOR_NIAGARA3
1492 || sparc_cpu == PROCESSOR_NIAGARA4))
1493 align_functions = 32;
1494
1495 /* Validate PCC_STRUCT_RETURN. */
1496 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1497 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1498
1499 /* Only use .uaxword when compiling for a 64-bit target. */
1500 if (!TARGET_ARCH64)
1501 targetm.asm_out.unaligned_op.di = NULL;
1502
1503 /* Do various machine dependent initializations. */
1504 sparc_init_modes ();
1505
1506 /* Set up function hooks. */
1507 init_machine_status = sparc_init_machine_status;
1508
1509 switch (sparc_cpu)
1510 {
1511 case PROCESSOR_V7:
1512 case PROCESSOR_CYPRESS:
1513 sparc_costs = &cypress_costs;
1514 break;
1515 case PROCESSOR_V8:
1516 case PROCESSOR_SPARCLITE:
1517 case PROCESSOR_SUPERSPARC:
1518 sparc_costs = &supersparc_costs;
1519 break;
1520 case PROCESSOR_F930:
1521 case PROCESSOR_F934:
1522 case PROCESSOR_HYPERSPARC:
1523 case PROCESSOR_SPARCLITE86X:
1524 sparc_costs = &hypersparc_costs;
1525 break;
1526 case PROCESSOR_LEON:
1527 sparc_costs = &leon_costs;
1528 break;
1529 case PROCESSOR_LEON3:
1530 sparc_costs = &leon3_costs;
1531 break;
1532 case PROCESSOR_SPARCLET:
1533 case PROCESSOR_TSC701:
1534 sparc_costs = &sparclet_costs;
1535 break;
1536 case PROCESSOR_V9:
1537 case PROCESSOR_ULTRASPARC:
1538 sparc_costs = &ultrasparc_costs;
1539 break;
1540 case PROCESSOR_ULTRASPARC3:
1541 sparc_costs = &ultrasparc3_costs;
1542 break;
1543 case PROCESSOR_NIAGARA:
1544 sparc_costs = &niagara_costs;
1545 break;
1546 case PROCESSOR_NIAGARA2:
1547 sparc_costs = &niagara2_costs;
1548 break;
1549 case PROCESSOR_NIAGARA3:
1550 sparc_costs = &niagara3_costs;
1551 break;
1552 case PROCESSOR_NIAGARA4:
1553 sparc_costs = &niagara4_costs;
1554 break;
1555 case PROCESSOR_NATIVE:
1556 gcc_unreachable ();
1557 };
1558
1559 if (sparc_memory_model == SMM_DEFAULT)
1560 {
1561 /* Choose the memory model for the operating system. */
1562 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1563 if (os_default != SMM_DEFAULT)
1564 sparc_memory_model = os_default;
1565 /* Choose the most relaxed model for the processor. */
1566 else if (TARGET_V9)
1567 sparc_memory_model = SMM_RMO;
1568 else if (TARGET_LEON3)
1569 sparc_memory_model = SMM_TSO;
1570 else if (TARGET_LEON)
1571 sparc_memory_model = SMM_SC;
1572 else if (TARGET_V8)
1573 sparc_memory_model = SMM_PSO;
1574 else
1575 sparc_memory_model = SMM_SC;
1576 }
1577
1578 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1579 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1580 target_flags |= MASK_LONG_DOUBLE_128;
1581 #endif
1582
1583 if (TARGET_DEBUG_OPTIONS)
1584 dump_target_flags ("Final target_flags", target_flags);
1585
1586 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1587 ((sparc_cpu == PROCESSOR_ULTRASPARC
1588 || sparc_cpu == PROCESSOR_NIAGARA
1589 || sparc_cpu == PROCESSOR_NIAGARA2
1590 || sparc_cpu == PROCESSOR_NIAGARA3
1591 || sparc_cpu == PROCESSOR_NIAGARA4)
1592 ? 2
1593 : (sparc_cpu == PROCESSOR_ULTRASPARC3
1594 ? 8 : 3)),
1595 global_options.x_param_values,
1596 global_options_set.x_param_values);
1597 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1598 ((sparc_cpu == PROCESSOR_ULTRASPARC
1599 || sparc_cpu == PROCESSOR_ULTRASPARC3
1600 || sparc_cpu == PROCESSOR_NIAGARA
1601 || sparc_cpu == PROCESSOR_NIAGARA2
1602 || sparc_cpu == PROCESSOR_NIAGARA3
1603 || sparc_cpu == PROCESSOR_NIAGARA4)
1604 ? 64 : 32),
1605 global_options.x_param_values,
1606 global_options_set.x_param_values);
1607
1608 /* Disable save slot sharing for call-clobbered registers by default.
1609 The IRA sharing algorithm works on single registers only and this
1610 pessimizes for double floating-point registers. */
1611 if (!global_options_set.x_flag_ira_share_save_slots)
1612 flag_ira_share_save_slots = 0;
1613
1614 /* We register a machine specific pass to work around errata, if any.
1615 The pass must be scheduled as late as possible so that we have the
1616 (essentially) final form of the insn stream to work on.
1617 Registering the pass must be done at start up. It's convenient to
1618 do it here. */
1619 opt_pass *errata_pass = make_pass_work_around_errata (g);
1620 struct register_pass_info insert_pass_work_around_errata =
1621 {
1622 errata_pass, /* pass */
1623 "dbr", /* reference_pass_name */
1624 1, /* ref_pass_instance_number */
1625 PASS_POS_INSERT_AFTER /* po_op */
1626 };
1627 register_pass (&insert_pass_work_around_errata);
1628 }
1629 \f
1630 /* Miscellaneous utilities. */
1631
1632 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1633 or branch on register contents instructions. */
1634
1635 int
1636 v9_regcmp_p (enum rtx_code code)
1637 {
1638 return (code == EQ || code == NE || code == GE || code == LT
1639 || code == LE || code == GT);
1640 }
1641
1642 /* Nonzero if OP is a floating point constant which can
1643 be loaded into an integer register using a single
1644 sethi instruction. */
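/* For example, 1.0f has the single-precision image 0x3f800000: too wide
   for a signed 13-bit immediate, but with its low 10 bits clear, so a
   single sethi can materialize it and fp_sethi_p returns nonzero.  */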
1645
1646 int
1647 fp_sethi_p (rtx op)
1648 {
1649 if (GET_CODE (op) == CONST_DOUBLE)
1650 {
1651 REAL_VALUE_TYPE r;
1652 long i;
1653
1654 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1655 REAL_VALUE_TO_TARGET_SINGLE (r, i);
1656 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1657 }
1658
1659 return 0;
1660 }
1661
1662 /* Nonzero if OP is a floating point constant which can
1663 be loaded into an integer register using a single
1664 mov instruction. */
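/* For example, a constant whose single-precision image is 0x00000ff0
   (a tiny denormal) fits in the signed 13-bit immediate range
   [-4096, 4095], so fp_mov_p returns nonzero for it.  */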
1665
1666 int
1667 fp_mov_p (rtx op)
1668 {
1669 if (GET_CODE (op) == CONST_DOUBLE)
1670 {
1671 REAL_VALUE_TYPE r;
1672 long i;
1673
1674 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1675 REAL_VALUE_TO_TARGET_SINGLE (r, i);
1676 return SPARC_SIMM13_P (i);
1677 }
1678
1679 return 0;
1680 }
1681
1682 /* Nonzero if OP is a floating point constant which can
1683 be loaded into an integer register using a high/losum
1684 instruction sequence. */
1685
1686 int
1687 fp_high_losum_p (rtx op)
1688 {
1689 /* The constraints calling this should only be in
1690 SFmode move insns, so any constant which cannot
1691 be moved using a single insn will do. */
1692 if (GET_CODE (op) == CONST_DOUBLE)
1693 {
1694 REAL_VALUE_TYPE r;
1695 long i;
1696
1697 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1698 REAL_VALUE_TO_TARGET_SINGLE (r, i);
1699 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1700 }
1701
1702 return 0;
1703 }
1704
1705 /* Return true if the address of LABEL can be loaded by means of the
1706 mov{si,di}_pic_label_ref patterns in PIC mode. */
1707
1708 static bool
1709 can_use_mov_pic_label_ref (rtx label)
1710 {
1711 /* VxWorks does not impose a fixed gap between segments; the run-time
1712 gap can be different from the object-file gap. We therefore can't
1713 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1714 are absolutely sure that X is in the same segment as the GOT.
1715 Unfortunately, the flexibility of linker scripts means that we
1716 can't be sure of that in general, so assume that GOT-relative
1717 accesses are never valid on VxWorks. */
1718 if (TARGET_VXWORKS_RTP)
1719 return false;
1720
1721 /* Similarly, if the label is non-local, it might end up being placed
1722 in a different section than the current one; now mov_pic_label_ref
1723 requires the label and the code to be in the same section. */
1724 if (LABEL_REF_NONLOCAL_P (label))
1725 return false;
1726
1727 /* Finally, if we are reordering basic blocks and partitioning into hot
1728 and cold sections, this might happen for any label. */
1729 if (flag_reorder_blocks_and_partition)
1730 return false;
1731
1732 return true;
1733 }
1734
1735 /* Expand a move instruction. Return true if all work is done. */
1736
1737 bool
1738 sparc_expand_move (enum machine_mode mode, rtx *operands)
1739 {
1740 /* Handle sets of MEM first. */
1741 if (GET_CODE (operands[0]) == MEM)
1742 {
1743 /* Constant zero is a register, %g0 (or a pair of registers), on SPARC. */
1744 if (register_or_zero_operand (operands[1], mode))
1745 return false;
1746
1747 if (!reload_in_progress)
1748 {
1749 operands[0] = validize_mem (operands[0]);
1750 operands[1] = force_reg (mode, operands[1]);
1751 }
1752 }
1753
1754 /* Fixup TLS cases. */
1755 if (TARGET_HAVE_TLS
1756 && CONSTANT_P (operands[1])
1757 && sparc_tls_referenced_p (operands[1]))
1758 {
1759 operands[1] = sparc_legitimize_tls_address (operands[1]);
1760 return false;
1761 }
1762
1763 /* Fixup PIC cases. */
1764 if (flag_pic && CONSTANT_P (operands[1]))
1765 {
1766 if (pic_address_needs_scratch (operands[1]))
1767 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1768
1769 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
1770 if (GET_CODE (operands[1]) == LABEL_REF
1771 && can_use_mov_pic_label_ref (operands[1]))
1772 {
1773 if (mode == SImode)
1774 {
1775 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1776 return true;
1777 }
1778
1779 if (mode == DImode)
1780 {
1781 gcc_assert (TARGET_ARCH64);
1782 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1783 return true;
1784 }
1785 }
1786
1787 if (symbolic_operand (operands[1], mode))
1788 {
1789 operands[1]
1790 = sparc_legitimize_pic_address (operands[1],
1791 reload_in_progress
1792 ? operands[0] : NULL_RTX);
1793 return false;
1794 }
1795 }
1796
1797 /* If we are trying to toss an integer constant into FP registers,
1798 or loading an FP or vector constant, force it into memory. */
1799 if (CONSTANT_P (operands[1])
1800 && REG_P (operands[0])
1801 && (SPARC_FP_REG_P (REGNO (operands[0]))
1802 || SCALAR_FLOAT_MODE_P (mode)
1803 || VECTOR_MODE_P (mode)))
1804 {
1805 /* emit_group_store will send such bogosity to us when it is
1806 not storing directly into memory. So fix this up to avoid
1807 crashes in output_constant_pool. */
1808 if (operands[1] == const0_rtx)
1809 operands[1] = CONST0_RTX (mode);
1810
1811 /* We can clear FP registers, or set them to all ones, if TARGET_VIS;
1812 other registers can always be cleared or set this way. */
1813 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1814 && (const_zero_operand (operands[1], mode)
1815 || const_all_ones_operand (operands[1], mode)))
1816 return false;
1817
1818 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1819 /* We are able to build any SF constant in integer registers
1820 with at most 2 instructions. */
1821 && (mode == SFmode
1822 /* And any DF constant in integer registers. */
1823 || (mode == DFmode
1824 && ! can_create_pseudo_p ())))
1825 return false;
1826
1827 operands[1] = force_const_mem (mode, operands[1]);
1828 if (!reload_in_progress)
1829 operands[1] = validize_mem (operands[1]);
1830 return false;
1831 }
1832
1833 /* Accept non-constants and valid constants unmodified. */
1834 if (!CONSTANT_P (operands[1])
1835 || GET_CODE (operands[1]) == HIGH
1836 || input_operand (operands[1], mode))
1837 return false;
1838
1839 switch (mode)
1840 {
1841 case QImode:
1842 /* All QImode constants require only one insn, so proceed. */
1843 break;
1844
1845 case HImode:
1846 case SImode:
1847 sparc_emit_set_const32 (operands[0], operands[1]);
1848 return true;
1849
1850 case DImode:
1851 /* input_operand should have filtered out 32-bit mode. */
1852 sparc_emit_set_const64 (operands[0], operands[1]);
1853 return true;
1854
1855 case TImode:
1856 {
1857 rtx high, low;
1858 /* TImode isn't available in 32-bit mode. */
1859 split_double (operands[1], &high, &low);
1860 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
1861 high));
1862 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
1863 low));
1864 }
1865 return true;
1866
1867 default:
1868 gcc_unreachable ();
1869 }
1870
1871 return false;
1872 }
1873
1874 /* Load OP1, a 32-bit constant, into OP0, a register.
1875 We know it can't be done in one insn when we get
1876 here; the move expander guarantees this. */
1877
1878 static void
1879 sparc_emit_set_const32 (rtx op0, rtx op1)
1880 {
1881 enum machine_mode mode = GET_MODE (op0);
1882 rtx temp = op0;
1883
1884 if (can_create_pseudo_p ())
1885 temp = gen_reg_rtx (mode);
1886
1887 if (GET_CODE (op1) == CONST_INT)
1888 {
1889 gcc_assert (!small_int_operand (op1, mode)
1890 && !const_high_operand (op1, mode));
1891
1892 /* Emit them as real moves instead of a HIGH/LO_SUM;
1893 this way CSE can see everything and reuse intermediate
1894 values if it wants. */
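      /* E.g. (an illustrative value): op1 == 0x12345678 is split into
           temp = 0x12345400   sethi %hi(0x12345678), %temp
           op0 = temp | 0x278  or %temp, %lo(0x12345678), %op0  */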
1895 emit_insn (gen_rtx_SET (VOIDmode, temp,
1896 GEN_INT (INTVAL (op1)
1897 & ~(HOST_WIDE_INT)0x3ff)));
1898
1899 emit_insn (gen_rtx_SET (VOIDmode,
1900 op0,
1901 gen_rtx_IOR (mode, temp,
1902 GEN_INT (INTVAL (op1) & 0x3ff))));
1903 }
1904 else
1905 {
1906 /* A symbol, emit in the traditional way. */
1907 emit_insn (gen_rtx_SET (VOIDmode, temp,
1908 gen_rtx_HIGH (mode, op1)));
1909 emit_insn (gen_rtx_SET (VOIDmode,
1910 op0, gen_rtx_LO_SUM (mode, temp, op1)));
1911 }
1912 }
1913
1914 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1915 If TEMP is nonzero, we are forbidden to use any other scratch
1916 registers. Otherwise, we are allowed to generate them as needed.
1917
1918 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1919 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
1920
1921 void
1922 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
1923 {
1924 rtx temp1, temp2, temp3, temp4, temp5;
1925 rtx ti_temp = 0;
1926
1927 if (temp && GET_MODE (temp) == TImode)
1928 {
1929 ti_temp = temp;
1930 temp = gen_rtx_REG (DImode, REGNO (temp));
1931 }
1932
1933 /* SPARC-V9 code-model support. */
1934 switch (sparc_cmodel)
1935 {
1936 case CM_MEDLOW:
1937 /* The range spanned by all instructions in the object is less
1938 than 2^31 bytes (2GB) and the distance from any instruction
1939 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1940 than 2^31 bytes (2GB).
1941
1942 The executable must be in the low 4GB of the virtual address
1943 space.
1944
1945 sethi %hi(symbol), %temp1
1946 or %temp1, %lo(symbol), %reg */
1947 if (temp)
1948 temp1 = temp; /* op0 is allowed. */
1949 else
1950 temp1 = gen_reg_rtx (DImode);
1951
1952 emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
1953 emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1954 break;
1955
1956 case CM_MEDMID:
1957 /* The range spanned by all instructions in the object is less
1958 than 2^31 bytes (2GB) and the distance from any instruction
1959 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1960 than 2^31 bytes (2GB).
1961
1962 The executable must be in the low 16TB of the virtual address
1963 space.
1964
1965 sethi %h44(symbol), %temp1
1966 or %temp1, %m44(symbol), %temp2
1967 sllx %temp2, 12, %temp3
1968 or %temp3, %l44(symbol), %reg */
1969 if (temp)
1970 {
1971 temp1 = op0;
1972 temp2 = op0;
1973 temp3 = temp; /* op0 is allowed. */
1974 }
1975 else
1976 {
1977 temp1 = gen_reg_rtx (DImode);
1978 temp2 = gen_reg_rtx (DImode);
1979 temp3 = gen_reg_rtx (DImode);
1980 }
1981
1982 emit_insn (gen_seth44 (temp1, op1));
1983 emit_insn (gen_setm44 (temp2, temp1, op1));
1984 emit_insn (gen_rtx_SET (VOIDmode, temp3,
1985 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
1986 emit_insn (gen_setl44 (op0, temp3, op1));
1987 break;
1988
1989 case CM_MEDANY:
1990 /* The range spanned by all instructions in the object is less
1991 than 2^31 bytes (2GB) and the distance from any instruction
1992 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1993 than 2^31 bytes (2GB).
1994
1995 The executable can be placed anywhere in the virtual address
1996 space.
1997
1998 sethi %hh(symbol), %temp1
1999 sethi %lm(symbol), %temp2
2000 or %temp1, %hm(symbol), %temp3
2001 sllx %temp3, 32, %temp4
2002 or %temp4, %temp2, %temp5
2003 or %temp5, %lo(symbol), %reg */
2004 if (temp)
2005 {
2006 /* It is possible that one of the registers we got for operands[2]
2007 might coincide with that of operands[0] (which is why we made
2008 it TImode). Pick the other one to use as our scratch. */
2009 if (rtx_equal_p (temp, op0))
2010 {
2011 gcc_assert (ti_temp);
2012 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2013 }
2014 temp1 = op0;
2015 temp2 = temp; /* op0 is _not_ allowed, see above. */
2016 temp3 = op0;
2017 temp4 = op0;
2018 temp5 = op0;
2019 }
2020 else
2021 {
2022 temp1 = gen_reg_rtx (DImode);
2023 temp2 = gen_reg_rtx (DImode);
2024 temp3 = gen_reg_rtx (DImode);
2025 temp4 = gen_reg_rtx (DImode);
2026 temp5 = gen_reg_rtx (DImode);
2027 }
2028
2029 emit_insn (gen_sethh (temp1, op1));
2030 emit_insn (gen_setlm (temp2, op1));
2031 emit_insn (gen_sethm (temp3, temp1, op1));
2032 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2033 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2034 emit_insn (gen_rtx_SET (VOIDmode, temp5,
2035 gen_rtx_PLUS (DImode, temp4, temp2)));
2036 emit_insn (gen_setlo (op0, temp5, op1));
2037 break;
2038
2039 case CM_EMBMEDANY:
2040 /* Old old old backwards compatibility cruft here.
2041 Essentially it is MEDLOW with a fixed 64-bit
2042 virtual base added to all data segment addresses.
2043 Text-segment stuff is computed like MEDANY; we can't
2044 reuse the code above because the relocation knobs
2045 look different.
2046
2047 Data segment: sethi %hi(symbol), %temp1
2048 add %temp1, EMBMEDANY_BASE_REG, %temp2
2049 or %temp2, %lo(symbol), %reg */
2050 if (data_segment_operand (op1, GET_MODE (op1)))
2051 {
2052 if (temp)
2053 {
2054 temp1 = temp; /* op0 is allowed. */
2055 temp2 = op0;
2056 }
2057 else
2058 {
2059 temp1 = gen_reg_rtx (DImode);
2060 temp2 = gen_reg_rtx (DImode);
2061 }
2062
2063 emit_insn (gen_embmedany_sethi (temp1, op1));
2064 emit_insn (gen_embmedany_brsum (temp2, temp1));
2065 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2066 }
2067
2068 /* Text segment: sethi %uhi(symbol), %temp1
2069 sethi %hi(symbol), %temp2
2070 or %temp1, %ulo(symbol), %temp3
2071 sllx %temp3, 32, %temp4
2072 or %temp4, %temp2, %temp5
2073 or %temp5, %lo(symbol), %reg */
2074 else
2075 {
2076 if (temp)
2077 {
2078 /* It is possible that one of the registers we got for operands[2]
2079 might coincide with that of operands[0] (which is why we made
2080 it TImode). Pick the other one to use as our scratch. */
2081 if (rtx_equal_p (temp, op0))
2082 {
2083 gcc_assert (ti_temp);
2084 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2085 }
2086 temp1 = op0;
2087 temp2 = temp; /* op0 is _not_ allowed, see above. */
2088 temp3 = op0;
2089 temp4 = op0;
2090 temp5 = op0;
2091 }
2092 else
2093 {
2094 temp1 = gen_reg_rtx (DImode);
2095 temp2 = gen_reg_rtx (DImode);
2096 temp3 = gen_reg_rtx (DImode);
2097 temp4 = gen_reg_rtx (DImode);
2098 temp5 = gen_reg_rtx (DImode);
2099 }
2100
2101 emit_insn (gen_embmedany_textuhi (temp1, op1));
2102 emit_insn (gen_embmedany_texthi (temp2, op1));
2103 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2104 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2105 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2106 emit_insn (gen_rtx_SET (VOIDmode, temp5,
2107 gen_rtx_PLUS (DImode, temp4, temp2)));
2108 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2109 }
2110 break;
2111
2112 default:
2113 gcc_unreachable ();
2114 }
2115 }
2116
2117 #if HOST_BITS_PER_WIDE_INT == 32
2118 static void
2119 sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
2120 {
2121 gcc_unreachable ();
2122 }
2123 #else
2124 /* These avoid problems when cross-compiling. If we do not
2125 go through all this hair then the optimizer will see
2126 invalid REG_EQUAL notes or in some cases none at all. */
2127 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2128 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2129 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2130 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2131
2132 /* The optimizer must not assume anything about exactly
2133 which bits are set for a HIGH; they are unspecified.
2134 Unfortunately this leads to many missed optimizations
2135 during CSE. We mask out the non-HIGH bits, so that the
2136 result matches a plain movdi, to alleviate this problem. */
2137 static rtx
2138 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2139 {
2140 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2141 }
2142
2143 static rtx
2144 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2145 {
2146 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
2147 }
2148
2149 static rtx
2150 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2151 {
2152 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2153 }
2154
2155 static rtx
2156 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2157 {
2158 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2159 }
2160
2161 /* Worker routines for 64-bit constant formation on arch64.
2162 One of the key things to do in these emissions is
2163 to create as many temp REGs as possible. This makes it
2164 possible for half-built constants to be reused when
2165 similar values are required later on.
2166 Without doing this, the optimizer cannot see such
2167 opportunities. */
2168
2169 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2170 unsigned HOST_WIDE_INT, int);
2171
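/* Load into OP0 a 64-bit constant whose upper 32 bits are all zeros
   (IS_NEG == 0) or all ones (IS_NEG != 0): a sethi of LOW_BITS (resp. of
   their complement) into TEMP, followed by an or (resp. xor or one's
   complement) into OP0. A descriptive summary of the code below. */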
2172 static void
2173 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2174 unsigned HOST_WIDE_INT low_bits, int is_neg)
2175 {
2176 unsigned HOST_WIDE_INT high_bits;
2177
2178 if (is_neg)
2179 high_bits = (~low_bits) & 0xffffffff;
2180 else
2181 high_bits = low_bits;
2182
2183 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2184 if (!is_neg)
2185 {
2186 emit_insn (gen_rtx_SET (VOIDmode, op0,
2187 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2188 }
2189 else
2190 {
2191 /* If we are XOR'ing with -1, then we should emit a one's complement
2192 instead. This way the combiner will notice logical operations
2193 such as ANDN later on and substitute. */
2194 if ((low_bits & 0x3ff) == 0x3ff)
2195 {
2196 emit_insn (gen_rtx_SET (VOIDmode, op0,
2197 gen_rtx_NOT (DImode, temp)));
2198 }
2199 else
2200 {
2201 emit_insn (gen_rtx_SET (VOIDmode, op0,
2202 gen_safe_XOR64 (temp,
2203 (-(HOST_WIDE_INT)0x400
2204 | (low_bits & 0x3ff)))));
2205 }
2206 }
2207 }
2208
2209 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2210 unsigned HOST_WIDE_INT, int);
2211
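/* Load into OP0 a 64-bit constant built by materializing HIGH_BITS in
   TEMP, shifting it left by SHIFT_COUNT bits, and or'ing in the simm13
   value LOW_IMMEDIATE if it is nonzero. A descriptive summary of the
   code below. */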
2212 static void
2213 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2214 unsigned HOST_WIDE_INT high_bits,
2215 unsigned HOST_WIDE_INT low_immediate,
2216 int shift_count)
2217 {
2218 rtx temp2 = op0;
2219
2220 if ((high_bits & 0xfffffc00) != 0)
2221 {
2222 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2223 if ((high_bits & ~0xfffffc00) != 0)
2224 emit_insn (gen_rtx_SET (VOIDmode, op0,
2225 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2226 else
2227 temp2 = temp;
2228 }
2229 else
2230 {
2231 emit_insn (gen_safe_SET64 (temp, high_bits));
2232 temp2 = temp;
2233 }
2234
2235 /* Now shift it up into place. */
2236 emit_insn (gen_rtx_SET (VOIDmode, op0,
2237 gen_rtx_ASHIFT (DImode, temp2,
2238 GEN_INT (shift_count))));
2239
2240 /* If there is a low immediate piece, finish up by
2241 putting that in as well. */
2242 if (low_immediate != 0)
2243 emit_insn (gen_rtx_SET (VOIDmode, op0,
2244 gen_safe_OR64 (op0, low_immediate)));
2245 }
2246
2247 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2248 unsigned HOST_WIDE_INT);
2249
2250 /* Full 64-bit constant decomposition. Even though this is the
2251 'worst' case, we still optimize a few things away. */
2252 static void
2253 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2254 unsigned HOST_WIDE_INT high_bits,
2255 unsigned HOST_WIDE_INT low_bits)
2256 {
2257 rtx sub_temp = op0;
2258
2259 if (can_create_pseudo_p ())
2260 sub_temp = gen_reg_rtx (DImode);
2261
2262 if ((high_bits & 0xfffffc00) != 0)
2263 {
2264 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2265 if ((high_bits & ~0xfffffc00) != 0)
2266 emit_insn (gen_rtx_SET (VOIDmode,
2267 sub_temp,
2268 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2269 else
2270 sub_temp = temp;
2271 }
2272 else
2273 {
2274 emit_insn (gen_safe_SET64 (temp, high_bits));
2275 sub_temp = temp;
2276 }
2277
2278 if (can_create_pseudo_p ())
2279 {
2280 rtx temp2 = gen_reg_rtx (DImode);
2281 rtx temp3 = gen_reg_rtx (DImode);
2282 rtx temp4 = gen_reg_rtx (DImode);
2283
2284 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2285 gen_rtx_ASHIFT (DImode, sub_temp,
2286 GEN_INT (32))));
2287
2288 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2289 if ((low_bits & ~0xfffffc00) != 0)
2290 {
2291 emit_insn (gen_rtx_SET (VOIDmode, temp3,
2292 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2293 emit_insn (gen_rtx_SET (VOIDmode, op0,
2294 gen_rtx_PLUS (DImode, temp4, temp3)));
2295 }
2296 else
2297 {
2298 emit_insn (gen_rtx_SET (VOIDmode, op0,
2299 gen_rtx_PLUS (DImode, temp4, temp2)));
2300 }
2301 }
2302 else
2303 {
2304 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2305 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2306 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2307 int to_shift = 12;
2308
2309 /* We are in the middle of reload, so this is really
2310 painful. However we do still make an attempt to
2311 avoid emitting truly stupid code. */
2312 if (low1 != const0_rtx)
2313 {
2314 emit_insn (gen_rtx_SET (VOIDmode, op0,
2315 gen_rtx_ASHIFT (DImode, sub_temp,
2316 GEN_INT (to_shift))));
2317 emit_insn (gen_rtx_SET (VOIDmode, op0,
2318 gen_rtx_IOR (DImode, op0, low1)));
2319 sub_temp = op0;
2320 to_shift = 12;
2321 }
2322 else
2323 {
2324 to_shift += 12;
2325 }
2326 if (low2 != const0_rtx)
2327 {
2328 emit_insn (gen_rtx_SET (VOIDmode, op0,
2329 gen_rtx_ASHIFT (DImode, sub_temp,
2330 GEN_INT (to_shift))));
2331 emit_insn (gen_rtx_SET (VOIDmode, op0,
2332 gen_rtx_IOR (DImode, op0, low2)));
2333 sub_temp = op0;
2334 to_shift = 8;
2335 }
2336 else
2337 {
2338 to_shift += 8;
2339 }
2340 emit_insn (gen_rtx_SET (VOIDmode, op0,
2341 gen_rtx_ASHIFT (DImode, sub_temp,
2342 GEN_INT (to_shift))));
2343 if (low3 != const0_rtx)
2344 emit_insn (gen_rtx_SET (VOIDmode, op0,
2345 gen_rtx_IOR (DImode, op0, low3)));
2346 /* phew... */
2347 }
2348 }
2349
2350 /* Analyze a 64-bit constant: record the positions of its highest and lowest set bits and whether all bits in between are set. */
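/* For example (illustrative): with high_bits == 0 and low_bits == 0xff00,
   the results are *hbsp == 15, *lbsp == 8 and *abbasp == 1, since bits
   8..15 form a contiguous run of ones. */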
2351 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2352 unsigned HOST_WIDE_INT,
2353 int *, int *, int *);
2354
2355 static void
2356 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2357 unsigned HOST_WIDE_INT low_bits,
2358 int *hbsp, int *lbsp, int *abbasp)
2359 {
2360 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2361 int i;
2362
2363 lowest_bit_set = highest_bit_set = -1;
2364 i = 0;
2365 do
2366 {
2367 if ((lowest_bit_set == -1)
2368 && ((low_bits >> i) & 1))
2369 lowest_bit_set = i;
2370 if ((highest_bit_set == -1)
2371 && ((high_bits >> (32 - i - 1)) & 1))
2372 highest_bit_set = (64 - i - 1);
2373 }
2374 while (++i < 32
2375 && ((highest_bit_set == -1)
2376 || (lowest_bit_set == -1)));
2377 if (i == 32)
2378 {
2379 i = 0;
2380 do
2381 {
2382 if ((lowest_bit_set == -1)
2383 && ((high_bits >> i) & 1))
2384 lowest_bit_set = i + 32;
2385 if ((highest_bit_set == -1)
2386 && ((low_bits >> (32 - i - 1)) & 1))
2387 highest_bit_set = 32 - i - 1;
2388 }
2389 while (++i < 32
2390 && ((highest_bit_set == -1)
2391 || (lowest_bit_set == -1)));
2392 }
2393 /* If there are no bits set, this should have gone out
2394 as one instruction! */
2395 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2396 all_bits_between_are_set = 1;
2397 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2398 {
2399 if (i < 32)
2400 {
2401 if ((low_bits & (1 << i)) != 0)
2402 continue;
2403 }
2404 else
2405 {
2406 if ((high_bits & (1 << (i - 32))) != 0)
2407 continue;
2408 }
2409 all_bits_between_are_set = 0;
2410 break;
2411 }
2412 *hbsp = highest_bit_set;
2413 *lbsp = lowest_bit_set;
2414 *abbasp = all_bits_between_are_set;
2415 }
2416
2417 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2418
2419 static int
2420 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2421 unsigned HOST_WIDE_INT low_bits)
2422 {
2423 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2424
2425 if (high_bits == 0
2426 || high_bits == 0xffffffff)
2427 return 1;
2428
2429 analyze_64bit_constant (high_bits, low_bits,
2430 &highest_bit_set, &lowest_bit_set,
2431 &all_bits_between_are_set);
2432
2433 if ((highest_bit_set == 63
2434 || lowest_bit_set == 0)
2435 && all_bits_between_are_set != 0)
2436 return 1;
2437
2438 if ((highest_bit_set - lowest_bit_set) < 21)
2439 return 1;
2440
2441 return 0;
2442 }
2443
2444 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2445 unsigned HOST_WIDE_INT,
2446 int, int);
2447
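/* Return the 64-bit constant <HIGH_BITS, LOW_BITS> shifted right so that
   its lowest set bit, LOWEST_BIT_SET, lands on bit SHIFT of the result.
   A descriptive summary of the code below. */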
2448 static unsigned HOST_WIDE_INT
2449 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2450 unsigned HOST_WIDE_INT low_bits,
2451 int lowest_bit_set, int shift)
2452 {
2453 HOST_WIDE_INT hi, lo;
2454
2455 if (lowest_bit_set < 32)
2456 {
2457 lo = (low_bits >> lowest_bit_set) << shift;
2458 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2459 }
2460 else
2461 {
2462 lo = 0;
2463 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2464 }
2465 gcc_assert (! (hi & lo));
2466 return (hi | lo);
2467 }
2468
2469 /* Here we are sure to be arch64 and this is an integer constant
2470 being loaded into a register. Emit the most efficient
2471 insn sequence possible. Detection of all the 1-insn cases
2472 has been done already. */
2473 static void
2474 sparc_emit_set_const64 (rtx op0, rtx op1)
2475 {
2476 unsigned HOST_WIDE_INT high_bits, low_bits;
2477 int lowest_bit_set, highest_bit_set;
2478 int all_bits_between_are_set;
2479 rtx temp = 0;
2480
2481 /* Sanity check that we know what we are working with. */
2482 gcc_assert (TARGET_ARCH64
2483 && (GET_CODE (op0) == SUBREG
2484 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2485
2486 if (! can_create_pseudo_p ())
2487 temp = op0;
2488
2489 if (GET_CODE (op1) != CONST_INT)
2490 {
2491 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2492 return;
2493 }
2494
2495 if (! temp)
2496 temp = gen_reg_rtx (DImode);
2497
2498 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2499 low_bits = (INTVAL (op1) & 0xffffffff);
2500
2501 /* low_bits bits 0 --> 31
2502 high_bits bits 32 --> 63 */
2503
2504 analyze_64bit_constant (high_bits, low_bits,
2505 &highest_bit_set, &lowest_bit_set,
2506 &all_bits_between_are_set);
2507
2508 /* First try for a 2-insn sequence. */
2509
2510 /* These situations are preferred because the optimizer can
2511 * do more things with them:
2512 * 1) mov -1, %reg
2513 * sllx %reg, shift, %reg
2514 * 2) mov -1, %reg
2515 * srlx %reg, shift, %reg
2516 * 3) mov some_small_const, %reg
2517 * sllx %reg, shift, %reg
2518 */
2519 if (((highest_bit_set == 63
2520 || lowest_bit_set == 0)
2521 && all_bits_between_are_set != 0)
2522 || ((highest_bit_set - lowest_bit_set) < 12))
2523 {
2524 HOST_WIDE_INT the_const = -1;
2525 int shift = lowest_bit_set;
2526
2527 if ((highest_bit_set != 63
2528 && lowest_bit_set != 0)
2529 || all_bits_between_are_set == 0)
2530 {
2531 the_const =
2532 create_simple_focus_bits (high_bits, low_bits,
2533 lowest_bit_set, 0);
2534 }
2535 else if (lowest_bit_set == 0)
2536 shift = -(63 - highest_bit_set);
2537
2538 gcc_assert (SPARC_SIMM13_P (the_const));
2539 gcc_assert (shift != 0);
2540
2541 emit_insn (gen_safe_SET64 (temp, the_const));
2542 if (shift > 0)
2543 emit_insn (gen_rtx_SET (VOIDmode,
2544 op0,
2545 gen_rtx_ASHIFT (DImode,
2546 temp,
2547 GEN_INT (shift))));
2548 else if (shift < 0)
2549 emit_insn (gen_rtx_SET (VOIDmode,
2550 op0,
2551 gen_rtx_LSHIFTRT (DImode,
2552 temp,
2553 GEN_INT (-shift))));
2554 return;
2555 }
2556
2557 /* Now a range of 22 or fewer bits set somewhere.
2558 * 1) sethi %hi(focus_bits), %reg
2559 * sllx %reg, shift, %reg
2560 * 2) sethi %hi(focus_bits), %reg
2561 * srlx %reg, shift, %reg
2562 */
2563 if ((highest_bit_set - lowest_bit_set) < 21)
2564 {
2565 unsigned HOST_WIDE_INT focus_bits =
2566 create_simple_focus_bits (high_bits, low_bits,
2567 lowest_bit_set, 10);
2568
2569 gcc_assert (SPARC_SETHI_P (focus_bits));
2570 gcc_assert (lowest_bit_set != 10);
2571
2572 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2573
2574 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2575 if (lowest_bit_set < 10)
2576 emit_insn (gen_rtx_SET (VOIDmode,
2577 op0,
2578 gen_rtx_LSHIFTRT (DImode, temp,
2579 GEN_INT (10 - lowest_bit_set))));
2580 else if (lowest_bit_set > 10)
2581 emit_insn (gen_rtx_SET (VOIDmode,
2582 op0,
2583 gen_rtx_ASHIFT (DImode, temp,
2584 GEN_INT (lowest_bit_set - 10))));
2585 return;
2586 }
2587
2588 /* 1) sethi %hi(low_bits), %reg
2589 * or %reg, %lo(low_bits), %reg
2590 * 2) sethi %hi(~low_bits), %reg
2591 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2592 */
2593 if (high_bits == 0
2594 || high_bits == 0xffffffff)
2595 {
2596 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2597 (high_bits == 0xffffffff));
2598 return;
2599 }
2600
2601 /* Now, try 3-insn sequences. */
2602
2603 /* 1) sethi %hi(high_bits), %reg
2604 * or %reg, %lo(high_bits), %reg
2605 * sllx %reg, 32, %reg
2606 */
2607 if (low_bits == 0)
2608 {
2609 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2610 return;
2611 }
2612
2613 /* We may be able to do something quick
2614 when the constant is negated, so try that. */
2615 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2616 (~low_bits) & 0xfffffc00))
2617 {
2618 /* NOTE: The trailing bits get XOR'd so we need the
2619 non-negated bits, not the negated ones. */
2620 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2621
2622 if ((((~high_bits) & 0xffffffff) == 0
2623 && ((~low_bits) & 0x80000000) == 0)
2624 || (((~high_bits) & 0xffffffff) == 0xffffffff
2625 && ((~low_bits) & 0x80000000) != 0))
2626 {
2627 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2628
2629 if ((SPARC_SETHI_P (fast_int)
2630 && (~high_bits & 0xffffffff) == 0)
2631 || SPARC_SIMM13_P (fast_int))
2632 emit_insn (gen_safe_SET64 (temp, fast_int));
2633 else
2634 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2635 }
2636 else
2637 {
2638 rtx negated_const;
2639 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2640 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2641 sparc_emit_set_const64 (temp, negated_const);
2642 }
2643
2644 /* If we are XOR'ing with -1, then we should emit a one's complement
2645 instead. This way the combiner will notice logical operations
2646 such as ANDN later on and substitute. */
2647 if (trailing_bits == 0x3ff)
2648 {
2649 emit_insn (gen_rtx_SET (VOIDmode, op0,
2650 gen_rtx_NOT (DImode, temp)));
2651 }
2652 else
2653 {
2654 emit_insn (gen_rtx_SET (VOIDmode,
2655 op0,
2656 gen_safe_XOR64 (temp,
2657 (-0x400 | trailing_bits))));
2658 }
2659 return;
2660 }
2661
2662 /* 1) sethi %hi(xxx), %reg
2663 * or %reg, %lo(xxx), %reg
2664 * sllx %reg, yyy, %reg
2665 *
2666 * ??? This is just a generalized version of the low_bits==0
2667 * thing above, FIXME...
2668 */
2669 if ((highest_bit_set - lowest_bit_set) < 32)
2670 {
2671 unsigned HOST_WIDE_INT focus_bits =
2672 create_simple_focus_bits (high_bits, low_bits,
2673 lowest_bit_set, 0);
2674
2675 /* We can't get here with all the set bits in one half; those cases were handled above. */
2676 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2677
2678 /* So what we know is that the set bits straddle the
2679 middle of the 64-bit word. */
2680 sparc_emit_set_const64_quick2 (op0, temp,
2681 focus_bits, 0,
2682 lowest_bit_set);
2683 return;
2684 }
2685
2686 /* 1) sethi %hi(high_bits), %reg
2687 * or %reg, %lo(high_bits), %reg
2688 * sllx %reg, 32, %reg
2689 * or %reg, low_bits, %reg
2690 */
2691 if (SPARC_SIMM13_P (low_bits)
2692 && ((int) low_bits > 0))
2693 {
2694 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2695 return;
2696 }
2697
2698 /* The easiest way, when all else fails, is full decomposition. */
2699 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2700 }
2701 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
2702
2703 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2704 return the mode to be used for the comparison. For floating-point,
2705 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
2706 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2707 processing is needed. */
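/* (A descriptive note: the _NOOV modes record that the flags were set as
   a side effect of the arithmetic instruction itself, so the overflow
   bit does not describe a compare of the result with zero and conditions
   relying on it cannot be used.) */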
2708
2709 enum machine_mode
2710 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2711 {
2712 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2713 {
2714 switch (op)
2715 {
2716 case EQ:
2717 case NE:
2718 case UNORDERED:
2719 case ORDERED:
2720 case UNLT:
2721 case UNLE:
2722 case UNGT:
2723 case UNGE:
2724 case UNEQ:
2725 case LTGT:
2726 return CCFPmode;
2727
2728 case LT:
2729 case LE:
2730 case GT:
2731 case GE:
2732 return CCFPEmode;
2733
2734 default:
2735 gcc_unreachable ();
2736 }
2737 }
2738 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2739 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2740 {
2741 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2742 return CCX_NOOVmode;
2743 else
2744 return CC_NOOVmode;
2745 }
2746 else
2747 {
2748 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2749 return CCXmode;
2750 else
2751 return CCmode;
2752 }
2753 }
2754
2755 /* Emit the compare insn and return the CC reg for a CODE comparison
2756 with operands X and Y. */
2757
2758 static rtx
2759 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2760 {
2761 enum machine_mode mode;
2762 rtx cc_reg;
2763
2764 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2765 return x;
2766
2767 mode = SELECT_CC_MODE (code, x, y);
2768
2769 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2770 fcc regs (cse can't tell they're really call clobbered regs and will
2771 remove a duplicate comparison even if there is an intervening function
2772 call - it will then try to reload the cc reg via an int reg which is why
2773 we need the movcc patterns). It is possible to provide the movcc
2774 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2775 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2776 to tell cse that CCFPE mode registers (even pseudos) are call
2777 clobbered. */
2778
2779 /* ??? This is an experiment. Rather than making changes to cse which may
2780 or may not be easy/clean, we do our own cse. This is possible because
2781 we will generate hard registers. Cse knows they're call clobbered (it
2782 doesn't know the same thing about pseudos). If we guess wrong, no big
2783 deal, but if we win, great! */
2784
2785 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2786 #if 1 /* experiment */
2787 {
2788 int reg;
2789 /* We cycle through the registers to ensure they're all exercised. */
2790 static int next_fcc_reg = 0;
2791 /* Previous x,y for each fcc reg. */
2792 static rtx prev_args[4][2];
2793
2794 /* Scan prev_args for x,y. */
2795 for (reg = 0; reg < 4; reg++)
2796 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2797 break;
2798 if (reg == 4)
2799 {
2800 reg = next_fcc_reg;
2801 prev_args[reg][0] = x;
2802 prev_args[reg][1] = y;
2803 next_fcc_reg = (next_fcc_reg + 1) & 3;
2804 }
2805 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2806 }
2807 #else
2808 cc_reg = gen_reg_rtx (mode);
2809 #endif /* ! experiment */
2810 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2811 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2812 else
2813 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2814
2815 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
2816 will only result in an unrecognizable insn so no point in asserting. */
2817 emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
2818
2819 return cc_reg;
2820 }
2821
2822
2823 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2824
2825 rtx
2826 gen_compare_reg (rtx cmp)
2827 {
2828 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2829 }
2830
2831 /* This function is used for v9 only.
2832 DEST is the target of the Scc insn.
2833 CODE is the code for an Scc's comparison.
2834 X and Y are the values we compare.
2835
2836 This function is needed to turn
2837
2838 (set (reg:SI 110)
2839 (gt (reg:CCX 100 %icc)
2840 (const_int 0)))
2841 into
2842 (set (reg:SI 110)
2843 (gt:DI (reg:CCX 100 %icc)
2844 (const_int 0)))
2845
2846 I.e. the instruction recognizer needs to see the mode of the comparison to
2847 find the right instruction. We could use "gt:DI" right in the
2848 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2849
2850 static int
2851 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2852 {
2853 if (! TARGET_ARCH64
2854 && (GET_MODE (x) == DImode
2855 || GET_MODE (dest) == DImode))
2856 return 0;
2857
2858 /* Try to use the movrCC insns. */
2859 if (TARGET_ARCH64
2860 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2861 && y == const0_rtx
2862 && v9_regcmp_p (compare_code))
2863 {
2864 rtx op0 = x;
2865 rtx temp;
2866
2867 /* Special case for op0 != 0. This can be done with one instruction if
2868 dest == x. */
2869
2870 if (compare_code == NE
2871 && GET_MODE (dest) == DImode
2872 && rtx_equal_p (op0, dest))
2873 {
2874 emit_insn (gen_rtx_SET (VOIDmode, dest,
2875 gen_rtx_IF_THEN_ELSE (DImode,
2876 gen_rtx_fmt_ee (compare_code, DImode,
2877 op0, const0_rtx),
2878 const1_rtx,
2879 dest)));
2880 return 1;
2881 }
2882
2883 if (reg_overlap_mentioned_p (dest, op0))
2884 {
2885 /* Handle the case where dest == x.
2886 We "early clobber" the result. */
2887 op0 = gen_reg_rtx (GET_MODE (x));
2888 emit_move_insn (op0, x);
2889 }
2890
2891 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2892 if (GET_MODE (op0) != DImode)
2893 {
2894 temp = gen_reg_rtx (DImode);
2895 convert_move (temp, op0, 0);
2896 }
2897 else
2898 temp = op0;
2899 emit_insn (gen_rtx_SET (VOIDmode, dest,
2900 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2901 gen_rtx_fmt_ee (compare_code, DImode,
2902 temp, const0_rtx),
2903 const1_rtx,
2904 dest)));
2905 return 1;
2906 }
2907 else
2908 {
2909 x = gen_compare_reg_1 (compare_code, x, y);
2910 y = const0_rtx;
2911
2912 gcc_assert (GET_MODE (x) != CC_NOOVmode
2913 && GET_MODE (x) != CCX_NOOVmode);
2914
2915 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2916 emit_insn (gen_rtx_SET (VOIDmode, dest,
2917 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2918 gen_rtx_fmt_ee (compare_code,
2919 GET_MODE (x), x, y),
2920 const1_rtx, dest)));
2921 return 1;
2922 }
2923 }
2924
2925
2926 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2927 without jumps using the addx/subx instructions. */
2928
2929 bool
2930 emit_scc_insn (rtx operands[])
2931 {
2932 rtx tem;
2933 rtx x;
2934 rtx y;
2935 enum rtx_code code;
2936
2937 /* The quad-word fp compare library routines all return nonzero to indicate
2938 true, which is different from the equivalent libgcc routines, so we must
2939 handle them specially here. */
2940 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2941 {
2942 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2943 GET_CODE (operands[1]));
2944 operands[2] = XEXP (operands[1], 0);
2945 operands[3] = XEXP (operands[1], 1);
2946 }
2947
2948 code = GET_CODE (operands[1]);
2949 x = operands[2];
2950 y = operands[3];
2951
2952 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2953 more applications). The exception to this is "reg != 0" which can
2954 be done in one instruction on v9 (so we do it). */
2955 if (code == EQ)
2956 {
2957 if (GET_MODE (x) == SImode)
2958 {
2959 rtx pat;
2960 if (TARGET_ARCH64)
2961 pat = gen_seqsidi_special (operands[0], x, y);
2962 else
2963 pat = gen_seqsisi_special (operands[0], x, y);
2964 emit_insn (pat);
2965 return true;
2966 }
2967 else if (GET_MODE (x) == DImode)
2968 {
2969 rtx pat = gen_seqdi_special (operands[0], x, y);
2970 emit_insn (pat);
2971 return true;
2972 }
2973 }
2974
2975 if (code == NE)
2976 {
2977 if (GET_MODE (x) == SImode)
2978 {
2979 rtx pat;
2980 if (TARGET_ARCH64)
2981 pat = gen_snesidi_special (operands[0], x, y);
2982 else
2983 pat = gen_snesisi_special (operands[0], x, y);
2984 emit_insn (pat);
2985 return true;
2986 }
2987 else if (GET_MODE (x) == DImode)
2988 {
2989 rtx pat;
2990 if (TARGET_VIS3)
2991 pat = gen_snedi_special_vis3 (operands[0], x, y);
2992 else
2993 pat = gen_snedi_special (operands[0], x, y);
2994 emit_insn (pat);
2995 return true;
2996 }
2997 }
2998
2999 if (TARGET_V9
3000 && TARGET_ARCH64
3001 && GET_MODE (x) == DImode
3002 && !(TARGET_VIS3
3003 && (code == GTU || code == LTU))
3004 && gen_v9_scc (operands[0], code, x, y))
3005 return true;
3006
3007 /* We can do LTU and GEU using the addx/subx instructions too. And
3008 for GTU/LEU, if both operands are registers, swap them and fall
3009 back to the easy case. */
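  /* For example (an illustrative sketch): LTU is just the carry bit of
     an unsigned subtract, so "dest = (x < y)" can be emitted as
       subcc %x, %y, %g0   ! compare, setting the carry bit
       addx %g0, 0, %dest  ! dest = 0 + 0 + C
     with no branch. */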
3010 if (code == GTU || code == LEU)
3011 {
3012 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3013 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3014 {
3015 tem = x;
3016 x = y;
3017 y = tem;
3018 code = swap_condition (code);
3019 }
3020 }
3021
3022 if (code == LTU
3023 || (!TARGET_VIS3 && code == GEU))
3024 {
3025 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3026 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3027 gen_compare_reg_1 (code, x, y),
3028 const0_rtx)));
3029 return true;
3030 }
3031
3032 /* All the possibilities to use addx/subx-based sequences have been
3033 exhausted; try for a 3-instruction sequence using v9 conditional
3034 moves. */
3035 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3036 return true;
3037
3038 /* Nope, do branches. */
3039 return false;
3040 }
3041
3042 /* Emit a conditional jump insn for the v9 architecture using comparison code
3043 CODE and jump target LABEL.
3044 This function exists to take advantage of the v9 brxx insns. */
3045
3046 static void
3047 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3048 {
3049 emit_jump_insn (gen_rtx_SET (VOIDmode,
3050 pc_rtx,
3051 gen_rtx_IF_THEN_ELSE (VOIDmode,
3052 gen_rtx_fmt_ee (code, GET_MODE (op0),
3053 op0, const0_rtx),
3054 gen_rtx_LABEL_REF (VOIDmode, label),
3055 pc_rtx)));
3056 }
3057
3058 /* Emit a conditional jump insn for the UA2011 architecture using
3059 comparison code CODE and jump target LABEL. This function exists
3060 to take advantage of the UA2011 Compare and Branch insns. */
3061
3062 static void
3063 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3064 {
3065 rtx if_then_else;
3066
3067 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3068 gen_rtx_fmt_ee (code, GET_MODE (op0),
3069 op0, op1),
3070 gen_rtx_LABEL_REF (VOIDmode, label),
3071 pc_rtx);
3072
3073 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, if_then_else));
3074 }
3075
3076 void
3077 emit_conditional_branch_insn (rtx operands[])
3078 {
3079 /* The quad-word fp compare library routines all return nonzero to indicate
3080 true, which is different from the equivalent libgcc routines, so we must
3081 handle them specially here. */
3082 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3083 {
3084 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3085 GET_CODE (operands[0]));
3086 operands[1] = XEXP (operands[0], 0);
3087 operands[2] = XEXP (operands[0], 1);
3088 }
3089
3090 /* If we can tell early on that the comparison is against a constant
3091 that won't fit in the 5-bit signed immediate field of a cbcond,
3092 use one of the other v9 conditional branch sequences. */
3093 if (TARGET_CBCOND
3094 && GET_CODE (operands[1]) == REG
3095 && (GET_MODE (operands[1]) == SImode
3096 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3097 && (GET_CODE (operands[2]) != CONST_INT
3098 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3099 {
3100 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3101 return;
3102 }
3103
3104 if (TARGET_ARCH64 && operands[2] == const0_rtx
3105 && GET_CODE (operands[1]) == REG
3106 && GET_MODE (operands[1]) == DImode)
3107 {
3108 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3109 return;
3110 }
3111
3112 operands[1] = gen_compare_reg (operands[0]);
3113 operands[2] = const0_rtx;
3114 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3115 operands[1], operands[2]);
3116 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3117 operands[3]));
3118 }
3119
3120
3121 /* Generate a DFmode part of a hard TFmode register.
3122 REG is the TFmode hard register, LOW is 1 for the
3123 low 64 bits of the register and 0 otherwise.
3124 */
3125 rtx
3126 gen_df_reg (rtx reg, int low)
3127 {
3128 int regno = REGNO (reg);
3129
3130 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3131 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3132 return gen_rtx_REG (DFmode, regno);
3133 }
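/* E.g. (illustrative): a TFmode value in %f0 has its most significant
   DFmode half in %f0 and its least significant half in %f2 on this
   big-endian target, so gen_df_reg (reg, 1) returns the REG for %f2. */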
3134 \f
3135 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3136 Unlike normal calls, TFmode operands are passed by reference. It is
3137 assumed that no more than 3 operands are required. */
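/* For instance, under the SPARC V9 quad ABI the add routine has, as a
   sketch in C terms, the prototype
     void _Qp_add (long double *res, const long double *a,
                   const long double *b);
   which is why TFmode operands are replaced by their addresses below. */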
3138
3139 static void
3140 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3141 {
3142 rtx ret_slot = NULL, arg[3], func_sym;
3143 int i;
3144
3145 /* We only expect to be called for conversions, unary, and binary ops. */
3146 gcc_assert (nargs == 2 || nargs == 3);
3147
3148 for (i = 0; i < nargs; ++i)
3149 {
3150 rtx this_arg = operands[i];
3151 rtx this_slot;
3152
3153 /* TFmode arguments and return values are passed by reference. */
3154 if (GET_MODE (this_arg) == TFmode)
3155 {
3156 int force_stack_temp;
3157
3158 force_stack_temp = 0;
3159 if (TARGET_BUGGY_QP_LIB && i == 0)
3160 force_stack_temp = 1;
3161
3162 if (GET_CODE (this_arg) == MEM
3163 && ! force_stack_temp)
3164 {
3165 tree expr = MEM_EXPR (this_arg);
3166 if (expr)
3167 mark_addressable (expr);
3168 this_arg = XEXP (this_arg, 0);
3169 }
3170 else if (CONSTANT_P (this_arg)
3171 && ! force_stack_temp)
3172 {
3173 this_slot = force_const_mem (TFmode, this_arg);
3174 this_arg = XEXP (this_slot, 0);
3175 }
3176 else
3177 {
3178 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3179
3180 /* Operand 0 is the return value. We'll copy it out later. */
3181 if (i > 0)
3182 emit_move_insn (this_slot, this_arg);
3183 else
3184 ret_slot = this_slot;
3185
3186 this_arg = XEXP (this_slot, 0);
3187 }
3188 }
3189
3190 arg[i] = this_arg;
3191 }
3192
3193 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3194
3195 if (GET_MODE (operands[0]) == TFmode)
3196 {
3197 if (nargs == 2)
3198 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
3199 arg[0], GET_MODE (arg[0]),
3200 arg[1], GET_MODE (arg[1]));
3201 else
3202 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
3203 arg[0], GET_MODE (arg[0]),
3204 arg[1], GET_MODE (arg[1]),
3205 arg[2], GET_MODE (arg[2]));
3206
3207 if (ret_slot)
3208 emit_move_insn (operands[0], ret_slot);
3209 }
3210 else
3211 {
3212 rtx ret;
3213
3214 gcc_assert (nargs == 2);
3215
3216 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3217 GET_MODE (operands[0]), 1,
3218 arg[1], GET_MODE (arg[1]));
3219
3220 if (ret != operands[0])
3221 emit_move_insn (operands[0], ret);
3222 }
3223 }
3224
3225 /* Expand soft-float TFmode calls to SPARC ABI routines. */
3226
3227 static void
3228 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3229 {
3230 const char *func;
3231
3232 switch (code)
3233 {
3234 case PLUS:
3235 func = "_Qp_add";
3236 break;
3237 case MINUS:
3238 func = "_Qp_sub";
3239 break;
3240 case MULT:
3241 func = "_Qp_mul";
3242 break;
3243 case DIV:
3244 func = "_Qp_div";
3245 break;
3246 default:
3247 gcc_unreachable ();
3248 }
3249
3250 emit_soft_tfmode_libcall (func, 3, operands);
3251 }
3252
3253 static void
3254 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3255 {
3256 const char *func;
3257
3258 gcc_assert (code == SQRT);
3259 func = "_Qp_sqrt";
3260
3261 emit_soft_tfmode_libcall (func, 2, operands);
3262 }
3263
3264 static void
3265 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3266 {
3267 const char *func;
3268
3269 switch (code)
3270 {
3271 case FLOAT_EXTEND:
3272 switch (GET_MODE (operands[1]))
3273 {
3274 case SFmode:
3275 func = "_Qp_stoq";
3276 break;
3277 case DFmode:
3278 func = "_Qp_dtoq";
3279 break;
3280 default:
3281 gcc_unreachable ();
3282 }
3283 break;
3284
3285 case FLOAT_TRUNCATE:
3286 switch (GET_MODE (operands[0]))
3287 {
3288 case SFmode:
3289 func = "_Qp_qtos";
3290 break;
3291 case DFmode:
3292 func = "_Qp_qtod";
3293 break;
3294 default:
3295 gcc_unreachable ();
3296 }
3297 break;
3298
3299 case FLOAT:
3300 switch (GET_MODE (operands[1]))
3301 {
3302 case SImode:
3303 func = "_Qp_itoq";
3304 if (TARGET_ARCH64)
3305 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3306 break;
3307 case DImode:
3308 func = "_Qp_xtoq";
3309 break;
3310 default:
3311 gcc_unreachable ();
3312 }
3313 break;
3314
3315 case UNSIGNED_FLOAT:
3316 switch (GET_MODE (operands[1]))
3317 {
3318 case SImode:
3319 func = "_Qp_uitoq";
3320 if (TARGET_ARCH64)
3321 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3322 break;
3323 case DImode:
3324 func = "_Qp_uxtoq";
3325 break;
3326 default:
3327 gcc_unreachable ();
3328 }
3329 break;
3330
3331 case FIX:
3332 switch (GET_MODE (operands[0]))
3333 {
3334 case SImode:
3335 func = "_Qp_qtoi";
3336 break;
3337 case DImode:
3338 func = "_Qp_qtox";
3339 break;
3340 default:
3341 gcc_unreachable ();
3342 }
3343 break;
3344
3345 case UNSIGNED_FIX:
3346 switch (GET_MODE (operands[0]))
3347 {
3348 case SImode:
3349 func = "_Qp_qtoui";
3350 break;
3351 case DImode:
3352 func = "_Qp_qtoux";
3353 break;
3354 default:
3355 gcc_unreachable ();
3356 }
3357 break;
3358
3359 default:
3360 gcc_unreachable ();
3361 }
3362
3363 emit_soft_tfmode_libcall (func, 2, operands);
3364 }
3365
3366 /* Expand a hard-float TFmode operation. All arguments must be in
3367 registers. */
3368
3369 static void
3370 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3371 {
3372 rtx op, dest;
3373
3374 if (GET_RTX_CLASS (code) == RTX_UNARY)
3375 {
3376 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3377 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3378 }
3379 else
3380 {
3381 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3382 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3383 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3384 operands[1], operands[2]);
3385 }
3386
3387 if (register_operand (operands[0], VOIDmode))
3388 dest = operands[0];
3389 else
3390 dest = gen_reg_rtx (GET_MODE (operands[0]));
3391
3392 emit_insn (gen_rtx_SET (VOIDmode, dest, op));
3393
3394 if (dest != operands[0])
3395 emit_move_insn (operands[0], dest);
3396 }
3397
3398 void
3399 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3400 {
3401 if (TARGET_HARD_QUAD)
3402 emit_hard_tfmode_operation (code, operands);
3403 else
3404 emit_soft_tfmode_binop (code, operands);
3405 }
3406
3407 void
3408 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3409 {
3410 if (TARGET_HARD_QUAD)
3411 emit_hard_tfmode_operation (code, operands);
3412 else
3413 emit_soft_tfmode_unop (code, operands);
3414 }
3415
3416 void
3417 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3418 {
3419 if (TARGET_HARD_QUAD)
3420 emit_hard_tfmode_operation (code, operands);
3421 else
3422 emit_soft_tfmode_cvt (code, operands);
3423 }
3424 \f
3425 /* Return nonzero if a branch/jump/call instruction will be emitting
3426 a nop into its delay slot. */
3427
3428 int
3429 empty_delay_slot (rtx_insn *insn)
3430 {
3431 rtx seq;
3432
3433 /* If there is no previous instruction (should not happen), return true. */
3434 if (PREV_INSN (insn) == NULL)
3435 return 1;
3436
3437 seq = NEXT_INSN (PREV_INSN (insn));
3438 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3439 return 0;
3440
3441 return 1;
3442 }
3443
3444 /* Return nonzero if we should emit a nop after a cbcond instruction.
3445 The cbcond instruction does not have a delay slot; however, there is
3446 a severe performance penalty if a control transfer appears right
3447 after a cbcond. Therefore we emit a nop when we detect this
3448 situation. */
3449
3450 int
3451 emit_cbcond_nop (rtx insn)
3452 {
3453 rtx next = next_active_insn (insn);
3454
3455 if (!next)
3456 return 1;
3457
3458 if (NONJUMP_INSN_P (next)
3459 && GET_CODE (PATTERN (next)) == SEQUENCE)
3460 next = XVECEXP (PATTERN (next), 0, 0);
3461 else if (CALL_P (next)
3462 && GET_CODE (PATTERN (next)) == PARALLEL)
3463 {
3464 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3465
3466 if (GET_CODE (delay) == RETURN)
3467 {
3468 /* It's a sibling call. Do not emit the nop if we're going
3469 to emit something other than the jump itself as the first
3470 instruction of the sibcall sequence. */
3471 if (sparc_leaf_function_p || TARGET_FLAT)
3472 return 0;
3473 }
3474 }
3475
3476 if (NONJUMP_INSN_P (next))
3477 return 0;
3478
3479 return 1;
3480 }
3481
3482 /* Return nonzero if TRIAL can go into the call delay slot. */
3483
3484 int
3485 eligible_for_call_delay (rtx trial)
3486 {
3487 rtx pat;
3488
3489 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3490 return 0;
3491
3492 /* Binutils allows
3493 call __tls_get_addr, %tgd_call (foo)
3494 add %l7, %o0, %o0, %tgd_add (foo)
3495 while Sun as/ld does not. */
3496 if (TARGET_GNU_TLS || !TARGET_TLS)
3497 return 1;
3498
3499 pat = PATTERN (trial);
3500
3501 /* We must reject tgd_add{32|64}, i.e.
3502 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3503 and tldm_add{32|64}, i.e.
3504 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3505 for Sun as/ld. */
3506 if (GET_CODE (pat) == SET
3507 && GET_CODE (SET_SRC (pat)) == PLUS)
3508 {
3509 rtx unspec = XEXP (SET_SRC (pat), 1);
3510
3511 if (GET_CODE (unspec) == UNSPEC
3512 && (XINT (unspec, 1) == UNSPEC_TLSGD
3513 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3514 return 0;
3515 }
3516
3517 return 1;
3518 }
3519
3520 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3521 instruction. RETURN_P is true if the v9 variant 'return' is to be
3522 considered in the test too.
3523
3524 TRIAL must be a SET whose destination is a REG appropriate for the
3525 'restore' instruction or, if RETURN_P is true, for the 'return'
3526 instruction. */
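/* A 'restore src1, src2, dest' both pops the register window and adds
   src1 + src2 (read in the current window) into dest (written in the
   caller's window). So, for instance, a delay-slot candidate of the form
     (set (reg) (plus (reg) (const_int 5)))
   can be folded into the restore itself. A descriptive note derived from
   the patterns accepted below. */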
3527
3528 static int
3529 eligible_for_restore_insn (rtx trial, bool return_p)
3530 {
3531 rtx pat = PATTERN (trial);
3532 rtx src = SET_SRC (pat);
3533 bool src_is_freg = false;
3534 rtx src_reg;
3535
3536 /* Since we now can do moves between float and integer registers when
3537 VIS3 is enabled, we have to catch this case. We can allow such
3538 moves when doing a 'return', however. */
3539 src_reg = src;
3540 if (GET_CODE (src_reg) == SUBREG)
3541 src_reg = SUBREG_REG (src_reg);
3542 if (GET_CODE (src_reg) == REG
3543 && SPARC_FP_REG_P (REGNO (src_reg)))
3544 src_is_freg = true;
3545
3546 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3547 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3548 && arith_operand (src, GET_MODE (src))
3549 && ! src_is_freg)
3550 {
3551 if (TARGET_ARCH64)
3552 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3553 else
3554 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3555 }
3556
3557 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3558 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3559 && arith_double_operand (src, GET_MODE (src))
3560 && ! src_is_freg)
3561 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3562
3563 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3564 else if (! TARGET_FPU && register_operand (src, SFmode))
3565 return 1;
3566
3567 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3568 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3569 return 1;
3570
3571 /* If we have the 'return' instruction, anything that does not use
3572 local or output registers and can go into a delay slot wins. */
3573 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3574 return 1;
3575
3576 /* The 'restore src1,src2,dest' pattern for SImode. */
3577 else if (GET_CODE (src) == PLUS
3578 && register_operand (XEXP (src, 0), SImode)
3579 && arith_operand (XEXP (src, 1), SImode))
3580 return 1;
3581
3582 /* The 'restore src1,src2,dest' pattern for DImode. */
3583 else if (GET_CODE (src) == PLUS
3584 && register_operand (XEXP (src, 0), DImode)
3585 && arith_double_operand (XEXP (src, 1), DImode))
3586 return 1;
3587
3588 /* The 'restore src1,%lo(src2),dest' pattern. */
3589 else if (GET_CODE (src) == LO_SUM
3590 && ! TARGET_CM_MEDMID
3591 && ((register_operand (XEXP (src, 0), SImode)
3592 && immediate_operand (XEXP (src, 1), SImode))
3593 || (TARGET_ARCH64
3594 && register_operand (XEXP (src, 0), DImode)
3595 && immediate_operand (XEXP (src, 1), DImode))))
3596 return 1;
3597
3598 /* The 'restore src,src,dest' pattern. */
3599 else if (GET_CODE (src) == ASHIFT
3600 && (register_operand (XEXP (src, 0), SImode)
3601 || register_operand (XEXP (src, 0), DImode))
3602 && XEXP (src, 1) == const1_rtx)
3603 return 1;
3604
3605 return 0;
3606 }
3607
3608 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3609
3610 int
3611 eligible_for_return_delay (rtx trial)
3612 {
3613 int regno;
3614 rtx pat;
3615
3616 /* If the function uses __builtin_eh_return, the eh_return machinery
3617 occupies the delay slot. */
3618 if (crtl->calls_eh_return)
3619 return 0;
3620
3621 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3622 return 0;
3623
3624 /* In the case of a leaf or flat function, anything can go into the slot. */
3625 if (sparc_leaf_function_p || TARGET_FLAT)
3626 return 1;
3627
3628 if (!NONJUMP_INSN_P (trial))
3629 return 0;
3630
3631 pat = PATTERN (trial);
3632 if (GET_CODE (pat) == PARALLEL)
3633 {
3634 int i;
3635
3636 if (! TARGET_V9)
3637 return 0;
3638 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3639 {
3640 rtx expr = XVECEXP (pat, 0, i);
3641 if (GET_CODE (expr) != SET)
3642 return 0;
3643 if (GET_CODE (SET_DEST (expr)) != REG)
3644 return 0;
3645 regno = REGNO (SET_DEST (expr));
3646 if (regno >= 8 && regno < 24)
3647 return 0;
3648 }
3649 return !epilogue_renumber (&pat, 1);
3650 }
3651
3652 if (GET_CODE (pat) != SET)
3653 return 0;
3654
3655 if (GET_CODE (SET_DEST (pat)) != REG)
3656 return 0;
3657
3658 regno = REGNO (SET_DEST (pat));
3659
3660 /* Otherwise, only operations which can be done in tandem with
3661 a `restore' or `return' insn can go into the delay slot. */
3662 if (regno >= 8 && regno < 24)
3663 return 0;
3664
3665 /* If this instruction sets up a floating-point register and we have a return
3666 instruction, it can probably go in. But restore will not work
3667 with FP_REGS. */
3668 if (! SPARC_INT_REG_P (regno))
3669 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3670
3671 return eligible_for_restore_insn (trial, true);
3672 }
3673
3674 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3675
3676 int
3677 eligible_for_sibcall_delay (rtx trial)
3678 {
3679 rtx pat;
3680
3681 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3682 return 0;
3683
3684 if (!NONJUMP_INSN_P (trial))
3685 return 0;
3686
3687 pat = PATTERN (trial);
3688
3689 if (sparc_leaf_function_p || TARGET_FLAT)
3690 {
3691 /* If the tail call is done using the call instruction,
3692 we have to restore %o7 in the delay slot. */
3693 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3694 return 0;
3695
3696 /* %g1 is used to build the function address. */
3697 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3698 return 0;
3699
3700 return 1;
3701 }
3702
3703 if (GET_CODE (pat) != SET)
3704 return 0;
3705
3706 /* Otherwise, only operations which can be done in tandem with
3707 a `restore' insn can go into the delay slot. */
3708 if (GET_CODE (SET_DEST (pat)) != REG
3709 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3710 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3711 return 0;
3712
3713 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3714 in most cases. */
3715 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3716 return 0;
3717
3718 return eligible_for_restore_insn (trial, false);
3719 }
3720 \f
3721 /* Determine if it's legal to put X into the constant pool. This
3722 is not possible if X contains the address of a symbol that is
3723 not constant (TLS) or not known at final link time (PIC). */
3724
3725 static bool
3726 sparc_cannot_force_const_mem (enum machine_mode mode, rtx x)
3727 {
3728 switch (GET_CODE (x))
3729 {
3730 case CONST_INT:
3731 case CONST_DOUBLE:
3732 case CONST_VECTOR:
3733 /* Accept all non-symbolic constants. */
3734 return false;
3735
3736 case LABEL_REF:
3737 /* Labels are OK iff we are non-PIC. */
3738 return flag_pic != 0;
3739
3740 case SYMBOL_REF:
3741 /* 'Naked' TLS symbol references are never OK;
3742 non-TLS symbols are OK iff we are non-PIC. */
3743 if (SYMBOL_REF_TLS_MODEL (x))
3744 return true;
3745 else
3746 return flag_pic != 0;
3747
3748 case CONST:
3749 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3750 case PLUS:
3751 case MINUS:
3752 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3753 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3754 case UNSPEC:
3755 return true;
3756 default:
3757 gcc_unreachable ();
3758 }
3759 }
3760 \f
3761 /* Global Offset Table support. */
3762 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3763 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3764
3765 /* Return the SYMBOL_REF for the Global Offset Table. */
3766
3767 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3768
3769 static rtx
3770 sparc_got (void)
3771 {
3772 if (!sparc_got_symbol)
3773 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3774
3775 return sparc_got_symbol;
3776 }
3777
3778 /* Ensure that we are not using patterns that are not OK with PIC. */
3779
3780 int
3781 check_pic (int i)
3782 {
3783 rtx op;
3784
3785 switch (flag_pic)
3786 {
3787 case 1:
3788 op = recog_data.operand[i];
3789 gcc_assert (GET_CODE (op) != SYMBOL_REF
3790 && (GET_CODE (op) != CONST
3791 || (GET_CODE (XEXP (op, 0)) == MINUS
3792 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3793 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
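/* The gcc_assert above is the whole check for flag_pic == 1; fall
   through and return 1 like the remaining cases.  */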
3794 case 2:
3795 default:
3796 return 1;
3797 }
3798 }
3799
3800 /* Return true if X is an address which needs a temporary register when
3801 reloaded while generating PIC code. */
3802
3803 int
3804 pic_address_needs_scratch (rtx x)
3805 {
3806 /* An address which is a symbolic operand plus a non-SMALL_INT offset needs a temp reg. */
3807 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3808 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3809 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3810 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3811 return 1;
3812
3813 return 0;
3814 }
3815
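/* Example (editor's note): (const (plus (symbol_ref "s") (const_int 8192)))
   needs a scratch register, since 8192 does not satisfy SMALL_INT (it does
   not fit in a 13-bit signed immediate), while a small addend such as 8
   can be handled directly.  */
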
3816 /* Determine if a given RTX is a valid constant. We already know this
3817 satisfies CONSTANT_P. */
3818
3819 static bool
3820 sparc_legitimate_constant_p (enum machine_mode mode, rtx x)
3821 {
3822 switch (GET_CODE (x))
3823 {
3824 case CONST:
3825 case SYMBOL_REF:
3826 if (sparc_tls_referenced_p (x))
3827 return false;
3828 break;
3829
3830 case CONST_DOUBLE:
3831 if (GET_MODE (x) == VOIDmode)
3832 return true;
3833
3834 /* Floating point constants are generally not ok.
3835 The only exception is 0.0 and all-ones in VIS. */
3836 if (TARGET_VIS
3837 && SCALAR_FLOAT_MODE_P (mode)
3838 && (const_zero_operand (x, mode)
3839 || const_all_ones_operand (x, mode)))
3840 return true;
3841
3842 return false;
3843
3844 case CONST_VECTOR:
3845 /* Vector constants are generally not ok.
3846 The only exception is 0 or -1 in VIS. */
3847 if (TARGET_VIS
3848 && (const_zero_operand (x, mode)
3849 || const_all_ones_operand (x, mode)))
3850 return true;
3851
3852 return false;
3853
3854 default:
3855 break;
3856 }
3857
3858 return true;
3859 }
3860
3861 /* Determine if a given RTX is a valid constant address. */
3862
3863 bool
3864 constant_address_p (rtx x)
3865 {
3866 switch (GET_CODE (x))
3867 {
3868 case LABEL_REF:
3869 case CONST_INT:
3870 case HIGH:
3871 return true;
3872
3873 case CONST:
3874 if (flag_pic && pic_address_needs_scratch (x))
3875 return false;
3876 return sparc_legitimate_constant_p (Pmode, x);
3877
3878 case SYMBOL_REF:
3879 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3880
3881 default:
3882 return false;
3883 }
3884 }
3885
3886 /* Nonzero if the constant value X is a legitimate general operand
3887 when generating PIC code. It is given that flag_pic is on and
3888 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
3889
3890 bool
3891 legitimate_pic_operand_p (rtx x)
3892 {
3893 if (pic_address_needs_scratch (x))
3894 return false;
3895 if (sparc_tls_referenced_p (x))
3896 return false;
3897 return true;
3898 }
3899
3900 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3901 (CONST_INT_P (X) \
3902 && INTVAL (X) >= -0x1000 \
3903 && INTVAL (X) < (0x1000 - GET_MODE_SIZE (MODE)))
3904
3905 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3906 (CONST_INT_P (X) \
3907 && INTVAL (X) >= -0x1000 \
3908 && INTVAL (X) < (0xc00 - GET_MODE_SIZE (MODE)))
3909
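/* Worked arithmetic (editor's note): the 13-bit signed immediate field
   covers [-4096, 4095], so for an 8-byte DImode access RTX_OK_FOR_OFFSET_P
   accepts offsets in [-4096, 4087], keeping offset + 7 in range.  The
   tighter 0xc00 bound of RTX_OK_FOR_OLO10_P leaves headroom for a %lo()
   part: %lo() contributes at most 0x3ff, and 0x3ff + 0xbff = 0xffe still
   fits in the positive half of the simm13 range.  */
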
3910 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3911
3912 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3913 ordinarily. This changes a bit when generating PIC. */
3914
3915 static bool
3916 sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3917 {
3918 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3919
3920 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3921 rs1 = addr;
3922 else if (GET_CODE (addr) == PLUS)
3923 {
3924 rs1 = XEXP (addr, 0);
3925 rs2 = XEXP (addr, 1);
3926
3927 /* Canonicalize: REG comes first; if there are no REGs,
3928 LO_SUM comes first. */
3929 if (!REG_P (rs1)
3930 && GET_CODE (rs1) != SUBREG
3931 && (REG_P (rs2)
3932 || GET_CODE (rs2) == SUBREG
3933 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3934 {
3935 rs1 = XEXP (addr, 1);
3936 rs2 = XEXP (addr, 0);
3937 }
3938
3939 if ((flag_pic == 1
3940 && rs1 == pic_offset_table_rtx
3941 && !REG_P (rs2)
3942 && GET_CODE (rs2) != SUBREG
3943 && GET_CODE (rs2) != LO_SUM
3944 && GET_CODE (rs2) != MEM
3945 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3946 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3947 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3948 || ((REG_P (rs1)
3949 || GET_CODE (rs1) == SUBREG)
3950 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
3951 {
3952 imm1 = rs2;
3953 rs2 = NULL;
3954 }
3955 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3956 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3957 {
3958 /* We prohibit REG + REG for TFmode when there are no quad move insns
3959 and we consequently need to split. We do this because REG+REG
3960 is not an offsettable address. If we get the situation in reload
3961 where source and destination of a movtf pattern are both MEMs with
3962 REG+REG address, then only one of them gets converted to an
3963 offsettable address. */
3964 if (mode == TFmode
3965 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
3966 return 0;
3967
3968 /* Likewise for TImode, but in all cases. */
3969 if (mode == TImode)
3970 return 0;
3971
3972 /* We prohibit REG + REG on ARCH32 if not optimizing for
3973 DFmode/DImode because then mem_min_alignment is likely to be zero
3974 after reload and the forced split would lack a matching splitter
3975 pattern. */
3976 if (TARGET_ARCH32 && !optimize
3977 && (mode == DFmode || mode == DImode))
3978 return 0;
3979 }
3980 else if (USE_AS_OFFSETABLE_LO10
3981 && GET_CODE (rs1) == LO_SUM
3982 && TARGET_ARCH64
3983 && ! TARGET_CM_MEDMID
3984 && RTX_OK_FOR_OLO10_P (rs2, mode))
3985 {
3986 rs2 = NULL;
3987 imm1 = XEXP (rs1, 1);
3988 rs1 = XEXP (rs1, 0);
3989 if (!CONSTANT_P (imm1)
3990 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3991 return 0;
3992 }
3993 }
3994 else if (GET_CODE (addr) == LO_SUM)
3995 {
3996 rs1 = XEXP (addr, 0);
3997 imm1 = XEXP (addr, 1);
3998
3999 if (!CONSTANT_P (imm1)
4000 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4001 return 0;
4002
4003 /* We can't allow TFmode in 32-bit mode, because an offset greater
4004 than the alignment (8) may cause the LO_SUM to overflow. */
4005 if (mode == TFmode && TARGET_ARCH32)
4006 return 0;
4007 }
4008 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4009 return 1;
4010 else
4011 return 0;
4012
4013 if (GET_CODE (rs1) == SUBREG)
4014 rs1 = SUBREG_REG (rs1);
4015 if (!REG_P (rs1))
4016 return 0;
4017
4018 if (rs2)
4019 {
4020 if (GET_CODE (rs2) == SUBREG)
4021 rs2 = SUBREG_REG (rs2);
4022 if (!REG_P (rs2))
4023 return 0;
4024 }
4025
4026 if (strict)
4027 {
4028 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4029 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4030 return 0;
4031 }
4032 else
4033 {
4034 if ((! SPARC_INT_REG_P (REGNO (rs1))
4035 && REGNO (rs1) != FRAME_POINTER_REGNUM
4036 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4037 || (rs2
4038 && (! SPARC_INT_REG_P (REGNO (rs2))
4039 && REGNO (rs2) != FRAME_POINTER_REGNUM
4040 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4041 return 0;
4042 }
4043 return 1;
4044 }
4045
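/* Editor's summary (illustrative) of the address forms accepted above:

       REG                  [%g1]
       REG + REG            [%g1 + %g2]
       REG + SMALLINT       [%g1 + 500]
       LO_SUM (REG, imm)    [%g1 + %lo(sym)]

   subject to the mode-specific restrictions (TFmode, TImode, non-optimized
   ARCH32 DFmode/DImode) spelled out in the code.  */
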
4046 /* Return the SYMBOL_REF for the tls_get_addr function. */
4047
4048 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4049
4050 static rtx
4051 sparc_tls_get_addr (void)
4052 {
4053 if (!sparc_tls_symbol)
4054 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4055
4056 return sparc_tls_symbol;
4057 }
4058
4059 /* Return the Global Offset Table to be used in TLS mode. */
4060
4061 static rtx
4062 sparc_tls_got (void)
4063 {
4064 /* In PIC mode, this is just the PIC offset table. */
4065 if (flag_pic)
4066 {
4067 crtl->uses_pic_offset_table = 1;
4068 return pic_offset_table_rtx;
4069 }
4070
4071 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4072 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4073 if (TARGET_SUN_TLS && TARGET_ARCH32)
4074 {
4075 load_got_register ();
4076 return global_offset_table_rtx;
4077 }
4078
4079 /* In all other cases, we load a new pseudo with the GOT symbol. */
4080 return copy_to_reg (sparc_got ());
4081 }
4082
4083 /* Return true if X contains a thread-local symbol. */
4084
4085 static bool
4086 sparc_tls_referenced_p (rtx x)
4087 {
4088 if (!TARGET_HAVE_TLS)
4089 return false;
4090
4091 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4092 x = XEXP (XEXP (x, 0), 0);
4093
4094 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4095 return true;
4096
4097 /* That's all we handle in sparc_legitimize_tls_address for now. */
4098 return false;
4099 }
4100
4101 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4102 this (thread-local) address. */
4103
4104 static rtx
4105 sparc_legitimize_tls_address (rtx addr)
4106 {
4107 rtx temp1, temp2, temp3, ret, o0, got;
4108 rtx_insn *insn;
4109
4110 gcc_assert (can_create_pseudo_p ());
4111
4112 if (GET_CODE (addr) == SYMBOL_REF)
4113 switch (SYMBOL_REF_TLS_MODEL (addr))
4114 {
4115 case TLS_MODEL_GLOBAL_DYNAMIC:
4116 start_sequence ();
4117 temp1 = gen_reg_rtx (SImode);
4118 temp2 = gen_reg_rtx (SImode);
4119 ret = gen_reg_rtx (Pmode);
4120 o0 = gen_rtx_REG (Pmode, 8);
4121 got = sparc_tls_got ();
4122 emit_insn (gen_tgd_hi22 (temp1, addr));
4123 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4124 if (TARGET_ARCH32)
4125 {
4126 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4127 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4128 addr, const1_rtx));
4129 }
4130 else
4131 {
4132 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4133 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4134 addr, const1_rtx));
4135 }
4136 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4137 insn = get_insns ();
4138 end_sequence ();
4139 emit_libcall_block (insn, ret, o0, addr);
4140 break;
4141
4142 case TLS_MODEL_LOCAL_DYNAMIC:
4143 start_sequence ();
4144 temp1 = gen_reg_rtx (SImode);
4145 temp2 = gen_reg_rtx (SImode);
4146 temp3 = gen_reg_rtx (Pmode);
4147 ret = gen_reg_rtx (Pmode);
4148 o0 = gen_rtx_REG (Pmode, 8);
4149 got = sparc_tls_got ();
4150 emit_insn (gen_tldm_hi22 (temp1));
4151 emit_insn (gen_tldm_lo10 (temp2, temp1));
4152 if (TARGET_ARCH32)
4153 {
4154 emit_insn (gen_tldm_add32 (o0, got, temp2));
4155 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4156 const1_rtx));
4157 }
4158 else
4159 {
4160 emit_insn (gen_tldm_add64 (o0, got, temp2));
4161 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4162 const1_rtx));
4163 }
4164 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4165 insn = get_insns ();
4166 end_sequence ();
4167 emit_libcall_block (insn, temp3, o0,
4168 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4169 UNSPEC_TLSLD_BASE));
4170 temp1 = gen_reg_rtx (SImode);
4171 temp2 = gen_reg_rtx (SImode);
4172 emit_insn (gen_tldo_hix22 (temp1, addr));
4173 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4174 if (TARGET_ARCH32)
4175 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4176 else
4177 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4178 break;
4179
4180 case TLS_MODEL_INITIAL_EXEC:
4181 temp1 = gen_reg_rtx (SImode);
4182 temp2 = gen_reg_rtx (SImode);
4183 temp3 = gen_reg_rtx (Pmode);
4184 got = sparc_tls_got ();
4185 emit_insn (gen_tie_hi22 (temp1, addr));
4186 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4187 if (TARGET_ARCH32)
4188 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4189 else
4190 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4191 if (TARGET_SUN_TLS)
4192 {
4193 ret = gen_reg_rtx (Pmode);
4194 if (TARGET_ARCH32)
4195 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4196 temp3, addr));
4197 else
4198 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4199 temp3, addr));
4200 }
4201 else
4202 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4203 break;
4204
4205 case TLS_MODEL_LOCAL_EXEC:
4206 temp1 = gen_reg_rtx (Pmode);
4207 temp2 = gen_reg_rtx (Pmode);
4208 if (TARGET_ARCH32)
4209 {
4210 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4211 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4212 }
4213 else
4214 {
4215 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4216 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4217 }
4218 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4219 break;
4220
4221 default:
4222 gcc_unreachable ();
4223 }
4224
4225 else if (GET_CODE (addr) == CONST)
4226 {
4227 rtx base, offset;
4228
4229 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4230
4231 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4232 offset = XEXP (XEXP (addr, 0), 1);
4233
4234 base = force_operand (base, NULL_RTX);
4235 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4236 offset = force_reg (Pmode, offset);
4237 ret = gen_rtx_PLUS (Pmode, base, offset);
4238 }
4239
4240 else
4241 gcc_unreachable (); /* for now ... */
4242
4243 return ret;
4244 }
4245
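/* Editor's sketch (assuming the standard SPARC TLS assembler operators;
   not part of the original file): the global-dynamic case above expands
   to roughly

       sethi  %tgd_hi22(sym), %t1
       add    %t1, %tgd_lo10(sym), %t2
       add    %got, %t2, %o0, %tgd_add(sym)
       call   __tls_get_addr, %tgd_call(sym)
        nop

   with the thread-local address returned in %o0, and emit_libcall_block
   wrapping the sequence so it can be optimized as a whole.  */
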
4246 /* Legitimize PIC addresses. If the address is already position-independent,
4247 we return ORIG. Newly generated position-independent addresses go into a
4248 reg. This is REG if nonzero, otherwise we allocate register(s) as
4249 necessary. */
4250
4251 static rtx
4252 sparc_legitimize_pic_address (rtx orig, rtx reg)
4253 {
4254 bool gotdata_op = false;
4255
4256 if (GET_CODE (orig) == SYMBOL_REF
4257 /* See the comment in sparc_expand_move. */
4258 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4259 {
4260 rtx pic_ref, address;
4261 rtx_insn *insn;
4262
4263 if (reg == 0)
4264 {
4265 gcc_assert (can_create_pseudo_p ());
4266 reg = gen_reg_rtx (Pmode);
4267 }
4268
4269 if (flag_pic == 2)
4270 {
4271 /* If not during reload, allocate another temp reg here for loading
4272 in the address, so that these instructions can be optimized
4273 properly. */
4274 rtx temp_reg = (! can_create_pseudo_p ()
4275 ? reg : gen_reg_rtx (Pmode));
4276
4277 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4278 won't get confused into thinking that these two instructions
4279 are loading in the true address of the symbol. If in the
4280 future a PIC rtx exists, that should be used instead. */
4281 if (TARGET_ARCH64)
4282 {
4283 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4284 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4285 }
4286 else
4287 {
4288 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4289 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4290 }
4291 address = temp_reg;
4292 gotdata_op = true;
4293 }
4294 else
4295 address = orig;
4296
4297 crtl->uses_pic_offset_table = 1;
4298 if (gotdata_op)
4299 {
4300 if (TARGET_ARCH64)
4301 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4302 pic_offset_table_rtx,
4303 address, orig));
4304 else
4305 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4306 pic_offset_table_rtx,
4307 address, orig));
4308 }
4309 else
4310 {
4311 pic_ref
4312 = gen_const_mem (Pmode,
4313 gen_rtx_PLUS (Pmode,
4314 pic_offset_table_rtx, address));
4315 insn = emit_move_insn (reg, pic_ref);
4316 }
4317
4318 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4319 by the loop pass. */
4320 set_unique_reg_note (insn, REG_EQUAL, orig);
4321 return reg;
4322 }
4323 else if (GET_CODE (orig) == CONST)
4324 {
4325 rtx base, offset;
4326
4327 if (GET_CODE (XEXP (orig, 0)) == PLUS
4328 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4329 return orig;
4330
4331 if (reg == 0)
4332 {
4333 gcc_assert (can_create_pseudo_p ());
4334 reg = gen_reg_rtx (Pmode);
4335 }
4336
4337 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4338 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4339 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4340 base == reg ? NULL_RTX : reg);
4341
4342 if (GET_CODE (offset) == CONST_INT)
4343 {
4344 if (SMALL_INT (offset))
4345 return plus_constant (Pmode, base, INTVAL (offset));
4346 else if (can_create_pseudo_p ())
4347 offset = force_reg (Pmode, offset);
4348 else
4349 /* If we reach here, then something is seriously wrong. */
4350 gcc_unreachable ();
4351 }
4352 return gen_rtx_PLUS (Pmode, base, offset);
4353 }
4354 else if (GET_CODE (orig) == LABEL_REF)
4355 /* ??? We ought to be checking that the register is live instead, in case
4356 it is eliminated. */
4357 crtl->uses_pic_offset_table = 1;
4358
4359 return orig;
4360 }
4361
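/* Editor's sketch of the 32-bit code generated above (a hedged sketch,
   assuming the GNU as GOTDATA operators used by the gotdata patterns):

       flag_pic == 1:   ld     [%l7 + sym], %reg

       flag_pic == 2:   sethi  %gdop_hix22(sym), %t
                        xor    %t, %gdop_lox10(sym), %t
                        ld     [%l7 + %t], %reg, %gdop(sym)

   where %l7 holds the GOT pointer and the %gdop annotation lets the
   linker relax the GOT load when the symbol turns out to be local.  */
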
4362 /* Try machine-dependent ways of modifying an illegitimate address X
4363 to be legitimate. If we find one, return the new, valid address.
4364
4365 OLDX is the address as it was before break_out_memory_refs was called.
4366 In some cases it is useful to look at this to decide what needs to be done.
4367
4368 MODE is the mode of the operand pointed to by X.
4369
4370 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4371
4372 static rtx
4373 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4374 enum machine_mode mode)
4375 {
4376 rtx orig_x = x;
4377
4378 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4379 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4380 force_operand (XEXP (x, 0), NULL_RTX));
4381 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4382 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4383 force_operand (XEXP (x, 1), NULL_RTX));
4384 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4385 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4386 XEXP (x, 1));
4387 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4388 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4389 force_operand (XEXP (x, 1), NULL_RTX));
4390
4391 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4392 return x;
4393
4394 if (sparc_tls_referenced_p (x))
4395 x = sparc_legitimize_tls_address (x);
4396 else if (flag_pic)
4397 x = sparc_legitimize_pic_address (x, NULL_RTX);
4398 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4399 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4400 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4401 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4402 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4403 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4404 else if (GET_CODE (x) == SYMBOL_REF
4405 || GET_CODE (x) == CONST
4406 || GET_CODE (x) == LABEL_REF)
4407 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4408
4409 return x;
4410 }
4411
4412 /* Delegitimize an address that was legitimized by the above function. */
4413
4414 static rtx
4415 sparc_delegitimize_address (rtx x)
4416 {
4417 x = delegitimize_mem_from_attrs (x);
4418
4419 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4420 switch (XINT (XEXP (x, 1), 1))
4421 {
4422 case UNSPEC_MOVE_PIC:
4423 case UNSPEC_TLSLE:
4424 x = XVECEXP (XEXP (x, 1), 0, 0);
4425 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4426 break;
4427 default:
4428 break;
4429 }
4430
4431 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4432 if (GET_CODE (x) == MINUS
4433 && REG_P (XEXP (x, 0))
4434 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4435 && GET_CODE (XEXP (x, 1)) == LO_SUM
4436 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4437 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4438 {
4439 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4440 gcc_assert (GET_CODE (x) == LABEL_REF);
4441 }
4442
4443 return x;
4444 }
4445
4446 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4447 replace the input X, or the original X if no replacement is called for.
4448 The output parameter *WIN is 1 if the calling macro should goto WIN,
4449 0 if it should not.
4450
4451 For SPARC, we wish to handle addresses by splitting them into
4452 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4453 This cuts the number of extra insns by one.
4454
4455 Do nothing when generating PIC code and the address is a symbolic
4456 operand or requires a scratch register. */
4457
4458 rtx
4459 sparc_legitimize_reload_address (rtx x, enum machine_mode mode,
4460 int opnum, int type,
4461 int ind_levels ATTRIBUTE_UNUSED, int *win)
4462 {
4463 /* Decompose SImode constants into HIGH+LO_SUM. */
4464 if (CONSTANT_P (x)
4465 && (mode != TFmode || TARGET_ARCH64)
4466 && GET_MODE (x) == SImode
4467 && GET_CODE (x) != LO_SUM
4468 && GET_CODE (x) != HIGH
4469 && sparc_cmodel <= CM_MEDLOW
4470 && !(flag_pic
4471 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4472 {
4473 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4474 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4475 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4476 opnum, (enum reload_type)type);
4477 *win = 1;
4478 return x;
4479 }
4480
4481 /* We have to recognize what we have already generated above. */
4482 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4483 {
4484 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4485 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4486 opnum, (enum reload_type)type);
4487 *win = 1;
4488 return x;
4489 }
4490
4491 *win = 0;
4492 return x;
4493 }
4494
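/* Example (editor's note): reloading an SImode constant address such as
   (symbol_ref "s") yields

       (lo_sum (high (symbol_ref "s")) (symbol_ref "s"))

   i.e. one "sethi %hi(s), %tmp" reload insn, with the memory access
   rewritten to use [%tmp + %lo(s)] directly, saving the extra "or" that a
   full address materialization would need.  */
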
4495 /* Return true if ADDR (a legitimate address expression)
4496 has an effect that depends on the machine mode it is used for.
4497
4498 In PIC mode,
4499
4500 (mem:HI [%l7+a])
4501
4502 is not equivalent to
4503
4504 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4505
4506 because [%l7+a+1] is interpreted as the address of (a+1). */
4507
4508
4509 static bool
4510 sparc_mode_dependent_address_p (const_rtx addr,
4511 addr_space_t as ATTRIBUTE_UNUSED)
4512 {
4513 if (flag_pic && GET_CODE (addr) == PLUS)
4514 {
4515 rtx op0 = XEXP (addr, 0);
4516 rtx op1 = XEXP (addr, 1);
4517 if (op0 == pic_offset_table_rtx
4518 && symbolic_operand (op1, VOIDmode))
4519 return true;
4520 }
4521
4522 return false;
4523 }
4524
4525 #ifdef HAVE_GAS_HIDDEN
4526 # define USE_HIDDEN_LINKONCE 1
4527 #else
4528 # define USE_HIDDEN_LINKONCE 0
4529 #endif
4530
4531 static void
4532 get_pc_thunk_name (char name[32], unsigned int regno)
4533 {
4534 const char *reg_name = reg_names[regno];
4535
4536 /* Skip the leading '%' as that cannot be used in a
4537 symbol name. */
4538 reg_name += 1;
4539
4540 if (USE_HIDDEN_LINKONCE)
4541 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4542 else
4543 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4544 }
4545
4546 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4547
4548 static rtx
4549 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4550 {
4551 int orig_flag_pic = flag_pic;
4552 rtx insn;
4553
4554 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4555 flag_pic = 0;
4556 if (TARGET_ARCH64)
4557 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4558 else
4559 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4560 flag_pic = orig_flag_pic;
4561
4562 return insn;
4563 }
4564
4565 /* Emit code to load the GOT register. */
4566
4567 void
4568 load_got_register (void)
4569 {
4570 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4571 if (!global_offset_table_rtx)
4572 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4573
4574 if (TARGET_VXWORKS_RTP)
4575 emit_insn (gen_vxworks_load_got ());
4576 else
4577 {
4578 /* The GOT symbol is subject to a PC-relative relocation so we need a
4579 helper function to add the PC value and thus get the final value. */
4580 if (!got_helper_rtx)
4581 {
4582 char name[32];
4583 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4584 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4585 }
4586
4587 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4588 got_helper_rtx,
4589 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4590 }
4591
4592 /* Need to emit this whether or not we obey regdecls,
4593 since setjmp/longjmp can cause life info to screw up.
4594 ??? In the case where we don't obey regdecls, this is not sufficient
4595 since we may not fall out the bottom. */
4596 emit_use (global_offset_table_rtx);
4597 }
4598
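/* Editor's sketch (illustrative, assuming the usual 32-bit expansion of
   the load_pcrel_sym pattern):

       sethi  %hi(_GLOBAL_OFFSET_TABLE_-4), %l7
       call   __sparc_get_pc_thunk.l7
        add   %l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk body is simply "retl; add %o7, %l7, %l7", so %l7 ends
   up holding the absolute address of the GOT.  */
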
4599 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4600 address of the call target. */
4601
4602 void
4603 sparc_emit_call_insn (rtx pat, rtx addr)
4604 {
4605 rtx_insn *insn;
4606
4607 insn = emit_call_insn (pat);
4608
4609 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4610 if (TARGET_VXWORKS_RTP
4611 && flag_pic
4612 && GET_CODE (addr) == SYMBOL_REF
4613 && (SYMBOL_REF_DECL (addr)
4614 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4615 : !SYMBOL_REF_LOCAL_P (addr)))
4616 {
4617 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4618 crtl->uses_pic_offset_table = 1;
4619 }
4620 }
4621 \f
4622 /* Return 1 if RTX is a MEM which is known to be aligned to at
4623 least a DESIRED byte boundary. */
4624
4625 int
4626 mem_min_alignment (rtx mem, int desired)
4627 {
4628 rtx addr, base, offset;
4629
4630 /* If it's not a MEM we can't accept it. */
4631 if (GET_CODE (mem) != MEM)
4632 return 0;
4633
4634 /* Obviously... */
4635 if (!TARGET_UNALIGNED_DOUBLES
4636 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4637 return 1;
4638
4639 /* ??? The rest of the function predates MEM_ALIGN so
4640 there is probably a bit of redundancy. */
4641 addr = XEXP (mem, 0);
4642 base = offset = NULL_RTX;
4643 if (GET_CODE (addr) == PLUS)
4644 {
4645 if (GET_CODE (XEXP (addr, 0)) == REG)
4646 {
4647 base = XEXP (addr, 0);
4648
4649 /* What we are saying here is that if the base
4650 REG is aligned properly, the compiler will make
4651 sure any REG based index upon it will be so
4652 as well. */
4653 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4654 offset = XEXP (addr, 1);
4655 else
4656 offset = const0_rtx;
4657 }
4658 }
4659 else if (GET_CODE (addr) == REG)
4660 {
4661 base = addr;
4662 offset = const0_rtx;
4663 }
4664
4665 if (base != NULL_RTX)
4666 {
4667 int regno = REGNO (base);
4668
4669 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4670 {
4671 /* Check if the compiler has recorded some information
4672 about the alignment of the base REG. If reload has
4673 completed, we already matched with proper alignments.
4674 If not running global_alloc, reload might give us
4675 an unaligned pointer to the local stack though. */
4676 if (((cfun != 0
4677 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4678 || (optimize && reload_completed))
4679 && (INTVAL (offset) & (desired - 1)) == 0)
4680 return 1;
4681 }
4682 else
4683 {
4684 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4685 return 1;
4686 }
4687 }
4688 else if (! TARGET_UNALIGNED_DOUBLES
4689 || CONSTANT_P (addr)
4690 || GET_CODE (addr) == LO_SUM)
4691 {
4692 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4693 is true, in which case we can only assume that an access is aligned if
4694 it is to a constant address, or the address involves a LO_SUM. */
4695 return 1;
4696 }
4697
4698 /* An obviously unaligned address. */
4699 return 0;
4700 }
4701
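/* Usage example (editor's note): for a 32-bit frame access such as
   [%fp - 16], SPARC_STACK_BIAS is 0 and (-16 - 0) & 7 == 0, so
   mem_min_alignment (mem, 8) returns 1 and the access may use a single
   ldd/std, which requires 8-byte alignment.  */
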
4702 \f
4703 /* Vectors to keep interesting information about registers where it can easily
4704 be accessed. We used to use the actual mode value as the bit number, but there
4705 are more than 32 modes now. Instead we use two tables: one indexed by
4706 hard register number, and one indexed by mode. */
4707
4708 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4709 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4710 mapped into one sparc_mode_class mode. */
4711
4712 enum sparc_mode_class {
4713 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4714 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4715 CC_MODE, CCFP_MODE
4716 };
4717
4718 /* Modes for single-word and smaller quantities. */
4719 #define S_MODES \
4720 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4721
4722 /* Modes for double-word and smaller quantities. */
4723 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4724
4725 /* Modes for quad-word and smaller quantities. */
4726 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4727
4728 /* Modes for 8-word and smaller quantities. */
4729 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4730
4731 /* Modes for single-float quantities. */
4732 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4733
4734 /* Modes for double-float and smaller quantities. */
4735 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4736
4737 /* Modes for quad-float and smaller quantities. */
4738 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4739
4740 /* Modes for quad-float pairs and smaller quantities. */
4741 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4742
4743 /* Modes for double-float only quantities. */
4744 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4745
4746 /* Modes for quad-float and double-float only quantities. */
4747 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4748
4749 /* Modes for quad-float pairs and double-float only quantities. */
4750 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4751
4752 /* Modes for condition codes. */
4753 #define CC_MODES (1 << (int) CC_MODE)
4754 #define CCFP_MODES (1 << (int) CCFP_MODE)
4755
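/* Worked example (editor's note): D_MODES expands to the bits for H_MODE,
   S_MODE, SF_MODE, D_MODE and DF_MODE, i.e. an even general register pair
   may hold anything up to a double word, integer or floating.  The *_NO_S
   variants drop the single-word bits for the upper FP registers, which
   cannot hold SFmode/SImode values.  */
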
4756 /* Value is 1 if the register/mode pair is acceptable on sparc.
4757 The funny mixture of D and T modes is because integer operations
4758 do not operate specially on tetra (128-bit) quantities, so non-quad-aligned
4759 registers can hold quadword quantities (except %o4 and %i4 because
4760 they cross fixed registers). */
4761
4762 /* This points to either the 32 bit or the 64 bit version. */
4763 const int *hard_regno_mode_classes;
4764
4765 static const int hard_32bit_mode_classes[] = {
4766 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4767 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4768 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4769 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4770
4771 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4772 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4773 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4774 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4775
4776 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4777 and none can hold SFmode/SImode values. */
4778 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4779 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4780 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4781 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4782
4783 /* %fcc[0123] */
4784 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4785
4786 /* %icc, %sfp, %gsr */
4787 CC_MODES, 0, D_MODES
4788 };
4789
4790 static const int hard_64bit_mode_classes[] = {
4791 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4792 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4793 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4794 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4795
4796 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4797 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4798 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4799 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4800
4801 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4802 and none can hold SFmode/SImode values. */
4803 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4804 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4805 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4806 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4807
4808 /* %fcc[0123] */
4809 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4810
4811 /* %icc, %sfp, %gsr */
4812 CC_MODES, 0, D_MODES
4813 };
4814
4815 int sparc_mode_class [NUM_MACHINE_MODES];
4816
4817 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4818
4819 static void
4820 sparc_init_modes (void)
4821 {
4822 int i;
4823
4824 for (i = 0; i < NUM_MACHINE_MODES; i++)
4825 {
4826 enum machine_mode m = (enum machine_mode) i;
4827 unsigned int size = GET_MODE_SIZE (m);
4828
4829 switch (GET_MODE_CLASS (m))
4830 {
4831 case MODE_INT:
4832 case MODE_PARTIAL_INT:
4833 case MODE_COMPLEX_INT:
4834 if (size < 4)
4835 sparc_mode_class[i] = 1 << (int) H_MODE;
4836 else if (size == 4)
4837 sparc_mode_class[i] = 1 << (int) S_MODE;
4838 else if (size == 8)
4839 sparc_mode_class[i] = 1 << (int) D_MODE;
4840 else if (size == 16)
4841 sparc_mode_class[i] = 1 << (int) T_MODE;
4842 else if (size == 32)
4843 sparc_mode_class[i] = 1 << (int) O_MODE;
4844 else
4845 sparc_mode_class[i] = 0;
4846 break;
4847 case MODE_VECTOR_INT:
4848 if (size == 4)
4849 sparc_mode_class[i] = 1 << (int) SF_MODE;
4850 else if (size == 8)
4851 sparc_mode_class[i] = 1 << (int) DF_MODE;
4852 else
4853 sparc_mode_class[i] = 0;
4854 break;
4855 case MODE_FLOAT:
4856 case MODE_COMPLEX_FLOAT:
4857 if (size == 4)
4858 sparc_mode_class[i] = 1 << (int) SF_MODE;
4859 else if (size == 8)
4860 sparc_mode_class[i] = 1 << (int) DF_MODE;
4861 else if (size == 16)
4862 sparc_mode_class[i] = 1 << (int) TF_MODE;
4863 else if (size == 32)
4864 sparc_mode_class[i] = 1 << (int) OF_MODE;
4865 else
4866 sparc_mode_class[i] = 0;
4867 break;
4868 case MODE_CC:
4869 if (m == CCFPmode || m == CCFPEmode)
4870 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4871 else
4872 sparc_mode_class[i] = 1 << (int) CC_MODE;
4873 break;
4874 default:
4875 sparc_mode_class[i] = 0;
4876 break;
4877 }
4878 }
4879
4880 if (TARGET_ARCH64)
4881 hard_regno_mode_classes = hard_64bit_mode_classes;
4882 else
4883 hard_regno_mode_classes = hard_32bit_mode_classes;
4884
4885 /* Initialize the array used by REGNO_REG_CLASS. */
4886 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4887 {
4888 if (i < 16 && TARGET_V8PLUS)
4889 sparc_regno_reg_class[i] = I64_REGS;
4890 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4891 sparc_regno_reg_class[i] = GENERAL_REGS;
4892 else if (i < 64)
4893 sparc_regno_reg_class[i] = FP_REGS;
4894 else if (i < 96)
4895 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4896 else if (i < 100)
4897 sparc_regno_reg_class[i] = FPCC_REGS;
4898 else
4899 sparc_regno_reg_class[i] = NO_REGS;
4900 }
4901 }
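
/* Editor's note (a sketch of how the tables are consumed; the exact macro
   lives in sparc.h): a register/mode pair is acceptable when the mode's
   class bit is present in the register's mask, roughly

       (hard_regno_mode_classes[REGNO] & sparc_mode_class[MODE]) != 0

   which is what HARD_REGNO_MODE_OK boils down to.  */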
4902 \f
4903 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4904
4905 static inline bool
4906 save_global_or_fp_reg_p (unsigned int regno,
4907 int leaf_function ATTRIBUTE_UNUSED)
4908 {
4909 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4910 }
4911
4912 /* Return whether the return address register (%i7) is needed. */
4913
4914 static inline bool
4915 return_addr_reg_needed_p (int leaf_function)
4916 {
4917 /* If it is live, for example because of __builtin_return_address (0). */
4918 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4919 return true;
4920
4921 /* Otherwise, it is needed as a save register if %o7 is clobbered. */
4922 if (!leaf_function
4923 /* Loading the GOT register clobbers %o7. */
4924 || crtl->uses_pic_offset_table
4925 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4926 return true;
4927
4928 return false;
4929 }
4930
4931 /* Return whether REGNO, a local or in register, must be saved/restored. */
4932
4933 static bool
4934 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4935 {
4936 /* General case: call-saved registers live at some point. */
4937 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4938 return true;
4939
4940 /* Frame pointer register (%fp) if needed. */
4941 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4942 return true;
4943
4944 /* Return address register (%i7) if needed. */
4945 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4946 return true;
4947
4948 /* GOT register (%l7) if needed. */
4949 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4950 return true;
4951
4952 /* If the function accesses prior frames, the frame pointer and the return
4953 address of the previous frame must be saved on the stack. */
4954 if (crtl->accesses_prior_frames
4955 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
4956 return true;
4957
4958 return false;
4959 }
4960
4961 /* Compute the frame size required by the function. This function is called
4962 during the reload pass and also by sparc_expand_prologue. */
4963
4964 HOST_WIDE_INT
4965 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
4966 {
4967 HOST_WIDE_INT frame_size, apparent_frame_size;
4968 int args_size, n_global_fp_regs = 0;
4969 bool save_local_in_regs_p = false;
4970 unsigned int i;
4971
4972 /* If the function allocates dynamic stack space, the dynamic offset is
4973 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
4974 if (leaf_function && !cfun->calls_alloca)
4975 args_size = 0;
4976 else
4977 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
4978
4979 /* Calculate space needed for global registers. */
4980 if (TARGET_ARCH64)
4981 { for (i = 0; i < 8; i++)
4982 if (save_global_or_fp_reg_p (i, 0))
4983 n_global_fp_regs += 2; }
4984 else
4985 { for (i = 0; i < 8; i += 2)
4986 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4987 n_global_fp_regs += 2; }
4988
4989 /* In the flat window model, find out which local and in registers need to
4990 be saved. We don't reserve space in the current frame for them as they
4991 will be spilled into the register window save area of the caller's frame.
4992 However, as soon as we use this register window save area, we must create
4993 that of the current frame to make it the live one. */
4994 if (TARGET_FLAT)
4995 for (i = 16; i < 32; i++)
4996 if (save_local_or_in_reg_p (i, leaf_function))
4997 {
4998 save_local_in_regs_p = true;
4999 break;
5000 }
5001
5002 /* Calculate space needed for FP registers. */
5003 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5004 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5005 n_global_fp_regs += 2;
5006
5007 if (size == 0
5008 && n_global_fp_regs == 0
5009 && args_size == 0
5010 && !save_local_in_regs_p)
5011 frame_size = apparent_frame_size = 0;
5012 else
5013 {
5014 /* We subtract STARTING_FRAME_OFFSET; remember, it's negative. */
5015 apparent_frame_size = (size - STARTING_FRAME_OFFSET + 7) & -8;
5016 apparent_frame_size += n_global_fp_regs * 4;
5017
5018 /* We need to add the size of the outgoing argument area. */
5019 frame_size = apparent_frame_size + ((args_size + 7) & -8);
5020
5021 /* And that of the register window save area. */
5022 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5023
5024 /* Finally, bump to the appropriate alignment. */
5025 frame_size = SPARC_STACK_ALIGN (frame_size);
5026 }
5027
5028 /* Set up values for use in prologue and epilogue. */
5029 sparc_frame_size = frame_size;
5030 sparc_apparent_frame_size = apparent_frame_size;
5031 sparc_n_global_fp_regs = n_global_fp_regs;
5032 sparc_save_local_in_regs_p = save_local_in_regs_p;
5033
5034 return frame_size;
5035 }
5036
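/* Worked example (editor's note, 32-bit, hypothetical values): with
   size == 20, args_size == 24, no global/FP saves and
   STARTING_FRAME_OFFSET == 0,

       apparent_frame_size = (20 + 7) & -8        =  24
       frame_size          = 24 + ((24 + 7) & -8) =  48
       frame_size         += FIRST_PARM_OFFSET    = 116   (assuming 68:
                             64-byte window save area + 4-byte aggregate
                             return slot)
       frame_size          = SPARC_STACK_ALIGN (116) = 120.  */
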
5037 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5038
5039 int
5040 sparc_initial_elimination_offset (int to)
5041 {
5042 int offset;
5043
5044 if (to == STACK_POINTER_REGNUM)
5045 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5046 else
5047 offset = 0;
5048
5049 offset += SPARC_STACK_BIAS;
5050 return offset;
5051 }
5052
5053 /* Output any necessary .register pseudo-ops. */
5054
5055 void
5056 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5057 {
5058 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5059 int i;
5060
5061 if (TARGET_ARCH32)
5062 return;
5063
5064 /* Check if %g[2367] were used without
5065 .register being printed for them already. */
5066 for (i = 2; i < 8; i++)
5067 {
5068 if (df_regs_ever_live_p (i)
5069 && ! sparc_hard_reg_printed [i])
5070 {
5071 sparc_hard_reg_printed [i] = 1;
5072 /* %g7 is used as TLS base register, use #ignore
5073 for it instead of #scratch. */
5074 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5075 i == 7 ? "ignore" : "scratch");
5076 }
5077 if (i == 3) i = 5;
5078 }
5079 #endif
5080 }
5081
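/* Example output (editor's note), for a function that clobbers %g2 and %g7:

       .register %g2, #scratch
       .register %g7, #ignore  */
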
5082 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5083
5084 #if PROBE_INTERVAL > 4096
5085 #error Cannot use indexed addressing mode for stack probing
5086 #endif
5087
5088 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5089 inclusive. These are offsets from the current stack pointer.
5090
5091 Note that we don't use the REG+REG addressing mode for the probes because
5092 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5093 anyway, so the advantages of having a single code path win here. */
5094
5095 static void
5096 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5097 {
5098 rtx g1 = gen_rtx_REG (Pmode, 1);
5099
5100 /* See if we have a constant small number of probes to generate. If so,
5101 that's the easy case. */
5102 if (size <= PROBE_INTERVAL)
5103 {
5104 emit_move_insn (g1, GEN_INT (first));
5105 emit_insn (gen_rtx_SET (VOIDmode, g1,
5106 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5107 emit_stack_probe (plus_constant (Pmode, g1, -size));
5108 }
5109
5110 /* The run-time loop is made up of 10 insns in the generic case while the
5111 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
5112 else if (size <= 5 * PROBE_INTERVAL)
5113 {
5114 HOST_WIDE_INT i;
5115
5116 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5117 emit_insn (gen_rtx_SET (VOIDmode, g1,
5118 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5119 emit_stack_probe (g1);
5120
5121 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5122 it exceeds SIZE. If only two probes are needed, this will not
5123 generate any code. Then probe at FIRST + SIZE. */
5124 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5125 {
5126 emit_insn (gen_rtx_SET (VOIDmode, g1,
5127 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5128 emit_stack_probe (g1);
5129 }
5130
5131 emit_stack_probe (plus_constant (Pmode, g1,
5132 (i - PROBE_INTERVAL) - size));
5133 }
5134
5135 /* Otherwise, do the same as above, but in a loop. Note that we must be
5136 extra careful with variables wrapping around because we might be at
5137 the very top (or the very bottom) of the address space and we have
5138 to be able to handle this case properly; in particular, we use an
5139 equality test for the loop condition. */
5140 else
5141 {
5142 HOST_WIDE_INT rounded_size;
5143 rtx g4 = gen_rtx_REG (Pmode, 4);
5144
5145 emit_move_insn (g1, GEN_INT (first));
5146
5147
5148 /* Step 1: round SIZE to the previous multiple of the interval. */
5149
5150 rounded_size = size & -PROBE_INTERVAL;
5151 emit_move_insn (g4, GEN_INT (rounded_size));
5152
5153
5154 /* Step 2: compute initial and final value of the loop counter. */
5155
5156 /* TEST_ADDR = SP + FIRST. */
5157 emit_insn (gen_rtx_SET (VOIDmode, g1,
5158 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5159
5160 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5161 emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4)));
5162
5163
5164 /* Step 3: the loop
5165
5166 while (TEST_ADDR != LAST_ADDR)
5167 {
5168 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5169 probe at TEST_ADDR
5170 }
5171
5172 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5173 until it is equal to ROUNDED_SIZE. */
5174
5175 if (TARGET_ARCH64)
5176 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5177 else
5178 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5179
5180
5181 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5182 that SIZE is equal to ROUNDED_SIZE. */
5183
5184 if (size != rounded_size)
5185 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5186 }
5187
5188 /* Make sure nothing is scheduled before we are done. */
5189 emit_insn (gen_blockage ());
5190 }
5191
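/* Worked example (editor's note): with PROBE_INTERVAL == 4096,
   first == 16384 and size == 12288 take the unrolled branch above;
   probes are emitted at sp - 20480 and sp - 24576, and the final
   emit_stack_probe hits sp - 28672 == sp - (first + size).  Only a size
   above 5 * PROBE_INTERVAL falls through to the run-time loop.  */
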
5192 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5193 absolute addresses. */
5194
5195 const char *
5196 output_probe_stack_range (rtx reg1, rtx reg2)
5197 {
5198 static int labelno = 0;
5199 char loop_lab[32], end_lab[32];
5200 rtx xops[2];
5201
5202 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
5203 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
5204
5205 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5206
5207 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
5208 xops[0] = reg1;
5209 xops[1] = reg2;
5210 output_asm_insn ("cmp\t%0, %1", xops);
5211 if (TARGET_ARCH64)
5212 fputs ("\tbe,pn\t%xcc,", asm_out_file);
5213 else
5214 fputs ("\tbe\t", asm_out_file);
5215 assemble_name_raw (asm_out_file, end_lab);
5216 fputc ('\n', asm_out_file);
5217
5218 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5219 xops[1] = GEN_INT (-PROBE_INTERVAL);
5220 output_asm_insn (" add\t%0, %1, %0", xops);
5221
5222 /* Probe at TEST_ADDR and branch. */
5223 if (TARGET_ARCH64)
5224 fputs ("\tba,pt\t%xcc,", asm_out_file);
5225 else
5226 fputs ("\tba\t", asm_out_file);
5227 assemble_name_raw (asm_out_file, loop_lab);
5228 fputc ('\n', asm_out_file);
5229 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5230 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5231
5232 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
5233
5234 return "";
5235 }
5236
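/* Editor's sketch of the sequence emitted above (32-bit flavor, where
   SPARC_STACK_BIAS is 0; the add and the st sit in the delay slots of
   the branches):

   .LPSRL0:
       cmp  %g1, %g4
       be   .LPSRE0
        add %g1, -4096, %g1
       ba   .LPSRL0
        st  %g0, [%g1+0]
   .LPSRE0:  */
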
5237 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5238 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5239 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5240 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5241 the action to be performed if it returns false. Return the new offset. */
5242
5243 typedef bool (*sorr_pred_t) (unsigned int, int);
5244 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5245
5246 static int
5247 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5248 int offset, int leaf_function, sorr_pred_t save_p,
5249 sorr_act_t action_true, sorr_act_t action_false)
5250 {
5251 unsigned int i;
5252 rtx mem;
5253 rtx_insn *insn;
5254
5255 if (TARGET_ARCH64 && high <= 32)
5256 {
5257 int fp_offset = -1;
5258
5259 for (i = low; i < high; i++)
5260 {
5261 if (save_p (i, leaf_function))
5262 {
5263 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5264 base, offset));
5265 if (action_true == SORR_SAVE)
5266 {
5267 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5268 RTX_FRAME_RELATED_P (insn) = 1;
5269 }
5270 else /* action_true == SORR_RESTORE */
5271 {
5272 /* The frame pointer must be restored last since its old
5273 value may be used as the base address for the frame. This
5274 is problematic in 64-bit mode only because of the lack
5275 of a double-word load instruction. */
5276 if (i == HARD_FRAME_POINTER_REGNUM)
5277 fp_offset = offset;
5278 else
5279 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5280 }
5281 offset += 8;
5282 }
5283 else if (action_false == SORR_ADVANCE)
5284 offset += 8;
5285 }
5286
5287 if (fp_offset >= 0)
5288 {
5289 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5290 emit_move_insn (hard_frame_pointer_rtx, mem);
5291 }
5292 }
5293 else
5294 {
5295 for (i = low; i < high; i += 2)
5296 {
5297 bool reg0 = save_p (i, leaf_function);
5298 bool reg1 = save_p (i + 1, leaf_function);
5299 enum machine_mode mode;
5300 int regno;
5301
5302 if (reg0 && reg1)
5303 {
5304 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5305 regno = i;
5306 }
5307 else if (reg0)
5308 {
5309 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5310 regno = i;
5311 }
5312 else if (reg1)
5313 {
5314 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5315 regno = i + 1;
5316 offset += 4;
5317 }
5318 else
5319 {
5320 if (action_false == SORR_ADVANCE)
5321 offset += 8;
5322 continue;
5323 }
5324
5325 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5326 if (action_true == SORR_SAVE)
5327 {
5328 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5329 RTX_FRAME_RELATED_P (insn) = 1;
5330 if (mode == DImode)
5331 {
5332 rtx set1, set2;
5333 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5334 offset));
5335 set1 = gen_rtx_SET (VOIDmode, mem,
5336 gen_rtx_REG (SImode, regno));
5337 RTX_FRAME_RELATED_P (set1) = 1;
5338 mem
5339 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5340 offset + 4));
5341 set2 = gen_rtx_SET (VOIDmode, mem,
5342 gen_rtx_REG (SImode, regno + 1));
5343 RTX_FRAME_RELATED_P (set2) = 1;
5344 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5345 gen_rtx_PARALLEL (VOIDmode,
5346 gen_rtvec (2, set1, set2)));
5347 }
5348 }
5349 else /* action_true == SORR_RESTORE */
5350 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5351
5352 /* Always preserve double-word alignment. */
5353 offset = (offset + 8) & -8;
5354 }
5355 }
5356
5357 return offset;
5358 }
5359
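/* Example (editor's note): on 32-bit, if both %l0 and %l1 need saving, the
   second branch above emits a single DImode (std) store for the pair; if
   only %l1 does, it emits one SImode store at offset + 4.  Either way the
   "offset = (offset + 8) & -8" step advances to the next 8-byte slot.  */
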
5360 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5361
5362 static rtx
5363 emit_adjust_base_to_offset (rtx base, int offset)
5364 {
5365 /* ??? This might be optimized a little as %g1 might already have a
5366 value close enough that a single add insn will do. */
5367 /* ??? Although, all of this is probably only a temporary fix because
5368 if %g1 can hold a function result, then sparc_expand_epilogue will
5369 lose (the result will be clobbered). */
5370 rtx new_base = gen_rtx_REG (Pmode, 1);
5371 emit_move_insn (new_base, GEN_INT (offset));
5372 emit_insn (gen_rtx_SET (VOIDmode,
5373 new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5374 return new_base;
5375 }
5376
5377 /* Emit code to save/restore call-saved global and FP registers. */
5378
5379 static void
5380 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5381 {
5382 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5383 {
5384 base = emit_adjust_base_to_offset (base, offset);
5385 offset = 0;
5386 }
5387
5388 offset
5389 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5390 save_global_or_fp_reg_p, action, SORR_NONE);
5391 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5392 save_global_or_fp_reg_p, action, SORR_NONE);
5393 }
5394
5395 /* Emit code to save/restore call-saved local and in registers. */
5396
5397 static void
5398 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5399 {
5400 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5401 {
5402 base = emit_adjust_base_to_offset (base, offset);
5403 offset = 0;
5404 }
5405
5406 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5407 save_local_or_in_reg_p, action, SORR_ADVANCE);
5408 }
5409
5410 /* Emit a window_save insn. */
5411
5412 static rtx_insn *
5413 emit_window_save (rtx increment)
5414 {
5415 rtx_insn *insn = emit_insn (gen_window_save (increment));
5416 RTX_FRAME_RELATED_P (insn) = 1;
5417
5418 /* The incoming return address (%o7) is saved in %i7. */
5419 add_reg_note (insn, REG_CFA_REGISTER,
5420 gen_rtx_SET (VOIDmode,
5421 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5422 gen_rtx_REG (Pmode,
5423 INCOMING_RETURN_ADDR_REGNUM)));
5424
5425 /* The window save event. */
5426 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5427
5428 /* The CFA is %fp, the hard frame pointer. */
5429 add_reg_note (insn, REG_CFA_DEF_CFA,
5430 plus_constant (Pmode, hard_frame_pointer_rtx,
5431 INCOMING_FRAME_SP_OFFSET));
5432
5433 return insn;
5434 }
5435
5436 /* Generate an increment for the stack pointer. */
5437
5438 static rtx
5439 gen_stack_pointer_inc (rtx increment)
5440 {
5441 return gen_rtx_SET (VOIDmode,
5442 stack_pointer_rtx,
5443 gen_rtx_PLUS (Pmode,
5444 stack_pointer_rtx,
5445 increment));
5446 }
5447
5448 /* Expand the function prologue. The prologue is responsible for reserving
5449 storage for the frame, saving the call-saved registers and loading the
5450 GOT register if needed. */
5451
5452 void
5453 sparc_expand_prologue (void)
5454 {
5455 HOST_WIDE_INT size;
5456 rtx_insn *insn;
5457
5458 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5459 on the final value of the flag means deferring the prologue/epilogue
5460 expansion until just before the second scheduling pass, which is too
5461 late to emit multiple epilogues or return insns.
5462
5463 Of course we are making the assumption that the value of the flag
5464 will not change between now and its final value. Of the three parts
5465 of the formula, only the last one can reasonably vary. Let's take a
5466 closer look, after assuming that the first two are set to true
5467 (otherwise the last one is effectively silenced).
5468
5469 If only_leaf_regs_used returns false, the global predicate will also
5470 be false so the actual frame size calculated below will be positive.
5471 As a consequence, the save_register_window insn will be emitted in
5472 the instruction stream; now this insn explicitly references %fp
5473 which is not a leaf register so only_leaf_regs_used will always
5474 return false subsequently.
5475
5476 If only_leaf_regs_used returns true, we hope that the subsequent
5477 optimization passes won't cause non-leaf registers to pop up. For
5478 example, the regrename pass has special provisions to not rename to
5479 non-leaf registers in a leaf function. */
5480 sparc_leaf_function_p
5481 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5482
5483 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5484
5485 if (flag_stack_usage_info)
5486 current_function_static_stack_size = size;
5487
5488 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5489 {
5490 if (crtl->is_leaf && !cfun->calls_alloca)
5491 {
5492 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5493 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5494 size - STACK_CHECK_PROTECT);
5495 }
5496 else if (size > 0)
5497 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5498 }
5499
5500 if (size == 0)
5501 ; /* do nothing. */
5502 else if (sparc_leaf_function_p)
5503 {
5504 rtx size_int_rtx = GEN_INT (-size);
5505
5506 if (size <= 4096)
5507 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5508 else if (size <= 8192)
5509 {
5510 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5511 RTX_FRAME_RELATED_P (insn) = 1;
5512
5513 /* %sp is still the CFA register. */
5514 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5515 }
5516 else
5517 {
5518 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5519 emit_move_insn (size_rtx, size_int_rtx);
5520 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5521 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5522 gen_stack_pointer_inc (size_int_rtx));
5523 }
5524
5525 RTX_FRAME_RELATED_P (insn) = 1;
5526 }
5527 else
5528 {
5529 rtx size_int_rtx = GEN_INT (-size);
5530
5531 if (size <= 4096)
5532 emit_window_save (size_int_rtx);
5533 else if (size <= 8192)
5534 {
5535 emit_window_save (GEN_INT (-4096));
5536
5537 /* %sp is not the CFA register anymore. */
5538 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5539
5540 /* Make sure no %fp-based store is issued until after the frame is
5541 established. The offset between the frame pointer and the stack
5542 pointer is calculated relative to the value of the stack pointer
5543 at the end of the function prologue, and moving instructions that
5544 access the stack via the frame pointer between the instructions
5545 that decrement the stack pointer could result in accessing the
5546 register window save area, which is volatile. */
5547 emit_insn (gen_frame_blockage ());
5548 }
5549 else
5550 {
5551 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5552 emit_move_insn (size_rtx, size_int_rtx);
5553 emit_window_save (size_rtx);
5554 }
5555 }
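/* Worked example for the size thresholds above (editorial note): the add
   instruction has a signed 13-bit immediate field, i.e. -4096..4095.  A
   6000-byte frame therefore cannot be allocated with a single immediate
   and is split into two increments that both fit:

       add  %sp, -4096, %sp
       add  %sp, -1904, %sp    ! 4096 - 6000

   whereas frames larger than 8192 bytes first load -size into %g1.  */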
5556
5557 if (sparc_leaf_function_p)
5558 {
5559 sparc_frame_base_reg = stack_pointer_rtx;
5560 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5561 }
5562 else
5563 {
5564 sparc_frame_base_reg = hard_frame_pointer_rtx;
5565 sparc_frame_base_offset = SPARC_STACK_BIAS;
5566 }
5567
5568 if (sparc_n_global_fp_regs > 0)
5569 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5570 sparc_frame_base_offset
5571 - sparc_apparent_frame_size,
5572 SORR_SAVE);
5573
5574 /* Load the GOT register if needed. */
5575 if (crtl->uses_pic_offset_table)
5576 load_got_register ();
5577
5578 /* Advertise that the data calculated just above are now valid. */
5579 sparc_prologue_data_valid_p = true;
5580 }
5581
5582 /* Expand the function prologue for the flat register window model.  The
5583 prologue is responsible for reserving storage for the frame, saving the
5584 call-saved registers and loading the GOT register if needed.  */
5585
5586 void
5587 sparc_flat_expand_prologue (void)
5588 {
5589 HOST_WIDE_INT size;
5590 rtx_insn *insn;
5591
5592 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5593
5594 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5595
5596 if (flag_stack_usage_info)
5597 current_function_static_stack_size = size;
5598
5599 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5600 {
5601 if (crtl->is_leaf && !cfun->calls_alloca)
5602 {
5603 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5604 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5605 size - STACK_CHECK_PROTECT);
5606 }
5607 else if (size > 0)
5608 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5609 }
5610
5611 if (sparc_save_local_in_regs_p)
5612 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5613 SORR_SAVE);
5614
5615 if (size == 0)
5616 ; /* do nothing. */
5617 else
5618 {
5619 rtx size_int_rtx, size_rtx;
5620
5621 size_rtx = size_int_rtx = GEN_INT (-size);
5622
5623 /* We establish the frame (i.e. decrement the stack pointer) first, even
5624 if we use a frame pointer, because we cannot clobber any call-saved
5625 registers, including the frame pointer, if we haven't created a new
5626 register save area, for the sake of compatibility with the ABI. */
5627 if (size <= 4096)
5628 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5629 else if (size <= 8192 && !frame_pointer_needed)
5630 {
5631 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5632 RTX_FRAME_RELATED_P (insn) = 1;
5633 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5634 }
5635 else
5636 {
5637 size_rtx = gen_rtx_REG (Pmode, 1);
5638 emit_move_insn (size_rtx, size_int_rtx);
5639 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5640 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5641 gen_stack_pointer_inc (size_int_rtx));
5642 }
5643 RTX_FRAME_RELATED_P (insn) = 1;
5644
5645 /* Ensure nothing is scheduled until after the frame is established. */
5646 emit_insn (gen_blockage ());
5647
5648 if (frame_pointer_needed)
5649 {
5650 insn = emit_insn (gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5651 gen_rtx_MINUS (Pmode,
5652 stack_pointer_rtx,
5653 size_rtx)));
5654 RTX_FRAME_RELATED_P (insn) = 1;
5655
5656 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5657 gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5658 plus_constant (Pmode, stack_pointer_rtx,
5659 size)));
5660 }
5661
5662 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5663 {
5664 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5665 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5666
5667 insn = emit_move_insn (i7, o7);
5668 RTX_FRAME_RELATED_P (insn) = 1;
5669
5670 add_reg_note (insn, REG_CFA_REGISTER,
5671 gen_rtx_SET (VOIDmode, i7, o7));
5672
5673 /* Prevent this instruction from ever being considered dead,
5674 even if this function has no epilogue. */
5675 emit_use (i7);
5676 }
5677 }
5678
5679 if (frame_pointer_needed)
5680 {
5681 sparc_frame_base_reg = hard_frame_pointer_rtx;
5682 sparc_frame_base_offset = SPARC_STACK_BIAS;
5683 }
5684 else
5685 {
5686 sparc_frame_base_reg = stack_pointer_rtx;
5687 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5688 }
5689
5690 if (sparc_n_global_fp_regs > 0)
5691 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5692 sparc_frame_base_offset
5693 - sparc_apparent_frame_size,
5694 SORR_SAVE);
5695
5696 /* Load the GOT register if needed. */
5697 if (crtl->uses_pic_offset_table)
5698 load_got_register ();
5699
5700 /* Advertise that the data calculated just above are now valid. */
5701 sparc_prologue_data_valid_p = true;
5702 }
5703
5704 /* This function generates the assembly code for function entry, which boils
5705 down to emitting the necessary .register directives. */
5706
5707 static void
5708 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5709 {
5710 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5711 if (!TARGET_FLAT)
5712 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5713
5714 sparc_output_scratch_registers (file);
5715 }
5716
5717 /* Expand the function epilogue, either normal or part of a sibcall.
5718 We emit all the instructions except the return or the call. */
5719
5720 void
5721 sparc_expand_epilogue (bool for_eh)
5722 {
5723 HOST_WIDE_INT size = sparc_frame_size;
5724
5725 if (sparc_n_global_fp_regs > 0)
5726 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5727 sparc_frame_base_offset
5728 - sparc_apparent_frame_size,
5729 SORR_RESTORE);
5730
5731 if (size == 0 || for_eh)
5732 ; /* do nothing. */
5733 else if (sparc_leaf_function_p)
5734 {
5735 if (size <= 4096)
5736 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5737 else if (size <= 8192)
5738 {
5739 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5740 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5741 }
5742 else
5743 {
5744 rtx reg = gen_rtx_REG (Pmode, 1);
5745 emit_move_insn (reg, GEN_INT (size));
5746 emit_insn (gen_stack_pointer_inc (reg));
5747 }
5748 }
5749 }
5750
5751 /* Expand the function epilogue, either normal or part of a sibcall.
5752 We emit all the instructions except the return or the call. */
5753
5754 void
5755 sparc_flat_expand_epilogue (bool for_eh)
5756 {
5757 HOST_WIDE_INT size = sparc_frame_size;
5758
5759 if (sparc_n_global_fp_regs > 0)
5760 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5761 sparc_frame_base_offset
5762 - sparc_apparent_frame_size,
5763 SORR_RESTORE);
5764
5765 /* If we have a frame pointer, we'll need to restore it before the
5766 frame is destroyed and also use its current value to destroy the frame.
5767 Since we don't have an atomic way to do that in the flat window model,
5768 we save the current value into a temporary register (%g1). */
5769 if (frame_pointer_needed && !for_eh)
5770 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5771
5772 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5773 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5774 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5775
5776 if (sparc_save_local_in_regs_p)
5777 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5778 sparc_frame_base_offset,
5779 SORR_RESTORE);
5780
5781 if (size == 0 || for_eh)
5782 ; /* do nothing. */
5783 else if (frame_pointer_needed)
5784 {
5785 /* Make sure the frame is destroyed after everything else is done. */
5786 emit_insn (gen_blockage ());
5787
5788 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5789 }
5790 else
5791 {
5792 /* Likewise. */
5793 emit_insn (gen_blockage ());
5794
5795 if (size <= 4096)
5796 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5797 else if (size <= 8192)
5798 {
5799 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5800 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5801 }
5802 else
5803 {
5804 rtx reg = gen_rtx_REG (Pmode, 1);
5805 emit_move_insn (reg, GEN_INT (size));
5806 emit_insn (gen_stack_pointer_inc (reg));
5807 }
5808 }
5809 }
5810
5811 /* Return true if it is appropriate to emit `return' instructions in the
5812 body of a function. */
5813
5814 bool
5815 sparc_can_use_return_insn_p (void)
5816 {
5817 return sparc_prologue_data_valid_p
5818 && sparc_n_global_fp_regs == 0
5819 && (TARGET_FLAT
5820 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5821 : (sparc_frame_size == 0 || !sparc_leaf_function_p));
5822 }
5823
5824 /* This function generates the assembly code for function exit. */
5825
5826 static void
5827 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5828 {
5829 /* If the last two instructions of a function are "call foo; dslot;",
5830 the return address might point to the first instruction in the next
5831 function, so we have to output a dummy nop for the sake of sane
5832 backtraces in such cases.  This is pointless for sibling calls since
5833 the return address is explicitly adjusted.  */
5834
5835 rtx insn, last_real_insn;
5836
5837 insn = get_last_insn ();
5838
5839 last_real_insn = prev_real_insn (insn);
5840 if (last_real_insn
5841 && NONJUMP_INSN_P (last_real_insn)
5842 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5843 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5844
5845 if (last_real_insn
5846 && CALL_P (last_real_insn)
5847 && !SIBLING_CALL_P (last_real_insn))
5848 fputs("\tnop\n", file);
5849
5850 sparc_output_deferred_case_vectors ();
5851 }
5852
5853 /* Output a 'restore' instruction. */
5854
5855 static void
5856 output_restore (rtx pat)
5857 {
5858 rtx operands[3];
5859
5860 if (! pat)
5861 {
5862 fputs ("\t restore\n", asm_out_file);
5863 return;
5864 }
5865
5866 gcc_assert (GET_CODE (pat) == SET);
5867
5868 operands[0] = SET_DEST (pat);
5869 pat = SET_SRC (pat);
5870
5871 switch (GET_CODE (pat))
5872 {
5873 case PLUS:
5874 operands[1] = XEXP (pat, 0);
5875 operands[2] = XEXP (pat, 1);
5876 output_asm_insn (" restore %r1, %2, %Y0", operands);
5877 break;
5878 case LO_SUM:
5879 operands[1] = XEXP (pat, 0);
5880 operands[2] = XEXP (pat, 1);
5881 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5882 break;
5883 case ASHIFT:
5884 operands[1] = XEXP (pat, 0);
5885 gcc_assert (XEXP (pat, 1) == const1_rtx);
5886 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5887 break;
5888 default:
5889 operands[1] = pat;
5890 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5891 break;
5892 }
5893 }
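/* Illustrative mappings from SET patterns to restore forms (assuming
   typical operands; %Y0 prints the destination as the caller will see
   it after the window shift):

       (set %i0 (plus %i1 (const_int 8)))    ->  restore %i1, 8, %o0
       (set %i0 (ashift %i1 (const_int 1)))  ->  restore %i1, %i1, %o0
       (set %i0 (reg %i1))                   ->  restore %g0, %i1, %o0  */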
5894
5895 /* Output a return. */
5896
5897 const char *
5898 output_return (rtx_insn *insn)
5899 {
5900 if (crtl->calls_eh_return)
5901 {
5902 /* If the function uses __builtin_eh_return, the eh_return
5903 machinery occupies the delay slot. */
5904 gcc_assert (!final_sequence);
5905
5906 if (flag_delayed_branch)
5907 {
5908 if (!TARGET_FLAT && TARGET_V9)
5909 fputs ("\treturn\t%i7+8\n", asm_out_file);
5910 else
5911 {
5912 if (!TARGET_FLAT)
5913 fputs ("\trestore\n", asm_out_file);
5914
5915 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5916 }
5917
5918 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5919 }
5920 else
5921 {
5922 if (!TARGET_FLAT)
5923 fputs ("\trestore\n", asm_out_file);
5924
5925 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5926 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5927 }
5928 }
5929 else if (sparc_leaf_function_p || TARGET_FLAT)
5930 {
5931 /* This is a leaf or flat function so we don't have to bother restoring
5932 the register window, which frees us from dealing with the convoluted
5933 semantics of restore/return. We simply output the jump to the
5934 return address and the insn in the delay slot (if any). */
5935
5936 return "jmp\t%%o7+%)%#";
5937 }
5938 else
5939 {
5940 /* This is a regular function so we have to restore the register window.
5941 We may have a pending insn for the delay slot, which will be either
5942 combined with the 'restore' instruction or put in the delay slot of
5943 the 'return' instruction. */
5944
5945 if (final_sequence)
5946 {
5947 rtx delay, pat;
5948
5949 delay = NEXT_INSN (insn);
5950 gcc_assert (delay);
5951
5952 pat = PATTERN (delay);
5953
5954 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
5955 {
5956 epilogue_renumber (&pat, 0);
5957 return "return\t%%i7+%)%#";
5958 }
5959 else
5960 {
5961 output_asm_insn ("jmp\t%%i7+%)", NULL);
5962 output_restore (pat);
5963 PATTERN (delay) = gen_blockage ();
5964 INSN_CODE (delay) = -1;
5965 }
5966 }
5967 else
5968 {
5969 /* The delay slot is empty. */
5970 if (TARGET_V9)
5971 return "return\t%%i7+%)\n\t nop";
5972 else if (flag_delayed_branch)
5973 return "jmp\t%%i7+%)\n\t restore";
5974 else
5975 return "restore\n\tjmp\t%%o7+%)\n\t nop";
5976 }
5977 }
5978
5979 return "";
5980 }
5981
5982 /* Output a sibling call. */
5983
5984 const char *
5985 output_sibcall (rtx_insn *insn, rtx call_operand)
5986 {
5987 rtx operands[1];
5988
5989 gcc_assert (flag_delayed_branch);
5990
5991 operands[0] = call_operand;
5992
5993 if (sparc_leaf_function_p || TARGET_FLAT)
5994 {
5995 /* This is a leaf or flat function so we don't have to bother restoring
5996 the register window. We simply output the jump to the function and
5997 the insn in the delay slot (if any). */
5998
5999 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6000
6001 if (final_sequence)
6002 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6003 operands);
6004 else
6005 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6006 it into a branch if possible.  */
6007 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6008 operands);
6009 }
6010 else
6011 {
6012 /* This is a regular function so we have to restore the register window.
6013 We may have a pending insn for the delay slot, which will be combined
6014 with the 'restore' instruction. */
6015
6016 output_asm_insn ("call\t%a0, 0", operands);
6017
6018 if (final_sequence)
6019 {
6020 rtx_insn *delay = NEXT_INSN (insn);
6021 gcc_assert (delay);
6022
6023 output_restore (PATTERN (delay));
6024
6025 PATTERN (delay) = gen_blockage ();
6026 INSN_CODE (delay) = -1;
6027 }
6028 else
6029 output_restore (NULL_RTX);
6030 }
6031
6032 return "";
6033 }
6034 \f
6035 /* Functions for handling argument passing.
6036
6037 For 32-bit, the first 6 args are normally in registers and the rest are
6038 pushed. Any arg that starts within the first 6 words is at least
6039 partially passed in a register unless its data type forbids it.
6040
6041 For 64-bit, the argument registers are laid out as an array of 16 elements
6042 and arguments are added sequentially. The first 6 int args and up to the
6043 first 16 fp args (depending on size) are passed in regs.
6044
6045 Slot   Stack      Integral   Float   Float in structure   Double   Long Double
6046 ----   -----      --------   -----   ------------------   ------   -----------
6047  15    [SP+248]              %f31    %f30,%f31            %d30
6048  14    [SP+240]              %f29    %f28,%f29            %d28     %q28
6049  13    [SP+232]              %f27    %f26,%f27            %d26
6050  12    [SP+224]              %f25    %f24,%f25            %d24     %q24
6051  11    [SP+216]              %f23    %f22,%f23            %d22
6052  10    [SP+208]              %f21    %f20,%f21            %d20     %q20
6053   9    [SP+200]              %f19    %f18,%f19            %d18
6054   8    [SP+192]              %f17    %f16,%f17            %d16     %q16
6055   7    [SP+184]              %f15    %f14,%f15            %d14
6056   6    [SP+176]              %f13    %f12,%f13            %d12     %q12
6057   5    [SP+168]   %o5        %f11    %f10,%f11            %d10
6058   4    [SP+160]   %o4        %f9     %f8,%f9              %d8      %q8
6059   3    [SP+152]   %o3        %f7     %f6,%f7              %d6
6060   2    [SP+144]   %o2        %f5     %f4,%f5              %d4      %q4
6061   1    [SP+136]   %o1        %f3     %f2,%f3              %d2
6062   0    [SP+128]   %o0        %f1     %f0,%f1              %d0      %q0
6063
6064 Here SP = %sp with -mno-stack-bias, and %sp+stack_bias otherwise.
6065
6066 Integral arguments are always passed as 64-bit quantities appropriately
6067 extended.
6068
6069 Passing of floating point values is handled as follows.
6070 If a prototype is in scope:
6071 If the value is a named argument (i.e. not the `...' of a stdarg
6072 function) then the value is passed in the appropriate fp reg.
6074 If the value is part of the `...' and is passed in one of the first 6
6075 slots then the value is passed in the appropriate int reg.
6076 If the value is part of the `...' and is not passed in one of the first 6
6077 slots then the value is passed in memory.
6078 If a prototype is not in scope:
6079 If the value is one of the first 6 arguments the value is passed in the
6080 appropriate integer reg and the appropriate fp reg.
6081 If the value is not one of the first 6 arguments the value is passed in
6082 the appropriate fp reg and in memory.
6083
6084
6085 Summary of the calling conventions implemented by GCC on the SPARC:
6086
6087 32-bit ABI:
6088                             size   argument   return value
6089
6090 small integer               <4     int. reg.  int. reg.
6091 word                         4     int. reg.  int. reg.
6092 double word                  8     int. reg.  int. reg.
6093
6094 _Complex small integer      <8     int. reg.  int. reg.
6095 _Complex word                8     int. reg.  int. reg.
6096 _Complex double word        16     memory     int. reg.
6097
6098 vector integer             <=8     int. reg.  FP reg.
6099 vector integer              >8     memory     memory
6100
6101 float                        4     int. reg.  FP reg.
6102 double                       8     int. reg.  FP reg.
6103 long double                 16     memory     memory
6104
6105 _Complex float               8     memory     FP reg.
6106 _Complex double             16     memory     FP reg.
6107 _Complex long double        32     memory     FP reg.
6108
6109 vector float               any     memory     memory
6110
6111 aggregate                  any     memory     memory
6112
6113
6114
6115 64-bit ABI:
6116                             size   argument   return value
6117
6118 small integer               <8     int. reg.  int. reg.
6119 word                         8     int. reg.  int. reg.
6120 double word                 16     int. reg.  int. reg.
6121
6122 _Complex small integer     <16     int. reg.  int. reg.
6123 _Complex word               16     int. reg.  int. reg.
6124 _Complex double word        32     memory     int. reg.
6125
6126 vector integer            <=16     FP reg.    FP reg.
6127 vector integer        16<s<=32     memory     FP reg.
6128 vector integer             >32     memory     memory
6129
6130 float                        4     FP reg.    FP reg.
6131 double                       8     FP reg.    FP reg.
6132 long double                 16     FP reg.    FP reg.
6133
6134 _Complex float               8     FP reg.    FP reg.
6135 _Complex double             16     FP reg.    FP reg.
6136 _Complex long double        32     memory     FP reg.
6137
6138 vector float              <=16     FP reg.    FP reg.
6139 vector float          16<s<=32     memory     FP reg.
6140 vector float               >32     memory     memory
6141
6142 aggregate                 <=16     reg.       reg.
6143 aggregate             16<s<=32     memory     reg.
6144 aggregate                  >32     memory     memory
6145
6146
6147
6148 Note #1: complex floating-point types follow the extended SPARC ABIs as
6149 implemented by the Sun compiler.
6150
6151 Note #2: integral vector types follow the scalar floating-point types
6152 conventions to match what is implemented by the Sun VIS SDK.
6153
6154 Note #3: floating-point vector types follow the aggregate types
6155 conventions. */
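/* An illustrative declaration for the 64-bit slot table above
   (editorial example):

       void f (int a, double b, float c);

   'a' takes slot 0 and is passed in %o0, 'b' takes slot 1 and is passed
   in %d2, and 'c' takes slot 2 and is passed in %f5 -- a single float
   is right-justified in the odd half of its double FP register.  */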
6156
6157
6158 /* Maximum number of int regs for args. */
6159 #define SPARC_INT_ARG_MAX 6
6160 /* Maximum number of fp regs for args. */
6161 #define SPARC_FP_ARG_MAX 16
6162
6163 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
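/* E.g. with 8-byte words, ROUND_ADVANCE (12) is 2 and ROUND_ADVANCE (8)
   is 1: the number of argument slots the quantity occupies.  */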
6164
6165 /* Handle the INIT_CUMULATIVE_ARGS macro.
6166 Initialize a variable CUM of type CUMULATIVE_ARGS
6167 for a call to a function whose data type is FNTYPE.
6168 For a library call, FNTYPE is 0. */
6169
6170 void
6171 init_cumulative_args (struct sparc_args *cum, tree fntype,
6172 rtx libname ATTRIBUTE_UNUSED,
6173 tree fndecl ATTRIBUTE_UNUSED)
6174 {
6175 cum->words = 0;
6176 cum->prototype_p = fntype && prototype_p (fntype);
6177 cum->libcall_p = fntype == 0;
6178 }
6179
6180 /* Handle promotion of pointer and integer arguments. */
6181
6182 static enum machine_mode
6183 sparc_promote_function_mode (const_tree type,
6184 enum machine_mode mode,
6185 int *punsignedp,
6186 const_tree fntype ATTRIBUTE_UNUSED,
6187 int for_return ATTRIBUTE_UNUSED)
6188 {
6189 if (type != NULL_TREE && POINTER_TYPE_P (type))
6190 {
6191 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6192 return Pmode;
6193 }
6194
6195 /* Integral arguments are passed as full words, as per the ABI. */
6196 if (GET_MODE_CLASS (mode) == MODE_INT
6197 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6198 return word_mode;
6199
6200 return mode;
6201 }
6202
6203 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6204
6205 static bool
6206 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6207 {
6208 return TARGET_ARCH64;
6209 }
6210
6211 /* Scan the record type TYPE and return the following predicates:
6212 - INTREGS_P: the record contains at least one field or sub-field
6213 that is eligible for promotion in integer registers.
6214 - FPREGS_P: the record contains at least one field or sub-field
6215 that is eligible for promotion in floating-point registers.
6216 - PACKED_P: the record contains at least one field that is packed.
6217
6218 Sub-fields are not taken into account for the PACKED_P predicate. */
6219
6220 static void
6221 scan_record_type (const_tree type, int *intregs_p, int *fpregs_p,
6222 int *packed_p)
6223 {
6224 tree field;
6225
6226 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6227 {
6228 if (TREE_CODE (field) == FIELD_DECL)
6229 {
6230 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6231 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
6232 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6233 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6234 && TARGET_FPU)
6235 *fpregs_p = 1;
6236 else
6237 *intregs_p = 1;
6238
6239 if (packed_p && DECL_PACKED (field))
6240 *packed_p = 1;
6241 }
6242 }
6243 }
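/* For a hypothetical record (assuming TARGET_FPU):

       struct s { int i; float f; };

   the scan sets *INTREGS_P for 'i' and *FPREGS_P for 'f'; declaring the
   struct with __attribute__((packed)) would set *PACKED_P as well.  */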
6244
6245 /* Compute the slot number to pass an argument in.
6246 Return the slot number or -1 if passing on the stack.
6247
6248 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6249 the preceding args and about the function being called.
6250 MODE is the argument's machine mode.
6251 TYPE is the data type of the argument (as a tree).
6252 This is null for libcalls where that information may
6253 not be available.
6254 NAMED is nonzero if this argument is a named parameter
6255 (otherwise it is an extra parameter matching an ellipsis).
6256 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6257 *PREGNO records the register number to use if scalar type.
6258 *PPADDING records the amount of padding needed in words. */
6259
6260 static int
6261 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
6262 const_tree type, bool named, bool incoming_p,
6263 int *pregno, int *ppadding)
6264 {
6265 int regbase = (incoming_p
6266 ? SPARC_INCOMING_INT_ARG_FIRST
6267 : SPARC_OUTGOING_INT_ARG_FIRST);
6268 int slotno = cum->words;
6269 enum mode_class mclass;
6270 int regno;
6271
6272 *ppadding = 0;
6273
6274 if (type && TREE_ADDRESSABLE (type))
6275 return -1;
6276
6277 if (TARGET_ARCH32
6278 && mode == BLKmode
6279 && type
6280 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6281 return -1;
6282
6283 /* For SPARC64, objects requiring 16-byte alignment get it. */
6284 if (TARGET_ARCH64
6285 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6286 && (slotno & 1) != 0)
6287 slotno++, *ppadding = 1;
6288
6289 mclass = GET_MODE_CLASS (mode);
6290 if (type && TREE_CODE (type) == VECTOR_TYPE)
6291 {
6292 /* Vector types deserve special treatment because they are
6293 polymorphic wrt their mode, depending upon whether VIS
6294 instructions are enabled. */
6295 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6296 {
6297 /* The SPARC port defines no floating-point vector modes. */
6298 gcc_assert (mode == BLKmode);
6299 }
6300 else
6301 {
6302 /* Integral vector types should either have a vector
6303 mode or an integral mode, because we are guaranteed
6304 by pass_by_reference that their size is not greater
6305 than 16 bytes and TImode is 16-byte wide. */
6306 gcc_assert (mode != BLKmode);
6307
6308 /* Vector integers are handled like floats according to
6309 the Sun VIS SDK. */
6310 mclass = MODE_FLOAT;
6311 }
6312 }
6313
6314 switch (mclass)
6315 {
6316 case MODE_FLOAT:
6317 case MODE_COMPLEX_FLOAT:
6318 case MODE_VECTOR_INT:
6319 if (TARGET_ARCH64 && TARGET_FPU && named)
6320 {
6321 if (slotno >= SPARC_FP_ARG_MAX)
6322 return -1;
6323 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6324 /* Arguments filling only one single FP register are
6325 right-justified in the outer double FP register. */
6326 if (GET_MODE_SIZE (mode) <= 4)
6327 regno++;
6328 break;
6329 }
6330 /* fallthrough */
6331
6332 case MODE_INT:
6333 case MODE_COMPLEX_INT:
6334 if (slotno >= SPARC_INT_ARG_MAX)
6335 return -1;
6336 regno = regbase + slotno;
6337 break;
6338
6339 case MODE_RANDOM:
6340 if (mode == VOIDmode)
6341 /* MODE is VOIDmode when generating the actual call. */
6342 return -1;
6343
6344 gcc_assert (mode == BLKmode);
6345
6346 if (TARGET_ARCH32
6347 || !type
6348 || (TREE_CODE (type) != VECTOR_TYPE
6349 && TREE_CODE (type) != RECORD_TYPE))
6350 {
6351 if (slotno >= SPARC_INT_ARG_MAX)
6352 return -1;
6353 regno = regbase + slotno;
6354 }
6355 else /* TARGET_ARCH64 && type */
6356 {
6357 int intregs_p = 0, fpregs_p = 0, packed_p = 0;
6358
6359 /* First see what kinds of registers we would need. */
6360 if (TREE_CODE (type) == VECTOR_TYPE)
6361 fpregs_p = 1;
6362 else
6363 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
6364
6365 /* The ABI obviously doesn't specify how packed structures
6366 are passed. These are defined to be passed in int regs
6367 if possible, otherwise memory. */
6368 if (packed_p || !named)
6369 fpregs_p = 0, intregs_p = 1;
6370
6371 /* If all arg slots are filled, then must pass on stack. */
6372 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
6373 return -1;
6374
6375 /* If there are only int args and all int arg slots are filled,
6376 then must pass on stack. */
6377 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
6378 return -1;
6379
6380 /* Note that even if all int arg slots are filled, fp members may
6381 still be passed in regs if such regs are available.
6382 *PREGNO isn't set because there may be more than one; it's up
6383 to the caller to compute them.  */
6384 return slotno;
6385 }
6386 break;
6387
6388 default :
6389 gcc_unreachable ();
6390 }
6391
6392 *pregno = regno;
6393 return slotno;
6394 }
6395
6396 /* Handle recursive register counting for structure field layout. */
6397
6398 struct function_arg_record_value_parms
6399 {
6400 rtx ret; /* return expression being built. */
6401 int slotno; /* slot number of the argument. */
6402 int named; /* whether the argument is named. */
6403 int regbase; /* regno of the base register. */
6404 int stack; /* 1 if part of the argument is on the stack. */
6405 int intoffset; /* offset of the first pending integer field. */
6406 unsigned int nregs; /* number of words passed in registers. */
6407 };
6408
6409 static void function_arg_record_value_3
6410 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
6411 static void function_arg_record_value_2
6412 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6413 static void function_arg_record_value_1
6414 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6415 static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
6416 static rtx function_arg_union_value (int, enum machine_mode, int, int);
6417
6418 /* A subroutine of function_arg_record_value. Traverse the structure
6419 recursively and determine how many registers will be required. */
6420
6421 static void
6422 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
6423 struct function_arg_record_value_parms *parms,
6424 bool packed_p)
6425 {
6426 tree field;
6427
6428 /* We need to compute how many registers are needed so we can
6429 allocate the PARALLEL, but before we can do that we need to know
6430 whether there are any packed fields. The ABI obviously doesn't
6431 specify how structures are passed in this case, so they are
6432 defined to be passed in int regs if possible, otherwise memory,
6433 regardless of whether there are fp values present. */
6434
6435 if (! packed_p)
6436 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6437 {
6438 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6439 {
6440 packed_p = true;
6441 break;
6442 }
6443 }
6444
6445 /* Compute how many registers we need. */
6446 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6447 {
6448 if (TREE_CODE (field) == FIELD_DECL)
6449 {
6450 HOST_WIDE_INT bitpos = startbitpos;
6451
6452 if (DECL_SIZE (field) != 0)
6453 {
6454 if (integer_zerop (DECL_SIZE (field)))
6455 continue;
6456
6457 if (tree_fits_uhwi_p (bit_position (field)))
6458 bitpos += int_bit_position (field);
6459 }
6460
6461 /* ??? FIXME: else assume zero offset. */
6462
6463 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6464 function_arg_record_value_1 (TREE_TYPE (field),
6465 bitpos,
6466 parms,
6467 packed_p);
6468 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6469 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6470 && TARGET_FPU
6471 && parms->named
6472 && ! packed_p)
6473 {
6474 if (parms->intoffset != -1)
6475 {
6476 unsigned int startbit, endbit;
6477 int intslots, this_slotno;
6478
6479 startbit = parms->intoffset & -BITS_PER_WORD;
6480 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6481
6482 intslots = (endbit - startbit) / BITS_PER_WORD;
6483 this_slotno = parms->slotno + parms->intoffset
6484 / BITS_PER_WORD;
6485
6486 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6487 {
6488 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6489 /* We need to pass this field on the stack. */
6490 parms->stack = 1;
6491 }
6492
6493 parms->nregs += intslots;
6494 parms->intoffset = -1;
6495 }
6496
6497 /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
6498 If it weren't true we wouldn't be here.  */
6499 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6500 && DECL_MODE (field) == BLKmode)
6501 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6502 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6503 parms->nregs += 2;
6504 else
6505 parms->nregs += 1;
6506 }
6507 else
6508 {
6509 if (parms->intoffset == -1)
6510 parms->intoffset = bitpos;
6511 }
6512 }
6513 }
6514 }
6515
6516 /* A subroutine of function_arg_record_value. Assign the bits of the
6517 structure between parms->intoffset and bitpos to integer registers. */
6518
6519 static void
6520 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
6521 struct function_arg_record_value_parms *parms)
6522 {
6523 enum machine_mode mode;
6524 unsigned int regno;
6525 unsigned int startbit, endbit;
6526 int this_slotno, intslots, intoffset;
6527 rtx reg;
6528
6529 if (parms->intoffset == -1)
6530 return;
6531
6532 intoffset = parms->intoffset;
6533 parms->intoffset = -1;
6534
6535 startbit = intoffset & -BITS_PER_WORD;
6536 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6537 intslots = (endbit - startbit) / BITS_PER_WORD;
6538 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
6539
6540 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
6541 if (intslots <= 0)
6542 return;
6543
6544 /* If this is the trailing part of a word, only load that much into
6545 the register. Otherwise load the whole register. Note that in
6546 the latter case we may pick up unwanted bits. It's not a problem
6547 at the moment but we may wish to revisit it.  */
6548
6549 if (intoffset % BITS_PER_WORD != 0)
6550 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6551 MODE_INT);
6552 else
6553 mode = word_mode;
6554
6555 intoffset /= BITS_PER_UNIT;
6556 do
6557 {
6558 regno = parms->regbase + this_slotno;
6559 reg = gen_rtx_REG (mode, regno);
6560 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6561 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6562
6563 this_slotno += 1;
6564 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
6565 mode = word_mode;
6566 parms->nregs += 1;
6567 intslots -= 1;
6568 }
6569 while (intslots > 0);
6570 }
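/* Worked example (editorial, 64-bit words): with PARMS->intoffset == 32
   and BITPOS == 128, we get startbit == 0, endbit == 128 and
   intslots == 2, so bits 32..127 land in two consecutive int registers:
   the first loaded in SImode (just the trailing half of its word), the
   second in full DImode.  */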
6571
6572 /* A subroutine of function_arg_record_value. Traverse the structure
6573 recursively and assign bits to floating point registers. Track which
6574 bits in between need integer registers; invoke function_arg_record_value_3
6575 to make that happen. */
6576
6577 static void
6578 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
6579 struct function_arg_record_value_parms *parms,
6580 bool packed_p)
6581 {
6582 tree field;
6583
6584 if (! packed_p)
6585 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6586 {
6587 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6588 {
6589 packed_p = true;
6590 break;
6591 }
6592 }
6593
6594 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6595 {
6596 if (TREE_CODE (field) == FIELD_DECL)
6597 {
6598 HOST_WIDE_INT bitpos = startbitpos;
6599
6600 if (DECL_SIZE (field) != 0)
6601 {
6602 if (integer_zerop (DECL_SIZE (field)))
6603 continue;
6604
6605 if (tree_fits_uhwi_p (bit_position (field)))
6606 bitpos += int_bit_position (field);
6607 }
6608
6609 /* ??? FIXME: else assume zero offset. */
6610
6611 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6612 function_arg_record_value_2 (TREE_TYPE (field),
6613 bitpos,
6614 parms,
6615 packed_p);
6616 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6617 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6618 && TARGET_FPU
6619 && parms->named
6620 && ! packed_p)
6621 {
6622 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
6623 int regno, nregs, pos;
6624 enum machine_mode mode = DECL_MODE (field);
6625 rtx reg;
6626
6627 function_arg_record_value_3 (bitpos, parms);
6628
6629 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6630 && mode == BLKmode)
6631 {
6632 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6633 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6634 }
6635 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6636 {
6637 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6638 nregs = 2;
6639 }
6640 else
6641 nregs = 1;
6642
6643 regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6644 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6645 regno++;
6646 reg = gen_rtx_REG (mode, regno);
6647 pos = bitpos / BITS_PER_UNIT;
6648 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6649 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6650 parms->nregs += 1;
6651 while (--nregs > 0)
6652 {
6653 regno += GET_MODE_SIZE (mode) / 4;
6654 reg = gen_rtx_REG (mode, regno);
6655 pos += GET_MODE_SIZE (mode);
6656 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6657 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6658 parms->nregs += 1;
6659 }
6660 }
6661 else
6662 {
6663 if (parms->intoffset == -1)
6664 parms->intoffset = bitpos;
6665 }
6666 }
6667 }
6668 }
6669
6670 /* Used by function_arg and sparc_function_value_1 to implement the complex
6671 conventions of the 64-bit ABI for passing and returning structures.
6672 Return an expression valid as a return value for the FUNCTION_ARG
6673 and TARGET_FUNCTION_VALUE.
6674
6675 TYPE is the data type of the argument (as a tree).
6676 This is null for libcalls where that information may
6677 not be available.
6678 MODE is the argument's machine mode.
6679 SLOTNO is the index number of the argument's slot in the parameter array.
6680 NAMED is nonzero if this argument is a named parameter
6681 (otherwise it is an extra parameter matching an ellipsis).
6682 REGBASE is the regno of the base register for the parameter array. */
6683
6684 static rtx
6685 function_arg_record_value (const_tree type, enum machine_mode mode,
6686 int slotno, int named, int regbase)
6687 {
6688 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6689 struct function_arg_record_value_parms parms;
6690 unsigned int nregs;
6691
6692 parms.ret = NULL_RTX;
6693 parms.slotno = slotno;
6694 parms.named = named;
6695 parms.regbase = regbase;
6696 parms.stack = 0;
6697
6698 /* Compute how many registers we need. */
6699 parms.nregs = 0;
6700 parms.intoffset = 0;
6701 function_arg_record_value_1 (type, 0, &parms, false);
6702
6703 /* Take into account pending integer fields. */
6704 if (parms.intoffset != -1)
6705 {
6706 unsigned int startbit, endbit;
6707 int intslots, this_slotno;
6708
6709 startbit = parms.intoffset & -BITS_PER_WORD;
6710 endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6711 intslots = (endbit - startbit) / BITS_PER_WORD;
6712 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
6713
6714 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6715 {
6716 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6717 /* We need to pass this field on the stack. */
6718 parms.stack = 1;
6719 }
6720
6721 parms.nregs += intslots;
6722 }
6723 nregs = parms.nregs;
6724
6725 /* Allocate the vector and handle some annoying special cases. */
6726 if (nregs == 0)
6727 {
6728 /* ??? Empty structure has no value? Duh? */
6729 if (typesize <= 0)
6730 {
6731 /* Though there's nothing really to store, return a word register
6732 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6733 leads to breakage due to the fact that there are zero bytes to
6734 load. */
6735 return gen_rtx_REG (mode, regbase);
6736 }
6737 else
6738 {
6739 /* ??? C++ has structures with no fields, and yet a size. Give up
6740 for now and pass everything back in integer registers. */
6741 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6742 }
6743 if (nregs + slotno > SPARC_INT_ARG_MAX)
6744 nregs = SPARC_INT_ARG_MAX - slotno;
6745 }
6746 gcc_assert (nregs != 0);
6747
6748 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
6749
6750 /* If at least one field must be passed on the stack, generate
6751 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6752 also be passed on the stack. We can't do much better because the
6753 semantics of TARGET_ARG_PARTIAL_BYTES don't handle the case
6754 of structures for which the fields passed exclusively in registers
6755 are not at the beginning of the structure. */
6756 if (parms.stack)
6757 XVECEXP (parms.ret, 0, 0)
6758 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6759
6760 /* Fill in the entries. */
6761 parms.nregs = 0;
6762 parms.intoffset = 0;
6763 function_arg_record_value_2 (type, 0, &parms, false);
6764 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
6765
6766 gcc_assert (parms.nregs == nregs);
6767
6768 return parms.ret;
6769 }
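/* An illustrative result (editorial example): for

       struct { double x; double y; }

   passed as a named argument starting in slot 0, the PARALLEL built
   above is roughly

       (parallel [(expr_list (reg:DF %f0) (const_int 0))
                  (expr_list (reg:DF %f2) (const_int 8))])

   i.e. both fields travel in FP registers, each EXPR_LIST recording the
   field's byte offset within the structure.  */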
6770
6771 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6772 of the 64-bit ABI for passing and returning unions.
6773 Return an expression valid as a return value for the FUNCTION_ARG
6774 and TARGET_FUNCTION_VALUE.
6775
6776 SIZE is the size in bytes of the union.
6777 MODE is the argument's machine mode.
6778 SLOTNO is the index number of the union's slot in the parameter array.
REGNO is the hard register the union will be passed in.  */
6779
6780 static rtx
6781 function_arg_union_value (int size, enum machine_mode mode, int slotno,
6782 int regno)
6783 {
6784 int nwords = ROUND_ADVANCE (size), i;
6785 rtx regs;
6786
6787 /* See comment in previous function for empty structures. */
6788 if (nwords == 0)
6789 return gen_rtx_REG (mode, regno);
6790
6791 if (slotno == SPARC_INT_ARG_MAX - 1)
6792 nwords = 1;
6793
6794 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6795
6796 for (i = 0; i < nwords; i++)
6797 {
6798 /* Unions are passed left-justified. */
6799 XVECEXP (regs, 0, i)
6800 = gen_rtx_EXPR_LIST (VOIDmode,
6801 gen_rtx_REG (word_mode, regno),
6802 GEN_INT (UNITS_PER_WORD * i));
6803 regno++;
6804 }
6805
6806 return regs;
6807 }
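/* For illustration: a 16-byte union passed starting in %o1 yields

       (parallel [(expr_list (reg:DI %o1) (const_int 0))
                  (expr_list (reg:DI %o2) (const_int 8))])

   unless it starts in the last slot, in which case only the first word
   goes in a register and the remainder is passed on the stack.  */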
6808
6809 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6810 for passing and returning large (BLKmode) vectors.
6811 Return an expression valid as a return value for the FUNCTION_ARG
6812 and TARGET_FUNCTION_VALUE.
6813
6814 SIZE is the size in bytes of the vector (at least 8 bytes).
6815 REGNO is the FP hard register the vector will be passed in. */
6816
6817 static rtx
6818 function_arg_vector_value (int size, int regno)
6819 {
6820 int i, nregs = size / 8;
6821 rtx regs;
6822
6823 regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6824
6825 for (i = 0; i < nregs; i++)
6826 {
6827 XVECEXP (regs, 0, i)
6828 = gen_rtx_EXPR_LIST (VOIDmode,
6829 gen_rtx_REG (DImode, regno + 2*i),
6830 GEN_INT (i*8));
6831 }
6832
6833 return regs;
6834 }
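/* For illustration: with SIZE == 16 and REGNO the first FP argument
   register, the vector is split into two DImode pieces:

       (parallel [(expr_list (reg:DI %f0) (const_int 0))
                  (expr_list (reg:DI %f2) (const_int 8))])

   regno + 2*i steps over double-register pairs.  */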
6835
6836 /* Determine where to put an argument to a function.
6837 Value is zero to push the argument on the stack,
6838 or a hard register in which to store the argument.
6839
6840 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6841 the preceding args and about the function being called.
6842 MODE is the argument's machine mode.
6843 TYPE is the data type of the argument (as a tree).
6844 This is null for libcalls where that information may
6845 not be available.
6846 NAMED is true if this argument is a named parameter
6847 (otherwise it is an extra parameter matching an ellipsis).
6848 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6849 TARGET_FUNCTION_INCOMING_ARG. */
6850
6851 static rtx
6852 sparc_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode,
6853 const_tree type, bool named, bool incoming_p)
6854 {
6855 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6856
6857 int regbase = (incoming_p
6858 ? SPARC_INCOMING_INT_ARG_FIRST
6859 : SPARC_OUTGOING_INT_ARG_FIRST);
6860 int slotno, regno, padding;
6861 enum mode_class mclass = GET_MODE_CLASS (mode);
6862
6863 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
6864 &regno, &padding);
6865 if (slotno == -1)
6866 return 0;
6867
6868 /* Vector types deserve special treatment because they are polymorphic wrt
6869 their mode, depending upon whether VIS instructions are enabled. */
6870 if (type && TREE_CODE (type) == VECTOR_TYPE)
6871 {
6872 HOST_WIDE_INT size = int_size_in_bytes (type);
6873 gcc_assert ((TARGET_ARCH32 && size <= 8)
6874 || (TARGET_ARCH64 && size <= 16));
6875
6876 if (mode == BLKmode)
6877 return function_arg_vector_value (size,
6878 SPARC_FP_ARG_FIRST + 2*slotno);
6879 else
6880 mclass = MODE_FLOAT;
6881 }
6882
6883 if (TARGET_ARCH32)
6884 return gen_rtx_REG (mode, regno);
6885
6886 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6887 and are promoted to registers if possible. */
6888 if (type && TREE_CODE (type) == RECORD_TYPE)
6889 {
6890 HOST_WIDE_INT size = int_size_in_bytes (type);
6891 gcc_assert (size <= 16);
6892
6893 return function_arg_record_value (type, mode, slotno, named, regbase);
6894 }
6895
6896 /* Unions up to 16 bytes in size are passed in integer registers. */
6897 else if (type && TREE_CODE (type) == UNION_TYPE)
6898 {
6899 HOST_WIDE_INT size = int_size_in_bytes (type);
6900 gcc_assert (size <= 16);
6901
6902 return function_arg_union_value (size, mode, slotno, regno);
6903 }
6904
6905 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6906 but also have the slot allocated for them.
6907 If no prototype is in scope fp values in register slots get passed
6908 in two places, either fp regs and int regs or fp regs and memory. */
6909 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6910 && SPARC_FP_REG_P (regno))
6911 {
6912 rtx reg = gen_rtx_REG (mode, regno);
6913 if (cum->prototype_p || cum->libcall_p)
6914 {
6915 /* "* 2" because fp reg numbers are recorded in 4 byte
6916 quantities. */
6917 #if 0
6918 /* ??? This will cause the value to be passed in the fp reg and
6919 in the stack. When a prototype exists we want to pass the
6920 value in the reg but reserve space on the stack. That's an
6921 optimization, and is deferred [for a bit]. */
6922 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
6923 return gen_rtx_PARALLEL (mode,
6924 gen_rtvec (2,
6925 gen_rtx_EXPR_LIST (VOIDmode,
6926 NULL_RTX, const0_rtx),
6927 gen_rtx_EXPR_LIST (VOIDmode,
6928 reg, const0_rtx)));
6929 else
6930 #else
6931 /* ??? It seems that passing back a register even when past
6932 the area declared by REG_PARM_STACK_SPACE will allocate
6933 space appropriately, and will not copy the data onto the
6934 stack, exactly as we desire.
6935
6936 This is due to locate_and_pad_parm being called in
6937 expand_call whenever reg_parm_stack_space > 0, which
6938 while beneficial to our example here, would seem to be
6939 in error from what had been intended. Ho hum... -- r~ */
6940 #endif
6941 return reg;
6942 }
6943 else
6944 {
6945 rtx v0, v1;
6946
6947 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6948 {
6949 int intreg;
6950
6951 /* On incoming, we don't need to know that the value
6952 is passed in %f0 and %i0, and it confuses other parts
6953 causing needless spillage even in the simplest cases.  */
6954 if (incoming_p)
6955 return reg;
6956
6957 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6958 + (regno - SPARC_FP_ARG_FIRST) / 2);
6959
6960 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6961 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6962 const0_rtx);
6963 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6964 }
6965 else
6966 {
6967 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6968 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6969 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6970 }
6971 }
6972 }
6973
6974 /* All other aggregate types are passed in an integer register in a mode
6975 corresponding to the size of the type. */
6976 else if (type && AGGREGATE_TYPE_P (type))
6977 {
6978 HOST_WIDE_INT size = int_size_in_bytes (type);
6979 gcc_assert (size <= 16);
6980
6981 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6982 }
6983
6984 return gen_rtx_REG (mode, regno);
6985 }
6986
6987 /* Handle the TARGET_FUNCTION_ARG target hook. */
6988
6989 static rtx
6990 sparc_function_arg (cumulative_args_t cum, enum machine_mode mode,
6991 const_tree type, bool named)
6992 {
6993 return sparc_function_arg_1 (cum, mode, type, named, false);
6994 }
6995
6996 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6997
6998 static rtx
6999 sparc_function_incoming_arg (cumulative_args_t cum, enum machine_mode mode,
7000 const_tree type, bool named)
7001 {
7002 return sparc_function_arg_1 (cum, mode, type, named, true);
7003 }
7004
7005 /* For sparc64, objects requiring 16-byte alignment get it.  */
7006
7007 static unsigned int
7008 sparc_function_arg_boundary (enum machine_mode mode, const_tree type)
7009 {
7010 return ((TARGET_ARCH64
7011 && (GET_MODE_ALIGNMENT (mode) == 128
7012 || (type && TYPE_ALIGN (type) == 128)))
7013 ? 128
7014 : PARM_BOUNDARY);
7015 }
7016
7017 /* For an arg passed partly in registers and partly in memory,
7018 this is the number of bytes of registers used.
7019 For args passed entirely in registers or entirely in memory, zero.
7020
7021 Any arg that starts in the first 6 regs but won't entirely fit in them
7022 needs partial registers on v8. On v9, structures with integer
7023 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7024 values that begin in the last fp reg [where "last fp reg" varies with the
7025 mode] will be split between that reg and memory. */
7026
7027 static int
7028 sparc_arg_partial_bytes (cumulative_args_t cum, enum machine_mode mode,
7029 tree type, bool named)
7030 {
7031 int slotno, regno, padding;
7032
7033 /* We pass false for incoming_p here; it doesn't matter.  */
7034 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7035 false, &regno, &padding);
7036
7037 if (slotno == -1)
7038 return 0;
7039
7040 if (TARGET_ARCH32)
7041 {
7042 if ((slotno + (mode == BLKmode
7043 ? ROUND_ADVANCE (int_size_in_bytes (type))
7044 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
7045 > SPARC_INT_ARG_MAX)
7046 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7047 }
7048 else
7049 {
7050 /* We are guaranteed by pass_by_reference that the size of the
7051 argument is not greater than 16 bytes, so we only need to return
7052 one word if the argument is partially passed in registers. */
7053
7054 if (type && AGGREGATE_TYPE_P (type))
7055 {
7056 int size = int_size_in_bytes (type);
7057
7058 if (size > UNITS_PER_WORD
7059 && slotno == SPARC_INT_ARG_MAX - 1)
7060 return UNITS_PER_WORD;
7061 }
7062 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7063 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7064 && ! (TARGET_FPU && named)))
7065 {
7066 /* The complex types are passed as packed types. */
7067 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7068 && slotno == SPARC_INT_ARG_MAX - 1)
7069 return UNITS_PER_WORD;
7070 }
7071 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7072 {
7073 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7074 > SPARC_FP_ARG_MAX)
7075 return UNITS_PER_WORD;
7076 }
7077 }
7078
7079 return 0;
7080 }
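/* Worked example (editorial): on 32-bit, a 'long long' argument
   starting in slot 5 needs ROUND_ADVANCE (8) == 2 slots and so overruns
   the 6 register slots; (6 - 5) * 4 == 4 bytes go in %o5 and the other
   4 bytes on the stack.  */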
7081
7082 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7083 Specify whether to pass the argument by reference. */
7084
7085 static bool
7086 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7087 enum machine_mode mode, const_tree type,
7088 bool named ATTRIBUTE_UNUSED)
7089 {
7090 if (TARGET_ARCH32)
7091 /* The original SPARC 32-bit ABI says that structures, unions and
7092 quad-precision floats are passed by reference.  For Pascal,
7093 also pass arrays by reference. All other base types are passed
7094 in registers.
7095
7096 Extended ABI (as implemented by the Sun compiler) says that all
7097 complex floats are passed by reference. Pass complex integers
7098 in registers up to 8 bytes. More generally, enforce the 2-word
7099 cap for passing arguments in registers.
7100
7101 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7102 integers are passed like floats of the same size, that is in
7103 registers up to 8 bytes. Pass all vector floats by reference
7104 like structure and unions. */
7105 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7106 || mode == SCmode
7107 /* Catch CDImode, TFmode, DCmode and TCmode. */
7108 || GET_MODE_SIZE (mode) > 8
7109 || (type
7110 && TREE_CODE (type) == VECTOR_TYPE
7111 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7112 else
7113 /* Original SPARC 64-bit ABI says that structures and unions
7114 smaller than 16 bytes are passed in registers, as well as
7115 all other base types.
7116
7117 Extended ABI (as implemented by the Sun compiler) says that
7118 complex floats are passed in registers up to 16 bytes. Pass
7119 all complex integers in registers up to 16 bytes. More generally,
7120 enforce the 2-word cap for passing arguments in registers.
7121
7122 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7123 integers are passed like floats of the same size, that is in
7124 registers (up to 16 bytes). Pass all vector floats like structure
7125 and unions. */
7126 return ((type
7127 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7128 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7129 /* Catch CTImode and TCmode. */
7130 || GET_MODE_SIZE (mode) > 16);
7131 }
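/* Some concrete consequences of the rules above (editorial summary):
   on 32-bit, 'long double' (TFmode, 16 bytes) and '_Complex float'
   (SCmode) are passed by reference; on 64-bit, 'long double' travels in
   registers, and only 32-byte quantities like '_Complex long double'
   (TCmode), or aggregates larger than 16 bytes, go by reference.  */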
7132
7133 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7134 Update the data in CUM to advance over an argument
7135 of mode MODE and data type TYPE.
7136 TYPE is null for libcalls where that information may not be available. */
7137
7138 static void
7139 sparc_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7140 const_tree type, bool named)
7141 {
7142 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7143 int regno, padding;
7144
7145 /* We pass false for incoming_p here; it doesn't matter.  */
7146 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7147
7148 /* If argument requires leading padding, add it. */
7149 cum->words += padding;
7150
7151 if (TARGET_ARCH32)
7152 {
7153 cum->words += (mode != BLKmode
7154 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7155 : ROUND_ADVANCE (int_size_in_bytes (type)));
7156 }
7157 else
7158 {
7159 if (type && AGGREGATE_TYPE_P (type))
7160 {
7161 int size = int_size_in_bytes (type);
7162
7163 if (size <= 8)
7164 ++cum->words;
7165 else if (size <= 16)
7166 cum->words += 2;
7167 else /* passed by reference */
7168 ++cum->words;
7169 }
7170 else
7171 {
7172 cum->words += (mode != BLKmode
7173 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7174 : ROUND_ADVANCE (int_size_in_bytes (type)));
7175 }
7176 }
7177 }
7178
7179 /* Handle the FUNCTION_ARG_PADDING macro.
7180 For the 64-bit ABI, structs are always stored left-shifted in their
7181 argument slot.  */
7182
7183 enum direction
7184 function_arg_padding (enum machine_mode mode, const_tree type)
7185 {
7186 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
7187 return upward;
7188
7189 /* Fall back to the default. */
7190 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7191 }
7192
7193 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7194 Specify whether to return the return value in memory. */
7195
7196 static bool
7197 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7198 {
7199 if (TARGET_ARCH32)
7200 /* The original SPARC 32-bit ABI says that structures, unions and
7201 quad-precision floats are returned in memory.  All other
7202 base types are returned in registers.
7203
7204 Extended ABI (as implemented by the Sun compiler) says that
7205 all complex floats are returned in registers (8 FP registers
7206 at most for '_Complex long double'). Return all complex integers
7207 in registers (4 at most for '_Complex long long').
7208
7209 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7210 integers are returned like floats of the same size, that is in
7211 registers up to 8 bytes and in memory otherwise. Return all
7212 vector floats in memory like structure and unions; note that
7213 they always have BLKmode like the latter. */
7214 return (TYPE_MODE (type) == BLKmode
7215 || TYPE_MODE (type) == TFmode
7216 || (TREE_CODE (type) == VECTOR_TYPE
7217 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7218 else
7219 /* Original SPARC 64-bit ABI says that structures and unions
7220 smaller than 32 bytes are returned in registers, as well as
7221 all other base types.
7222
7223 Extended ABI (as implemented by the Sun compiler) says that all
7224 complex floats are returned in registers (8 FP registers at most
7225 for '_Complex long double'). Return all complex integers in
7226 registers (4 at most for '_Complex TItype').
7227
7228 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7229 integers are returned like floats of the same size, that is in
7230 registers. Return all vector floats like structure and unions;
7231 note that they always have BLKmode like the latter. */
7232 return (TYPE_MODE (type) == BLKmode
7233 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7234 }
7235
7236 /* Handle the TARGET_STRUCT_VALUE target hook.
7237 Return where to find the structure return value address. */
7238
7239 static rtx
7240 sparc_struct_value_rtx (tree fndecl, int incoming)
7241 {
7242 if (TARGET_ARCH64)
7243 return 0;
7244 else
7245 {
7246 rtx mem;
7247
7248 if (incoming)
7249 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7250 STRUCT_VALUE_OFFSET));
7251 else
7252 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7253 STRUCT_VALUE_OFFSET));
7254
7255 /* Only follow the SPARC ABI for fixed-size structure returns.
7256 Variable size structure returns are handled per the normal
7257 procedures in GCC.  This is enabled by -mstd-struct-return.  */
7258 if (incoming == 2
7259 && sparc_std_struct_return
7260 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7261 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7262 {
7263 /* We must check and adjust the return address, since the caller
7264 may or may not have provided the return object.  */
7266 rtx ret_reg = gen_rtx_REG (Pmode, 31);
7267 rtx scratch = gen_reg_rtx (SImode);
7268 rtx endlab = gen_label_rtx ();
7269
	  /* Calculate the return object size.  */
7271 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7272 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
	  /* Construct a temporary return value.  */
7274 rtx temp_val
7275 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7276
7277 /* Implement SPARC 32-bit psABI callee return struct checking:
7278
7279 Fetch the instruction where we will return to and see if
7280 it's an unimp instruction (the most significant 10 bits
7281 will be zero). */
7282 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7283 plus_constant (Pmode,
7284 ret_reg, 8)));
	  /* Assume the size is valid and pre-adjust.  */
7286 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7287 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7288 0, endlab);
7289 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
	  /* Write the address of the memory pointed to by temp_val into
	     the memory pointed to by mem.  */
7292 emit_move_insn (mem, XEXP (temp_val, 0));
7293 emit_label (endlab);
7294 }
7295
7296 return mem;
7297 }
7298 }
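
/* Editor's sketch of the convention checked above (32-bit psABI): a
   caller expecting a structure return emits an unimp insn after the
   delay slot of the call, e.g.

     call  foo
      nop
     unimp 8          ! low 12 bits encode the expected size

   The callee compares the word at %i7+8 against the size of the object
   it is returning; on a match the return address is pre-adjusted by 4
   so that the return skips the unimp insn, otherwise the adjustment is
   undone and a temporary object is returned instead.  */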
7299
7300 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7301 For v9, function return values are subject to the same rules as arguments,
7302 except that up to 32 bytes may be returned in registers. */
7303
7304 static rtx
7305 sparc_function_value_1 (const_tree type, enum machine_mode mode,
7306 bool outgoing)
7307 {
7308 /* Beware that the two values are swapped here wrt function_arg. */
7309 int regbase = (outgoing
7310 ? SPARC_INCOMING_INT_ARG_FIRST
7311 : SPARC_OUTGOING_INT_ARG_FIRST);
7312 enum mode_class mclass = GET_MODE_CLASS (mode);
7313 int regno;
7314
7315 /* Vector types deserve special treatment because they are polymorphic wrt
7316 their mode, depending upon whether VIS instructions are enabled. */
7317 if (type && TREE_CODE (type) == VECTOR_TYPE)
7318 {
7319 HOST_WIDE_INT size = int_size_in_bytes (type);
7320 gcc_assert ((TARGET_ARCH32 && size <= 8)
7321 || (TARGET_ARCH64 && size <= 32));
7322
7323 if (mode == BLKmode)
7324 return function_arg_vector_value (size,
7325 SPARC_FP_ARG_FIRST);
7326 else
7327 mclass = MODE_FLOAT;
7328 }
7329
7330 if (TARGET_ARCH64 && type)
7331 {
7332 /* Structures up to 32 bytes in size are returned in registers. */
7333 if (TREE_CODE (type) == RECORD_TYPE)
7334 {
7335 HOST_WIDE_INT size = int_size_in_bytes (type);
7336 gcc_assert (size <= 32);
7337
7338 return function_arg_record_value (type, mode, 0, 1, regbase);
7339 }
7340
7341 /* Unions up to 32 bytes in size are returned in integer registers. */
7342 else if (TREE_CODE (type) == UNION_TYPE)
7343 {
7344 HOST_WIDE_INT size = int_size_in_bytes (type);
7345 gcc_assert (size <= 32);
7346
7347 return function_arg_union_value (size, mode, 0, regbase);
7348 }
7349
7350 /* Objects that require it are returned in FP registers. */
7351 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7352 ;
7353
7354 /* All other aggregate types are returned in an integer register in a
7355 mode corresponding to the size of the type. */
7356 else if (AGGREGATE_TYPE_P (type))
7357 {
7360 HOST_WIDE_INT size = int_size_in_bytes (type);
7361 gcc_assert (size <= 32);
7362
7363 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7364
7365 /* ??? We probably should have made the same ABI change in
7366 3.4.0 as the one we made for unions. The latter was
7367 required by the SCD though, while the former is not
7368 specified, so we favored compatibility and efficiency.
7369
7370 Now we're stuck for aggregates larger than 16 bytes,
7371 because OImode vanished in the meantime. Let's not
7372 try to be unduly clever, and simply follow the ABI
7373 for unions in that case. */
7374 if (mode == BLKmode)
7375 return function_arg_union_value (size, mode, 0, regbase);
7376 else
7377 mclass = MODE_INT;
7378 }
7379
7380 /* We should only have pointer and integer types at this point. This
7381 must match sparc_promote_function_mode. */
7382 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7383 mode = word_mode;
7384 }
7385
7386 /* We should only have pointer and integer types at this point. This must
7387 match sparc_promote_function_mode. */
7388 else if (TARGET_ARCH32
7389 && mclass == MODE_INT
7390 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7391 mode = word_mode;
7392
7393 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7394 regno = SPARC_FP_ARG_FIRST;
7395 else
7396 regno = regbase;
7397
7398 return gen_rtx_REG (mode, regno);
7399 }
7400
7401 /* Handle TARGET_FUNCTION_VALUE.
7402 On the SPARC, the value is found in the first "output" register, but the
7403 called function leaves it in the first "input" register. */
7404
7405 static rtx
7406 sparc_function_value (const_tree valtype,
7407 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7408 bool outgoing)
7409 {
7410 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7411 }
7412
7413 /* Handle TARGET_LIBCALL_VALUE. */
7414
7415 static rtx
7416 sparc_libcall_value (enum machine_mode mode,
7417 const_rtx fun ATTRIBUTE_UNUSED)
7418 {
7419 return sparc_function_value_1 (NULL_TREE, mode, false);
7420 }
7421
7422 /* Handle FUNCTION_VALUE_REGNO_P.
7423 On the SPARC, the first "output" reg is used for integer values, and the
7424 first floating point register is used for floating point values. */
7425
7426 static bool
7427 sparc_function_value_regno_p (const unsigned int regno)
7428 {
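  /* Register 8 is %o0 and register 32 is %f0.  */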
7429 return (regno == 8 || regno == 32);
7430 }
7431
7432 /* Do what is necessary for `va_start'. We look at the current function
7433 to determine if stdarg or varargs is used and return the address of
7434 the first unnamed parameter. */
7435
7436 static rtx
7437 sparc_builtin_saveregs (void)
7438 {
7439 int first_reg = crtl->args.info.words;
7440 rtx address;
7441 int regno;
7442
7443 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7444 emit_move_insn (gen_rtx_MEM (word_mode,
7445 gen_rtx_PLUS (Pmode,
7446 frame_pointer_rtx,
7447 GEN_INT (FIRST_PARM_OFFSET (0)
7448 + (UNITS_PER_WORD
7449 * regno)))),
7450 gen_rtx_REG (word_mode,
7451 SPARC_INCOMING_INT_ARG_FIRST + regno));
7452
7453 address = gen_rtx_PLUS (Pmode,
7454 frame_pointer_rtx,
7455 GEN_INT (FIRST_PARM_OFFSET (0)
7456 + UNITS_PER_WORD * first_reg));
7457
7458 return address;
7459 }
7460
7461 /* Implement `va_start' for stdarg. */
7462
7463 static void
7464 sparc_va_start (tree valist, rtx nextarg)
7465 {
7466 nextarg = expand_builtin_saveregs ();
7467 std_expand_builtin_va_start (valist, nextarg);
7468 }
7469
7470 /* Implement `va_arg' for stdarg. */
7471
7472 static tree
7473 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7474 gimple_seq *post_p)
7475 {
7476 HOST_WIDE_INT size, rsize, align;
7477 tree addr, incr;
7478 bool indirect;
7479 tree ptrtype = build_pointer_type (type);
7480
7481 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7482 {
7483 indirect = true;
7484 size = rsize = UNITS_PER_WORD;
7485 align = 0;
7486 }
7487 else
7488 {
7489 indirect = false;
7490 size = int_size_in_bytes (type);
7491 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7492 align = 0;
7493
7494 if (TARGET_ARCH64)
7495 {
7496 /* For SPARC64, objects requiring 16-byte alignment get it. */
7497 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7498 align = 2 * UNITS_PER_WORD;
7499
7500 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7501 are left-justified in their slots. */
7502 if (AGGREGATE_TYPE_P (type))
7503 {
7504 if (size == 0)
7505 size = rsize = UNITS_PER_WORD;
7506 else
7507 size = rsize;
7508 }
7509 }
7510 }
7511
7512 incr = valist;
7513 if (align)
7514 {
7515 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7516 incr = fold_convert (sizetype, incr);
7517 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7518 size_int (-align));
7519 incr = fold_convert (ptr_type_node, incr);
7520 }
7521
7522 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7523 addr = incr;
7524
7525 if (BYTES_BIG_ENDIAN && size < rsize)
7526 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7527
7528 if (indirect)
7529 {
7530 addr = fold_convert (build_pointer_type (ptrtype), addr);
7531 addr = build_va_arg_indirect_ref (addr);
7532 }
7533
7534 /* If the address isn't aligned properly for the type, we need a temporary.
7535 FIXME: This is inefficient, usually we can do this in registers. */
7536 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7537 {
7538 tree tmp = create_tmp_var (type, "va_arg_tmp");
7539 tree dest_addr = build_fold_addr_expr (tmp);
7540 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7541 3, dest_addr, addr, size_int (rsize));
7542 TREE_ADDRESSABLE (tmp) = 1;
7543 gimplify_and_add (copy, pre_p);
7544 addr = dest_addr;
7545 }
7546
7547 else
7548 addr = fold_convert (ptrtype, addr);
7549
7550 incr = fold_build_pointer_plus_hwi (incr, rsize);
7551 gimplify_assign (valist, incr, post_p);
7552
7553 return build_va_arg_indirect_ref (addr);
7554 }
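
/* Worked example (editor's note, under stated assumptions): fetching an
   'int' with va_arg on TARGET_ARCH64 gives size = 4 and rsize = 8; being
   big-endian, the value is read at incr + 4, i.e. from the low-order end
   of its slot, and the va_list pointer advances by 8.  A 'long double'
   (16-byte alignment) first rounds the pointer up,

     incr = (incr + 15) & -16;

   then advances it by rsize = 16.  */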
7555 \f
7556 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7557 Specify whether the vector mode is supported by the hardware. */
7558
7559 static bool
7560 sparc_vector_mode_supported_p (enum machine_mode mode)
7561 {
  return TARGET_VIS && VECTOR_MODE_P (mode);
7563 }
7564 \f
7565 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7566
7567 static enum machine_mode
7568 sparc_preferred_simd_mode (enum machine_mode mode)
7569 {
7570 if (TARGET_VIS)
7571 switch (mode)
7572 {
7573 case SImode:
7574 return V2SImode;
7575 case HImode:
7576 return V4HImode;
7577 case QImode:
7578 return V8QImode;
7579
7580 default:;
7581 }
7582
7583 return word_mode;
7584 }
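
/* E.g. (editor's note): with VIS enabled, a loop over 16-bit elements
   is vectorized four at a time in V4HImode; without VIS the scalar
   word_mode answer effectively disables vectorization.  */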
7585 \f
7586 /* Return the string to output an unconditional branch to LABEL, which is
7587 the operand number of the label.
7588
7589 DEST is the destination insn (i.e. the label), INSN is the source. */
7590
7591 const char *
7592 output_ubranch (rtx dest, rtx_insn *insn)
7593 {
7594 static char string[64];
7595 bool v9_form = false;
7596 int delta;
7597 char *p;
7598
7599 /* Even if we are trying to use cbcond for this, evaluate
7600 whether we can use V9 branches as our backup plan. */
7601
7602 delta = 5000000;
7603 if (INSN_ADDRESSES_SET_P ())
7604 delta = (INSN_ADDRESSES (INSN_UID (dest))
7605 - INSN_ADDRESSES (INSN_UID (insn)));
7606
7607 /* Leave some instructions for "slop". */
7608 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7609 v9_form = true;
7610
7611 if (TARGET_CBCOND)
7612 {
7613 bool emit_nop = emit_cbcond_nop (insn);
7614 bool far = false;
7615 const char *rval;
7616
7617 if (delta < -500 || delta > 500)
7618 far = true;
7619
7620 if (far)
7621 {
7622 if (v9_form)
7623 rval = "ba,a,pt\t%%xcc, %l0";
7624 else
7625 rval = "b,a\t%l0";
7626 }
7627 else
7628 {
7629 if (emit_nop)
7630 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7631 else
7632 rval = "cwbe\t%%g0, %%g0, %l0";
7633 }
7634 return rval;
7635 }
7636
7637 if (v9_form)
7638 strcpy (string, "ba%*,pt\t%%xcc, ");
7639 else
7640 strcpy (string, "b%*\t");
7641
7642 p = strchr (string, '\0');
7643 *p++ = '%';
7644 *p++ = 'l';
7645 *p++ = '0';
7646 *p++ = '%';
7647 *p++ = '(';
7648 *p = '\0';
7649
7650 return string;
7651 }
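
/* For instance (editor's note): with a nearby target on a V9 CPU this
   returns the template "ba%*,pt\t%%xcc, %l0%(", which final renders as
   something like

     ba,a,pt %xcc, .LC30

   where the annul flag or a trailing nop is supplied by the %* and %(
   punctuation codes handled in sparc_print_operand.  */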
7652
7653 /* Return the string to output a conditional branch to LABEL, which is
7654 the operand number of the label. OP is the conditional expression.
7655 XEXP (OP, 0) is assumed to be a condition code register (integer or
7656 floating point) and its mode specifies what kind of comparison we made.
7657
7658 DEST is the destination insn (i.e. the label), INSN is the source.
7659
7660 REVERSED is nonzero if we should reverse the sense of the comparison.
7661
7662 ANNUL is nonzero if we should generate an annulling branch. */
7663
7664 const char *
7665 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7666 rtx_insn *insn)
7667 {
7668 static char string[64];
7669 enum rtx_code code = GET_CODE (op);
7670 rtx cc_reg = XEXP (op, 0);
7671 enum machine_mode mode = GET_MODE (cc_reg);
7672 const char *labelno, *branch;
7673 int spaces = 8, far;
7674 char *p;
7675
7676 /* v9 branches are limited to +-1MB. If it is too far away,
7677 change
7678
7679 bne,pt %xcc, .LC30
7680
7681 to
7682
7683 be,pn %xcc, .+12
7684 nop
7685 ba .LC30
7686
7687 and
7688
7689 fbne,a,pn %fcc2, .LC29
7690
7691 to
7692
7693 fbe,pt %fcc2, .+16
7694 nop
7695 ba .LC29 */
7696
7697 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7698 if (reversed ^ far)
7699 {
7700 /* Reversal of FP compares takes care -- an ordered compare
7701 becomes an unordered compare and vice versa. */
7702 if (mode == CCFPmode || mode == CCFPEmode)
7703 code = reverse_condition_maybe_unordered (code);
7704 else
7705 code = reverse_condition (code);
7706 }
7707
7708 /* Start by writing the branch condition. */
7709 if (mode == CCFPmode || mode == CCFPEmode)
7710 {
7711 switch (code)
7712 {
7713 case NE:
7714 branch = "fbne";
7715 break;
7716 case EQ:
7717 branch = "fbe";
7718 break;
7719 case GE:
7720 branch = "fbge";
7721 break;
7722 case GT:
7723 branch = "fbg";
7724 break;
7725 case LE:
7726 branch = "fble";
7727 break;
7728 case LT:
7729 branch = "fbl";
7730 break;
7731 case UNORDERED:
7732 branch = "fbu";
7733 break;
7734 case ORDERED:
7735 branch = "fbo";
7736 break;
7737 case UNGT:
7738 branch = "fbug";
7739 break;
7740 case UNLT:
7741 branch = "fbul";
7742 break;
7743 case UNEQ:
7744 branch = "fbue";
7745 break;
7746 case UNGE:
7747 branch = "fbuge";
7748 break;
7749 case UNLE:
7750 branch = "fbule";
7751 break;
7752 case LTGT:
7753 branch = "fblg";
7754 break;
7755
7756 default:
7757 gcc_unreachable ();
7758 }
7759
7760 /* ??? !v9: FP branches cannot be preceded by another floating point
7761 insn. Because there is currently no concept of pre-delay slots,
7762 we can fix this only by always emitting a nop before a floating
7763 point branch. */
7764
7765 string[0] = '\0';
7766 if (! TARGET_V9)
7767 strcpy (string, "nop\n\t");
7768 strcat (string, branch);
7769 }
7770 else
7771 {
7772 switch (code)
7773 {
7774 case NE:
7775 branch = "bne";
7776 break;
7777 case EQ:
7778 branch = "be";
7779 break;
7780 case GE:
7781 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7782 branch = "bpos";
7783 else
7784 branch = "bge";
7785 break;
7786 case GT:
7787 branch = "bg";
7788 break;
7789 case LE:
7790 branch = "ble";
7791 break;
7792 case LT:
7793 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7794 branch = "bneg";
7795 else
7796 branch = "bl";
7797 break;
7798 case GEU:
7799 branch = "bgeu";
7800 break;
7801 case GTU:
7802 branch = "bgu";
7803 break;
7804 case LEU:
7805 branch = "bleu";
7806 break;
7807 case LTU:
7808 branch = "blu";
7809 break;
7810
7811 default:
7812 gcc_unreachable ();
7813 }
7814 strcpy (string, branch);
7815 }
7816 spaces -= strlen (branch);
7817 p = strchr (string, '\0');
7818
7819 /* Now add the annulling, the label, and a possible noop. */
7820 if (annul && ! far)
7821 {
7822 strcpy (p, ",a");
7823 p += 2;
7824 spaces -= 2;
7825 }
7826
7827 if (TARGET_V9)
7828 {
7829 rtx note;
7830 int v8 = 0;
7831
7832 if (! far && insn && INSN_ADDRESSES_SET_P ())
7833 {
7834 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7835 - INSN_ADDRESSES (INSN_UID (insn)));
7836 /* Leave some instructions for "slop". */
7837 if (delta < -260000 || delta >= 260000)
7838 v8 = 1;
7839 }
7840
7841 if (mode == CCFPmode || mode == CCFPEmode)
7842 {
7843 static char v9_fcc_labelno[] = "%%fccX, ";
7844 /* Set the char indicating the number of the fcc reg to use. */
7845 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7846 labelno = v9_fcc_labelno;
7847 if (v8)
7848 {
7849 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7850 labelno = "";
7851 }
7852 }
7853 else if (mode == CCXmode || mode == CCX_NOOVmode)
7854 {
7855 labelno = "%%xcc, ";
7856 gcc_assert (! v8);
7857 }
7858 else
7859 {
7860 labelno = "%%icc, ";
7861 if (v8)
7862 labelno = "";
7863 }
7864
7865 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7866 {
7867 strcpy (p,
7868 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
7869 ? ",pt" : ",pn");
7870 p += 3;
7871 spaces -= 3;
7872 }
7873 }
7874 else
7875 labelno = "";
7876
7877 if (spaces > 0)
7878 *p++ = '\t';
7879 else
7880 *p++ = ' ';
7881 strcpy (p, labelno);
7882 p = strchr (p, '\0');
7883 if (far)
7884 {
7885 strcpy (p, ".+12\n\t nop\n\tb\t");
7886 /* Skip the next insn if requested or
7887 if we know that it will be a nop. */
7888 if (annul || ! final_sequence)
7889 p[3] = '6';
7890 p += 14;
7891 }
7892 *p++ = '%';
7893 *p++ = 'l';
7894 *p++ = label + '0';
7895 *p++ = '%';
7896 *p++ = '#';
7897 *p = '\0';
7898
7899 return string;
7900 }
7901
7902 /* Emit a library call comparison between floating point X and Y.
7903 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7904 Return the new operator to be used in the comparison sequence.
7905
7906 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7907 values as arguments instead of the TFmode registers themselves,
7908 that's why we cannot call emit_float_lib_cmp. */
7909
7910 rtx
7911 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7912 {
7913 const char *qpfunc;
7914 rtx slot0, slot1, result, tem, tem2, libfunc;
7915 enum machine_mode mode;
7916 enum rtx_code new_comparison;
7917
7918 switch (comparison)
7919 {
7920 case EQ:
7921 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7922 break;
7923
7924 case NE:
7925 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7926 break;
7927
7928 case GT:
7929 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7930 break;
7931
7932 case GE:
7933 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7934 break;
7935
7936 case LT:
7937 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7938 break;
7939
7940 case LE:
7941 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7942 break;
7943
7944 case ORDERED:
7945 case UNORDERED:
7946 case UNGT:
7947 case UNLT:
7948 case UNEQ:
7949 case UNGE:
7950 case UNLE:
7951 case LTGT:
7952 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7953 break;
7954
7955 default:
7956 gcc_unreachable ();
7957 }
7958
7959 if (TARGET_ARCH64)
7960 {
7961 if (MEM_P (x))
7962 {
7963 tree expr = MEM_EXPR (x);
7964 if (expr)
7965 mark_addressable (expr);
7966 slot0 = x;
7967 }
7968 else
7969 {
7970 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7971 emit_move_insn (slot0, x);
7972 }
7973
7974 if (MEM_P (y))
7975 {
7976 tree expr = MEM_EXPR (y);
7977 if (expr)
7978 mark_addressable (expr);
7979 slot1 = y;
7980 }
7981 else
7982 {
7983 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7984 emit_move_insn (slot1, y);
7985 }
7986
7987 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7988 emit_library_call (libfunc, LCT_NORMAL,
7989 DImode, 2,
7990 XEXP (slot0, 0), Pmode,
7991 XEXP (slot1, 0), Pmode);
7992 mode = DImode;
7993 }
7994 else
7995 {
7996 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7997 emit_library_call (libfunc, LCT_NORMAL,
7998 SImode, 2,
7999 x, TFmode, y, TFmode);
8000 mode = SImode;
8001 }
8002
8003
8004 /* Immediately move the result of the libcall into a pseudo
8005 register so reload doesn't clobber the value if it needs
8006 the return register for a spill reg. */
8007 result = gen_reg_rtx (mode);
8008 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8009
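  /* Editor's note: the _Qp_cmp/_Q_cmp routines return an ordering code,
     conventionally 0 = equal, 1 = less, 2 = greater, 3 = unordered; the
     cases below decode it, e.g. UNLT tests (result & 1) != 0 because
     both "less" and "unordered" have bit 0 set.  */
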
8010 switch (comparison)
8011 {
8012 default:
8013 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8014 case ORDERED:
8015 case UNORDERED:
8016 new_comparison = (comparison == UNORDERED ? EQ : NE);
8017 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8018 case UNGT:
8019 case UNGE:
8020 new_comparison = (comparison == UNGT ? GT : NE);
8021 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8022 case UNLE:
8023 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8024 case UNLT:
8025 tem = gen_reg_rtx (mode);
8026 if (TARGET_ARCH32)
8027 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8028 else
8029 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8030 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8031 case UNEQ:
8032 case LTGT:
8033 tem = gen_reg_rtx (mode);
8034 if (TARGET_ARCH32)
8035 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8036 else
8037 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8038 tem2 = gen_reg_rtx (mode);
8039 if (TARGET_ARCH32)
8040 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8041 else
8042 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8043 new_comparison = (comparison == UNEQ ? EQ : NE);
8044 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8045 }
8046
8047 gcc_unreachable ();
8048 }
8049
8050 /* Generate an unsigned DImode to FP conversion. This is the same code
8051 optabs would emit if we didn't have TFmode patterns. */
8052
8053 void
8054 sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
8055 {
8056 rtx neglab, donelab, i0, i1, f0, in, out;
8057
8058 out = operands[0];
8059 in = force_reg (DImode, operands[1]);
8060 neglab = gen_label_rtx ();
8061 donelab = gen_label_rtx ();
8062 i0 = gen_reg_rtx (DImode);
8063 i1 = gen_reg_rtx (DImode);
8064 f0 = gen_reg_rtx (mode);
8065
8066 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8067
8068 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
8069 emit_jump_insn (gen_jump (donelab));
8070 emit_barrier ();
8071
8072 emit_label (neglab);
8073
8074 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8075 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8076 emit_insn (gen_iordi3 (i0, i0, i1));
8077 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
8078 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
8079
8080 emit_label (donelab);
8081 }
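
/* Editor's illustration of the negative path above: an input with the
   sign bit set, say 0x8000000000000001, cannot be converted as a signed
   DImode value, so it is halved with the shifted-out bit folded back in
   ("round to odd"):

     i0 = (in >> 1) | (in & 1);

   that value is converted to FP and then doubled; keeping the low bit
   preserves correct rounding of the final result.  */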
8082
8083 /* Generate an FP to unsigned DImode conversion. This is the same code
8084 optabs would emit if we didn't have TFmode patterns. */
8085
8086 void
8087 sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
8088 {
8089 rtx neglab, donelab, i0, i1, f0, in, out, limit;
8090
8091 out = operands[0];
8092 in = force_reg (mode, operands[1]);
8093 neglab = gen_label_rtx ();
8094 donelab = gen_label_rtx ();
8095 i0 = gen_reg_rtx (DImode);
8096 i1 = gen_reg_rtx (DImode);
8097 limit = gen_reg_rtx (mode);
8098 f0 = gen_reg_rtx (mode);
8099
8100 emit_move_insn (limit,
8101 CONST_DOUBLE_FROM_REAL_VALUE (
8102 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8103 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8104
8105 emit_insn (gen_rtx_SET (VOIDmode,
8106 out,
8107 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8108 emit_jump_insn (gen_jump (donelab));
8109 emit_barrier ();
8110
8111 emit_label (neglab);
8112
8113 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
8114 emit_insn (gen_rtx_SET (VOIDmode,
8115 i0,
8116 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8117 emit_insn (gen_movdi (i1, const1_rtx));
8118 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8119 emit_insn (gen_xordi3 (out, i0, i1));
8120
8121 emit_label (donelab);
8122 }
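
/* Editor's illustration of the large-value path above: an input >= 2^63
   does not fit a signed DImode fix, so 2^63 is subtracted before the
   conversion and the sign bit is restored afterwards, roughly

     out = (DImode) (in - 0x1p63) ^ ((DImode) 1 << 63);  */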
8123
8124 /* Return the string to output a compare and branch instruction to DEST.
8125 DEST is the destination insn (i.e. the label), INSN is the source,
8126 and OP is the conditional expression. */
8127
8128 const char *
8129 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8130 {
8131 enum machine_mode mode = GET_MODE (XEXP (op, 0));
8132 enum rtx_code code = GET_CODE (op);
8133 const char *cond_str, *tmpl;
8134 int far, emit_nop, len;
8135 static char string[64];
8136 char size_char;
8137
8138 /* Compare and Branch is limited to +-2KB. If it is too far away,
8139 change
8140
8141 cxbne X, Y, .LC30
8142
8143 to
8144
8145 cxbe X, Y, .+16
8146 nop
8147 ba,pt xcc, .LC30
8148 nop */
8149
8150 len = get_attr_length (insn);
8151
8152 far = len == 4;
8153 emit_nop = len == 2;
8154
8155 if (far)
8156 code = reverse_condition (code);
8157
8158 size_char = ((mode == SImode) ? 'w' : 'x');
8159
8160 switch (code)
8161 {
8162 case NE:
8163 cond_str = "ne";
8164 break;
8165
8166 case EQ:
8167 cond_str = "e";
8168 break;
8169
8170 case GE:
8171 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8172 cond_str = "pos";
8173 else
8174 cond_str = "ge";
8175 break;
8176
8177 case GT:
8178 cond_str = "g";
8179 break;
8180
8181 case LE:
8182 cond_str = "le";
8183 break;
8184
8185 case LT:
8186 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8187 cond_str = "neg";
8188 else
8189 cond_str = "l";
8190 break;
8191
8192 case GEU:
8193 cond_str = "cc";
8194 break;
8195
8196 case GTU:
8197 cond_str = "gu";
8198 break;
8199
8200 case LEU:
8201 cond_str = "leu";
8202 break;
8203
8204 case LTU:
8205 cond_str = "cs";
8206 break;
8207
8208 default:
8209 gcc_unreachable ();
8210 }
8211
8212 if (far)
8213 {
8214 int veryfar = 1, delta;
8215
8216 if (INSN_ADDRESSES_SET_P ())
8217 {
8218 delta = (INSN_ADDRESSES (INSN_UID (dest))
8219 - INSN_ADDRESSES (INSN_UID (insn)));
8220 /* Leave some instructions for "slop". */
8221 if (delta >= -260000 && delta < 260000)
8222 veryfar = 0;
8223 }
8224
8225 if (veryfar)
8226 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8227 else
8228 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8229 }
8230 else
8231 {
8232 if (emit_nop)
8233 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8234 else
8235 tmpl = "c%cb%s\t%%1, %%2, %%3";
8236 }
8237
8238 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8239
8240 return string;
8241 }
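
/* For instance (editor's note): a near SImode equality test produces the
   template "cwbe\t%1, %2, %3", which might render as

     cwbe  %o0, %o1, .LC30

   DImode comparisons use the 'x' size letter instead (cxbe, cxbne, ...).  */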
8242
8243 /* Return the string to output a conditional branch to LABEL, testing
8244 register REG. LABEL is the operand number of the label; REG is the
8245 operand number of the reg. OP is the conditional expression. The mode
8246 of REG says what kind of comparison we made.
8247
8248 DEST is the destination insn (i.e. the label), INSN is the source.
8249
8250 REVERSED is nonzero if we should reverse the sense of the comparison.
8251
8252 ANNUL is nonzero if we should generate an annulling branch. */
8253
8254 const char *
8255 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8256 int annul, rtx_insn *insn)
8257 {
8258 static char string[64];
8259 enum rtx_code code = GET_CODE (op);
8260 enum machine_mode mode = GET_MODE (XEXP (op, 0));
8261 rtx note;
8262 int far;
8263 char *p;
8264
  /* Branches on register are limited to +-128KB.  If one is too far away,
8266 change
8267
8268 brnz,pt %g1, .LC30
8269
8270 to
8271
8272 brz,pn %g1, .+12
8273 nop
8274 ba,pt %xcc, .LC30
8275
8276 and
8277
8278 brgez,a,pn %o1, .LC29
8279
8280 to
8281
8282 brlz,pt %o1, .+16
8283 nop
8284 ba,pt %xcc, .LC29 */
8285
8286 far = get_attr_length (insn) >= 3;
8287
8288 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8289 if (reversed ^ far)
8290 code = reverse_condition (code);
8291
8292 /* Only 64 bit versions of these instructions exist. */
8293 gcc_assert (mode == DImode);
8294
8295 /* Start by writing the branch condition. */
8296
8297 switch (code)
8298 {
8299 case NE:
8300 strcpy (string, "brnz");
8301 break;
8302
8303 case EQ:
8304 strcpy (string, "brz");
8305 break;
8306
8307 case GE:
8308 strcpy (string, "brgez");
8309 break;
8310
8311 case LT:
8312 strcpy (string, "brlz");
8313 break;
8314
8315 case LE:
8316 strcpy (string, "brlez");
8317 break;
8318
8319 case GT:
8320 strcpy (string, "brgz");
8321 break;
8322
8323 default:
8324 gcc_unreachable ();
8325 }
8326
8327 p = strchr (string, '\0');
8328
8329 /* Now add the annulling, reg, label, and nop. */
8330 if (annul && ! far)
8331 {
8332 strcpy (p, ",a");
8333 p += 2;
8334 }
8335
8336 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8337 {
8338 strcpy (p,
8339 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
8340 ? ",pt" : ",pn");
8341 p += 3;
8342 }
8343
8344 *p = p < string + 8 ? '\t' : ' ';
8345 p++;
8346 *p++ = '%';
8347 *p++ = '0' + reg;
8348 *p++ = ',';
8349 *p++ = ' ';
8350 if (far)
8351 {
8352 int veryfar = 1, delta;
8353
8354 if (INSN_ADDRESSES_SET_P ())
8355 {
8356 delta = (INSN_ADDRESSES (INSN_UID (dest))
8357 - INSN_ADDRESSES (INSN_UID (insn)));
8358 /* Leave some instructions for "slop". */
8359 if (delta >= -260000 && delta < 260000)
8360 veryfar = 0;
8361 }
8362
8363 strcpy (p, ".+12\n\t nop\n\t");
8364 /* Skip the next insn if requested or
8365 if we know that it will be a nop. */
8366 if (annul || ! final_sequence)
8367 p[3] = '6';
8368 p += 12;
8369 if (veryfar)
8370 {
8371 strcpy (p, "b\t");
8372 p += 2;
8373 }
8374 else
8375 {
8376 strcpy (p, "ba,pt\t%%xcc, ");
8377 p += 13;
8378 }
8379 }
8380 *p++ = '%';
8381 *p++ = 'l';
8382 *p++ = '0' + label;
8383 *p++ = '%';
8384 *p++ = '#';
8385 *p = '\0';
8386
8387 return string;
8388 }
8389
/* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
   Such instructions cannot be used in the delay slot of the return insn
   on v9.  If TEST is 0, also rename all %i[0-7] registers to their
   %o[0-7] counterparts.  */
8394
8395 static int
8396 epilogue_renumber (register rtx *where, int test)
8397 {
8398 register const char *fmt;
8399 register int i;
8400 register enum rtx_code code;
8401
8402 if (*where == 0)
8403 return 0;
8404
8405 code = GET_CODE (*where);
8406
8407 switch (code)
8408 {
8409 case REG:
8410 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8411 return 1;
8412 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8413 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
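      /* FALLTHRU */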
8414 case SCRATCH:
8415 case CC0:
8416 case PC:
8417 case CONST_INT:
8418 case CONST_DOUBLE:
8419 return 0;
8420
8421 /* Do not replace the frame pointer with the stack pointer because
8422 it can cause the delayed instruction to load below the stack.
8423 This occurs when instructions like:
8424
8425 (set (reg/i:SI 24 %i0)
8426 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8427 (const_int -20 [0xffffffec])) 0))
8428
8429 are in the return delayed slot. */
8430 case PLUS:
8431 if (GET_CODE (XEXP (*where, 0)) == REG
8432 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8433 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8434 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8435 return 1;
8436 break;
8437
8438 case MEM:
8439 if (SPARC_STACK_BIAS
8440 && GET_CODE (XEXP (*where, 0)) == REG
8441 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8442 return 1;
8443 break;
8444
8445 default:
8446 break;
8447 }
8448
8449 fmt = GET_RTX_FORMAT (code);
8450
8451 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8452 {
8453 if (fmt[i] == 'E')
8454 {
8455 register int j;
8456 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8457 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8458 return 1;
8459 }
8460 else if (fmt[i] == 'e'
8461 && epilogue_renumber (&(XEXP (*where, i)), test))
8462 return 1;
8463 }
8464 return 0;
8465 }
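
/* Editor's example: with TEST == 0, a delay-slot candidate such as

     (set (reg:SI 24 %i0) (reg:SI 25 %i1))

   is rewritten to use %o0 and %o1, since the v9 return insn restores the
   register window before the delay slot executes; any remaining use of
   %l0-%l7 or %o0-%o7 makes the insn unusable there, so 1 is returned.  */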
8466 \f
8467 /* Leaf functions and non-leaf functions have different needs. */
8468
8469 static const int
8470 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8471
8472 static const int
8473 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8474
8475 static const int *const reg_alloc_orders[] = {
8476 reg_leaf_alloc_order,
8477 reg_nonleaf_alloc_order};
8478
8479 void
8480 order_regs_for_local_alloc (void)
8481 {
8482 static int last_order_nonleaf = 1;
8483
8484 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8485 {
8486 last_order_nonleaf = !last_order_nonleaf;
8487 memcpy ((char *) reg_alloc_order,
8488 (const char *) reg_alloc_orders[last_order_nonleaf],
8489 FIRST_PSEUDO_REGISTER * sizeof (int));
8490 }
8491 }
8492 \f
8493 /* Return 1 if REG and MEM are legitimate enough to allow the various
8494 mem<-->reg splits to be run. */
8495
8496 int
8497 sparc_splitdi_legitimate (rtx reg, rtx mem)
8498 {
8499 /* Punt if we are here by mistake. */
8500 gcc_assert (reload_completed);
8501
8502 /* We must have an offsettable memory reference. */
8503 if (! offsettable_memref_p (mem))
8504 return 0;
8505
8506 /* If we have legitimate args for ldd/std, we do not want
8507 the split to happen. */
8508 if ((REGNO (reg) % 2) == 0
8509 && mem_min_alignment (mem, 8))
8510 return 0;
8511
8512 /* Success. */
8513 return 1;
8514 }
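
/* Editor's example: a DImode move between an even-numbered register pair
   and an 8-byte-aligned MEM is left intact so it can become a single
   ldd/std; an odd first register or a less-aligned MEM makes this
   return 1 and the move is split into two word-sized moves.  */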
8515
8516 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8517
8518 int
8519 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8520 {
8521 int regno1, regno2;
8522
8523 if (GET_CODE (reg1) == SUBREG)
8524 reg1 = SUBREG_REG (reg1);
8525 if (GET_CODE (reg1) != REG)
8526 return 0;
8527 regno1 = REGNO (reg1);
8528
8529 if (GET_CODE (reg2) == SUBREG)
8530 reg2 = SUBREG_REG (reg2);
8531 if (GET_CODE (reg2) != REG)
8532 return 0;
8533 regno2 = REGNO (reg2);
8534
8535 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8536 return 1;
8537
8538 if (TARGET_VIS3)
8539 {
8540 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8541 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8542 return 1;
8543 }
8544
8545 return 0;
8546 }
8547
8548 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8549 This makes them candidates for using ldd and std insns.
8550
8551 Note reg1 and reg2 *must* be hard registers. */
8552
8553 int
8554 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8555 {
8556 /* We might have been passed a SUBREG. */
8557 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8558 return 0;
8559
8560 if (REGNO (reg1) % 2 != 0)
8561 return 0;
8562
  /* Integer ldd is deprecated in SPARC V9.  */
8564 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8565 return 0;
8566
8567 return (REGNO (reg1) == REGNO (reg2) - 1);
8568 }
8569
8570 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8571 an ldd or std insn.
8572
8573 This can only happen when addr1 and addr2, the addresses in mem1
8574 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8575 addr1 must also be aligned on a 64-bit boundary.
8576
   Also, if dependent_reg_rtx is not null, it should not be used to
   compute the address for mem1, i.e. we cannot optimize a sequence
   like:
8580 ld [%o0], %o0
8581 ld [%o0 + 4], %o1
8582 to
8583 ldd [%o0], %o0
8584 nor:
8585 ld [%g3 + 4], %g3
8586 ld [%g3], %g2
8587 to
8588 ldd [%g3], %g2
8589
8590 But, note that the transformation from:
8591 ld [%g2 + 4], %g3
8592 ld [%g2], %g2
8593 to
8594 ldd [%g2], %g2
8595 is perfectly fine. Thus, the peephole2 patterns always pass us
8596 the destination register of the first load, never the second one.
8597
8598 For stores we don't have a similar problem, so dependent_reg_rtx is
8599 NULL_RTX. */
8600
8601 int
8602 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8603 {
8604 rtx addr1, addr2;
8605 unsigned int reg1;
8606 HOST_WIDE_INT offset1;
8607
8608 /* The mems cannot be volatile. */
8609 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8610 return 0;
8611
8612 /* MEM1 should be aligned on a 64-bit boundary. */
8613 if (MEM_ALIGN (mem1) < 64)
8614 return 0;
8615
8616 addr1 = XEXP (mem1, 0);
8617 addr2 = XEXP (mem2, 0);
8618
8619 /* Extract a register number and offset (if used) from the first addr. */
8620 if (GET_CODE (addr1) == PLUS)
8621 {
8622 /* If not a REG, return zero. */
8623 if (GET_CODE (XEXP (addr1, 0)) != REG)
8624 return 0;
8625 else
8626 {
8627 reg1 = REGNO (XEXP (addr1, 0));
8628 /* The offset must be constant! */
8629 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8630 return 0;
8631 offset1 = INTVAL (XEXP (addr1, 1));
8632 }
8633 }
8634 else if (GET_CODE (addr1) != REG)
8635 return 0;
8636 else
8637 {
8638 reg1 = REGNO (addr1);
8639 /* This was a simple (mem (reg)) expression. Offset is 0. */
8640 offset1 = 0;
8641 }
8642
  /* Make sure the second address is of the form (plus (reg) (const_int)).  */
8644 if (GET_CODE (addr2) != PLUS)
8645 return 0;
8646
8647 if (GET_CODE (XEXP (addr2, 0)) != REG
8648 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8649 return 0;
8650
8651 if (reg1 != REGNO (XEXP (addr2, 0)))
8652 return 0;
8653
8654 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8655 return 0;
8656
8657 /* The first offset must be evenly divisible by 8 to ensure the
8658 address is 64 bit aligned. */
8659 if (offset1 % 8 != 0)
8660 return 0;
8661
8662 /* The offset for the second addr must be 4 more than the first addr. */
8663 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8664 return 0;
8665
8666 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8667 instructions. */
8668 return 1;
8669 }
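
/* Editor's examples: the pair [%o0] / [%o0+4] qualifies (offset 0 is a
   multiple of 8), as does [%o1+8] / [%o1+12]; [%o0+4] / [%o0+8] is
   rejected because the first offset is not a multiple of 8, and
   [%o0] / [%o1+4] is rejected because the base registers differ.  */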
8670
8671 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8672
8673 rtx
8674 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, enum machine_mode mode)
8675 {
8676 rtx x = widen_memory_access (mem1, mode, 0);
8677 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8678 return x;
8679 }
8680
8681 /* Return 1 if reg is a pseudo, or is the first register in
8682 a hard register pair. This makes it suitable for use in
8683 ldd and std insns. */
8684
8685 int
8686 register_ok_for_ldd (rtx reg)
8687 {
8688 /* We might have been passed a SUBREG. */
8689 if (!REG_P (reg))
8690 return 0;
8691
8692 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8693 return (REGNO (reg) % 2 == 0);
8694
8695 return 1;
8696 }
8697
8698 /* Return 1 if OP, a MEM, has an address which is known to be
8699 aligned to an 8-byte boundary. */
8700
8701 int
8702 memory_ok_for_ldd (rtx op)
8703 {
8704 /* In 64-bit mode, we assume that the address is word-aligned. */
8705 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8706 return 0;
8707
8708 if (! can_create_pseudo_p ()
8709 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8710 return 0;
8711
8712 return 1;
8713 }
8714 \f
8715 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8716
8717 static bool
8718 sparc_print_operand_punct_valid_p (unsigned char code)
8719 {
8720 if (code == '#'
8721 || code == '*'
8722 || code == '('
8723 || code == ')'
8724 || code == '_'
8725 || code == '&')
8726 return true;
8727
8728 return false;
8729 }
8730
8731 /* Implement TARGET_PRINT_OPERAND.
8732 Print operand X (an rtx) in assembler syntax to file FILE.
8733 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8734 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8735
8736 static void
8737 sparc_print_operand (FILE *file, rtx x, int code)
8738 {
8739 switch (code)
8740 {
8741 case '#':
8742 /* Output an insn in a delay slot. */
8743 if (final_sequence)
8744 sparc_indent_opcode = 1;
8745 else
8746 fputs ("\n\t nop", file);
8747 return;
8748 case '*':
8749 /* Output an annul flag if there's nothing for the delay slot and we
8750 are optimizing. This is always used with '(' below.
8751 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8752 this is a dbx bug. So, we only do this when optimizing.
8753 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8754 Always emit a nop in case the next instruction is a branch. */
8755 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8756 fputs (",a", file);
8757 return;
8758 case '(':
8759 /* Output a 'nop' if there's nothing for the delay slot and we are
8760 not optimizing. This is always used with '*' above. */
8761 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8762 fputs ("\n\t nop", file);
8763 else if (final_sequence)
8764 sparc_indent_opcode = 1;
8765 return;
8766 case ')':
8767 /* Output the right displacement from the saved PC on function return.
8768 The caller may have placed an "unimp" insn immediately after the call
8769 so we have to account for it. This insn is used in the 32-bit ABI
       when calling a function that returns a non-zero-sized structure.  The
8771 64-bit ABI doesn't have it. Be careful to have this test be the same
8772 as that for the call. The exception is when sparc_std_struct_return
8773 is enabled, the psABI is followed exactly and the adjustment is made
8774 by the code in sparc_struct_value_rtx. The call emitted is the same
8775 when sparc_std_struct_return is enabled. */
8776 if (!TARGET_ARCH64
8777 && cfun->returns_struct
8778 && !sparc_std_struct_return
8779 && DECL_SIZE (DECL_RESULT (current_function_decl))
8780 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8781 == INTEGER_CST
8782 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8783 fputs ("12", file);
8784 else
8785 fputc ('8', file);
8786 return;
8787 case '_':
8788 /* Output the Embedded Medium/Anywhere code model base register. */
8789 fputs (EMBMEDANY_BASE_REG, file);
8790 return;
8791 case '&':
8792 /* Print some local dynamic TLS name. */
8793 assemble_name (file, get_some_local_dynamic_name ());
8794 return;
8795
8796 case 'Y':
8797 /* Adjust the operand to take into account a RESTORE operation. */
8798 if (GET_CODE (x) == CONST_INT)
8799 break;
8800 else if (GET_CODE (x) != REG)
8801 output_operand_lossage ("invalid %%Y operand");
8802 else if (REGNO (x) < 8)
8803 fputs (reg_names[REGNO (x)], file);
8804 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8805 fputs (reg_names[REGNO (x)-16], file);
8806 else
8807 output_operand_lossage ("invalid %%Y operand");
8808 return;
8809 case 'L':
8810 /* Print out the low order register name of a register pair. */
8811 if (WORDS_BIG_ENDIAN)
8812 fputs (reg_names[REGNO (x)+1], file);
8813 else
8814 fputs (reg_names[REGNO (x)], file);
8815 return;
8816 case 'H':
8817 /* Print out the high order register name of a register pair. */
8818 if (WORDS_BIG_ENDIAN)
8819 fputs (reg_names[REGNO (x)], file);
8820 else
8821 fputs (reg_names[REGNO (x)+1], file);
8822 return;
8823 case 'R':
8824 /* Print out the second register name of a register pair or quad.
8825 I.e., R (%o0) => %o1. */
8826 fputs (reg_names[REGNO (x)+1], file);
8827 return;
8828 case 'S':
8829 /* Print out the third register name of a register quad.
8830 I.e., S (%o0) => %o2. */
8831 fputs (reg_names[REGNO (x)+2], file);
8832 return;
8833 case 'T':
8834 /* Print out the fourth register name of a register quad.
8835 I.e., T (%o0) => %o3. */
8836 fputs (reg_names[REGNO (x)+3], file);
8837 return;
8838 case 'x':
8839 /* Print a condition code register. */
8840 if (REGNO (x) == SPARC_ICC_REG)
8841 {
8842 /* We don't handle CC[X]_NOOVmode because they're not supposed
8843 to occur here. */
8844 if (GET_MODE (x) == CCmode)
8845 fputs ("%icc", file);
8846 else if (GET_MODE (x) == CCXmode)
8847 fputs ("%xcc", file);
8848 else
8849 gcc_unreachable ();
8850 }
8851 else
8852 /* %fccN register */
8853 fputs (reg_names[REGNO (x)], file);
8854 return;
8855 case 'm':
8856 /* Print the operand's address only. */
8857 output_address (XEXP (x, 0));
8858 return;
8859 case 'r':
8860 /* In this case we need a register. Use %g0 if the
8861 operand is const0_rtx. */
8862 if (x == const0_rtx
8863 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8864 {
8865 fputs ("%g0", file);
8866 return;
8867 }
8868 else
8869 break;
8870
8871 case 'A':
8872 switch (GET_CODE (x))
8873 {
8874 case IOR: fputs ("or", file); break;
8875 case AND: fputs ("and", file); break;
8876 case XOR: fputs ("xor", file); break;
8877 default: output_operand_lossage ("invalid %%A operand");
8878 }
8879 return;
8880
8881 case 'B':
8882 switch (GET_CODE (x))
8883 {
8884 case IOR: fputs ("orn", file); break;
8885 case AND: fputs ("andn", file); break;
8886 case XOR: fputs ("xnor", file); break;
8887 default: output_operand_lossage ("invalid %%B operand");
8888 }
8889 return;
8890
8891 /* This is used by the conditional move instructions. */
8892 case 'C':
8893 {
8894 enum rtx_code rc = GET_CODE (x);
8895
8896 switch (rc)
8897 {
8898 case NE: fputs ("ne", file); break;
8899 case EQ: fputs ("e", file); break;
8900 case GE: fputs ("ge", file); break;
8901 case GT: fputs ("g", file); break;
8902 case LE: fputs ("le", file); break;
8903 case LT: fputs ("l", file); break;
8904 case GEU: fputs ("geu", file); break;
8905 case GTU: fputs ("gu", file); break;
8906 case LEU: fputs ("leu", file); break;
8907 case LTU: fputs ("lu", file); break;
8908 case LTGT: fputs ("lg", file); break;
8909 case UNORDERED: fputs ("u", file); break;
8910 case ORDERED: fputs ("o", file); break;
8911 case UNLT: fputs ("ul", file); break;
8912 case UNLE: fputs ("ule", file); break;
8913 case UNGT: fputs ("ug", file); break;
8914 case UNGE: fputs ("uge", file); break;
8915 case UNEQ: fputs ("ue", file); break;
8916 default: output_operand_lossage ("invalid %%C operand");
8917 }
8918 return;
8919 }
8920
    /* These are used by the movr instruction pattern.  */
8922 case 'D':
8923 {
8924 enum rtx_code rc = GET_CODE (x);
8925 switch (rc)
8926 {
8927 case NE: fputs ("ne", file); break;
8928 case EQ: fputs ("e", file); break;
8929 case GE: fputs ("gez", file); break;
8930 case LT: fputs ("lz", file); break;
8931 case LE: fputs ("lez", file); break;
8932 case GT: fputs ("gz", file); break;
8933 default: output_operand_lossage ("invalid %%D operand");
8934 }
8935 return;
8936 }
8937
8938 case 'b':
8939 {
8940 /* Print a sign-extended character. */
8941 int i = trunc_int_for_mode (INTVAL (x), QImode);
8942 fprintf (file, "%d", i);
8943 return;
8944 }
8945
8946 case 'f':
8947 /* Operand must be a MEM; write its address. */
8948 if (GET_CODE (x) != MEM)
8949 output_operand_lossage ("invalid %%f operand");
8950 output_address (XEXP (x, 0));
8951 return;
8952
8953 case 's':
8954 {
8955 /* Print a sign-extended 32-bit value. */
8956 HOST_WIDE_INT i;
8957 if (GET_CODE(x) == CONST_INT)
8958 i = INTVAL (x);
8959 else if (GET_CODE(x) == CONST_DOUBLE)
8960 i = CONST_DOUBLE_LOW (x);
8961 else
8962 {
8963 output_operand_lossage ("invalid %%s operand");
8964 return;
8965 }
8966 i = trunc_int_for_mode (i, SImode);
8967 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
8968 return;
8969 }
8970
8971 case 0:
8972 /* Do nothing special. */
8973 break;
8974
8975 default:
8976 /* Undocumented flag. */
8977 output_operand_lossage ("invalid operand output code");
8978 }
8979
8980 if (GET_CODE (x) == REG)
8981 fputs (reg_names[REGNO (x)], file);
8982 else if (GET_CODE (x) == MEM)
8983 {
8984 fputc ('[', file);
8985 /* Poor Sun assembler doesn't understand absolute addressing. */
8986 if (CONSTANT_P (XEXP (x, 0)))
8987 fputs ("%g0+", file);
8988 output_address (XEXP (x, 0));
8989 fputc (']', file);
8990 }
8991 else if (GET_CODE (x) == HIGH)
8992 {
8993 fputs ("%hi(", file);
8994 output_addr_const (file, XEXP (x, 0));
8995 fputc (')', file);
8996 }
8997 else if (GET_CODE (x) == LO_SUM)
8998 {
8999 sparc_print_operand (file, XEXP (x, 0), 0);
9000 if (TARGET_CM_MEDMID)
9001 fputs ("+%l44(", file);
9002 else
9003 fputs ("+%lo(", file);
9004 output_addr_const (file, XEXP (x, 1));
9005 fputc (')', file);
9006 }
9007 else if (GET_CODE (x) == CONST_DOUBLE
9008 && (GET_MODE (x) == VOIDmode
9009 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
9010 {
9011 if (CONST_DOUBLE_HIGH (x) == 0)
9012 fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
9013 else if (CONST_DOUBLE_HIGH (x) == -1
9014 && CONST_DOUBLE_LOW (x) < 0)
9015 fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
9016 else
9017 output_operand_lossage ("long long constant not a valid immediate operand");
9018 }
9019 else if (GET_CODE (x) == CONST_DOUBLE)
9020 output_operand_lossage ("floating point constant not a valid immediate operand");
9021 else { output_addr_const (file, x); }
9022 }
9023
9024 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9025
9026 static void
9027 sparc_print_operand_address (FILE *file, rtx x)
9028 {
9029 register rtx base, index = 0;
9030 int offset = 0;
9031 register rtx addr = x;
9032
9033 if (REG_P (addr))
9034 fputs (reg_names[REGNO (addr)], file);
9035 else if (GET_CODE (addr) == PLUS)
9036 {
9037 if (CONST_INT_P (XEXP (addr, 0)))
9038 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9039 else if (CONST_INT_P (XEXP (addr, 1)))
9040 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9041 else
9042 base = XEXP (addr, 0), index = XEXP (addr, 1);
9043 if (GET_CODE (base) == LO_SUM)
9044 {
9045 gcc_assert (USE_AS_OFFSETABLE_LO10
9046 && TARGET_ARCH64
9047 && ! TARGET_CM_MEDMID);
9048 output_operand (XEXP (base, 0), 0);
9049 fputs ("+%lo(", file);
9050 output_address (XEXP (base, 1));
9051 fprintf (file, ")+%d", offset);
9052 }
9053 else
9054 {
9055 fputs (reg_names[REGNO (base)], file);
9056 if (index == 0)
9057 fprintf (file, "%+d", offset);
9058 else if (REG_P (index))
9059 fprintf (file, "+%s", reg_names[REGNO (index)]);
9060 else if (GET_CODE (index) == SYMBOL_REF
9061 || GET_CODE (index) == LABEL_REF
9062 || GET_CODE (index) == CONST)
9063 fputc ('+', file), output_addr_const (file, index);
9064 else gcc_unreachable ();
9065 }
9066 }
9067 else if (GET_CODE (addr) == MINUS
9068 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9069 {
9070 output_addr_const (file, XEXP (addr, 0));
9071 fputs ("-(", file);
9072 output_addr_const (file, XEXP (addr, 1));
9073 fputs ("-.)", file);
9074 }
9075 else if (GET_CODE (addr) == LO_SUM)
9076 {
9077 output_operand (XEXP (addr, 0), 0);
9078 if (TARGET_CM_MEDMID)
9079 fputs ("+%l44(", file);
9080 else
9081 fputs ("+%lo(", file);
9082 output_address (XEXP (addr, 1));
9083 fputc (')', file);
9084 }
9085 else if (flag_pic
9086 && GET_CODE (addr) == CONST
9087 && GET_CODE (XEXP (addr, 0)) == MINUS
9088 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9089 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9090 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9091 {
9092 addr = XEXP (addr, 0);
9093 output_addr_const (file, XEXP (addr, 0));
      /* Group the args of the second CONST in parentheses.  */
9095 fputs ("-(", file);
9096 /* Skip past the second CONST--it does nothing for us. */
9097 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9098 /* Close the parenthesis. */
9099 fputc (')', file);
9100 }
9101 else
9102 {
9103 output_addr_const (file, addr);
9104 }
9105 }
9106 \f
9107 /* Target hook for assembling integer objects. The sparc version has
9108 special handling for aligned DI-mode objects. */
9109
9110 static bool
9111 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9112 {
9113 /* ??? We only output .xword's for symbols and only then in environments
9114 where the assembler can handle them. */
9115 if (aligned_p && size == 8
9116 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
9117 {
9118 if (TARGET_V9)
9119 {
9120 assemble_integer_with_op ("\t.xword\t", x);
9121 return true;
9122 }
9123 else
9124 {
9125 assemble_aligned_integer (4, const0_rtx);
9126 assemble_aligned_integer (4, x);
9127 return true;
9128 }
9129 }
9130 return default_assemble_integer (x, size, aligned_p);
9131 }
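
/* Editor's illustration: for an aligned 8-byte symbolic value, a V9
   target emits

     .xword  some_symbol            ! some_symbol is a placeholder

   while a pre-V9 target emits a zero high word followed by the value:

     .word   0
     .word   some_symbol  */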
9132 \f
9133 /* Return the value of a code used in the .proc pseudo-op that says
9134 what kind of result this function returns. For non-C types, we pick
9135 the closest C type. */
9136
9137 #ifndef SHORT_TYPE_SIZE
9138 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9139 #endif
9140
9141 #ifndef INT_TYPE_SIZE
9142 #define INT_TYPE_SIZE BITS_PER_WORD
9143 #endif
9144
9145 #ifndef LONG_TYPE_SIZE
9146 #define LONG_TYPE_SIZE BITS_PER_WORD
9147 #endif
9148
9149 #ifndef LONG_LONG_TYPE_SIZE
9150 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9151 #endif
9152
9153 #ifndef FLOAT_TYPE_SIZE
9154 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9155 #endif
9156
9157 #ifndef DOUBLE_TYPE_SIZE
9158 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9159 #endif
9160
9161 #ifndef LONG_DOUBLE_TYPE_SIZE
9162 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9163 #endif
9164
9165 unsigned long
9166 sparc_type_code (register tree type)
9167 {
9168 register unsigned long qualifiers = 0;
9169 register unsigned shift;
9170
9171 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9172 setting more, since some assemblers will give an error for this. Also,
9173 we must be careful to avoid shifts of 32 bits or more to avoid getting
9174 unpredictable results. */
9175
9176 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9177 {
9178 switch (TREE_CODE (type))
9179 {
9180 case ERROR_MARK:
9181 return qualifiers;
9182
9183 case ARRAY_TYPE:
9184 qualifiers |= (3 << shift);
9185 break;
9186
9187 case FUNCTION_TYPE:
9188 case METHOD_TYPE:
9189 qualifiers |= (2 << shift);
9190 break;
9191
9192 case POINTER_TYPE:
9193 case REFERENCE_TYPE:
9194 case OFFSET_TYPE:
9195 qualifiers |= (1 << shift);
9196 break;
9197
9198 case RECORD_TYPE:
9199 return (qualifiers | 8);
9200
9201 case UNION_TYPE:
9202 case QUAL_UNION_TYPE:
9203 return (qualifiers | 9);
9204
9205 case ENUMERAL_TYPE:
9206 return (qualifiers | 10);
9207
9208 case VOID_TYPE:
9209 return (qualifiers | 16);
9210
9211 case INTEGER_TYPE:
9212 /* If this is a range type, consider it to be the underlying
9213 type. */
9214 if (TREE_TYPE (type) != 0)
9215 break;
9216
9217 /* Carefully distinguish all the standard types of C,
9218 without messing up if the language is not C. We do this by
9219 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9220 look at both the names and the above fields, but that's redundant.
9221 Any type whose size is between two C types will be considered
9222 to be the wider of the two types. Also, we do not have a
9223 special code to use for "long long", so anything wider than
9224 long is treated the same. Note that we can't distinguish
9225 between "int" and "long" in this code if they are the same
9226 size, but that's fine, since neither can the assembler. */
9227
9228 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9229 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9230
9231 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9232 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9233
9234 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9235 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9236
9237 else
9238 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9239
9240 case REAL_TYPE:
9241 /* If this is a range type, consider it to be the underlying
9242 type. */
9243 if (TREE_TYPE (type) != 0)
9244 break;
9245
9246 /* Carefully distinguish all the standard types of C,
9247 without messing up if the language is not C. */
9248
9249 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9250 return (qualifiers | 6);
9251
9252 else
9253 return (qualifiers | 7);
9254
9255 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9256 /* ??? We need to distinguish between double and float complex types,
9257 but I don't know how yet because I can't reach this code from
9258 existing front-ends. */
9259 return (qualifiers | 7); /* Who knows? */
9260
9261 case VECTOR_TYPE:
9262 case BOOLEAN_TYPE: /* Boolean truth value type. */
9263 case LANG_TYPE:
9264 case NULLPTR_TYPE:
9265 return qualifiers;
9266
9267 default:
9268 gcc_unreachable (); /* Not a type! */
9269 }
9270 }
9271
9272 return qualifiers;
9273 }
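
/* Worked example (editor's note, assuming 32-bit int): for the C type
   'int *' the first iteration sees POINTER_TYPE and sets 1 << 6, and the
   second sees INTEGER_TYPE with TYPE_PRECISION equal to INT_TYPE_SIZE,
   returning 0x40 | 4 = 0x44.  Likewise 'unsigned char **' encodes as
   (1 << 6) | (1 << 8) | 12 = 0x14c.  */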
9274 \f
9275 /* Nested function support. */
9276
9277 /* Emit RTL insns to initialize the variable parts of a trampoline.
9278 FNADDR is an RTX for the address of the function's pure code.
9279 CXT is an RTX for the static chain value for the function.
9280
9281 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9282 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9283 (to store insns). This is a bit excessive. Perhaps a different
9284 mechanism would be better here.
9285
9286 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9287
9288 static void
9289 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9290 {
9291 /* SPARC 32-bit trampoline:
9292
9293 sethi %hi(fn), %g1
9294 sethi %hi(static), %g2
9295 jmp %g1+%lo(fn)
9296 or %g2, %lo(static), %g2
9297
9298 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9299 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9300 */
9301
9302 emit_move_insn
9303 (adjust_address (m_tramp, SImode, 0),
9304 expand_binop (SImode, ior_optab,
9305 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9306 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9307 NULL_RTX, 1, OPTAB_DIRECT));
9308
9309 emit_move_insn
9310 (adjust_address (m_tramp, SImode, 4),
9311 expand_binop (SImode, ior_optab,
9312 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9313 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9314 NULL_RTX, 1, OPTAB_DIRECT));
9315
9316 emit_move_insn
9317 (adjust_address (m_tramp, SImode, 8),
9318 expand_binop (SImode, ior_optab,
9319 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9320 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9321 NULL_RTX, 1, OPTAB_DIRECT));
9322
9323 emit_move_insn
9324 (adjust_address (m_tramp, SImode, 12),
9325 expand_binop (SImode, ior_optab,
9326 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9327 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9328 NULL_RTX, 1, OPTAB_DIRECT));
9329
9330 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9331 aligned on a 16 byte boundary so one flush clears it all. */
9332 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9333 if (sparc_cpu != PROCESSOR_ULTRASPARC
9334 && sparc_cpu != PROCESSOR_ULTRASPARC3
9335 && sparc_cpu != PROCESSOR_NIAGARA
9336 && sparc_cpu != PROCESSOR_NIAGARA2
9337 && sparc_cpu != PROCESSOR_NIAGARA3
9338 && sparc_cpu != PROCESSOR_NIAGARA4)
9339 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9340
9341 /* Call __enable_execute_stack after writing onto the stack to make sure
9342 the stack address is accessible. */
9343 #ifdef HAVE_ENABLE_EXECUTE_STACK
9344 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9345 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9346 #endif
9347
9348 }
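/* Editorial sketch (not part of GCC): a worked example of the encoding
   above, assuming a hypothetical FNADDR of 0x40001234.  The address is
   split into a 22-bit high part merged into the SETHI opcode and a
   10-bit low part merged into the JMPL immediate:

     word at +0:  0x03000000 | (0x40001234 >> 10)    == 0x03100004
                  ->  sethi  %hi(0x40001234), %g1
     word at +8:  0x81c06000 | (0x40001234 & 0x3ff)  == 0x81c06234
                  ->  jmp    %g1 + %lo(0x40001234)

   The static chain words at +4 and +12 are built the same way from CXT,
   using the opcode templates 0x05000000 (sethi to %g2) and 0x8410a000
   (or into %g2).  */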
9349
9350 /* The 64-bit version is simpler because it makes more sense to load the
9351 values as "immediate" data out of the trampoline. It's also easier since
9352 we can read the PC without clobbering a register. */
9353
9354 static void
9355 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9356 {
9357 /* SPARC 64-bit trampoline:
9358
9359 rd %pc, %g1
9360 ldx [%g1+24], %g5
9361 jmp %g5
9362 ldx [%g1+16], %g5
9363 +16 bytes data
9364 */
9365
9366 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9367 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9368 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9369 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9370 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9371 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9372 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9373 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9374 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9375 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9376 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9377
9378 if (sparc_cpu != PROCESSOR_ULTRASPARC
9379 && sparc_cpu != PROCESSOR_ULTRASPARC3
9380 && sparc_cpu != PROCESSOR_NIAGARA
9381 && sparc_cpu != PROCESSOR_NIAGARA2
9382 && sparc_cpu != PROCESSOR_NIAGARA3
9383 && sparc_cpu != PROCESSOR_NIAGARA4)
9384 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9385
9386 /* Call __enable_execute_stack after writing onto the stack to make sure
9387 the stack address is accessible. */
9388 #ifdef HAVE_ENABLE_EXECUTE_STACK
9389 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9390 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9391 #endif
9392 }
9393
9394 /* Worker for TARGET_TRAMPOLINE_INIT. */
9395
9396 static void
9397 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9398 {
9399 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9400 cxt = force_reg (Pmode, cxt);
9401 if (TARGET_ARCH64)
9402 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9403 else
9404 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9405 }
9406 \f
9407 /* Adjust the cost of a scheduling dependency. Return the new cost of
9408 the dependency LINK of INSN on DEP_INSN. COST is the current cost. */
9409
9410 static int
9411 supersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
9412 {
9413 enum attr_type insn_type;
9414
9415 if (! recog_memoized (insn))
9416 return 0;
9417
9418 insn_type = get_attr_type (insn);
9419
9420 if (REG_NOTE_KIND (link) == 0)
9421 {
9422 /* Data dependency; DEP_INSN writes a register that INSN reads some
9423 cycles later. */
9424
9425 /* If a load, then the dependence must be on the memory address;
9426 add an extra "cycle". Note that the cost could be two cycles
9427 if the reg was written late in an instruction group; we cannot
9428 tell that from here. */
9429 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9430 return cost + 3;
9431
9432 /* Get the delay only if the address of the store is the dependence. */
9433 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9434 {
9435 rtx pat = PATTERN (insn);
9436 rtx dep_pat = PATTERN (dep_insn);
9437
9438 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9439 return cost; /* This should not happen! */
9440
9441 /* The dependency between the two instructions was on the data that
9442 is being stored. Assume that this implies that the address of the
9443 store is not dependent. */
9444 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9445 return cost;
9446
9447 return cost + 3; /* An approximation. */
9448 }
9449
9450 /* A shift instruction cannot receive its data from an instruction
9451 in the same cycle; add a one cycle penalty. */
9452 if (insn_type == TYPE_SHIFT)
9453 return cost + 3; /* Split before cascade into shift. */
9454 }
9455 else
9456 {
9457 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9458 INSN writes some cycles later. */
9459
9460 /* These are only significant for the FPU; writing an FP reg before
9461 the FPU has finished with it stalls the processor. */
9462
9463 /* Reusing an integer register causes no problems. */
9464 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9465 return 0;
9466 }
9467
9468 return cost;
9469 }
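/* Illustrative example (editorial): for the true dependency

     add  %o1, %o2, %o3
     ld   [%o3], %o4

   the load consumes %o3 through its memory address, so the code above
   returns COST + 3 to discourage the scheduler from placing the two
   insns back-to-back.  */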
9470
9471 static int
9472 hypersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
9473 {
9474 enum attr_type insn_type, dep_type;
9475 rtx pat = PATTERN (insn);
9476 rtx dep_pat = PATTERN (dep_insn);
9477
9478 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9479 return cost;
9480
9481 insn_type = get_attr_type (insn);
9482 dep_type = get_attr_type (dep_insn);
9483
9484 switch (REG_NOTE_KIND (link))
9485 {
9486 case 0:
9487 /* Data dependency; DEP_INSN writes a register that INSN reads some
9488 cycles later. */
9489
9490 switch (insn_type)
9491 {
9492 case TYPE_STORE:
9493 case TYPE_FPSTORE:
9494 /* Get the delay iff the address of the store is the dependence. */
9495 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9496 return cost;
9497
9498 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9499 return cost;
9500 return cost + 3;
9501
9502 case TYPE_LOAD:
9503 case TYPE_SLOAD:
9504 case TYPE_FPLOAD:
9505 /* If a load, then the dependence must be on the memory address. If
9506 the addresses aren't equal, then it might be a false dependency. */
9507 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9508 {
9509 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9510 || GET_CODE (SET_DEST (dep_pat)) != MEM
9511 || GET_CODE (SET_SRC (pat)) != MEM
9512 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9513 XEXP (SET_SRC (pat), 0)))
9514 return cost + 2;
9515
9516 return cost + 8;
9517 }
9518 break;
9519
9520 case TYPE_BRANCH:
9521 /* Compare to branch latency is 0. There is no benefit from
9522 separating compare and branch. */
9523 if (dep_type == TYPE_COMPARE)
9524 return 0;
9525 /* Floating point compare to branch latency is less than
9526 compare to conditional move. */
9527 if (dep_type == TYPE_FPCMP)
9528 return cost - 1;
9529 break;
9530 default:
9531 break;
9532 }
9533 break;
9534
9535 case REG_DEP_ANTI:
9536 /* Anti-dependencies only penalize the FPU. */
9537 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9538 return 0;
9539 break;
9540
9541 default:
9542 break;
9543 }
9544
9545 return cost;
9546 }
9547
9548 static int
9549 sparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
9550 {
9551 switch (sparc_cpu)
9552 {
9553 case PROCESSOR_SUPERSPARC:
9554 cost = supersparc_adjust_cost (insn, link, dep, cost);
9555 break;
9556 case PROCESSOR_HYPERSPARC:
9557 case PROCESSOR_SPARCLITE86X:
9558 cost = hypersparc_adjust_cost (insn, link, dep, cost);
9559 break;
9560 default:
9561 break;
9562 }
9563 return cost;
9564 }
9565
9566 static void
9567 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9568 int sched_verbose ATTRIBUTE_UNUSED,
9569 int max_ready ATTRIBUTE_UNUSED)
9570 {}
9571
9572 static int
9573 sparc_use_sched_lookahead (void)
9574 {
9575 if (sparc_cpu == PROCESSOR_NIAGARA
9576 || sparc_cpu == PROCESSOR_NIAGARA2
9577 || sparc_cpu == PROCESSOR_NIAGARA3)
9578 return 0;
9579 if (sparc_cpu == PROCESSOR_NIAGARA4)
9580 return 2;
9581 if (sparc_cpu == PROCESSOR_ULTRASPARC
9582 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9583 return 4;
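/* The test below is a compact set-membership check: (1 << sparc_cpu)
   & MASK is nonzero exactly when sparc_cpu is one of the processors
   named in MASK.  */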
9584 if ((1 << sparc_cpu) &
9585 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9586 (1 << PROCESSOR_SPARCLITE86X)))
9587 return 3;
9588 return 0;
9589 }
9590
9591 static int
9592 sparc_issue_rate (void)
9593 {
9594 switch (sparc_cpu)
9595 {
9596 case PROCESSOR_NIAGARA:
9597 case PROCESSOR_NIAGARA2:
9598 case PROCESSOR_NIAGARA3:
9599 default:
9600 return 1;
9601 case PROCESSOR_NIAGARA4:
9602 case PROCESSOR_V9:
9603 /* Assume V9 processors are capable of at least dual-issue. */
9604 return 2;
9605 case PROCESSOR_SUPERSPARC:
9606 return 3;
9607 case PROCESSOR_HYPERSPARC:
9608 case PROCESSOR_SPARCLITE86X:
9609 return 2;
9610 case PROCESSOR_ULTRASPARC:
9611 case PROCESSOR_ULTRASPARC3:
9612 return 4;
9613 }
9614 }
9615
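/* Return 1 if INSN (a SET) is known to zero out the high 32 bits of
   its destination, -1 if it sign-extends an SImode value, and 0 if
   nothing is known.  Used by sparc_check_64 below.  */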
9616 static int
9617 set_extends (rtx_insn *insn)
9618 {
9619 register rtx pat = PATTERN (insn);
9620
9621 switch (GET_CODE (SET_SRC (pat)))
9622 {
9623 /* Load and some shift instructions zero extend. */
9624 case MEM:
9625 case ZERO_EXTEND:
9626 /* sethi clears the high bits.  */
9627 case HIGH:
9628 /* LO_SUM is used with sethi; sethi clears the high
9629 bits and the values used with lo_sum are positive.  */
9630 case LO_SUM:
9631 /* Store-flag insns store 0 or 1.  */
9632 case LT: case LTU:
9633 case GT: case GTU:
9634 case LE: case LEU:
9635 case GE: case GEU:
9636 case EQ:
9637 case NE:
9638 return 1;
9639 case AND:
9640 {
9641 rtx op0 = XEXP (SET_SRC (pat), 0);
9642 rtx op1 = XEXP (SET_SRC (pat), 1);
9643 if (GET_CODE (op1) == CONST_INT)
9644 return INTVAL (op1) >= 0;
9645 if (GET_CODE (op0) != REG)
9646 return 0;
9647 if (sparc_check_64 (op0, insn) == 1)
9648 return 1;
9649 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9650 }
9651 case IOR:
9652 case XOR:
9653 {
9654 rtx op0 = XEXP (SET_SRC (pat), 0);
9655 rtx op1 = XEXP (SET_SRC (pat), 1);
9656 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9657 return 0;
9658 if (GET_CODE (op1) == CONST_INT)
9659 return INTVAL (op1) >= 0;
9660 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9661 }
9662 case LSHIFTRT:
9663 return GET_MODE (SET_SRC (pat)) == SImode;
9664 /* Positive integers leave the high bits zero. */
9665 case CONST_DOUBLE:
9666 return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
9667 case CONST_INT:
9668 return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
9669 case ASHIFTRT:
9670 case SIGN_EXTEND:
9671 return - (GET_MODE (SET_SRC (pat)) == SImode);
9672 case REG:
9673 return sparc_check_64 (SET_SRC (pat), insn);
9674 default:
9675 return 0;
9676 }
9677 }
9678
9679 /* We _ought_ to have only one kind per function, but... */
9680 static GTY(()) rtx sparc_addr_diff_list;
9681 static GTY(()) rtx sparc_addr_list;
9682
9683 void
9684 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9685 {
9686 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9687 if (diff)
9688 sparc_addr_diff_list
9689 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9690 else
9691 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9692 }
9693
9694 static void
9695 sparc_output_addr_vec (rtx vec)
9696 {
9697 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9698 int idx, vlen = XVECLEN (body, 0);
9699
9700 #ifdef ASM_OUTPUT_ADDR_VEC_START
9701 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9702 #endif
9703
9704 #ifdef ASM_OUTPUT_CASE_LABEL
9705 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9706 NEXT_INSN (lab));
9707 #else
9708 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9709 #endif
9710
9711 for (idx = 0; idx < vlen; idx++)
9712 {
9713 ASM_OUTPUT_ADDR_VEC_ELT
9714 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9715 }
9716
9717 #ifdef ASM_OUTPUT_ADDR_VEC_END
9718 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9719 #endif
9720 }
9721
9722 static void
9723 sparc_output_addr_diff_vec (rtx vec)
9724 {
9725 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9726 rtx base = XEXP (XEXP (body, 0), 0);
9727 int idx, vlen = XVECLEN (body, 1);
9728
9729 #ifdef ASM_OUTPUT_ADDR_VEC_START
9730 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9731 #endif
9732
9733 #ifdef ASM_OUTPUT_CASE_LABEL
9734 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9735 NEXT_INSN (lab));
9736 #else
9737 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9738 #endif
9739
9740 for (idx = 0; idx < vlen; idx++)
9741 {
9742 ASM_OUTPUT_ADDR_DIFF_ELT
9743 (asm_out_file,
9744 body,
9745 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9746 CODE_LABEL_NUMBER (base));
9747 }
9748
9749 #ifdef ASM_OUTPUT_ADDR_VEC_END
9750 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9751 #endif
9752 }
9753
9754 static void
9755 sparc_output_deferred_case_vectors (void)
9756 {
9757 rtx t;
9758 int align;
9759
9760 if (sparc_addr_list == NULL_RTX
9761 && sparc_addr_diff_list == NULL_RTX)
9762 return;
9763
9764 /* Align to cache line in the function's code section. */
9765 switch_to_section (current_function_section ());
9766
9767 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9768 if (align > 0)
9769 ASM_OUTPUT_ALIGN (asm_out_file, align);
9770
9771 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9772 sparc_output_addr_vec (XEXP (t, 0));
9773 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9774 sparc_output_addr_diff_vec (XEXP (t, 0));
9775
9776 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9777 }
9778
9779 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9780 unknown. Return 1 if the high bits are zero, -1 if the register is
9781 sign extended. */
9782 int
9783 sparc_check_64 (rtx x, rtx_insn *insn)
9784 {
9785 /* If a register is set only once it is safe to ignore insns this
9786 code does not know how to handle. The loop will either recognize
9787 the single set and return the correct value or fail to recognize
9788 it and return 0. */
9789 int set_once = 0;
9790 rtx y = x;
9791
9792 gcc_assert (GET_CODE (x) == REG);
9793
9794 if (GET_MODE (x) == DImode)
9795 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9796
9797 if (flag_expensive_optimizations
9798 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9799 set_once = 1;
9800
9801 if (insn == 0)
9802 {
9803 if (set_once)
9804 insn = get_last_insn_anywhere ();
9805 else
9806 return 0;
9807 }
9808
9809 while ((insn = PREV_INSN (insn)))
9810 {
9811 switch (GET_CODE (insn))
9812 {
9813 case JUMP_INSN:
9814 case NOTE:
9815 break;
9816 case CODE_LABEL:
9817 case CALL_INSN:
9818 default:
9819 if (! set_once)
9820 return 0;
9821 break;
9822 case INSN:
9823 {
9824 rtx pat = PATTERN (insn);
9825 if (GET_CODE (pat) != SET)
9826 return 0;
9827 if (rtx_equal_p (x, SET_DEST (pat)))
9828 return set_extends (insn);
9829 if (y && rtx_equal_p (y, SET_DEST (pat)))
9830 return set_extends (insn);
9831 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9832 return 0;
9833 }
9834 }
9835 }
9836 return 0;
9837 }
9838
9839 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9840 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
9841
9842 const char *
9843 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
9844 {
9845 static char asm_code[60];
9846
9847 /* The scratch register is only required when the destination
9848 register is not a 64-bit global or out register. */
9849 if (which_alternative != 2)
9850 operands[3] = operands[0];
9851
9852 /* We can only shift by constants <= 63. */
9853 if (GET_CODE (operands[2]) == CONST_INT)
9854 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9855
9856 if (GET_CODE (operands[1]) == CONST_INT)
9857 {
9858 output_asm_insn ("mov\t%1, %3", operands);
9859 }
9860 else
9861 {
9862 output_asm_insn ("sllx\t%H1, 32, %3", operands);
9863 if (sparc_check_64 (operands[1], insn) <= 0)
9864 output_asm_insn ("srl\t%L1, 0, %L1", operands);
9865 output_asm_insn ("or\t%L1, %3, %3", operands);
9866 }
9867
9868 strcpy (asm_code, opcode);
9869
9870 if (which_alternative != 2)
9871 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9872 else
9873 return
9874 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
9875 }
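/* Editorial note: when the destination doubles as the scratch register
   and the source is not constant, the sequence emitted above for, say,
   OPCODE "sllx" is

     sllx  %H1, 32, %0      ! high word into bits 63..32
     srl   %L1, 0, %L1      ! zero-extend the low word if need be
     or    %L1, %0, %0      ! full 64-bit value in %0
     sllx  %0, %2, %L0      ! the shift itself
     srlx  %L0, 32, %H0     ! split the result back into a pair

   The srl is skipped when sparc_check_64 proves the low-word register
   already has its high bits clear.  */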
9876 \f
9877 /* Output rtl to increment the profiler label LABELNO
9878 for profiling a function entry. */
9879
9880 void
9881 sparc_profile_hook (int labelno)
9882 {
9883 char buf[32];
9884 rtx lab, fun;
9885
9886 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9887 if (NO_PROFILE_COUNTERS)
9888 {
9889 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9890 }
9891 else
9892 {
9893 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9894 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9895 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9896 }
9897 }
9898 \f
9899 #ifdef TARGET_SOLARIS
9900 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
9901
9902 static void
9903 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9904 tree decl ATTRIBUTE_UNUSED)
9905 {
9906 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9907 {
9908 solaris_elf_asm_comdat_section (name, flags, decl);
9909 return;
9910 }
9911
9912 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9913
9914 if (!(flags & SECTION_DEBUG))
9915 fputs (",#alloc", asm_out_file);
9916 if (flags & SECTION_WRITE)
9917 fputs (",#write", asm_out_file);
9918 if (flags & SECTION_TLS)
9919 fputs (",#tls", asm_out_file);
9920 if (flags & SECTION_CODE)
9921 fputs (",#execinstr", asm_out_file);
9922
9923 /* Sun as supports #nobits/#progbits only since Solaris 10. */
9924 if (HAVE_AS_SPARC_NOBITS)
9925 {
9926 if (flags & SECTION_BSS)
9927 fputs (",#nobits", asm_out_file);
9928 else
9929 fputs (",#progbits", asm_out_file);
9930 }
9931
9932 fputc ('\n', asm_out_file);
9933 }
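/* For example, a writable TLS data section is emitted as

	.section	".tdata",#alloc,#write,#tls,#progbits

   with the #progbits/#nobits suffix present only when the assembler
   supports it.  */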
9934 #endif /* TARGET_SOLARIS */
9935
9936 /* We do not allow indirect calls to be optimized into sibling calls.
9937
9938 We cannot use sibling calls when delayed branches are disabled
9939 because they will likely require the call delay slot to be filled.
9940
9941 Also, on SPARC 32-bit we cannot emit a sibling call when the
9942 current function returns a structure. This is because the "unimp
9943 after call" convention would cause the callee to return to the
9944 wrong place. The generic code already disallows cases where the
9945 function being called returns a structure.
9946
9947 It may seem strange how this last case could occur. Usually there
9948 is code after the call which jumps to epilogue code which dumps the
9949 return value into the struct return area. That ought to invalidate
9950 the sibling call, right? Well, in the C++ case we can end up passing
9951 the pointer to the struct return area to a constructor (which returns
9952 void) and then nothing else happens. Such a sibling call would look
9953 valid without the added check here.
9954
9955 VxWorks PIC PLT entries require the global pointer to be initialized
9956 on entry. We therefore can't emit sibling calls to them. */
9957 static bool
9958 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9959 {
9960 return (decl
9961 && flag_delayed_branch
9962 && (TARGET_ARCH64 || ! cfun->returns_struct)
9963 && !(TARGET_VXWORKS_RTP
9964 && flag_pic
9965 && !targetm.binds_local_p (decl)));
9966 }
9967 \f
9968 /* libfunc renaming. */
9969
9970 static void
9971 sparc_init_libfuncs (void)
9972 {
9973 if (TARGET_ARCH32)
9974 {
9975 /* Use the subroutines that Sun's library provides for integer
9976 multiply and divide. The `*' prevents an underscore from
9977 being prepended by the compiler. .umul is a little faster
9978 than .mul. */
9979 set_optab_libfunc (smul_optab, SImode, "*.umul");
9980 set_optab_libfunc (sdiv_optab, SImode, "*.div");
9981 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
9982 set_optab_libfunc (smod_optab, SImode, "*.rem");
9983 set_optab_libfunc (umod_optab, SImode, "*.urem");
9984
9985 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
9986 set_optab_libfunc (add_optab, TFmode, "_Q_add");
9987 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
9988 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
9989 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
9990 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
9991
9992 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
9993 is because with soft-float, the SFmode and DFmode sqrt
9994 instructions will be absent, and the compiler will notice and
9995 try to use the TFmode sqrt instruction for calls to the
9996 builtin function sqrt, but this fails. */
9997 if (TARGET_FPU)
9998 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
9999
10000 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10001 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10002 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10003 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10004 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10005 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10006
10007 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10008 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10009 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10010 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10011
10012 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10013 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10014 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10015 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10016
10017 if (DITF_CONVERSION_LIBFUNCS)
10018 {
10019 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10020 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10021 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10022 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10023 }
10024
10025 if (SUN_CONVERSION_LIBFUNCS)
10026 {
10027 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10028 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10029 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10030 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10031 }
10032 }
10033 if (TARGET_ARCH64)
10034 {
10035 /* In the SPARC 64bit ABI, SImode multiply and divide functions
10036 do not exist in the library. Make sure the compiler does not
10037 emit calls to them by accident. (It should always use the
10038 hardware instructions.) */
10039 set_optab_libfunc (smul_optab, SImode, 0);
10040 set_optab_libfunc (sdiv_optab, SImode, 0);
10041 set_optab_libfunc (udiv_optab, SImode, 0);
10042 set_optab_libfunc (smod_optab, SImode, 0);
10043 set_optab_libfunc (umod_optab, SImode, 0);
10044
10045 if (SUN_INTEGER_MULTIPLY_64)
10046 {
10047 set_optab_libfunc (smul_optab, DImode, "__mul64");
10048 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10049 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10050 set_optab_libfunc (smod_optab, DImode, "__rem64");
10051 set_optab_libfunc (umod_optab, DImode, "__urem64");
10052 }
10053
10054 if (SUN_CONVERSION_LIBFUNCS)
10055 {
10056 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10057 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10058 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10059 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10060 }
10061 }
10062 }
10063 \f
10064 /* SPARC builtins. */
10065 enum sparc_builtins
10066 {
10067 /* FPU builtins. */
10068 SPARC_BUILTIN_LDFSR,
10069 SPARC_BUILTIN_STFSR,
10070
10071 /* VIS 1.0 builtins. */
10072 SPARC_BUILTIN_FPACK16,
10073 SPARC_BUILTIN_FPACK32,
10074 SPARC_BUILTIN_FPACKFIX,
10075 SPARC_BUILTIN_FEXPAND,
10076 SPARC_BUILTIN_FPMERGE,
10077 SPARC_BUILTIN_FMUL8X16,
10078 SPARC_BUILTIN_FMUL8X16AU,
10079 SPARC_BUILTIN_FMUL8X16AL,
10080 SPARC_BUILTIN_FMUL8SUX16,
10081 SPARC_BUILTIN_FMUL8ULX16,
10082 SPARC_BUILTIN_FMULD8SUX16,
10083 SPARC_BUILTIN_FMULD8ULX16,
10084 SPARC_BUILTIN_FALIGNDATAV4HI,
10085 SPARC_BUILTIN_FALIGNDATAV8QI,
10086 SPARC_BUILTIN_FALIGNDATAV2SI,
10087 SPARC_BUILTIN_FALIGNDATADI,
10088 SPARC_BUILTIN_WRGSR,
10089 SPARC_BUILTIN_RDGSR,
10090 SPARC_BUILTIN_ALIGNADDR,
10091 SPARC_BUILTIN_ALIGNADDRL,
10092 SPARC_BUILTIN_PDIST,
10093 SPARC_BUILTIN_EDGE8,
10094 SPARC_BUILTIN_EDGE8L,
10095 SPARC_BUILTIN_EDGE16,
10096 SPARC_BUILTIN_EDGE16L,
10097 SPARC_BUILTIN_EDGE32,
10098 SPARC_BUILTIN_EDGE32L,
10099 SPARC_BUILTIN_FCMPLE16,
10100 SPARC_BUILTIN_FCMPLE32,
10101 SPARC_BUILTIN_FCMPNE16,
10102 SPARC_BUILTIN_FCMPNE32,
10103 SPARC_BUILTIN_FCMPGT16,
10104 SPARC_BUILTIN_FCMPGT32,
10105 SPARC_BUILTIN_FCMPEQ16,
10106 SPARC_BUILTIN_FCMPEQ32,
10107 SPARC_BUILTIN_FPADD16,
10108 SPARC_BUILTIN_FPADD16S,
10109 SPARC_BUILTIN_FPADD32,
10110 SPARC_BUILTIN_FPADD32S,
10111 SPARC_BUILTIN_FPSUB16,
10112 SPARC_BUILTIN_FPSUB16S,
10113 SPARC_BUILTIN_FPSUB32,
10114 SPARC_BUILTIN_FPSUB32S,
10115 SPARC_BUILTIN_ARRAY8,
10116 SPARC_BUILTIN_ARRAY16,
10117 SPARC_BUILTIN_ARRAY32,
10118
10119 /* VIS 2.0 builtins. */
10120 SPARC_BUILTIN_EDGE8N,
10121 SPARC_BUILTIN_EDGE8LN,
10122 SPARC_BUILTIN_EDGE16N,
10123 SPARC_BUILTIN_EDGE16LN,
10124 SPARC_BUILTIN_EDGE32N,
10125 SPARC_BUILTIN_EDGE32LN,
10126 SPARC_BUILTIN_BMASK,
10127 SPARC_BUILTIN_BSHUFFLEV4HI,
10128 SPARC_BUILTIN_BSHUFFLEV8QI,
10129 SPARC_BUILTIN_BSHUFFLEV2SI,
10130 SPARC_BUILTIN_BSHUFFLEDI,
10131
10132 /* VIS 3.0 builtins. */
10133 SPARC_BUILTIN_CMASK8,
10134 SPARC_BUILTIN_CMASK16,
10135 SPARC_BUILTIN_CMASK32,
10136 SPARC_BUILTIN_FCHKSM16,
10137 SPARC_BUILTIN_FSLL16,
10138 SPARC_BUILTIN_FSLAS16,
10139 SPARC_BUILTIN_FSRL16,
10140 SPARC_BUILTIN_FSRA16,
10141 SPARC_BUILTIN_FSLL32,
10142 SPARC_BUILTIN_FSLAS32,
10143 SPARC_BUILTIN_FSRL32,
10144 SPARC_BUILTIN_FSRA32,
10145 SPARC_BUILTIN_PDISTN,
10146 SPARC_BUILTIN_FMEAN16,
10147 SPARC_BUILTIN_FPADD64,
10148 SPARC_BUILTIN_FPSUB64,
10149 SPARC_BUILTIN_FPADDS16,
10150 SPARC_BUILTIN_FPADDS16S,
10151 SPARC_BUILTIN_FPSUBS16,
10152 SPARC_BUILTIN_FPSUBS16S,
10153 SPARC_BUILTIN_FPADDS32,
10154 SPARC_BUILTIN_FPADDS32S,
10155 SPARC_BUILTIN_FPSUBS32,
10156 SPARC_BUILTIN_FPSUBS32S,
10157 SPARC_BUILTIN_FUCMPLE8,
10158 SPARC_BUILTIN_FUCMPNE8,
10159 SPARC_BUILTIN_FUCMPGT8,
10160 SPARC_BUILTIN_FUCMPEQ8,
10161 SPARC_BUILTIN_FHADDS,
10162 SPARC_BUILTIN_FHADDD,
10163 SPARC_BUILTIN_FHSUBS,
10164 SPARC_BUILTIN_FHSUBD,
10165 SPARC_BUILTIN_FNHADDS,
10166 SPARC_BUILTIN_FNHADDD,
10167 SPARC_BUILTIN_UMULXHI,
10168 SPARC_BUILTIN_XMULX,
10169 SPARC_BUILTIN_XMULXHI,
10170
10171 SPARC_BUILTIN_MAX
10172 };
10173
10174 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10175 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10176
10177 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10178 function decl or NULL_TREE if the builtin was not added. */
10179
10180 static tree
10181 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10182 tree type)
10183 {
10184 tree t
10185 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10186
10187 if (t)
10188 {
10189 sparc_builtins[code] = t;
10190 sparc_builtins_icode[code] = icode;
10191 }
10192
10193 return t;
10194 }
10195
10196 /* Likewise, but also marks the function as "const". */
10197
10198 static tree
10199 def_builtin_const (const char *name, enum insn_code icode,
10200 enum sparc_builtins code, tree type)
10201 {
10202 tree t = def_builtin (name, icode, code, type);
10203
10204 if (t)
10205 TREE_READONLY (t) = 1;
10206
10207 return t;
10208 }
10209
10210 /* Implement the TARGET_INIT_BUILTINS target hook.
10211 Create builtin functions for special SPARC instructions. */
10212
10213 static void
10214 sparc_init_builtins (void)
10215 {
10216 if (TARGET_FPU)
10217 sparc_fpu_init_builtins ();
10218
10219 if (TARGET_VIS)
10220 sparc_vis_init_builtins ();
10221 }
10222
10223 /* Create builtin functions for FPU instructions. */
10224
10225 static void
10226 sparc_fpu_init_builtins (void)
10227 {
10228 tree ftype
10229 = build_function_type_list (void_type_node,
10230 build_pointer_type (unsigned_type_node), 0);
10231 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10232 SPARC_BUILTIN_LDFSR, ftype);
10233 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10234 SPARC_BUILTIN_STFSR, ftype);
10235 }
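/* Editorial usage sketch: both builtins take a pointer to an in-memory
   image of the FSR, e.g.

     unsigned int fsr;
     __builtin_store_fsr (&fsr);    - stfsr: store %fsr to memory
     __builtin_load_fsr (&fsr);     - ldfsr: reload %fsr from memory  */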
10236
10237 /* Create builtin functions for VIS instructions. */
10238
10239 static void
10240 sparc_vis_init_builtins (void)
10241 {
10242 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10243 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10244 tree v4hi = build_vector_type (intHI_type_node, 4);
10245 tree v2hi = build_vector_type (intHI_type_node, 2);
10246 tree v2si = build_vector_type (intSI_type_node, 2);
10247 tree v1si = build_vector_type (intSI_type_node, 1);
10248
10249 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10250 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10251 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10252 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10253 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10254 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10255 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10256 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10257 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10258 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10259 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10260 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10261 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10262 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10263 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10264 v8qi, v8qi,
10265 intDI_type_node, 0);
10266 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10267 v8qi, v8qi, 0);
10268 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10269 v8qi, v8qi, 0);
10270 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10271 intDI_type_node,
10272 intDI_type_node, 0);
10273 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10274 intSI_type_node,
10275 intSI_type_node, 0);
10276 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10277 ptr_type_node,
10278 intSI_type_node, 0);
10279 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10280 ptr_type_node,
10281 intDI_type_node, 0);
10282 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10283 ptr_type_node,
10284 ptr_type_node, 0);
10285 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10286 ptr_type_node,
10287 ptr_type_node, 0);
10288 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10289 v4hi, v4hi, 0);
10290 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10291 v2si, v2si, 0);
10292 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10293 v4hi, v4hi, 0);
10294 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10295 v2si, v2si, 0);
10296 tree void_ftype_di = build_function_type_list (void_type_node,
10297 intDI_type_node, 0);
10298 tree di_ftype_void = build_function_type_list (intDI_type_node,
10299 void_type_node, 0);
10300 tree void_ftype_si = build_function_type_list (void_type_node,
10301 intSI_type_node, 0);
10302 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10303 float_type_node,
10304 float_type_node, 0);
10305 tree df_ftype_df_df = build_function_type_list (double_type_node,
10306 double_type_node,
10307 double_type_node, 0);
10308
10309 /* Packing and expanding vectors. */
10310 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10311 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10312 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10313 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10314 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10315 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10316 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10317 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10318 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10319 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10320
10321 /* Multiplications. */
10322 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10323 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10324 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10325 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10326 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10327 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10328 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10329 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10330 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10331 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10332 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10333 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10334 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10335 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10336
10337 /* Data aligning. */
10338 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10339 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10340 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10341 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10342 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10343 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10344 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10345 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10346
10347 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10348 SPARC_BUILTIN_WRGSR, void_ftype_di);
10349 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10350 SPARC_BUILTIN_RDGSR, di_ftype_void);
10351
10352 if (TARGET_ARCH64)
10353 {
10354 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10355 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10356 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10357 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10358 }
10359 else
10360 {
10361 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10362 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10363 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10364 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10365 }
10366
10367 /* Pixel distance. */
10368 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10369 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10370
10371 /* Edge handling. */
10372 if (TARGET_ARCH64)
10373 {
10374 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10375 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10376 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10377 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10378 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10379 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10380 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10381 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10382 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10383 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10384 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10385 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10386 }
10387 else
10388 {
10389 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10390 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10391 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10392 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10393 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10394 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10395 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10396 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10397 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10398 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10399 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10400 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10401 }
10402
10403 /* Pixel compare. */
10404 if (TARGET_ARCH64)
10405 {
10406 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10407 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10408 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10409 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10410 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10411 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10412 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10413 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10414 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10415 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10416 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10417 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10418 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10419 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10420 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10421 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10422 }
10423 else
10424 {
10425 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10426 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10427 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10428 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10429 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10430 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10431 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10432 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10433 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10434 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10435 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10436 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10437 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10438 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10439 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10440 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10441 }
10442
10443 /* Addition and subtraction. */
10444 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10445 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10446 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10447 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10448 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10449 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10450 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10451 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10452 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10453 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10454 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10455 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10456 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10457 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10458 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10459 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10460
10461 /* Three-dimensional array addressing. */
10462 if (TARGET_ARCH64)
10463 {
10464 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10465 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10466 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10467 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10468 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10469 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10470 }
10471 else
10472 {
10473 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10474 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10475 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10476 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10477 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10478 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10479 }
10480
10481 if (TARGET_VIS2)
10482 {
10483 /* Edge handling. */
10484 if (TARGET_ARCH64)
10485 {
10486 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10487 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10488 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10489 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10490 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10491 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10492 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10493 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10494 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10495 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10496 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10497 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10498 }
10499 else
10500 {
10501 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10502 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10503 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10504 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10505 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10506 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10507 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10508 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10509 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10510 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10511 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10512 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10513 }
10514
10515 /* Byte mask and shuffle. */
10516 if (TARGET_ARCH64)
10517 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10518 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10519 else
10520 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10521 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10522 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10523 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10524 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10525 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10526 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10527 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10528 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10529 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10530 }
10531
10532 if (TARGET_VIS3)
10533 {
10534 if (TARGET_ARCH64)
10535 {
10536 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10537 SPARC_BUILTIN_CMASK8, void_ftype_di);
10538 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10539 SPARC_BUILTIN_CMASK16, void_ftype_di);
10540 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10541 SPARC_BUILTIN_CMASK32, void_ftype_di);
10542 }
10543 else
10544 {
10545 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10546 SPARC_BUILTIN_CMASK8, void_ftype_si);
10547 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10548 SPARC_BUILTIN_CMASK16, void_ftype_si);
10549 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10550 SPARC_BUILTIN_CMASK32, void_ftype_si);
10551 }
10552
10553 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10554 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
10555
10556 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10557 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
10558 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10559 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
10560 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10561 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
10562 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10563 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
10564 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10565 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
10566 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10567 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
10568 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10569 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
10570 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10571 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
10572
10573 if (TARGET_ARCH64)
10574 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10575 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
10576 else
10577 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10578 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
10579
10580 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10581 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
10582 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10583 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
10584 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10585 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
10586
10587 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10588 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
10589 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10590 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
10591 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10592 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
10593 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10594 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
10595 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10596 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
10597 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10598 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
10599 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10600 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
10601 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10602 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
10603
10604 if (TARGET_ARCH64)
10605 {
10606 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10607 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
10608 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10609 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
10610 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10611 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
10612 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10613 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
10614 }
10615 else
10616 {
10617 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10618 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
10619 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10620 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
10621 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10622 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
10623 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10624 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
10625 }
10626
10627 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10628 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
10629 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10630 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
10631 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10632 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
10633 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10634 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
10635 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10636 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
10637 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10638 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
10639
10640 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10641 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
10642 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10643 SPARC_BUILTIN_XMULX, di_ftype_di_di);
10644 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10645 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
10646 }
10647 }
10648
10649 /* Implement TARGET_BUILTIN_DECL hook. */
10650
10651 static tree
10652 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10653 {
10654 if (code >= SPARC_BUILTIN_MAX)
10655 return error_mark_node;
10656
10657 return sparc_builtins[code];
10658 }
10659
10660 /* Implement TARGET_EXPAND_BUILTIN hook. */
10661
10662 static rtx
10663 sparc_expand_builtin (tree exp, rtx target,
10664 rtx subtarget ATTRIBUTE_UNUSED,
10665 enum machine_mode tmode ATTRIBUTE_UNUSED,
10666 int ignore ATTRIBUTE_UNUSED)
10667 {
10668 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10669 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10670 enum insn_code icode = sparc_builtins_icode[code];
10671 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10672 call_expr_arg_iterator iter;
10673 int arg_count = 0;
10674 rtx pat, op[4];
10675 tree arg;
10676
10677 if (nonvoid)
10678 {
10679 enum machine_mode tmode = insn_data[icode].operand[0].mode;
10680 if (!target
10681 || GET_MODE (target) != tmode
10682 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10683 op[0] = gen_reg_rtx (tmode);
10684 else
10685 op[0] = target;
10686 }
10687
10688 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10689 {
10690 const struct insn_operand_data *insn_op;
10691 int idx;
10692
10693 if (arg == error_mark_node)
10694 return NULL_RTX;
10695
10696 arg_count++;
10697 idx = arg_count - !nonvoid;
10698 insn_op = &insn_data[icode].operand[idx];
10699 op[arg_count] = expand_normal (arg);
10700
10701 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
10702 {
10703 if (!address_operand (op[arg_count], SImode))
10704 {
10705 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
10706 op[arg_count] = copy_addr_to_reg (op[arg_count]);
10707 }
10708 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
10709 }
10710
10711 else if (insn_op->mode == V1DImode
10712 && GET_MODE (op[arg_count]) == DImode)
10713 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10714
10715 else if (insn_op->mode == V1SImode
10716 && GET_MODE (op[arg_count]) == SImode)
10717 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10718
10719 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10720 insn_op->mode))
10721 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10722 }
10723
10724 switch (arg_count)
10725 {
10726 case 0:
10727 pat = GEN_FCN (icode) (op[0]);
10728 break;
10729 case 1:
10730 if (nonvoid)
10731 pat = GEN_FCN (icode) (op[0], op[1]);
10732 else
10733 pat = GEN_FCN (icode) (op[1]);
10734 break;
10735 case 2:
10736 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10737 break;
10738 case 3:
10739 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10740 break;
10741 default:
10742 gcc_unreachable ();
10743 }
10744
10745 if (!pat)
10746 return NULL_RTX;
10747
10748 emit_insn (pat);
10749
10750 return (nonvoid ? op[0] : const0_rtx);
10751 }
10752
10753 /* Return the upper 16 bits of the 8x16 multiplication. */
10754
10755 static int
10756 sparc_vis_mul8x16 (int e8, int e16)
10757 {
10758 return (e8 * e16 + 128) / 256;
10759 }
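/* For example, with e8 == 100 and e16 == 300 the result is
   (100 * 300 + 128) / 256 == 30128 / 256 == 117: the high 16 bits of
   the 24-bit product, rounded to nearest by the +128 bias.  */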
10760
10761 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10762 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10763
10764 static void
10765 sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode,
10766 tree inner_type, tree cst0, tree cst1)
10767 {
10768 unsigned i, num = VECTOR_CST_NELTS (cst0);
10769 int scale;
10770
10771 switch (fncode)
10772 {
10773 case SPARC_BUILTIN_FMUL8X16:
10774 for (i = 0; i < num; ++i)
10775 {
10776 int val
10777 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10778 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10779 n_elts[i] = build_int_cst (inner_type, val);
10780 }
10781 break;
10782
10783 case SPARC_BUILTIN_FMUL8X16AU:
10784 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10785
10786 for (i = 0; i < num; ++i)
10787 {
10788 int val
10789 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10790 scale);
10791 n_elts[i] = build_int_cst (inner_type, val);
10792 }
10793 break;
10794
10795 case SPARC_BUILTIN_FMUL8X16AL:
10796 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10797
10798 for (i = 0; i < num; ++i)
10799 {
10800 int val
10801 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10802 scale);
10803 n_elts[i] = build_int_cst (inner_type, val);
10804 }
10805 break;
10806
10807 default:
10808 gcc_unreachable ();
10809 }
10810 }
10811
10812 /* Implement TARGET_FOLD_BUILTIN hook.
10813
10814 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
10815 result of the function call is ignored. NULL_TREE is returned if the
10816 function could not be folded. */
10817
10818 static tree
10819 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10820 tree *args, bool ignore)
10821 {
10822 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10823 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10824 tree arg0, arg1, arg2;
10825
10826 if (ignore)
10827 switch (code)
10828 {
10829 case SPARC_BUILTIN_LDFSR:
10830 case SPARC_BUILTIN_STFSR:
10831 case SPARC_BUILTIN_ALIGNADDR:
10832 case SPARC_BUILTIN_WRGSR:
10833 case SPARC_BUILTIN_BMASK:
10834 case SPARC_BUILTIN_CMASK8:
10835 case SPARC_BUILTIN_CMASK16:
10836 case SPARC_BUILTIN_CMASK32:
10837 break;
10838
10839 default:
10840 return build_zero_cst (rtype);
10841 }
10842
10843 switch (code)
10844 {
10845 case SPARC_BUILTIN_FEXPAND:
10846 arg0 = args[0];
10847 STRIP_NOPS (arg0);
10848
10849 if (TREE_CODE (arg0) == VECTOR_CST)
10850 {
10851 tree inner_type = TREE_TYPE (rtype);
10852 tree *n_elts;
10853 unsigned i;
10854
10855 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10856 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10857 n_elts[i] = build_int_cst (inner_type,
10858 TREE_INT_CST_LOW
10859 (VECTOR_CST_ELT (arg0, i)) << 4);
10860 return build_vector (rtype, n_elts);
10861 }
10862 break;
10863
10864 case SPARC_BUILTIN_FMUL8X16:
10865 case SPARC_BUILTIN_FMUL8X16AU:
10866 case SPARC_BUILTIN_FMUL8X16AL:
10867 arg0 = args[0];
10868 arg1 = args[1];
10869 STRIP_NOPS (arg0);
10870 STRIP_NOPS (arg1);
10871
10872 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10873 {
10874 tree inner_type = TREE_TYPE (rtype);
10875 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10876 sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1);
10877 return build_vector (rtype, n_elts);
10878 }
10879 break;
10880
10881 case SPARC_BUILTIN_FPMERGE:
10882 arg0 = args[0];
10883 arg1 = args[1];
10884 STRIP_NOPS (arg0);
10885 STRIP_NOPS (arg1);
10886
10887 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10888 {
10889 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
10890 unsigned i;
10891 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10892 {
10893 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
10894 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
10895 }
10896
10897 return build_vector (rtype, n_elts);
10898 }
10899 break;
10900
10901 case SPARC_BUILTIN_PDIST:
10902 case SPARC_BUILTIN_PDISTN:
10903 arg0 = args[0];
10904 arg1 = args[1];
10905 STRIP_NOPS (arg0);
10906 STRIP_NOPS (arg1);
10907 if (code == SPARC_BUILTIN_PDIST)
10908 {
10909 arg2 = args[2];
10910 STRIP_NOPS (arg2);
10911 }
10912 else
10913 arg2 = integer_zero_node;
10914
10915 if (TREE_CODE (arg0) == VECTOR_CST
10916 && TREE_CODE (arg1) == VECTOR_CST
10917 && TREE_CODE (arg2) == INTEGER_CST)
10918 {
10919 bool overflow = false;
10920 widest_int result = wi::to_widest (arg2);
10921 widest_int tmp;
10922 unsigned i;
10923
10924 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10925 {
10926 tree e0 = VECTOR_CST_ELT (arg0, i);
10927 tree e1 = VECTOR_CST_ELT (arg1, i);
10928
10929 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
10930
10931 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
10932 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
10933 if (wi::neg_p (tmp))
10934 tmp = wi::neg (tmp, &neg2_ovf);
10935 else
10936 neg2_ovf = false;
10937 result = wi::add (result, tmp, SIGNED, &add2_ovf);
10938 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
10939 }
10940
10941 gcc_assert (!overflow);
10942
10943 return wide_int_to_tree (rtype, result);
10944 }
10945
10946 default:
10947 break;
10948 }
10949
10950 return NULL_TREE;
10951 }
10952 \f
10953 /* ??? This duplicates information provided to the compiler by the
10954 ??? scheduler description. Some day, teach genautomata to output
10955 ??? the latencies and then CSE will just use that. */
10956
10957 static bool
10958 sparc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10959 int *total, bool speed ATTRIBUTE_UNUSED)
10960 {
10961 enum machine_mode mode = GET_MODE (x);
10962 bool float_mode_p = FLOAT_MODE_P (mode);
10963
10964 switch (code)
10965 {
10966 case CONST_INT:
10967 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
10968 {
10969 *total = 0;
10970 return true;
10971 }
10972 /* FALLTHRU */
10973
10974 case HIGH:
10975 *total = 2;
10976 return true;
10977
10978 case CONST:
10979 case LABEL_REF:
10980 case SYMBOL_REF:
10981 *total = 4;
10982 return true;
10983
10984 case CONST_DOUBLE:
10985 if (GET_MODE (x) == VOIDmode
10986 && ((CONST_DOUBLE_HIGH (x) == 0
10987 && CONST_DOUBLE_LOW (x) < 0x1000)
10988 || (CONST_DOUBLE_HIGH (x) == -1
10989 && CONST_DOUBLE_LOW (x) < 0
10990 && CONST_DOUBLE_LOW (x) >= -0x1000)))
10991 *total = 0;
10992 else
10993 *total = 8;
10994 return true;
10995
10996 case MEM:
10997 /* If outer-code was a sign or zero extension, a cost
10998 of COSTS_N_INSNS (1) was already added in. This is
10999 why we are subtracting it back out. */
11000 if (outer_code == ZERO_EXTEND)
11001 {
11002 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11003 }
11004 else if (outer_code == SIGN_EXTEND)
11005 {
11006 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11007 }
11008 else if (float_mode_p)
11009 {
11010 *total = sparc_costs->float_load;
11011 }
11012 else
11013 {
11014 *total = sparc_costs->int_load;
11015 }
11016
11017 return true;
11018
11019 case PLUS:
11020 case MINUS:
11021 if (float_mode_p)
11022 *total = sparc_costs->float_plusminus;
11023 else
11024 *total = COSTS_N_INSNS (1);
11025 return false;
11026
11027 case FMA:
11028 {
11029 rtx sub;
11030
11031 gcc_assert (float_mode_p);
11032 *total = sparc_costs->float_mul;
11033
11034 sub = XEXP (x, 0);
11035 if (GET_CODE (sub) == NEG)
11036 sub = XEXP (sub, 0);
11037 *total += rtx_cost (sub, FMA, 0, speed);
11038
11039 sub = XEXP (x, 2);
11040 if (GET_CODE (sub) == NEG)
11041 sub = XEXP (sub, 0);
11042 *total += rtx_cost (sub, FMA, 2, speed);
11043 return true;
11044 }
11045
11046 case MULT:
11047 if (float_mode_p)
11048 *total = sparc_costs->float_mul;
11049 else if (! TARGET_HARD_MUL)
11050 *total = COSTS_N_INSNS (25);
11051 else
11052 {
11053 int bit_cost;
11054
11055 bit_cost = 0;
11056 if (sparc_costs->int_mul_bit_factor)
11057 {
11058 int nbits;
11059
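		  /* Count the set bits in the multiplier with Kernighan's
		     trick: value &= value - 1 clears the lowest set bit,
		     so each loop below iterates once per set bit.  */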
11060 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11061 {
11062 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11063 for (nbits = 0; value != 0; value &= value - 1)
11064 nbits++;
11065 }
11066 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
11067 && GET_MODE (XEXP (x, 1)) == VOIDmode)
11068 {
11069 rtx x1 = XEXP (x, 1);
11070 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
11071 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
11072
11073 for (nbits = 0; value1 != 0; value1 &= value1 - 1)
11074 nbits++;
11075 for (; value2 != 0; value2 &= value2 - 1)
11076 nbits++;
11077 }
11078 else
11079 nbits = 7;
11080
11081 if (nbits < 3)
11082 nbits = 3;
11083 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11084 bit_cost = COSTS_N_INSNS (bit_cost);
11085 }
11086
11087 if (mode == DImode)
11088 *total = sparc_costs->int_mulX + bit_cost;
11089 else
11090 *total = sparc_costs->int_mul + bit_cost;
11091 }
11092 return false;
11093
11094 case ASHIFT:
11095 case ASHIFTRT:
11096 case LSHIFTRT:
11097 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11098 return false;
11099
11100 case DIV:
11101 case UDIV:
11102 case MOD:
11103 case UMOD:
11104 if (float_mode_p)
11105 {
11106 if (mode == DFmode)
11107 *total = sparc_costs->float_div_df;
11108 else
11109 *total = sparc_costs->float_div_sf;
11110 }
11111 else
11112 {
11113 if (mode == DImode)
11114 *total = sparc_costs->int_divX;
11115 else
11116 *total = sparc_costs->int_div;
11117 }
11118 return false;
11119
11120 case NEG:
11121 if (! float_mode_p)
11122 {
11123 *total = COSTS_N_INSNS (1);
11124 return false;
11125 }
11126 /* FALLTHRU */
11127
11128 case ABS:
11129 case FLOAT:
11130 case UNSIGNED_FLOAT:
11131 case FIX:
11132 case UNSIGNED_FIX:
11133 case FLOAT_EXTEND:
11134 case FLOAT_TRUNCATE:
11135 *total = sparc_costs->float_move;
11136 return false;
11137
11138 case SQRT:
11139 if (mode == DFmode)
11140 *total = sparc_costs->float_sqrt_df;
11141 else
11142 *total = sparc_costs->float_sqrt_sf;
11143 return false;
11144
11145 case COMPARE:
11146 if (float_mode_p)
11147 *total = sparc_costs->float_cmp;
11148 else
11149 *total = COSTS_N_INSNS (1);
11150 return false;
11151
11152 case IF_THEN_ELSE:
11153 if (float_mode_p)
11154 *total = sparc_costs->float_cmove;
11155 else
11156 *total = sparc_costs->int_cmove;
11157 return false;
11158
11159 case IOR:
11160 /* Handle the NAND vector patterns. */
11161 if (sparc_vector_mode_supported_p (GET_MODE (x))
11162 && GET_CODE (XEXP (x, 0)) == NOT
11163 && GET_CODE (XEXP (x, 1)) == NOT)
11164 {
11165 *total = COSTS_N_INSNS (1);
11166 return true;
11167 }
11168 else
11169 return false;
11170
11171 default:
11172 return false;
11173 }
11174 }
11175
11176 /* Return true if RCLASS is either GENERAL_REGS or I64_REGS. */
11177
11178 static inline bool
11179 general_or_i64_p (reg_class_t rclass)
11180 {
11181 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11182 }
11183
11184 /* Implement TARGET_REGISTER_MOVE_COST. */
11185
11186 static int
11187 sparc_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
11188 reg_class_t from, reg_class_t to)
11189 {
11190 bool need_memory = false;
11191
11192 if (from == FPCC_REGS || to == FPCC_REGS)
11193 need_memory = true;
11194 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11195 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11196 {
11197 if (TARGET_VIS3)
11198 {
11199 int size = GET_MODE_SIZE (mode);
11200 if (size == 8 || size == 4)
11201 {
11202 if (! TARGET_ARCH32 || size == 4)
11203 return 4;
11204 else
11205 return 6;
11206 }
11207 }
11208 need_memory = true;
11209 }
11210
11211 if (need_memory)
11212 {
11213 if (sparc_cpu == PROCESSOR_ULTRASPARC
11214 || sparc_cpu == PROCESSOR_ULTRASPARC3
11215 || sparc_cpu == PROCESSOR_NIAGARA
11216 || sparc_cpu == PROCESSOR_NIAGARA2
11217 || sparc_cpu == PROCESSOR_NIAGARA3
11218 || sparc_cpu == PROCESSOR_NIAGARA4)
11219 return 12;
11220
11221 return 6;
11222 }
11223
11224 return 2;
11225 }
11226
11227 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11228 This is achieved by means of a manual dynamic stack space allocation in
11229 the current frame. We make the assumption that SEQ doesn't contain any
11230 function calls, with the possible exception of calls to the GOT helper. */
11231
11232 static void
11233 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11234 {
11235 /* We must preserve the lowest 16 words for the register save area. */
11236 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11237 /* We really need only 2 words of fresh stack space. */
11238 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11239
11240 rtx slot
11241 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11242 SPARC_STACK_BIAS + offset));
11243
11244 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11245 emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
11246 if (reg2)
11247 emit_insn (gen_rtx_SET (VOIDmode,
11248 adjust_address (slot, word_mode, UNITS_PER_WORD),
11249 reg2));
11250 emit_insn (seq);
11251 if (reg2)
11252 emit_insn (gen_rtx_SET (VOIDmode,
11253 reg2,
11254 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11255 emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
11256 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11257 }
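
/* For illustration, assuming REG2 is non-null, the sequence emitted by
   emit_and_preserve looks roughly like this on 64-bit (a sketch, not
   literal output; st/ld replace stx/ldx on 32-bit):

	add	%sp, -SIZE, %sp
	stx	REG, [%sp+BIAS+OFFSET]
	stx	REG2, [%sp+BIAS+OFFSET+8]
	...SEQ...
	ldx	[%sp+BIAS+OFFSET+8], REG2
	ldx	[%sp+BIAS+OFFSET], REG
	add	%sp, SIZE, %sp  */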
11258
11259 /* Output the assembler code for a thunk function. THUNK_DECL is the
11260 declaration for the thunk function itself, FUNCTION is the decl for
11261 the target function. DELTA is an immediate constant offset to be
11262 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11263 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
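
/* For illustration, the emitted thunk is semantically equivalent to this
   pseudo-C (a sketch; the exact insn sequence depends on the code model,
   -fpic and delay-slot availability):

	THIS += DELTA;
	if (VCALL_OFFSET)
	  THIS += *(*THIS + VCALL_OFFSET);
	goto FUNCTION;  */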
11264
11265 static void
11266 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11267 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11268 tree function)
11269 {
11270 rtx this_rtx, funexp;
11271 rtx_insn *insn;
11272 unsigned int int_arg_first;
11273
11274 reload_completed = 1;
11275 epilogue_completed = 1;
11276
11277 emit_note (NOTE_INSN_PROLOGUE_END);
11278
11279 if (TARGET_FLAT)
11280 {
11281 sparc_leaf_function_p = 1;
11282
11283 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11284 }
11285 else if (flag_delayed_branch)
11286 {
11287 /* We will emit a regular sibcall below, so we need to instruct
11288 output_sibcall that we are in a leaf function. */
11289 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11290
11291 /* This will cause final.c to invoke leaf_renumber_regs so we
11292 must behave as if we were in a not-yet-leafified function. */
11293 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11294 }
11295 else
11296 {
11297 /* We will emit the sibcall manually below, so we will need to
11298 manually spill non-leaf registers. */
11299 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11300
11301 /* We really are in a leaf function. */
11302 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11303 }
11304
11305 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11306 returns a structure, the structure return pointer is there instead. */
11307 if (TARGET_ARCH64
11308 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11309 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11310 else
11311 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11312
11313 /* Add DELTA. When possible use a plain add, otherwise load it into
11314 a register first. */
11315 if (delta)
11316 {
11317 rtx delta_rtx = GEN_INT (delta);
11318
11319 if (! SPARC_SIMM13_P (delta))
11320 {
11321 rtx scratch = gen_rtx_REG (Pmode, 1);
11322 emit_move_insn (scratch, delta_rtx);
11323 delta_rtx = scratch;
11324 }
11325
11326 /* THIS_RTX += DELTA. */
11327 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11328 }
11329
11330 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
11331 if (vcall_offset)
11332 {
11333 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11334 rtx scratch = gen_rtx_REG (Pmode, 1);
11335
11336 gcc_assert (vcall_offset < 0);
11337
11338 /* SCRATCH = *THIS_RTX. */
11339 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11340
11341 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11342 may not have any available scratch register at this point. */
11343 if (SPARC_SIMM13_P (vcall_offset))
11344 ;
11345 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
11346 else if (! fixed_regs[5]
11347 /* The below sequence is made up of at least 2 insns,
11348 while the default method may need only one. */
11349 && vcall_offset < -8192)
11350 {
11351 rtx scratch2 = gen_rtx_REG (Pmode, 5);
11352 emit_move_insn (scratch2, vcall_offset_rtx);
11353 vcall_offset_rtx = scratch2;
11354 }
11355 else
11356 {
11357 rtx increment = GEN_INT (-4096);
11358
11359 /* VCALL_OFFSET is a negative number whose typical range can be
11360 estimated as -32768..0 in 32-bit mode. In almost all cases
11361 it is therefore cheaper to emit multiple add insns than
11362 spilling and loading the constant into a register (at least
11363 6 insns). */
11364 while (! SPARC_SIMM13_P (vcall_offset))
11365 {
11366 emit_insn (gen_add2_insn (scratch, increment));
11367 vcall_offset += 4096;
11368 }
11369 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11370 }
11371
11372 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
11373 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11374 gen_rtx_PLUS (Pmode,
11375 scratch,
11376 vcall_offset_rtx)));
11377
11378 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
11379 emit_insn (gen_add2_insn (this_rtx, scratch));
11380 }
11381
11382 /* Generate a tail call to the target function. */
11383 if (! TREE_USED (function))
11384 {
11385 assemble_external (function);
11386 TREE_USED (function) = 1;
11387 }
11388 funexp = XEXP (DECL_RTL (function), 0);
11389
11390 if (flag_delayed_branch)
11391 {
11392 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11393 insn = emit_call_insn (gen_sibcall (funexp));
11394 SIBLING_CALL_P (insn) = 1;
11395 }
11396 else
11397 {
11398 /* The hoops we have to jump through in order to generate a sibcall
11399 without using delay slots... */
11400 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11401
11402 if (flag_pic)
11403 {
11404 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
11405 start_sequence ();
11406 load_got_register (); /* clobbers %o7 */
11407 scratch = sparc_legitimize_pic_address (funexp, scratch);
11408 seq = get_insns ();
11409 end_sequence ();
11410 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11411 }
11412 else if (TARGET_ARCH32)
11413 {
11414 emit_insn (gen_rtx_SET (VOIDmode,
11415 scratch,
11416 gen_rtx_HIGH (SImode, funexp)));
11417 emit_insn (gen_rtx_SET (VOIDmode,
11418 scratch,
11419 gen_rtx_LO_SUM (SImode, scratch, funexp)));
11420 }
11421 else /* TARGET_ARCH64 */
11422 {
11423 switch (sparc_cmodel)
11424 {
11425 case CM_MEDLOW:
11426 case CM_MEDMID:
11427 /* The destination can serve as a temporary. */
11428 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11429 break;
11430
11431 case CM_MEDANY:
11432 case CM_EMBMEDANY:
11433 /* The destination cannot serve as a temporary. */
11434 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11435 start_sequence ();
11436 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11437 seq = get_insns ();
11438 end_sequence ();
11439 emit_and_preserve (seq, spill_reg, 0);
11440 break;
11441
11442 default:
11443 gcc_unreachable ();
11444 }
11445 }
11446
11447 emit_jump_insn (gen_indirect_jump (scratch));
11448 }
11449
11450 emit_barrier ();
11451
11452 /* Run just enough of rest_of_compilation to get the insns emitted.
11453 There's not really enough bulk here to make other passes such as
11454 instruction scheduling worthwhile. Note that use_thunk calls
11455 assemble_start_function and assemble_end_function. */
11456 insn = get_insns ();
11457 shorten_branches (insn);
11458 final_start_function (insn, file, 1);
11459 final (insn, file, 1);
11460 final_end_function ();
11461
11462 reload_completed = 0;
11463 epilogue_completed = 0;
11464 }
11465
11466 /* Return true if sparc_output_mi_thunk would be able to output the
11467 assembler code for the thunk function specified by the arguments
11468 it is passed, and false otherwise. */
11469 static bool
11470 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11471 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11472 HOST_WIDE_INT vcall_offset,
11473 const_tree function ATTRIBUTE_UNUSED)
11474 {
11475 /* Bound the loop used in the default method above. */
11476 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11477 }
11478
11479 /* How to allocate a 'struct machine_function'. */
11480
11481 static struct machine_function *
11482 sparc_init_machine_status (void)
11483 {
11484 return ggc_cleared_alloc<machine_function> ();
11485 }
11486
11487 /* Locate some local-dynamic symbol still in use by this function
11488 so that we can print its name in local-dynamic base patterns. */
11489
11490 static const char *
11491 get_some_local_dynamic_name (void)
11492 {
11493 rtx_insn *insn;
11494
11495 if (cfun->machine->some_ld_name)
11496 return cfun->machine->some_ld_name;
11497
11498 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11499 if (INSN_P (insn)
11500 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11501 return cfun->machine->some_ld_name;
11502
11503 gcc_unreachable ();
11504 }
11505
11506 static int
11507 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11508 {
11509 rtx x = *px;
11510
11511 if (x
11512 && GET_CODE (x) == SYMBOL_REF
11513 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11514 {
11515 cfun->machine->some_ld_name = XSTR (x, 0);
11516 return 1;
11517 }
11518
11519 return 0;
11520 }
11521
11522 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11523 We need to emit DTP-relative relocations. */
11524
11525 static void
11526 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11527 {
11528 switch (size)
11529 {
11530 case 4:
11531 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11532 break;
11533 case 8:
11534 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11535 break;
11536 default:
11537 gcc_unreachable ();
11538 }
11539 output_addr_const (file, x);
11540 fputs (")", file);
11541 }
11542
11543 /* Do whatever processing is required at the end of a file. */
11544
11545 static void
11546 sparc_file_end (void)
11547 {
11548 /* If we need to emit the special GOT helper function, do so now. */
11549 if (got_helper_rtx)
11550 {
11551 const char *name = XSTR (got_helper_rtx, 0);
11552 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11553 #ifdef DWARF2_UNWIND_INFO
11554 bool do_cfi;
11555 #endif
11556
11557 if (USE_HIDDEN_LINKONCE)
11558 {
11559 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11560 get_identifier (name),
11561 build_function_type_list (void_type_node,
11562 NULL_TREE));
11563 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11564 NULL_TREE, void_type_node);
11565 TREE_PUBLIC (decl) = 1;
11566 TREE_STATIC (decl) = 1;
11567 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11568 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11569 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11570 resolve_unique_section (decl, 0, flag_function_sections);
11571 allocate_struct_function (decl, true);
11572 cfun->is_thunk = 1;
11573 current_function_decl = decl;
11574 init_varasm_status ();
11575 assemble_start_function (decl, name);
11576 }
11577 else
11578 {
11579 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11580 switch_to_section (text_section);
11581 if (align > 0)
11582 ASM_OUTPUT_ALIGN (asm_out_file, align);
11583 ASM_OUTPUT_LABEL (asm_out_file, name);
11584 }
11585
11586 #ifdef DWARF2_UNWIND_INFO
11587 do_cfi = dwarf2out_do_cfi_asm ();
11588 if (do_cfi)
11589 fprintf (asm_out_file, "\t.cfi_startproc\n");
11590 #endif
11591 if (flag_delayed_branch)
11592 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11593 reg_name, reg_name);
11594 else
11595 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11596 reg_name, reg_name);
11597 #ifdef DWARF2_UNWIND_INFO
11598 if (do_cfi)
11599 fprintf (asm_out_file, "\t.cfi_endproc\n");
11600 #endif
11601 }
11602
11603 if (NEED_INDICATE_EXEC_STACK)
11604 file_end_indicate_exec_stack ();
11605
11606 #ifdef TARGET_SOLARIS
11607 solaris_file_end ();
11608 #endif
11609 }
11610
11611 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11612 /* Implement TARGET_MANGLE_TYPE. */
11613
11614 static const char *
11615 sparc_mangle_type (const_tree type)
11616 {
11617 if (!TARGET_64BIT
11618 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11619 && TARGET_LONG_DOUBLE_128)
11620 return "g";
11621
11622 /* For all other types, use normal C++ mangling. */
11623 return NULL;
11624 }
11625 #endif
11626
11627 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
11628 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
11629 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
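
/* For example, the compare-and-swap expanders below call this with
   LOAD_STORE == 3 and BEFORE_AFTER == 1 to request a barrier covering
   both loads and stores before the atomic operation, and with
   BEFORE_AFTER == 2 for the barrier after it.  */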
11630
11631 void
11632 sparc_emit_membar_for_model (enum memmodel model,
11633 int load_store, int before_after)
11634 {
11635 /* Bits for the MEMBAR mmask field. */
11636 const int LoadLoad = 1;
11637 const int StoreLoad = 2;
11638 const int LoadStore = 4;
11639 const int StoreStore = 8;
11640
11641 int mm = 0, implied = 0;
11642
11643 switch (sparc_memory_model)
11644 {
11645 case SMM_SC:
11646 /* Sequential Consistency. All memory transactions are immediately
11647 visible in sequential execution order. No barriers needed. */
11648 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11649 break;
11650
11651 case SMM_TSO:
11652 /* Total Store Ordering: all memory transactions with store semantics
11653 are followed by an implied StoreStore. */
11654 implied |= StoreStore;
11655
11656 /* If we're not looking for a raw barrier (before+after), then atomic
11657 operations get the benefit of being both load and store. */
11658 if (load_store == 3 && before_after == 1)
11659 implied |= StoreLoad;
11660 /* FALLTHRU */
11661
11662 case SMM_PSO:
11663 /* Partial Store Ordering: all memory transactions with load semantics
11664 are followed by an implied LoadLoad | LoadStore. */
11665 implied |= LoadLoad | LoadStore;
11666
11667 /* If we're not looking for a raw barrier (before+after), then atomic
11668 operations get the benefit of being both load and store. */
11669 if (load_store == 3 && before_after == 2)
11670 implied |= StoreLoad | StoreStore;
11671 /* FALLTHRU */
11672
11673 case SMM_RMO:
11674 /* Relaxed Memory Ordering: no implicit bits. */
11675 break;
11676
11677 default:
11678 gcc_unreachable ();
11679 }
11680
11681 if (before_after & 1)
11682 {
11683 if (model == MEMMODEL_RELEASE
11684 || model == MEMMODEL_ACQ_REL
11685 || model == MEMMODEL_SEQ_CST)
11686 {
11687 if (load_store & 1)
11688 mm |= LoadLoad | StoreLoad;
11689 if (load_store & 2)
11690 mm |= LoadStore | StoreStore;
11691 }
11692 }
11693 if (before_after & 2)
11694 {
11695 if (model == MEMMODEL_ACQUIRE
11696 || model == MEMMODEL_ACQ_REL
11697 || model == MEMMODEL_SEQ_CST)
11698 {
11699 if (load_store & 1)
11700 mm |= LoadLoad | LoadStore;
11701 if (load_store & 2)
11702 mm |= StoreLoad | StoreStore;
11703 }
11704 }
11705
11706 /* Remove the bits implied by the system memory model. */
11707 mm &= ~implied;
11708
11709 /* For raw barriers (before+after), always emit a barrier.
11710 This will become a compile-time barrier if needed. */
11711 if (mm || before_after == 3)
11712 emit_insn (gen_membar (GEN_INT (mm)));
11713 }
11714
11715 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing 32-bit
11716 compare and swap on the word containing the byte or half-word. */
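
/* In outline, the expansion below amounts to this pseudo-C (a sketch;
   shifts follow SPARC's big-endian byte numbering, and CAS32 stands for
   the 32-bit atomic_compare_and_swapsi_1 pattern):

     addr = mem & -4;				(word containing the value)
     off = ((mem & 3) ^ (QImode ? 3 : 2)) << 3;	(bit offset of the lane)
     mask = (QImode ? 0xff : 0xffff) << off;
     val = *addr & ~mask;			(surrounding bytes)
   loop:
     oldword = ((oldval << off) & mask) | val;
     newword = ((newval << off) & mask) | val;
     res = CAS32 (addr, oldword, newword);
     if (res == oldword)
       goto done;				(bool_result = 1)
     if ((res & ~mask) != val)
       { val = res & ~mask; goto loop; }	(other bytes changed, retry)
   done:					(else bool_result = 0)
     result = (res & mask) >> off;  */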
11717
11718 static void
11719 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11720 rtx oldval, rtx newval)
11721 {
11722 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11723 rtx addr = gen_reg_rtx (Pmode);
11724 rtx off = gen_reg_rtx (SImode);
11725 rtx oldv = gen_reg_rtx (SImode);
11726 rtx newv = gen_reg_rtx (SImode);
11727 rtx oldvalue = gen_reg_rtx (SImode);
11728 rtx newvalue = gen_reg_rtx (SImode);
11729 rtx res = gen_reg_rtx (SImode);
11730 rtx resv = gen_reg_rtx (SImode);
11731 rtx memsi, val, mask, end_label, loop_label, cc;
11732
11733 emit_insn (gen_rtx_SET (VOIDmode, addr,
11734 gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11735
11736 if (Pmode != SImode)
11737 addr1 = gen_lowpart (SImode, addr1);
11738 emit_insn (gen_rtx_SET (VOIDmode, off,
11739 gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11740
11741 memsi = gen_rtx_MEM (SImode, addr);
11742 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11743 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11744
11745 val = copy_to_reg (memsi);
11746
11747 emit_insn (gen_rtx_SET (VOIDmode, off,
11748 gen_rtx_XOR (SImode, off,
11749 GEN_INT (GET_MODE (mem) == QImode
11750 ? 3 : 2))));
11751
11752 emit_insn (gen_rtx_SET (VOIDmode, off,
11753 gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11754
11755 if (GET_MODE (mem) == QImode)
11756 mask = force_reg (SImode, GEN_INT (0xff));
11757 else
11758 mask = force_reg (SImode, GEN_INT (0xffff));
11759
11760 emit_insn (gen_rtx_SET (VOIDmode, mask,
11761 gen_rtx_ASHIFT (SImode, mask, off)));
11762
11763 emit_insn (gen_rtx_SET (VOIDmode, val,
11764 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11765 val)));
11766
11767 oldval = gen_lowpart (SImode, oldval);
11768 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11769 gen_rtx_ASHIFT (SImode, oldval, off)));
11770
11771 newval = gen_lowpart_common (SImode, newval);
11772 emit_insn (gen_rtx_SET (VOIDmode, newv,
11773 gen_rtx_ASHIFT (SImode, newval, off)));
11774
11775 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11776 gen_rtx_AND (SImode, oldv, mask)));
11777
11778 emit_insn (gen_rtx_SET (VOIDmode, newv,
11779 gen_rtx_AND (SImode, newv, mask)));
11780
11781 end_label = gen_label_rtx ();
11782 loop_label = gen_label_rtx ();
11783 emit_label (loop_label);
11784
11785 emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
11786 gen_rtx_IOR (SImode, oldv, val)));
11787
11788 emit_insn (gen_rtx_SET (VOIDmode, newvalue,
11789 gen_rtx_IOR (SImode, newv, val)));
11790
11791 emit_move_insn (bool_result, const1_rtx);
11792
11793 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11794
11795 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11796
11797 emit_insn (gen_rtx_SET (VOIDmode, resv,
11798 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11799 res)));
11800
11801 emit_move_insn (bool_result, const0_rtx);
11802
11803 cc = gen_compare_reg_1 (NE, resv, val);
11804 emit_insn (gen_rtx_SET (VOIDmode, val, resv));
11805
11806 /* Use cbranchcc4 to separate the compare and branch! */
11807 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11808 cc, const0_rtx, loop_label));
11809
11810 emit_label (end_label);
11811
11812 emit_insn (gen_rtx_SET (VOIDmode, res,
11813 gen_rtx_AND (SImode, res, mask)));
11814
11815 emit_insn (gen_rtx_SET (VOIDmode, res,
11816 gen_rtx_LSHIFTRT (SImode, res, off)));
11817
11818 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
11819 }
11820
11821 /* Expand code to perform a compare-and-swap. */
11822
11823 void
11824 sparc_expand_compare_and_swap (rtx operands[])
11825 {
11826 rtx bval, retval, mem, oldval, newval;
11827 enum machine_mode mode;
11828 enum memmodel model;
11829
11830 bval = operands[0];
11831 retval = operands[1];
11832 mem = operands[2];
11833 oldval = operands[3];
11834 newval = operands[4];
11835 model = (enum memmodel) INTVAL (operands[6]);
11836 mode = GET_MODE (mem);
11837
11838 sparc_emit_membar_for_model (model, 3, 1);
11839
11840 if (reg_overlap_mentioned_p (retval, oldval))
11841 oldval = copy_to_reg (oldval);
11842
11843 if (mode == QImode || mode == HImode)
11844 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11845 else
11846 {
11847 rtx (*gen) (rtx, rtx, rtx, rtx);
11848 rtx x;
11849
11850 if (mode == SImode)
11851 gen = gen_atomic_compare_and_swapsi_1;
11852 else
11853 gen = gen_atomic_compare_and_swapdi_1;
11854 emit_insn (gen (retval, mem, oldval, newval));
11855
11856 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11857 if (x != bval)
11858 convert_move (bval, x, 1);
11859 }
11860
11861 sparc_emit_membar_for_model (model, 3, 2);
11862 }
11863
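/* Convert the element-index selector SEL for vector mode VMODE into the
   byte-level selector expected by BSHUFFLE and load it into the %gsr by
   means of the BMASK instruction (a descriptive summary of the code
   below, which assumes BMASK's side effect of writing the GSR mask).  */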
11864 void
11865 sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel)
11866 {
11867 rtx t_1, t_2, t_3;
11868
11869 sel = gen_lowpart (DImode, sel);
11870 switch (vmode)
11871 {
11872 case V2SImode:
11873 /* inp = xxxxxxxAxxxxxxxB */
11874 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11875 NULL_RTX, 1, OPTAB_DIRECT);
11876 /* t_1 = ....xxxxxxxAxxx. */
11877 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11878 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
11879 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11880 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
11881 /* sel = .......B */
11882 /* t_1 = ...A.... */
11883 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11884 /* sel = ...A...B */
11885 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
11886 /* sel = AAAABBBB * 4 */
11887 t_1 = force_reg (SImode, GEN_INT (0x01230123));
11888 /* sel = { A*4, A*4+1, A*4+2, ... } */
11889 break;
11890
11891 case V4HImode:
11892 /* inp = xxxAxxxBxxxCxxxD */
11893 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11894 NULL_RTX, 1, OPTAB_DIRECT);
11895 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11896 NULL_RTX, 1, OPTAB_DIRECT);
11897 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
11898 NULL_RTX, 1, OPTAB_DIRECT);
11899 /* t_1 = ..xxxAxxxBxxxCxx */
11900 /* t_2 = ....xxxAxxxBxxxC */
11901 /* t_3 = ......xxxAxxxBxx */
11902 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11903 GEN_INT (0x07),
11904 NULL_RTX, 1, OPTAB_DIRECT);
11905 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11906 GEN_INT (0x0700),
11907 NULL_RTX, 1, OPTAB_DIRECT);
11908 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
11909 GEN_INT (0x070000),
11910 NULL_RTX, 1, OPTAB_DIRECT);
11911 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
11912 GEN_INT (0x07000000),
11913 NULL_RTX, 1, OPTAB_DIRECT);
11914 /* sel = .......D */
11915 /* t_1 = .....C.. */
11916 /* t_2 = ...B.... */
11917 /* t_3 = .A...... */
11918 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11919 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
11920 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
11921 /* sel = .A.B.C.D */
11922 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
11923 /* sel = AABBCCDD * 2 */
11924 t_1 = force_reg (SImode, GEN_INT (0x01010101));
11925 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
11926 break;
11927
11928 case V8QImode:
11929 /* input = xAxBxCxDxExFxGxH */
11930 sel = expand_simple_binop (DImode, AND, sel,
11931 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
11932 | 0x0f0f0f0f),
11933 NULL_RTX, 1, OPTAB_DIRECT);
11934 /* sel = .A.B.C.D.E.F.G.H */
11935 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
11936 NULL_RTX, 1, OPTAB_DIRECT);
11937 /* t_1 = ..A.B.C.D.E.F.G. */
11938 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11939 NULL_RTX, 1, OPTAB_DIRECT);
11940 /* sel = .AABBCCDDEEFFGGH */
11941 sel = expand_simple_binop (DImode, AND, sel,
11942 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
11943 | 0xff00ff),
11944 NULL_RTX, 1, OPTAB_DIRECT);
11945 /* sel = ..AB..CD..EF..GH */
11946 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11947 NULL_RTX, 1, OPTAB_DIRECT);
11948 /* t_1 = ....AB..CD..EF.. */
11949 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11950 NULL_RTX, 1, OPTAB_DIRECT);
11951 /* sel = ..ABABCDCDEFEFGH */
11952 sel = expand_simple_binop (DImode, AND, sel,
11953 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
11954 NULL_RTX, 1, OPTAB_DIRECT);
11955 /* sel = ....ABCD....EFGH */
11956 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11957 NULL_RTX, 1, OPTAB_DIRECT);
11958 /* t_1 = ........ABCD.... */
11959 sel = gen_lowpart (SImode, sel);
11960 t_1 = gen_lowpart (SImode, t_1);
11961 break;
11962
11963 default:
11964 gcc_unreachable ();
11965 }
11966
11967 /* Always perform the final addition/merge within the bmask insn. */
11968 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, t_1));
11969 }
11970
11971 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
11972
11973 static bool
11974 sparc_frame_pointer_required (void)
11975 {
11976 /* If the stack pointer is dynamically modified in the function, it cannot
11977 serve as the frame pointer. */
11978 if (cfun->calls_alloca)
11979 return true;
11980
11981 /* If the function receives nonlocal gotos, it needs to save the frame
11982 pointer in the nonlocal_goto_save_area object. */
11983 if (cfun->has_nonlocal_label)
11984 return true;
11985
11986 /* In flat mode, that's it. */
11987 if (TARGET_FLAT)
11988 return false;
11989
11990 /* Otherwise, the frame pointer is required if the function isn't leaf. */
11991 return !(crtl->is_leaf && only_leaf_regs_used ());
11992 }
11993
11994 /* The way this is structured, we can't eliminate SFP in favor of SP
11995 if the frame pointer is required: we want to use the SFP->HFP elimination
11996 in that case. But the test in update_eliminables doesn't know we are
11997 assuming below that we only do the former elimination. */
11998
11999 static bool
12000 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12001 {
12002 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12003 }
12004
12005 /* Return the hard frame pointer directly to bypass the stack bias. */
12006
12007 static rtx
12008 sparc_builtin_setjmp_frame_value (void)
12009 {
12010 return hard_frame_pointer_rtx;
12011 }
12012
12013 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12014 they won't be allocated. */
12015
12016 static void
12017 sparc_conditional_register_usage (void)
12018 {
12019 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12020 {
12021 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12022 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12023 }
12024 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
12025 then honor it. */
12026 if (TARGET_ARCH32 && fixed_regs[5])
12027 fixed_regs[5] = 1;
12028 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12029 fixed_regs[5] = 0;
12030 if (! TARGET_V9)
12031 {
12032 int regno;
12033 for (regno = SPARC_FIRST_V9_FP_REG;
12034 regno <= SPARC_LAST_V9_FP_REG;
12035 regno++)
12036 fixed_regs[regno] = 1;
12037 /* %fcc0 is used by v8 and v9. */
12038 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12039 regno <= SPARC_LAST_V9_FCC_REG;
12040 regno++)
12041 fixed_regs[regno] = 1;
12042 }
12043 if (! TARGET_FPU)
12044 {
12045 int regno;
12046 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12047 fixed_regs[regno] = 1;
12048 }
12049 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
12050 then honor it. Likewise with g3 and g4. */
12051 if (fixed_regs[2] == 2)
12052 fixed_regs[2] = ! TARGET_APP_REGS;
12053 if (fixed_regs[3] == 2)
12054 fixed_regs[3] = ! TARGET_APP_REGS;
12055 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12056 fixed_regs[4] = ! TARGET_APP_REGS;
12057 else if (TARGET_CM_EMBMEDANY)
12058 fixed_regs[4] = 1;
12059 else if (fixed_regs[4] == 2)
12060 fixed_regs[4] = 0;
12061 if (TARGET_FLAT)
12062 {
12063 int regno;
12064 /* Disable leaf functions. */
12065 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12066 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12067 leaf_reg_remap [regno] = regno;
12068 }
12069 if (TARGET_VIS)
12070 global_regs[SPARC_GSR_REG] = 1;
12071 }
12072
12073 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12074
12075 - We can't load constants into FP registers.
12076 - We can't load FP constants into integer registers when soft-float,
12077 because there is no soft-float pattern with a r/F constraint.
12078 - We can't load FP constants into integer registers for TFmode unless
12079 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12080 - Try and reload integer constants (symbolic or otherwise) back into
12081 registers directly, rather than having them dumped to memory. */
12082
12083 static reg_class_t
12084 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12085 {
12086 enum machine_mode mode = GET_MODE (x);
12087 if (CONSTANT_P (x))
12088 {
12089 if (FP_REG_CLASS_P (rclass)
12090 || rclass == GENERAL_OR_FP_REGS
12091 || rclass == GENERAL_OR_EXTRA_FP_REGS
12092 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12093 || (mode == TFmode && ! const_zero_operand (x, mode)))
12094 return NO_REGS;
12095
12096 if (GET_MODE_CLASS (mode) == MODE_INT)
12097 return GENERAL_REGS;
12098
12099 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12100 {
12101 if (! FP_REG_CLASS_P (rclass)
12102 || !(const_zero_operand (x, mode)
12103 || const_all_ones_operand (x, mode)))
12104 return NO_REGS;
12105 }
12106 }
12107
12108 if (TARGET_VIS3
12109 && ! TARGET_ARCH64
12110 && (rclass == EXTRA_FP_REGS
12111 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12112 {
12113 int regno = true_regnum (x);
12114
12115 if (SPARC_INT_REG_P (regno))
12116 return (rclass == EXTRA_FP_REGS
12117 ? FP_REGS : GENERAL_OR_FP_REGS);
12118 }
12119
12120 return rclass;
12121 }
12122
12123 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12124 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
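
/* For illustration, in the general case (distinct register operands, an
   alternative providing temporaries %3 and %4), the output amounts to
   roughly this sequence (a sketch, where OPCODE is e.g. mulx):

	srl	%L1, 0, %L1	! zero-extend low words if not known
	srl	%L2, 0, %L2	! to be 64-bit clean
	sllx	%H1, 32, %3	! reassemble the 64-bit values
	sllx	%H2, 32, %4
	or	%L1, %3, %3
	or	%L2, %4, %4
	OPCODE	%3, %4, %3
	srlx	%3, 32, %H0	! split the 64-bit result back
	mov	%3, %L0  */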
12125
12126 const char *
12127 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12128 {
12129 char mulstr[32];
12130
12131 gcc_assert (! TARGET_ARCH64);
12132
12133 if (sparc_check_64 (operands[1], insn) <= 0)
12134 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12135 if (which_alternative == 1)
12136 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12137 if (GET_CODE (operands[2]) == CONST_INT)
12138 {
12139 if (which_alternative == 1)
12140 {
12141 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12142 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12143 output_asm_insn (mulstr, operands);
12144 return "srlx\t%L0, 32, %H0";
12145 }
12146 else
12147 {
12148 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12149 output_asm_insn ("or\t%L1, %3, %3", operands);
12150 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12151 output_asm_insn (mulstr, operands);
12152 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12153 return "mov\t%3, %L0";
12154 }
12155 }
12156 else if (rtx_equal_p (operands[1], operands[2]))
12157 {
12158 if (which_alternative == 1)
12159 {
12160 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12161 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12162 output_asm_insn (mulstr, operands);
12163 return "srlx\t%L0, 32, %H0";
12164 }
12165 else
12166 {
12167 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12168 output_asm_insn ("or\t%L1, %3, %3", operands);
12169 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12170 output_asm_insn (mulstr, operands);
12171 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12172 return "mov\t%3, %L0";
12173 }
12174 }
12175 if (sparc_check_64 (operands[2], insn) <= 0)
12176 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12177 if (which_alternative == 1)
12178 {
12179 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12180 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12181 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12182 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12183 output_asm_insn (mulstr, operands);
12184 return "srlx\t%L0, 32, %H0";
12185 }
12186 else
12187 {
12188 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12189 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12190 output_asm_insn ("or\t%L1, %3, %3", operands);
12191 output_asm_insn ("or\t%L2, %4, %4", operands);
12192 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12193 output_asm_insn (mulstr, operands);
12194 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12195 return "mov\t%3, %L0";
12196 }
12197 }
12198
12199 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12200 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
12201 and INNER_MODE are the modes describing TARGET. */
12202
12203 static void
12204 vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode,
12205 enum machine_mode inner_mode)
12206 {
12207 rtx t1, final_insn, sel;
12208 int bmask;
12209
12210 t1 = gen_reg_rtx (mode);
12211
12212 elt = convert_modes (SImode, inner_mode, elt, true);
12213 emit_move_insn (gen_lowpart(SImode, t1), elt);
12214
12215 switch (mode)
12216 {
12217 case V2SImode:
12218 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12219 bmask = 0x45674567;
12220 break;
12221 case V4HImode:
12222 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12223 bmask = 0x67676767;
12224 break;
12225 case V8QImode:
12226 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12227 bmask = 0x77777777;
12228 break;
12229 default:
12230 gcc_unreachable ();
12231 }
12232
12233 sel = force_reg (SImode, GEN_INT (bmask));
12234 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, const0_rtx));
12235 emit_insn (final_insn);
12236 }
12237
12238 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12239 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
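
/* Illustration: starting from T1 = { 0, 0, 0, e }, each FPMERGE of the
   low half of the previous result with itself doubles the trailing run
   of e's: { 0, 0, e, e }, then { e, e, e, e }, so the final FPMERGE
   produces { e, e, e, e, e, e, e, e } in TARGET.  */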
12240
12241 static void
12242 vector_init_fpmerge (rtx target, rtx elt)
12243 {
12244 rtx t1, t2, t2_low, t3, t3_low;
12245
12246 t1 = gen_reg_rtx (V4QImode);
12247 elt = convert_modes (SImode, QImode, elt, true);
12248 emit_move_insn (gen_lowpart (SImode, t1), elt);
12249
12250 t2 = gen_reg_rtx (V8QImode);
12251 t2_low = gen_lowpart (V4QImode, t2);
12252 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12253
12254 t3 = gen_reg_rtx (V8QImode);
12255 t3_low = gen_lowpart (V4QImode, t3);
12256 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12257
12258 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12259 }
12260
12261 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12262 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
12263
12264 static void
12265 vector_init_faligndata (rtx target, rtx elt)
12266 {
12267 rtx t1 = gen_reg_rtx (V4HImode);
12268 int i;
12269
12270 elt = convert_modes (SImode, HImode, elt, true);
12271 emit_move_insn (gen_lowpart (SImode, t1), elt);
12272
12273 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12274 force_reg (SImode, GEN_INT (6)),
12275 const0_rtx));
12276
12277 for (i = 0; i < 4; i++)
12278 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12279 }
12280
12281 /* Emit code to initialize TARGET to values for individual fields VALS. */
12282
12283 void
12284 sparc_expand_vector_init (rtx target, rtx vals)
12285 {
12286 const enum machine_mode mode = GET_MODE (target);
12287 const enum machine_mode inner_mode = GET_MODE_INNER (mode);
12288 const int n_elts = GET_MODE_NUNITS (mode);
12289 int i, n_var = 0;
12290 bool all_same;
12291 rtx mem;
12292
12293 all_same = true;
12294 for (i = 0; i < n_elts; i++)
12295 {
12296 rtx x = XVECEXP (vals, 0, i);
12297 if (!CONSTANT_P (x))
12298 n_var++;
12299
12300 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12301 all_same = false;
12302 }
12303
12304 if (n_var == 0)
12305 {
12306 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12307 return;
12308 }
12309
12310 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12311 {
12312 if (GET_MODE_SIZE (inner_mode) == 4)
12313 {
12314 emit_move_insn (gen_lowpart (SImode, target),
12315 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12316 return;
12317 }
12318 else if (GET_MODE_SIZE (inner_mode) == 8)
12319 {
12320 emit_move_insn (gen_lowpart (DImode, target),
12321 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12322 return;
12323 }
12324 }
12325 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12326 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12327 {
12328 emit_move_insn (gen_highpart (word_mode, target),
12329 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12330 emit_move_insn (gen_lowpart (word_mode, target),
12331 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12332 return;
12333 }
12334
12335 if (all_same && GET_MODE_SIZE (mode) == 8)
12336 {
12337 if (TARGET_VIS2)
12338 {
12339 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12340 return;
12341 }
12342 if (mode == V8QImode)
12343 {
12344 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12345 return;
12346 }
12347 if (mode == V4HImode)
12348 {
12349 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12350 return;
12351 }
12352 }
12353
12354 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12355 for (i = 0; i < n_elts; i++)
12356 emit_move_insn (adjust_address_nv (mem, inner_mode,
12357 i * GET_MODE_SIZE (inner_mode)),
12358 XVECEXP (vals, 0, i));
12359 emit_move_insn (target, mem);
12360 }
12361
12362 /* Implement TARGET_SECONDARY_RELOAD. */
12363
12364 static reg_class_t
12365 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12366 enum machine_mode mode, secondary_reload_info *sri)
12367 {
12368 enum reg_class rclass = (enum reg_class) rclass_i;
12369
12370 sri->icode = CODE_FOR_nothing;
12371 sri->extra_cost = 0;
12372
12373 /* We need a temporary when loading/storing a HImode/QImode value
12374 between memory and the FPU registers. This can happen when combine puts
12375 a paradoxical subreg in a float/fix conversion insn. */
12376 if (FP_REG_CLASS_P (rclass)
12377 && (mode == HImode || mode == QImode)
12378 && (GET_CODE (x) == MEM
12379 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
12380 && true_regnum (x) == -1)))
12381 return GENERAL_REGS;
12382
12383 /* On 32-bit we need a temporary when loading/storing a DFmode value
12384 between unaligned memory and the upper FPU registers. */
12385 if (TARGET_ARCH32
12386 && rclass == EXTRA_FP_REGS
12387 && mode == DFmode
12388 && GET_CODE (x) == MEM
12389 && ! mem_min_alignment (x, 8))
12390 return FP_REGS;
12391
12392 if (((TARGET_CM_MEDANY
12393 && symbolic_operand (x, mode))
12394 || (TARGET_CM_EMBMEDANY
12395 && text_segment_operand (x, mode)))
12396 && ! flag_pic)
12397 {
12398 if (in_p)
12399 sri->icode = direct_optab_handler (reload_in_optab, mode);
12400 else
12401 sri->icode = direct_optab_handler (reload_out_optab, mode);
12402 return NO_REGS;
12403 }
12404
12405 if (TARGET_VIS3 && TARGET_ARCH32)
12406 {
12407 int regno = true_regnum (x);
12408
12409 /* When using VIS3 fp<-->int register moves, on 32-bit we have
12410 to move 8-byte values in 4-byte pieces. This only works via
12411 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
12412 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
12413 an FP_REGS intermediate move. */
12414 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
12415 || ((general_or_i64_p (rclass)
12416 || rclass == GENERAL_OR_FP_REGS)
12417 && SPARC_FP_REG_P (regno)))
12418 {
12419 sri->extra_cost = 2;
12420 return FP_REGS;
12421 }
12422 }
12423
12424 return NO_REGS;
12425 }
12426
12427 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
12428 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
12429
12430 bool
12431 sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
12432 {
12433 enum rtx_code rc = GET_CODE (operands[1]);
12434 enum machine_mode cmp_mode;
12435 rtx cc_reg, dst, cmp;
12436
12437 cmp = operands[1];
12438 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
12439 return false;
12440
12441 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
12442 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
12443
12444 cmp_mode = GET_MODE (XEXP (cmp, 0));
12445 rc = GET_CODE (cmp);
12446
12447 dst = operands[0];
12448 if (! rtx_equal_p (operands[2], dst)
12449 && ! rtx_equal_p (operands[3], dst))
12450 {
12451 if (reg_overlap_mentioned_p (dst, cmp))
12452 dst = gen_reg_rtx (mode);
12453
12454 emit_move_insn (dst, operands[3]);
12455 }
12456 else if (operands[2] == dst)
12457 {
12458 operands[2] = operands[3];
12459
12460 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
12461 rc = reverse_condition_maybe_unordered (rc);
12462 else
12463 rc = reverse_condition (rc);
12464 }
12465
12466 if (XEXP (cmp, 1) == const0_rtx
12467 && GET_CODE (XEXP (cmp, 0)) == REG
12468 && cmp_mode == DImode
12469 && v9_regcmp_p (rc))
12470 cc_reg = XEXP (cmp, 0);
12471 else
12472 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
12473
12474 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
12475
12476 emit_insn (gen_rtx_SET (VOIDmode, dst,
12477 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
12478
12479 if (dst != operands[0])
12480 emit_move_insn (operands[0], dst);
12481
12482 return true;
12483 }
12484
12485 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12486 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12487 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
12488 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12489 code to be used for the condition mask. */
12490
12491 void
12492 sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode)
12493 {
12494 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12495 enum rtx_code code = GET_CODE (operands[3]);
12496
12497 mask = gen_reg_rtx (Pmode);
12498 cop0 = operands[4];
12499 cop1 = operands[5];
12500 if (code == LT || code == GE)
12501 {
12502 rtx t;
12503
12504 code = swap_condition (code);
12505 t = cop0; cop0 = cop1; cop1 = t;
12506 }
12507
12508 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12509
12510 fcmp = gen_rtx_UNSPEC (Pmode,
12511 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12512 fcode);
12513
12514 cmask = gen_rtx_UNSPEC (DImode,
12515 gen_rtvec (2, mask, gsr),
12516 ccode);
12517
12518 bshuf = gen_rtx_UNSPEC (mode,
12519 gen_rtvec (3, operands[1], operands[2], gsr),
12520 UNSPEC_BSHUFFLE);
12521
12522 emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
12523 emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));
12524
12525 emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
12526 }
12527
12528 /* On sparc, any mode which naturally allocates into the float
12529 registers should return 4 here. */
12530
12531 unsigned int
12532 sparc_regmode_natural_size (enum machine_mode mode)
12533 {
12534 int size = UNITS_PER_WORD;
12535
12536 if (TARGET_ARCH64)
12537 {
12538 enum mode_class mclass = GET_MODE_CLASS (mode);
12539
12540 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12541 size = 4;
12542 }
12543
12544 return size;
12545 }
12546
12547 /* Return TRUE if it is a good idea to tie two pseudo registers
12548 when one has mode MODE1 and one has mode MODE2.
12549 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12550 for any hard reg, then this must be FALSE for correct output.
12551
12552 For V9 we have to deal with the fact that only the lower 32 floating
12553 point registers are 32-bit addressable. */
12554
12555 bool
12556 sparc_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
12557 {
12558 enum mode_class mclass1, mclass2;
12559 unsigned short size1, size2;
12560
12561 if (mode1 == mode2)
12562 return true;
12563
12564 mclass1 = GET_MODE_CLASS (mode1);
12565 mclass2 = GET_MODE_CLASS (mode2);
12566 if (mclass1 != mclass2)
12567 return false;
12568
12569 if (! TARGET_V9)
12570 return true;
12571
12572 /* Classes are the same and we are V9 so we have to deal with upper
12573 vs. lower floating point registers. If one of the modes is a
12574 4-byte mode, and the other is not, we have to mark them as not
12575 tieable because only the lower 32 floating point registers are
12576 addressable 32 bits at a time.
12577
12578 We can't just test explicitly for SFmode, otherwise we won't
12579 cover the vector mode cases properly. */
12580
12581 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12582 return true;
12583
12584 size1 = GET_MODE_SIZE (mode1);
12585 size2 = GET_MODE_SIZE (mode2);
12586 if ((size1 > 4 && size2 == 4)
12587 || (size2 > 4 && size1 == 4))
12588 return false;
12589
12590 return true;
12591 }
12592
12593 /* Implement TARGET_CSTORE_MODE. */
12594
12595 static enum machine_mode
12596 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12597 {
12598 return (TARGET_ARCH64 ? DImode : SImode);
12599 }
12600
12601 /* Return the compound expression made of T1 and T2. */
12602
12603 static inline tree
12604 compound_expr (tree t1, tree t2)
12605 {
12606 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
12607 }
12608
12609 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
12610
12611 static void
12612 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12613 {
12614 if (!TARGET_FPU)
12615 return;
12616
12617 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
12618 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
12619
12620 /* We generate the equivalent of feholdexcept (&fenv_var):
12621
12622 unsigned int fenv_var;
12623 __builtin_store_fsr (&fenv_var);
12624
12625 unsigned int tmp1_var;
12626 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
12627
12628 __builtin_load_fsr (&tmp1_var); */
12629
12630 tree fenv_var = create_tmp_var (unsigned_type_node, NULL);
12631 mark_addressable (fenv_var);
12632 tree fenv_addr = build_fold_addr_expr (fenv_var);
12633 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
12634 tree hold_stfsr = build_call_expr (stfsr, 1, fenv_addr);
12635
12636 tree tmp1_var = create_tmp_var (unsigned_type_node, NULL);
12637 mark_addressable (tmp1_var);
12638 tree masked_fenv_var
12639 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
12640 build_int_cst (unsigned_type_node,
12641 ~(accrued_exception_mask | trap_enable_mask)));
12642 tree hold_mask
12643 = build2 (MODIFY_EXPR, void_type_node, tmp1_var, masked_fenv_var);
12644
12645 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
12646 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
12647 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
12648
12649 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
12650
12651 /* We reload the value of tmp1_var to clear the exceptions:
12652
12653 __builtin_load_fsr (&tmp1_var); */
12654
12655 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
12656
12657 /* We generate the equivalent of feupdateenv (&fenv_var):
12658
12659 unsigned int tmp2_var;
12660 __builtin_store_fsr (&tmp2_var);
12661
12662 __builtin_load_fsr (&fenv_var);
12663
12664 if (SPARC_LOW_FE_EXCEPT_VALUES)
12665 tmp2_var >>= 5;
12666 __atomic_feraiseexcept ((int) tmp2_var); */
12667
12668 tree tmp2_var = create_tmp_var (unsigned_type_node, NULL);
12669 mark_addressable (tmp2_var);
12670 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
12671 tree update_stfsr = build_call_expr (stfsr, 1, tmp2_addr);
12672
12673 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
12674
12675 tree atomic_feraiseexcept
12676 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12677 tree update_call
12678 = build_call_expr (atomic_feraiseexcept, 1,
12679 fold_convert (integer_type_node, tmp2_var));
12680
12681 if (SPARC_LOW_FE_EXCEPT_VALUES)
12682 {
12683 tree shifted_tmp2_var
12684 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
12685 build_int_cst (unsigned_type_node, 5));
12686 tree update_shift
12687 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
12688 update_call = compound_expr (update_shift, update_call);
12689 }
12690
12691 *update
12692 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
12693 }
12694
12695 #include "gt-sparc.h"