/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2014 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "function.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "tm_p.h"
#include "debug.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "hash-table.h"
#include "vec.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "df.h"
#include "opts.h"
#include "tree-pass.h"
#include "context.h"
#include "wide-int.h"
#include "builtins.h"

/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* Integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     not variable.  */
  const int int_mul_bit_factor;

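  /* Illustrative example (not from the original source): with the
     UltraSPARC entry below, where int_mul is COSTS_N_INSNS (4) and
     int_mul_bit_factor is 2, a multiply by rs1 = 1000 (highest set
     bit 9) would be costed as int_mul + (9 - 3) / 2, i.e.
     COSTS_N_INSNS (4) plus 3 units.  */
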
  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};

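/* A minimal sketch of how these tables are meant to be consumed (an
   assumption for illustration, not code from this file): cost hooks read
   fields through the sparc_costs pointer that sparc_option_override
   selects below, for example

     *total = sparc_costs->float_mul;

   so retuning for a different CPU only swaps the table.  */
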
static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const struct processor_costs *sparc_costs = &cypress_costs;

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, always
   use "or %o7,%g0,X; call Y; or X,%g0,%o7", so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out
   whether somebody branches between the sethi and the jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
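
/* Illustrative note (assuming the standard SPARC numbering %g0-%g7 = 0-7,
   %o0-%o7 = 8-15, %l0-%l7 = 16-23, %i0-%i7 = 24-31): in a leaf function
   the incoming registers %i0-%i5 (24-29) are remapped to the outgoing
   registers %o0-%o5 (8-13) and %i7 (31) to %o7 (15), while entries of -1
   mark registers that cannot be remapped.  */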

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Some local-dynamic TLS symbol name.  */
  const char *some_ld_name;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size cfun->machine->frame_size
#define sparc_apparent_frame_size cfun->machine->apparent_frame_size
#define sparc_frame_base_reg cfun->machine->frame_base_reg
#define sparc_frame_base_offset cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static void scan_record_type (const_tree, int *, int *, int *);
static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx, rtx, rtx, int);
static int hypersparc_adjust_cost (rtx, rtx, rtx, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (enum machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx);
static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx, rtx, rtx, int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (enum machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static int sparc_register_move_cost (enum machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, int, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (enum machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static enum machine_mode sparc_promote_function_mode (const_tree, enum machine_mode,
						      int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (enum machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     enum machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					enum machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 enum machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       enum machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					enum machine_mode, const_tree, bool);
static unsigned int sparc_function_arg_boundary (enum machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    enum machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static enum machine_mode sparc_preferred_simd_mode (enum machine_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   enum machine_mode,
					   secondary_reload_info *);
static enum machine_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
\f
#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL, false }
};
#endif
\f
/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}

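/* For instance (illustrative, not from the original source), given
   (zero_extend:SI (mem:QI (reg:SI 100))), mem_ref peels off the extension
   and returns the inner MEM; given a bare REG, it returns NULL_RTX.  */
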
/* We use a machine specific pass to enable workarounds for errata.
   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  The pass is inserted in the pass pipeline
   at the end of sparc_option_override.  */

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx insn, next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
	insn = XVECEXP (PATTERN (insn), 0, 1);

      /* Look for a single-word load into an odd-numbered FP register.  */
      if (sparc_fix_at697f
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	  && MEM_P (SET_SRC (set))
	  && REG_P (SET_DEST (set))
	  && REGNO (SET_DEST (set)) > 31
	  && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
		       ld [address], %fx+1
		       FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
		       ld [address], %fx+1
		       FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
		   ld [address], %fx+1
		   fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && mem_ref (SET_SRC (set)) != NULL_RTX
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL_RTX;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (GET_CODE (PATTERN (after)) == SEQUENCE)
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = XVECEXP (PATTERN (after), 0, 1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}

namespace {

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* The only errata we handle are those of the AT697F and UT699.  */
      return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }

}; // class pass_work_around_errata

} // anon namespace

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}

/* Helpers for TARGET_DEBUG_OPTIONS.  */
static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA, 0 },
    { "cypress", MASK_ISA, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
    { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
    { "sparclite", MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
      /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
  };
  const struct cpu_table *cpu;
  unsigned int i;
  int fpu;

  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags ("Initial target_flags", target_flags);
      dump_target_flags ("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64-bit archs to use 128-bit long double.  */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
	call_used_regs [i] = 1;
      }

  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      fprintf (stderr, "sparc_cpu: %s\n",
	       cpu_table[(int) sparc_cpu].name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
#ifndef HAVE_AS_LEON
		   & ~(MASK_LEON | MASK_LEON3)
#endif
		   );

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (target_flags_explicit & MASK_FPU)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* -mvis2 implies -mvis */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is
     disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit */
  if (TARGET_VIS && ! TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0
      && (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4))
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_LEON3:
      sparc_costs = &leon3_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara4_costs;
      break;
    case PROCESSOR_NATIVE:
      gcc_unreachable ();
    };

  if (sparc_memory_model == SMM_DEFAULT)
    {
      /* Choose the memory model for the operating system.  */
      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
      if (os_default != SMM_DEFAULT)
	sparc_memory_model = os_default;
      /* Choose the most relaxed model for the processor.  */
      else if (TARGET_V9)
	sparc_memory_model = SMM_RMO;
      else if (TARGET_LEON3)
	sparc_memory_model = SMM_TSO;
      else if (TARGET_LEON)
	sparc_memory_model = SMM_SC;
      else if (TARGET_V8)
	sparc_memory_model = SMM_PSO;
      else
	sparc_memory_model = SMM_SC;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (TARGET_DEBUG_OPTIONS)
    dump_target_flags ("Final target_flags", target_flags);

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 2
			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
			     ? 8 : 3)),
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_ULTRASPARC3
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 64 : 32),
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Disable save slot sharing for call-clobbered registers by default.
     The IRA sharing algorithm works on single registers only and this
     pessimizes for double floating-point registers.  */
  if (!global_options_set.x_flag_ira_share_save_slots)
    flag_ira_share_save_slots = 0;

  /* We register a machine specific pass to work around errata, if any.
     The pass must be scheduled as late as possible so that we have the
     (essentially) final form of the insn stream to work on.
     Registering the pass must be done at start up.  It's convenient to
     do it here.  */
  opt_pass *errata_pass = make_pass_work_around_errata (g);
  struct register_pass_info insert_pass_work_around_errata =
    {
      errata_pass, /* pass */
      "dbr", /* reference_pass_name */
      1, /* ref_pass_instance_number */
      PASS_POS_INSERT_AFTER /* po_op */
    };
  register_pass (&insert_pass_work_around_errata);
}
\f
/* Miscellaneous utilities.  */

/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
   or branch on register contents instructions.  */

int
v9_regcmp_p (enum rtx_code code)
{
  return (code == EQ || code == NE || code == GE || code == LT
	  || code == LE || code == GT);
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   sethi instruction.  */

int
fp_sethi_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   mov instruction.  */

int
fp_mov_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return SPARC_SIMM13_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a high/losum
   instruction sequence.  */

int
fp_high_losum_p (rtx op)
{
  /* The constraints calling this should only be in
     SFmode move insns, so any constant which cannot
     be moved using a single insn will do.  */
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
    }

  return 0;
}

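/* Worked example for the three predicates above (illustrative, assuming
   SPARC_SIMM13_P accepts the signed 13-bit range and SPARC_SETHI_P
   accepts values whose low 10 bits are clear): 1.0f has the
   single-precision pattern 0x3f800000, too big for simm13 but with zero
   low bits, so fp_sethi_p accepts it; 1.1f is 0x3f8ccccd, which fails
   both tests, so only fp_high_losum_p accepts it.  */
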
1704 /* Return true if the address of LABEL can be loaded by means of the
1705 mov{si,di}_pic_label_ref patterns in PIC mode. */
1706
1707 static bool
1708 can_use_mov_pic_label_ref (rtx label)
1709 {
1710 /* VxWorks does not impose a fixed gap between segments; the run-time
1711 gap can be different from the object-file gap. We therefore can't
1712 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1713 are absolutely sure that X is in the same segment as the GOT.
1714 Unfortunately, the flexibility of linker scripts means that we
1715 can't be sure of that in general, so assume that GOT-relative
1716 accesses are never valid on VxWorks. */
1717 if (TARGET_VXWORKS_RTP)
1718 return false;
1719
1720 /* Similarly, if the label is non-local, it might end up being placed
1721 in a different section than the current one, whereas mov_pic_label_ref
1722 requires the label and the code to be in the same section. */
1723 if (LABEL_REF_NONLOCAL_P (label))
1724 return false;
1725
1726 /* Finally, if we are reordering basic blocks and partitioning into hot
1727 and cold sections, this might happen for any label. */
1728 if (flag_reorder_blocks_and_partition)
1729 return false;
1730
1731 return true;
1732 }
1733
1734 /* Expand a move instruction. Return true if all work is done. */
1735
1736 bool
1737 sparc_expand_move (enum machine_mode mode, rtx *operands)
1738 {
1739 /* Handle sets of MEM first. */
1740 if (GET_CODE (operands[0]) == MEM)
1741 {
1742 /* 0 is a register (or a pair of registers) on SPARC. */
1743 if (register_or_zero_operand (operands[1], mode))
1744 return false;
1745
1746 if (!reload_in_progress)
1747 {
1748 operands[0] = validize_mem (operands[0]);
1749 operands[1] = force_reg (mode, operands[1]);
1750 }
1751 }
1752
1753 /* Fixup TLS cases. */
1754 if (TARGET_HAVE_TLS
1755 && CONSTANT_P (operands[1])
1756 && sparc_tls_referenced_p (operands [1]))
1757 {
1758 operands[1] = sparc_legitimize_tls_address (operands[1]);
1759 return false;
1760 }
1761
1762 /* Fixup PIC cases. */
1763 if (flag_pic && CONSTANT_P (operands[1]))
1764 {
1765 if (pic_address_needs_scratch (operands[1]))
1766 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1767
1768 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
1769 if (GET_CODE (operands[1]) == LABEL_REF
1770 && can_use_mov_pic_label_ref (operands[1]))
1771 {
1772 if (mode == SImode)
1773 {
1774 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1775 return true;
1776 }
1777
1778 if (mode == DImode)
1779 {
1780 gcc_assert (TARGET_ARCH64);
1781 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1782 return true;
1783 }
1784 }
1785
1786 if (symbolic_operand (operands[1], mode))
1787 {
1788 operands[1]
1789 = sparc_legitimize_pic_address (operands[1],
1790 reload_in_progress
1791 ? operands[0] : NULL_RTX);
1792 return false;
1793 }
1794 }
1795
1796 /* If we are trying to toss an integer constant into FP registers,
1797 or loading a FP or vector constant, force it into memory. */
1798 if (CONSTANT_P (operands[1])
1799 && REG_P (operands[0])
1800 && (SPARC_FP_REG_P (REGNO (operands[0]))
1801 || SCALAR_FLOAT_MODE_P (mode)
1802 || VECTOR_MODE_P (mode)))
1803 {
1804 /* emit_group_store will send such bogosity to us when it is
1805 not storing directly into memory. So fix this up to avoid
1806 crashes in output_constant_pool. */
1807 if (operands [1] == const0_rtx)
1808 operands[1] = CONST0_RTX (mode);
1809
1810 /* We can clear FP registers or set them to all-ones if TARGET_VIS,
1811 and we can always do this for other registers. */
1812 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1813 && (const_zero_operand (operands[1], mode)
1814 || const_all_ones_operand (operands[1], mode)))
1815 return false;
1816
1817 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1818 /* We are able to build any SF constant in integer registers
1819 with at most 2 instructions. */
1820 && (mode == SFmode
1821 /* And any DF constant in integer registers. */
1822 || (mode == DFmode
1823 && ! can_create_pseudo_p ())))
1824 return false;
1825
1826 operands[1] = force_const_mem (mode, operands[1]);
1827 if (!reload_in_progress)
1828 operands[1] = validize_mem (operands[1]);
1829 return false;
1830 }
1831
1832 /* Accept non-constants and valid constants unmodified. */
1833 if (!CONSTANT_P (operands[1])
1834 || GET_CODE (operands[1]) == HIGH
1835 || input_operand (operands[1], mode))
1836 return false;
1837
1838 switch (mode)
1839 {
1840 case QImode:
1841 /* All QImode constants require only one insn, so proceed. */
1842 break;
1843
1844 case HImode:
1845 case SImode:
1846 sparc_emit_set_const32 (operands[0], operands[1]);
1847 return true;
1848
1849 case DImode:
1850 /* input_operand should have filtered out 32-bit mode. */
1851 sparc_emit_set_const64 (operands[0], operands[1]);
1852 return true;
1853
1854 case TImode:
1855 {
1856 rtx high, low;
1857 /* TImode isn't available in 32-bit mode. */
1858 split_double (operands[1], &high, &low);
1859 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
1860 high));
1861 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
1862 low));
1863 }
1864 return true;
1865
1866 default:
1867 gcc_unreachable ();
1868 }
1869
1870 return false;
1871 }
1872
1873 /* Load OP1, a 32-bit constant, into OP0, a register.
1874 We know it can't be done in one insn when we get
1875 here; the move expander guarantees this. */
1876
1877 static void
1878 sparc_emit_set_const32 (rtx op0, rtx op1)
1879 {
1880 enum machine_mode mode = GET_MODE (op0);
1881 rtx temp = op0;
1882
1883 if (can_create_pseudo_p ())
1884 temp = gen_reg_rtx (mode);
1885
1886 if (GET_CODE (op1) == CONST_INT)
1887 {
1888 gcc_assert (!small_int_operand (op1, mode)
1889 && !const_high_operand (op1, mode));
1890
1891 /* Emit them as real moves instead of a HIGH/LO_SUM,
1892 this way CSE can see everything and reuse intermediate
1893 values if it wants. */
1894 emit_insn (gen_rtx_SET (VOIDmode, temp,
1895 GEN_INT (INTVAL (op1)
1896 & ~(HOST_WIDE_INT)0x3ff)));
1897
1898 emit_insn (gen_rtx_SET (VOIDmode,
1899 op0,
1900 gen_rtx_IOR (mode, temp,
1901 GEN_INT (INTVAL (op1) & 0x3ff))));
1902 }
1903 else
1904 {
1905 /* A symbol, emit in the traditional way. */
1906 emit_insn (gen_rtx_SET (VOIDmode, temp,
1907 gen_rtx_HIGH (mode, op1)));
1908 emit_insn (gen_rtx_SET (VOIDmode,
1909 op0, gen_rtx_LO_SUM (mode, temp, op1)));
1910 }
1911 }
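
/* A worked example, not part of the original file: for op1 = 0x12345678
   the CONST_INT path above emits
     temp = 0x12345400    (0x12345678 & ~0x3ff)
     op0  = temp | 0x278  (0x12345678 & 0x3ff)
   which the movsi patterns render as a sethi/or pair, i.e.
   'sethi %hi(0x12345678), %temp' then 'or %temp, %lo(0x12345678), %op0'.  */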
1912
1913 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1914 If TEMP is nonzero, we are forbidden to use any other scratch
1915 registers. Otherwise, we are allowed to generate them as needed.
1916
1917 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1918 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
1919
1920 void
1921 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
1922 {
1923 rtx temp1, temp2, temp3, temp4, temp5;
1924 rtx ti_temp = 0;
1925
1926 if (temp && GET_MODE (temp) == TImode)
1927 {
1928 ti_temp = temp;
1929 temp = gen_rtx_REG (DImode, REGNO (temp));
1930 }
1931
1932 /* SPARC-V9 code-model support. */
1933 switch (sparc_cmodel)
1934 {
1935 case CM_MEDLOW:
1936 /* The range spanned by all instructions in the object is less
1937 than 2^31 bytes (2GB) and the distance from any instruction
1938 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1939 than 2^31 bytes (2GB).
1940
1941 The executable must be in the low 4TB of the virtual address
1942 space.
1943
1944 sethi %hi(symbol), %temp1
1945 or %temp1, %lo(symbol), %reg */
1946 if (temp)
1947 temp1 = temp; /* op0 is allowed. */
1948 else
1949 temp1 = gen_reg_rtx (DImode);
1950
1951 emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
1952 emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1953 break;
1954
1955 case CM_MEDMID:
1956 /* The range spanned by all instructions in the object is less
1957 than 2^31 bytes (2GB) and the distance from any instruction
1958 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1959 than 2^31 bytes (2GB).
1960
1961 The executable must be in the low 16TB of the virtual address
1962 space.
1963
1964 sethi %h44(symbol), %temp1
1965 or %temp1, %m44(symbol), %temp2
1966 sllx %temp2, 12, %temp3
1967 or %temp3, %l44(symbol), %reg */
1968 if (temp)
1969 {
1970 temp1 = op0;
1971 temp2 = op0;
1972 temp3 = temp; /* op0 is allowed. */
1973 }
1974 else
1975 {
1976 temp1 = gen_reg_rtx (DImode);
1977 temp2 = gen_reg_rtx (DImode);
1978 temp3 = gen_reg_rtx (DImode);
1979 }
1980
1981 emit_insn (gen_seth44 (temp1, op1));
1982 emit_insn (gen_setm44 (temp2, temp1, op1));
1983 emit_insn (gen_rtx_SET (VOIDmode, temp3,
1984 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
1985 emit_insn (gen_setl44 (op0, temp3, op1));
1986 break;
1987
1988 case CM_MEDANY:
1989 /* The range spanned by all instructions in the object is less
1990 than 2^31 bytes (2GB) and the distance from any instruction
1991 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1992 than 2^31 bytes (2GB).
1993
1994 The executable can be placed anywhere in the virtual address
1995 space.
1996
1997 sethi %hh(symbol), %temp1
1998 sethi %lm(symbol), %temp2
1999 or %temp1, %hm(symbol), %temp3
2000 sllx %temp3, 32, %temp4
2001 or %temp4, %temp2, %temp5
2002 or %temp5, %lo(symbol), %reg */
2003 if (temp)
2004 {
2005 /* It is possible that one of the registers we got for operands[2]
2006 might coincide with that of operands[0] (which is why we made
2007 it TImode). Pick the other one to use as our scratch. */
2008 if (rtx_equal_p (temp, op0))
2009 {
2010 gcc_assert (ti_temp);
2011 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2012 }
2013 temp1 = op0;
2014 temp2 = temp; /* op0 is _not_ allowed, see above. */
2015 temp3 = op0;
2016 temp4 = op0;
2017 temp5 = op0;
2018 }
2019 else
2020 {
2021 temp1 = gen_reg_rtx (DImode);
2022 temp2 = gen_reg_rtx (DImode);
2023 temp3 = gen_reg_rtx (DImode);
2024 temp4 = gen_reg_rtx (DImode);
2025 temp5 = gen_reg_rtx (DImode);
2026 }
2027
2028 emit_insn (gen_sethh (temp1, op1));
2029 emit_insn (gen_setlm (temp2, op1));
2030 emit_insn (gen_sethm (temp3, temp1, op1));
2031 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2032 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2033 emit_insn (gen_rtx_SET (VOIDmode, temp5,
2034 gen_rtx_PLUS (DImode, temp4, temp2)));
2035 emit_insn (gen_setlo (op0, temp5, op1));
2036 break;
2037
2038 case CM_EMBMEDANY:
2039 /* Old old old backwards compatibility cruft here.
2040 Essentially it is MEDLOW with a fixed 64-bit
2041 virtual base added to all data segment addresses.
2042 Text-segment stuff is computed like MEDANY; we can't
2043 reuse the code above because the relocation knobs
2044 look different.
2045
2046 Data segment: sethi %hi(symbol), %temp1
2047 add %temp1, EMBMEDANY_BASE_REG, %temp2
2048 or %temp2, %lo(symbol), %reg */
2049 if (data_segment_operand (op1, GET_MODE (op1)))
2050 {
2051 if (temp)
2052 {
2053 temp1 = temp; /* op0 is allowed. */
2054 temp2 = op0;
2055 }
2056 else
2057 {
2058 temp1 = gen_reg_rtx (DImode);
2059 temp2 = gen_reg_rtx (DImode);
2060 }
2061
2062 emit_insn (gen_embmedany_sethi (temp1, op1));
2063 emit_insn (gen_embmedany_brsum (temp2, temp1));
2064 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2065 }
2066
2067 /* Text segment: sethi %uhi(symbol), %temp1
2068 sethi %hi(symbol), %temp2
2069 or %temp1, %ulo(symbol), %temp3
2070 sllx %temp3, 32, %temp4
2071 or %temp4, %temp2, %temp5
2072 or %temp5, %lo(symbol), %reg */
2073 else
2074 {
2075 if (temp)
2076 {
2077 /* It is possible that one of the registers we got for operands[2]
2078 might coincide with that of operands[0] (which is why we made
2079 it TImode). Pick the other one to use as our scratch. */
2080 if (rtx_equal_p (temp, op0))
2081 {
2082 gcc_assert (ti_temp);
2083 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2084 }
2085 temp1 = op0;
2086 temp2 = temp; /* op0 is _not_ allowed, see above. */
2087 temp3 = op0;
2088 temp4 = op0;
2089 temp5 = op0;
2090 }
2091 else
2092 {
2093 temp1 = gen_reg_rtx (DImode);
2094 temp2 = gen_reg_rtx (DImode);
2095 temp3 = gen_reg_rtx (DImode);
2096 temp4 = gen_reg_rtx (DImode);
2097 temp5 = gen_reg_rtx (DImode);
2098 }
2099
2100 emit_insn (gen_embmedany_textuhi (temp1, op1));
2101 emit_insn (gen_embmedany_texthi (temp2, op1));
2102 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2103 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2104 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2105 emit_insn (gen_rtx_SET (VOIDmode, temp5,
2106 gen_rtx_PLUS (DImode, temp4, temp2)));
2107 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2108 }
2109 break;
2110
2111 default:
2112 gcc_unreachable ();
2113 }
2114 }
2115
2116 #if HOST_BITS_PER_WIDE_INT == 32
2117 static void
2118 sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
2119 {
2120 gcc_unreachable ();
2121 }
2122 #else
2123 /* These avoid problems when cross compiling. If we do not
2124 go through all this hair then the optimizer will see
2125 invalid REG_EQUAL notes or in some cases none at all. */
2126 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2127 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2128 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2129 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2130
2131 /* The optimizer is not allowed to assume anything about exactly
2132 which bits are set for a HIGH; they are unspecified.
2133 Unfortunately this leads to many missed optimizations
2134 during CSE. We mask out the non-HIGH bits so the result
2135 matches a plain movdi, alleviating this problem. */
2136 static rtx
2137 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2138 {
2139 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2140 }
2141
2142 static rtx
2143 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2144 {
2145 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
2146 }
2147
2148 static rtx
2149 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2150 {
2151 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2152 }
2153
2154 static rtx
2155 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2156 {
2157 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2158 }
2159
2160 /* Worker routines for 64-bit constant formation on arch64.
2161 One of the key things to do in these emissions is
2162 to create as many temporary REGs as possible. This makes it
2163 possible for half-built constants to be reused later when
2164 similar values are required.
2165 Without doing this, the optimizer cannot see such
2166 opportunities. */
2167
2168 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2169 unsigned HOST_WIDE_INT, int);
2170
2171 static void
2172 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2173 unsigned HOST_WIDE_INT low_bits, int is_neg)
2174 {
2175 unsigned HOST_WIDE_INT high_bits;
2176
2177 if (is_neg)
2178 high_bits = (~low_bits) & 0xffffffff;
2179 else
2180 high_bits = low_bits;
2181
2182 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2183 if (!is_neg)
2184 {
2185 emit_insn (gen_rtx_SET (VOIDmode, op0,
2186 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2187 }
2188 else
2189 {
2190 /* If we are XOR'ing with -1, then we should emit a one's complement
2191 instead. This way the combiner will notice logical operations
2192 such as ANDN later on and substitute. */
2193 if ((low_bits & 0x3ff) == 0x3ff)
2194 {
2195 emit_insn (gen_rtx_SET (VOIDmode, op0,
2196 gen_rtx_NOT (DImode, temp)));
2197 }
2198 else
2199 {
2200 emit_insn (gen_rtx_SET (VOIDmode, op0,
2201 gen_safe_XOR64 (temp,
2202 (-(HOST_WIDE_INT)0x400
2203 | (low_bits & 0x3ff)))));
2204 }
2205 }
2206 }
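
/* A worked example for the is_neg path, not part of the original file:
   to load 0xfffffffffffff000, low_bits = 0xfffff000, so high_bits
   = ~low_bits & 0xffffffff = 0xfff.  We emit
     temp = 0xc00  (HIGH of 0xfff)
     op0  = temp ^ 0xfffffffffffffc00  (XOR64 with -0x400 | (low_bits & 0x3ff))
   and 0xc00 ^ 0xfffffffffffffc00 = 0xfffffffffffff000 as desired.  */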
2207
2208 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2209 unsigned HOST_WIDE_INT, int);
2210
2211 static void
2212 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2213 unsigned HOST_WIDE_INT high_bits,
2214 unsigned HOST_WIDE_INT low_immediate,
2215 int shift_count)
2216 {
2217 rtx temp2 = op0;
2218
2219 if ((high_bits & 0xfffffc00) != 0)
2220 {
2221 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2222 if ((high_bits & ~0xfffffc00) != 0)
2223 emit_insn (gen_rtx_SET (VOIDmode, op0,
2224 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2225 else
2226 temp2 = temp;
2227 }
2228 else
2229 {
2230 emit_insn (gen_safe_SET64 (temp, high_bits));
2231 temp2 = temp;
2232 }
2233
2234 /* Now shift it up into place. */
2235 emit_insn (gen_rtx_SET (VOIDmode, op0,
2236 gen_rtx_ASHIFT (DImode, temp2,
2237 GEN_INT (shift_count))));
2238
2239 /* If there is a low immediate part piece, finish up by
2240 putting that in as well. */
2241 if (low_immediate != 0)
2242 emit_insn (gen_rtx_SET (VOIDmode, op0,
2243 gen_safe_OR64 (op0, low_immediate)));
2244 }
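
/* A worked example, not part of the original file: the call
   sparc_emit_set_const64_quick2 (op0, temp, 0x12345678, 0x234, 32) emits
     temp = 0x12345400    (HIGH of high_bits)
     op0  = temp | 0x278  (low 10 bits of high_bits)
     op0  = op0 << 32
     op0  = op0 | 0x234
   i.e. a sethi/or/sllx/or sequence producing 0x1234567800000234.  */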
2245
2246 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2247 unsigned HOST_WIDE_INT);
2248
2249 /* Full 64-bit constant decomposition. Even though this is the
2250 'worst' case, we still optimize a few things away. */
2251 static void
2252 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2253 unsigned HOST_WIDE_INT high_bits,
2254 unsigned HOST_WIDE_INT low_bits)
2255 {
2256 rtx sub_temp = op0;
2257
2258 if (can_create_pseudo_p ())
2259 sub_temp = gen_reg_rtx (DImode);
2260
2261 if ((high_bits & 0xfffffc00) != 0)
2262 {
2263 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2264 if ((high_bits & ~0xfffffc00) != 0)
2265 emit_insn (gen_rtx_SET (VOIDmode,
2266 sub_temp,
2267 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2268 else
2269 sub_temp = temp;
2270 }
2271 else
2272 {
2273 emit_insn (gen_safe_SET64 (temp, high_bits));
2274 sub_temp = temp;
2275 }
2276
2277 if (can_create_pseudo_p ())
2278 {
2279 rtx temp2 = gen_reg_rtx (DImode);
2280 rtx temp3 = gen_reg_rtx (DImode);
2281 rtx temp4 = gen_reg_rtx (DImode);
2282
2283 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2284 gen_rtx_ASHIFT (DImode, sub_temp,
2285 GEN_INT (32))));
2286
2287 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2288 if ((low_bits & ~0xfffffc00) != 0)
2289 {
2290 emit_insn (gen_rtx_SET (VOIDmode, temp3,
2291 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2292 emit_insn (gen_rtx_SET (VOIDmode, op0,
2293 gen_rtx_PLUS (DImode, temp4, temp3)));
2294 }
2295 else
2296 {
2297 emit_insn (gen_rtx_SET (VOIDmode, op0,
2298 gen_rtx_PLUS (DImode, temp4, temp2)));
2299 }
2300 }
2301 else
2302 {
2303 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2304 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2305 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2306 int to_shift = 12;
2307
2308 /* We are in the middle of reload, so this is really
2309 painful. However we do still make an attempt to
2310 avoid emitting truly stupid code. */
2311 if (low1 != const0_rtx)
2312 {
2313 emit_insn (gen_rtx_SET (VOIDmode, op0,
2314 gen_rtx_ASHIFT (DImode, sub_temp,
2315 GEN_INT (to_shift))));
2316 emit_insn (gen_rtx_SET (VOIDmode, op0,
2317 gen_rtx_IOR (DImode, op0, low1)));
2318 sub_temp = op0;
2319 to_shift = 12;
2320 }
2321 else
2322 {
2323 to_shift += 12;
2324 }
2325 if (low2 != const0_rtx)
2326 {
2327 emit_insn (gen_rtx_SET (VOIDmode, op0,
2328 gen_rtx_ASHIFT (DImode, sub_temp,
2329 GEN_INT (to_shift))));
2330 emit_insn (gen_rtx_SET (VOIDmode, op0,
2331 gen_rtx_IOR (DImode, op0, low2)));
2332 sub_temp = op0;
2333 to_shift = 8;
2334 }
2335 else
2336 {
2337 to_shift += 8;
2338 }
2339 emit_insn (gen_rtx_SET (VOIDmode, op0,
2340 gen_rtx_ASHIFT (DImode, sub_temp,
2341 GEN_INT (to_shift))));
2342 if (low3 != const0_rtx)
2343 emit_insn (gen_rtx_SET (VOIDmode, op0,
2344 gen_rtx_IOR (DImode, op0, low3)));
2345 /* phew... */
2346 }
2347 }
2348
2349 /* Analyze a 64-bit constant for certain properties. */
2350 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2351 unsigned HOST_WIDE_INT,
2352 int *, int *, int *);
2353
2354 static void
2355 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2356 unsigned HOST_WIDE_INT low_bits,
2357 int *hbsp, int *lbsp, int *abbasp)
2358 {
2359 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2360 int i;
2361
2362 lowest_bit_set = highest_bit_set = -1;
2363 i = 0;
2364 do
2365 {
2366 if ((lowest_bit_set == -1)
2367 && ((low_bits >> i) & 1))
2368 lowest_bit_set = i;
2369 if ((highest_bit_set == -1)
2370 && ((high_bits >> (32 - i - 1)) & 1))
2371 highest_bit_set = (64 - i - 1);
2372 }
2373 while (++i < 32
2374 && ((highest_bit_set == -1)
2375 || (lowest_bit_set == -1)));
2376 if (i == 32)
2377 {
2378 i = 0;
2379 do
2380 {
2381 if ((lowest_bit_set == -1)
2382 && ((high_bits >> i) & 1))
2383 lowest_bit_set = i + 32;
2384 if ((highest_bit_set == -1)
2385 && ((low_bits >> (32 - i - 1)) & 1))
2386 highest_bit_set = 32 - i - 1;
2387 }
2388 while (++i < 32
2389 && ((highest_bit_set == -1)
2390 || (lowest_bit_set == -1)));
2391 }
2392 /* If there are no bits set this should have gone out
2393 as one instruction! */
2394 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2395 all_bits_between_are_set = 1;
2396 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2397 {
2398 if (i < 32)
2399 {
2400 if ((low_bits & (1 << i)) != 0)
2401 continue;
2402 }
2403 else
2404 {
2405 if ((high_bits & (1 << (i - 32))) != 0)
2406 continue;
2407 }
2408 all_bits_between_are_set = 0;
2409 break;
2410 }
2411 *hbsp = highest_bit_set;
2412 *lbsp = lowest_bit_set;
2413 *abbasp = all_bits_between_are_set;
2414 }
2415
2416 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2417
2418 static int
2419 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2420 unsigned HOST_WIDE_INT low_bits)
2421 {
2422 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2423
2424 if (high_bits == 0
2425 || high_bits == 0xffffffff)
2426 return 1;
2427
2428 analyze_64bit_constant (high_bits, low_bits,
2429 &highest_bit_set, &lowest_bit_set,
2430 &all_bits_between_are_set);
2431
2432 if ((highest_bit_set == 63
2433 || lowest_bit_set == 0)
2434 && all_bits_between_are_set != 0)
2435 return 1;
2436
2437 if ((highest_bit_set - lowest_bit_set) < 21)
2438 return 1;
2439
2440 return 0;
2441 }
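
/* A worked example, not part of the original file: for the constant
   0x0000000000ff0001 we have high_bits = 0 and low_bits = 0x00ff0001,
   so analyze_64bit_constant yields lowest_bit_set = 0, highest_bit_set
   = 23 and all_bits_between_are_set = 0.  const64_is_2insns returns 1
   immediately because high_bits == 0: a sethi/or pair builds the value.  */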
2442
2443 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2444 unsigned HOST_WIDE_INT,
2445 int, int);
2446
2447 static unsigned HOST_WIDE_INT
2448 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2449 unsigned HOST_WIDE_INT low_bits,
2450 int lowest_bit_set, int shift)
2451 {
2452 HOST_WIDE_INT hi, lo;
2453
2454 if (lowest_bit_set < 32)
2455 {
2456 lo = (low_bits >> lowest_bit_set) << shift;
2457 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2458 }
2459 else
2460 {
2461 lo = 0;
2462 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2463 }
2464 gcc_assert (! (hi & lo));
2465 return (hi | lo);
2466 }
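
/* A worked example, not part of the original file: with high_bits = 0,
   low_bits = 0x00ff0000, lowest_bit_set = 16 and shift = 10, the result
   is (0x00ff0000 >> 16) << 10 = 0x3fc00, i.e. the set bits re-based at
   bit 10 so that a sethi can materialize them before they are shifted
   back into their final position.  */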
2467
2468 /* Here we are sure to be arch64 and this is an integer constant
2469 being loaded into a register. Emit the most efficient
2470 insn sequence possible. Detection of all the 1-insn cases
2471 has been done already. */
2472 static void
2473 sparc_emit_set_const64 (rtx op0, rtx op1)
2474 {
2475 unsigned HOST_WIDE_INT high_bits, low_bits;
2476 int lowest_bit_set, highest_bit_set;
2477 int all_bits_between_are_set;
2478 rtx temp = 0;
2479
2480 /* Sanity check that we know what we are working with. */
2481 gcc_assert (TARGET_ARCH64
2482 && (GET_CODE (op0) == SUBREG
2483 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2484
2485 if (! can_create_pseudo_p ())
2486 temp = op0;
2487
2488 if (GET_CODE (op1) != CONST_INT)
2489 {
2490 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2491 return;
2492 }
2493
2494 if (! temp)
2495 temp = gen_reg_rtx (DImode);
2496
2497 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2498 low_bits = (INTVAL (op1) & 0xffffffff);
2499
2500 /* low_bits bits 0 --> 31
2501 high_bits bits 32 --> 63 */
2502
2503 analyze_64bit_constant (high_bits, low_bits,
2504 &highest_bit_set, &lowest_bit_set,
2505 &all_bits_between_are_set);
2506
2507 /* First try for a 2-insn sequence. */
2508
2509 /* These situations are preferred because the optimizer can
2510 * do more things with them:
2511 * 1) mov -1, %reg
2512 * sllx %reg, shift, %reg
2513 * 2) mov -1, %reg
2514 * srlx %reg, shift, %reg
2515 * 3) mov some_small_const, %reg
2516 * sllx %reg, shift, %reg
2517 */
2518 if (((highest_bit_set == 63
2519 || lowest_bit_set == 0)
2520 && all_bits_between_are_set != 0)
2521 || ((highest_bit_set - lowest_bit_set) < 12))
2522 {
2523 HOST_WIDE_INT the_const = -1;
2524 int shift = lowest_bit_set;
2525
2526 if ((highest_bit_set != 63
2527 && lowest_bit_set != 0)
2528 || all_bits_between_are_set == 0)
2529 {
2530 the_const =
2531 create_simple_focus_bits (high_bits, low_bits,
2532 lowest_bit_set, 0);
2533 }
2534 else if (lowest_bit_set == 0)
2535 shift = -(63 - highest_bit_set);
2536
2537 gcc_assert (SPARC_SIMM13_P (the_const));
2538 gcc_assert (shift != 0);
2539
2540 emit_insn (gen_safe_SET64 (temp, the_const));
2541 if (shift > 0)
2542 emit_insn (gen_rtx_SET (VOIDmode,
2543 op0,
2544 gen_rtx_ASHIFT (DImode,
2545 temp,
2546 GEN_INT (shift))));
2547 else if (shift < 0)
2548 emit_insn (gen_rtx_SET (VOIDmode,
2549 op0,
2550 gen_rtx_LSHIFTRT (DImode,
2551 temp,
2552 GEN_INT (-shift))));
2553 return;
2554 }
2555
2556 /* Now a range of 22 or fewer bits set somewhere.
2557 * 1) sethi %hi(focus_bits), %reg
2558 * sllx %reg, shift, %reg
2559 * 2) sethi %hi(focus_bits), %reg
2560 * srlx %reg, shift, %reg
2561 */
2562 if ((highest_bit_set - lowest_bit_set) < 21)
2563 {
2564 unsigned HOST_WIDE_INT focus_bits =
2565 create_simple_focus_bits (high_bits, low_bits,
2566 lowest_bit_set, 10);
2567
2568 gcc_assert (SPARC_SETHI_P (focus_bits));
2569 gcc_assert (lowest_bit_set != 10);
2570
2571 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2572
2573 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2574 if (lowest_bit_set < 10)
2575 emit_insn (gen_rtx_SET (VOIDmode,
2576 op0,
2577 gen_rtx_LSHIFTRT (DImode, temp,
2578 GEN_INT (10 - lowest_bit_set))));
2579 else if (lowest_bit_set > 10)
2580 emit_insn (gen_rtx_SET (VOIDmode,
2581 op0,
2582 gen_rtx_ASHIFT (DImode, temp,
2583 GEN_INT (lowest_bit_set - 10))));
2584 return;
2585 }
2586
2587 /* 1) sethi %hi(low_bits), %reg
2588 * or %reg, %lo(low_bits), %reg
2589 * 2) sethi %hi(~low_bits), %reg
2590 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2591 */
2592 if (high_bits == 0
2593 || high_bits == 0xffffffff)
2594 {
2595 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2596 (high_bits == 0xffffffff));
2597 return;
2598 }
2599
2600 /* Now, try 3-insn sequences. */
2601
2602 /* 1) sethi %hi(high_bits), %reg
2603 * or %reg, %lo(high_bits), %reg
2604 * sllx %reg, 32, %reg
2605 */
2606 if (low_bits == 0)
2607 {
2608 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2609 return;
2610 }
2611
2612 /* We may be able to do something quick
2613 when the constant is negated, so try that. */
2614 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2615 (~low_bits) & 0xfffffc00))
2616 {
2617 /* NOTE: The trailing bits get XOR'd so we need the
2618 non-negated bits, not the negated ones. */
2619 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2620
2621 if ((((~high_bits) & 0xffffffff) == 0
2622 && ((~low_bits) & 0x80000000) == 0)
2623 || (((~high_bits) & 0xffffffff) == 0xffffffff
2624 && ((~low_bits) & 0x80000000) != 0))
2625 {
2626 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2627
2628 if ((SPARC_SETHI_P (fast_int)
2629 && (~high_bits & 0xffffffff) == 0)
2630 || SPARC_SIMM13_P (fast_int))
2631 emit_insn (gen_safe_SET64 (temp, fast_int));
2632 else
2633 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2634 }
2635 else
2636 {
2637 rtx negated_const;
2638 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2639 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2640 sparc_emit_set_const64 (temp, negated_const);
2641 }
2642
2643 /* If we are XOR'ing with -1, then we should emit a one's complement
2644 instead. This way the combiner will notice logical operations
2645 such as ANDN later on and substitute. */
2646 if (trailing_bits == 0x3ff)
2647 {
2648 emit_insn (gen_rtx_SET (VOIDmode, op0,
2649 gen_rtx_NOT (DImode, temp)));
2650 }
2651 else
2652 {
2653 emit_insn (gen_rtx_SET (VOIDmode,
2654 op0,
2655 gen_safe_XOR64 (temp,
2656 (-0x400 | trailing_bits))));
2657 }
2658 return;
2659 }
2660
2661 /* 1) sethi %hi(xxx), %reg
2662 * or %reg, %lo(xxx), %reg
2663 * sllx %reg, yyy, %reg
2664 *
2665 * ??? This is just a generalized version of the low_bits==0
2666 * thing above, FIXME...
2667 */
2668 if ((highest_bit_set - lowest_bit_set) < 32)
2669 {
2670 unsigned HOST_WIDE_INT focus_bits =
2671 create_simple_focus_bits (high_bits, low_bits,
2672 lowest_bit_set, 0);
2673
2674 /* We can't get here in any other state. */
2675 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2676
2677 /* So what we know is that the set bits straddle the
2678 middle of the 64-bit word. */
2679 sparc_emit_set_const64_quick2 (op0, temp,
2680 focus_bits, 0,
2681 lowest_bit_set);
2682 return;
2683 }
2684
2685 /* 1) sethi %hi(high_bits), %reg
2686 * or %reg, %lo(high_bits), %reg
2687 * sllx %reg, 32, %reg
2688 * or %reg, low_bits, %reg
2689 */
2690 if (SPARC_SIMM13_P(low_bits)
2691 && ((int)low_bits > 0))
2692 {
2693 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2694 return;
2695 }
2696
2697 /* The easiest way when all else fails, is full decomposition. */
2698 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2699 }
2700 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
2701
2702 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2703 return the mode to be used for the comparison. For floating-point,
2704 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
2705 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2706 processing is needed. */
2707
2708 enum machine_mode
2709 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2710 {
2711 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2712 {
2713 switch (op)
2714 {
2715 case EQ:
2716 case NE:
2717 case UNORDERED:
2718 case ORDERED:
2719 case UNLT:
2720 case UNLE:
2721 case UNGT:
2722 case UNGE:
2723 case UNEQ:
2724 case LTGT:
2725 return CCFPmode;
2726
2727 case LT:
2728 case LE:
2729 case GT:
2730 case GE:
2731 return CCFPEmode;
2732
2733 default:
2734 gcc_unreachable ();
2735 }
2736 }
2737 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2738 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2739 {
2740 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2741 return CCX_NOOVmode;
2742 else
2743 return CC_NOOVmode;
2744 }
2745 else
2746 {
2747 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2748 return CCXmode;
2749 else
2750 return CCmode;
2751 }
2752 }
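
/* Illustrative mapping, not part of the original file, assuming SImode
   operands on a 32-bit target:
     EQ of two floats            -> CCFPmode  (quiet compare)
     LT of two floats            -> CCFPEmode (signals on unordered)
     EQ with a PLUS as operand 0 -> CC_NOOVmode
     anything else               -> CCmode
   With TARGET_ARCH64 and DImode operands, the CCX variants are chosen
   instead for the last two rows.  */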
2753
2754 /* Emit the compare insn and return the CC reg for a CODE comparison
2755 with operands X and Y. */
2756
2757 static rtx
2758 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2759 {
2760 enum machine_mode mode;
2761 rtx cc_reg;
2762
2763 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2764 return x;
2765
2766 mode = SELECT_CC_MODE (code, x, y);
2767
2768 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2769 fcc regs (cse can't tell they're really call clobbered regs and will
2770 remove a duplicate comparison even if there is an intervening function
2771 call - it will then try to reload the cc reg via an int reg which is why
2772 we need the movcc patterns). It is possible to provide the movcc
2773 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2774 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2775 to tell cse that CCFPE mode registers (even pseudos) are call
2776 clobbered. */
2777
2778 /* ??? This is an experiment. Rather than making changes to cse which may
2779 or may not be easy/clean, we do our own cse. This is possible because
2780 we will generate hard registers. Cse knows they're call clobbered (it
2781 doesn't know the same thing about pseudos). If we guess wrong, no big
2782 deal, but if we win, great! */
2783
2784 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2785 #if 1 /* experiment */
2786 {
2787 int reg;
2788 /* We cycle through the registers to ensure they're all exercised. */
2789 static int next_fcc_reg = 0;
2790 /* Previous x,y for each fcc reg. */
2791 static rtx prev_args[4][2];
2792
2793 /* Scan prev_args for x,y. */
2794 for (reg = 0; reg < 4; reg++)
2795 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2796 break;
2797 if (reg == 4)
2798 {
2799 reg = next_fcc_reg;
2800 prev_args[reg][0] = x;
2801 prev_args[reg][1] = y;
2802 next_fcc_reg = (next_fcc_reg + 1) & 3;
2803 }
2804 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2805 }
2806 #else
2807 cc_reg = gen_reg_rtx (mode);
2808 #endif /* ! experiment */
2809 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2810 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2811 else
2812 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2813
2814 /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD. If we do, this
2815 will only result in an unrecognizable insn so no point in asserting. */
2816 emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
2817
2818 return cc_reg;
2819 }
2820
2821
2822 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2823
2824 rtx
2825 gen_compare_reg (rtx cmp)
2826 {
2827 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2828 }
2829
2830 /* This function is used for v9 only.
2831 DEST is the target of the Scc insn.
2832 CODE is the code for an Scc's comparison.
2833 X and Y are the values we compare.
2834
2835 This function is needed to turn
2836
2837 (set (reg:SI 110)
2838 (gt (reg:CCX 100 %icc)
2839 (const_int 0)))
2840 into
2841 (set (reg:SI 110)
2842 (gt:DI (reg:CCX 100 %icc)
2843 (const_int 0)))
2844
2845 IE: The instruction recognizer needs to see the mode of the comparison to
2846 find the right instruction. We could use "gt:DI" right in the
2847 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2848
2849 static int
2850 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2851 {
2852 if (! TARGET_ARCH64
2853 && (GET_MODE (x) == DImode
2854 || GET_MODE (dest) == DImode))
2855 return 0;
2856
2857 /* Try to use the movrCC insns. */
2858 if (TARGET_ARCH64
2859 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2860 && y == const0_rtx
2861 && v9_regcmp_p (compare_code))
2862 {
2863 rtx op0 = x;
2864 rtx temp;
2865
2866 /* Special case for op0 != 0. This can be done with one instruction if
2867 dest == x. */
2868
2869 if (compare_code == NE
2870 && GET_MODE (dest) == DImode
2871 && rtx_equal_p (op0, dest))
2872 {
2873 emit_insn (gen_rtx_SET (VOIDmode, dest,
2874 gen_rtx_IF_THEN_ELSE (DImode,
2875 gen_rtx_fmt_ee (compare_code, DImode,
2876 op0, const0_rtx),
2877 const1_rtx,
2878 dest)));
2879 return 1;
2880 }
2881
2882 if (reg_overlap_mentioned_p (dest, op0))
2883 {
2884 /* Handle the case where dest == x.
2885 We "early clobber" the result. */
2886 op0 = gen_reg_rtx (GET_MODE (x));
2887 emit_move_insn (op0, x);
2888 }
2889
2890 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2891 if (GET_MODE (op0) != DImode)
2892 {
2893 temp = gen_reg_rtx (DImode);
2894 convert_move (temp, op0, 0);
2895 }
2896 else
2897 temp = op0;
2898 emit_insn (gen_rtx_SET (VOIDmode, dest,
2899 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2900 gen_rtx_fmt_ee (compare_code, DImode,
2901 temp, const0_rtx),
2902 const1_rtx,
2903 dest)));
2904 return 1;
2905 }
2906 else
2907 {
2908 x = gen_compare_reg_1 (compare_code, x, y);
2909 y = const0_rtx;
2910
2911 gcc_assert (GET_MODE (x) != CC_NOOVmode
2912 && GET_MODE (x) != CCX_NOOVmode);
2913
2914 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2915 emit_insn (gen_rtx_SET (VOIDmode, dest,
2916 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2917 gen_rtx_fmt_ee (compare_code,
2918 GET_MODE (x), x, y),
2919 const1_rtx, dest)));
2920 return 1;
2921 }
2922 }
2923
2924
2925 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2926 without jumps using the addx/subx instructions. */
2927
2928 bool
2929 emit_scc_insn (rtx operands[])
2930 {
2931 rtx tem;
2932 rtx x;
2933 rtx y;
2934 enum rtx_code code;
2935
2936 /* The quad-word fp compare library routines all return nonzero to indicate
2937 true, which is different from the equivalent libgcc routines, so we must
2938 handle them specially here. */
2939 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2940 {
2941 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2942 GET_CODE (operands[1]));
2943 operands[2] = XEXP (operands[1], 0);
2944 operands[3] = XEXP (operands[1], 1);
2945 }
2946
2947 code = GET_CODE (operands[1]);
2948 x = operands[2];
2949 y = operands[3];
2950
2951 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2952 more applications). The exception to this is "reg != 0" which can
2953 be done in one instruction on v9 (so we do it). */
2954 if (code == EQ)
2955 {
2956 if (GET_MODE (x) == SImode)
2957 {
2958 rtx pat;
2959 if (TARGET_ARCH64)
2960 pat = gen_seqsidi_special (operands[0], x, y);
2961 else
2962 pat = gen_seqsisi_special (operands[0], x, y);
2963 emit_insn (pat);
2964 return true;
2965 }
2966 else if (GET_MODE (x) == DImode)
2967 {
2968 rtx pat = gen_seqdi_special (operands[0], x, y);
2969 emit_insn (pat);
2970 return true;
2971 }
2972 }
2973
2974 if (code == NE)
2975 {
2976 if (GET_MODE (x) == SImode)
2977 {
2978 rtx pat;
2979 if (TARGET_ARCH64)
2980 pat = gen_snesidi_special (operands[0], x, y);
2981 else
2982 pat = gen_snesisi_special (operands[0], x, y);
2983 emit_insn (pat);
2984 return true;
2985 }
2986 else if (GET_MODE (x) == DImode)
2987 {
2988 rtx pat;
2989 if (TARGET_VIS3)
2990 pat = gen_snedi_special_vis3 (operands[0], x, y);
2991 else
2992 pat = gen_snedi_special (operands[0], x, y);
2993 emit_insn (pat);
2994 return true;
2995 }
2996 }
2997
2998 if (TARGET_V9
2999 && TARGET_ARCH64
3000 && GET_MODE (x) == DImode
3001 && !(TARGET_VIS3
3002 && (code == GTU || code == LTU))
3003 && gen_v9_scc (operands[0], code, x, y))
3004 return true;
3005
3006 /* We can do LTU and GEU using the addx/subx instructions too. And
3007 for GTU/LEU, if both operands are registers, swap them and fall
3008 back to the easy case. */
3009 if (code == GTU || code == LEU)
3010 {
3011 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3012 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3013 {
3014 tem = x;
3015 x = y;
3016 y = tem;
3017 code = swap_condition (code);
3018 }
3019 }
3020
3021 if (code == LTU
3022 || (!TARGET_VIS3 && code == GEU))
3023 {
3024 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3025 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3026 gen_compare_reg_1 (code, x, y),
3027 const0_rtx)));
3028 return true;
3029 }
3030
3031 /* All the possibilities to use addx/subx-based sequences have been
3032 exhausted; try for a 3-instruction sequence using v9 conditional
3033 moves. */
3034 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3035 return true;
3036
3037 /* Nope, do branches. */
3038 return false;
3039 }
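
/* Illustrative note, not part of the original file: the addx/subx trick
   relies on the carry flag.  For an unsigned 'dst = (x < y)', a sequence
   along the lines of
     subcc %x, %y, %g0   ! carry is set iff x < y (unsigned)
     addx  %g0, 0, %dst  ! dst = 0 + 0 + carry
   does the job without a branch, and GEU can use subx to get 1 - carry.  */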
3040
3041 /* Emit a conditional jump insn for the v9 architecture using comparison code
3042 CODE and jump target LABEL.
3043 This function exists to take advantage of the v9 brxx insns. */
3044
3045 static void
3046 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3047 {
3048 emit_jump_insn (gen_rtx_SET (VOIDmode,
3049 pc_rtx,
3050 gen_rtx_IF_THEN_ELSE (VOIDmode,
3051 gen_rtx_fmt_ee (code, GET_MODE (op0),
3052 op0, const0_rtx),
3053 gen_rtx_LABEL_REF (VOIDmode, label),
3054 pc_rtx)));
3055 }
3056
3057 /* Emit a conditional jump insn for the UA2011 architecture using
3058 comparison code CODE and jump target LABEL. This function exists
3059 to take advantage of the UA2011 Compare and Branch insns. */
3060
3061 static void
3062 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3063 {
3064 rtx if_then_else;
3065
3066 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3067 gen_rtx_fmt_ee(code, GET_MODE(op0),
3068 op0, op1),
3069 gen_rtx_LABEL_REF (VOIDmode, label),
3070 pc_rtx);
3071
3072 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, if_then_else));
3073 }
3074
3075 void
3076 emit_conditional_branch_insn (rtx operands[])
3077 {
3078 /* The quad-word fp compare library routines all return nonzero to indicate
3079 true, which is different from the equivalent libgcc routines, so we must
3080 handle them specially here. */
3081 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3082 {
3083 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3084 GET_CODE (operands[0]));
3085 operands[1] = XEXP (operands[0], 0);
3086 operands[2] = XEXP (operands[0], 1);
3087 }
3088
3089 /* If we can tell early on that the comparison is against a constant
3090 that won't fit in the 5-bit signed immediate field of a cbcond,
3091 use one of the other v9 conditional branch sequences. */
3092 if (TARGET_CBCOND
3093 && GET_CODE (operands[1]) == REG
3094 && (GET_MODE (operands[1]) == SImode
3095 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3096 && (GET_CODE (operands[2]) != CONST_INT
3097 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3098 {
3099 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3100 return;
3101 }
3102
3103 if (TARGET_ARCH64 && operands[2] == const0_rtx
3104 && GET_CODE (operands[1]) == REG
3105 && GET_MODE (operands[1]) == DImode)
3106 {
3107 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3108 return;
3109 }
3110
3111 operands[1] = gen_compare_reg (operands[0]);
3112 operands[2] = const0_rtx;
3113 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3114 operands[1], operands[2]);
3115 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3116 operands[3]));
3117 }
3118
3119
3120 /* Generate a DFmode part of a hard TFmode register.
3121 REG is the TFmode hard register, LOW is 1 for the
3122 low 64 bits of the register and 0 otherwise.
3123 */
3124 rtx
3125 gen_df_reg (rtx reg, int low)
3126 {
3127 int regno = REGNO (reg);
3128
3129 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3130 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3131 return gen_rtx_REG (DFmode, regno);
3132 }
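
/* A worked example, not part of the original file: SPARC is big-endian,
   so for a TFmode value living in %f0-%f3, gen_df_reg (reg, 0) returns
   the DFmode register %f0 (the high half) while gen_df_reg (reg, 1)
   returns %f2 (the low half), via the regno += 2 adjustment above.  */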
3133 \f
3134 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3135 Unlike normal calls, TFmode operands are passed by reference. It is
3136 assumed that no more than 3 operands are required. */
3137
3138 static void
3139 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3140 {
3141 rtx ret_slot = NULL, arg[3], func_sym;
3142 int i;
3143
3144 /* We only expect to be called for conversions, unary, and binary ops. */
3145 gcc_assert (nargs == 2 || nargs == 3);
3146
3147 for (i = 0; i < nargs; ++i)
3148 {
3149 rtx this_arg = operands[i];
3150 rtx this_slot;
3151
3152 /* TFmode arguments and return values are passed by reference. */
3153 if (GET_MODE (this_arg) == TFmode)
3154 {
3155 int force_stack_temp;
3156
3157 force_stack_temp = 0;
3158 if (TARGET_BUGGY_QP_LIB && i == 0)
3159 force_stack_temp = 1;
3160
3161 if (GET_CODE (this_arg) == MEM
3162 && ! force_stack_temp)
3163 {
3164 tree expr = MEM_EXPR (this_arg);
3165 if (expr)
3166 mark_addressable (expr);
3167 this_arg = XEXP (this_arg, 0);
3168 }
3169 else if (CONSTANT_P (this_arg)
3170 && ! force_stack_temp)
3171 {
3172 this_slot = force_const_mem (TFmode, this_arg);
3173 this_arg = XEXP (this_slot, 0);
3174 }
3175 else
3176 {
3177 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3178
3179 /* Operand 0 is the return value. We'll copy it out later. */
3180 if (i > 0)
3181 emit_move_insn (this_slot, this_arg);
3182 else
3183 ret_slot = this_slot;
3184
3185 this_arg = XEXP (this_slot, 0);
3186 }
3187 }
3188
3189 arg[i] = this_arg;
3190 }
3191
3192 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3193
3194 if (GET_MODE (operands[0]) == TFmode)
3195 {
3196 if (nargs == 2)
3197 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
3198 arg[0], GET_MODE (arg[0]),
3199 arg[1], GET_MODE (arg[1]));
3200 else
3201 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
3202 arg[0], GET_MODE (arg[0]),
3203 arg[1], GET_MODE (arg[1]),
3204 arg[2], GET_MODE (arg[2]));
3205
3206 if (ret_slot)
3207 emit_move_insn (operands[0], ret_slot);
3208 }
3209 else
3210 {
3211 rtx ret;
3212
3213 gcc_assert (nargs == 2);
3214
3215 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3216 GET_MODE (operands[0]), 1,
3217 arg[1], GET_MODE (arg[1]));
3218
3219 if (ret != operands[0])
3220 emit_move_insn (operands[0], ret);
3221 }
3222 }
3223
3224 /* Expand soft-float TFmode calls to SPARC ABI routines. */
3225
3226 static void
3227 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3228 {
3229 const char *func;
3230
3231 switch (code)
3232 {
3233 case PLUS:
3234 func = "_Qp_add";
3235 break;
3236 case MINUS:
3237 func = "_Qp_sub";
3238 break;
3239 case MULT:
3240 func = "_Qp_mul";
3241 break;
3242 case DIV:
3243 func = "_Qp_div";
3244 break;
3245 default:
3246 gcc_unreachable ();
3247 }
3248
3249 emit_soft_tfmode_libcall (func, 3, operands);
3250 }
3251
3252 static void
3253 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3254 {
3255 const char *func;
3256
3257 gcc_assert (code == SQRT);
3258 func = "_Qp_sqrt";
3259
3260 emit_soft_tfmode_libcall (func, 2, operands);
3261 }
3262
3263 static void
3264 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3265 {
3266 const char *func;
3267
3268 switch (code)
3269 {
3270 case FLOAT_EXTEND:
3271 switch (GET_MODE (operands[1]))
3272 {
3273 case SFmode:
3274 func = "_Qp_stoq";
3275 break;
3276 case DFmode:
3277 func = "_Qp_dtoq";
3278 break;
3279 default:
3280 gcc_unreachable ();
3281 }
3282 break;
3283
3284 case FLOAT_TRUNCATE:
3285 switch (GET_MODE (operands[0]))
3286 {
3287 case SFmode:
3288 func = "_Qp_qtos";
3289 break;
3290 case DFmode:
3291 func = "_Qp_qtod";
3292 break;
3293 default:
3294 gcc_unreachable ();
3295 }
3296 break;
3297
3298 case FLOAT:
3299 switch (GET_MODE (operands[1]))
3300 {
3301 case SImode:
3302 func = "_Qp_itoq";
3303 if (TARGET_ARCH64)
3304 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3305 break;
3306 case DImode:
3307 func = "_Qp_xtoq";
3308 break;
3309 default:
3310 gcc_unreachable ();
3311 }
3312 break;
3313
3314 case UNSIGNED_FLOAT:
3315 switch (GET_MODE (operands[1]))
3316 {
3317 case SImode:
3318 func = "_Qp_uitoq";
3319 if (TARGET_ARCH64)
3320 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3321 break;
3322 case DImode:
3323 func = "_Qp_uxtoq";
3324 break;
3325 default:
3326 gcc_unreachable ();
3327 }
3328 break;
3329
3330 case FIX:
3331 switch (GET_MODE (operands[0]))
3332 {
3333 case SImode:
3334 func = "_Qp_qtoi";
3335 break;
3336 case DImode:
3337 func = "_Qp_qtox";
3338 break;
3339 default:
3340 gcc_unreachable ();
3341 }
3342 break;
3343
3344 case UNSIGNED_FIX:
3345 switch (GET_MODE (operands[0]))
3346 {
3347 case SImode:
3348 func = "_Qp_qtoui";
3349 break;
3350 case DImode:
3351 func = "_Qp_qtoux";
3352 break;
3353 default:
3354 gcc_unreachable ();
3355 }
3356 break;
3357
3358 default:
3359 gcc_unreachable ();
3360 }
3361
3362 emit_soft_tfmode_libcall (func, 2, operands);
3363 }
3364
3365 /* Expand a hard-float tfmode operation. All arguments must be in
3366 registers. */
3367
3368 static void
3369 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3370 {
3371 rtx op, dest;
3372
3373 if (GET_RTX_CLASS (code) == RTX_UNARY)
3374 {
3375 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3376 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3377 }
3378 else
3379 {
3380 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3381 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3382 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3383 operands[1], operands[2]);
3384 }
3385
3386 if (register_operand (operands[0], VOIDmode))
3387 dest = operands[0];
3388 else
3389 dest = gen_reg_rtx (GET_MODE (operands[0]));
3390
3391 emit_insn (gen_rtx_SET (VOIDmode, dest, op));
3392
3393 if (dest != operands[0])
3394 emit_move_insn (operands[0], dest);
3395 }
3396
3397 void
3398 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3399 {
3400 if (TARGET_HARD_QUAD)
3401 emit_hard_tfmode_operation (code, operands);
3402 else
3403 emit_soft_tfmode_binop (code, operands);
3404 }
3405
3406 void
3407 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3408 {
3409 if (TARGET_HARD_QUAD)
3410 emit_hard_tfmode_operation (code, operands);
3411 else
3412 emit_soft_tfmode_unop (code, operands);
3413 }
3414
3415 void
3416 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3417 {
3418 if (TARGET_HARD_QUAD)
3419 emit_hard_tfmode_operation (code, operands);
3420 else
3421 emit_soft_tfmode_cvt (code, operands);
3422 }
3423 \f
3424 /* Return nonzero if a branch/jump/call instruction will be emitting
3425 a nop into its delay slot. */
3426
3427 int
3428 empty_delay_slot (rtx insn)
3429 {
3430 rtx seq;
3431
3432 /* If no previous instruction (should not happen), return true. */
3433 if (PREV_INSN (insn) == NULL)
3434 return 1;
3435
3436 seq = NEXT_INSN (PREV_INSN (insn));
3437 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3438 return 0;
3439
3440 return 1;
3441 }
3442
3443 /* Return nonzero if we should emit a nop after a cbcond instruction.
3444 The cbcond instruction does not have a delay slot; however, there is
3445 a severe performance penalty if a control transfer appears right
3446 after a cbcond. Therefore we emit a nop when we detect this
3447 situation. */
3448
3449 int
3450 emit_cbcond_nop (rtx insn)
3451 {
3452 rtx next = next_active_insn (insn);
3453
3454 if (!next)
3455 return 1;
3456
3457 if (NONJUMP_INSN_P (next)
3458 && GET_CODE (PATTERN (next)) == SEQUENCE)
3459 next = XVECEXP (PATTERN (next), 0, 0);
3460 else if (CALL_P (next)
3461 && GET_CODE (PATTERN (next)) == PARALLEL)
3462 {
3463 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3464
3465 if (GET_CODE (delay) == RETURN)
3466 {
3467 /* It's a sibling call. Do not emit the nop if we're going
3468 to emit something other than the jump itself as the first
3469 instruction of the sibcall sequence. */
3470 if (sparc_leaf_function_p || TARGET_FLAT)
3471 return 0;
3472 }
3473 }
3474
3475 if (NONJUMP_INSN_P (next))
3476 return 0;
3477
3478 return 1;
3479 }
3480
3481 /* Return nonzero if TRIAL can go into the call delay slot. */
3482
3483 int
3484 eligible_for_call_delay (rtx trial)
3485 {
3486 rtx pat;
3487
3488 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3489 return 0;
3490
3491 /* Binutils allows
3492 call __tls_get_addr, %tgd_call (foo)
3493 add %l7, %o0, %o0, %tgd_add (foo)
3494 while Sun as/ld does not. */
3495 if (TARGET_GNU_TLS || !TARGET_TLS)
3496 return 1;
3497
3498 pat = PATTERN (trial);
3499
3500 /* We must reject tgd_add{32|64}, i.e.
3501 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3502 and tldm_add{32|64}, i.e.
3503 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3504 for Sun as/ld. */
3505 if (GET_CODE (pat) == SET
3506 && GET_CODE (SET_SRC (pat)) == PLUS)
3507 {
3508 rtx unspec = XEXP (SET_SRC (pat), 1);
3509
3510 if (GET_CODE (unspec) == UNSPEC
3511 && (XINT (unspec, 1) == UNSPEC_TLSGD
3512 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3513 return 0;
3514 }
3515
3516 return 1;
3517 }
3518
3519 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3520 instruction. RETURN_P is true if the v9 variant 'return' is to be
3521 considered in the test too.
3522
3523 TRIAL must be a SET whose destination is a REG appropriate for the
3524 'restore' instruction or, if RETURN_P is true, for the 'return'
3525 instruction. */
3526
3527 static int
3528 eligible_for_restore_insn (rtx trial, bool return_p)
3529 {
3530 rtx pat = PATTERN (trial);
3531 rtx src = SET_SRC (pat);
3532 bool src_is_freg = false;
3533 rtx src_reg;
3534
3535 /* Since we now can do moves between float and integer registers when
3536 VIS3 is enabled, we have to catch this case. We can allow such
3537 moves when doing a 'return', however. */
3538 src_reg = src;
3539 if (GET_CODE (src_reg) == SUBREG)
3540 src_reg = SUBREG_REG (src_reg);
3541 if (GET_CODE (src_reg) == REG
3542 && SPARC_FP_REG_P (REGNO (src_reg)))
3543 src_is_freg = true;
3544
3545 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3546 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3547 && arith_operand (src, GET_MODE (src))
3548 && ! src_is_freg)
3549 {
3550 if (TARGET_ARCH64)
3551 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3552 else
3553 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3554 }
3555
3556 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3557 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3558 && arith_double_operand (src, GET_MODE (src))
3559 && ! src_is_freg)
3560 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3561
3562 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3563 else if (! TARGET_FPU && register_operand (src, SFmode))
3564 return 1;
3565
3566 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3567 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3568 return 1;
3569
3570 /* If we have the 'return' instruction, anything that does not use
3571 local or output registers and can go into a delay slot wins. */
3572 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3573 return 1;
3574
3575 /* The 'restore src1,src2,dest' pattern for SImode. */
3576 else if (GET_CODE (src) == PLUS
3577 && register_operand (XEXP (src, 0), SImode)
3578 && arith_operand (XEXP (src, 1), SImode))
3579 return 1;
3580
3581 /* The 'restore src1,src2,dest' pattern for DImode. */
3582 else if (GET_CODE (src) == PLUS
3583 && register_operand (XEXP (src, 0), DImode)
3584 && arith_double_operand (XEXP (src, 1), DImode))
3585 return 1;
3586
3587 /* The 'restore src1,%lo(src2),dest' pattern. */
3588 else if (GET_CODE (src) == LO_SUM
3589 && ! TARGET_CM_MEDMID
3590 && ((register_operand (XEXP (src, 0), SImode)
3591 && immediate_operand (XEXP (src, 1), SImode))
3592 || (TARGET_ARCH64
3593 && register_operand (XEXP (src, 0), DImode)
3594 && immediate_operand (XEXP (src, 1), DImode))))
3595 return 1;
3596
3597 /* The 'restore src,src,dest' pattern. */
3598 else if (GET_CODE (src) == ASHIFT
3599 && (register_operand (XEXP (src, 0), SImode)
3600 || register_operand (XEXP (src, 0), DImode))
3601 && XEXP (src, 1) == const1_rtx)
3602 return 1;
3603
3604 return 0;
3605 }
3606
3607 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3608
3609 int
3610 eligible_for_return_delay (rtx trial)
3611 {
3612 int regno;
3613 rtx pat;
3614
3615 /* If the function uses __builtin_eh_return, the eh_return machinery
3616 occupies the delay slot. */
3617 if (crtl->calls_eh_return)
3618 return 0;
3619
3620 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3621 return 0;
3622
3623 /* In the case of a leaf or flat function, anything can go into the slot. */
3624 if (sparc_leaf_function_p || TARGET_FLAT)
3625 return 1;
3626
3627 if (!NONJUMP_INSN_P (trial))
3628 return 0;
3629
3630 pat = PATTERN (trial);
3631 if (GET_CODE (pat) == PARALLEL)
3632 {
3633 int i;
3634
3635 if (! TARGET_V9)
3636 return 0;
3637 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3638 {
3639 rtx expr = XVECEXP (pat, 0, i);
3640 if (GET_CODE (expr) != SET)
3641 return 0;
3642 if (GET_CODE (SET_DEST (expr)) != REG)
3643 return 0;
3644 regno = REGNO (SET_DEST (expr));
3645 if (regno >= 8 && regno < 24)
3646 return 0;
3647 }
3648 return !epilogue_renumber (&pat, 1);
3649 }
3650
3651 if (GET_CODE (pat) != SET)
3652 return 0;
3653
3654 if (GET_CODE (SET_DEST (pat)) != REG)
3655 return 0;
3656
3657 regno = REGNO (SET_DEST (pat));
3658
3659 /* Otherwise, only operations which can be done in tandem with
3660 a `restore' or `return' insn can go into the delay slot. */
3661 if (regno >= 8 && regno < 24)
3662 return 0;
3663
3664 /* If this instruction sets up a floating-point register and we have a return
3665 instruction, it can probably go in. But restore will not work
3666 with FP_REGS. */
3667 if (! SPARC_INT_REG_P (regno))
3668 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3669
3670 return eligible_for_restore_insn (trial, true);
3671 }
3672
3673 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3674
3675 int
3676 eligible_for_sibcall_delay (rtx trial)
3677 {
3678 rtx pat;
3679
3680 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3681 return 0;
3682
3683 if (!NONJUMP_INSN_P (trial))
3684 return 0;
3685
3686 pat = PATTERN (trial);
3687
3688 if (sparc_leaf_function_p || TARGET_FLAT)
3689 {
3690 /* If the tail call is done using the call instruction,
3691 we have to restore %o7 in the delay slot. */
3692 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3693 return 0;
3694
3695 /* %g1 is used to build the function address. */
3696 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3697 return 0;
3698
3699 return 1;
3700 }
3701
3702 if (GET_CODE (pat) != SET)
3703 return 0;
3704
3705 /* Otherwise, only operations which can be done in tandem with
3706 a `restore' insn can go into the delay slot. */
3707 if (GET_CODE (SET_DEST (pat)) != REG
3708 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3709 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3710 return 0;
3711
3712 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3713 in most cases. */
3714 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3715 return 0;
3716
3717 return eligible_for_restore_insn (trial, false);
3718 }
3719 \f
3720 /* Determine if it's legal to put X into the constant pool. This
3721 is not possible if X contains the address of a symbol that is
3722 not constant (TLS) or not known at final link time (PIC). */
3723
3724 static bool
3725 sparc_cannot_force_const_mem (enum machine_mode mode, rtx x)
3726 {
3727 switch (GET_CODE (x))
3728 {
3729 case CONST_INT:
3730 case CONST_DOUBLE:
3731 case CONST_VECTOR:
3732 /* Accept all non-symbolic constants. */
3733 return false;
3734
3735 case LABEL_REF:
3736 /* Labels are OK iff we are non-PIC. */
3737 return flag_pic != 0;
3738
3739 case SYMBOL_REF:
3740 /* 'Naked' TLS symbol references are never OK;
3741 non-TLS symbols are OK iff we are non-PIC. */
3742 if (SYMBOL_REF_TLS_MODEL (x))
3743 return true;
3744 else
3745 return flag_pic != 0;
3746
3747 case CONST:
3748 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3749 case PLUS:
3750 case MINUS:
3751 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3752 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3753 case UNSPEC:
3754 return true;
3755 default:
3756 gcc_unreachable ();
3757 }
3758 }
3759 \f
3760 /* Global Offset Table support. */
3761 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3762 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3763
3764 /* Return the SYMBOL_REF for the Global Offset Table. */
3765
3766 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3767
3768 static rtx
3769 sparc_got (void)
3770 {
3771 if (!sparc_got_symbol)
3772 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3773
3774 return sparc_got_symbol;
3775 }
3776
3777 /* Ensure that we are not using patterns that are not OK with PIC. */
3778
3779 int
3780 check_pic (int i)
3781 {
3782 rtx op;
3783
3784 switch (flag_pic)
3785 {
3786 case 1:
3787 op = recog_data.operand[i];
3788 gcc_assert (GET_CODE (op) != SYMBOL_REF
3789 && (GET_CODE (op) != CONST
3790 || (GET_CODE (XEXP (op, 0)) == MINUS
3791 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3792 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
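/* Case 1 deliberately falls through: once the assertion passes,
   the operand is OK.  */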
3793 case 2:
3794 default:
3795 return 1;
3796 }
3797 }
3798
3799 /* Return true if X is an address which needs a temporary register when
3800 reloaded while generating PIC code. */
3801
3802 int
3803 pic_address_needs_scratch (rtx x)
3804 {
3805 /* An address that is a symbolic operand plus a non-SMALL_INT constant needs a temp reg. */
3806 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3807 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3808 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3809 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3810 return 1;
3811
3812 return 0;
3813 }
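/* For example, (const (plus (symbol_ref "x") (const_int 0x2000))) needs a
   scratch register: 0x2000 does not fit in the 13-bit signed immediate
   field, so it cannot be folded into the %lo() part of the address.  */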
3814
3815 /* Determine if a given RTX is a valid constant. We already know this
3816 satisfies CONSTANT_P. */
3817
3818 static bool
3819 sparc_legitimate_constant_p (enum machine_mode mode, rtx x)
3820 {
3821 switch (GET_CODE (x))
3822 {
3823 case CONST:
3824 case SYMBOL_REF:
3825 if (sparc_tls_referenced_p (x))
3826 return false;
3827 break;
3828
3829 case CONST_DOUBLE:
3830 if (GET_MODE (x) == VOIDmode)
3831 return true;
3832
3833 /* Floating point constants are generally not ok.
3834 The only exception is 0.0 and all-ones in VIS. */
3835 if (TARGET_VIS
3836 && SCALAR_FLOAT_MODE_P (mode)
3837 && (const_zero_operand (x, mode)
3838 || const_all_ones_operand (x, mode)))
3839 return true;
3840
3841 return false;
3842
3843 case CONST_VECTOR:
3844 /* Vector constants are generally not ok.
3845 The only exception is 0 or -1 in VIS. */
3846 if (TARGET_VIS
3847 && (const_zero_operand (x, mode)
3848 || const_all_ones_operand (x, mode)))
3849 return true;
3850
3851 return false;
3852
3853 default:
3854 break;
3855 }
3856
3857 return true;
3858 }
3859
3860 /* Determine if a given RTX is a valid constant address. */
3861
3862 bool
3863 constant_address_p (rtx x)
3864 {
3865 switch (GET_CODE (x))
3866 {
3867 case LABEL_REF:
3868 case CONST_INT:
3869 case HIGH:
3870 return true;
3871
3872 case CONST:
3873 if (flag_pic && pic_address_needs_scratch (x))
3874 return false;
3875 return sparc_legitimate_constant_p (Pmode, x);
3876
3877 case SYMBOL_REF:
3878 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3879
3880 default:
3881 return false;
3882 }
3883 }
3884
3885 /* Nonzero if the constant value X is a legitimate general operand
3886 when generating PIC code. It is given that flag_pic is on and
3887 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
3888
3889 bool
3890 legitimate_pic_operand_p (rtx x)
3891 {
3892 if (pic_address_needs_scratch (x))
3893 return false;
3894 if (sparc_tls_referenced_p (x))
3895 return false;
3896 return true;
3897 }
3898
3899 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3900 (CONST_INT_P (X) \
3901 && INTVAL (X) >= -0x1000 \
3902 && INTVAL (X) < (0x1000 - GET_MODE_SIZE (MODE)))
3903
3904 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3905 (CONST_INT_P (X) \
3906 && INTVAL (X) >= -0x1000 \
3907 && INTVAL (X) < (0xc00 - GET_MODE_SIZE (MODE)))
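/* As an illustration, for MODE == DImode (size 8) RTX_OK_FOR_OFFSET_P
   accepts offsets in [-0x1000, 0xff7], so that the last word of the
   access still lies within the 13-bit signed displacement range.  The
   tighter 0xc00 bound for OLO10 leaves headroom for the up-to-0x3ff
   value of the %lo() part added by the LO_SUM.  */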
3908
3909 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3910
3911 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3912 ordinarily. This changes a bit when generating PIC. */
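/* Schematically, the forms accepted below are, e.g.:

     (reg)                               register indirect
     (plus (reg) (reg))                  register + register
     (plus (reg) (const_int simm13))     register + 13-bit immediate
     (lo_sum (reg) (symbol_ref))         register + %lo(symbol)

   subject to the mode and PIC restrictions spelled out in the code.  */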
3913
3914 static bool
3915 sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3916 {
3917 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3918
3919 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3920 rs1 = addr;
3921 else if (GET_CODE (addr) == PLUS)
3922 {
3923 rs1 = XEXP (addr, 0);
3924 rs2 = XEXP (addr, 1);
3925
3926 /* Canonicalize. REG comes first; if there are no regs,
3927 LO_SUM comes first. */
3928 if (!REG_P (rs1)
3929 && GET_CODE (rs1) != SUBREG
3930 && (REG_P (rs2)
3931 || GET_CODE (rs2) == SUBREG
3932 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3933 {
3934 rs1 = XEXP (addr, 1);
3935 rs2 = XEXP (addr, 0);
3936 }
3937
3938 if ((flag_pic == 1
3939 && rs1 == pic_offset_table_rtx
3940 && !REG_P (rs2)
3941 && GET_CODE (rs2) != SUBREG
3942 && GET_CODE (rs2) != LO_SUM
3943 && GET_CODE (rs2) != MEM
3944 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3945 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3946 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3947 || ((REG_P (rs1)
3948 || GET_CODE (rs1) == SUBREG)
3949 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
3950 {
3951 imm1 = rs2;
3952 rs2 = NULL;
3953 }
3954 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3955 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3956 {
3957 /* We prohibit REG + REG for TFmode when there are no quad move insns
3958 and we consequently need to split. We do this because REG+REG
3959 is not an offsettable address. If we get the situation in reload
3960 where source and destination of a movtf pattern are both MEMs with
3961 REG+REG address, then only one of them gets converted to an
3962 offsettable address. */
3963 if (mode == TFmode
3964 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
3965 return 0;
3966
3967 /* Likewise for TImode, but in all cases. */
3968 if (mode == TImode)
3969 return 0;
3970
3971 /* We prohibit REG + REG on ARCH32 if not optimizing for
3972 DFmode/DImode because then mem_min_alignment is likely to be zero
3973 after reload and the forced split would lack a matching splitter
3974 pattern. */
3975 if (TARGET_ARCH32 && !optimize
3976 && (mode == DFmode || mode == DImode))
3977 return 0;
3978 }
3979 else if (USE_AS_OFFSETABLE_LO10
3980 && GET_CODE (rs1) == LO_SUM
3981 && TARGET_ARCH64
3982 && ! TARGET_CM_MEDMID
3983 && RTX_OK_FOR_OLO10_P (rs2, mode))
3984 {
3985 rs2 = NULL;
3986 imm1 = XEXP (rs1, 1);
3987 rs1 = XEXP (rs1, 0);
3988 if (!CONSTANT_P (imm1)
3989 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3990 return 0;
3991 }
3992 }
3993 else if (GET_CODE (addr) == LO_SUM)
3994 {
3995 rs1 = XEXP (addr, 0);
3996 imm1 = XEXP (addr, 1);
3997
3998 if (!CONSTANT_P (imm1)
3999 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4000 return 0;
4001
4002 /* We can't allow TFmode in 32-bit mode, because an offset greater
4003 than the alignment (8) may cause the LO_SUM to overflow. */
4004 if (mode == TFmode && TARGET_ARCH32)
4005 return 0;
4006 }
4007 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4008 return 1;
4009 else
4010 return 0;
4011
4012 if (GET_CODE (rs1) == SUBREG)
4013 rs1 = SUBREG_REG (rs1);
4014 if (!REG_P (rs1))
4015 return 0;
4016
4017 if (rs2)
4018 {
4019 if (GET_CODE (rs2) == SUBREG)
4020 rs2 = SUBREG_REG (rs2);
4021 if (!REG_P (rs2))
4022 return 0;
4023 }
4024
4025 if (strict)
4026 {
4027 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4028 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4029 return 0;
4030 }
4031 else
4032 {
4033 if ((! SPARC_INT_REG_P (REGNO (rs1))
4034 && REGNO (rs1) != FRAME_POINTER_REGNUM
4035 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4036 || (rs2
4037 && (! SPARC_INT_REG_P (REGNO (rs2))
4038 && REGNO (rs2) != FRAME_POINTER_REGNUM
4039 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4040 return 0;
4041 }
4042 return 1;
4043 }
4044
4045 /* Return the SYMBOL_REF for the tls_get_addr function. */
4046
4047 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4048
4049 static rtx
4050 sparc_tls_get_addr (void)
4051 {
4052 if (!sparc_tls_symbol)
4053 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4054
4055 return sparc_tls_symbol;
4056 }
4057
4058 /* Return the Global Offset Table to be used in TLS mode. */
4059
4060 static rtx
4061 sparc_tls_got (void)
4062 {
4063 /* In PIC mode, this is just the PIC offset table. */
4064 if (flag_pic)
4065 {
4066 crtl->uses_pic_offset_table = 1;
4067 return pic_offset_table_rtx;
4068 }
4069
4070 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4071 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4072 if (TARGET_SUN_TLS && TARGET_ARCH32)
4073 {
4074 load_got_register ();
4075 return global_offset_table_rtx;
4076 }
4077
4078 /* In all other cases, we load a new pseudo with the GOT symbol. */
4079 return copy_to_reg (sparc_got ());
4080 }
4081
4082 /* Return true if X contains a thread-local symbol. */
4083
4084 static bool
4085 sparc_tls_referenced_p (rtx x)
4086 {
4087 if (!TARGET_HAVE_TLS)
4088 return false;
4089
4090 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4091 x = XEXP (XEXP (x, 0), 0);
4092
4093 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4094 return true;
4095
4096 /* That's all we handle in sparc_legitimize_tls_address for now. */
4097 return false;
4098 }
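/* E.g. both (symbol_ref "x") with a TLS model and
   (const (plus (symbol_ref "x") (const_int 8))) are caught here.  */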
4099
4100 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4101 this (thread-local) address. */
4102
4103 static rtx
4104 sparc_legitimize_tls_address (rtx addr)
4105 {
4106 rtx temp1, temp2, temp3, ret, o0, got;
4107 rtx_insn *insn;
4108
4109 gcc_assert (can_create_pseudo_p ());
4110
4111 if (GET_CODE (addr) == SYMBOL_REF)
4112 switch (SYMBOL_REF_TLS_MODEL (addr))
4113 {
4114 case TLS_MODEL_GLOBAL_DYNAMIC:
4115 start_sequence ();
4116 temp1 = gen_reg_rtx (SImode);
4117 temp2 = gen_reg_rtx (SImode);
4118 ret = gen_reg_rtx (Pmode);
4119 o0 = gen_rtx_REG (Pmode, 8);
4120 got = sparc_tls_got ();
4121 emit_insn (gen_tgd_hi22 (temp1, addr));
4122 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4123 if (TARGET_ARCH32)
4124 {
4125 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4126 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4127 addr, const1_rtx));
4128 }
4129 else
4130 {
4131 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4132 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4133 addr, const1_rtx));
4134 }
4135 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4136 insn = get_insns ();
4137 end_sequence ();
4138 emit_libcall_block (insn, ret, o0, addr);
4139 break;
4140
4141 case TLS_MODEL_LOCAL_DYNAMIC:
4142 start_sequence ();
4143 temp1 = gen_reg_rtx (SImode);
4144 temp2 = gen_reg_rtx (SImode);
4145 temp3 = gen_reg_rtx (Pmode);
4146 ret = gen_reg_rtx (Pmode);
4147 o0 = gen_rtx_REG (Pmode, 8);
4148 got = sparc_tls_got ();
4149 emit_insn (gen_tldm_hi22 (temp1));
4150 emit_insn (gen_tldm_lo10 (temp2, temp1));
4151 if (TARGET_ARCH32)
4152 {
4153 emit_insn (gen_tldm_add32 (o0, got, temp2));
4154 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4155 const1_rtx));
4156 }
4157 else
4158 {
4159 emit_insn (gen_tldm_add64 (o0, got, temp2));
4160 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4161 const1_rtx));
4162 }
4163 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4164 insn = get_insns ();
4165 end_sequence ();
4166 emit_libcall_block (insn, temp3, o0,
4167 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4168 UNSPEC_TLSLD_BASE));
4169 temp1 = gen_reg_rtx (SImode);
4170 temp2 = gen_reg_rtx (SImode);
4171 emit_insn (gen_tldo_hix22 (temp1, addr));
4172 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4173 if (TARGET_ARCH32)
4174 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4175 else
4176 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4177 break;
4178
4179 case TLS_MODEL_INITIAL_EXEC:
4180 temp1 = gen_reg_rtx (SImode);
4181 temp2 = gen_reg_rtx (SImode);
4182 temp3 = gen_reg_rtx (Pmode);
4183 got = sparc_tls_got ();
4184 emit_insn (gen_tie_hi22 (temp1, addr));
4185 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4186 if (TARGET_ARCH32)
4187 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4188 else
4189 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4190 if (TARGET_SUN_TLS)
4191 {
4192 ret = gen_reg_rtx (Pmode);
4193 if (TARGET_ARCH32)
4194 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4195 temp3, addr));
4196 else
4197 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4198 temp3, addr));
4199 }
4200 else
4201 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4202 break;
4203
4204 case TLS_MODEL_LOCAL_EXEC:
4205 temp1 = gen_reg_rtx (Pmode);
4206 temp2 = gen_reg_rtx (Pmode);
4207 if (TARGET_ARCH32)
4208 {
4209 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4210 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4211 }
4212 else
4213 {
4214 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4215 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4216 }
4217 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4218 break;
4219
4220 default:
4221 gcc_unreachable ();
4222 }
4223
4224 else if (GET_CODE (addr) == CONST)
4225 {
4226 rtx base, offset;
4227
4228 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4229
4230 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4231 offset = XEXP (XEXP (addr, 0), 1);
4232
4233 base = force_operand (base, NULL_RTX);
4234 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4235 offset = force_reg (Pmode, offset);
4236 ret = gen_rtx_PLUS (Pmode, base, offset);
4237 }
4238
4239 else
4240 gcc_unreachable (); /* for now ... */
4241
4242 return ret;
4243 }
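/* As a rough illustration, the local-exec model above expands in 32-bit
   mode to:

	sethi	%tle_hix22(sym), %t1
	xor	%t1, %tle_lox10(sym), %t2
	! result = %g7 + %t2, %g7 being the thread pointer

   the other models combine similar %hi/%lo-style relocations with a GOT
   load and/or a call to __tls_get_addr, as emitted above.  */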
4244
4245 /* Legitimize PIC addresses. If the address is already position-independent,
4246 we return ORIG. Newly generated position-independent addresses go into a
4247 reg. This is REG if nonzero, otherwise we allocate register(s) as
4248 necessary. */
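/* For instance, with -fPIC (flag_pic == 2) a global symbol SYM is loaded
   schematically as:

	sethi	%hi(gotslot(SYM)), %t
	or	%t, %lo(gotslot(SYM)), %t
	ld	[%l7 + %t], %reg

   i.e. the address is fetched from SYM's GOT slot relative to the PIC
   register; whether the plain %hi/%lo or the newer %gdop_* relocation
   operators are used depends on assembler support.  */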
4249
4250 static rtx
4251 sparc_legitimize_pic_address (rtx orig, rtx reg)
4252 {
4253 bool gotdata_op = false;
4254
4255 if (GET_CODE (orig) == SYMBOL_REF
4256 /* See the comment in sparc_expand_move. */
4257 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4258 {
4259 rtx pic_ref, address;
4260 rtx_insn *insn;
4261
4262 if (reg == 0)
4263 {
4264 gcc_assert (can_create_pseudo_p ());
4265 reg = gen_reg_rtx (Pmode);
4266 }
4267
4268 if (flag_pic == 2)
4269 {
4270 /* If not during reload, allocate another temp reg here for loading
4271 in the address, so that these instructions can be optimized
4272 properly. */
4273 rtx temp_reg = (! can_create_pseudo_p ()
4274 ? reg : gen_reg_rtx (Pmode));
4275
4276 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4277 won't get confused into thinking that these two instructions
4278 are loading in the true address of the symbol. If in the
4279 future a PIC rtx exists, that should be used instead. */
4280 if (TARGET_ARCH64)
4281 {
4282 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4283 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4284 }
4285 else
4286 {
4287 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4288 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4289 }
4290 address = temp_reg;
4291 gotdata_op = true;
4292 }
4293 else
4294 address = orig;
4295
4296 crtl->uses_pic_offset_table = 1;
4297 if (gotdata_op)
4298 {
4299 if (TARGET_ARCH64)
4300 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4301 pic_offset_table_rtx,
4302 address, orig));
4303 else
4304 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4305 pic_offset_table_rtx,
4306 address, orig));
4307 }
4308 else
4309 {
4310 pic_ref
4311 = gen_const_mem (Pmode,
4312 gen_rtx_PLUS (Pmode,
4313 pic_offset_table_rtx, address));
4314 insn = emit_move_insn (reg, pic_ref);
4315 }
4316
4317 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4318 by loop. */
4319 set_unique_reg_note (insn, REG_EQUAL, orig);
4320 return reg;
4321 }
4322 else if (GET_CODE (orig) == CONST)
4323 {
4324 rtx base, offset;
4325
4326 if (GET_CODE (XEXP (orig, 0)) == PLUS
4327 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4328 return orig;
4329
4330 if (reg == 0)
4331 {
4332 gcc_assert (can_create_pseudo_p ());
4333 reg = gen_reg_rtx (Pmode);
4334 }
4335
4336 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4337 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4338 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4339 base == reg ? NULL_RTX : reg);
4340
4341 if (GET_CODE (offset) == CONST_INT)
4342 {
4343 if (SMALL_INT (offset))
4344 return plus_constant (Pmode, base, INTVAL (offset));
4345 else if (can_create_pseudo_p ())
4346 offset = force_reg (Pmode, offset);
4347 else
4348 /* If we reach here, then something is seriously wrong. */
4349 gcc_unreachable ();
4350 }
4351 return gen_rtx_PLUS (Pmode, base, offset);
4352 }
4353 else if (GET_CODE (orig) == LABEL_REF)
4354 /* ??? We ought to be checking that the register is live instead, in case
4355 it is eliminated. */
4356 crtl->uses_pic_offset_table = 1;
4357
4358 return orig;
4359 }
4360
4361 /* Try machine-dependent ways of modifying an illegitimate address X
4362 to be legitimate. If we find one, return the new, valid address.
4363
4364 OLDX is the address as it was before break_out_memory_refs was called.
4365 In some cases it is useful to look at this to decide what needs to be done.
4366
4367 MODE is the mode of the operand pointed to by X.
4368
4369 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
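/* E.g. (plus (reg) (const_int 0x12345)) becomes (plus (reg) (reg tmp))
   with TMP loaded via a sethi/or pair, since the constant does not fit
   in the 13-bit immediate field of a memory address.  */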
4370
4371 static rtx
4372 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4373 enum machine_mode mode)
4374 {
4375 rtx orig_x = x;
4376
4377 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4378 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4379 force_operand (XEXP (x, 0), NULL_RTX));
4380 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4381 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4382 force_operand (XEXP (x, 1), NULL_RTX));
4383 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4384 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4385 XEXP (x, 1));
4386 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4387 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4388 force_operand (XEXP (x, 1), NULL_RTX));
4389
4390 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4391 return x;
4392
4393 if (sparc_tls_referenced_p (x))
4394 x = sparc_legitimize_tls_address (x);
4395 else if (flag_pic)
4396 x = sparc_legitimize_pic_address (x, NULL_RTX);
4397 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4398 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4399 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4400 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4401 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4402 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4403 else if (GET_CODE (x) == SYMBOL_REF
4404 || GET_CODE (x) == CONST
4405 || GET_CODE (x) == LABEL_REF)
4406 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4407
4408 return x;
4409 }
4410
4411 /* Delegitimize an address that was legitimized by the above function. */
4412
4413 static rtx
4414 sparc_delegitimize_address (rtx x)
4415 {
4416 x = delegitimize_mem_from_attrs (x);
4417
4418 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4419 switch (XINT (XEXP (x, 1), 1))
4420 {
4421 case UNSPEC_MOVE_PIC:
4422 case UNSPEC_TLSLE:
4423 x = XVECEXP (XEXP (x, 1), 0, 0);
4424 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4425 break;
4426 default:
4427 break;
4428 }
4429
4430 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4431 if (GET_CODE (x) == MINUS
4432 && REG_P (XEXP (x, 0))
4433 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4434 && GET_CODE (XEXP (x, 1)) == LO_SUM
4435 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4436 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4437 {
4438 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4439 gcc_assert (GET_CODE (x) == LABEL_REF);
4440 }
4441
4442 return x;
4443 }
4444
4445 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4446 replace the input X, or the original X if no replacement is called for.
4447 The output parameter *WIN is 1 if the calling macro should goto WIN,
4448 0 if it should not.
4449
4450 For SPARC, we wish to handle addresses by splitting them into
4451 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4452 This cuts the number of extra insns by one.
4453
4454 Do nothing when generating PIC code and the address is a symbolic
4455 operand or requires a scratch register. */
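/* E.g. a constant address X is rewritten as (lo_sum (high X) X); the HIGH
   part is reloaded into a register with a sethi %hi(X), and the memory
   reference becomes [%reg + %lo(X)], saving one instruction over
   materializing the full address first.  */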
4456
4457 rtx
4458 sparc_legitimize_reload_address (rtx x, enum machine_mode mode,
4459 int opnum, int type,
4460 int ind_levels ATTRIBUTE_UNUSED, int *win)
4461 {
4462 /* Decompose SImode constants into HIGH+LO_SUM. */
4463 if (CONSTANT_P (x)
4464 && (mode != TFmode || TARGET_ARCH64)
4465 && GET_MODE (x) == SImode
4466 && GET_CODE (x) != LO_SUM
4467 && GET_CODE (x) != HIGH
4468 && sparc_cmodel <= CM_MEDLOW
4469 && !(flag_pic
4470 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4471 {
4472 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4473 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4474 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4475 opnum, (enum reload_type)type);
4476 *win = 1;
4477 return x;
4478 }
4479
4480 /* We have to recognize what we have already generated above. */
4481 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4482 {
4483 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4484 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4485 opnum, (enum reload_type)type);
4486 *win = 1;
4487 return x;
4488 }
4489
4490 *win = 0;
4491 return x;
4492 }
4493
4494 /* Return true if ADDR (a legitimate address expression)
4495 has an effect that depends on the machine mode it is used for.
4496
4497 In PIC mode,
4498
4499 (mem:HI [%l7+a])
4500
4501 is not equivalent to
4502
4503 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4504
4505 because [%l7+a+1] is interpreted as the address of (a+1). */
4506
4508 static bool
4509 sparc_mode_dependent_address_p (const_rtx addr,
4510 addr_space_t as ATTRIBUTE_UNUSED)
4511 {
4512 if (flag_pic && GET_CODE (addr) == PLUS)
4513 {
4514 rtx op0 = XEXP (addr, 0);
4515 rtx op1 = XEXP (addr, 1);
4516 if (op0 == pic_offset_table_rtx
4517 && symbolic_operand (op1, VOIDmode))
4518 return true;
4519 }
4520
4521 return false;
4522 }
4523
4524 #ifdef HAVE_GAS_HIDDEN
4525 # define USE_HIDDEN_LINKONCE 1
4526 #else
4527 # define USE_HIDDEN_LINKONCE 0
4528 #endif
4529
4530 static void
4531 get_pc_thunk_name (char name[32], unsigned int regno)
4532 {
4533 const char *reg_name = reg_names[regno];
4534
4535 /* Skip the leading '%' as that cannot be used in a
4536 symbol name. */
4537 reg_name += 1;
4538
4539 if (USE_HIDDEN_LINKONCE)
4540 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4541 else
4542 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4543 }
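/* The sequence built around this thunk is roughly:

	sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %l7
	call	__sparc_get_pc_thunk.l7
	 add	%l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk itself just does "jmp %o7+8; add %o7, %l7, %l7", so
   that %l7 ends up holding the GOT address; see gen_load_pcrel_sym and
   load_got_register below.  */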
4544
4545 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4546
4547 static rtx
4548 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4549 {
4550 int orig_flag_pic = flag_pic;
4551 rtx insn;
4552
4553 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4554 flag_pic = 0;
4555 if (TARGET_ARCH64)
4556 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4557 else
4558 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4559 flag_pic = orig_flag_pic;
4560
4561 return insn;
4562 }
4563
4564 /* Emit code to load the GOT register. */
4565
4566 void
4567 load_got_register (void)
4568 {
4569 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4570 if (!global_offset_table_rtx)
4571 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4572
4573 if (TARGET_VXWORKS_RTP)
4574 emit_insn (gen_vxworks_load_got ());
4575 else
4576 {
4577 /* The GOT symbol is subject to a PC-relative relocation so we need a
4578 helper function to add the PC value and thus get the final value. */
4579 if (!got_helper_rtx)
4580 {
4581 char name[32];
4582 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4583 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4584 }
4585
4586 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4587 got_helper_rtx,
4588 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4589 }
4590
4591 /* Need to emit this whether or not we obey regdecls,
4592 since setjmp/longjmp can cause life info to screw up.
4593 ??? In the case where we don't obey regdecls, this is not sufficient
4594 since we may not fall out the bottom. */
4595 emit_use (global_offset_table_rtx);
4596 }
4597
4598 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4599 address of the call target. */
4600
4601 void
4602 sparc_emit_call_insn (rtx pat, rtx addr)
4603 {
4604 rtx_insn *insn;
4605
4606 insn = emit_call_insn (pat);
4607
4608 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4609 if (TARGET_VXWORKS_RTP
4610 && flag_pic
4611 && GET_CODE (addr) == SYMBOL_REF
4612 && (SYMBOL_REF_DECL (addr)
4613 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4614 : !SYMBOL_REF_LOCAL_P (addr)))
4615 {
4616 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4617 crtl->uses_pic_offset_table = 1;
4618 }
4619 }
4620 \f
4621 /* Return 1 if RTX is a MEM which is known to be aligned to at
4622 least a DESIRED byte boundary. */
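/* Worked example: on 64-bit with SPARC_STACK_BIAS == 2047, an offset of
   2055 from %fp is 8-byte aligned since (2055 - 2047) & 7 == 0, which is
   why the frame/stack pointer case below subtracts the bias before
   testing the low bits of the offset.  */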
4623
4624 int
4625 mem_min_alignment (rtx mem, int desired)
4626 {
4627 rtx addr, base, offset;
4628
4629 /* If it's not a MEM we can't accept it. */
4630 if (GET_CODE (mem) != MEM)
4631 return 0;
4632
4633 /* Obviously... */
4634 if (!TARGET_UNALIGNED_DOUBLES
4635 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4636 return 1;
4637
4638 /* ??? The rest of the function predates MEM_ALIGN so
4639 there is probably a bit of redundancy. */
4640 addr = XEXP (mem, 0);
4641 base = offset = NULL_RTX;
4642 if (GET_CODE (addr) == PLUS)
4643 {
4644 if (GET_CODE (XEXP (addr, 0)) == REG)
4645 {
4646 base = XEXP (addr, 0);
4647
4648 /* What we are saying here is that if the base
4649 REG is aligned properly, the compiler will make
4650 sure any REG based index upon it will be so
4651 as well. */
4652 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4653 offset = XEXP (addr, 1);
4654 else
4655 offset = const0_rtx;
4656 }
4657 }
4658 else if (GET_CODE (addr) == REG)
4659 {
4660 base = addr;
4661 offset = const0_rtx;
4662 }
4663
4664 if (base != NULL_RTX)
4665 {
4666 int regno = REGNO (base);
4667
4668 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4669 {
4670 /* Check if the compiler has recorded some information
4671 about the alignment of the base REG. If reload has
4672 completed, we already matched with proper alignments.
4673 If not running global_alloc, reload might give us
4674 unaligned pointer to local stack though. */
4675 if (((cfun != 0
4676 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4677 || (optimize && reload_completed))
4678 && (INTVAL (offset) & (desired - 1)) == 0)
4679 return 1;
4680 }
4681 else
4682 {
4683 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4684 return 1;
4685 }
4686 }
4687 else if (! TARGET_UNALIGNED_DOUBLES
4688 || CONSTANT_P (addr)
4689 || GET_CODE (addr) == LO_SUM)
4690 {
4691 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4692 is true, in which case we can only assume that an access is aligned if
4693 it is to a constant address, or the address involves a LO_SUM. */
4694 return 1;
4695 }
4696
4697 /* An obviously unaligned address. */
4698 return 0;
4699 }
4700
4701 \f
4702 /* Vectors to keep interesting information about registers where it can easily
4703 be got. We used to use the actual mode value as the bit number, but there
4704 are more than 32 modes now. Instead we use two tables: one indexed by
4705 hard register number, and one indexed by mode. */
4706
4707 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4708 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4709 mapped into one sparc_mode_class mode. */
4710
4711 enum sparc_mode_class {
4712 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4713 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4714 CC_MODE, CCFP_MODE
4715 };
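/* E.g. QImode and HImode map to H_MODE, SImode to S_MODE, DImode to
   D_MODE, TImode to T_MODE, SFmode to SF_MODE, DFmode to DF_MODE and
   TFmode to TF_MODE; see sparc_init_modes below.  */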
4716
4717 /* Modes for single-word and smaller quantities. */
4718 #define S_MODES \
4719 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4720
4721 /* Modes for double-word and smaller quantities. */
4722 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4723
4724 /* Modes for quad-word and smaller quantities. */
4725 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4726
4727 /* Modes for 8-word and smaller quantities. */
4728 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4729
4730 /* Modes for single-float quantities. */
4731 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4732
4733 /* Modes for double-float and smaller quantities. */
4734 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4735
4736 /* Modes for quad-float and smaller quantities. */
4737 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4738
4739 /* Modes for quad-float pairs and smaller quantities. */
4740 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4741
4742 /* Modes for double-float only quantities. */
4743 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4744
4745 /* Modes for quad-float and double-float only quantities. */
4746 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4747
4748 /* Modes for quad-float pairs and double-float only quantities. */
4749 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4750
4751 /* Modes for condition codes. */
4752 #define CC_MODES (1 << (int) CC_MODE)
4753 #define CCFP_MODES (1 << (int) CCFP_MODE)
4754
4755 /* Value is 1 if register/mode pair is acceptable on sparc.
4756 The funny mixture of D and T modes is because integer operations
4757 do not specially operate on tetra quantities, so non-quad-aligned
4758 registers can hold quadword quantities (except %o4 and %i4 because
4759 they cross fixed registers). */
4760
4761 /* This points to either the 32 bit or the 64 bit version. */
4762 const int *hard_regno_mode_classes;
4763
4764 static const int hard_32bit_mode_classes[] = {
4765 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4766 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4767 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4768 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4769
4770 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4771 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4772 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4773 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4774
4775 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4776 and none can hold SFmode/SImode values. */
4777 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4778 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4779 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4780 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4781
4782 /* %fcc[0123] */
4783 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4784
4785 /* %icc, %sfp, %gsr */
4786 CC_MODES, 0, D_MODES
4787 };
4788
4789 static const int hard_64bit_mode_classes[] = {
4790 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4791 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4792 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4793 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4794
4795 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4796 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4797 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4798 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4799
4800 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4801 and none can hold SFmode/SImode values. */
4802 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4803 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4804 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4805 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4806
4807 /* %fcc[0123] */
4808 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4809
4810 /* %icc, %sfp, %gsr */
4811 CC_MODES, 0, D_MODES
4812 };
4813
4814 int sparc_mode_class [NUM_MACHINE_MODES];
4815
4816 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4817
4818 static void
4819 sparc_init_modes (void)
4820 {
4821 int i;
4822
4823 for (i = 0; i < NUM_MACHINE_MODES; i++)
4824 {
4825 enum machine_mode m = (enum machine_mode) i;
4826 unsigned int size = GET_MODE_SIZE (m);
4827
4828 switch (GET_MODE_CLASS (m))
4829 {
4830 case MODE_INT:
4831 case MODE_PARTIAL_INT:
4832 case MODE_COMPLEX_INT:
4833 if (size < 4)
4834 sparc_mode_class[i] = 1 << (int) H_MODE;
4835 else if (size == 4)
4836 sparc_mode_class[i] = 1 << (int) S_MODE;
4837 else if (size == 8)
4838 sparc_mode_class[i] = 1 << (int) D_MODE;
4839 else if (size == 16)
4840 sparc_mode_class[i] = 1 << (int) T_MODE;
4841 else if (size == 32)
4842 sparc_mode_class[i] = 1 << (int) O_MODE;
4843 else
4844 sparc_mode_class[i] = 0;
4845 break;
4846 case MODE_VECTOR_INT:
4847 if (size == 4)
4848 sparc_mode_class[i] = 1 << (int) SF_MODE;
4849 else if (size == 8)
4850 sparc_mode_class[i] = 1 << (int) DF_MODE;
4851 else
4852 sparc_mode_class[i] = 0;
4853 break;
4854 case MODE_FLOAT:
4855 case MODE_COMPLEX_FLOAT:
4856 if (size == 4)
4857 sparc_mode_class[i] = 1 << (int) SF_MODE;
4858 else if (size == 8)
4859 sparc_mode_class[i] = 1 << (int) DF_MODE;
4860 else if (size == 16)
4861 sparc_mode_class[i] = 1 << (int) TF_MODE;
4862 else if (size == 32)
4863 sparc_mode_class[i] = 1 << (int) OF_MODE;
4864 else
4865 sparc_mode_class[i] = 0;
4866 break;
4867 case MODE_CC:
4868 if (m == CCFPmode || m == CCFPEmode)
4869 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4870 else
4871 sparc_mode_class[i] = 1 << (int) CC_MODE;
4872 break;
4873 default:
4874 sparc_mode_class[i] = 0;
4875 break;
4876 }
4877 }
4878
4879 if (TARGET_ARCH64)
4880 hard_regno_mode_classes = hard_64bit_mode_classes;
4881 else
4882 hard_regno_mode_classes = hard_32bit_mode_classes;
4883
4884 /* Initialize the array used by REGNO_REG_CLASS. */
4885 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4886 {
4887 if (i < 16 && TARGET_V8PLUS)
4888 sparc_regno_reg_class[i] = I64_REGS;
4889 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4890 sparc_regno_reg_class[i] = GENERAL_REGS;
4891 else if (i < 64)
4892 sparc_regno_reg_class[i] = FP_REGS;
4893 else if (i < 96)
4894 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4895 else if (i < 100)
4896 sparc_regno_reg_class[i] = FPCC_REGS;
4897 else
4898 sparc_regno_reg_class[i] = NO_REGS;
4899 }
4900 }
4901 \f
4902 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4903
4904 static inline bool
4905 save_global_or_fp_reg_p (unsigned int regno,
4906 int leaf_function ATTRIBUTE_UNUSED)
4907 {
4908 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4909 }
4910
4911 /* Return whether the return address register (%i7) is needed. */
4912
4913 static inline bool
4914 return_addr_reg_needed_p (int leaf_function)
4915 {
4916 /* If it is live, for example because of __builtin_return_address (0). */
4917 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4918 return true;
4919
4920 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4921 if (!leaf_function
4922 /* Loading the GOT register clobbers %o7. */
4923 || crtl->uses_pic_offset_table
4924 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4925 return true;
4926
4927 return false;
4928 }
4929
4930 /* Return whether REGNO, a local or in register, must be saved/restored. */
4931
4932 static bool
4933 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4934 {
4935 /* General case: call-saved registers live at some point. */
4936 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4937 return true;
4938
4939 /* Frame pointer register (%fp) if needed. */
4940 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4941 return true;
4942
4943 /* Return address register (%i7) if needed. */
4944 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4945 return true;
4946
4947 /* GOT register (%l7) if needed. */
4948 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4949 return true;
4950
4951 /* If the function accesses prior frames, the frame pointer and the return
4952 address of the previous frame must be saved on the stack. */
4953 if (crtl->accesses_prior_frames
4954 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
4955 return true;
4956
4957 return false;
4958 }
4959
4960 /* Compute the frame size required by the function. This function is called
4961 during the reload pass and also by sparc_expand_prologue. */
4962
4963 HOST_WIDE_INT
4964 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
4965 {
4966 HOST_WIDE_INT frame_size, apparent_frame_size;
4967 int args_size, n_global_fp_regs = 0;
4968 bool save_local_in_regs_p = false;
4969 unsigned int i;
4970
4971 /* If the function allocates dynamic stack space, the dynamic offset is
4972 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
4973 if (leaf_function && !cfun->calls_alloca)
4974 args_size = 0;
4975 else
4976 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
4977
4978 /* Calculate space needed for global registers. */
4979 if (TARGET_ARCH64)
{
4980 for (i = 0; i < 8; i++)
4981 if (save_global_or_fp_reg_p (i, 0))
4982 n_global_fp_regs += 2;
}
4983 else
{
4984 for (i = 0; i < 8; i += 2)
4985 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4986 n_global_fp_regs += 2;
}
4987
4988 /* In the flat window model, find out which local and in registers need to
4989 be saved. We don't reserve space in the current frame for them as they
4990 will be spilled into the register window save area of the caller's frame.
4991 However, as soon as we use this register window save area, we must create
4992 that of the current frame to make it the live one. */
4993 if (TARGET_FLAT)
4994 for (i = 16; i < 32; i++)
4995 if (save_local_or_in_reg_p (i, leaf_function))
4996 {
4997 save_local_in_regs_p = true;
4998 break;
4999 }
5000
5001 /* Calculate space needed for FP registers. */
5002 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5003 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5004 n_global_fp_regs += 2;
5005
5006 if (size == 0
5007 && n_global_fp_regs == 0
5008 && args_size == 0
5009 && !save_local_in_regs_p)
5010 frame_size = apparent_frame_size = 0;
5011 else
5012 {
5013 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
5014 apparent_frame_size = (size - STARTING_FRAME_OFFSET + 7) & -8;
5015 apparent_frame_size += n_global_fp_regs * 4;
5016
5017 /* We need to add the size of the outgoing argument area. */
5018 frame_size = apparent_frame_size + ((args_size + 7) & -8);
5019
5020 /* And that of the register window save area. */
5021 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5022
5023 /* Finally, bump to the appropriate alignment. */
5024 frame_size = SPARC_STACK_ALIGN (frame_size);
5025 }
5026
5027 /* Set up values for use in prologue and epilogue. */
5028 sparc_frame_size = frame_size;
5029 sparc_apparent_frame_size = apparent_frame_size;
5030 sparc_n_global_fp_regs = n_global_fp_regs;
5031 sparc_save_local_in_regs_p = save_local_in_regs_p;
5032
5033 return frame_size;
5034 }
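/* Worked example, taking STARTING_FRAME_OFFSET as 0 for simplicity: with
   20 bytes of locals, args_size == 36 and two register pairs to save
   (n_global_fp_regs == 4), the apparent frame size is
   ((20 + 7) & -8) + 4 * 4 == 40 and the full frame size is
   40 + ((36 + 7) & -8) == 80 plus the register window save area, before
   the final SPARC_STACK_ALIGN rounding.  */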
5035
5036 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5037
5038 int
5039 sparc_initial_elimination_offset (int to)
5040 {
5041 int offset;
5042
5043 if (to == STACK_POINTER_REGNUM)
5044 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5045 else
5046 offset = 0;
5047
5048 offset += SPARC_STACK_BIAS;
5049 return offset;
5050 }
5051
5052 /* Output any necessary .register pseudo-ops. */
5053
5054 void
5055 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5056 {
5057 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5058 int i;
5059
5060 if (TARGET_ARCH32)
5061 return;
5062
5063 /* Check if %g[2367] were used without
5064 .register being printed for them already. */
5065 for (i = 2; i < 8; i++)
5066 {
5067 if (df_regs_ever_live_p (i)
5068 && ! sparc_hard_reg_printed [i])
5069 {
5070 sparc_hard_reg_printed [i] = 1;
5071 /* %g7 is used as TLS base register, use #ignore
5072 for it instead of #scratch. */
5073 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5074 i == 7 ? "ignore" : "scratch");
5075 }
5076 if (i == 3) i = 5;
5077 }
5078 #endif
5079 }
5080
5081 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5082
5083 #if PROBE_INTERVAL > 4096
5084 #error Cannot use indexed addressing mode for stack probing
5085 #endif
5086
5087 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5088 inclusive. These are offsets from the current stack pointer.
5089
5090 Note that we don't use the REG+REG addressing mode for the probes because
5091 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5092 anyway, so the advantages of having a single code path win here. */
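/* Example with PROBE_INTERVAL == 4096: for FIRST == 4096 and SIZE == 10000,
   the second case below probes at SP - 8192 and SP - 12288 in sequence
   and then at SP - 14096, i.e. at FIRST + SIZE.  */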
5093
5094 static void
5095 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5096 {
5097 rtx g1 = gen_rtx_REG (Pmode, 1);
5098
5099 /* See if we have a constant small number of probes to generate. If so,
5100 that's the easy case. */
5101 if (size <= PROBE_INTERVAL)
5102 {
5103 emit_move_insn (g1, GEN_INT (first));
5104 emit_insn (gen_rtx_SET (VOIDmode, g1,
5105 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5106 emit_stack_probe (plus_constant (Pmode, g1, -size));
5107 }
5108
5109 /* The run-time loop is made up of 10 insns in the generic case while the
5110 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
5111 else if (size <= 5 * PROBE_INTERVAL)
5112 {
5113 HOST_WIDE_INT i;
5114
5115 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5116 emit_insn (gen_rtx_SET (VOIDmode, g1,
5117 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5118 emit_stack_probe (g1);
5119
5120 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5121 it exceeds SIZE. If only two probes are needed, this will not
5122 generate any code. Then probe at FIRST + SIZE. */
5123 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5124 {
5125 emit_insn (gen_rtx_SET (VOIDmode, g1,
5126 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5127 emit_stack_probe (g1);
5128 }
5129
5130 emit_stack_probe (plus_constant (Pmode, g1,
5131 (i - PROBE_INTERVAL) - size));
5132 }
5133
5134 /* Otherwise, do the same as above, but in a loop. Note that we must be
5135 extra careful with variables wrapping around because we might be at
5136 the very top (or the very bottom) of the address space and we have
5137 to be able to handle this case properly; in particular, we use an
5138 equality test for the loop condition. */
5139 else
5140 {
5141 HOST_WIDE_INT rounded_size;
5142 rtx g4 = gen_rtx_REG (Pmode, 4);
5143
5144 emit_move_insn (g1, GEN_INT (first));
5145
5146
5147 /* Step 1: round SIZE to the previous multiple of the interval. */
5148
5149 rounded_size = size & -PROBE_INTERVAL;
5150 emit_move_insn (g4, GEN_INT (rounded_size));
5151
5152
5153 /* Step 2: compute initial and final value of the loop counter. */
5154
5155 /* TEST_ADDR = SP + FIRST. */
5156 emit_insn (gen_rtx_SET (VOIDmode, g1,
5157 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5158
5159 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5160 emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4)));
5161
5162
5163 /* Step 3: the loop
5164
5165 while (TEST_ADDR != LAST_ADDR)
5166 {
5167 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5168 probe at TEST_ADDR
5169 }
5170
5171 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5172 until it is equal to ROUNDED_SIZE. */
5173
5174 if (TARGET_ARCH64)
5175 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5176 else
5177 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5178
5179
5180 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5181 that SIZE is equal to ROUNDED_SIZE. */
5182
5183 if (size != rounded_size)
5184 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5185 }
5186
5187 /* Make sure nothing is scheduled before we are done. */
5188 emit_insn (gen_blockage ());
5189 }
5190
5191 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5192 absolute addresses. */
5193
5194 const char *
5195 output_probe_stack_range (rtx reg1, rtx reg2)
5196 {
5197 static int labelno = 0;
5198 char loop_lab[32], end_lab[32];
5199 rtx xops[2];
5200
5201 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
5202 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
5203
5204 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5205
5206 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
5207 xops[0] = reg1;
5208 xops[1] = reg2;
5209 output_asm_insn ("cmp\t%0, %1", xops);
5210 if (TARGET_ARCH64)
5211 fputs ("\tbe,pn\t%xcc,", asm_out_file);
5212 else
5213 fputs ("\tbe\t", asm_out_file);
5214 assemble_name_raw (asm_out_file, end_lab);
5215 fputc ('\n', asm_out_file);
5216
5217 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5218 xops[1] = GEN_INT (-PROBE_INTERVAL);
5219 output_asm_insn (" add\t%0, %1, %0", xops);
5220
5221 /* Probe at TEST_ADDR and branch. */
5222 if (TARGET_ARCH64)
5223 fputs ("\tba,pt\t%xcc,", asm_out_file);
5224 else
5225 fputs ("\tba\t", asm_out_file);
5226 assemble_name_raw (asm_out_file, loop_lab);
5227 fputc ('\n', asm_out_file);
5228 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5229 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5230
5231 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
5232
5233 return "";
5234 }
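/* With the default 4096-byte interval, the 64-bit loop emitted above
   looks like:

	.LPSRL0:
		cmp	%g1, %g4
		be,pn	%xcc, .LPSRE0
		 add	%g1, -4096, %g1
		ba,pt	%xcc, .LPSRL0
		 st	%g0, [%g1+2047]
	.LPSRE0:

   with the decrement in the delay slot of the exit branch and the probe
   store in that of the back branch (the bias is 0 in 32-bit mode).  */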
5235
5236 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5237 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5238 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5239 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5240 the action to be performed if it returns false. Return the new offset. */
5241
5242 typedef bool (*sorr_pred_t) (unsigned int, int);
5243 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
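/* For example, emit_save_or_restore_regs (16, 32, base, offset,
   sparc_leaf_function_p, save_local_or_in_reg_p, SORR_SAVE, SORR_ADVANCE)
   walks %l0-%l7 and %i0-%i7, storing each register for which the
   predicate holds and advancing the offset past the slots of the skipped
   ones; see the callers below.  */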
5244
5245 static int
5246 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5247 int offset, int leaf_function, sorr_pred_t save_p,
5248 sorr_act_t action_true, sorr_act_t action_false)
5249 {
5250 unsigned int i;
5251 rtx mem;
5252 rtx_insn *insn;
5253
5254 if (TARGET_ARCH64 && high <= 32)
5255 {
5256 int fp_offset = -1;
5257
5258 for (i = low; i < high; i++)
5259 {
5260 if (save_p (i, leaf_function))
5261 {
5262 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5263 base, offset));
5264 if (action_true == SORR_SAVE)
5265 {
5266 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5267 RTX_FRAME_RELATED_P (insn) = 1;
5268 }
5269 else /* action_true == SORR_RESTORE */
5270 {
5271 /* The frame pointer must be restored last since its old
5272 value may be used as base address for the frame. This
5273 is problematic in 64-bit mode only because of the lack
5274 of double-word load instruction. */
5275 if (i == HARD_FRAME_POINTER_REGNUM)
5276 fp_offset = offset;
5277 else
5278 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5279 }
5280 offset += 8;
5281 }
5282 else if (action_false == SORR_ADVANCE)
5283 offset += 8;
5284 }
5285
5286 if (fp_offset >= 0)
5287 {
5288 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5289 emit_move_insn (hard_frame_pointer_rtx, mem);
5290 }
5291 }
5292 else
5293 {
5294 for (i = low; i < high; i += 2)
5295 {
5296 bool reg0 = save_p (i, leaf_function);
5297 bool reg1 = save_p (i + 1, leaf_function);
5298 enum machine_mode mode;
5299 int regno;
5300
5301 if (reg0 && reg1)
5302 {
5303 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5304 regno = i;
5305 }
5306 else if (reg0)
5307 {
5308 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5309 regno = i;
5310 }
5311 else if (reg1)
5312 {
5313 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5314 regno = i + 1;
5315 offset += 4;
5316 }
5317 else
5318 {
5319 if (action_false == SORR_ADVANCE)
5320 offset += 8;
5321 continue;
5322 }
5323
5324 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5325 if (action_true == SORR_SAVE)
5326 {
5327 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5328 RTX_FRAME_RELATED_P (insn) = 1;
5329 if (mode == DImode)
5330 {
5331 rtx set1, set2;
5332 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5333 offset));
5334 set1 = gen_rtx_SET (VOIDmode, mem,
5335 gen_rtx_REG (SImode, regno));
5336 RTX_FRAME_RELATED_P (set1) = 1;
5337 mem
5338 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5339 offset + 4));
5340 set2 = gen_rtx_SET (VOIDmode, mem,
5341 gen_rtx_REG (SImode, regno + 1));
5342 RTX_FRAME_RELATED_P (set2) = 1;
5343 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5344 gen_rtx_PARALLEL (VOIDmode,
5345 gen_rtvec (2, set1, set2)));
5346 }
5347 }
5348 else /* action_true == SORR_RESTORE */
5349 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5350
5351 /* Always preserve double-word alignment. */
5352 offset = (offset + 8) & -8;
5353 }
5354 }
5355
5356 return offset;
5357 }
5358
5359 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5360
5361 static rtx
5362 emit_adjust_base_to_offset (rtx base, int offset)
5363 {
5364 /* ??? This might be optimized a little as %g1 might already have a
5365 value close enough that a single add insn will do. */
5366 /* ??? Although, all of this is probably only a temporary fix because
5367 if %g1 can hold a function result, then sparc_expand_epilogue will
5368 lose (the result will be clobbered). */
5369 rtx new_base = gen_rtx_REG (Pmode, 1);
5370 emit_move_insn (new_base, GEN_INT (offset));
5371 emit_insn (gen_rtx_SET (VOIDmode,
5372 new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5373 return new_base;
5374 }
5375
5376 /* Emit code to save/restore call-saved global and FP registers. */
5377
5378 static void
5379 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5380 {
5381 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5382 {
5383 base = emit_adjust_base_to_offset (base, offset);
5384 offset = 0;
5385 }
5386
5387 offset
5388 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5389 save_global_or_fp_reg_p, action, SORR_NONE);
5390 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5391 save_global_or_fp_reg_p, action, SORR_NONE);
5392 }
5393
5394 /* Emit code to save/restore call-saved local and in registers. */
5395
5396 static void
5397 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5398 {
5399 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5400 {
5401 base = emit_adjust_base_to_offset (base, offset);
5402 offset = 0;
5403 }
5404
5405 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5406 save_local_or_in_reg_p, action, SORR_ADVANCE);
5407 }
5408
5409 /* Emit a window_save insn. */
5410
5411 static rtx_insn *
5412 emit_window_save (rtx increment)
5413 {
5414 rtx_insn *insn = emit_insn (gen_window_save (increment));
5415 RTX_FRAME_RELATED_P (insn) = 1;
5416
5417 /* The incoming return address (%o7) is saved in %i7. */
5418 add_reg_note (insn, REG_CFA_REGISTER,
5419 gen_rtx_SET (VOIDmode,
5420 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5421 gen_rtx_REG (Pmode,
5422 INCOMING_RETURN_ADDR_REGNUM)));
5423
5424 /* The window save event. */
5425 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5426
5427 /* The CFA is %fp, the hard frame pointer. */
5428 add_reg_note (insn, REG_CFA_DEF_CFA,
5429 plus_constant (Pmode, hard_frame_pointer_rtx,
5430 INCOMING_FRAME_SP_OFFSET));
5431
5432 return insn;
5433 }
5434
5435 /* Generate an increment for the stack pointer. */
5436
5437 static rtx
5438 gen_stack_pointer_inc (rtx increment)
5439 {
5440 return gen_rtx_SET (VOIDmode,
5441 stack_pointer_rtx,
5442 gen_rtx_PLUS (Pmode,
5443 stack_pointer_rtx,
5444 increment));
5445 }
5446
5447 /* Expand the function prologue. The prologue is responsible for reserving
5448 storage for the frame, saving the call-saved registers and loading the
5449 GOT register if needed. */
5450
5451 void
5452 sparc_expand_prologue (void)
5453 {
5454 HOST_WIDE_INT size;
5455 rtx_insn *insn;
5456
5457 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5458 on the final value of the flag means deferring the prologue/epilogue
5459 expansion until just before the second scheduling pass, which is too
5460 late to emit multiple epilogues or return insns.
5461
5462 Of course we are making the assumption that the value of the flag
5463 will not change between now and its final value. Of the three parts
5464 of the formula, only the last one can reasonably vary. Let's take a
5465 closer look, after assuming that the first two ones are set to true
5466 (otherwise the last value is effectively silenced).
5467
5468 If only_leaf_regs_used returns false, the global predicate will also
5469 be false so the actual frame size calculated below will be positive.
5470 As a consequence, the save_register_window insn will be emitted in
5471 the instruction stream; now this insn explicitly references %fp
5472 which is not a leaf register so only_leaf_regs_used will always
5473 return false subsequently.
5474
5475 If only_leaf_regs_used returns true, we hope that the subsequent
5476 optimization passes won't cause non-leaf registers to pop up. For
5477 example, the regrename pass has special provisions to not rename to
5478 non-leaf registers in a leaf function. */
5479 sparc_leaf_function_p
5480 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5481
5482 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5483
5484 if (flag_stack_usage_info)
5485 current_function_static_stack_size = size;
5486
5487 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5488 {
5489 if (crtl->is_leaf && !cfun->calls_alloca)
5490 {
5491 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5492 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5493 size - STACK_CHECK_PROTECT);
5494 }
5495 else if (size > 0)
5496 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5497 }
5498
5499 if (size == 0)
5500 ; /* do nothing. */
5501 else if (sparc_leaf_function_p)
5502 {
5503 rtx size_int_rtx = GEN_INT (-size);
5504
5505 if (size <= 4096)
5506 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5507 else if (size <= 8192)
5508 {
5509 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5510 RTX_FRAME_RELATED_P (insn) = 1;
5511
5512 /* %sp is still the CFA register. */
5513 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5514 }
5515 else
5516 {
5517 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5518 emit_move_insn (size_rtx, size_int_rtx);
5519 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5520 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5521 gen_stack_pointer_inc (size_int_rtx));
5522 }
5523
5524 RTX_FRAME_RELATED_P (insn) = 1;
5525 }
5526 else
5527 {
5528 rtx size_int_rtx = GEN_INT (-size);
5529
5530 if (size <= 4096)
5531 emit_window_save (size_int_rtx);
5532 else if (size <= 8192)
5533 {
5534 emit_window_save (GEN_INT (-4096));
5535
5536 /* %sp is not the CFA register anymore. */
5537 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5538
5539 /* Make sure no %fp-based store is issued until after the frame is
5540 established. The offset between the frame pointer and the stack
5541 pointer is calculated relative to the value of the stack pointer
5542 at the end of the function prologue, and moving instructions that
5543 access the stack via the frame pointer between the instructions
5544 that decrement the stack pointer could result in accessing the
5545 register window save area, which is volatile. */
5546 emit_insn (gen_frame_blockage ());
5547 }
5548 else
5549 {
5550 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5551 emit_move_insn (size_rtx, size_int_rtx);
5552 emit_window_save (size_rtx);
5553 }
5554 }
5555
5556 if (sparc_leaf_function_p)
5557 {
5558 sparc_frame_base_reg = stack_pointer_rtx;
5559 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5560 }
5561 else
5562 {
5563 sparc_frame_base_reg = hard_frame_pointer_rtx;
5564 sparc_frame_base_offset = SPARC_STACK_BIAS;
5565 }
5566
5567 if (sparc_n_global_fp_regs > 0)
5568 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5569 sparc_frame_base_offset
5570 - sparc_apparent_frame_size,
5571 SORR_SAVE);
5572
5573 /* Load the GOT register if needed. */
5574 if (crtl->uses_pic_offset_table)
5575 load_got_register ();
5576
5577 /* Advertise that the data calculated just above are now valid. */
5578 sparc_prologue_data_valid_p = true;
5579 }
5580
5581 /* Expand the function prologue in the flat window model. The prologue is
5582 responsible for reserving storage for the frame, saving the call-saved
5583 registers and loading the GOT register if needed. */
5584
5585 void
5586 sparc_flat_expand_prologue (void)
5587 {
5588 HOST_WIDE_INT size;
5589 rtx_insn *insn;
5590
5591 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5592
5593 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5594
5595 if (flag_stack_usage_info)
5596 current_function_static_stack_size = size;
5597
5598 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5599 {
5600 if (crtl->is_leaf && !cfun->calls_alloca)
5601 {
5602 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5603 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5604 size - STACK_CHECK_PROTECT);
5605 }
5606 else if (size > 0)
5607 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5608 }
5609
5610 if (sparc_save_local_in_regs_p)
5611 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5612 SORR_SAVE);
5613
5614 if (size == 0)
5615 ; /* do nothing. */
5616 else
5617 {
5618 rtx size_int_rtx, size_rtx;
5619
5620 size_rtx = size_int_rtx = GEN_INT (-size);
5621
5622 /* We establish the frame (i.e. decrement the stack pointer) first, even
5623 if we use a frame pointer, because we cannot clobber any call-saved
5624 registers, including the frame pointer, if we haven't created a new
5625 register save area, for the sake of compatibility with the ABI. */
5626 if (size <= 4096)
5627 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5628 else if (size <= 8192 && !frame_pointer_needed)
5629 {
5630 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5631 RTX_FRAME_RELATED_P (insn) = 1;
5632 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5633 }
5634 else
5635 {
5636 size_rtx = gen_rtx_REG (Pmode, 1);
5637 emit_move_insn (size_rtx, size_int_rtx);
5638 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5639 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5640 gen_stack_pointer_inc (size_int_rtx));
5641 }
5642 RTX_FRAME_RELATED_P (insn) = 1;
5643
5644 /* Ensure nothing is scheduled until after the frame is established. */
5645 emit_insn (gen_blockage ());
5646
5647 if (frame_pointer_needed)
5648 {
5649 insn = emit_insn (gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5650 gen_rtx_MINUS (Pmode,
5651 stack_pointer_rtx,
5652 size_rtx)));
5653 RTX_FRAME_RELATED_P (insn) = 1;
5654
5655 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5656 gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5657 plus_constant (Pmode, stack_pointer_rtx,
5658 size)));
5659 }
5660
5661 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5662 {
5663 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5664 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5665
5666 insn = emit_move_insn (i7, o7);
5667 RTX_FRAME_RELATED_P (insn) = 1;
5668
5669 add_reg_note (insn, REG_CFA_REGISTER,
5670 gen_rtx_SET (VOIDmode, i7, o7));
5671
5672 /* Prevent this instruction from ever being considered dead,
5673 even if this function has no epilogue. */
5674 emit_use (i7);
5675 }
5676 }
5677
5678 if (frame_pointer_needed)
5679 {
5680 sparc_frame_base_reg = hard_frame_pointer_rtx;
5681 sparc_frame_base_offset = SPARC_STACK_BIAS;
5682 }
5683 else
5684 {
5685 sparc_frame_base_reg = stack_pointer_rtx;
5686 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5687 }
5688
5689 if (sparc_n_global_fp_regs > 0)
5690 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5691 sparc_frame_base_offset
5692 - sparc_apparent_frame_size,
5693 SORR_SAVE);
5694
5695 /* Load the GOT register if needed. */
5696 if (crtl->uses_pic_offset_table)
5697 load_got_register ();
5698
5699 /* Advertise that the data calculated just above are now valid. */
5700 sparc_prologue_data_valid_p = true;
5701 }
5702
5703 /* This function generates the assembly code for function entry, which boils
5704 down to emitting the necessary .register directives. */
5705
5706 static void
5707 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5708 {
5709 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5710 if (!TARGET_FLAT)
5711 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5712
5713 sparc_output_scratch_registers (file);
5714 }
5715
5716 /* Expand the function epilogue, either normal or part of a sibcall.
5717 We emit all the instructions except the return or the call. */
5718
5719 void
5720 sparc_expand_epilogue (bool for_eh)
5721 {
5722 HOST_WIDE_INT size = sparc_frame_size;
5723
5724 if (sparc_n_global_fp_regs > 0)
5725 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5726 sparc_frame_base_offset
5727 - sparc_apparent_frame_size,
5728 SORR_RESTORE);
5729
5730 if (size == 0 || for_eh)
5731 ; /* do nothing. */
5732 else if (sparc_leaf_function_p)
5733 {
5734 if (size <= 4096)
5735 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5736 else if (size <= 8192)
5737 {
5738 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5739 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5740 }
5741 else
5742 {
5743 rtx reg = gen_rtx_REG (Pmode, 1);
5744 emit_move_insn (reg, GEN_INT (size));
5745 emit_insn (gen_stack_pointer_inc (reg));
5746 }
5747 }
5748 }
5749
5750 /* Expand the function epilogue, either normal or part of a sibcall.
5751 We emit all the instructions except the return or the call. */
5752
5753 void
5754 sparc_flat_expand_epilogue (bool for_eh)
5755 {
5756 HOST_WIDE_INT size = sparc_frame_size;
5757
5758 if (sparc_n_global_fp_regs > 0)
5759 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5760 sparc_frame_base_offset
5761 - sparc_apparent_frame_size,
5762 SORR_RESTORE);
5763
5764 /* If we have a frame pointer, we'll need both to restore it before the
5765 frame is destroyed and to use its current value in destroying the frame.
5766 Since we don't have an atomic way to do that in the flat window model,
5767 we save the current value into a temporary register (%g1). */
5768 if (frame_pointer_needed && !for_eh)
5769 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5770
5771 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5772 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5773 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5774
5775 if (sparc_save_local_in_regs_p)
5776 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5777 sparc_frame_base_offset,
5778 SORR_RESTORE);
5779
5780 if (size == 0 || for_eh)
5781 ; /* do nothing. */
5782 else if (frame_pointer_needed)
5783 {
5784 /* Make sure the frame is destroyed after everything else is done. */
5785 emit_insn (gen_blockage ());
5786
5787 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5788 }
5789 else
5790 {
5791 /* Likewise. */
5792 emit_insn (gen_blockage ());
5793
5794 if (size <= 4096)
5795 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5796 else if (size <= 8192)
5797 {
5798 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5799 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5800 }
5801 else
5802 {
5803 rtx reg = gen_rtx_REG (Pmode, 1);
5804 emit_move_insn (reg, GEN_INT (size));
5805 emit_insn (gen_stack_pointer_inc (reg));
5806 }
5807 }
5808 }
5809
5810 /* Return true if it is appropriate to emit `return' instructions in the
5811 body of a function. */
5812
5813 bool
5814 sparc_can_use_return_insn_p (void)
5815 {
5816 return sparc_prologue_data_valid_p
5817 && sparc_n_global_fp_regs == 0
5818 && (TARGET_FLAT
5819 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5820 : (sparc_frame_size == 0 || !sparc_leaf_function_p));
5821 }
5822
5823 /* This function generates the assembly code for function exit. */
5824
5825 static void
5826 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5827 {
5828 /* If the last two instructions of a function are "call foo; dslot;",
5829 the return address might point to the first instruction in the next
5830 function and we have to output a dummy nop for the sake of sane
5831 backtraces in such cases. This is pointless for sibling calls since
5832 the return address is explicitly adjusted. */
5833
5834 rtx insn, last_real_insn;
5835
5836 insn = get_last_insn ();
5837
5838 last_real_insn = prev_real_insn (insn);
5839 if (last_real_insn
5840 && NONJUMP_INSN_P (last_real_insn)
5841 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5842 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5843
5844 if (last_real_insn
5845 && CALL_P (last_real_insn)
5846 && !SIBLING_CALL_P (last_real_insn))
5847 fputs ("\tnop\n", file);
5848
5849 sparc_output_deferred_case_vectors ();
5850 }
5851
5852 /* Output a 'restore' instruction. */
5853
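/* For illustration (hypothetical operands), the cases below map roughly to:

     (set %i0 (plus %i1 (const_int 5)))   ->  restore %i1, 5, %o0
     (set %i0 (lo_sum %i1 (symbol_ref)))  ->  restore %i1, %lo(sym), %o0
     (set %i0 (ashift %i1 1))             ->  restore %i1, %i1, %o0
     (set %i0 %i1)                        ->  restore %g0, %i1, %o0

   The ASHIFT case exploits x << 1 == x + x. The %Y0 modifier prints the
   destination as seen in the caller's window, hence %i0 -> %o0. */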
5854 static void
5855 output_restore (rtx pat)
5856 {
5857 rtx operands[3];
5858
5859 if (! pat)
5860 {
5861 fputs ("\t restore\n", asm_out_file);
5862 return;
5863 }
5864
5865 gcc_assert (GET_CODE (pat) == SET);
5866
5867 operands[0] = SET_DEST (pat);
5868 pat = SET_SRC (pat);
5869
5870 switch (GET_CODE (pat))
5871 {
5872 case PLUS:
5873 operands[1] = XEXP (pat, 0);
5874 operands[2] = XEXP (pat, 1);
5875 output_asm_insn (" restore %r1, %2, %Y0", operands);
5876 break;
5877 case LO_SUM:
5878 operands[1] = XEXP (pat, 0);
5879 operands[2] = XEXP (pat, 1);
5880 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5881 break;
5882 case ASHIFT:
5883 operands[1] = XEXP (pat, 0);
5884 gcc_assert (XEXP (pat, 1) == const1_rtx);
5885 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5886 break;
5887 default:
5888 operands[1] = pat;
5889 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5890 break;
5891 }
5892 }
5893
5894 /* Output a return. */
5895
5896 const char *
5897 output_return (rtx_insn *insn)
5898 {
5899 if (crtl->calls_eh_return)
5900 {
5901 /* If the function uses __builtin_eh_return, the eh_return
5902 machinery occupies the delay slot. */
5903 gcc_assert (!final_sequence);
5904
5905 if (flag_delayed_branch)
5906 {
5907 if (!TARGET_FLAT && TARGET_V9)
5908 fputs ("\treturn\t%i7+8\n", asm_out_file);
5909 else
5910 {
5911 if (!TARGET_FLAT)
5912 fputs ("\trestore\n", asm_out_file);
5913
5914 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5915 }
5916
5917 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5918 }
5919 else
5920 {
5921 if (!TARGET_FLAT)
5922 fputs ("\trestore\n", asm_out_file);
5923
5924 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5925 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5926 }
5927 }
5928 else if (sparc_leaf_function_p || TARGET_FLAT)
5929 {
5930 /* This is a leaf or flat function so we don't have to bother restoring
5931 the register window, which frees us from dealing with the convoluted
5932 semantics of restore/return. We simply output the jump to the
5933 return address and the insn in the delay slot (if any). */
5934
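      /* A note on the output modifiers used below: %) prints the return
	 displacement (8, or 12 in 32-bit mode when the caller follows the
	 call with a struct-return "unimp" word that must be skipped) and
	 %# emits a trailing nop if the delay slot is empty. */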
5935 return "jmp\t%%o7+%)%#";
5936 }
5937 else
5938 {
5939 /* This is a regular function so we have to restore the register window.
5940 We may have a pending insn for the delay slot, which will be either
5941 combined with the 'restore' instruction or put in the delay slot of
5942 the 'return' instruction. */
5943
5944 if (final_sequence)
5945 {
5946 rtx delay, pat;
5947
5948 delay = NEXT_INSN (insn);
5949 gcc_assert (delay);
5950
5951 pat = PATTERN (delay);
5952
5953 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
5954 {
5955 epilogue_renumber (&pat, 0);
5956 return "return\t%%i7+%)%#";
5957 }
5958 else
5959 {
5960 output_asm_insn ("jmp\t%%i7+%)", NULL);
5961 output_restore (pat);
5962 PATTERN (delay) = gen_blockage ();
5963 INSN_CODE (delay) = -1;
5964 }
5965 }
5966 else
5967 {
5968 /* The delay slot is empty. */
5969 if (TARGET_V9)
5970 return "return\t%%i7+%)\n\t nop";
5971 else if (flag_delayed_branch)
5972 return "jmp\t%%i7+%)\n\t restore";
5973 else
5974 return "restore\n\tjmp\t%%o7+%)\n\t nop";
5975 }
5976 }
5977
5978 return "";
5979 }
5980
5981 /* Output a sibling call. */
5982
5983 const char *
5984 output_sibcall (rtx_insn *insn, rtx call_operand)
5985 {
5986 rtx operands[1];
5987
5988 gcc_assert (flag_delayed_branch);
5989
5990 operands[0] = call_operand;
5991
5992 if (sparc_leaf_function_p || TARGET_FLAT)
5993 {
5994 /* This is a leaf or flat function so we don't have to bother restoring
5995 the register window. We simply output the jump to the function and
5996 the insn in the delay slot (if any). */
5997
5998 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
5999
6000 if (final_sequence)
6001 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6002 operands);
6003 else
6004 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6005 it into a branch if possible. */
6006 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6007 operands);
6008 }
6009 else
6010 {
6011 /* This is a regular function so we have to restore the register window.
6012 We may have a pending insn for the delay slot, which will be combined
6013 with the 'restore' instruction. */
6014
6015 output_asm_insn ("call\t%a0, 0", operands);
6016
6017 if (final_sequence)
6018 {
6019 rtx_insn *delay = NEXT_INSN (insn);
6020 gcc_assert (delay);
6021
6022 output_restore (PATTERN (delay));
6023
6024 PATTERN (delay) = gen_blockage ();
6025 INSN_CODE (delay) = -1;
6026 }
6027 else
6028 output_restore (NULL_RTX);
6029 }
6030
6031 return "";
6032 }
6033 \f
6034 /* Functions for handling argument passing.
6035
6036 For 32-bit, the first 6 args are normally in registers and the rest are
6037 pushed. Any arg that starts within the first 6 words is at least
6038 partially passed in a register unless its data type forbids it.
6039
6040 For 64-bit, the argument registers are laid out as an array of 16 elements
6041 and arguments are added sequentially. The first 6 int args and up to the
6042 first 16 fp args (depending on size) are passed in regs.
6043
6044 Slot Stack Integral Float Float in structure Double Long Double
6045 ---- ----- -------- ----- ------------------ ------ -----------
6046 15 [SP+248] %f31 %f30,%f31 %d30
6047 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6048 13 [SP+232] %f27 %f26,%f27 %d26
6049 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6050 11 [SP+216] %f23 %f22,%f23 %d22
6051 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6052 9 [SP+200] %f19 %f18,%f19 %d18
6053 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6054 7 [SP+184] %f15 %f14,%f15 %d14
6055 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6056 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6057 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6058 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6059 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6060 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6061 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6062
6063 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6064
6065 Integral arguments are always passed as 64-bit quantities appropriately
6066 extended.
6067
6068 Passing of floating point values is handled as follows.
6069 If a prototype is in scope:
6070 If the value is a named argument (i.e. the function is not a stdarg
6071 function, or the value is not part of the `...'), then it is passed
6072 in the appropriate fp reg.
6073 If the value is part of the `...' and is passed in one of the first 6
6074 slots then the value is passed in the appropriate int reg.
6075 If the value is part of the `...' and is not passed in one of the first 6
6076 slots then the value is passed in memory.
6077 If a prototype is not in scope:
6078 If the value is one of the first 6 arguments the value is passed in the
6079 appropriate integer reg and the appropriate fp reg.
6080 If the value is not one of the first 6 arguments the value is passed in
6081 the appropriate fp reg and in memory.
6082
6083
6084 Summary of the calling conventions implemented by GCC on the SPARC:
6085
6086 32-bit ABI:
6087 size argument return value
6088
6089 small integer <4 int. reg. int. reg.
6090 word 4 int. reg. int. reg.
6091 double word 8 int. reg. int. reg.
6092
6093 _Complex small integer <8 int. reg. int. reg.
6094 _Complex word 8 int. reg. int. reg.
6095 _Complex double word 16 memory int. reg.
6096
6097 vector integer <=8 int. reg. FP reg.
6098 vector integer >8 memory memory
6099
6100 float 4 int. reg. FP reg.
6101 double 8 int. reg. FP reg.
6102 long double 16 memory memory
6103
6104 _Complex float 8 memory FP reg.
6105 _Complex double 16 memory FP reg.
6106 _Complex long double 32 memory FP reg.
6107
6108 vector float any memory memory
6109
6110 aggregate any memory memory
6111
6112
6113
6114 64-bit ABI:
6115 size argument return value
6116
6117 small integer <8 int. reg. int. reg.
6118 word 8 int. reg. int. reg.
6119 double word 16 int. reg. int. reg.
6120
6121 _Complex small integer <16 int. reg. int. reg.
6122 _Complex word 16 int. reg. int. reg.
6123 _Complex double word 32 memory int. reg.
6124
6125 vector integer <=16 FP reg. FP reg.
6126 vector integer 16<s<=32 memory FP reg.
6127 vector integer >32 memory memory
6128
6129 float 4 FP reg. FP reg.
6130 double 8 FP reg. FP reg.
6131 long double 16 FP reg. FP reg.
6132
6133 _Complex float 8 FP reg. FP reg.
6134 _Complex double 16 FP reg. FP reg.
6135 _Complex long double 32 memory FP reg.
6136
6137 vector float <=16 FP reg. FP reg.
6138 vector float 16<s<=32 memory FP reg.
6139 vector float >32 memory memory
6140
6141 aggregate <=16 reg. reg.
6142 aggregate 16<s<=32 memory reg.
6143 aggregate >32 memory memory
6144
6145
6146
6147 Note #1: complex floating-point types follow the extended SPARC ABIs as
6148 implemented by the Sun compiler.
6149
6150 Note #2: integral vector types follow the scalar floating-point types
6151 conventions to match what is implemented by the Sun VIS SDK.
6152
6153 Note #3: floating-point vector types follow the aggregate types
6154 conventions. */
6155
6156
6157 /* Maximum number of int regs for args. */
6158 #define SPARC_INT_ARG_MAX 6
6159 /* Maximum number of fp regs for args. */
6160 #define SPARC_FP_ARG_MAX 16
6161
6162 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
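 /* For example, with 8-byte words ROUND_ADVANCE (12) is 2: a 12-byte
    argument occupies two argument slots. */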
6163
6164 /* Handle the INIT_CUMULATIVE_ARGS macro.
6165 Initialize a variable CUM of type CUMULATIVE_ARGS
6166 for a call to a function whose data type is FNTYPE.
6167 For a library call, FNTYPE is 0. */
6168
6169 void
6170 init_cumulative_args (struct sparc_args *cum, tree fntype,
6171 rtx libname ATTRIBUTE_UNUSED,
6172 tree fndecl ATTRIBUTE_UNUSED)
6173 {
6174 cum->words = 0;
6175 cum->prototype_p = fntype && prototype_p (fntype);
6176 cum->libcall_p = fntype == 0;
6177 }
6178
6179 /* Handle promotion of pointer and integer arguments. */
6180
6181 static enum machine_mode
6182 sparc_promote_function_mode (const_tree type,
6183 enum machine_mode mode,
6184 int *punsignedp,
6185 const_tree fntype ATTRIBUTE_UNUSED,
6186 int for_return ATTRIBUTE_UNUSED)
6187 {
6188 if (type != NULL_TREE && POINTER_TYPE_P (type))
6189 {
6190 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6191 return Pmode;
6192 }
6193
6194 /* Integral arguments are passed as full words, as per the ABI. */
6195 if (GET_MODE_CLASS (mode) == MODE_INT
6196 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6197 return word_mode;
6198
6199 return mode;
6200 }
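 /* For instance, on TARGET_ARCH64 a 'short' argument is promoted to
    DImode (word_mode) and passed as a full 64-bit quantity, while
    pointers are extended according to POINTERS_EXTEND_UNSIGNED. */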
6201
6202 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6203
6204 static bool
6205 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6206 {
6207 return TARGET_ARCH64;
6208 }
6209
6210 /* Scan the record type TYPE and return the following predicates:
6211 - INTREGS_P: the record contains at least one field or sub-field
6212 that is eligible for promotion in integer registers.
6213 - FPREGS_P: the record contains at least one field or sub-field
6214 that is eligible for promotion in floating-point registers.
6215 - PACKED_P: the record contains at least one field that is packed.
6216
6217 Sub-fields are not taken into account for the PACKED_P predicate. */
6218
6219 static void
6220 scan_record_type (const_tree type, int *intregs_p, int *fpregs_p,
6221 int *packed_p)
6222 {
6223 tree field;
6224
6225 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6226 {
6227 if (TREE_CODE (field) == FIELD_DECL)
6228 {
6229 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6230 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
6231 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6232 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6233 && TARGET_FPU)
6234 *fpregs_p = 1;
6235 else
6236 *intregs_p = 1;
6237
6238 if (packed_p && DECL_PACKED (field))
6239 *packed_p = 1;
6240 }
6241 }
6242 }
6243
6244 /* Compute the slot number to pass an argument in.
6245 Return the slot number or -1 if passing on the stack.
6246
6247 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6248 the preceding args and about the function being called.
6249 MODE is the argument's machine mode.
6250 TYPE is the data type of the argument (as a tree).
6251 This is null for libcalls where that information may
6252 not be available.
6253 NAMED is nonzero if this argument is a named parameter
6254 (otherwise it is an extra parameter matching an ellipsis).
6255 INCOMING_P is false for TARGET_FUNCTION_ARG, true for TARGET_FUNCTION_INCOMING_ARG.
6256 *PREGNO records the register number to use if the argument is of scalar type.
6257 *PPADDING records the amount of padding needed in words. */
6258
6259 static int
6260 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
6261 const_tree type, bool named, bool incoming_p,
6262 int *pregno, int *ppadding)
6263 {
6264 int regbase = (incoming_p
6265 ? SPARC_INCOMING_INT_ARG_FIRST
6266 : SPARC_OUTGOING_INT_ARG_FIRST);
6267 int slotno = cum->words;
6268 enum mode_class mclass;
6269 int regno;
6270
6271 *ppadding = 0;
6272
6273 if (type && TREE_ADDRESSABLE (type))
6274 return -1;
6275
6276 if (TARGET_ARCH32
6277 && mode == BLKmode
6278 && type
6279 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6280 return -1;
6281
6282 /* For SPARC64, objects requiring 16-byte alignment get it. */
6283 if (TARGET_ARCH64
6284 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6285 && (slotno & 1) != 0)
6286 slotno++, *ppadding = 1;
6287
6288 mclass = GET_MODE_CLASS (mode);
6289 if (type && TREE_CODE (type) == VECTOR_TYPE)
6290 {
6291 /* Vector types deserve special treatment because they are
6292 polymorphic wrt their mode, depending upon whether VIS
6293 instructions are enabled. */
6294 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6295 {
6296 /* The SPARC port defines no floating-point vector modes. */
6297 gcc_assert (mode == BLKmode);
6298 }
6299 else
6300 {
6301 /* Integral vector types should either have a vector
6302 mode or an integral mode, because we are guaranteed
6303 by pass_by_reference that their size is not greater
6304 than 16 bytes and TImode is 16-byte wide. */
6305 gcc_assert (mode != BLKmode);
6306
6307 /* Vector integers are handled like floats according to
6308 the Sun VIS SDK. */
6309 mclass = MODE_FLOAT;
6310 }
6311 }
6312
6313 switch (mclass)
6314 {
6315 case MODE_FLOAT:
6316 case MODE_COMPLEX_FLOAT:
6317 case MODE_VECTOR_INT:
6318 if (TARGET_ARCH64 && TARGET_FPU && named)
6319 {
6320 if (slotno >= SPARC_FP_ARG_MAX)
6321 return -1;
6322 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6323 /* Arguments filling only a single FP register are
6324 right-justified in the enclosing double FP register. */
6325 if (GET_MODE_SIZE (mode) <= 4)
6326 regno++;
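	    /* E.g. a single float in slot 0 goes in %f1, the second half
	       of the first double slot, rather than in %f0. */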
6327 break;
6328 }
6329 /* fallthrough */
6330
6331 case MODE_INT:
6332 case MODE_COMPLEX_INT:
6333 if (slotno >= SPARC_INT_ARG_MAX)
6334 return -1;
6335 regno = regbase + slotno;
6336 break;
6337
6338 case MODE_RANDOM:
6339 if (mode == VOIDmode)
6340 /* MODE is VOIDmode when generating the actual call. */
6341 return -1;
6342
6343 gcc_assert (mode == BLKmode);
6344
6345 if (TARGET_ARCH32
6346 || !type
6347 || (TREE_CODE (type) != VECTOR_TYPE
6348 && TREE_CODE (type) != RECORD_TYPE))
6349 {
6350 if (slotno >= SPARC_INT_ARG_MAX)
6351 return -1;
6352 regno = regbase + slotno;
6353 }
6354 else /* TARGET_ARCH64 && type */
6355 {
6356 int intregs_p = 0, fpregs_p = 0, packed_p = 0;
6357
6358 /* First see what kinds of registers we would need. */
6359 if (TREE_CODE (type) == VECTOR_TYPE)
6360 fpregs_p = 1;
6361 else
6362 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
6363
6364 /* The ABI obviously doesn't specify how packed structures
6365 are passed. These are defined to be passed in int regs
6366 if possible, otherwise memory. */
6367 if (packed_p || !named)
6368 fpregs_p = 0, intregs_p = 1;
6369
6370 /* If all arg slots are filled, then must pass on stack. */
6371 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
6372 return -1;
6373
6374 /* If there are only int args and all int arg slots are filled,
6375 then must pass on stack. */
6376 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
6377 return -1;
6378
6379 /* Note that even if all int arg slots are filled, fp members may
6380 still be passed in regs if such regs are available.
6381 *PREGNO isn't set because there may be more than one; it's up
6382 to the caller to compute them. */
6383 return slotno;
6384 }
6385 break;
6386
6387 default:
6388 gcc_unreachable ();
6389 }
6390
6391 *pregno = regno;
6392 return slotno;
6393 }
6394
6395 /* Handle recursive register counting for structure field layout. */
6396
6397 struct function_arg_record_value_parms
6398 {
6399 rtx ret; /* return expression being built. */
6400 int slotno; /* slot number of the argument. */
6401 int named; /* whether the argument is named. */
6402 int regbase; /* regno of the base register. */
6403 int stack; /* 1 if part of the argument is on the stack. */
6404 int intoffset; /* offset of the first pending integer field. */
6405 unsigned int nregs; /* number of words passed in registers. */
6406 };
6407
6408 static void function_arg_record_value_3
6409 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
6410 static void function_arg_record_value_2
6411 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6412 static void function_arg_record_value_1
6413 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6414 static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
6415 static rtx function_arg_union_value (int, enum machine_mode, int, int);
6416
6417 /* A subroutine of function_arg_record_value. Traverse the structure
6418 recursively and determine how many registers will be required. */
6419
6420 static void
6421 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
6422 struct function_arg_record_value_parms *parms,
6423 bool packed_p)
6424 {
6425 tree field;
6426
6427 /* We need to compute how many registers are needed so we can
6428 allocate the PARALLEL but before we can do that we need to know
6429 whether there are any packed fields. The ABI obviously doesn't
6430 specify how structures are passed in this case, so they are
6431 defined to be passed in int regs if possible, otherwise memory,
6432 regardless of whether there are fp values present. */
6433
6434 if (! packed_p)
6435 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6436 {
6437 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6438 {
6439 packed_p = true;
6440 break;
6441 }
6442 }
6443
6444 /* Compute how many registers we need. */
6445 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6446 {
6447 if (TREE_CODE (field) == FIELD_DECL)
6448 {
6449 HOST_WIDE_INT bitpos = startbitpos;
6450
6451 if (DECL_SIZE (field) != 0)
6452 {
6453 if (integer_zerop (DECL_SIZE (field)))
6454 continue;
6455
6456 if (tree_fits_uhwi_p (bit_position (field)))
6457 bitpos += int_bit_position (field);
6458 }
6459
6460 /* ??? FIXME: else assume zero offset. */
6461
6462 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6463 function_arg_record_value_1 (TREE_TYPE (field),
6464 bitpos,
6465 parms,
6466 packed_p);
6467 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6468 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6469 && TARGET_FPU
6470 && parms->named
6471 && ! packed_p)
6472 {
6473 if (parms->intoffset != -1)
6474 {
6475 unsigned int startbit, endbit;
6476 int intslots, this_slotno;
6477
6478 startbit = parms->intoffset & -BITS_PER_WORD;
6479 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6480
6481 intslots = (endbit - startbit) / BITS_PER_WORD;
6482 this_slotno = parms->slotno + parms->intoffset
6483 / BITS_PER_WORD;
6484
6485 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6486 {
6487 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6488 /* We need to pass this field on the stack. */
6489 parms->stack = 1;
6490 }
6491
6492 parms->nregs += intslots;
6493 parms->intoffset = -1;
6494 }
6495
6496 /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
6497 If it wasn't true we wouldn't be here. */
6498 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6499 && DECL_MODE (field) == BLKmode)
6500 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6501 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6502 parms->nregs += 2;
6503 else
6504 parms->nregs += 1;
6505 }
6506 else
6507 {
6508 if (parms->intoffset == -1)
6509 parms->intoffset = bitpos;
6510 }
6511 }
6512 }
6513 }
6514
6515 /* A subroutine of function_arg_record_value. Assign the bits of the
6516 structure between parms->intoffset and bitpos to integer registers. */
6517
6518 static void
6519 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
6520 struct function_arg_record_value_parms *parms)
6521 {
6522 enum machine_mode mode;
6523 unsigned int regno;
6524 unsigned int startbit, endbit;
6525 int this_slotno, intslots, intoffset;
6526 rtx reg;
6527
6528 if (parms->intoffset == -1)
6529 return;
6530
6531 intoffset = parms->intoffset;
6532 parms->intoffset = -1;
6533
6534 startbit = intoffset & -BITS_PER_WORD;
6535 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6536 intslots = (endbit - startbit) / BITS_PER_WORD;
6537 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
6538
6539 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
6540 if (intslots <= 0)
6541 return;
6542
6543 /* If this is the trailing part of a word, only load that much into
6544 the register. Otherwise load the whole register. Note that in
6545 the latter case we may pick up unwanted bits. It's not a problem
6546 at the moment but we may wish to revisit this. */
6547
6548 if (intoffset % BITS_PER_WORD != 0)
6549 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6550 MODE_INT);
6551 else
6552 mode = word_mode;
6553
6554 intoffset /= BITS_PER_UNIT;
6555 do
6556 {
6557 regno = parms->regbase + this_slotno;
6558 reg = gen_rtx_REG (mode, regno);
6559 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6560 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6561
6562 this_slotno += 1;
6563 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
6564 mode = word_mode;
6565 parms->nregs += 1;
6566 intslots -= 1;
6567 }
6568 while (intslots > 0);
6569 }
6570
6571 /* A subroutine of function_arg_record_value. Traverse the structure
6572 recursively and assign bits to floating point registers. Track which
6573 bits in between need integer registers; invoke function_arg_record_value_3
6574 to make that happen. */
6575
6576 static void
6577 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
6578 struct function_arg_record_value_parms *parms,
6579 bool packed_p)
6580 {
6581 tree field;
6582
6583 if (! packed_p)
6584 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6585 {
6586 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6587 {
6588 packed_p = true;
6589 break;
6590 }
6591 }
6592
6593 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6594 {
6595 if (TREE_CODE (field) == FIELD_DECL)
6596 {
6597 HOST_WIDE_INT bitpos = startbitpos;
6598
6599 if (DECL_SIZE (field) != 0)
6600 {
6601 if (integer_zerop (DECL_SIZE (field)))
6602 continue;
6603
6604 if (tree_fits_uhwi_p (bit_position (field)))
6605 bitpos += int_bit_position (field);
6606 }
6607
6608 /* ??? FIXME: else assume zero offset. */
6609
6610 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6611 function_arg_record_value_2 (TREE_TYPE (field),
6612 bitpos,
6613 parms,
6614 packed_p);
6615 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6616 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6617 && TARGET_FPU
6618 && parms->named
6619 && ! packed_p)
6620 {
6621 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
6622 int regno, nregs, pos;
6623 enum machine_mode mode = DECL_MODE (field);
6624 rtx reg;
6625
6626 function_arg_record_value_3 (bitpos, parms);
6627
6628 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6629 && mode == BLKmode)
6630 {
6631 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6632 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6633 }
6634 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6635 {
6636 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6637 nregs = 2;
6638 }
6639 else
6640 nregs = 1;
6641
6642 regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6643 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6644 regno++;
6645 reg = gen_rtx_REG (mode, regno);
6646 pos = bitpos / BITS_PER_UNIT;
6647 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6648 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6649 parms->nregs += 1;
6650 while (--nregs > 0)
6651 {
6652 regno += GET_MODE_SIZE (mode) / 4;
6653 reg = gen_rtx_REG (mode, regno);
6654 pos += GET_MODE_SIZE (mode);
6655 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6656 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6657 parms->nregs += 1;
6658 }
6659 }
6660 else
6661 {
6662 if (parms->intoffset == -1)
6663 parms->intoffset = bitpos;
6664 }
6665 }
6666 }
6667 }
6668
6669 /* Used by function_arg and sparc_function_value_1 to implement the complex
6670 conventions of the 64-bit ABI for passing and returning structures.
6671 Return an expression valid as a return value for the FUNCTION_ARG
6672 and TARGET_FUNCTION_VALUE.
6673
6674 TYPE is the data type of the argument (as a tree).
6675 This is null for libcalls where that information may
6676 not be available.
6677 MODE is the argument's machine mode.
6678 SLOTNO is the index number of the argument's slot in the parameter array.
6679 NAMED is nonzero if this argument is a named parameter
6680 (otherwise it is an extra parameter matching an ellipsis).
6681 REGBASE is the regno of the base register for the parameter array. */
6682
6683 static rtx
6684 function_arg_record_value (const_tree type, enum machine_mode mode,
6685 int slotno, int named, int regbase)
6686 {
6687 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6688 struct function_arg_record_value_parms parms;
6689 unsigned int nregs;
6690
6691 parms.ret = NULL_RTX;
6692 parms.slotno = slotno;
6693 parms.named = named;
6694 parms.regbase = regbase;
6695 parms.stack = 0;
6696
6697 /* Compute how many registers we need. */
6698 parms.nregs = 0;
6699 parms.intoffset = 0;
6700 function_arg_record_value_1 (type, 0, &parms, false);
6701
6702 /* Take into account pending integer fields. */
6703 if (parms.intoffset != -1)
6704 {
6705 unsigned int startbit, endbit;
6706 int intslots, this_slotno;
6707
6708 startbit = parms.intoffset & -BITS_PER_WORD;
6709 endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6710 intslots = (endbit - startbit) / BITS_PER_WORD;
6711 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
6712
6713 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6714 {
6715 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6716 /* We need to pass this field on the stack. */
6717 parms.stack = 1;
6718 }
6719
6720 parms.nregs += intslots;
6721 }
6722 nregs = parms.nregs;
6723
6724 /* Allocate the vector and handle some annoying special cases. */
6725 if (nregs == 0)
6726 {
6727 /* ??? Empty structure has no value? Duh? */
6728 if (typesize <= 0)
6729 {
6730 /* Though there's nothing really to store, return a word register
6731 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6732 leads to breakage due to the fact that there are zero bytes to
6733 load. */
6734 return gen_rtx_REG (mode, regbase);
6735 }
6736 else
6737 {
6738 /* ??? C++ has structures with no fields, and yet a size. Give up
6739 for now and pass everything back in integer registers. */
6740 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6741 }
6742 if (nregs + slotno > SPARC_INT_ARG_MAX)
6743 nregs = SPARC_INT_ARG_MAX - slotno;
6744 }
6745 gcc_assert (nregs != 0);
6746
6747 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
6748
6749 /* If at least one field must be passed on the stack, generate
6750 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6751 also be passed on the stack. We can't do much better because the
6752 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6753 of structures for which the fields passed exclusively in registers
6754 are not at the beginning of the structure. */
6755 if (parms.stack)
6756 XVECEXP (parms.ret, 0, 0)
6757 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6758
6759 /* Fill in the entries. */
6760 parms.nregs = 0;
6761 parms.intoffset = 0;
6762 function_arg_record_value_2 (type, 0, &parms, false);
6763 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
6764
6765 gcc_assert (parms.nregs == nregs);
6766
6767 return parms.ret;
6768 }
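 /* As a hypothetical example, a 16-byte 'struct { double d; long l; }'
    passed as an outgoing argument in slot 0 would yield a PARALLEL
    along the lines of:

      (parallel [(expr_list (reg:DF %f0) (const_int 0))
		 (expr_list (reg:DI %o1) (const_int 8))])

    i.e. the FP field in a float register and the integer field in the
    second integer argument register, at byte offset 8. */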
6769
6770 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6771 of the 64-bit ABI for passing and returning unions.
6772 Return an expression valid as a return value for the FUNCTION_ARG
6773 and TARGET_FUNCTION_VALUE.
6774
6775 SIZE is the size in bytes of the union. MODE is its machine mode.
6776 SLOTNO is the index number of the union's slot in the parameter array.
6777 REGNO is the hard register the union will be passed in. */
6778
6779 static rtx
6780 function_arg_union_value (int size, enum machine_mode mode, int slotno,
6781 int regno)
6782 {
6783 int nwords = ROUND_ADVANCE (size), i;
6784 rtx regs;
6785
6786 /* See comment in previous function for empty structures. */
6787 if (nwords == 0)
6788 return gen_rtx_REG (mode, regno);
6789
6790 if (slotno == SPARC_INT_ARG_MAX - 1)
6791 nwords = 1;
6792
6793 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6794
6795 for (i = 0; i < nwords; i++)
6796 {
6797 /* Unions are passed left-justified. */
6798 XVECEXP (regs, 0, i)
6799 = gen_rtx_EXPR_LIST (VOIDmode,
6800 gen_rtx_REG (word_mode, regno),
6801 GEN_INT (UNITS_PER_WORD * i));
6802 regno++;
6803 }
6804
6805 return regs;
6806 }
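 /* E.g. a 12-byte union passed as an outgoing argument in slot 0 spans
    two words and would come back as a PARALLEL of %o0 at offset 0 and
    %o1 at offset 8, both left-justified. */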
6807
6808 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6809 for passing and returning large (BLKmode) vectors.
6810 Return an expression valid as a return value for the FUNCTION_ARG
6811 and TARGET_FUNCTION_VALUE.
6812
6813 SIZE is the size in bytes of the vector (at least 8 bytes).
6814 REGNO is the FP hard register the vector will be passed in. */
6815
6816 static rtx
6817 function_arg_vector_value (int size, int regno)
6818 {
6819 int i, nregs = size / 8;
6820 rtx regs;
6821
6822 regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6823
6824 for (i = 0; i < nregs; i++)
6825 {
6826 XVECEXP (regs, 0, i)
6827 = gen_rtx_EXPR_LIST (VOIDmode,
6828 gen_rtx_REG (DImode, regno + 2*i),
6829 GEN_INT (i*8));
6830 }
6831
6832 return regs;
6833 }
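 /* E.g. a 16-byte vector starting at %f0 is described as a PARALLEL of
    (reg:DI %f0) at offset 0 and (reg:DI %f2) at offset 8 -- one DImode
    FP register pair per 8 bytes of the vector. */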
6834
6835 /* Determine where to put an argument to a function.
6836 Value is zero to push the argument on the stack,
6837 or a hard register in which to store the argument.
6838
6839 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6840 the preceding args and about the function being called.
6841 MODE is the argument's machine mode.
6842 TYPE is the data type of the argument (as a tree).
6843 This is null for libcalls where that information may
6844 not be available.
6845 NAMED is true if this argument is a named parameter
6846 (otherwise it is an extra parameter matching an ellipsis).
6847 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6848 TARGET_FUNCTION_INCOMING_ARG. */
6849
6850 static rtx
6851 sparc_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode,
6852 const_tree type, bool named, bool incoming_p)
6853 {
6854 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6855
6856 int regbase = (incoming_p
6857 ? SPARC_INCOMING_INT_ARG_FIRST
6858 : SPARC_OUTGOING_INT_ARG_FIRST);
6859 int slotno, regno, padding;
6860 enum mode_class mclass = GET_MODE_CLASS (mode);
6861
6862 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
6863 &regno, &padding);
6864 if (slotno == -1)
6865 return 0;
6866
6867 /* Vector types deserve special treatment because they are polymorphic wrt
6868 their mode, depending upon whether VIS instructions are enabled. */
6869 if (type && TREE_CODE (type) == VECTOR_TYPE)
6870 {
6871 HOST_WIDE_INT size = int_size_in_bytes (type);
6872 gcc_assert ((TARGET_ARCH32 && size <= 8)
6873 || (TARGET_ARCH64 && size <= 16));
6874
6875 if (mode == BLKmode)
6876 return function_arg_vector_value (size,
6877 SPARC_FP_ARG_FIRST + 2*slotno);
6878 else
6879 mclass = MODE_FLOAT;
6880 }
6881
6882 if (TARGET_ARCH32)
6883 return gen_rtx_REG (mode, regno);
6884
6885 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6886 and are promoted to registers if possible. */
6887 if (type && TREE_CODE (type) == RECORD_TYPE)
6888 {
6889 HOST_WIDE_INT size = int_size_in_bytes (type);
6890 gcc_assert (size <= 16);
6891
6892 return function_arg_record_value (type, mode, slotno, named, regbase);
6893 }
6894
6895 /* Unions up to 16 bytes in size are passed in integer registers. */
6896 else if (type && TREE_CODE (type) == UNION_TYPE)
6897 {
6898 HOST_WIDE_INT size = int_size_in_bytes (type);
6899 gcc_assert (size <= 16);
6900
6901 return function_arg_union_value (size, mode, slotno, regno);
6902 }
6903
6904 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6905 but also have the slot allocated for them.
6906 If no prototype is in scope, fp values in register slots get passed
6907 in two places, either fp regs and int regs or fp regs and memory. */
6908 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6909 && SPARC_FP_REG_P (regno))
6910 {
6911 rtx reg = gen_rtx_REG (mode, regno);
6912 if (cum->prototype_p || cum->libcall_p)
6913 {
6914 /* "* 2" because fp reg numbers are recorded in 4 byte
6915 quantities. */
6916 #if 0
6917 /* ??? This will cause the value to be passed in the fp reg and
6918 in the stack. When a prototype exists we want to pass the
6919 value in the reg but reserve space on the stack. That's an
6920 optimization, and is deferred [for a bit]. */
6921 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
6922 return gen_rtx_PARALLEL (mode,
6923 gen_rtvec (2,
6924 gen_rtx_EXPR_LIST (VOIDmode,
6925 NULL_RTX, const0_rtx),
6926 gen_rtx_EXPR_LIST (VOIDmode,
6927 reg, const0_rtx)));
6928 else
6929 #else
6930 /* ??? It seems that passing back a register even when past
6931 the area declared by REG_PARM_STACK_SPACE will allocate
6932 space appropriately, and will not copy the data onto the
6933 stack, exactly as we desire.
6934
6935 This is due to locate_and_pad_parm being called in
6936 expand_call whenever reg_parm_stack_space > 0, which
6937 while beneficial to our example here, would seem to be
6938 in error from what had been intended. Ho hum... -- r~ */
6939 #endif
6940 return reg;
6941 }
6942 else
6943 {
6944 rtx v0, v1;
6945
6946 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6947 {
6948 int intreg;
6949
6950 /* On incoming, we don't need to know that the value
6951 is passed in %f0 and %i0; advertising it would confuse
6952 other parts, causing needless spillage even in the simplest cases. */
6953 if (incoming_p)
6954 return reg;
6955
6956 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6957 + (regno - SPARC_FP_ARG_FIRST) / 2);
6958
6959 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6960 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6961 const0_rtx);
6962 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6963 }
6964 else
6965 {
6966 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6967 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6968 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6969 }
6970 }
6971 }
6972
6973 /* All other aggregate types are passed in an integer register in a mode
6974 corresponding to the size of the type. */
6975 else if (type && AGGREGATE_TYPE_P (type))
6976 {
6977 HOST_WIDE_INT size = int_size_in_bytes (type);
6978 gcc_assert (size <= 16);
6979
6980 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6981 }
6982
6983 return gen_rtx_REG (mode, regno);
6984 }
6985
6986 /* Handle the TARGET_FUNCTION_ARG target hook. */
6987
6988 static rtx
6989 sparc_function_arg (cumulative_args_t cum, enum machine_mode mode,
6990 const_tree type, bool named)
6991 {
6992 return sparc_function_arg_1 (cum, mode, type, named, false);
6993 }
6994
6995 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6996
6997 static rtx
6998 sparc_function_incoming_arg (cumulative_args_t cum, enum machine_mode mode,
6999 const_tree type, bool named)
7000 {
7001 return sparc_function_arg_1 (cum, mode, type, named, true);
7002 }
7003
7004 /* For sparc64, objects requiring 16-byte alignment are passed that way. */
7005
7006 static unsigned int
7007 sparc_function_arg_boundary (enum machine_mode mode, const_tree type)
7008 {
7009 return ((TARGET_ARCH64
7010 && (GET_MODE_ALIGNMENT (mode) == 128
7011 || (type && TYPE_ALIGN (type) == 128)))
7012 ? 128
7013 : PARM_BOUNDARY);
7014 }
7015
7016 /* For an arg passed partly in registers and partly in memory,
7017 this is the number of bytes of registers used.
7018 For args passed entirely in registers or entirely in memory, zero.
7019
7020 Any arg that starts in the first 6 regs but won't entirely fit in them
7021 needs partial registers on v8. On v9, structures with integer
7022 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7023 values that begin in the last fp reg [where "last fp reg" varies with the
7024 mode] will be split between that reg and memory. */
7025
7026 static int
7027 sparc_arg_partial_bytes (cumulative_args_t cum, enum machine_mode mode,
7028 tree type, bool named)
7029 {
7030 int slotno, regno, padding;
7031
7032 /* We pass false for incoming_p here; it doesn't matter. */
7033 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7034 false, &regno, &padding);
7035
7036 if (slotno == -1)
7037 return 0;
7038
7039 if (TARGET_ARCH32)
7040 {
7041 if ((slotno + (mode == BLKmode
7042 ? ROUND_ADVANCE (int_size_in_bytes (type))
7043 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
7044 > SPARC_INT_ARG_MAX)
7045 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7046 }
7047 else
7048 {
7049 /* We are guaranteed by pass_by_reference that the size of the
7050 argument is not greater than 16 bytes, so we only need to return
7051 one word if the argument is partially passed in registers. */
7052
7053 if (type && AGGREGATE_TYPE_P (type))
7054 {
7055 int size = int_size_in_bytes (type);
7056
7057 if (size > UNITS_PER_WORD
7058 && slotno == SPARC_INT_ARG_MAX - 1)
7059 return UNITS_PER_WORD;
7060 }
7061 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7062 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7063 && ! (TARGET_FPU && named)))
7064 {
7065 /* The complex types are passed as packed types. */
7066 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7067 && slotno == SPARC_INT_ARG_MAX - 1)
7068 return UNITS_PER_WORD;
7069 }
7070 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7071 {
7072 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7073 > SPARC_FP_ARG_MAX)
7074 return UNITS_PER_WORD;
7075 }
7076 }
7077
7078 return 0;
7079 }
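 /* A worked example for the 32-bit path above: an 8-byte argument
    starting in slot 5 needs ROUND_ADVANCE (8) = 2 slots, and 5 + 2 > 6,
    so (6 - 5) * 4 = 4 bytes are passed in %o5 and the rest in memory. */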
7080
7081 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7082 Specify whether to pass the argument by reference. */
7083
7084 static bool
7085 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7086 enum machine_mode mode, const_tree type,
7087 bool named ATTRIBUTE_UNUSED)
7088 {
7089 if (TARGET_ARCH32)
7090 /* Original SPARC 32-bit ABI says that structures and unions,
7091 and quad-precision floats are passed by reference. For Pascal,
7092 also pass arrays by reference. All other base types are passed
7093 in registers.
7094
7095 Extended ABI (as implemented by the Sun compiler) says that all
7096 complex floats are passed by reference. Pass complex integers
7097 in registers up to 8 bytes. More generally, enforce the 2-word
7098 cap for passing arguments in registers.
7099
7100 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7101 integers are passed like floats of the same size, that is in
7102 registers up to 8 bytes. Pass all vector floats by reference
7103 like structure and unions. */
7104 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7105 || mode == SCmode
7106 /* Catch CDImode, TFmode, DCmode and TCmode. */
7107 || GET_MODE_SIZE (mode) > 8
7108 || (type
7109 && TREE_CODE (type) == VECTOR_TYPE
7110 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7111 else
7112 /* Original SPARC 64-bit ABI says that structures and unions
7113 of at most 16 bytes are passed in registers, as well as
7114 all other base types.
7115
7116 Extended ABI (as implemented by the Sun compiler) says that
7117 complex floats are passed in registers up to 16 bytes. Pass
7118 all complex integers in registers up to 16 bytes. More generally,
7119 enforce the 2-word cap for passing arguments in registers.
7120
7121 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7122 integers are passed like floats of the same size, that is in
7123 registers (up to 16 bytes). Pass all vector floats like structure
7124 and unions. */
7125 return ((type
7126 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7127 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7128 /* Catch CTImode and TCmode. */
7129 || GET_MODE_SIZE (mode) > 16);
7130 }
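 /* A few concrete cases: in 32-bit mode a 'long double' (TFmode, 16
    bytes) and a '_Complex float' (SCmode) are passed by reference while
    a 'long long' (8 bytes) still goes in registers; in 64-bit mode the
    by-reference threshold for aggregates moves up to 16 bytes. */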
7131
7132 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7133 Update the data in CUM to advance over an argument
7134 of mode MODE and data type TYPE.
7135 TYPE is null for libcalls where that information may not be available. */
7136
7137 static void
7138 sparc_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7139 const_tree type, bool named)
7140 {
7141 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7142 int regno, padding;
7143
7144 /* We pass false for incoming_p here; it doesn't matter. */
7145 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7146
7147 /* If argument requires leading padding, add it. */
7148 cum->words += padding;
7149
7150 if (TARGET_ARCH32)
7151 {
7152 cum->words += (mode != BLKmode
7153 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7154 : ROUND_ADVANCE (int_size_in_bytes (type)));
7155 }
7156 else
7157 {
7158 if (type && AGGREGATE_TYPE_P (type))
7159 {
7160 int size = int_size_in_bytes (type);
7161
7162 if (size <= 8)
7163 ++cum->words;
7164 else if (size <= 16)
7165 cum->words += 2;
7166 else /* passed by reference */
7167 ++cum->words;
7168 }
7169 else
7170 {
7171 cum->words += (mode != BLKmode
7172 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7173 : ROUND_ADVANCE (int_size_in_bytes (type)));
7174 }
7175 }
7176 }
7177
7178 /* Handle the FUNCTION_ARG_PADDING macro.
7179 For the 64 bit ABI structs are always stored left shifted in their
7180 argument slot. */
7181
7182 enum direction
7183 function_arg_padding (enum machine_mode mode, const_tree type)
7184 {
7185 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
7186 return upward;
7187
7188 /* Fall back to the default. */
7189 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7190 }
7191
7192 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7193 Specify whether to return the return value in memory. */
7194
7195 static bool
7196 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7197 {
7198 if (TARGET_ARCH32)
7199 /* Original SPARC 32-bit ABI says that structures and unions,
7200 and quad-precision floats are returned in memory. All other
7201 base types are returned in registers.
7202
7203 Extended ABI (as implemented by the Sun compiler) says that
7204 all complex floats are returned in registers (8 FP registers
7205 at most for '_Complex long double'). Return all complex integers
7206 in registers (4 at most for '_Complex long long').
7207
7208 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7209 integers are returned like floats of the same size, that is in
7210 registers up to 8 bytes and in memory otherwise. Return all
7211 vector floats in memory like structure and unions; note that
7212 they always have BLKmode like the latter. */
7213 return (TYPE_MODE (type) == BLKmode
7214 || TYPE_MODE (type) == TFmode
7215 || (TREE_CODE (type) == VECTOR_TYPE
7216 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7217 else
7218 /* Original SPARC 64-bit ABI says that structures and unions
7219 of at most 32 bytes are returned in registers, as well as
7220 all other base types.
7221
7222 Extended ABI (as implemented by the Sun compiler) says that all
7223 complex floats are returned in registers (8 FP registers at most
7224 for '_Complex long double'). Return all complex integers in
7225 registers (4 at most for '_Complex TItype').
7226
7227 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7228 integers are returned like floats of the same size, that is in
7229 registers. Return all vector floats like structure and unions;
7230 note that they always have BLKmode like the latter. */
7231 return (TYPE_MODE (type) == BLKmode
7232 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7233 }
7234
7235 /* Handle the TARGET_STRUCT_VALUE target hook.
7236 Return where to find the structure return value address. */
7237
7238 static rtx
7239 sparc_struct_value_rtx (tree fndecl, int incoming)
7240 {
7241 if (TARGET_ARCH64)
7242 return 0;
7243 else
7244 {
7245 rtx mem;
7246
7247 if (incoming)
7248 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7249 STRUCT_VALUE_OFFSET));
7250 else
7251 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7252 STRUCT_VALUE_OFFSET));
7253
7254 /* Only follow the SPARC ABI for fixed-size structure returns.
7255 Variable size structure returns are handled per the normal
7256 procedures in GCC. This is enabled by -mstd-struct-return. */
7257 if (incoming == 2
7258 && sparc_std_struct_return
7259 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7260 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7261 {
7262 /* We must check and adjust the return address, since it is
7263 optional whether the return object is really provided. */
7265 rtx ret_reg = gen_rtx_REG (Pmode, 31);
7266 rtx scratch = gen_reg_rtx (SImode);
7267 rtx endlab = gen_label_rtx ();
7268
7269 /* Calculate the return object size. */
7270 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7271 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7272 /* Construct a temporary return value. */
7273 rtx temp_val
7274 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7275
7276 /* Implement SPARC 32-bit psABI callee return struct checking:
7277
7278 Fetch the instruction where we will return to and see if
7279 it's an unimp instruction (the most significant 10 bits
7280 will be zero). */
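/* For illustration, a psABI-conforming caller of a function returning
   a 12-byte struct emits roughly:

	call	foo
	 nop			! delay slot
	unimp	12		! low 12 bits hold the struct size

   so the word at the return address + 8 has its top 10 bits clear and
   its low 12 bits equal to the expected size; that is what the
   comparison below checks, undoing the pre-adjustment and substituting
   the local temporary if it fails.  */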
7281 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7282 plus_constant (Pmode,
7283 ret_reg, 8)));
7284 /* Assume the size is valid and pre-adjust. */
7285 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7286 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7287 0, endlab);
7288 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7289 /* Write the address of the memory pointed to by temp_val into
7290 the memory pointed to by mem. */
7291 emit_move_insn (mem, XEXP (temp_val, 0));
7292 emit_label (endlab);
7293 }
7294
7295 return mem;
7296 }
7297 }
7298
7299 /* Handle the TARGET_FUNCTION_VALUE and TARGET_LIBCALL_VALUE target hooks.
7300 For v9, function return values are subject to the same rules as arguments,
7301 except that up to 32 bytes may be returned in registers. */
7302
7303 static rtx
7304 sparc_function_value_1 (const_tree type, enum machine_mode mode,
7305 bool outgoing)
7306 {
7307 /* Beware that the two values are swapped here wrt function_arg. */
7308 int regbase = (outgoing
7309 ? SPARC_INCOMING_INT_ARG_FIRST
7310 : SPARC_OUTGOING_INT_ARG_FIRST);
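/* (The callee writes its return value to %i0, which the register
   windows make the caller see as %o0, so the "incoming" registers
   carry the outgoing value and vice versa.)  */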
7311 enum mode_class mclass = GET_MODE_CLASS (mode);
7312 int regno;
7313
7314 /* Vector types deserve special treatment because they are polymorphic wrt
7315 their mode, depending upon whether VIS instructions are enabled. */
7316 if (type && TREE_CODE (type) == VECTOR_TYPE)
7317 {
7318 HOST_WIDE_INT size = int_size_in_bytes (type);
7319 gcc_assert ((TARGET_ARCH32 && size <= 8)
7320 || (TARGET_ARCH64 && size <= 32));
7321
7322 if (mode == BLKmode)
7323 return function_arg_vector_value (size,
7324 SPARC_FP_ARG_FIRST);
7325 else
7326 mclass = MODE_FLOAT;
7327 }
7328
7329 if (TARGET_ARCH64 && type)
7330 {
7331 /* Structures up to 32 bytes in size are returned in registers. */
7332 if (TREE_CODE (type) == RECORD_TYPE)
7333 {
7334 HOST_WIDE_INT size = int_size_in_bytes (type);
7335 gcc_assert (size <= 32);
7336
7337 return function_arg_record_value (type, mode, 0, 1, regbase);
7338 }
7339
7340 /* Unions up to 32 bytes in size are returned in integer registers. */
7341 else if (TREE_CODE (type) == UNION_TYPE)
7342 {
7343 HOST_WIDE_INT size = int_size_in_bytes (type);
7344 gcc_assert (size <= 32);
7345
7346 return function_arg_union_value (size, mode, 0, regbase);
7347 }
7348
7349 /* Objects that require it are returned in FP registers. */
7350 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7351 ;
7352
7353 /* All other aggregate types are returned in an integer register in a
7354 mode corresponding to the size of the type. */
7355 else if (AGGREGATE_TYPE_P (type))
7356 {
7357 /* All other aggregate types are passed in an integer register
7358 in a mode corresponding to the size of the type. */
7359 HOST_WIDE_INT size = int_size_in_bytes (type);
7360 gcc_assert (size <= 32);
7361
7362 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7363
7364 /* ??? We probably should have made the same ABI change in
7365 3.4.0 as the one we made for unions. The latter was
7366 required by the SCD though, while the former is not
7367 specified, so we favored compatibility and efficiency.
7368
7369 Now we're stuck for aggregates larger than 16 bytes,
7370 because OImode vanished in the meantime. Let's not
7371 try to be unduly clever, and simply follow the ABI
7372 for unions in that case. */
7373 if (mode == BLKmode)
7374 return function_arg_union_value (size, mode, 0, regbase);
7375 else
7376 mclass = MODE_INT;
7377 }
7378
7379 /* We should only have pointer and integer types at this point. This
7380 must match sparc_promote_function_mode. */
7381 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7382 mode = word_mode;
7383 }
7384
7385 /* We should only have pointer and integer types at this point. This must
7386 match sparc_promote_function_mode. */
7387 else if (TARGET_ARCH32
7388 && mclass == MODE_INT
7389 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7390 mode = word_mode;
7391
7392 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7393 regno = SPARC_FP_ARG_FIRST;
7394 else
7395 regno = regbase;
7396
7397 return gen_rtx_REG (mode, regno);
7398 }
7399
7400 /* Handle TARGET_FUNCTION_VALUE.
7401 On the SPARC, the value is found in the first "output" register, but the
7402 called function leaves it in the first "input" register. */
7403
7404 static rtx
7405 sparc_function_value (const_tree valtype,
7406 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7407 bool outgoing)
7408 {
7409 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7410 }
7411
7412 /* Handle TARGET_LIBCALL_VALUE. */
7413
7414 static rtx
7415 sparc_libcall_value (enum machine_mode mode,
7416 const_rtx fun ATTRIBUTE_UNUSED)
7417 {
7418 return sparc_function_value_1 (NULL_TREE, mode, false);
7419 }
7420
7421 /* Handle FUNCTION_VALUE_REGNO_P.
7422 On the SPARC, the first "output" reg is used for integer values, and the
7423 first floating point register is used for floating point values. */
7424
7425 static bool
7426 sparc_function_value_regno_p (const unsigned int regno)
7427 {
7428 return (regno == 8 || regno == 32);
7429 }
7430
7431 /* Do what is necessary for `va_start'. We look at the current function
7432 to determine if stdarg or varargs is used and return the address of
7433 the first unnamed parameter. */
7434
7435 static rtx
7436 sparc_builtin_saveregs (void)
7437 {
7438 int first_reg = crtl->args.info.words;
7439 rtx address;
7440 int regno;
7441
7442 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7443 emit_move_insn (gen_rtx_MEM (word_mode,
7444 gen_rtx_PLUS (Pmode,
7445 frame_pointer_rtx,
7446 GEN_INT (FIRST_PARM_OFFSET (0)
7447 + (UNITS_PER_WORD
7448 * regno)))),
7449 gen_rtx_REG (word_mode,
7450 SPARC_INCOMING_INT_ARG_FIRST + regno));
7451
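/* For example (assuming the usual 32-bit layout, where FIRST_PARM_OFFSET
   is 68: the 16-word register save area plus the hidden struct-return
   slot), a call to "int f (int a, ...)" has FIRST_REG equal to 1, so the
   loop above spills %i1..%i5 to %fp+72 ... %fp+88 and the address
   computed below is %fp+72, where va_arg starts scanning.  */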
7452 address = gen_rtx_PLUS (Pmode,
7453 frame_pointer_rtx,
7454 GEN_INT (FIRST_PARM_OFFSET (0)
7455 + UNITS_PER_WORD * first_reg));
7456
7457 return address;
7458 }
7459
7460 /* Implement `va_start' for stdarg. */
7461
7462 static void
7463 sparc_va_start (tree valist, rtx nextarg)
7464 {
7465 nextarg = expand_builtin_saveregs ();
7466 std_expand_builtin_va_start (valist, nextarg);
7467 }
7468
7469 /* Implement `va_arg' for stdarg. */
7470
7471 static tree
7472 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7473 gimple_seq *post_p)
7474 {
7475 HOST_WIDE_INT size, rsize, align;
7476 tree addr, incr;
7477 bool indirect;
7478 tree ptrtype = build_pointer_type (type);
7479
7480 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7481 {
7482 indirect = true;
7483 size = rsize = UNITS_PER_WORD;
7484 align = 0;
7485 }
7486 else
7487 {
7488 indirect = false;
7489 size = int_size_in_bytes (type);
7490 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7491 align = 0;
7492
7493 if (TARGET_ARCH64)
7494 {
7495 /* For SPARC64, objects requiring 16-byte alignment get it. */
7496 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7497 align = 2 * UNITS_PER_WORD;
7498
7499 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7500 are left-justified in their slots. */
7501 if (AGGREGATE_TYPE_P (type))
7502 {
7503 if (size == 0)
7504 size = rsize = UNITS_PER_WORD;
7505 else
7506 size = rsize;
7507 }
7508 }
7509 }
7510
7511 incr = valist;
7512 if (align)
7513 {
7514 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7515 incr = fold_convert (sizetype, incr);
7516 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7517 size_int (-align));
7518 incr = fold_convert (ptr_type_node, incr);
7519 }
7520
7521 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7522 addr = incr;
7523
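/* On this big-endian target a value smaller than its slot is passed
   right-justified, so step past the leading pad bytes.  */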
7524 if (BYTES_BIG_ENDIAN && size < rsize)
7525 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7526
7527 if (indirect)
7528 {
7529 addr = fold_convert (build_pointer_type (ptrtype), addr);
7530 addr = build_va_arg_indirect_ref (addr);
7531 }
7532
7533 /* If the address isn't aligned properly for the type, we need a temporary.
7534 FIXME: This is inefficient; usually we can do this in registers. */
7535 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7536 {
7537 tree tmp = create_tmp_var (type, "va_arg_tmp");
7538 tree dest_addr = build_fold_addr_expr (tmp);
7539 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7540 3, dest_addr, addr, size_int (rsize));
7541 TREE_ADDRESSABLE (tmp) = 1;
7542 gimplify_and_add (copy, pre_p);
7543 addr = dest_addr;
7544 }
7545
7546 else
7547 addr = fold_convert (ptrtype, addr);
7548
7549 incr = fold_build_pointer_plus_hwi (incr, rsize);
7550 gimplify_assign (valist, incr, post_p);
7551
7552 return build_va_arg_indirect_ref (addr);
7553 }
7554 \f
7555 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7556 Specify whether the vector mode is supported by the hardware. */
7557
7558 static bool
7559 sparc_vector_mode_supported_p (enum machine_mode mode)
7560 {
7561 return TARGET_VIS && VECTOR_MODE_P (mode);
7562 }
7563 \f
7564 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7565
7566 static enum machine_mode
7567 sparc_preferred_simd_mode (enum machine_mode mode)
7568 {
7569 if (TARGET_VIS)
7570 switch (mode)
7571 {
7572 case SImode:
7573 return V2SImode;
7574 case HImode:
7575 return V4HImode;
7576 case QImode:
7577 return V8QImode;
7578
7579 default:;
7580 }
7581
7582 return word_mode;
7583 }
7584 \f
7585 /* Return the string to output an unconditional branch to LABEL, which is
7586 the operand number of the label.
7587
7588 DEST is the destination insn (i.e. the label), INSN is the source. */
7589
7590 const char *
7591 output_ubranch (rtx dest, rtx_insn *insn)
7592 {
7593 static char string[64];
7594 bool v9_form = false;
7595 int delta;
7596 char *p;
7597
7598 /* Even if we are trying to use cbcond for this, evaluate
7599 whether we can use V9 branches as our backup plan. */
7600
7601 delta = 5000000;
7602 if (INSN_ADDRESSES_SET_P ())
7603 delta = (INSN_ADDRESSES (INSN_UID (dest))
7604 - INSN_ADDRESSES (INSN_UID (insn)));
7605
7606 /* Leave some instructions for "slop". */
7607 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7608 v9_form = true;
7609
7610 if (TARGET_CBCOND)
7611 {
7612 bool emit_nop = emit_cbcond_nop (insn);
7613 bool far = false;
7614 const char *rval;
7615
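/* "cwbe %g0, %g0, label" compares %g0 with itself for equality, which
   always holds, so the cbcond below acts as an unconditional branch.  */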
7616 if (delta < -500 || delta > 500)
7617 far = true;
7618
7619 if (far)
7620 {
7621 if (v9_form)
7622 rval = "ba,a,pt\t%%xcc, %l0";
7623 else
7624 rval = "b,a\t%l0";
7625 }
7626 else
7627 {
7628 if (emit_nop)
7629 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7630 else
7631 rval = "cwbe\t%%g0, %%g0, %l0";
7632 }
7633 return rval;
7634 }
7635
7636 if (v9_form)
7637 strcpy (string, "ba%*,pt\t%%xcc, ");
7638 else
7639 strcpy (string, "b%*\t");
7640
7641 p = strchr (string, '\0');
7642 *p++ = '%';
7643 *p++ = 'l';
7644 *p++ = '0';
7645 *p++ = '%';
7646 *p++ = '(';
7647 *p = '\0';
7648
7649 return string;
7650 }
7651
7652 /* Return the string to output a conditional branch to LABEL, which is
7653 the operand number of the label. OP is the conditional expression.
7654 XEXP (OP, 0) is assumed to be a condition code register (integer or
7655 floating point) and its mode specifies what kind of comparison we made.
7656
7657 DEST is the destination insn (i.e. the label), INSN is the source.
7658
7659 REVERSED is nonzero if we should reverse the sense of the comparison.
7660
7661 ANNUL is nonzero if we should generate an annulling branch. */
7662
7663 const char *
7664 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7665 rtx_insn *insn)
7666 {
7667 static char string[64];
7668 enum rtx_code code = GET_CODE (op);
7669 rtx cc_reg = XEXP (op, 0);
7670 enum machine_mode mode = GET_MODE (cc_reg);
7671 const char *labelno, *branch;
7672 int spaces = 8, far;
7673 char *p;
7674
7675 /* v9 branches are limited to +-1MB. If it is too far away,
7676 change
7677
7678 bne,pt %xcc, .LC30
7679
7680 to
7681
7682 be,pn %xcc, .+12
7683 nop
7684 ba .LC30
7685
7686 and
7687
7688 fbne,a,pn %fcc2, .LC29
7689
7690 to
7691
7692 fbe,pt %fcc2, .+16
7693 nop
7694 ba .LC29 */
7695
7696 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7697 if (reversed ^ far)
7698 {
7699 /* Reversal of FP compares takes care -- an ordered compare
7700 becomes an unordered compare and vice versa. */
7701 if (mode == CCFPmode || mode == CCFPEmode)
7702 code = reverse_condition_maybe_unordered (code);
7703 else
7704 code = reverse_condition (code);
7705 }
7706
7707 /* Start by writing the branch condition. */
7708 if (mode == CCFPmode || mode == CCFPEmode)
7709 {
7710 switch (code)
7711 {
7712 case NE:
7713 branch = "fbne";
7714 break;
7715 case EQ:
7716 branch = "fbe";
7717 break;
7718 case GE:
7719 branch = "fbge";
7720 break;
7721 case GT:
7722 branch = "fbg";
7723 break;
7724 case LE:
7725 branch = "fble";
7726 break;
7727 case LT:
7728 branch = "fbl";
7729 break;
7730 case UNORDERED:
7731 branch = "fbu";
7732 break;
7733 case ORDERED:
7734 branch = "fbo";
7735 break;
7736 case UNGT:
7737 branch = "fbug";
7738 break;
7739 case UNLT:
7740 branch = "fbul";
7741 break;
7742 case UNEQ:
7743 branch = "fbue";
7744 break;
7745 case UNGE:
7746 branch = "fbuge";
7747 break;
7748 case UNLE:
7749 branch = "fbule";
7750 break;
7751 case LTGT:
7752 branch = "fblg";
7753 break;
7754
7755 default:
7756 gcc_unreachable ();
7757 }
7758
7759 /* ??? !v9: FP branches cannot be preceded by another floating point
7760 insn. Because there is currently no concept of pre-delay slots,
7761 we can fix this only by always emitting a nop before a floating
7762 point branch. */
7763
7764 string[0] = '\0';
7765 if (! TARGET_V9)
7766 strcpy (string, "nop\n\t");
7767 strcat (string, branch);
7768 }
7769 else
7770 {
7771 switch (code)
7772 {
7773 case NE:
7774 branch = "bne";
7775 break;
7776 case EQ:
7777 branch = "be";
7778 break;
7779 case GE:
7780 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7781 branch = "bpos";
7782 else
7783 branch = "bge";
7784 break;
7785 case GT:
7786 branch = "bg";
7787 break;
7788 case LE:
7789 branch = "ble";
7790 break;
7791 case LT:
7792 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7793 branch = "bneg";
7794 else
7795 branch = "bl";
7796 break;
7797 case GEU:
7798 branch = "bgeu";
7799 break;
7800 case GTU:
7801 branch = "bgu";
7802 break;
7803 case LEU:
7804 branch = "bleu";
7805 break;
7806 case LTU:
7807 branch = "blu";
7808 break;
7809
7810 default:
7811 gcc_unreachable ();
7812 }
7813 strcpy (string, branch);
7814 }
7815 spaces -= strlen (branch);
7816 p = strchr (string, '\0');
7817
7818 /* Now add the annulling, the label, and a possible noop. */
7819 if (annul && ! far)
7820 {
7821 strcpy (p, ",a");
7822 p += 2;
7823 spaces -= 2;
7824 }
7825
7826 if (TARGET_V9)
7827 {
7828 rtx note;
7829 int v8 = 0;
7830
7831 if (! far && insn && INSN_ADDRESSES_SET_P ())
7832 {
7833 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7834 - INSN_ADDRESSES (INSN_UID (insn)));
7835 /* Leave some instructions for "slop". */
7836 if (delta < -260000 || delta >= 260000)
7837 v8 = 1;
7838 }
7839
7840 if (mode == CCFPmode || mode == CCFPEmode)
7841 {
7842 static char v9_fcc_labelno[] = "%%fccX, ";
7843 /* Set the char indicating the number of the fcc reg to use. */
7844 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7845 labelno = v9_fcc_labelno;
7846 if (v8)
7847 {
7848 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7849 labelno = "";
7850 }
7851 }
7852 else if (mode == CCXmode || mode == CCX_NOOVmode)
7853 {
7854 labelno = "%%xcc, ";
7855 gcc_assert (! v8);
7856 }
7857 else
7858 {
7859 labelno = "%%icc, ";
7860 if (v8)
7861 labelno = "";
7862 }
7863
7864 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7865 {
7866 strcpy (p,
7867 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
7868 ? ",pt" : ",pn");
7869 p += 3;
7870 spaces -= 3;
7871 }
7872 }
7873 else
7874 labelno = "";
7875
7876 if (spaces > 0)
7877 *p++ = '\t';
7878 else
7879 *p++ = ' ';
7880 strcpy (p, labelno);
7881 p = strchr (p, '\0');
7882 if (far)
7883 {
7884 strcpy (p, ".+12\n\t nop\n\tb\t");
7885 /* Skip the next insn if requested or
7886 if we know that it will be a nop. */
7887 if (annul || ! final_sequence)
7888 p[3] = '6';
7889 p += 14;
7890 }
7891 *p++ = '%';
7892 *p++ = 'l';
7893 *p++ = label + '0';
7894 *p++ = '%';
7895 *p++ = '#';
7896 *p = '\0';
7897
7898 return string;
7899 }
7900
7901 /* Emit a library call comparison between floating point X and Y.
7902 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7903 Return the new operator to be used in the comparison sequence.
7904
7905 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7906 values as arguments instead of the TFmode registers themselves,
7907 that's why we cannot call emit_float_lib_cmp. */
7908
7909 rtx
7910 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7911 {
7912 const char *qpfunc;
7913 rtx slot0, slot1, result, tem, tem2, libfunc;
7914 enum machine_mode mode;
7915 enum rtx_code new_comparison;
7916
7917 switch (comparison)
7918 {
7919 case EQ:
7920 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7921 break;
7922
7923 case NE:
7924 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7925 break;
7926
7927 case GT:
7928 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7929 break;
7930
7931 case GE:
7932 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7933 break;
7934
7935 case LT:
7936 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7937 break;
7938
7939 case LE:
7940 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7941 break;
7942
7943 case ORDERED:
7944 case UNORDERED:
7945 case UNGT:
7946 case UNLT:
7947 case UNEQ:
7948 case UNGE:
7949 case UNLE:
7950 case LTGT:
7951 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7952 break;
7953
7954 default:
7955 gcc_unreachable ();
7956 }
7957
7958 if (TARGET_ARCH64)
7959 {
7960 if (MEM_P (x))
7961 {
7962 tree expr = MEM_EXPR (x);
7963 if (expr)
7964 mark_addressable (expr);
7965 slot0 = x;
7966 }
7967 else
7968 {
7969 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7970 emit_move_insn (slot0, x);
7971 }
7972
7973 if (MEM_P (y))
7974 {
7975 tree expr = MEM_EXPR (y);
7976 if (expr)
7977 mark_addressable (expr);
7978 slot1 = y;
7979 }
7980 else
7981 {
7982 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7983 emit_move_insn (slot1, y);
7984 }
7985
7986 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7987 emit_library_call (libfunc, LCT_NORMAL,
7988 DImode, 2,
7989 XEXP (slot0, 0), Pmode,
7990 XEXP (slot1, 0), Pmode);
7991 mode = DImode;
7992 }
7993 else
7994 {
7995 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7996 emit_library_call (libfunc, LCT_NORMAL,
7997 SImode, 2,
7998 x, TFmode, y, TFmode);
7999 mode = SImode;
8000 }
8001
8002
8003 /* Immediately move the result of the libcall into a pseudo
8004 register so reload doesn't clobber the value if it needs
8005 the return register for a spill reg. */
8006 result = gen_reg_rtx (mode);
8007 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8008
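/* Decode the libcall result.  The _Q_cmp/_Qp_cmp routines follow the
   Sun convention of returning 0 for equal, 1 for less, 2 for greater
   and 3 for unordered; the cases below rely on that encoding, e.g.
   UNLT tests the low bit (1 or 3) and UNEQ tests whether result + 1
   has bit 1 clear (0 or 3).  */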
8009 switch (comparison)
8010 {
8011 default:
8012 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8013 case ORDERED:
8014 case UNORDERED:
8015 new_comparison = (comparison == UNORDERED ? EQ : NE);
8016 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8017 case UNGT:
8018 case UNGE:
8019 new_comparison = (comparison == UNGT ? GT : NE);
8020 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8021 case UNLE:
8022 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8023 case UNLT:
8024 tem = gen_reg_rtx (mode);
8025 if (TARGET_ARCH32)
8026 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8027 else
8028 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8029 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8030 case UNEQ:
8031 case LTGT:
8032 tem = gen_reg_rtx (mode);
8033 if (TARGET_ARCH32)
8034 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8035 else
8036 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8037 tem2 = gen_reg_rtx (mode);
8038 if (TARGET_ARCH32)
8039 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8040 else
8041 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8042 new_comparison = (comparison == UNEQ ? EQ : NE);
8043 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8044 }
8045
8046 gcc_unreachable ();
8047 }
8048
8049 /* Generate an unsigned DImode to FP conversion. This is the same code
8050 optabs would emit if we didn't have TFmode patterns. */
8051
8052 void
8053 sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
8054 {
8055 rtx neglab, donelab, i0, i1, f0, in, out;
8056
8057 out = operands[0];
8058 in = force_reg (DImode, operands[1]);
8059 neglab = gen_label_rtx ();
8060 donelab = gen_label_rtx ();
8061 i0 = gen_reg_rtx (DImode);
8062 i1 = gen_reg_rtx (DImode);
8063 f0 = gen_reg_rtx (mode);
8064
8065 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8066
8067 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
8068 emit_jump_insn (gen_jump (donelab));
8069 emit_barrier ();
8070
8071 emit_label (neglab);
8072
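/* IN is negative as a signed value, i.e. at least 2^63 unsigned.
   Halve it, OR-ing the shifted-out bit back in (round to odd) to
   avoid double rounding, convert the halved value as signed, then
   double the floating-point result.  */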
8073 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8074 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8075 emit_insn (gen_iordi3 (i0, i0, i1));
8076 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
8077 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
8078
8079 emit_label (donelab);
8080 }
8081
8082 /* Generate an FP to unsigned DImode conversion. This is the same code
8083 optabs would emit if we didn't have TFmode patterns. */
8084
8085 void
8086 sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
8087 {
8088 rtx neglab, donelab, i0, i1, f0, in, out, limit;
8089
8090 out = operands[0];
8091 in = force_reg (mode, operands[1]);
8092 neglab = gen_label_rtx ();
8093 donelab = gen_label_rtx ();
8094 i0 = gen_reg_rtx (DImode);
8095 i1 = gen_reg_rtx (DImode);
8096 limit = gen_reg_rtx (mode);
8097 f0 = gen_reg_rtx (mode);
8098
8099 emit_move_insn (limit,
8100 CONST_DOUBLE_FROM_REAL_VALUE (
8101 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8102 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8103
8104 emit_insn (gen_rtx_SET (VOIDmode,
8105 out,
8106 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8107 emit_jump_insn (gen_jump (donelab));
8108 emit_barrier ();
8109
8110 emit_label (neglab);
8111
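/* IN is at least 2^63, outside the signed DImode range.  Subtract
   2^63, convert the difference as signed, then XOR the sign bit into
   the result to add the 2^63 back.  */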
8112 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
8113 emit_insn (gen_rtx_SET (VOIDmode,
8114 i0,
8115 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8116 emit_insn (gen_movdi (i1, const1_rtx));
8117 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8118 emit_insn (gen_xordi3 (out, i0, i1));
8119
8120 emit_label (donelab);
8121 }
8122
8123 /* Return the string to output a compare and branch instruction to DEST.
8124 DEST is the destination insn (i.e. the label), INSN is the source,
8125 and OP is the conditional expression. */
8126
8127 const char *
8128 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8129 {
8130 enum machine_mode mode = GET_MODE (XEXP (op, 0));
8131 enum rtx_code code = GET_CODE (op);
8132 const char *cond_str, *tmpl;
8133 int far, emit_nop, len;
8134 static char string[64];
8135 char size_char;
8136
8137 /* Compare and Branch is limited to +-2KB. If it is too far away,
8138 change
8139
8140 cxbne X, Y, .LC30
8141
8142 to
8143
8144 cxbe X, Y, .+16
8145 nop
8146 ba,pt xcc, .LC30
8147 nop */
8148
8149 len = get_attr_length (insn);
8150
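/* The length attribute counts 4-byte instruction words here: 1 is a
   bare cbcond, 2 means a trailing nop is needed, and 4 selects the
   far-branch sequence shown above.  */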
8151 far = len == 4;
8152 emit_nop = len == 2;
8153
8154 if (far)
8155 code = reverse_condition (code);
8156
8157 size_char = ((mode == SImode) ? 'w' : 'x');
8158
8159 switch (code)
8160 {
8161 case NE:
8162 cond_str = "ne";
8163 break;
8164
8165 case EQ:
8166 cond_str = "e";
8167 break;
8168
8169 case GE:
8170 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8171 cond_str = "pos";
8172 else
8173 cond_str = "ge";
8174 break;
8175
8176 case GT:
8177 cond_str = "g";
8178 break;
8179
8180 case LE:
8181 cond_str = "le";
8182 break;
8183
8184 case LT:
8185 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8186 cond_str = "neg";
8187 else
8188 cond_str = "l";
8189 break;
8190
8191 case GEU:
8192 cond_str = "cc";
8193 break;
8194
8195 case GTU:
8196 cond_str = "gu";
8197 break;
8198
8199 case LEU:
8200 cond_str = "leu";
8201 break;
8202
8203 case LTU:
8204 cond_str = "cs";
8205 break;
8206
8207 default:
8208 gcc_unreachable ();
8209 }
8210
8211 if (far)
8212 {
8213 int veryfar = 1, delta;
8214
8215 if (INSN_ADDRESSES_SET_P ())
8216 {
8217 delta = (INSN_ADDRESSES (INSN_UID (dest))
8218 - INSN_ADDRESSES (INSN_UID (insn)));
8219 /* Leave some instructions for "slop". */
8220 if (delta >= -260000 && delta < 260000)
8221 veryfar = 0;
8222 }
8223
8224 if (veryfar)
8225 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8226 else
8227 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8228 }
8229 else
8230 {
8231 if (emit_nop)
8232 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8233 else
8234 tmpl = "c%cb%s\t%%1, %%2, %%3";
8235 }
8236
8237 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8238
8239 return string;
8240 }
8241
8242 /* Return the string to output a conditional branch to LABEL, testing
8243 register REG. LABEL is the operand number of the label; REG is the
8244 operand number of the reg. OP is the conditional expression. The mode
8245 of REG says what kind of comparison we made.
8246
8247 DEST is the destination insn (i.e. the label), INSN is the source.
8248
8249 REVERSED is nonzero if we should reverse the sense of the comparison.
8250
8251 ANNUL is nonzero if we should generate an annulling branch. */
8252
8253 const char *
8254 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8255 int annul, rtx_insn *insn)
8256 {
8257 static char string[64];
8258 enum rtx_code code = GET_CODE (op);
8259 enum machine_mode mode = GET_MODE (XEXP (op, 0));
8260 rtx note;
8261 int far;
8262 char *p;
8263
8264 /* Branch-on-register instructions are limited to +-128KB. If it is too far away,
8265 change
8266
8267 brnz,pt %g1, .LC30
8268
8269 to
8270
8271 brz,pn %g1, .+12
8272 nop
8273 ba,pt %xcc, .LC30
8274
8275 and
8276
8277 brgez,a,pn %o1, .LC29
8278
8279 to
8280
8281 brlz,pt %o1, .+16
8282 nop
8283 ba,pt %xcc, .LC29 */
8284
8285 far = get_attr_length (insn) >= 3;
8286
8287 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8288 if (reversed ^ far)
8289 code = reverse_condition (code);
8290
8291 /* Only 64 bit versions of these instructions exist. */
8292 gcc_assert (mode == DImode);
8293
8294 /* Start by writing the branch condition. */
8295
8296 switch (code)
8297 {
8298 case NE:
8299 strcpy (string, "brnz");
8300 break;
8301
8302 case EQ:
8303 strcpy (string, "brz");
8304 break;
8305
8306 case GE:
8307 strcpy (string, "brgez");
8308 break;
8309
8310 case LT:
8311 strcpy (string, "brlz");
8312 break;
8313
8314 case LE:
8315 strcpy (string, "brlez");
8316 break;
8317
8318 case GT:
8319 strcpy (string, "brgz");
8320 break;
8321
8322 default:
8323 gcc_unreachable ();
8324 }
8325
8326 p = strchr (string, '\0');
8327
8328 /* Now add the annulling, reg, label, and nop. */
8329 if (annul && ! far)
8330 {
8331 strcpy (p, ",a");
8332 p += 2;
8333 }
8334
8335 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8336 {
8337 strcpy (p,
8338 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
8339 ? ",pt" : ",pn");
8340 p += 3;
8341 }
8342
8343 *p = p < string + 8 ? '\t' : ' ';
8344 p++;
8345 *p++ = '%';
8346 *p++ = '0' + reg;
8347 *p++ = ',';
8348 *p++ = ' ';
8349 if (far)
8350 {
8351 int veryfar = 1, delta;
8352
8353 if (INSN_ADDRESSES_SET_P ())
8354 {
8355 delta = (INSN_ADDRESSES (INSN_UID (dest))
8356 - INSN_ADDRESSES (INSN_UID (insn)));
8357 /* Leave some instructions for "slop". */
8358 if (delta >= -260000 && delta < 260000)
8359 veryfar = 0;
8360 }
8361
8362 strcpy (p, ".+12\n\t nop\n\t");
8363 /* Skip the next insn if requested or
8364 if we know that it will be a nop. */
8365 if (annul || ! final_sequence)
8366 p[3] = '6';
8367 p += 12;
8368 if (veryfar)
8369 {
8370 strcpy (p, "b\t");
8371 p += 2;
8372 }
8373 else
8374 {
8375 strcpy (p, "ba,pt\t%%xcc, ");
8376 p += 13;
8377 }
8378 }
8379 *p++ = '%';
8380 *p++ = 'l';
8381 *p++ = '0' + label;
8382 *p++ = '%';
8383 *p++ = '#';
8384 *p = '\0';
8385
8386 return string;
8387 }
8388
8389 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8390 Such instructions cannot be used in the delay slot of a return insn on V9.
8391 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8392 */
8393
8394 static int
8395 epilogue_renumber (register rtx *where, int test)
8396 {
8397 register const char *fmt;
8398 register int i;
8399 register enum rtx_code code;
8400
8401 if (*where == 0)
8402 return 0;
8403
8404 code = GET_CODE (*where);
8405
8406 switch (code)
8407 {
8408 case REG:
8409 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8410 return 1;
8411 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8412 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
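/* Fall through: remaining (or just renamed) registers are safe in the
   delay slot, so return 0 like the constant cases below.  */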
8413 case SCRATCH:
8414 case CC0:
8415 case PC:
8416 case CONST_INT:
8417 case CONST_DOUBLE:
8418 return 0;
8419
8420 /* Do not replace the frame pointer with the stack pointer because
8421 it can cause the delayed instruction to load below the stack.
8422 This occurs when instructions like:
8423
8424 (set (reg/i:SI 24 %i0)
8425 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8426 (const_int -20 [0xffffffec])) 0))
8427
8428 are in the return delayed slot. */
8429 case PLUS:
8430 if (GET_CODE (XEXP (*where, 0)) == REG
8431 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8432 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8433 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8434 return 1;
8435 break;
8436
8437 case MEM:
8438 if (SPARC_STACK_BIAS
8439 && GET_CODE (XEXP (*where, 0)) == REG
8440 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8441 return 1;
8442 break;
8443
8444 default:
8445 break;
8446 }
8447
8448 fmt = GET_RTX_FORMAT (code);
8449
8450 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8451 {
8452 if (fmt[i] == 'E')
8453 {
8454 register int j;
8455 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8456 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8457 return 1;
8458 }
8459 else if (fmt[i] == 'e'
8460 && epilogue_renumber (&(XEXP (*where, i)), test))
8461 return 1;
8462 }
8463 return 0;
8464 }
8465 \f
8466 /* Leaf functions and non-leaf functions have different needs. */
8467
8468 static const int
8469 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8470
8471 static const int
8472 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8473
8474 static const int *const reg_alloc_orders[] = {
8475 reg_leaf_alloc_order,
8476 reg_nonleaf_alloc_order};
8477
8478 void
8479 order_regs_for_local_alloc (void)
8480 {
8481 static int last_order_nonleaf = 1;
8482
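/* Hard register 15 is %o7, which every call instruction writes, so it
   is ever-live exactly when the current function is not a leaf.  */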
8483 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8484 {
8485 last_order_nonleaf = !last_order_nonleaf;
8486 memcpy ((char *) reg_alloc_order,
8487 (const char *) reg_alloc_orders[last_order_nonleaf],
8488 FIRST_PSEUDO_REGISTER * sizeof (int));
8489 }
8490 }
8491 \f
8492 /* Return 1 if REG and MEM are legitimate enough to allow the various
8493 mem<-->reg splits to be run. */
8494
8495 int
8496 sparc_splitdi_legitimate (rtx reg, rtx mem)
8497 {
8498 /* Punt if we are here by mistake. */
8499 gcc_assert (reload_completed);
8500
8501 /* We must have an offsettable memory reference. */
8502 if (! offsettable_memref_p (mem))
8503 return 0;
8504
8505 /* If we have legitimate args for ldd/std, we do not want
8506 the split to happen. */
8507 if ((REGNO (reg) % 2) == 0
8508 && mem_min_alignment (mem, 8))
8509 return 0;
8510
8511 /* Success. */
8512 return 1;
8513 }
8514
8515 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8516
8517 int
8518 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8519 {
8520 int regno1, regno2;
8521
8522 if (GET_CODE (reg1) == SUBREG)
8523 reg1 = SUBREG_REG (reg1);
8524 if (GET_CODE (reg1) != REG)
8525 return 0;
8526 regno1 = REGNO (reg1);
8527
8528 if (GET_CODE (reg2) == SUBREG)
8529 reg2 = SUBREG_REG (reg2);
8530 if (GET_CODE (reg2) != REG)
8531 return 0;
8532 regno2 = REGNO (reg2);
8533
8534 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8535 return 1;
8536
8537 if (TARGET_VIS3)
8538 {
8539 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8540 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8541 return 1;
8542 }
8543
8544 return 0;
8545 }
8546
8547 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8548 This makes them candidates for using ldd and std insns.
8549
8550 Note reg1 and reg2 *must* be hard registers. */
8551
8552 int
8553 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8554 {
8555 /* We might have been passed a SUBREG. */
8556 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8557 return 0;
8558
8559 if (REGNO (reg1) % 2 != 0)
8560 return 0;
8561
8562 /* Integer ldd is deprecated in SPARC V9. */
8563 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8564 return 0;
8565
8566 return (REGNO (reg1) == REGNO (reg2) - 1);
8567 }
8568
8569 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8570 an ldd or std insn.
8571
8572 This can only happen when addr1 and addr2, the addresses in mem1
8573 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8574 addr1 must also be aligned on a 64-bit boundary.
8575
8576 Also iff dependent_reg_rtx is not null it should not be used to
8577 compute the address for mem1, i.e. we cannot optimize a sequence
8578 like:
8579 ld [%o0], %o0
8580 ld [%o0 + 4], %o1
8581 to
8582 ldd [%o0], %o0
8583 nor:
8584 ld [%g3 + 4], %g3
8585 ld [%g3], %g2
8586 to
8587 ldd [%g3], %g2
8588
8589 But, note that the transformation from:
8590 ld [%g2 + 4], %g3
8591 ld [%g2], %g2
8592 to
8593 ldd [%g2], %g2
8594 is perfectly fine. Thus, the peephole2 patterns always pass us
8595 the destination register of the first load, never the second one.
8596
8597 For stores we don't have a similar problem, so dependent_reg_rtx is
8598 NULL_RTX. */
8599
8600 int
8601 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8602 {
8603 rtx addr1, addr2;
8604 unsigned int reg1;
8605 HOST_WIDE_INT offset1;
8606
8607 /* The mems cannot be volatile. */
8608 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8609 return 0;
8610
8611 /* MEM1 should be aligned on a 64-bit boundary. */
8612 if (MEM_ALIGN (mem1) < 64)
8613 return 0;
8614
8615 addr1 = XEXP (mem1, 0);
8616 addr2 = XEXP (mem2, 0);
8617
8618 /* Extract a register number and offset (if used) from the first addr. */
8619 if (GET_CODE (addr1) == PLUS)
8620 {
8621 /* If not a REG, return zero. */
8622 if (GET_CODE (XEXP (addr1, 0)) != REG)
8623 return 0;
8624 else
8625 {
8626 reg1 = REGNO (XEXP (addr1, 0));
8627 /* The offset must be constant! */
8628 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8629 return 0;
8630 offset1 = INTVAL (XEXP (addr1, 1));
8631 }
8632 }
8633 else if (GET_CODE (addr1) != REG)
8634 return 0;
8635 else
8636 {
8637 reg1 = REGNO (addr1);
8638 /* This was a simple (mem (reg)) expression. Offset is 0. */
8639 offset1 = 0;
8640 }
8641
8642 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
8643 if (GET_CODE (addr2) != PLUS)
8644 return 0;
8645
8646 if (GET_CODE (XEXP (addr2, 0)) != REG
8647 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8648 return 0;
8649
8650 if (reg1 != REGNO (XEXP (addr2, 0)))
8651 return 0;
8652
8653 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8654 return 0;
8655
8656 /* The first offset must be evenly divisible by 8 to ensure the
8657 address is 64 bit aligned. */
8658 if (offset1 % 8 != 0)
8659 return 0;
8660
8661 /* The offset for the second addr must be 4 more than the first addr. */
8662 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8663 return 0;
8664
8665 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8666 instructions. */
8667 return 1;
8668 }
8669
8670 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8671
8672 rtx
8673 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, enum machine_mode mode)
8674 {
8675 rtx x = widen_memory_access (mem1, mode, 0);
8676 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8677 return x;
8678 }
8679
8680 /* Return 1 if reg is a pseudo, or is the first register in
8681 a hard register pair. This makes it suitable for use in
8682 ldd and std insns. */
8683
8684 int
8685 register_ok_for_ldd (rtx reg)
8686 {
8687 /* We might have been passed a SUBREG. */
8688 if (!REG_P (reg))
8689 return 0;
8690
8691 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8692 return (REGNO (reg) % 2 == 0);
8693
8694 return 1;
8695 }
8696
8697 /* Return 1 if OP, a MEM, has an address which is known to be
8698 aligned to an 8-byte boundary. */
8699
8700 int
8701 memory_ok_for_ldd (rtx op)
8702 {
8703 /* In 64-bit mode, we assume that the address is word-aligned. */
8704 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8705 return 0;
8706
8707 if (! can_create_pseudo_p ()
8708 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8709 return 0;
8710
8711 return 1;
8712 }
8713 \f
8714 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8715
8716 static bool
8717 sparc_print_operand_punct_valid_p (unsigned char code)
8718 {
8719 if (code == '#'
8720 || code == '*'
8721 || code == '('
8722 || code == ')'
8723 || code == '_'
8724 || code == '&')
8725 return true;
8726
8727 return false;
8728 }
8729
8730 /* Implement TARGET_PRINT_OPERAND.
8731 Print operand X (an rtx) in assembler syntax to file FILE.
8732 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8733 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8734
8735 static void
8736 sparc_print_operand (FILE *file, rtx x, int code)
8737 {
8738 switch (code)
8739 {
8740 case '#':
8741 /* Output an insn in a delay slot. */
8742 if (final_sequence)
8743 sparc_indent_opcode = 1;
8744 else
8745 fputs ("\n\t nop", file);
8746 return;
8747 case '*':
8748 /* Output an annul flag if there's nothing for the delay slot and we
8749 are optimizing. This is always used with '(' below.
8750 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8751 this is a dbx bug. So, we only do this when optimizing.
8752 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8753 Always emit a nop in case the next instruction is a branch. */
8754 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8755 fputs (",a", file);
8756 return;
8757 case '(':
8758 /* Output a 'nop' if there's nothing for the delay slot and we are
8759 not optimizing. This is always used with '*' above. */
8760 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8761 fputs ("\n\t nop", file);
8762 else if (final_sequence)
8763 sparc_indent_opcode = 1;
8764 return;
8765 case ')':
8766 /* Output the right displacement from the saved PC on function return.
8767 The caller may have placed an "unimp" insn immediately after the call
8768 so we have to account for it. This insn is used in the 32-bit ABI
8769 when calling a function that returns a non zero-sized structure. The
8770 64-bit ABI doesn't have it. Be careful to have this test be the same
8771 as that for the call. The exception is when sparc_std_struct_return
8772 is enabled, the psABI is followed exactly and the adjustment is made
8773 by the code in sparc_struct_value_rtx. The call emitted is the same
8774 when sparc_std_struct_return is enabled. */
8775 if (!TARGET_ARCH64
8776 && cfun->returns_struct
8777 && !sparc_std_struct_return
8778 && DECL_SIZE (DECL_RESULT (current_function_decl))
8779 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8780 == INTEGER_CST
8781 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8782 fputs ("12", file);
8783 else
8784 fputc ('8', file);
8785 return;
8786 case '_':
8787 /* Output the Embedded Medium/Anywhere code model base register. */
8788 fputs (EMBMEDANY_BASE_REG, file);
8789 return;
8790 case '&':
8791 /* Print some local dynamic TLS name. */
8792 assemble_name (file, get_some_local_dynamic_name ());
8793 return;
8794
8795 case 'Y':
8796 /* Adjust the operand to take into account a RESTORE operation. */
8797 if (GET_CODE (x) == CONST_INT)
8798 break;
8799 else if (GET_CODE (x) != REG)
8800 output_operand_lossage ("invalid %%Y operand");
8801 else if (REGNO (x) < 8)
8802 fputs (reg_names[REGNO (x)], file);
8803 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8804 fputs (reg_names[REGNO (x)-16], file);
8805 else
8806 output_operand_lossage ("invalid %%Y operand");
8807 return;
8808 case 'L':
8809 /* Print out the low order register name of a register pair. */
8810 if (WORDS_BIG_ENDIAN)
8811 fputs (reg_names[REGNO (x)+1], file);
8812 else
8813 fputs (reg_names[REGNO (x)], file);
8814 return;
8815 case 'H':
8816 /* Print out the high order register name of a register pair. */
8817 if (WORDS_BIG_ENDIAN)
8818 fputs (reg_names[REGNO (x)], file);
8819 else
8820 fputs (reg_names[REGNO (x)+1], file);
8821 return;
8822 case 'R':
8823 /* Print out the second register name of a register pair or quad.
8824 I.e., R (%o0) => %o1. */
8825 fputs (reg_names[REGNO (x)+1], file);
8826 return;
8827 case 'S':
8828 /* Print out the third register name of a register quad.
8829 I.e., S (%o0) => %o2. */
8830 fputs (reg_names[REGNO (x)+2], file);
8831 return;
8832 case 'T':
8833 /* Print out the fourth register name of a register quad.
8834 I.e., T (%o0) => %o3. */
8835 fputs (reg_names[REGNO (x)+3], file);
8836 return;
8837 case 'x':
8838 /* Print a condition code register. */
8839 if (REGNO (x) == SPARC_ICC_REG)
8840 {
8841 /* We don't handle CC[X]_NOOVmode because they're not supposed
8842 to occur here. */
8843 if (GET_MODE (x) == CCmode)
8844 fputs ("%icc", file);
8845 else if (GET_MODE (x) == CCXmode)
8846 fputs ("%xcc", file);
8847 else
8848 gcc_unreachable ();
8849 }
8850 else
8851 /* %fccN register */
8852 fputs (reg_names[REGNO (x)], file);
8853 return;
8854 case 'm':
8855 /* Print the operand's address only. */
8856 output_address (XEXP (x, 0));
8857 return;
8858 case 'r':
8859 /* In this case we need a register. Use %g0 if the
8860 operand is const0_rtx. */
8861 if (x == const0_rtx
8862 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8863 {
8864 fputs ("%g0", file);
8865 return;
8866 }
8867 else
8868 break;
8869
8870 case 'A':
8871 switch (GET_CODE (x))
8872 {
8873 case IOR: fputs ("or", file); break;
8874 case AND: fputs ("and", file); break;
8875 case XOR: fputs ("xor", file); break;
8876 default: output_operand_lossage ("invalid %%A operand");
8877 }
8878 return;
8879
8880 case 'B':
8881 switch (GET_CODE (x))
8882 {
8883 case IOR: fputs ("orn", file); break;
8884 case AND: fputs ("andn", file); break;
8885 case XOR: fputs ("xnor", file); break;
8886 default: output_operand_lossage ("invalid %%B operand");
8887 }
8888 return;
8889
8890 /* This is used by the conditional move instructions. */
8891 case 'C':
8892 {
8893 enum rtx_code rc = GET_CODE (x);
8894
8895 switch (rc)
8896 {
8897 case NE: fputs ("ne", file); break;
8898 case EQ: fputs ("e", file); break;
8899 case GE: fputs ("ge", file); break;
8900 case GT: fputs ("g", file); break;
8901 case LE: fputs ("le", file); break;
8902 case LT: fputs ("l", file); break;
8903 case GEU: fputs ("geu", file); break;
8904 case GTU: fputs ("gu", file); break;
8905 case LEU: fputs ("leu", file); break;
8906 case LTU: fputs ("lu", file); break;
8907 case LTGT: fputs ("lg", file); break;
8908 case UNORDERED: fputs ("u", file); break;
8909 case ORDERED: fputs ("o", file); break;
8910 case UNLT: fputs ("ul", file); break;
8911 case UNLE: fputs ("ule", file); break;
8912 case UNGT: fputs ("ug", file); break;
8913 case UNGE: fputs ("uge", file); break;
8914 case UNEQ: fputs ("ue", file); break;
8915 default: output_operand_lossage ("invalid %%C operand");
8916 }
8917 return;
8918 }
8919
8920 /* These are used by the movr instruction pattern. */
8921 case 'D':
8922 {
8923 enum rtx_code rc = GET_CODE (x);
8924 switch (rc)
8925 {
8926 case NE: fputs ("ne", file); break;
8927 case EQ: fputs ("e", file); break;
8928 case GE: fputs ("gez", file); break;
8929 case LT: fputs ("lz", file); break;
8930 case LE: fputs ("lez", file); break;
8931 case GT: fputs ("gz", file); break;
8932 default: output_operand_lossage ("invalid %%D operand");
8933 }
8934 return;
8935 }
8936
8937 case 'b':
8938 {
8939 /* Print a sign-extended character. */
8940 int i = trunc_int_for_mode (INTVAL (x), QImode);
8941 fprintf (file, "%d", i);
8942 return;
8943 }
8944
8945 case 'f':
8946 /* Operand must be a MEM; write its address. */
8947 if (GET_CODE (x) != MEM)
8948 output_operand_lossage ("invalid %%f operand");
8949 output_address (XEXP (x, 0));
8950 return;
8951
8952 case 's':
8953 {
8954 /* Print a sign-extended 32-bit value. */
8955 HOST_WIDE_INT i;
8956 if (GET_CODE(x) == CONST_INT)
8957 i = INTVAL (x);
8958 else if (GET_CODE(x) == CONST_DOUBLE)
8959 i = CONST_DOUBLE_LOW (x);
8960 else
8961 {
8962 output_operand_lossage ("invalid %%s operand");
8963 return;
8964 }
8965 i = trunc_int_for_mode (i, SImode);
8966 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
8967 return;
8968 }
8969
8970 case 0:
8971 /* Do nothing special. */
8972 break;
8973
8974 default:
8975 /* Undocumented flag. */
8976 output_operand_lossage ("invalid operand output code");
8977 }
8978
8979 if (GET_CODE (x) == REG)
8980 fputs (reg_names[REGNO (x)], file);
8981 else if (GET_CODE (x) == MEM)
8982 {
8983 fputc ('[', file);
8984 /* Poor Sun assembler doesn't understand absolute addressing. */
8985 if (CONSTANT_P (XEXP (x, 0)))
8986 fputs ("%g0+", file);
8987 output_address (XEXP (x, 0));
8988 fputc (']', file);
8989 }
8990 else if (GET_CODE (x) == HIGH)
8991 {
8992 fputs ("%hi(", file);
8993 output_addr_const (file, XEXP (x, 0));
8994 fputc (')', file);
8995 }
8996 else if (GET_CODE (x) == LO_SUM)
8997 {
8998 sparc_print_operand (file, XEXP (x, 0), 0);
8999 if (TARGET_CM_MEDMID)
9000 fputs ("+%l44(", file);
9001 else
9002 fputs ("+%lo(", file);
9003 output_addr_const (file, XEXP (x, 1));
9004 fputc (')', file);
9005 }
9006 else if (GET_CODE (x) == CONST_DOUBLE
9007 && (GET_MODE (x) == VOIDmode
9008 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
9009 {
9010 if (CONST_DOUBLE_HIGH (x) == 0)
9011 fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
9012 else if (CONST_DOUBLE_HIGH (x) == -1
9013 && CONST_DOUBLE_LOW (x) < 0)
9014 fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
9015 else
9016 output_operand_lossage ("long long constant not a valid immediate operand");
9017 }
9018 else if (GET_CODE (x) == CONST_DOUBLE)
9019 output_operand_lossage ("floating point constant not a valid immediate operand");
9020 else { output_addr_const (file, x); }
9021 }
9022
9023 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9024
9025 static void
9026 sparc_print_operand_address (FILE *file, rtx x)
9027 {
9028 register rtx base, index = 0;
9029 int offset = 0;
9030 register rtx addr = x;
9031
9032 if (REG_P (addr))
9033 fputs (reg_names[REGNO (addr)], file);
9034 else if (GET_CODE (addr) == PLUS)
9035 {
9036 if (CONST_INT_P (XEXP (addr, 0)))
9037 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9038 else if (CONST_INT_P (XEXP (addr, 1)))
9039 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9040 else
9041 base = XEXP (addr, 0), index = XEXP (addr, 1);
9042 if (GET_CODE (base) == LO_SUM)
9043 {
9044 gcc_assert (USE_AS_OFFSETABLE_LO10
9045 && TARGET_ARCH64
9046 && ! TARGET_CM_MEDMID);
9047 output_operand (XEXP (base, 0), 0);
9048 fputs ("+%lo(", file);
9049 output_address (XEXP (base, 1));
9050 fprintf (file, ")+%d", offset);
9051 }
9052 else
9053 {
9054 fputs (reg_names[REGNO (base)], file);
9055 if (index == 0)
9056 fprintf (file, "%+d", offset);
9057 else if (REG_P (index))
9058 fprintf (file, "+%s", reg_names[REGNO (index)]);
9059 else if (GET_CODE (index) == SYMBOL_REF
9060 || GET_CODE (index) == LABEL_REF
9061 || GET_CODE (index) == CONST)
9062 fputc ('+', file), output_addr_const (file, index);
9063 else gcc_unreachable ();
9064 }
9065 }
9066 else if (GET_CODE (addr) == MINUS
9067 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9068 {
9069 output_addr_const (file, XEXP (addr, 0));
9070 fputs ("-(", file);
9071 output_addr_const (file, XEXP (addr, 1));
9072 fputs ("-.)", file);
9073 }
9074 else if (GET_CODE (addr) == LO_SUM)
9075 {
9076 output_operand (XEXP (addr, 0), 0);
9077 if (TARGET_CM_MEDMID)
9078 fputs ("+%l44(", file);
9079 else
9080 fputs ("+%lo(", file);
9081 output_address (XEXP (addr, 1));
9082 fputc (')', file);
9083 }
9084 else if (flag_pic
9085 && GET_CODE (addr) == CONST
9086 && GET_CODE (XEXP (addr, 0)) == MINUS
9087 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9088 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9089 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9090 {
9091 addr = XEXP (addr, 0);
9092 output_addr_const (file, XEXP (addr, 0));
9093 /* Group the args of the second CONST in parentheses. */
9094 fputs ("-(", file);
9095 /* Skip past the second CONST--it does nothing for us. */
9096 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9097 /* Close the parenthesis. */
9098 fputc (')', file);
9099 }
9100 else
9101 {
9102 output_addr_const (file, addr);
9103 }
9104 }
9105 \f
9106 /* Target hook for assembling integer objects. The sparc version has
9107 special handling for aligned DI-mode objects. */
9108
9109 static bool
9110 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9111 {
9112 /* ??? We only output .xword's for symbols and only then in environments
9113 where the assembler can handle them. */
9114 if (aligned_p && size == 8
9115 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
9116 {
9117 if (TARGET_V9)
9118 {
9119 assemble_integer_with_op ("\t.xword\t", x);
9120 return true;
9121 }
9122 else
9123 {
9124 assemble_aligned_integer (4, const0_rtx);
9125 assemble_aligned_integer (4, x);
9126 return true;
9127 }
9128 }
9129 return default_assemble_integer (x, size, aligned_p);
9130 }
9131 \f
9132 /* Return the value of a code used in the .proc pseudo-op that says
9133 what kind of result this function returns. For non-C types, we pick
9134 the closest C type. */
9135
9136 #ifndef SHORT_TYPE_SIZE
9137 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9138 #endif
9139
9140 #ifndef INT_TYPE_SIZE
9141 #define INT_TYPE_SIZE BITS_PER_WORD
9142 #endif
9143
9144 #ifndef LONG_TYPE_SIZE
9145 #define LONG_TYPE_SIZE BITS_PER_WORD
9146 #endif
9147
9148 #ifndef LONG_LONG_TYPE_SIZE
9149 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9150 #endif
9151
9152 #ifndef FLOAT_TYPE_SIZE
9153 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9154 #endif
9155
9156 #ifndef DOUBLE_TYPE_SIZE
9157 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9158 #endif
9159
9160 #ifndef LONG_DOUBLE_TYPE_SIZE
9161 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9162 #endif
9163
9164 unsigned long
9165 sparc_type_code (register tree type)
9166 {
9167 register unsigned long qualifiers = 0;
9168 register unsigned shift;
9169
9170 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9171 setting more, since some assemblers will give an error for this. Also,
9172 we must be careful to avoid shifts of 32 bits or more to avoid getting
9173 unpredictable results. */
9174
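/* As a worked example, the type "int **" encodes as
   (1 << 6) | (1 << 8) | 4 = 0x144: two pointer qualifiers starting at
   bit 6, followed by the code 4 for plain int.  */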
9175 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9176 {
9177 switch (TREE_CODE (type))
9178 {
9179 case ERROR_MARK:
9180 return qualifiers;
9181
9182 case ARRAY_TYPE:
9183 qualifiers |= (3 << shift);
9184 break;
9185
9186 case FUNCTION_TYPE:
9187 case METHOD_TYPE:
9188 qualifiers |= (2 << shift);
9189 break;
9190
9191 case POINTER_TYPE:
9192 case REFERENCE_TYPE:
9193 case OFFSET_TYPE:
9194 qualifiers |= (1 << shift);
9195 break;
9196
9197 case RECORD_TYPE:
9198 return (qualifiers | 8);
9199
9200 case UNION_TYPE:
9201 case QUAL_UNION_TYPE:
9202 return (qualifiers | 9);
9203
9204 case ENUMERAL_TYPE:
9205 return (qualifiers | 10);
9206
9207 case VOID_TYPE:
9208 return (qualifiers | 16);
9209
9210 case INTEGER_TYPE:
9211 /* If this is a range type, consider it to be the underlying
9212 type. */
9213 if (TREE_TYPE (type) != 0)
9214 break;
9215
9216 /* Carefully distinguish all the standard types of C,
9217 without messing up if the language is not C. We do this by
9218 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9219 look at both the names and the above fields, but that's redundant.
9220 Any type whose size is between two C types will be considered
9221 to be the wider of the two types. Also, we do not have a
9222 special code to use for "long long", so anything wider than
9223 long is treated the same. Note that we can't distinguish
9224 between "int" and "long" in this code if they are the same
9225 size, but that's fine, since neither can the assembler. */
9226
9227 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9228 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9229
9230 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9231 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9232
9233 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9234 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9235
9236 else
9237 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9238
9239 case REAL_TYPE:
9240 /* If this is a range type, consider it to be the underlying
9241 type. */
9242 if (TREE_TYPE (type) != 0)
9243 break;
9244
9245 /* Carefully distinguish all the standard types of C,
9246 without messing up if the language is not C. */
9247
9248 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9249 return (qualifiers | 6);
9250
9251 else
9252 return (qualifiers | 7);
9253
9254 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9255 /* ??? We need to distinguish between double and float complex types,
9256 but I don't know how yet because I can't reach this code from
9257 existing front-ends. */
9258 return (qualifiers | 7); /* Who knows? */
9259
9260 case VECTOR_TYPE:
9261 case BOOLEAN_TYPE: /* Boolean truth value type. */
9262 case LANG_TYPE:
9263 case NULLPTR_TYPE:
9264 return qualifiers;
9265
9266 default:
9267 gcc_unreachable (); /* Not a type! */
9268 }
9269 }
9270
9271 return qualifiers;
9272 }
9273 \f
9274 /* Nested function support. */
9275
9276 /* Emit RTL insns to initialize the variable parts of a trampoline.
9277 FNADDR is an RTX for the address of the function's pure code.
9278 CXT is an RTX for the static chain value for the function.
9279
9280 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9281 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9282 (to store insns). This is a bit excessive. Perhaps a different
9283 mechanism would be better here.
9284
9285 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9286
9287 static void
9288 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9289 {
9290 /* SPARC 32-bit trampoline:
9291
9292 sethi %hi(fn), %g1
9293 sethi %hi(static), %g2
9294 jmp %g1+%lo(fn)
9295 or %g2, %lo(static), %g2
9296
9297 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9298 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9299 */
9300
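/* For reference, the constants below are the fixed opcode bits of those
   four instructions: 0x03000000 is "sethi 0, %g1", 0x05000000 is
   "sethi 0, %g2", 0x81c06000 is "jmpl %g1+0, %g0" and 0x8410a000 is
   "or %g2, 0, %g2"; the expand_* calls OR the pieces of FNADDR and CXT
   into the immediate fields.  */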
9301 emit_move_insn
9302 (adjust_address (m_tramp, SImode, 0),
9303 expand_binop (SImode, ior_optab,
9304 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9305 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9306 NULL_RTX, 1, OPTAB_DIRECT));
9307
9308 emit_move_insn
9309 (adjust_address (m_tramp, SImode, 4),
9310 expand_binop (SImode, ior_optab,
9311 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9312 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9313 NULL_RTX, 1, OPTAB_DIRECT));
9314
9315 emit_move_insn
9316 (adjust_address (m_tramp, SImode, 8),
9317 expand_binop (SImode, ior_optab,
9318 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9319 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9320 NULL_RTX, 1, OPTAB_DIRECT));
9321
9322 emit_move_insn
9323 (adjust_address (m_tramp, SImode, 12),
9324 expand_binop (SImode, ior_optab,
9325 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9326 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9327 NULL_RTX, 1, OPTAB_DIRECT));
9328
9329 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9330 aligned on a 16 byte boundary so one flush clears it all. */
9331 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9332 if (sparc_cpu != PROCESSOR_ULTRASPARC
9333 && sparc_cpu != PROCESSOR_ULTRASPARC3
9334 && sparc_cpu != PROCESSOR_NIAGARA
9335 && sparc_cpu != PROCESSOR_NIAGARA2
9336 && sparc_cpu != PROCESSOR_NIAGARA3
9337 && sparc_cpu != PROCESSOR_NIAGARA4)
9338 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9339
9340 /* Call __enable_execute_stack after writing onto the stack to make sure
9341 the stack address is accessible. */
9342 #ifdef HAVE_ENABLE_EXECUTE_STACK
9343 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9344 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9345 #endif
9346
9347 }
9348
9349 /* The 64-bit version is simpler because it makes more sense to load the
9350 values as "immediate" data out of the trampoline. It's also easier since
9351 we can read the PC without clobbering a register. */
9352
9353 static void
9354 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9355 {
9356 /* SPARC 64-bit trampoline:
9357
9358 rd %pc, %g1
9359 ldx [%g1+24], %g5
9360 jmp %g5
9361 ldx [%g1+16], %g5
9362 +16 bytes data
9363 */
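/* How the four opcode words below decode (standard V9 encodings):
0x83414000 is "rd %pc, %g1", 0xca586018 is "ldx [%g1+24], %g5",
0x81c14000 is "jmp %g5" and 0xca586010 is "ldx [%g1+16], %g5".
The first ldx fetches the function address stored at offset 24; the
delay-slot ldx then loads the static chain from offset 16 into %g5. */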
9364
9365 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9366 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9367 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9368 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9369 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9370 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9371 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9372 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9373 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9374 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9375 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9376
9377 if (sparc_cpu != PROCESSOR_ULTRASPARC
9378 && sparc_cpu != PROCESSOR_ULTRASPARC3
9379 && sparc_cpu != PROCESSOR_NIAGARA
9380 && sparc_cpu != PROCESSOR_NIAGARA2
9381 && sparc_cpu != PROCESSOR_NIAGARA3
9382 && sparc_cpu != PROCESSOR_NIAGARA4)
9383 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9384
9385 /* Call __enable_execute_stack after writing onto the stack to make sure
9386 the stack address is accessible. */
9387 #ifdef HAVE_ENABLE_EXECUTE_STACK
9388 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9389 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9390 #endif
9391 }
9392
9393 /* Worker for TARGET_TRAMPOLINE_INIT. */
9394
9395 static void
9396 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9397 {
9398 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9399 cxt = force_reg (Pmode, cxt);
9400 if (TARGET_ARCH64)
9401 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9402 else
9403 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9404 }
9405 \f
9406 /* Adjust the cost of a scheduling dependency. Return the new cost of
9407 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9408
9409 static int
9410 supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
9411 {
9412 enum attr_type insn_type;
9413
9414 if (! recog_memoized (insn))
9415 return 0;
9416
9417 insn_type = get_attr_type (insn);
9418
9419 if (REG_NOTE_KIND (link) == 0)
9420 {
9421 /* Data dependency; DEP_INSN writes a register that INSN reads some
9422 cycles later. */
9423
9424 /* If a load, then the dependence must be on the memory address;
9425 add an extra "cycle". Note that the cost could be two cycles
9426 if the reg was written late in an instruction group; we cannot
9427 tell here. */
9428 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9429 return cost + 3;
9430
9431 /* Get the delay only if the address of the store is the dependence. */
9432 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9433 {
9434 rtx pat = PATTERN (insn);
9435 rtx dep_pat = PATTERN (dep_insn);
9436
9437 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9438 return cost; /* This should not happen! */
9439
9440 /* The dependency between the two instructions was on the data that
9441 is being stored. Assume that this implies that the address of the
9442 store is not dependent. */
9443 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9444 return cost;
9445
9446 return cost + 3; /* An approximation. */
9447 }
9448
9449 /* A shift instruction cannot receive its data from an instruction
9450 in the same cycle; add a one cycle penalty. */
9451 if (insn_type == TYPE_SHIFT)
9452 return cost + 3; /* Split before cascade into shift. */
9453 }
9454 else
9455 {
9456 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9457 INSN writes some cycles later. */
9458
9459 /* These are only significant for the fpu unit; writing a fp reg before
9460 the fpu has finished with it stalls the processor. */
9461
9462 /* Reusing an integer register causes no problems. */
9463 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9464 return 0;
9465 }
9466
9467 return cost;
9468 }
9469
9470 static int
9471 hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
9472 {
9473 enum attr_type insn_type, dep_type;
9474 rtx pat = PATTERN (insn);
9475 rtx dep_pat = PATTERN (dep_insn);
9476
9477 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9478 return cost;
9479
9480 insn_type = get_attr_type (insn);
9481 dep_type = get_attr_type (dep_insn);
9482
9483 switch (REG_NOTE_KIND (link))
9484 {
9485 case 0:
9486 /* Data dependency; DEP_INSN writes a register that INSN reads some
9487 cycles later. */
9488
9489 switch (insn_type)
9490 {
9491 case TYPE_STORE:
9492 case TYPE_FPSTORE:
9493 /* Get the delay iff the address of the store is the dependence. */
9494 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9495 return cost;
9496
9497 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9498 return cost;
9499 return cost + 3;
9500
9501 case TYPE_LOAD:
9502 case TYPE_SLOAD:
9503 case TYPE_FPLOAD:
9504 /* If a load, then the dependence must be on the memory address. If
9505 the addresses aren't equal, then it might be a false dependency. */
9506 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9507 {
9508 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9509 || GET_CODE (SET_DEST (dep_pat)) != MEM
9510 || GET_CODE (SET_SRC (pat)) != MEM
9511 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9512 XEXP (SET_SRC (pat), 0)))
9513 return cost + 2;
9514
9515 return cost + 8;
9516 }
9517 break;
9518
9519 case TYPE_BRANCH:
9520 /* Compare to branch latency is 0. There is no benefit from
9521 separating compare and branch. */
9522 if (dep_type == TYPE_COMPARE)
9523 return 0;
9524 /* Floating point compare to branch latency is less than
9525 compare to conditional move. */
9526 if (dep_type == TYPE_FPCMP)
9527 return cost - 1;
9528 break;
9529 default:
9530 break;
9531 }
9532 break;
9533
9534 case REG_DEP_ANTI:
9535 /* Anti-dependencies only penalize the fpu unit. */
9536 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9537 return 0;
9538 break;
9539
9540 default:
9541 break;
9542 }
9543
9544 return cost;
9545 }
9546
9547 static int
9548 sparc_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
9549 {
9550 switch (sparc_cpu)
9551 {
9552 case PROCESSOR_SUPERSPARC:
9553 cost = supersparc_adjust_cost (insn, link, dep, cost);
9554 break;
9555 case PROCESSOR_HYPERSPARC:
9556 case PROCESSOR_SPARCLITE86X:
9557 cost = hypersparc_adjust_cost (insn, link, dep, cost);
9558 break;
9559 default:
9560 break;
9561 }
9562 return cost;
9563 }
9564
9565 static void
9566 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9567 int sched_verbose ATTRIBUTE_UNUSED,
9568 int max_ready ATTRIBUTE_UNUSED)
9569 {}
9570
9571 static int
9572 sparc_use_sched_lookahead (void)
9573 {
9574 if (sparc_cpu == PROCESSOR_NIAGARA
9575 || sparc_cpu == PROCESSOR_NIAGARA2
9576 || sparc_cpu == PROCESSOR_NIAGARA3)
9577 return 0;
9578 if (sparc_cpu == PROCESSOR_NIAGARA4)
9579 return 2;
9580 if (sparc_cpu == PROCESSOR_ULTRASPARC
9581 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9582 return 4;
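/* Test membership in a small set of CPUs with a single bit mask. */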
9583 if ((1 << sparc_cpu) &
9584 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9585 (1 << PROCESSOR_SPARCLITE86X)))
9586 return 3;
9587 return 0;
9588 }
9589
9590 static int
9591 sparc_issue_rate (void)
9592 {
9593 switch (sparc_cpu)
9594 {
9595 case PROCESSOR_NIAGARA:
9596 case PROCESSOR_NIAGARA2:
9597 case PROCESSOR_NIAGARA3:
9598 default:
9599 return 1;
9600 case PROCESSOR_NIAGARA4:
9601 case PROCESSOR_V9:
9602 /* Assume V9 processors are capable of at least dual-issue. */
9603 return 2;
9604 case PROCESSOR_SUPERSPARC:
9605 return 3;
9606 case PROCESSOR_HYPERSPARC:
9607 case PROCESSOR_SPARCLITE86X:
9608 return 2;
9609 case PROCESSOR_ULTRASPARC:
9610 case PROCESSOR_ULTRASPARC3:
9611 return 4;
9612 }
9613 }
9614
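/* Return 1 if the SET contained in INSN is known to clear the upper
32 bits of its destination, -1 if it sign-extends a 32-bit value, and
0 if nothing is known; this summarizes the cases below and matches
the contract of sparc_check_64. */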
9615 static int
9616 set_extends (rtx insn)
9617 {
9618 register rtx pat = PATTERN (insn);
9619
9620 switch (GET_CODE (SET_SRC (pat)))
9621 {
9622 /* Load and some shift instructions zero extend. */
9623 case MEM:
9624 case ZERO_EXTEND:
9625 /* sethi clears the high bits. */
9626 case HIGH:
9627 /* LO_SUM is used with sethi; sethi clears the high
9628 bits and the values used with lo_sum are positive. */
9629 case LO_SUM:
9630 /* Store flag stores 0 or 1. */
9631 case LT: case LTU:
9632 case GT: case GTU:
9633 case LE: case LEU:
9634 case GE: case GEU:
9635 case EQ:
9636 case NE:
9637 return 1;
9638 case AND:
9639 {
9640 rtx op0 = XEXP (SET_SRC (pat), 0);
9641 rtx op1 = XEXP (SET_SRC (pat), 1);
9642 if (GET_CODE (op1) == CONST_INT)
9643 return INTVAL (op1) >= 0;
9644 if (GET_CODE (op0) != REG)
9645 return 0;
9646 if (sparc_check_64 (op0, insn) == 1)
9647 return 1;
9648 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9649 }
9650 case IOR:
9651 case XOR:
9652 {
9653 rtx op0 = XEXP (SET_SRC (pat), 0);
9654 rtx op1 = XEXP (SET_SRC (pat), 1);
9655 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9656 return 0;
9657 if (GET_CODE (op1) == CONST_INT)
9658 return INTVAL (op1) >= 0;
9659 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9660 }
9661 case LSHIFTRT:
9662 return GET_MODE (SET_SRC (pat)) == SImode;
9663 /* Positive integers leave the high bits zero. */
9664 case CONST_DOUBLE:
9665 return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
9666 case CONST_INT:
9667 return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
9668 case ASHIFTRT:
9669 case SIGN_EXTEND:
9670 return - (GET_MODE (SET_SRC (pat)) == SImode);
9671 case REG:
9672 return sparc_check_64 (SET_SRC (pat), insn);
9673 default:
9674 return 0;
9675 }
9676 }
9677
9678 /* We _ought_ to have only one kind per function, but... */
9679 static GTY(()) rtx sparc_addr_diff_list;
9680 static GTY(()) rtx sparc_addr_list;
9681
9682 void
9683 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9684 {
9685 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9686 if (diff)
9687 sparc_addr_diff_list
9688 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9689 else
9690 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9691 }
9692
9693 static void
9694 sparc_output_addr_vec (rtx vec)
9695 {
9696 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9697 int idx, vlen = XVECLEN (body, 0);
9698
9699 #ifdef ASM_OUTPUT_ADDR_VEC_START
9700 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9701 #endif
9702
9703 #ifdef ASM_OUTPUT_CASE_LABEL
9704 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9705 NEXT_INSN (lab));
9706 #else
9707 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9708 #endif
9709
9710 for (idx = 0; idx < vlen; idx++)
9711 {
9712 ASM_OUTPUT_ADDR_VEC_ELT
9713 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9714 }
9715
9716 #ifdef ASM_OUTPUT_ADDR_VEC_END
9717 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9718 #endif
9719 }
9720
9721 static void
9722 sparc_output_addr_diff_vec (rtx vec)
9723 {
9724 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9725 rtx base = XEXP (XEXP (body, 0), 0);
9726 int idx, vlen = XVECLEN (body, 1);
9727
9728 #ifdef ASM_OUTPUT_ADDR_VEC_START
9729 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9730 #endif
9731
9732 #ifdef ASM_OUTPUT_CASE_LABEL
9733 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9734 NEXT_INSN (lab));
9735 #else
9736 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9737 #endif
9738
9739 for (idx = 0; idx < vlen; idx++)
9740 {
9741 ASM_OUTPUT_ADDR_DIFF_ELT
9742 (asm_out_file,
9743 body,
9744 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9745 CODE_LABEL_NUMBER (base));
9746 }
9747
9748 #ifdef ASM_OUTPUT_ADDR_VEC_END
9749 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9750 #endif
9751 }
9752
9753 static void
9754 sparc_output_deferred_case_vectors (void)
9755 {
9756 rtx t;
9757 int align;
9758
9759 if (sparc_addr_list == NULL_RTX
9760 && sparc_addr_diff_list == NULL_RTX)
9761 return;
9762
9763 /* Align to cache line in the function's code section. */
9764 switch_to_section (current_function_section ());
9765
9766 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9767 if (align > 0)
9768 ASM_OUTPUT_ALIGN (asm_out_file, align);
9769
9770 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9771 sparc_output_addr_vec (XEXP (t, 0));
9772 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9773 sparc_output_addr_diff_vec (XEXP (t, 0));
9774
9775 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9776 }
9777
9778 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9779 unknown. Return 1 if the high bits are zero, -1 if the register is
9780 sign extended. */
9781 int
9782 sparc_check_64 (rtx x, rtx insn)
9783 {
9784 /* If a register is set only once it is safe to ignore insns this
9785 code does not know how to handle. The loop will either recognize
9786 the single set and return the correct value or fail to recognize
9787 it and return 0. */
9788 int set_once = 0;
9789 rtx y = x;
9790
9791 gcc_assert (GET_CODE (x) == REG);
9792
9793 if (GET_MODE (x) == DImode)
9794 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9795
9796 if (flag_expensive_optimizations
9797 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9798 set_once = 1;
9799
9800 if (insn == 0)
9801 {
9802 if (set_once)
9803 insn = get_last_insn_anywhere ();
9804 else
9805 return 0;
9806 }
9807
9808 while ((insn = PREV_INSN (insn)))
9809 {
9810 switch (GET_CODE (insn))
9811 {
9812 case JUMP_INSN:
9813 case NOTE:
9814 break;
9815 case CODE_LABEL:
9816 case CALL_INSN:
9817 default:
9818 if (! set_once)
9819 return 0;
9820 break;
9821 case INSN:
9822 {
9823 rtx pat = PATTERN (insn);
9824 if (GET_CODE (pat) != SET)
9825 return 0;
9826 if (rtx_equal_p (x, SET_DEST (pat)))
9827 return set_extends (insn);
9828 if (y && rtx_equal_p (y, SET_DEST (pat)))
9829 return set_extends (insn);
9830 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9831 return 0;
9832 }
9833 }
9834 }
9835 return 0;
9836 }
9837
9838 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9839 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
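/* As a sketch, for (say) OPCODE == "sllx" with a non-constant source
and the destination in a 64-bit global or out register (%3 aliased
to %0), the emitted sequence is:

sllx %H1, 32, %0 ! glue the two 32-bit halves...
srl %L1, 0, %L1 ! ...zero-extending the low word...
or %L1, %0, %0 ! ...into a single 64-bit register
sllx %0, %2, %L0 ! perform the shift
srlx %L0, 32, %H0 ! split the result back into halves

The srl is omitted when sparc_check_64 proves that the high bits of
the low word are already clear. */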
9840
9841 const char *
9842 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
9843 {
9844 static char asm_code[60];
9845
9846 /* The scratch register is only required when the destination
9847 register is not a 64-bit global or out register. */
9848 if (which_alternative != 2)
9849 operands[3] = operands[0];
9850
9851 /* We can only shift by constants <= 63. */
9852 if (GET_CODE (operands[2]) == CONST_INT)
9853 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9854
9855 if (GET_CODE (operands[1]) == CONST_INT)
9856 {
9857 output_asm_insn ("mov\t%1, %3", operands);
9858 }
9859 else
9860 {
9861 output_asm_insn ("sllx\t%H1, 32, %3", operands);
9862 if (sparc_check_64 (operands[1], insn) <= 0)
9863 output_asm_insn ("srl\t%L1, 0, %L1", operands);
9864 output_asm_insn ("or\t%L1, %3, %3", operands);
9865 }
9866
9867 strcpy (asm_code, opcode);
9868
9869 if (which_alternative != 2)
9870 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9871 else
9872 return
9873 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
9874 }
9875 \f
9876 /* Output rtl to increment the profiler label LABELNO
9877 for profiling a function entry. */
9878
9879 void
9880 sparc_profile_hook (int labelno)
9881 {
9882 char buf[32];
9883 rtx lab, fun;
9884
9885 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9886 if (NO_PROFILE_COUNTERS)
9887 {
9888 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9889 }
9890 else
9891 {
9892 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9893 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9894 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9895 }
9896 }
9897 \f
9898 #ifdef TARGET_SOLARIS
9899 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
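/* As an example (with a hypothetical section name), a writable data
section comes out as

.section ".foo",#alloc,#write,#progbits

where the #progbits/#nobits suffix is only emitted if the assembler
is recent enough to accept it. */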
9900
9901 static void
9902 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9903 tree decl ATTRIBUTE_UNUSED)
9904 {
9905 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9906 {
9907 solaris_elf_asm_comdat_section (name, flags, decl);
9908 return;
9909 }
9910
9911 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9912
9913 if (!(flags & SECTION_DEBUG))
9914 fputs (",#alloc", asm_out_file);
9915 if (flags & SECTION_WRITE)
9916 fputs (",#write", asm_out_file);
9917 if (flags & SECTION_TLS)
9918 fputs (",#tls", asm_out_file);
9919 if (flags & SECTION_CODE)
9920 fputs (",#execinstr", asm_out_file);
9921
9922 /* Sun as only supports #nobits/#progbits since Solaris 10. */
9923 if (HAVE_AS_SPARC_NOBITS)
9924 {
9925 if (flags & SECTION_BSS)
9926 fputs (",#nobits", asm_out_file);
9927 else
9928 fputs (",#progbits", asm_out_file);
9929 }
9930
9931 fputc ('\n', asm_out_file);
9932 }
9933 #endif /* TARGET_SOLARIS */
9934
9935 /* We do not allow indirect calls to be optimized into sibling calls.
9936
9937 We cannot use sibling calls when delayed branches are disabled
9938 because they will likely require the call delay slot to be filled.
9939
9940 Also, on SPARC 32-bit we cannot emit a sibling call when the
9941 current function returns a structure. This is because the "unimp
9942 after call" convention would cause the callee to return to the
9943 wrong place. The generic code already disallows cases where the
9944 function being called returns a structure.
9945
9946 It may seem strange how this last case could occur. Usually there
9947 is code after the call which jumps to epilogue code which dumps the
9948 return value into the struct return area. That ought to invalidate
9949 the sibling call, right? Well, in the C++ case we can end up passing
9950 the pointer to the struct return area to a constructor (which returns
9951 void) and then nothing else happens. Such a sibling call would look
9952 valid without the added check here.
9953
9954 VxWorks PIC PLT entries require the global pointer to be initialized
9955 on entry. We therefore can't emit sibling calls to them. */
9956 static bool
9957 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9958 {
9959 return (decl
9960 && flag_delayed_branch
9961 && (TARGET_ARCH64 || ! cfun->returns_struct)
9962 && !(TARGET_VXWORKS_RTP
9963 && flag_pic
9964 && !targetm.binds_local_p (decl)));
9965 }
9966 \f
9967 /* libfunc renaming. */
9968
9969 static void
9970 sparc_init_libfuncs (void)
9971 {
9972 if (TARGET_ARCH32)
9973 {
9974 /* Use the subroutines that Sun's library provides for integer
9975 multiply and divide. The `*' prevents an underscore from
9976 being prepended by the compiler. .umul is a little faster
9977 than .mul. */
9978 set_optab_libfunc (smul_optab, SImode, "*.umul");
9979 set_optab_libfunc (sdiv_optab, SImode, "*.div");
9980 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
9981 set_optab_libfunc (smod_optab, SImode, "*.rem");
9982 set_optab_libfunc (umod_optab, SImode, "*.urem");
9983
9984 /* TFmode arithmetic. These names are part of the SPARC 32-bit ABI. */
9985 set_optab_libfunc (add_optab, TFmode, "_Q_add");
9986 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
9987 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
9988 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
9989 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
9990
9991 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
9992 is because with soft-float, the SFmode and DFmode sqrt
9993 instructions will be absent, and the compiler will notice and
9994 try to use the TFmode sqrt instruction for calls to the
9995 builtin function sqrt, but this fails. */
9996 if (TARGET_FPU)
9997 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
9998
9999 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10000 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10001 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10002 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10003 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10004 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10005
10006 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10007 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10008 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10009 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10010
10011 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10012 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10013 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10014 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10015
10016 if (DITF_CONVERSION_LIBFUNCS)
10017 {
10018 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10019 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10020 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10021 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10022 }
10023
10024 if (SUN_CONVERSION_LIBFUNCS)
10025 {
10026 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10027 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10028 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10029 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10030 }
10031 }
10032 if (TARGET_ARCH64)
10033 {
10034 /* In the SPARC 64-bit ABI, SImode multiply and divide functions
10035 do not exist in the library. Make sure the compiler does not
10036 emit calls to them by accident. (It should always use the
10037 hardware instructions.) */
10038 set_optab_libfunc (smul_optab, SImode, 0);
10039 set_optab_libfunc (sdiv_optab, SImode, 0);
10040 set_optab_libfunc (udiv_optab, SImode, 0);
10041 set_optab_libfunc (smod_optab, SImode, 0);
10042 set_optab_libfunc (umod_optab, SImode, 0);
10043
10044 if (SUN_INTEGER_MULTIPLY_64)
10045 {
10046 set_optab_libfunc (smul_optab, DImode, "__mul64");
10047 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10048 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10049 set_optab_libfunc (smod_optab, DImode, "__rem64");
10050 set_optab_libfunc (umod_optab, DImode, "__urem64");
10051 }
10052
10053 if (SUN_CONVERSION_LIBFUNCS)
10054 {
10055 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10056 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10057 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10058 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10059 }
10060 }
10061 }
10062 \f
10063 /* SPARC builtins. */
10064 enum sparc_builtins
10065 {
10066 /* FPU builtins. */
10067 SPARC_BUILTIN_LDFSR,
10068 SPARC_BUILTIN_STFSR,
10069
10070 /* VIS 1.0 builtins. */
10071 SPARC_BUILTIN_FPACK16,
10072 SPARC_BUILTIN_FPACK32,
10073 SPARC_BUILTIN_FPACKFIX,
10074 SPARC_BUILTIN_FEXPAND,
10075 SPARC_BUILTIN_FPMERGE,
10076 SPARC_BUILTIN_FMUL8X16,
10077 SPARC_BUILTIN_FMUL8X16AU,
10078 SPARC_BUILTIN_FMUL8X16AL,
10079 SPARC_BUILTIN_FMUL8SUX16,
10080 SPARC_BUILTIN_FMUL8ULX16,
10081 SPARC_BUILTIN_FMULD8SUX16,
10082 SPARC_BUILTIN_FMULD8ULX16,
10083 SPARC_BUILTIN_FALIGNDATAV4HI,
10084 SPARC_BUILTIN_FALIGNDATAV8QI,
10085 SPARC_BUILTIN_FALIGNDATAV2SI,
10086 SPARC_BUILTIN_FALIGNDATADI,
10087 SPARC_BUILTIN_WRGSR,
10088 SPARC_BUILTIN_RDGSR,
10089 SPARC_BUILTIN_ALIGNADDR,
10090 SPARC_BUILTIN_ALIGNADDRL,
10091 SPARC_BUILTIN_PDIST,
10092 SPARC_BUILTIN_EDGE8,
10093 SPARC_BUILTIN_EDGE8L,
10094 SPARC_BUILTIN_EDGE16,
10095 SPARC_BUILTIN_EDGE16L,
10096 SPARC_BUILTIN_EDGE32,
10097 SPARC_BUILTIN_EDGE32L,
10098 SPARC_BUILTIN_FCMPLE16,
10099 SPARC_BUILTIN_FCMPLE32,
10100 SPARC_BUILTIN_FCMPNE16,
10101 SPARC_BUILTIN_FCMPNE32,
10102 SPARC_BUILTIN_FCMPGT16,
10103 SPARC_BUILTIN_FCMPGT32,
10104 SPARC_BUILTIN_FCMPEQ16,
10105 SPARC_BUILTIN_FCMPEQ32,
10106 SPARC_BUILTIN_FPADD16,
10107 SPARC_BUILTIN_FPADD16S,
10108 SPARC_BUILTIN_FPADD32,
10109 SPARC_BUILTIN_FPADD32S,
10110 SPARC_BUILTIN_FPSUB16,
10111 SPARC_BUILTIN_FPSUB16S,
10112 SPARC_BUILTIN_FPSUB32,
10113 SPARC_BUILTIN_FPSUB32S,
10114 SPARC_BUILTIN_ARRAY8,
10115 SPARC_BUILTIN_ARRAY16,
10116 SPARC_BUILTIN_ARRAY32,
10117
10118 /* VIS 2.0 builtins. */
10119 SPARC_BUILTIN_EDGE8N,
10120 SPARC_BUILTIN_EDGE8LN,
10121 SPARC_BUILTIN_EDGE16N,
10122 SPARC_BUILTIN_EDGE16LN,
10123 SPARC_BUILTIN_EDGE32N,
10124 SPARC_BUILTIN_EDGE32LN,
10125 SPARC_BUILTIN_BMASK,
10126 SPARC_BUILTIN_BSHUFFLEV4HI,
10127 SPARC_BUILTIN_BSHUFFLEV8QI,
10128 SPARC_BUILTIN_BSHUFFLEV2SI,
10129 SPARC_BUILTIN_BSHUFFLEDI,
10130
10131 /* VIS 3.0 builtins. */
10132 SPARC_BUILTIN_CMASK8,
10133 SPARC_BUILTIN_CMASK16,
10134 SPARC_BUILTIN_CMASK32,
10135 SPARC_BUILTIN_FCHKSM16,
10136 SPARC_BUILTIN_FSLL16,
10137 SPARC_BUILTIN_FSLAS16,
10138 SPARC_BUILTIN_FSRL16,
10139 SPARC_BUILTIN_FSRA16,
10140 SPARC_BUILTIN_FSLL32,
10141 SPARC_BUILTIN_FSLAS32,
10142 SPARC_BUILTIN_FSRL32,
10143 SPARC_BUILTIN_FSRA32,
10144 SPARC_BUILTIN_PDISTN,
10145 SPARC_BUILTIN_FMEAN16,
10146 SPARC_BUILTIN_FPADD64,
10147 SPARC_BUILTIN_FPSUB64,
10148 SPARC_BUILTIN_FPADDS16,
10149 SPARC_BUILTIN_FPADDS16S,
10150 SPARC_BUILTIN_FPSUBS16,
10151 SPARC_BUILTIN_FPSUBS16S,
10152 SPARC_BUILTIN_FPADDS32,
10153 SPARC_BUILTIN_FPADDS32S,
10154 SPARC_BUILTIN_FPSUBS32,
10155 SPARC_BUILTIN_FPSUBS32S,
10156 SPARC_BUILTIN_FUCMPLE8,
10157 SPARC_BUILTIN_FUCMPNE8,
10158 SPARC_BUILTIN_FUCMPGT8,
10159 SPARC_BUILTIN_FUCMPEQ8,
10160 SPARC_BUILTIN_FHADDS,
10161 SPARC_BUILTIN_FHADDD,
10162 SPARC_BUILTIN_FHSUBS,
10163 SPARC_BUILTIN_FHSUBD,
10164 SPARC_BUILTIN_FNHADDS,
10165 SPARC_BUILTIN_FNHADDD,
10166 SPARC_BUILTIN_UMULXHI,
10167 SPARC_BUILTIN_XMULX,
10168 SPARC_BUILTIN_XMULXHI,
10169
10170 SPARC_BUILTIN_MAX
10171 };
10172
10173 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10174 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10175
10176 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10177 function decl or NULL_TREE if the builtin was not added. */
10178
10179 static tree
10180 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10181 tree type)
10182 {
10183 tree t
10184 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10185
10186 if (t)
10187 {
10188 sparc_builtins[code] = t;
10189 sparc_builtins_icode[code] = icode;
10190 }
10191
10192 return t;
10193 }
10194
10195 /* Likewise, but also marks the function as "const". */
10196
10197 static tree
10198 def_builtin_const (const char *name, enum insn_code icode,
10199 enum sparc_builtins code, tree type)
10200 {
10201 tree t = def_builtin (name, icode, code, type);
10202
10203 if (t)
10204 TREE_READONLY (t) = 1;
10205
10206 return t;
10207 }
10208
10209 /* Implement the TARGET_INIT_BUILTINS target hook.
10210 Create builtin functions for special SPARC instructions. */
10211
10212 static void
10213 sparc_init_builtins (void)
10214 {
10215 if (TARGET_FPU)
10216 sparc_fpu_init_builtins ();
10217
10218 if (TARGET_VIS)
10219 sparc_vis_init_builtins ();
10220 }
10221
10222 /* Create builtin functions for FPU instructions. */
10223
10224 static void
10225 sparc_fpu_init_builtins (void)
10226 {
10227 tree ftype
10228 = build_function_type_list (void_type_node,
10229 build_pointer_type (unsigned_type_node), 0);
10230 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10231 SPARC_BUILTIN_LDFSR, ftype);
10232 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10233 SPARC_BUILTIN_STFSR, ftype);
10234 }
10235
10236 /* Create builtin functions for VIS instructions. */
10237
10238 static void
10239 sparc_vis_init_builtins (void)
10240 {
10241 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10242 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10243 tree v4hi = build_vector_type (intHI_type_node, 4);
10244 tree v2hi = build_vector_type (intHI_type_node, 2);
10245 tree v2si = build_vector_type (intSI_type_node, 2);
10246 tree v1si = build_vector_type (intSI_type_node, 1);
10247
10248 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10249 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10250 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10251 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10252 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10253 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10254 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10255 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10256 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10257 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10258 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10259 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10260 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10261 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10262 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10263 v8qi, v8qi,
10264 intDI_type_node, 0);
10265 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10266 v8qi, v8qi, 0);
10267 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10268 v8qi, v8qi, 0);
10269 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10270 intDI_type_node,
10271 intDI_type_node, 0);
10272 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10273 intSI_type_node,
10274 intSI_type_node, 0);
10275 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10276 ptr_type_node,
10277 intSI_type_node, 0);
10278 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10279 ptr_type_node,
10280 intDI_type_node, 0);
10281 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10282 ptr_type_node,
10283 ptr_type_node, 0);
10284 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10285 ptr_type_node,
10286 ptr_type_node, 0);
10287 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10288 v4hi, v4hi, 0);
10289 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10290 v2si, v2si, 0);
10291 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10292 v4hi, v4hi, 0);
10293 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10294 v2si, v2si, 0);
10295 tree void_ftype_di = build_function_type_list (void_type_node,
10296 intDI_type_node, 0);
10297 tree di_ftype_void = build_function_type_list (intDI_type_node,
10298 void_type_node, 0);
10299 tree void_ftype_si = build_function_type_list (void_type_node,
10300 intSI_type_node, 0);
10301 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10302 float_type_node,
10303 float_type_node, 0);
10304 tree df_ftype_df_df = build_function_type_list (double_type_node,
10305 double_type_node,
10306 double_type_node, 0);
10307
10308 /* Packing and expanding vectors. */
10309 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10310 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10311 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10312 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10313 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10314 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10315 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10316 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10317 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10318 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10319
10320 /* Multiplications. */
10321 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10322 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10323 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10324 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10325 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10326 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10327 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10328 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10329 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10330 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10331 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10332 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10333 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10334 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10335
10336 /* Data aligning. */
10337 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10338 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10339 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10340 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10341 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10342 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10343 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10344 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10345
10346 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10347 SPARC_BUILTIN_WRGSR, void_ftype_di);
10348 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10349 SPARC_BUILTIN_RDGSR, di_ftype_void);
10350
10351 if (TARGET_ARCH64)
10352 {
10353 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10354 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10355 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10356 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10357 }
10358 else
10359 {
10360 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10361 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10362 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10363 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10364 }
10365
10366 /* Pixel distance. */
10367 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10368 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10369
10370 /* Edge handling. */
10371 if (TARGET_ARCH64)
10372 {
10373 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10374 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10375 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10376 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10377 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10378 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10379 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10380 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10381 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10382 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10383 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10384 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10385 }
10386 else
10387 {
10388 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10389 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10390 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10391 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10392 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10393 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10394 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10395 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10396 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10397 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10398 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10399 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10400 }
10401
10402 /* Pixel compare. */
10403 if (TARGET_ARCH64)
10404 {
10405 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10406 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10407 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10408 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10409 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10410 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10411 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10412 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10413 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10414 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10415 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10416 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10417 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10418 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10419 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10420 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10421 }
10422 else
10423 {
10424 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10425 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10426 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10427 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10428 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10429 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10430 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10431 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10432 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10433 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10434 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10435 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10436 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10437 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10438 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10439 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10440 }
10441
10442 /* Addition and subtraction. */
10443 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10444 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10445 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10446 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10447 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10448 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10449 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10450 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10451 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10452 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10453 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10454 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10455 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10456 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10457 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10458 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10459
10460 /* Three-dimensional array addressing. */
10461 if (TARGET_ARCH64)
10462 {
10463 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10464 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10465 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10466 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10467 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10468 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10469 }
10470 else
10471 {
10472 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10473 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10474 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10475 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10476 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10477 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10478 }
10479
10480 if (TARGET_VIS2)
10481 {
10482 /* Edge handling. */
10483 if (TARGET_ARCH64)
10484 {
10485 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10486 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10487 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10488 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10489 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10490 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10491 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10492 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10493 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10494 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10495 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10496 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10497 }
10498 else
10499 {
10500 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10501 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10502 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10503 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10504 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10505 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10506 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10507 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10508 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10509 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10510 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10511 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10512 }
10513
10514 /* Byte mask and shuffle. */
10515 if (TARGET_ARCH64)
10516 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10517 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10518 else
10519 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10520 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10521 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10522 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10523 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10524 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10525 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10526 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10527 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10528 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10529 }
10530
10531 if (TARGET_VIS3)
10532 {
10533 if (TARGET_ARCH64)
10534 {
10535 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10536 SPARC_BUILTIN_CMASK8, void_ftype_di);
10537 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10538 SPARC_BUILTIN_CMASK16, void_ftype_di);
10539 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10540 SPARC_BUILTIN_CMASK32, void_ftype_di);
10541 }
10542 else
10543 {
10544 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10545 SPARC_BUILTIN_CMASK8, void_ftype_si);
10546 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10547 SPARC_BUILTIN_CMASK16, void_ftype_si);
10548 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10549 SPARC_BUILTIN_CMASK32, void_ftype_si);
10550 }
10551
10552 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10553 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
10554
10555 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10556 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
10557 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10558 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
10559 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10560 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
10561 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10562 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
10563 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10564 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
10565 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10566 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
10567 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10568 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
10569 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10570 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
10571
10572 if (TARGET_ARCH64)
10573 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10574 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
10575 else
10576 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10577 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
10578
10579 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10580 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
10581 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10582 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
10583 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10584 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
10585
10586 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10587 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
10588 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10589 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
10590 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10591 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
10592 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10593 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
10594 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10595 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
10596 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10597 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
10598 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10599 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
10600 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10601 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
10602
10603 if (TARGET_ARCH64)
10604 {
10605 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10606 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
10607 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10608 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
10609 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10610 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
10611 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10612 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
10613 }
10614 else
10615 {
10616 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10617 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
10618 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10619 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
10620 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10621 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
10622 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10623 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
10624 }
10625
10626 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10627 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
10628 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10629 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
10630 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10631 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
10632 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10633 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
10634 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10635 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
10636 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10637 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
10638
10639 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10640 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
10641 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10642 SPARC_BUILTIN_XMULX, di_ftype_di_di);
10643 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10644 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
10645 }
10646 }
10647
10648 /* Implement TARGET_BUILTIN_DECL hook. */
10649
10650 static tree
10651 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10652 {
10653 if (code >= SPARC_BUILTIN_MAX)
10654 return error_mark_node;
10655
10656 return sparc_builtins[code];
10657 }
10658
10659 /* Implement TARGET_EXPAND_BUILTIN hook. */
10660
10661 static rtx
10662 sparc_expand_builtin (tree exp, rtx target,
10663 rtx subtarget ATTRIBUTE_UNUSED,
10664 enum machine_mode tmode ATTRIBUTE_UNUSED,
10665 int ignore ATTRIBUTE_UNUSED)
10666 {
10667 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10668 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10669 enum insn_code icode = sparc_builtins_icode[code];
10670 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10671 call_expr_arg_iterator iter;
10672 int arg_count = 0;
10673 rtx pat, op[4];
10674 tree arg;
10675
10676 if (nonvoid)
10677 {
10678 enum machine_mode tmode = insn_data[icode].operand[0].mode;
10679 if (!target
10680 || GET_MODE (target) != tmode
10681 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10682 op[0] = gen_reg_rtx (tmode);
10683 else
10684 op[0] = target;
10685 }
10686
10687 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10688 {
10689 const struct insn_operand_data *insn_op;
10690 int idx;
10691
10692 if (arg == error_mark_node)
10693 return NULL_RTX;
10694
10695 arg_count++;
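/* Operand 0 of the insn is the result for non-void builtins, so
argument N maps to operand N there, and to operand N-1 for void
builtins. */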
10696 idx = arg_count - !nonvoid;
10697 insn_op = &insn_data[icode].operand[idx];
10698 op[arg_count] = expand_normal (arg);
10699
10700 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
10701 {
10702 if (!address_operand (op[arg_count], SImode))
10703 {
10704 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
10705 op[arg_count] = copy_addr_to_reg (op[arg_count]);
10706 }
10707 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
10708 }
10709
10710 else if (insn_op->mode == V1DImode
10711 && GET_MODE (op[arg_count]) == DImode)
10712 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10713
10714 else if (insn_op->mode == V1SImode
10715 && GET_MODE (op[arg_count]) == SImode)
10716 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10717
10718 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10719 insn_op->mode))
10720 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10721 }
10722
10723 switch (arg_count)
10724 {
10725 case 0:
10726 pat = GEN_FCN (icode) (op[0]);
10727 break;
10728 case 1:
10729 if (nonvoid)
10730 pat = GEN_FCN (icode) (op[0], op[1]);
10731 else
10732 pat = GEN_FCN (icode) (op[1]);
10733 break;
10734 case 2:
10735 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10736 break;
10737 case 3:
10738 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10739 break;
10740 default:
10741 gcc_unreachable ();
10742 }
10743
10744 if (!pat)
10745 return NULL_RTX;
10746
10747 emit_insn (pat);
10748
10749 return (nonvoid ? op[0] : const0_rtx);
10750 }
10751
10752 /* Return the upper 16 bits of the 8x16 multiplication. */
10753
10754 static int
10755 sparc_vis_mul8x16 (int e8, int e16)
10756 {
10757 return (e8 * e16 + 128) / 256;
10758 }
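/* For instance, sparc_vis_mul8x16 (100, 300) is (100*300 + 128) / 256
= 117, i.e. the 24-bit product rounded to its upper 16 bits. */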
10759
10760 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10761 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10762
10763 static void
10764 sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode,
10765 tree inner_type, tree cst0, tree cst1)
10766 {
10767 unsigned i, num = VECTOR_CST_NELTS (cst0);
10768 int scale;
10769
10770 switch (fncode)
10771 {
10772 case SPARC_BUILTIN_FMUL8X16:
10773 for (i = 0; i < num; ++i)
10774 {
10775 int val
10776 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10777 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10778 n_elts[i] = build_int_cst (inner_type, val);
10779 }
10780 break;
10781
10782 case SPARC_BUILTIN_FMUL8X16AU:
10783 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10784
10785 for (i = 0; i < num; ++i)
10786 {
10787 int val
10788 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10789 scale);
10790 n_elts[i] = build_int_cst (inner_type, val);
10791 }
10792 break;
10793
10794 case SPARC_BUILTIN_FMUL8X16AL:
10795 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10796
10797 for (i = 0; i < num; ++i)
10798 {
10799 int val
10800 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10801 scale);
10802 n_elts[i] = build_int_cst (inner_type, val);
10803 }
10804 break;
10805
10806 default:
10807 gcc_unreachable ();
10808 }
10809 }
10810
10811 /* Implement TARGET_FOLD_BUILTIN hook.
10812
10813 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
10814 result of the function call is ignored. NULL_TREE is returned if the
10815 function could not be folded. */
10816
10817 static tree
10818 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10819 tree *args, bool ignore)
10820 {
10821 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10822 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10823 tree arg0, arg1, arg2;
10824
10825 if (ignore)
10826 switch (code)
10827 {
10828 case SPARC_BUILTIN_LDFSR:
10829 case SPARC_BUILTIN_STFSR:
10830 case SPARC_BUILTIN_ALIGNADDR:
10831 case SPARC_BUILTIN_WRGSR:
10832 case SPARC_BUILTIN_BMASK:
10833 case SPARC_BUILTIN_CMASK8:
10834 case SPARC_BUILTIN_CMASK16:
10835 case SPARC_BUILTIN_CMASK32:
10836 break;
10837
10838 default:
10839 return build_zero_cst (rtype);
10840 }
10841
10842 switch (code)
10843 {
10844 case SPARC_BUILTIN_FEXPAND:
10845 arg0 = args[0];
10846 STRIP_NOPS (arg0);
10847
10848 if (TREE_CODE (arg0) == VECTOR_CST)
10849 {
10850 tree inner_type = TREE_TYPE (rtype);
10851 tree *n_elts;
10852 unsigned i;
10853
10854 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10855 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10856 n_elts[i] = build_int_cst (inner_type,
10857 TREE_INT_CST_LOW
10858 (VECTOR_CST_ELT (arg0, i)) << 4);
10859 return build_vector (rtype, n_elts);
10860 }
10861 break;
10862
10863 case SPARC_BUILTIN_FMUL8X16:
10864 case SPARC_BUILTIN_FMUL8X16AU:
10865 case SPARC_BUILTIN_FMUL8X16AL:
10866 arg0 = args[0];
10867 arg1 = args[1];
10868 STRIP_NOPS (arg0);
10869 STRIP_NOPS (arg1);
10870
10871 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10872 {
10873 tree inner_type = TREE_TYPE (rtype);
10874 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10875 sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1);
10876 return build_vector (rtype, n_elts);
10877 }
10878 break;
10879
10880 case SPARC_BUILTIN_FPMERGE:
10881 arg0 = args[0];
10882 arg1 = args[1];
10883 STRIP_NOPS (arg0);
10884 STRIP_NOPS (arg1);
10885
10886 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10887 {
10888 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
10889 unsigned i;
10890 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10891 {
10892 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
10893 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
10894 }
10895
10896 return build_vector (rtype, n_elts);
10897 }
10898 break;
10899
10900 case SPARC_BUILTIN_PDIST:
10901 case SPARC_BUILTIN_PDISTN:
10902 arg0 = args[0];
10903 arg1 = args[1];
10904 STRIP_NOPS (arg0);
10905 STRIP_NOPS (arg1);
10906 if (code == SPARC_BUILTIN_PDIST)
10907 {
10908 arg2 = args[2];
10909 STRIP_NOPS (arg2);
10910 }
10911 else
10912 arg2 = integer_zero_node;
10913
10914 if (TREE_CODE (arg0) == VECTOR_CST
10915 && TREE_CODE (arg1) == VECTOR_CST
10916 && TREE_CODE (arg2) == INTEGER_CST)
10917 {
10918 bool overflow = false;
10919 widest_int result = wi::to_widest (arg2);
10920 widest_int tmp;
10921 unsigned i;
10922
10923 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10924 {
10925 tree e0 = VECTOR_CST_ELT (arg0, i);
10926 tree e1 = VECTOR_CST_ELT (arg1, i);
10927
10928 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
10929
10930 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
10931 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
10932 if (wi::neg_p (tmp))
10933 tmp = wi::neg (tmp, &neg2_ovf);
10934 else
10935 neg2_ovf = false;
10936 result = wi::add (result, tmp, SIGNED, &add2_ovf);
10937 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
10938 }
10939
10940 gcc_assert (!overflow);
10941
10942 return wide_int_to_tree (rtype, result);
10943 }
10944
10945 default:
10946 break;
10947 }
10948
10949 return NULL_TREE;
10950 }
10951 \f
10952 /* ??? This duplicates information provided to the compiler by the
10953 ??? scheduler description. Some day, teach genautomata to output
10954 ??? the latencies and then CSE will just use that. */
10955
10956 static bool
10957 sparc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10958 int *total, bool speed ATTRIBUTE_UNUSED)
10959 {
10960 enum machine_mode mode = GET_MODE (x);
10961 bool float_mode_p = FLOAT_MODE_P (mode);
10962
10963 switch (code)
10964 {
10965 case CONST_INT:
10966 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
10967 {
10968 *total = 0;
10969 return true;
10970 }
10971 /* FALLTHRU */
10972
10973 case HIGH:
10974 *total = 2;
10975 return true;
10976
10977 case CONST:
10978 case LABEL_REF:
10979 case SYMBOL_REF:
10980 *total = 4;
10981 return true;
10982
10983 case CONST_DOUBLE:
10984 if (GET_MODE (x) == VOIDmode
10985 && ((CONST_DOUBLE_HIGH (x) == 0
10986 && CONST_DOUBLE_LOW (x) < 0x1000)
10987 || (CONST_DOUBLE_HIGH (x) == -1
10988 && CONST_DOUBLE_LOW (x) < 0
10989 && CONST_DOUBLE_LOW (x) >= -0x1000)))
10990 *total = 0;
10991 else
10992 *total = 8;
10993 return true;
10994
10995 case MEM:
10996 /* If outer-code was a sign or zero extension, a cost
10997 of COSTS_N_INSNS (1) was already added in. This is
10998 why we are subtracting it back out. */
10999 if (outer_code == ZERO_EXTEND)
11000 {
11001 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11002 }
11003 else if (outer_code == SIGN_EXTEND)
11004 {
11005 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11006 }
11007 else if (float_mode_p)
11008 {
11009 *total = sparc_costs->float_load;
11010 }
11011 else
11012 {
11013 *total = sparc_costs->int_load;
11014 }
11015
11016 return true;
11017
11018 case PLUS:
11019 case MINUS:
11020 if (float_mode_p)
11021 *total = sparc_costs->float_plusminus;
11022 else
11023 *total = COSTS_N_INSNS (1);
11024 return false;
11025
11026 case FMA:
11027 {
11028 rtx sub;
11029
11030 gcc_assert (float_mode_p);
11031 *total = sparc_costs->float_mul;
11032
11033 sub = XEXP (x, 0);
11034 if (GET_CODE (sub) == NEG)
11035 sub = XEXP (sub, 0);
11036 *total += rtx_cost (sub, FMA, 0, speed);
11037
11038 sub = XEXP (x, 2);
11039 if (GET_CODE (sub) == NEG)
11040 sub = XEXP (sub, 0);
11041 *total += rtx_cost (sub, FMA, 2, speed);
11042 return true;
11043 }
11044
11045 case MULT:
11046 if (float_mode_p)
11047 *total = sparc_costs->float_mul;
11048 else if (! TARGET_HARD_MUL)
11049 *total = COSTS_N_INSNS (25);
11050 else
11051 {
11052 int bit_cost;
11053
11054 bit_cost = 0;
11055 if (sparc_costs->int_mul_bit_factor)
11056 {
11057 int nbits;
11058
11059 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11060 {
11061 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11062 for (nbits = 0; value != 0; value &= value - 1)
11063 nbits++;
11064 }
11065 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
11066 && GET_MODE (XEXP (x, 1)) == VOIDmode)
11067 {
11068 rtx x1 = XEXP (x, 1);
11069 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
11070 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
11071
11072 for (nbits = 0; value1 != 0; value1 &= value1 - 1)
11073 nbits++;
11074 for (; value2 != 0; value2 &= value2 - 1)
11075 nbits++;
11076 }
11077 else
11078 nbits = 7;
11079
11080 if (nbits < 3)
11081 nbits = 3;
11082 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11083 bit_cost = COSTS_N_INSNS (bit_cost);
11084 }
11085
11086 if (mode == DImode)
11087 *total = sparc_costs->int_mulX + bit_cost;
11088 else
11089 *total = sparc_costs->int_mul + bit_cost;
11090 }
11091 return false;
11092
11093 case ASHIFT:
11094 case ASHIFTRT:
11095 case LSHIFTRT:
11096 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11097 return false;
11098
11099 case DIV:
11100 case UDIV:
11101 case MOD:
11102 case UMOD:
11103 if (float_mode_p)
11104 {
11105 if (mode == DFmode)
11106 *total = sparc_costs->float_div_df;
11107 else
11108 *total = sparc_costs->float_div_sf;
11109 }
11110 else
11111 {
11112 if (mode == DImode)
11113 *total = sparc_costs->int_divX;
11114 else
11115 *total = sparc_costs->int_div;
11116 }
11117 return false;
11118
11119 case NEG:
11120 if (! float_mode_p)
11121 {
11122 *total = COSTS_N_INSNS (1);
11123 return false;
11124 }
11125 /* FALLTHRU */
11126
11127 case ABS:
11128 case FLOAT:
11129 case UNSIGNED_FLOAT:
11130 case FIX:
11131 case UNSIGNED_FIX:
11132 case FLOAT_EXTEND:
11133 case FLOAT_TRUNCATE:
11134 *total = sparc_costs->float_move;
11135 return false;
11136
11137 case SQRT:
11138 if (mode == DFmode)
11139 *total = sparc_costs->float_sqrt_df;
11140 else
11141 *total = sparc_costs->float_sqrt_sf;
11142 return false;
11143
11144 case COMPARE:
11145 if (float_mode_p)
11146 *total = sparc_costs->float_cmp;
11147 else
11148 *total = COSTS_N_INSNS (1);
11149 return false;
11150
11151 case IF_THEN_ELSE:
11152 if (float_mode_p)
11153 *total = sparc_costs->float_cmove;
11154 else
11155 *total = sparc_costs->int_cmove;
11156 return false;
11157
11158 case IOR:
11159 /* Handle the NAND vector patterns. */
11160 if (sparc_vector_mode_supported_p (GET_MODE (x))
11161 && GET_CODE (XEXP (x, 0)) == NOT
11162 && GET_CODE (XEXP (x, 1)) == NOT)
11163 {
11164 *total = COSTS_N_INSNS (1);
11165 return true;
11166 }
11167 else
11168 return false;
11169
11170 default:
11171 return false;
11172 }
11173 }
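/* Worked example (an illustrative note): (const_int 42) is assigned
   cost 0 above because it fits the signed 13-bit immediate range
   [-0x1000, 0xfff] of SPARC arithmetic instructions, whereas
   (const_int 0x12345) falls through to the HIGH case and is assigned
   cost 2, roughly reflecting the sethi/or pair needed to
   materialize it.  */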
11174
11175 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
11176
11177 static inline bool
11178 general_or_i64_p (reg_class_t rclass)
11179 {
11180 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11181 }
11182
11183 /* Implement TARGET_REGISTER_MOVE_COST. */
11184
11185 static int
11186 sparc_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
11187 reg_class_t from, reg_class_t to)
11188 {
11189 bool need_memory = false;
11190
11191 if (from == FPCC_REGS || to == FPCC_REGS)
11192 need_memory = true;
11193 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11194 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11195 {
11196 if (TARGET_VIS3)
11197 {
11198 int size = GET_MODE_SIZE (mode);
11199 if (size == 8 || size == 4)
11200 {
11201 if (! TARGET_ARCH32 || size == 4)
11202 return 4;
11203 else
11204 return 6;
11205 }
11206 }
11207 need_memory = true;
11208 }
11209
11210 if (need_memory)
11211 {
11212 if (sparc_cpu == PROCESSOR_ULTRASPARC
11213 || sparc_cpu == PROCESSOR_ULTRASPARC3
11214 || sparc_cpu == PROCESSOR_NIAGARA
11215 || sparc_cpu == PROCESSOR_NIAGARA2
11216 || sparc_cpu == PROCESSOR_NIAGARA3
11217 || sparc_cpu == PROCESSOR_NIAGARA4)
11218 return 12;
11219
11220 return 6;
11221 }
11222
11223 return 2;
11224 }
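/* For instance, with VIS3 a 4-byte value moves directly between the
   integer and FP register files at cost 4 (an 8-byte value costs 6
   on 32-bit); without VIS3 such a move must bounce through memory,
   at cost 6, or 12 on the UltraSPARC and Niagara families.
   (Illustrative reading of the code above, not additional logic.)  */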
11225
11226 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11227 This is achieved by means of a manual dynamic stack space allocation in
11228 the current frame. We make the assumption that SEQ doesn't contain any
11229 function calls, with the possible exception of calls to the GOT helper. */
11230
11231 static void
11232 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11233 {
11234 /* We must preserve the lowest 16 words for the register save area. */
11235 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11236 /* We really need only 2 words of fresh stack space. */
11237 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11238
11239 rtx slot
11240 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11241 SPARC_STACK_BIAS + offset));
11242
11243 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11244 emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
11245 if (reg2)
11246 emit_insn (gen_rtx_SET (VOIDmode,
11247 adjust_address (slot, word_mode, UNITS_PER_WORD),
11248 reg2));
11249 emit_insn (seq);
11250 if (reg2)
11251 emit_insn (gen_rtx_SET (VOIDmode,
11252 reg2,
11253 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11254 emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
11255 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11256 }
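/* Schematically, the sequence emitted above is (a sketch, not the
   literal RTL):

     %sp -= SIZE;                                // allocate fresh space
     *(%sp + SPARC_STACK_BIAS + OFFSET) = REG;   // spill REG
     if (REG2) *(... + UNITS_PER_WORD) = REG2;   // spill REG2
     <SEQ>
     if (REG2) REG2 = *(... + UNITS_PER_WORD);   // reload REG2
     REG = *(%sp + SPARC_STACK_BIAS + OFFSET);   // reload REG
     %sp += SIZE;                                // deallocate

   where OFFSET skips the 16-word register save area and SIZE is the
   stack-aligned size of OFFSET plus two words.  */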
11257
11258 /* Output the assembler code for a thunk function. THUNK_DECL is the
11259 declaration for the thunk function itself, FUNCTION is the decl for
11260 the target function. DELTA is an immediate constant offset to be
11261 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11262 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
11263
11264 static void
11265 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11266 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11267 tree function)
11268 {
11269 rtx this_rtx, funexp;
11270 rtx_insn *insn;
11271 unsigned int int_arg_first;
11272
11273 reload_completed = 1;
11274 epilogue_completed = 1;
11275
11276 emit_note (NOTE_INSN_PROLOGUE_END);
11277
11278 if (TARGET_FLAT)
11279 {
11280 sparc_leaf_function_p = 1;
11281
11282 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11283 }
11284 else if (flag_delayed_branch)
11285 {
11286 /* We will emit a regular sibcall below, so we need to instruct
11287 output_sibcall that we are in a leaf function. */
11288 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11289
11290 /* This will cause final.c to invoke leaf_renumber_regs so we
11291 must behave as if we were in a not-yet-leafified function. */
11292 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11293 }
11294 else
11295 {
11296 /* We will emit the sibcall manually below, so we will need to
11297 manually spill non-leaf registers. */
11298 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11299
11300 /* We really are in a leaf function. */
11301 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11302 }
11303
11304 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11305 returns a structure, the structure return pointer is there instead. */
11306 if (TARGET_ARCH64
11307 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11308 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11309 else
11310 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11311
11312 /* Add DELTA. When possible use a plain add, otherwise load it into
11313 a register first. */
11314 if (delta)
11315 {
11316 rtx delta_rtx = GEN_INT (delta);
11317
11318 if (! SPARC_SIMM13_P (delta))
11319 {
11320 rtx scratch = gen_rtx_REG (Pmode, 1);
11321 emit_move_insn (scratch, delta_rtx);
11322 delta_rtx = scratch;
11323 }
11324
11325 /* THIS_RTX += DELTA. */
11326 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11327 }
11328
11329 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
11330 if (vcall_offset)
11331 {
11332 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11333 rtx scratch = gen_rtx_REG (Pmode, 1);
11334
11335 gcc_assert (vcall_offset < 0);
11336
11337 /* SCRATCH = *THIS_RTX. */
11338 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11339
11340 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11341 may not have any available scratch register at this point. */
11342 if (SPARC_SIMM13_P (vcall_offset))
11343 ;
11344 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
11345 else if (! fixed_regs[5]
11346 /* The below sequence is made up of at least 2 insns,
11347 while the default method may need only one. */
11348 && vcall_offset < -8192)
11349 {
11350 rtx scratch2 = gen_rtx_REG (Pmode, 5);
11351 emit_move_insn (scratch2, vcall_offset_rtx);
11352 vcall_offset_rtx = scratch2;
11353 }
11354 else
11355 {
11356 rtx increment = GEN_INT (-4096);
11357
11358 /* VCALL_OFFSET is a negative number whose typical range can be
11359 estimated as -32768..0 in 32-bit mode. In almost all cases
11360 it is therefore cheaper to emit multiple add insns than
11361 spilling and loading the constant into a register (at least
11362 6 insns). */
11363 while (! SPARC_SIMM13_P (vcall_offset))
11364 {
11365 emit_insn (gen_add2_insn (scratch, increment));
11366 vcall_offset += 4096;
11367 }
11368 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11369 }
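	  /* For example, with VCALL_OFFSET == -6000 (not a SIMM13, but
	     >= -8192), the loop above emits a single add of -4096 to
	     SCRATCH and leaves -1904, which then fits directly in the
	     memory reference below.  */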
11370
11371 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
11372 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11373 gen_rtx_PLUS (Pmode,
11374 scratch,
11375 vcall_offset_rtx)));
11376
11377 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
11378 emit_insn (gen_add2_insn (this_rtx, scratch));
11379 }
11380
11381 /* Generate a tail call to the target function. */
11382 if (! TREE_USED (function))
11383 {
11384 assemble_external (function);
11385 TREE_USED (function) = 1;
11386 }
11387 funexp = XEXP (DECL_RTL (function), 0);
11388
11389 if (flag_delayed_branch)
11390 {
11391 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11392 insn = emit_call_insn (gen_sibcall (funexp));
11393 SIBLING_CALL_P (insn) = 1;
11394 }
11395 else
11396 {
11397 /* The hoops we have to jump through in order to generate a sibcall
11398 without using delay slots... */
11399 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11400
11401 if (flag_pic)
11402 {
11403 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
11404 start_sequence ();
11405 load_got_register (); /* clobbers %o7 */
11406 scratch = sparc_legitimize_pic_address (funexp, scratch);
11407 seq = get_insns ();
11408 end_sequence ();
11409 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11410 }
11411 else if (TARGET_ARCH32)
11412 {
11413 emit_insn (gen_rtx_SET (VOIDmode,
11414 scratch,
11415 gen_rtx_HIGH (SImode, funexp)));
11416 emit_insn (gen_rtx_SET (VOIDmode,
11417 scratch,
11418 gen_rtx_LO_SUM (SImode, scratch, funexp)));
11419 }
11420 else /* TARGET_ARCH64 */
11421 {
11422 switch (sparc_cmodel)
11423 {
11424 case CM_MEDLOW:
11425 case CM_MEDMID:
11426 /* The destination can serve as a temporary. */
11427 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11428 break;
11429
11430 case CM_MEDANY:
11431 case CM_EMBMEDANY:
11432 /* The destination cannot serve as a temporary. */
11433 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11434 start_sequence ();
11435 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11436 seq = get_insns ();
11437 end_sequence ();
11438 emit_and_preserve (seq, spill_reg, 0);
11439 break;
11440
11441 default:
11442 gcc_unreachable ();
11443 }
11444 }
11445
11446 emit_jump_insn (gen_indirect_jump (scratch));
11447 }
11448
11449 emit_barrier ();
11450
11451 /* Run just enough of rest_of_compilation to get the insns emitted.
11452 There's not really enough bulk here to make other passes such as
11453 instruction scheduling worthwhile. Note that use_thunk calls
11454 assemble_start_function and assemble_end_function. */
11455 insn = get_insns ();
11456 shorten_branches (insn);
11457 final_start_function (insn, file, 1);
11458 final (insn, file, 1);
11459 final_end_function ();
11460
11461 reload_completed = 0;
11462 epilogue_completed = 0;
11463 }
11464
11465 /* Return true if sparc_output_mi_thunk would be able to output the
11466 assembler code for the thunk function specified by the arguments
11467 it is passed, and false otherwise. */
11468 static bool
11469 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11470 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11471 HOST_WIDE_INT vcall_offset,
11472 const_tree function ATTRIBUTE_UNUSED)
11473 {
11474 /* Bound the loop used in the default method above. */
11475 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11476 }
11477
11478 /* How to allocate a 'struct machine_function'. */
11479
11480 static struct machine_function *
11481 sparc_init_machine_status (void)
11482 {
11483 return ggc_cleared_alloc<machine_function> ();
11484 }
11485
11486 /* Locate some local-dynamic symbol still in use by this function
11487 so that we can print its name in local-dynamic base patterns. */
11488
11489 static const char *
11490 get_some_local_dynamic_name (void)
11491 {
11492 rtx_insn *insn;
11493
11494 if (cfun->machine->some_ld_name)
11495 return cfun->machine->some_ld_name;
11496
11497 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11498 if (INSN_P (insn)
11499 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11500 return cfun->machine->some_ld_name;
11501
11502 gcc_unreachable ();
11503 }
11504
11505 static int
11506 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11507 {
11508 rtx x = *px;
11509
11510 if (x
11511 && GET_CODE (x) == SYMBOL_REF
11512 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11513 {
11514 cfun->machine->some_ld_name = XSTR (x, 0);
11515 return 1;
11516 }
11517
11518 return 0;
11519 }
11520
11521 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11522 We need to emit DTP-relative relocations. */
11523
11524 static void
11525 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11526 {
11527 switch (size)
11528 {
11529 case 4:
11530 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11531 break;
11532 case 8:
11533 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11534 break;
11535 default:
11536 gcc_unreachable ();
11537 }
11538 output_addr_const (file, x);
11539 fputs (")", file);
11540 }
11541
11542 /* Do whatever processing is required at the end of a file. */
11543
11544 static void
11545 sparc_file_end (void)
11546 {
11547 /* If we need to emit the special GOT helper function, do so now. */
11548 if (got_helper_rtx)
11549 {
11550 const char *name = XSTR (got_helper_rtx, 0);
11551 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11552 #ifdef DWARF2_UNWIND_INFO
11553 bool do_cfi;
11554 #endif
11555
11556 if (USE_HIDDEN_LINKONCE)
11557 {
11558 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11559 get_identifier (name),
11560 build_function_type_list (void_type_node,
11561 NULL_TREE));
11562 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11563 NULL_TREE, void_type_node);
11564 TREE_PUBLIC (decl) = 1;
11565 TREE_STATIC (decl) = 1;
11566 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11567 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11568 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11569 resolve_unique_section (decl, 0, flag_function_sections);
11570 allocate_struct_function (decl, true);
11571 cfun->is_thunk = 1;
11572 current_function_decl = decl;
11573 init_varasm_status ();
11574 assemble_start_function (decl, name);
11575 }
11576 else
11577 {
11578 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11579 switch_to_section (text_section);
11580 if (align > 0)
11581 ASM_OUTPUT_ALIGN (asm_out_file, align);
11582 ASM_OUTPUT_LABEL (asm_out_file, name);
11583 }
11584
11585 #ifdef DWARF2_UNWIND_INFO
11586 do_cfi = dwarf2out_do_cfi_asm ();
11587 if (do_cfi)
11588 fprintf (asm_out_file, "\t.cfi_startproc\n");
11589 #endif
11590 if (flag_delayed_branch)
11591 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11592 reg_name, reg_name);
11593 else
11594 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11595 reg_name, reg_name);
11596 #ifdef DWARF2_UNWIND_INFO
11597 if (do_cfi)
11598 fprintf (asm_out_file, "\t.cfi_endproc\n");
11599 #endif
11600 }
11601
11602 if (NEED_INDICATE_EXEC_STACK)
11603 file_end_indicate_exec_stack ();
11604
11605 #ifdef TARGET_SOLARIS
11606 solaris_file_end ();
11607 #endif
11608 }
11609
11610 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11611 /* Implement TARGET_MANGLE_TYPE. */
11612
11613 static const char *
11614 sparc_mangle_type (const_tree type)
11615 {
11616 if (!TARGET_64BIT
11617 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11618 && TARGET_LONG_DOUBLE_128)
11619 return "g";
11620
11621 /* For all other types, use normal C++ mangling. */
11622 return NULL;
11623 }
11624 #endif
11625
11626 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
11627 and BEFORE_AFTER arguments are of the form X_Y: they are two-bit masks
11628 where bit 0 indicates that X is true, and bit 1 indicates that Y is true. */
11629
11630 void
11631 sparc_emit_membar_for_model (enum memmodel model,
11632 int load_store, int before_after)
11633 {
11634 /* Bits for the MEMBAR mmask field. */
11635 const int LoadLoad = 1;
11636 const int StoreLoad = 2;
11637 const int LoadStore = 4;
11638 const int StoreStore = 8;
11639
11640 int mm = 0, implied = 0;
11641
11642 switch (sparc_memory_model)
11643 {
11644 case SMM_SC:
11645 /* Sequential Consistency. All memory transactions are immediately
11646 visible in sequential execution order. No barriers needed. */
11647 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11648 break;
11649
11650 case SMM_TSO:
11651 /* Total Store Ordering: all memory transactions with store semantics
11652 are followed by an implied StoreStore. */
11653 implied |= StoreStore;
11654
11655 /* If we're not looking for a raw barrier (before+after), then atomic
11656 operations get the benefit of being both load and store. */
11657 if (load_store == 3 && before_after == 1)
11658 implied |= StoreLoad;
11659 /* FALLTHRU */
11660
11661 case SMM_PSO:
11662 /* Partial Store Ordering: all memory transactions with load semantics
11663 are followed by an implied LoadLoad | LoadStore. */
11664 implied |= LoadLoad | LoadStore;
11665
11666 /* If we're not looking for a raw barrier (before+after), then atomic
11667 operations get the benefit of being both load and store. */
11668 if (load_store == 3 && before_after == 2)
11669 implied |= StoreLoad | StoreStore;
11670 /* FALLTHRU */
11671
11672 case SMM_RMO:
11673 /* Relaxed Memory Ordering: no implicit bits. */
11674 break;
11675
11676 default:
11677 gcc_unreachable ();
11678 }
11679
11680 if (before_after & 1)
11681 {
11682 if (model == MEMMODEL_RELEASE
11683 || model == MEMMODEL_ACQ_REL
11684 || model == MEMMODEL_SEQ_CST)
11685 {
11686 if (load_store & 1)
11687 mm |= LoadLoad | StoreLoad;
11688 if (load_store & 2)
11689 mm |= LoadStore | StoreStore;
11690 }
11691 }
11692 if (before_after & 2)
11693 {
11694 if (model == MEMMODEL_ACQUIRE
11695 || model == MEMMODEL_ACQ_REL
11696 || model == MEMMODEL_SEQ_CST)
11697 {
11698 if (load_store & 1)
11699 mm |= LoadLoad | LoadStore;
11700 if (load_store & 2)
11701 mm |= StoreLoad | StoreStore;
11702 }
11703 }
11704
11705 /* Remove the bits implied by the system memory model. */
11706 mm &= ~implied;
11707
11708 /* For raw barriers (before+after), always emit a barrier.
11709 This will become a compile-time barrier if needed. */
11710 if (mm || before_after == 3)
11711 emit_insn (gen_membar (GEN_INT (mm)));
11712 }
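/* Worked example (an illustrative note): under the TSO model, a
   SEQ_CST atomic operation (LOAD_STORE == 3, BEFORE_AFTER == 1)
   first computes mm == LoadLoad | StoreLoad | LoadStore | StoreStore,
   but the implied set works out to the same four bits, so no membar
   is emitted at all.  Under RMO nothing is implied, mm keeps all
   four bits, and a membar with mmask 0xf, i.e. #LoadLoad | #StoreLoad
   | #LoadStore | #StoreStore, is emitted.  */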
11713
11714 /* Expand code to perform an 8-bit or 16-bit compare and swap by doing
11715 a 32-bit compare and swap on the word containing the byte or half-word. */
11716
11717 static void
11718 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11719 rtx oldval, rtx newval)
11720 {
11721 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11722 rtx addr = gen_reg_rtx (Pmode);
11723 rtx off = gen_reg_rtx (SImode);
11724 rtx oldv = gen_reg_rtx (SImode);
11725 rtx newv = gen_reg_rtx (SImode);
11726 rtx oldvalue = gen_reg_rtx (SImode);
11727 rtx newvalue = gen_reg_rtx (SImode);
11728 rtx res = gen_reg_rtx (SImode);
11729 rtx resv = gen_reg_rtx (SImode);
11730 rtx memsi, val, mask, end_label, loop_label, cc;
11731
11732 emit_insn (gen_rtx_SET (VOIDmode, addr,
11733 gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11734
11735 if (Pmode != SImode)
11736 addr1 = gen_lowpart (SImode, addr1);
11737 emit_insn (gen_rtx_SET (VOIDmode, off,
11738 gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11739
11740 memsi = gen_rtx_MEM (SImode, addr);
11741 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11742 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11743
11744 val = copy_to_reg (memsi);
11745
11746 emit_insn (gen_rtx_SET (VOIDmode, off,
11747 gen_rtx_XOR (SImode, off,
11748 GEN_INT (GET_MODE (mem) == QImode
11749 ? 3 : 2))));
11750
11751 emit_insn (gen_rtx_SET (VOIDmode, off,
11752 gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11753
11754 if (GET_MODE (mem) == QImode)
11755 mask = force_reg (SImode, GEN_INT (0xff));
11756 else
11757 mask = force_reg (SImode, GEN_INT (0xffff));
11758
11759 emit_insn (gen_rtx_SET (VOIDmode, mask,
11760 gen_rtx_ASHIFT (SImode, mask, off)));
11761
11762 emit_insn (gen_rtx_SET (VOIDmode, val,
11763 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11764 val)));
11765
11766 oldval = gen_lowpart (SImode, oldval);
11767 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11768 gen_rtx_ASHIFT (SImode, oldval, off)));
11769
11770 newval = gen_lowpart_common (SImode, newval);
11771 emit_insn (gen_rtx_SET (VOIDmode, newv,
11772 gen_rtx_ASHIFT (SImode, newval, off)));
11773
11774 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11775 gen_rtx_AND (SImode, oldv, mask)));
11776
11777 emit_insn (gen_rtx_SET (VOIDmode, newv,
11778 gen_rtx_AND (SImode, newv, mask)));
11779
11780 end_label = gen_label_rtx ();
11781 loop_label = gen_label_rtx ();
11782 emit_label (loop_label);
11783
11784 emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
11785 gen_rtx_IOR (SImode, oldv, val)));
11786
11787 emit_insn (gen_rtx_SET (VOIDmode, newvalue,
11788 gen_rtx_IOR (SImode, newv, val)));
11789
11790 emit_move_insn (bool_result, const1_rtx);
11791
11792 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11793
11794 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11795
11796 emit_insn (gen_rtx_SET (VOIDmode, resv,
11797 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11798 res)));
11799
11800 emit_move_insn (bool_result, const0_rtx);
11801
11802 cc = gen_compare_reg_1 (NE, resv, val);
11803 emit_insn (gen_rtx_SET (VOIDmode, val, resv));
11804
11805 /* Use cbranchcc4 to separate the compare and branch! */
11806 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11807 cc, const0_rtx, loop_label));
11808
11809 emit_label (end_label);
11810
11811 emit_insn (gen_rtx_SET (VOIDmode, res,
11812 gen_rtx_AND (SImode, res, mask)));
11813
11814 emit_insn (gen_rtx_SET (VOIDmode, res,
11815 gen_rtx_LSHIFTRT (SImode, res, off)));
11816
11817 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
11818 }
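/* Schematically, for a QImode location P on this big-endian target,
   the expansion above performs the equivalent of the following C
   sketch (CAS32 is a hypothetical stand-in for the 32-bit
   compare-and-swap emitted via gen_atomic_compare_and_swapsi_1;
   HImode differs only in using mask 0xffff and XORing with 2):

     uint32_t *wp  = (uint32_t *) ((uintptr_t) p & -4);
     int off       = (((uintptr_t) p & 3) ^ 3) * 8;
     uint32_t mask = (uint32_t) 0xff << off;
     uint32_t val  = *wp & ~mask;
     uint32_t oldv = ((uint32_t) oldval << off) & mask;
     uint32_t newv = ((uint32_t) newval << off) & mask;
     uint32_t res;
     for (;;)
       {
         bool_result = 1;
         res = CAS32 (wp, oldv | val, newv | val);
         if (res == (oldv | val))
           break;                     // success
         bool_result = 0;
         if ((res & ~mask) == val)
           break;                     // our byte differed: failure
         val = res & ~mask;           // other bytes changed: retry
       }
     result = (res & mask) >> off;  */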
11819
11820 /* Expand code to perform a compare-and-swap. */
11821
11822 void
11823 sparc_expand_compare_and_swap (rtx operands[])
11824 {
11825 rtx bval, retval, mem, oldval, newval;
11826 enum machine_mode mode;
11827 enum memmodel model;
11828
11829 bval = operands[0];
11830 retval = operands[1];
11831 mem = operands[2];
11832 oldval = operands[3];
11833 newval = operands[4];
11834 model = (enum memmodel) INTVAL (operands[6]);
11835 mode = GET_MODE (mem);
11836
11837 sparc_emit_membar_for_model (model, 3, 1);
11838
11839 if (reg_overlap_mentioned_p (retval, oldval))
11840 oldval = copy_to_reg (oldval);
11841
11842 if (mode == QImode || mode == HImode)
11843 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11844 else
11845 {
11846 rtx (*gen) (rtx, rtx, rtx, rtx);
11847 rtx x;
11848
11849 if (mode == SImode)
11850 gen = gen_atomic_compare_and_swapsi_1;
11851 else
11852 gen = gen_atomic_compare_and_swapdi_1;
11853 emit_insn (gen (retval, mem, oldval, newval));
11854
11855 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11856 if (x != bval)
11857 convert_move (bval, x, 1);
11858 }
11859
11860 sparc_emit_membar_for_model (model, 3, 2);
11861 }
11862
11863 void
11864 sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel)
11865 {
11866 rtx t_1, t_2, t_3;
11867
11868 sel = gen_lowpart (DImode, sel);
11869 switch (vmode)
11870 {
11871 case V2SImode:
11872 /* inp = xxxxxxxAxxxxxxxB */
11873 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11874 NULL_RTX, 1, OPTAB_DIRECT);
11875 /* t_1 = ....xxxxxxxAxxx. */
11876 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11877 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
11878 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11879 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
11880 /* sel = .......B */
11881 /* t_1 = ...A.... */
11882 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11883 /* sel = ...A...B */
11884 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
11885 /* sel = AAAABBBB * 4 */
11886 t_1 = force_reg (SImode, GEN_INT (0x01230123));
11887 /* sel = { A*4, A*4+1, A*4+2, ... } */
11888 break;
11889
11890 case V4HImode:
11891 /* inp = xxxAxxxBxxxCxxxD */
11892 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11893 NULL_RTX, 1, OPTAB_DIRECT);
11894 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11895 NULL_RTX, 1, OPTAB_DIRECT);
11896 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
11897 NULL_RTX, 1, OPTAB_DIRECT);
11898 /* t_1 = ..xxxAxxxBxxxCxx */
11899 /* t_2 = ....xxxAxxxBxxxC */
11900 /* t_3 = ......xxxAxxxBxx */
11901 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11902 GEN_INT (0x07),
11903 NULL_RTX, 1, OPTAB_DIRECT);
11904 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11905 GEN_INT (0x0700),
11906 NULL_RTX, 1, OPTAB_DIRECT);
11907 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
11908 GEN_INT (0x070000),
11909 NULL_RTX, 1, OPTAB_DIRECT);
11910 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
11911 GEN_INT (0x07000000),
11912 NULL_RTX, 1, OPTAB_DIRECT);
11913 /* sel = .......D */
11914 /* t_1 = .....C.. */
11915 /* t_2 = ...B.... */
11916 /* t_3 = .A...... */
11917 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11918 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
11919 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
11920 /* sel = .A.B.C.D */
11921 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
11922 /* sel = AABBCCDD * 2 */
11923 t_1 = force_reg (SImode, GEN_INT (0x01010101));
11924 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
11925 break;
11926
11927 case V8QImode:
11928 /* input = xAxBxCxDxExFxGxH */
11929 sel = expand_simple_binop (DImode, AND, sel,
11930 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
11931 | 0x0f0f0f0f),
11932 NULL_RTX, 1, OPTAB_DIRECT);
11933 /* sel = .A.B.C.D.E.F.G.H */
11934 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
11935 NULL_RTX, 1, OPTAB_DIRECT);
11936 /* t_1 = ..A.B.C.D.E.F.G. */
11937 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11938 NULL_RTX, 1, OPTAB_DIRECT);
11939 /* sel = .AABBCCDDEEFFGGH */
11940 sel = expand_simple_binop (DImode, AND, sel,
11941 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
11942 | 0xff00ff),
11943 NULL_RTX, 1, OPTAB_DIRECT);
11944 /* sel = ..AB..CD..EF..GH */
11945 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11946 NULL_RTX, 1, OPTAB_DIRECT);
11947 /* t_1 = ....AB..CD..EF.. */
11948 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11949 NULL_RTX, 1, OPTAB_DIRECT);
11950 /* sel = ..ABABCDCDEFEFGH */
11951 sel = expand_simple_binop (DImode, AND, sel,
11952 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
11953 NULL_RTX, 1, OPTAB_DIRECT);
11954 /* sel = ....ABCD....EFGH */
11955 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11956 NULL_RTX, 1, OPTAB_DIRECT);
11957 /* t_1 = ........ABCD.... */
11958 sel = gen_lowpart (SImode, sel);
11959 t_1 = gen_lowpart (SImode, t_1);
11960 break;
11961
11962 default:
11963 gcc_unreachable ();
11964 }
11965
11966 /* Always perform the final addition/merge within the bmask insn. */
11967 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, t_1));
11968 }
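/* Worked example (V4HImode, an illustrative note): for the element
   selector {1, 0, 3, 2} the nibbles packed above form .1.0.3.2, i.e.
   0x01000302; multiplying by 0x22 doubles and duplicates them into
   0x22006644, and the final addition of 0x01010101 inside the bmask
   insn yields 0x23016745, that is, the byte selectors
   {2,3, 0,1, 6,7, 4,5} -- exactly halfwords 1, 0, 3, 2 of the
   subsequent bshuffle input.  */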
11969
11970 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
11971
11972 static bool
11973 sparc_frame_pointer_required (void)
11974 {
11975 /* If the stack pointer is dynamically modified in the function, it cannot
11976 serve as the frame pointer. */
11977 if (cfun->calls_alloca)
11978 return true;
11979
11980 /* If the function receives nonlocal gotos, it needs to save the frame
11981 pointer in the nonlocal_goto_save_area object. */
11982 if (cfun->has_nonlocal_label)
11983 return true;
11984
11985 /* In flat mode, that's it. */
11986 if (TARGET_FLAT)
11987 return false;
11988
11989 /* Otherwise, the frame pointer is required if the function isn't leaf. */
11990 return !(crtl->is_leaf && only_leaf_regs_used ());
11991 }
11992
11993 /* The way this is structured, we can't eliminate SFP in favor of SP
11994 if the frame pointer is required: we want to use the SFP->HFP elimination
11995 in that case. But the test in update_eliminables doesn't know we are
11996 assuming below that we only do the former elimination. */
11997
11998 static bool
11999 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12000 {
12001 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12002 }
12003
12004 /* Return the hard frame pointer directly to bypass the stack bias. */
12005
12006 static rtx
12007 sparc_builtin_setjmp_frame_value (void)
12008 {
12009 return hard_frame_pointer_rtx;
12010 }
12011
12012 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12013 they won't be allocated. */
12014
12015 static void
12016 sparc_conditional_register_usage (void)
12017 {
12018 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12019 {
12020 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12021 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12022 }
12023 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
12024 then honor it. */
12025 if (TARGET_ARCH32 && fixed_regs[5])
12026 fixed_regs[5] = 1;
12027 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12028 fixed_regs[5] = 0;
12029 if (! TARGET_V9)
12030 {
12031 int regno;
12032 for (regno = SPARC_FIRST_V9_FP_REG;
12033 regno <= SPARC_LAST_V9_FP_REG;
12034 regno++)
12035 fixed_regs[regno] = 1;
12036 /* %fcc0 is used by v8 and v9. */
12037 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12038 regno <= SPARC_LAST_V9_FCC_REG;
12039 regno++)
12040 fixed_regs[regno] = 1;
12041 }
12042 if (! TARGET_FPU)
12043 {
12044 int regno;
12045 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12046 fixed_regs[regno] = 1;
12047 }
12048 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
12049 then honor it. Likewise with g3 and g4. */
12050 if (fixed_regs[2] == 2)
12051 fixed_regs[2] = ! TARGET_APP_REGS;
12052 if (fixed_regs[3] == 2)
12053 fixed_regs[3] = ! TARGET_APP_REGS;
12054 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12055 fixed_regs[4] = ! TARGET_APP_REGS;
12056 else if (TARGET_CM_EMBMEDANY)
12057 fixed_regs[4] = 1;
12058 else if (fixed_regs[4] == 2)
12059 fixed_regs[4] = 0;
12060 if (TARGET_FLAT)
12061 {
12062 int regno;
12063 /* Disable leaf functions. */
12064 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12065 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12066 leaf_reg_remap [regno] = regno;
12067 }
12068 if (TARGET_VIS)
12069 global_regs[SPARC_GSR_REG] = 1;
12070 }
12071
12072 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12073
12074 - We can't load constants into FP registers.
12075 - We can't load FP constants into integer registers when soft-float,
12076 because there is no soft-float pattern with a r/F constraint.
12077 - We can't load FP constants into integer registers for TFmode unless
12078 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12079 - Try to reload integer constants (symbolic or otherwise) back into
12080 registers directly, rather than having them dumped to memory. */
12081
12082 static reg_class_t
12083 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12084 {
12085 enum machine_mode mode = GET_MODE (x);
12086 if (CONSTANT_P (x))
12087 {
12088 if (FP_REG_CLASS_P (rclass)
12089 || rclass == GENERAL_OR_FP_REGS
12090 || rclass == GENERAL_OR_EXTRA_FP_REGS
12091 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12092 || (mode == TFmode && ! const_zero_operand (x, mode)))
12093 return NO_REGS;
12094
12095 if (GET_MODE_CLASS (mode) == MODE_INT)
12096 return GENERAL_REGS;
12097
12098 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12099 {
12100 if (! FP_REG_CLASS_P (rclass)
12101 || !(const_zero_operand (x, mode)
12102 || const_all_ones_operand (x, mode)))
12103 return NO_REGS;
12104 }
12105 }
12106
12107 if (TARGET_VIS3
12108 && ! TARGET_ARCH64
12109 && (rclass == EXTRA_FP_REGS
12110 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12111 {
12112 int regno = true_regnum (x);
12113
12114 if (SPARC_INT_REG_P (regno))
12115 return (rclass == EXTRA_FP_REGS
12116 ? FP_REGS : GENERAL_OR_FP_REGS);
12117 }
12118
12119 return rclass;
12120 }
12121
12122 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12123 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
12124
12125 const char *
12126 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12127 {
12128 char mulstr[32];
12129
12130 gcc_assert (! TARGET_ARCH64);
12131
12132 if (sparc_check_64 (operands[1], insn) <= 0)
12133 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12134 if (which_alternative == 1)
12135 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12136 if (GET_CODE (operands[2]) == CONST_INT)
12137 {
12138 if (which_alternative == 1)
12139 {
12140 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12141 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12142 output_asm_insn (mulstr, operands);
12143 return "srlx\t%L0, 32, %H0";
12144 }
12145 else
12146 {
12147 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12148 output_asm_insn ("or\t%L1, %3, %3", operands);
12149 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12150 output_asm_insn (mulstr, operands);
12151 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12152 return "mov\t%3, %L0";
12153 }
12154 }
12155 else if (rtx_equal_p (operands[1], operands[2]))
12156 {
12157 if (which_alternative == 1)
12158 {
12159 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12160 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12161 output_asm_insn (mulstr, operands);
12162 return "srlx\t%L0, 32, %H0";
12163 }
12164 else
12165 {
12166 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12167 output_asm_insn ("or\t%L1, %3, %3", operands);
12168 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12169 output_asm_insn (mulstr, operands);
12170 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12171 return "mov\t%3, %L0";
12172 }
12173 }
12174 if (sparc_check_64 (operands[2], insn) <= 0)
12175 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12176 if (which_alternative == 1)
12177 {
12178 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12179 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12180 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12181 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12182 output_asm_insn (mulstr, operands);
12183 return "srlx\t%L0, 32, %H0";
12184 }
12185 else
12186 {
12187 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12188 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12189 output_asm_insn ("or\t%L1, %3, %3", operands);
12190 output_asm_insn ("or\t%L2, %4, %4", operands);
12191 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12192 output_asm_insn (mulstr, operands);
12193 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12194 return "mov\t%3, %L0";
12195 }
12196 }
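/* For example, when which_alternative != 1 (the variant that uses
   the scratch operands %3 and %4) and OPCODE == "mulx", two distinct
   non-constant operands produce (illustrative output):

     srl   %L1, 0, %L1     ! zero-extend the low words if needed
     srl   %L2, 0, %L2
     sllx  %H1, 32, %3     ! glue each 64-bit value into one register
     sllx  %H2, 32, %4
     or    %L1, %3, %3
     or    %L2, %4, %4
     mulx  %3, %4, %3
     srlx  %3, 32, %H0     ! split the 64-bit product back
     mov   %3, %L0  */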
12197
12198 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12199 all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn. MODE
12200 and INNER_MODE are the modes describing TARGET. */
12201
12202 static void
12203 vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode,
12204 enum machine_mode inner_mode)
12205 {
12206 rtx t1, final_insn, sel;
12207 int bmask;
12208
12209 t1 = gen_reg_rtx (mode);
12210
12211 elt = convert_modes (SImode, inner_mode, elt, true);
12212 emit_move_insn (gen_lowpart (SImode, t1), elt);
12213
12214 switch (mode)
12215 {
12216 case V2SImode:
12217 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12218 bmask = 0x45674567;
12219 break;
12220 case V4HImode:
12221 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12222 bmask = 0x67676767;
12223 break;
12224 case V8QImode:
12225 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12226 bmask = 0x77777777;
12227 break;
12228 default:
12229 gcc_unreachable ();
12230 }
12231
12232 sel = force_reg (SImode, GEN_INT (bmask));
12233 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, const0_rtx));
12234 emit_insn (final_insn);
12235 }
12236
12237 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12238 all fields of TARGET to ELT in V8QImode by means of the VIS FPMERGE insn. */
12239
12240 static void
12241 vector_init_fpmerge (rtx target, rtx elt)
12242 {
12243 rtx t1, t2, t2_low, t3, t3_low;
12244
12245 t1 = gen_reg_rtx (V4QImode);
12246 elt = convert_modes (SImode, QImode, elt, true);
12247 emit_move_insn (gen_lowpart (SImode, t1), elt);
12248
12249 t2 = gen_reg_rtx (V8QImode);
12250 t2_low = gen_lowpart (V4QImode, t2);
12251 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12252
12253 t3 = gen_reg_rtx (V8QImode);
12254 t3_low = gen_lowpart (V4QImode, t3);
12255 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12256
12257 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12258 }
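/* For example, with ELT == E the three fpmerge steps replicate the
   byte across V8QImode as follows (an illustrative trace):

     t1                            = { 0, 0, 0, E }
     t2 = fpmerge (t1, t1);          t2_low = { 0, 0, E, E }
     t3 = fpmerge (t2_low, t2_low);  t3_low = { E, E, E, E }
     target = fpmerge (t3_low, t3_low)
            = { E, E, E, E, E, E, E, E }  */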
12259
12260 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12261 all fields of TARGET to ELT in V4HImode by means of the VIS FALIGNDATA insn. */
12262
12263 static void
12264 vector_init_faligndata (rtx target, rtx elt)
12265 {
12266 rtx t1 = gen_reg_rtx (V4HImode);
12267 int i;
12268
12269 elt = convert_modes (SImode, HImode, elt, true);
12270 emit_move_insn (gen_lowpart (SImode, t1), elt);
12271
12272 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12273 force_reg (SImode, GEN_INT (6)),
12274 const0_rtx));
12275
12276 for (i = 0; i < 4; i++)
12277 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12278 }
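/* Illustrative note: the alignaddr above sets the GSR.align field
   to 6, so each faligndata extracts bytes 6..13 of the 16-byte
   concatenation {t1, target}.  Bytes 6-7 of t1 hold the halfword E,
   so every iteration shifts E into the top of TARGET while sliding
   the previous contents down one field; after four iterations all
   four V4HImode fields contain E.  */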
12279
12280 /* Emit code to initialize TARGET to values for individual fields VALS. */
12281
12282 void
12283 sparc_expand_vector_init (rtx target, rtx vals)
12284 {
12285 const enum machine_mode mode = GET_MODE (target);
12286 const enum machine_mode inner_mode = GET_MODE_INNER (mode);
12287 const int n_elts = GET_MODE_NUNITS (mode);
12288 int i, n_var = 0;
12289 bool all_same;
12290 rtx mem;
12291
12292 all_same = true;
12293 for (i = 0; i < n_elts; i++)
12294 {
12295 rtx x = XVECEXP (vals, 0, i);
12296 if (!CONSTANT_P (x))
12297 n_var++;
12298
12299 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12300 all_same = false;
12301 }
12302
12303 if (n_var == 0)
12304 {
12305 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12306 return;
12307 }
12308
12309 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12310 {
12311 if (GET_MODE_SIZE (inner_mode) == 4)
12312 {
12313 emit_move_insn (gen_lowpart (SImode, target),
12314 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12315 return;
12316 }
12317 else if (GET_MODE_SIZE (inner_mode) == 8)
12318 {
12319 emit_move_insn (gen_lowpart (DImode, target),
12320 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12321 return;
12322 }
12323 }
12324 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12325 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12326 {
12327 emit_move_insn (gen_highpart (word_mode, target),
12328 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12329 emit_move_insn (gen_lowpart (word_mode, target),
12330 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12331 return;
12332 }
12333
12334 if (all_same && GET_MODE_SIZE (mode) == 8)
12335 {
12336 if (TARGET_VIS2)
12337 {
12338 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12339 return;
12340 }
12341 if (mode == V8QImode)
12342 {
12343 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12344 return;
12345 }
12346 if (mode == V4HImode)
12347 {
12348 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12349 return;
12350 }
12351 }
12352
12353 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12354 for (i = 0; i < n_elts; i++)
12355 emit_move_insn (adjust_address_nv (mem, inner_mode,
12356 i * GET_MODE_SIZE (inner_mode)),
12357 XVECEXP (vals, 0, i));
12358 emit_move_insn (target, mem);
12359 }
12360
12361 /* Implement TARGET_SECONDARY_RELOAD. */
12362
12363 static reg_class_t
12364 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12365 enum machine_mode mode, secondary_reload_info *sri)
12366 {
12367 enum reg_class rclass = (enum reg_class) rclass_i;
12368
12369 sri->icode = CODE_FOR_nothing;
12370 sri->extra_cost = 0;
12371
12372 /* We need a temporary when loading/storing a HImode/QImode value
12373 between memory and the FPU registers. This can happen when combine puts
12374 a paradoxical subreg in a float/fix conversion insn. */
12375 if (FP_REG_CLASS_P (rclass)
12376 && (mode == HImode || mode == QImode)
12377 && (GET_CODE (x) == MEM
12378 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
12379 && true_regnum (x) == -1)))
12380 return GENERAL_REGS;
12381
12382 /* On 32-bit we need a temporary when loading/storing a DFmode value
12383 between unaligned memory and the upper FPU registers. */
12384 if (TARGET_ARCH32
12385 && rclass == EXTRA_FP_REGS
12386 && mode == DFmode
12387 && GET_CODE (x) == MEM
12388 && ! mem_min_alignment (x, 8))
12389 return FP_REGS;
12390
12391 if (((TARGET_CM_MEDANY
12392 && symbolic_operand (x, mode))
12393 || (TARGET_CM_EMBMEDANY
12394 && text_segment_operand (x, mode)))
12395 && ! flag_pic)
12396 {
12397 if (in_p)
12398 sri->icode = direct_optab_handler (reload_in_optab, mode);
12399 else
12400 sri->icode = direct_optab_handler (reload_out_optab, mode);
12401 return NO_REGS;
12402 }
12403
12404 if (TARGET_VIS3 && TARGET_ARCH32)
12405 {
12406 int regno = true_regnum (x);
12407
12408 /* When using VIS3 fp<-->int register moves, on 32-bit we have
12409 to move 8-byte values in 4-byte pieces. This only works via
12410 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
12411 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
12412 an FP_REGS intermediate move. */
12413 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
12414 || ((general_or_i64_p (rclass)
12415 || rclass == GENERAL_OR_FP_REGS)
12416 && SPARC_FP_REG_P (regno)))
12417 {
12418 sri->extra_cost = 2;
12419 return FP_REGS;
12420 }
12421 }
12422
12423 return NO_REGS;
12424 }
12425
12426 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
12427 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
12428
12429 bool
12430 sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
12431 {
12432 enum rtx_code rc = GET_CODE (operands[1]);
12433 enum machine_mode cmp_mode;
12434 rtx cc_reg, dst, cmp;
12435
12436 cmp = operands[1];
12437 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
12438 return false;
12439
12440 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
12441 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
12442
12443 cmp_mode = GET_MODE (XEXP (cmp, 0));
12444 rc = GET_CODE (cmp);
12445
12446 dst = operands[0];
12447 if (! rtx_equal_p (operands[2], dst)
12448 && ! rtx_equal_p (operands[3], dst))
12449 {
12450 if (reg_overlap_mentioned_p (dst, cmp))
12451 dst = gen_reg_rtx (mode);
12452
12453 emit_move_insn (dst, operands[3]);
12454 }
12455 else if (operands[2] == dst)
12456 {
12457 operands[2] = operands[3];
12458
12459 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
12460 rc = reverse_condition_maybe_unordered (rc);
12461 else
12462 rc = reverse_condition (rc);
12463 }
12464
12465 if (XEXP (cmp, 1) == const0_rtx
12466 && GET_CODE (XEXP (cmp, 0)) == REG
12467 && cmp_mode == DImode
12468 && v9_regcmp_p (rc))
12469 cc_reg = XEXP (cmp, 0);
12470 else
12471 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
12472
12473 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
12474
12475 emit_insn (gen_rtx_SET (VOIDmode, dst,
12476 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
12477
12478 if (dst != operands[0])
12479 emit_move_insn (operands[0], dst);
12480
12481 return true;
12482 }
12483
12484 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12485 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12486 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
12487 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12488 code to be used for the condition mask. */
12489
12490 void
12491 sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode)
12492 {
12493 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12494 enum rtx_code code = GET_CODE (operands[3]);
12495
12496 mask = gen_reg_rtx (Pmode);
12497 cop0 = operands[4];
12498 cop1 = operands[5];
12499 if (code == LT || code == GE)
12500 {
12501 rtx t;
12502
12503 code = swap_condition (code);
12504 t = cop0; cop0 = cop1; cop1 = t;
12505 }
12506
12507 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12508
12509 fcmp = gen_rtx_UNSPEC (Pmode,
12510 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12511 fcode);
12512
12513 cmask = gen_rtx_UNSPEC (DImode,
12514 gen_rtvec (2, mask, gsr),
12515 ccode);
12516
12517 bshuf = gen_rtx_UNSPEC (mode,
12518 gen_rtvec (3, operands[1], operands[2], gsr),
12519 UNSPEC_BSHUFFLE);
12520
12521 emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
12522 emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));
12523
12524 emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
12525 }
12526
12527 /* On sparc, any mode which naturally allocates into the float
12528 registers should return 4 here. */
12529
12530 unsigned int
12531 sparc_regmode_natural_size (enum machine_mode mode)
12532 {
12533 int size = UNITS_PER_WORD;
12534
12535 if (TARGET_ARCH64)
12536 {
12537 enum mode_class mclass = GET_MODE_CLASS (mode);
12538
12539 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12540 size = 4;
12541 }
12542
12543 return size;
12544 }
12545
12546 /* Return TRUE if it is a good idea to tie two pseudo registers
12547 when one has mode MODE1 and one has mode MODE2.
12548 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12549 for any hard reg, then this must be FALSE for correct output.
12550
12551 For V9 we have to deal with the fact that only the lower 32 floating
12552 point registers are 32-bit addressable. */
12553
12554 bool
12555 sparc_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
12556 {
12557 enum mode_class mclass1, mclass2;
12558 unsigned short size1, size2;
12559
12560 if (mode1 == mode2)
12561 return true;
12562
12563 mclass1 = GET_MODE_CLASS (mode1);
12564 mclass2 = GET_MODE_CLASS (mode2);
12565 if (mclass1 != mclass2)
12566 return false;
12567
12568 if (! TARGET_V9)
12569 return true;
12570
12571 /* Classes are the same and we are V9 so we have to deal with upper
12572 vs. lower floating point registers. If one of the modes is a
12573 4-byte mode, and the other is not, we have to mark them as not
12574 tieable because only the lower 32 floating point register are
12575 addressable 32-bits at a time.
12576
12577 We can't just test explicitly for SFmode, otherwise we won't
12578 cover the vector mode cases properly. */
12579
12580 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12581 return true;
12582
12583 size1 = GET_MODE_SIZE (mode1);
12584 size2 = GET_MODE_SIZE (mode2);
12585 if ((size1 > 4 && size2 == 4)
12586 || (size2 > 4 && size1 == 4))
12587 return false;
12588
12589 return true;
12590 }
12591
12592 /* Implement TARGET_CSTORE_MODE. */
12593
12594 static enum machine_mode
12595 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12596 {
12597 return (TARGET_ARCH64 ? DImode : SImode);
12598 }
12599
12600 /* Return the compound expression made of T1 and T2. */
12601
12602 static inline tree
12603 compound_expr (tree t1, tree t2)
12604 {
12605 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
12606 }
12607
12608 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
12609
12610 static void
12611 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12612 {
12613 if (!TARGET_FPU)
12614 return;
12615
12616 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
12617 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
12618
12619 /* We generate the equivalent of feholdexcept (&fenv_var):
12620
12621 unsigned int fenv_var;
12622 __builtin_store_fsr (&fenv_var);
12623
12624 unsigned int tmp1_var;
12625 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
12626
12627 __builtin_load_fsr (&tmp1_var); */
12628
12629 tree fenv_var = create_tmp_var (unsigned_type_node, NULL);
12630 mark_addressable (fenv_var);
12631 tree fenv_addr = build_fold_addr_expr (fenv_var);
12632 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
12633 tree hold_stfsr = build_call_expr (stfsr, 1, fenv_addr);
12634
12635 tree tmp1_var = create_tmp_var (unsigned_type_node, NULL);
12636 mark_addressable (tmp1_var);
12637 tree masked_fenv_var
12638 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
12639 build_int_cst (unsigned_type_node,
12640 ~(accrued_exception_mask | trap_enable_mask)));
12641 tree hold_mask
12642 = build2 (MODIFY_EXPR, void_type_node, tmp1_var, masked_fenv_var);
12643
12644 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
12645 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
12646 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
12647
12648 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
12649
12650 /* We reload the value of tmp1_var to clear the exceptions:
12651
12652 __builtin_load_fsr (&tmp1_var); */
12653
12654 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
12655
12656 /* We generate the equivalent of feupdateenv (&fenv_var):
12657
12658 unsigned int tmp2_var;
12659 __builtin_store_fsr (&tmp2_var);
12660
12661 __builtin_load_fsr (&fenv_var);
12662
12663 if (SPARC_LOW_FE_EXCEPT_VALUES)
12664 tmp2_var >>= 5;
12665 __atomic_feraiseexcept ((int) tmp2_var); */
12666
12667 tree tmp2_var = create_tmp_var (unsigned_type_node, NULL);
12668 mark_addressable (tmp2_var);
12669 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
12670 tree update_stfsr = build_call_expr (stfsr, 1, tmp2_addr);
12671
12672 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
12673
12674 tree atomic_feraiseexcept
12675 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12676 tree update_call
12677 = build_call_expr (atomic_feraiseexcept, 1,
12678 fold_convert (integer_type_node, tmp2_var));
12679
12680 if (SPARC_LOW_FE_EXCEPT_VALUES)
12681 {
12682 tree shifted_tmp2_var
12683 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
12684 build_int_cst (unsigned_type_node, 5));
12685 tree update_shift
12686 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
12687 update_call = compound_expr (update_shift, update_call);
12688 }
12689
12690 *update
12691 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
12692 }
12693
12694 #include "gt-sparc.h"